Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for importing rsync-time-backup backups. #65

Merged
merged 3 commits into from
Jun 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,15 @@ Usage: ``borg-import rsynchl RSYNC_ROOT BORG_REPOSITORY``

See ``borg-import rsynchl -h`` for help.

`rsync-time-backup <https://github.com/laurent22/rsync-time-backup>`_
---------------------------------------------------------------------

Similar to ``rsynchl``, except with timestamp extraction optimized for ``rsync-time-backup`` folder names.

Usage: ``borg-import rsync_tmbackup --prefix=foo- RSYNC_ROOT BORG_REPOSITORY``

See ``borg-import rsync_tmbackup -h`` for help.

Backup tools based on rsync with hard links
-------------------------------------------

Expand Down
24 changes: 22 additions & 2 deletions src/borg_import/helpers/testsuite/test_timestamps.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,27 @@ def test_datetime_from_mtime(tmpdir):


def test_datetime_from_string():
    """Check that datetime_from_string yields tz-aware UTC datetimes.

    Covers an ISO-like string without zone info, ``date`` tool output that
    names UTC explicitly, the rsync-time-backup folder name format, and an
    unparsable string (which must raise ValueError).

    The expected values are tz-aware: an aware datetime never compares equal
    to a naive one, so comparing the result against a plain ``datetime(...)``
    would always fail.
    """
    # No zone info in the string: the expectation mirrors the parser by
    # converting the naive value to UTC with astimezone().
    dfs = datetime_from_string('1999-12-31T23:59:59')
    dt_trg = datetime(1999, 12, 31, 23, 59, 59).astimezone(tz=timezone.utc)
    assert dfs == dt_trg
    # Of course, two datetimes can be equal in different timezones. Make
    # sure the timezone info matches UTC, which borg itself expects.
    assert dfs.tzinfo == dt_trg.tzinfo == timezone.utc

    # FIXME: When this format is passed to datetime_from_string, the internal
    # strptime discards timezone info, and creates a naive time.
    # UTC is handled specially inside datetime_from_string to accommodate
    # strptime's quirks; local conversions using this format may or may not work.
    dfs = datetime_from_string('Mon Oct 31 23:35:50 UTC 2016')
    dt_trg = datetime(2016, 10, 31, 23, 35, 50, tzinfo=timezone.utc)
    assert dfs == dt_trg
    assert dfs.tzinfo == dt_trg.tzinfo == timezone.utc

    # rsync-time-backup format.
    dfs = datetime_from_string('2022-12-21-063019')
    dt_trg = datetime(2022, 12, 21, 6, 30, 19).astimezone(tz=timezone.utc)
    assert dfs == dt_trg
    assert dfs.tzinfo == dt_trg.tzinfo == timezone.utc

    # Anything matching none of the known formats must be rejected.
    with pytest.raises(ValueError):
        datetime_from_string('total crap')
27 changes: 25 additions & 2 deletions src/borg_import/helpers/timestamps.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,17 @@ def datetime_from_mtime(path):
at backup time).
"""
t = path.stat().st_mtime
# Borg needs tz-aware timestamps in UTC timezone.
return datetime.fromtimestamp(t, tz=timezone.utc)


def datetime_from_string(s):
"""
parse datetime from a string

returns a datetime object if the format could be parsed.
returns a tz-aware datetime object in UTC timezone if the format could be
parsed.

raises ValueError if not.
"""
s = s.strip()
Expand All @@ -29,10 +32,30 @@ def datetime_from_string(s):
'%Y-%m-%d %H:%M',
# date tool output [C / en_US locale]:
'%a %b %d %H:%M:%S %Z %Y',
# rsync-time-backup format
'%Y-%m-%d-%H%M%S'
# for more, see https://xkcd.com/1179/
]:
try:
return datetime.strptime(s, ts_format)
if ts_format in ('%a %b %d %H:%M:%S %Z %Y',) and 'UTC' in s:
# %Z returns a naive datetime, despite a timezone being specified.
# However, strptime %Z only tends to work on local times and
# UTC.
#
# Per astimezone docs:
# If self is naive, it is presumed to represent time in the
# system timezone.
#
# If we had a UTC timezone, prevent conversion to aware
# datetime from assuming a local timezone before conversion
# to UTC.
return datetime.strptime(s, ts_format).replace(tzinfo=timezone.utc)
else:
# If "UTC" wasn't specified using the above ts_format, assume
# the timezone specified was local and hope for the best.
# This handles all other ts_formats as well, which are assumed
# to be local since they don't carry timezone.
return datetime.strptime(s, ts_format).astimezone(tz=timezone.utc)
except ValueError:
# didn't work with this format, try next
pass
Expand Down
76 changes: 76 additions & 0 deletions src/borg_import/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@

from .rsnapshots import get_snapshots
from .rsynchl import get_rsyncsnapshots
from .rsync_tmbackup import get_tmbackup_snapshots
ThomasWaldmann marked this conversation as resolved.
Show resolved Hide resolved

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -206,6 +207,81 @@ def import_rsynchl(self, args):
import_journal.unlink()


class rsyncTmBackupImporter(Importer):
    # Importer for rsync-time-backup backup sets. Each snapshot folder found
    # under the rsync root is temporarily renamed to a fixed directory name,
    # imported as one borg archive, and then renamed back.
    name = 'rsync_tmbackup'
    description = 'import rsync-time-backup backups'
    epilog = """
Imports from rsync-time-backup backup sets by renaming each snapshot to a
common name independent of the snapshot, which allows the Borg files cache
to work with maximum efficiency. The only difference between this and
generic rsync+hardlink backups is how archive timestamps are derived.

An archive will be created for each folder in the rsync_root. A non-empty
prefix is required. The archive name will be the prefix concatenated with
the timestamp folder name (%Y-%m-%d-%H%M%S local time) and the archive
timestamp will be derived from the folder name (ISO 8601 UTC). If the borg
repository already contains an archive with the derived name, that folder
will be skipped.

The directory is called "borg-import-dir" inside the specified root,
and borg-import will note which snapshot is currently located there
in a file called "borg-import-dir.snapshot" besides it, in case
things go wrong.

Otherwise nothing in the rsync root is modified, and neither
are the contents of the snapshots.
"""

    def populate_parser(self, parser):
        """Register the positional arguments and the handler on *parser*."""
        parser.add_argument(
            'rsync_root',
            metavar='RSYNC_ROOT',
            help='Path to root directory',
            type=Path,
        )
        # TODO: support the full wealth of borg possibilities
        parser.add_argument(
            'repository',
            metavar='BORG_REPOSITORY',
            help='Borg repository (must be an absolute local path or a remote repo specification)',
        )
        parser.set_defaults(function=self.import_rsync_tmbackup)

    def import_rsync_tmbackup(self, args):
        """Import every snapshot below ``args.rsync_root`` into the repository.

        Returns 1 if the staging directory already exists or if
        ``args.prefix`` is empty; otherwise falls off the end (returns None)
        once all snapshots have been processed.
        """
        existing_archives = list_borg_archives(args)

        # Fixed staging location plus a journal file naming the snapshot that
        # currently occupies it.
        import_path = args.rsync_root / 'borg-import-dir'
        import_journal = args.rsync_root / 'borg-import-dir.snapshot'

        if import_path.exists():
            print('{} exists. Cannot continue.'.format(import_path))
            return 1

        if not args.prefix:
            print('"--prefix" argument must be non-empty to use rsync-time-backup import')
            return 1

        for snapshot in get_tmbackup_snapshots(args.rsync_root, args.prefix):
            name = snapshot['name']
            original_path = snapshot['path']
            timestamp = snapshot['timestamp'].replace(microsecond=0)

            if name in existing_archives:
                print('Skipping (already exists in repository):', name)
                continue

            print('Importing {} (timestamp {}) '.format(name, timestamp))
            log.debug('  Moving {} -> {}'.format(original_path, import_path))

            # We move the snapshots to import_path so that the files cache in Borg can work effectively.

            # Record what is about to be moved before touching the snapshot.
            with import_journal.open('w') as journal:
                journal.writelines([
                    'Current snapshot: %s\n' % name,
                    'Original path: %s\n' % original_path,
                ])

            original_path.rename(import_path)

            try:
                borg_import(args, name, import_path, timestamp=timestamp)
            finally:
                # Put the snapshot back under its own name even if the
                # import raised, then drop the journal.
                log.debug('  Moving {} -> {}'.format(import_path, original_path))
                import_path.rename(original_path)
                import_journal.unlink()


def build_parser():
common_parser = argparse.ArgumentParser(add_help=False)
common_group = common_parser.add_argument_group('Common options')
Expand Down
37 changes: 37 additions & 0 deletions src/borg_import/rsync_tmbackup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import re
from pathlib import Path

from .helpers.discover import discover, parser
from .helpers.names import make_name
from .helpers.timestamps import datetime_from_string


def get_tmbackup_snapshots(root, prefix):
    """Yield metadata for each snapshot discovered in the rsync root directory.

    ``root`` is the rsync-time-backup destination directory and ``prefix`` is
    prepended to every folder name to build the archive name.

    Each yielded dict has the keys:

    * ``name``      -- ``make_name(prefix + folder name)``
    * ``path``      -- ``root / <discovered path>``
    * ``timestamp`` -- ``datetime_from_string()`` of the folder name; for the
      ``latest`` entry, of the name of the directory it resolves to.

    Raises FileNotFoundError if ``root`` does not contain a ``backup.marker``
    file.
    """
    root = Path(root)

    # Look for the marker inside the backup root, not in whatever directory
    # the process happens to be running from.
    if not (root / "backup.marker").exists():
        raise FileNotFoundError("backup.marker file should exist for rsync-time-backup import")

    regex = re.compile(r'(?P<snapshot_date>.+)')

    for path in discover(str(root), 1):
        parsed = parser(path, regex)
        if parsed is None:
            # Nothing to extract from this entry; skip it instead of
            # subscripting None.
            continue

        snapshot_date = parsed['snapshot_date']
        if snapshot_date == "latest":
            # latest is a symlink to the most recent build. Import it anyway
            # in case user wants to do borg mount/has existing references
            # to latest. Its timestamp comes from the name of the directory
            # it points to, resolved relative to root.
            timestamp_source = (root / "latest").resolve().name
        else:
            timestamp_source = path

        yield dict(
            name=make_name("".join([prefix, snapshot_date])),
            path=root / path,
            timestamp=datetime_from_string(timestamp_source),
        )
Loading