
initial role
haslersn committed Oct 24, 2024
1 parent ce11ad7 commit c75044f
Showing 6 changed files with 152 additions and 27 deletions.
91 changes: 65 additions & 26 deletions files/zfs-restic-uploader
@@ -7,6 +7,9 @@ import subprocess
import datetime
import json
import udatetime
import re
import sys


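# Hidden directory under each mounted dataset where ZFS exposes read-only snapshot contents.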
ZFS_SNAPSHOTDIR = '.zfs/snapshot'

@@ -16,18 +19,28 @@ LOGICAL_REFERENCED_TAG = "logicalreferenced="
DEBUG = False


def _run(command: str, input: Optional[str] = None, void_stderr: bool = False) -> None:
    other_args = dict()
    if void_stderr and not DEBUG:
        other_args["stderr"] = subprocess.DEVNULL
    subprocess.run(command, shell=True, text=True, input=input, **other_args)
# class which we use to prevent buffering stdout
class Unbuffered(object):
    def __init__(self, stream):
        self.stream = stream
    def write(self, data):
        self.stream.write(data)
        self.stream.flush()
    def writelines(self, datas):
        self.stream.writelines(datas)
        self.stream.flush()
    def __getattr__(self, attr):
        return getattr(self.stream, attr)


def _run(command: str, input: Optional[str] = None, stdout=None, stderr=None, check: bool = True) -> subprocess.CompletedProcess:
    return subprocess.run(command, shell=True, text=True, input=input, stdout=stdout, stderr=stderr, check=check)


def _eval(command: str, input: Optional[str] = None, void_stderr: bool = False) -> str:
    other_args = dict()
    if void_stderr and not DEBUG:
        other_args["stderr"] = subprocess.DEVNULL
    return subprocess.run(command, shell=True, text=True, stdout=subprocess.PIPE, input=input, **other_args).stdout
def _eval(command: str, input: Optional[str] = None, stdout=None, stderr=None, check: bool = True) -> str:
    if stdout is None:
        stdout = subprocess.PIPE
    return subprocess.run(command, shell=True, text=True, input=input, stdout=stdout, stderr=stderr, check=check).stdout


def _get_year(timestamp: int) -> int:
@@ -48,10 +61,12 @@ class Backuper:
                 restic_repo_prefix: str,
                 zfs_dataset_common_prefix: str,
                 restic_password_file: str,
                 exclude_snapnames_regex: str,
                 dry_run: bool):
        self.restic_repo_prefix: str = restic_repo_prefix.rstrip("/")
        self.zfs_dataset_common_prefix: str = zfs_dataset_common_prefix
        self.restic_password_file: str = restic_password_file
        self.exclude_snapnames_regex = re.compile(exclude_snapnames_regex)
        self.dry_run: bool = dry_run
        self._dry_run_finished_backups: List[Dict[str, Any]] = []

@@ -62,7 +77,7 @@ class Backuper:
        return f"restic {arg_string}"

    def _get_dataset_snapshots(self, dataset_name: str) -> List[Dict[str, Any]]:
        lines = _eval(f"sudo zfs list -Hp -o name,creation,used,logicalreferenced -t snapshot '{dataset_name}'")
        lines = _eval(f"zfs list -Hp -o name,creation,used,logicalreferenced -t snapshot '{dataset_name}'")
        snapshots: List[Dict[str, Any]] = []
        for line in lines.split("\n"):
            if len(line) == 0:
@@ -77,15 +92,18 @@ snapshots.append(snapshot)
            snapshots.append(snapshot)
        snapshots_with_size = []
        for i, snapshot in enumerate(snapshots):
            snapshot_name = snapshot["name"]
            if self.exclude_snapnames_regex.match(snapshot["name"]):
                print(f"Not considering snapshot {dataset_name}@{snapshot_name} - excluded by regex")
                continue
            if i == 0 or snapshots[i - 1]["used"] != 0:
                snapshots_with_size.append(snapshot)
                continue
            parent_name = snapshots[i - 1]["name"]
            snapshot_name = snapshot["name"]
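            # Keep the snapshot if "zfs diff" against its parent produces any output at all (first byte present); otherwise its diff is empty.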
            if "0\n" != _eval(f"zfs diff {dataset_name}@{parent_name} {dataset_name}@{snapshot_name} 2>&1 | head -c1 | wc -c"):
                snapshots_with_size.append(snapshot)
                continue
            print(F"Not considering snapshot {dataset_name}@{snapshot_name} because of zero diff.")
            print(f"Not considering snapshot {dataset_name}@{snapshot_name} because of zero diff.")
        return snapshots_with_size

    def _get_snapshot_tag(self, datum: Dict[str, Any]) -> str:
@@ -113,7 +131,7 @@ class Backuper:
        return repo_name, path_in_restic_repo

    def _init_restic_repo(self, restic_repo):
        result = _eval(self._restic_cmd(restic_repo, "cat", ["config"]), void_stderr=True)
        result = _eval(self._restic_cmd(restic_repo, "cat", ["config"]), stderr=subprocess.DEVNULL, check=False)
        if "chunker_polynomial" not in result:
            print(f"Initializing restic repo {restic_repo}.")
            _run(self._restic_cmd(restic_repo, "init"))
@@ -125,7 +143,7 @@ class Backuper:
        _run(self._restic_cmd(restic_repo, "check"))

    def _pre(self, dataset_name):
        _run(f"zfs mount {dataset_name}")
        _run(f"zfs mount {dataset_name}", check=False)
        restic_repo, _ = self._get_repo_name_and_path(dataset_name)
        self._init_restic_repo(restic_repo)

@@ -142,16 +160,20 @@

        snapshot_time_readable = str(datetime.datetime.fromtimestamp(snapshot["creation"]))

        # Hack to allow colon in snapname
        tmpdir = _eval("mktemp -d").strip()
        _run(f"ln -s {snapshot_path} {tmpdir}/snapshot")
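        # The symlink keeps ':' out of the proot bind argument below, since proot parses ':' as the separator between host and guest paths.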

        # Use proot to "mount" correct path. See https://github.com/restic/restic/issues/2092
        proot_command = f"proot -b '{snapshot_path}':'{path_in_restic_repo}'"
        proot_command = f"proot -b '{tmpdir}/snapshot':'{path_in_restic_repo}'"
        logical_referenced = snapshot["logicalreferenced"]
        tags = [f"{SNAPSHOT_TAG}{snapshot_name}",
                f"{LOGICAL_REFERENCED_TAG}{logical_referenced}"]
        tags_with_flag = []
        for tag in tags:
            tags_with_flag.append("--tag")
            tags_with_flag.append(tag)
        restic_backup_args = ["--ignore-ctime", "--time", snapshot_time_readable, "--compression", "max"] + tags_with_flag
        restic_backup_args = ["--ignore-ctime", "--time", snapshot_time_readable, "--compression", "max", "--exclude-caches"] + tags_with_flag
        if parent_restic_snapshot_id is not None:
            restic_backup_args += ["--parent", parent_restic_snapshot_id]
        restic_backup_args.append(path_in_restic_repo)
@@ -229,7 +251,7 @@ class Backuper:
                print(F"Skipping snapshot {dataset_name}@{snapshot_name} because it does not need to be kept according to the policy.")
                continue
            if snapshot_name in snapshot_names_in_restic:
                print(F"Skipping snapshot {dataset_name}@{snapshot_name} because it's already migrated.")
                print(F"Skipping snapshot {dataset_name}@{snapshot_name} because it's already uploaded.")
                continue
            return snapshot
        return None
@@ -243,7 +265,7 @@ class Backuper:

        snapshot = self._find_next_snapshot(dataset_name, snapshots, snapshots_in_restic, keep_last_n, keep_weekly_n, keep_monthly_n)
        if snapshot is None:
            print(f"No further snapshots need to backuped for {dataset_name}.")
            print(f"No further snapshots need to be uploaded for {dataset_name}.")
            return None

        parent_restic_snapshot_id = None
@@ -278,15 +300,20 @@ def main():
    if os.geteuid() != 0:
        print("Please run as root.")
        exit(1)
    parser = argparse.ArgumentParser(description='Migrate zfs backups to restic.')

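    # stdout is unbuffered so progress messages show up immediately, e.g. in the systemd journal when run from the timer.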
    sys.stdout = Unbuffered(sys.stdout)

    parser = argparse.ArgumentParser(description='Upload ZFS snapshots to restic.')
    parser.add_argument('-r', '--restic-repo-prefix', required=True,
                        help='The prefix used for the restic repo. The dataset name is appended to it.')
    parser.add_argument('-c', '--zfs-dataset-common-prefix', default="",
                        help='The prefix which should be removed from each dataset name for use in the restic repo. E.g. backup01')
    parser.add_argument('-p', '--restic-password-file', required=True,
                        help='The path to the restic password file.')
    parser.add_argument('--exclude-snapnames-regex', required=False,
                        help='Do not consider ZFS snapshots whose snapname matches this regex for uploading.')
    parser.add_argument('--dry-run', required=False, action='store_true',
                        help='Perform a dryrun, do not backup anything.')
                        help='Perform a dry-run, do not back up anything.')

    subparsers = parser.add_subparsers(title='commands', description="The command to run", required=True, dest='subparser_name')

@@ -308,9 +335,10 @@ def main():
    parser_next_snapshot.add_argument('--keep-monthly-n', default=None, type=int,
                                      help="Keep the last n monthly snapshots. A monthly snapshot is the newest snapshot in a month. Defaults to all")

    parser_single_dataset = subparsers.add_parser('dataset', help='Backup all snapshots of a dataset')
    parser_single_dataset.add_argument('dataset_name',
                                       help="The name of the dataset to backup.")
    parser_single_dataset = subparsers.add_parser('dataset', help='Backup all snapshots of one or more datasets')
    parser_single_dataset.add_argument('dataset_names',
                                       help="The name(s) of the dataset(s) to back up. Multiple datasets can be given as separate consecutive arguments.",
                                       nargs="+")
    parser_single_dataset.add_argument('--keep-last-n', default=None, type=int,
                                       help="Keep the last n snapshots. Defaults to all")
    parser_single_dataset.add_argument('--keep-weekly-n', default=None, type=int,
@@ -320,7 +348,7 @@

    args = parser.parse_args()

    backuper = Backuper(restic_repo_prefix=args.restic_repo_prefix, zfs_dataset_common_prefix=args.zfs_dataset_common_prefix, restic_password_file=args.restic_password_file, dry_run=args.dry_run)
    backuper = Backuper(restic_repo_prefix=args.restic_repo_prefix, zfs_dataset_common_prefix=args.zfs_dataset_common_prefix, restic_password_file=args.restic_password_file, exclude_snapnames_regex=args.exclude_snapnames_regex, dry_run=args.dry_run)

    if args.subparser_name == "single_snapshot":
        if args.parent_snapshot is None:
@@ -329,7 +357,18 @@
    elif args.subparser_name == "next_snapshot_in_dataset":
        backuper.backup_next_snapshot_from_dataset(dataset_name=args.dataset_name, keep_last_n=args.keep_last_n, keep_weekly_n=args.keep_weekly_n, keep_monthly_n=args.keep_monthly_n)
    elif args.subparser_name == "dataset":
        backuper.backup_dataset(dataset_name=args.dataset_name, keep_last_n=args.keep_last_n, keep_weekly_n=args.keep_weekly_n, keep_monthly_n=args.keep_monthly_n)
        exceptions = {}
        for dataset_name in args.dataset_names:
            print(f"Start processing dataset {dataset_name}")
            try:
                backuper.backup_dataset(dataset_name=dataset_name, keep_last_n=args.keep_last_n, keep_weekly_n=args.keep_weekly_n, keep_monthly_n=args.keep_monthly_n)
            except Exception as e:
                exceptions[dataset_name] = e
        if exceptions:
            print("Error: there were errors for some datasets:")
            for dataset_name, e in exceptions.items():
                print(f"  While processing dataset {dataset_name}:\n{e}")



if __name__ == "__main__":
3 changes: 3 additions & 0 deletions handlers/main.yml
@@ -0,0 +1,3 @@
- name: Reload systemd configuration
  service:
    daemon_reload: True
52 changes: 51 additions & 1 deletion tasks/main.yml
@@ -1,2 +1,52 @@
---
- name: Install dependencies
  apt:
    name:
      - proot
      - python3-udatetime
      - restic

- name: Install zfs-restic-uploader
  copy:
    src: zfs-restic-uploader
    dest: /opt/zfs-restic-uploader
    mode: 0755

- name: Create config directory
  file:
    path: /etc/zfs-restic-uploader
    state: directory

- name: Place restic password
  copy:
    content: "{{ zru_restic_repo_password }}"
    dest: /etc/zfs-restic-uploader/restic-password
    mode: 0600

- name: Place env file
  template:
    src: env.j2
    dest: /etc/zfs-restic-uploader/env
    mode: 0600

- name: Create systemd service
  template:
    src: service.j2
    dest: /etc/systemd/system/zfs-restic-uploader.service
    mode: 0644
  notify:
    - Reload systemd configuration

- name: Create systemd timer
  template:
    src: timer.j2
    dest: /etc/systemd/system/zfs-restic-uploader.timer
    mode: 0644
  notify:
    - Reload systemd configuration

- meta: flush_handlers

- name: Enable zfs-restic-uploader timer
  systemd:
    name: zfs-restic-uploader.timer
    enabled: true
4 changes: 4 additions & 0 deletions templates/env.j2
@@ -0,0 +1,4 @@
# {{ ansible_managed }}
AWS_ACCESS_KEY_ID={{ zru_access_key_id | quote }}
AWS_SECRET_ACCESS_KEY={{ zru_secret_access_key | quote }}
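# restic keeps its local cache under $XDG_CACHE_HOME/restic.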
XDG_CACHE_HOME={{ zru_cache_directory }}
19 changes: 19 additions & 0 deletions templates/service.j2
@@ -0,0 +1,19 @@
# {{ ansible_managed }}
[Unit]
Description=upload ZFS snapshots to restic repository
Requires=zfs.target
After=zfs.target

[Service]
Type=oneshot
EnvironmentFile=/etc/zfs-restic-uploader/env
ExecStart=/opt/zfs-restic-uploader \
-r {{ zru_restic_repo_prefix | quote }} \
-p /etc/zfs-restic-uploader/restic-password \
-c {{ zru_zfs_dataset_common_prefix | quote }} \
--exclude-snapnames-regex {{ zru_exclude_snapnames_regex | quote }} \
dataset {{ zru_zfs_datasets | map("quote") | join(" ") }} \
--keep-last-n {{ zru_keep_last_n | quote }} \
--keep-weekly-n {{ zru_keep_weekly_n | quote }} \
--keep-monthly-n {{ zru_keep_monthly_n | quote }}
Restart=on-failure
10 changes: 10 additions & 0 deletions templates/timer.j2
@@ -0,0 +1,10 @@
# {{ ansible_managed }}
[Unit]
Description=upload ZFS snapshots to restic repository

[Timer]
OnCalendar={{ zru_schedule }}
Unit=zfs-restic-uploader.service

[Install]
WantedBy=timers.target
