-
Notifications
You must be signed in to change notification settings - Fork 3
/
check_restic.py
175 lines (145 loc) · 6.11 KB
/
check_restic.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
#!/usr/bin/env python3
import argparse
import logging
import subprocess
import json
import datetime
import dateutil.parser
import nagiosplugin
import os
# Module-level logger. It is fetched under the name 'nagiosplugin' rather than
# __name__ -- presumably so that messages follow the verbosity handling set up
# by nagiosplugin's check.main(); confirm against the nagiosplugin docs.
_log = logging.getLogger('nagiosplugin')
class Restic(nagiosplugin.Resource):
    """
    Nagios resource that measures the age of the latest restic snapshots

    probe() runs ``restic snapshots --json --no-lock --latest 1`` and yields
    one ``last_snapshot_age`` metric, expressed in hours, for every distinct
    set of snapshot paths found in the output.
    """

    def __init__(self, restic_bin='restic', host=None, path=None, repo=None,
                 password_file=None, sudo=False):
        """
        Remember the options used to build the restic command line
        :param restic_bin: path or name of the restic executable
        :param host: when set, passed to restic as ``--host``
        :param path: when set, passed to restic as ``--path``
        :param repo: when set, passed to restic as ``--repo``
        :param password_file: when set, passed to restic as
            ``--password-file``
        :param sudo: when true, the command is prefixed with ``sudo``
        """
        self.restic_bin = restic_bin
        self.host = host
        self.path = path
        self.repo = repo
        self.password_file = password_file
        self.sudo = sudo

    def _require_settings(self):
        """
        Ensure a repository and a password source are configured
        :raises nagiosplugin.CheckError: if neither the constructor arguments
            nor the corresponding RESTIC_* environment variables provide them
        """
        if not self.repo and not os.environ.get('RESTIC_REPOSITORY'):
            raise nagiosplugin.CheckError(
                'Please specify repository location (-r, --repo or '
                '$RESTIC_REPOSITORY)')
        if not self.password_file and \
                not (os.environ.get('RESTIC_PASSWORD') or
                     os.environ.get('RESTIC_PASSWORD_FILE')):
            raise nagiosplugin.CheckError(
                'Please specify password or its location (-p, --password-file,'
                ' $RESTIC_PASSWORD or $RESTIC_PASSWORD_FILE)')

    def _build_command(self):
        """
        Assemble the restic argument list from the stored options
        :return: list of command line arguments
        """
        cmd = [self.restic_bin, 'snapshots', '--json', '--no-lock',
               '--latest', '1']
        if self.sudo:
            cmd = ['sudo'] + cmd
        if self.host:
            cmd.extend(['--host', self.host])
        if self.path:
            cmd.extend(['--path', self.path])
        if self.repo:
            cmd.extend(['--repo', self.repo])
        if self.password_file:
            cmd.extend(['--password-file', self.password_file])
        return cmd

    def _load_snapshots(self, cmd):
        """
        Execute restic and decode the JSON it prints on stdout
        :param cmd: argument list to execute
        :return: the decoded, non-empty JSON document
        :raises nagiosplugin.CheckError: if the command cannot be started,
            exits with a non-zero status, prints invalid JSON, or reports no
            snapshots
        """
        try:
            # stdout and stderr are captured separately so that warnings
            # written to stderr cannot corrupt the JSON document on stdout.
            proc = subprocess.run(cmd, stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE)
        except OSError as e:
            raise nagiosplugin.CheckError('Failed to run %s: %s' % (
                ' '.join(cmd), e)) from e
        if proc.returncode != 0:
            # errors='replace' keeps undecodable bytes from masking the real
            # failure with a UnicodeDecodeError.
            output = (proc.stderr + proc.stdout).decode(errors='replace')
            raise nagiosplugin.CheckError('Failed to run %s: %s' % (
                ' '.join(cmd), output))
        if proc.stderr:
            _log.info('restic wrote to stderr: %s',
                      proc.stderr.decode(errors='replace'))
        restic_result = proc.stdout
        _log.debug('Got output: %s', restic_result)
        try:
            snapshots = json.loads(restic_result)
        except json.JSONDecodeError as e:
            raise nagiosplugin.CheckError(
                'Unable to parse restic output: %s' % e) from e
        _log.debug('Output decoded to: %s', snapshots)
        if not snapshots:
            raise nagiosplugin.CheckError('Could not find snapshots')
        return snapshots

    def probe(self):
        """
        Run restic and parse its output
        :return: generator of nagiosplugin.Metric objects, one per distinct
            set of snapshot paths, whose value is the snapshot age in hours
        :raises nagiosplugin.CheckError: if the configuration is incomplete
            or restic fails (see the helper methods for details)
        """
        # Configuration is validated here rather than earlier because, per
        # the original author's note, check.main() is the only place where
        # exceptions are printed nicely.
        self._require_settings()
        cmd = self._build_command()
        _log.info('Using command: %s', ' '.join(cmd))
        snapshots = self._load_snapshots(cmd)

        # Keep the first snapshot listed for each set of paths; dict order
        # preserves the order in which the path sets first appear.
        # NOTE(review): snapshots from different hosts with identical paths
        # share a key, so only the first one listed is reported -- confirm
        # this is the intended behaviour when --host is not given.
        last_snapshots = {}
        for snapshot in snapshots:
            last_snapshots.setdefault('_'.join(snapshot['paths']), snapshot)

        # One reference time for all metrics. The subtraction needs the
        # parsed timestamp to be timezone-aware; restic timestamps presumably
        # always carry a UTC offset -- TODO confirm.
        now = datetime.datetime.now(datetime.UTC)
        for path, snapshot in last_snapshots.items():
            snapshot_age = now - dateutil.parser.parse(snapshot['time'])
            yield nagiosplugin.Metric(
                path, snapshot_age.total_seconds() / (60 * 60), uom='h',
                context='last_snapshot_age')
class ResticSummary(nagiosplugin.Summary):
    """
    Builds the status line text for the snapshot age check
    """

    @staticmethod
    def _age_phrase(result):
        """
        Describe a single result as "<metric name> is <value> hours old"
        :param result: result whose metric supplies the name and value
        :return: formatted phrase
        """
        metric = result.metric
        return '%s is %.2f hours old' % (metric.name, metric.value)

    def ok(self, results):
        """
        List every snapshot and its age
        :param results: results collected by the check
        :return: status line text
        """
        phrases = []
        for result in results:
            phrases.append(self._age_phrase(result))
        return 'Snapshot %s' % ', '.join(phrases)

    def problem(self, results):
        """
        List only the snapshots whose state is not Ok; if the first stored
        result is Unknown, return its hint instead
        :param results: results collected by the check
        :return: status line text
        """
        if results.results[0].state == nagiosplugin.Unknown:
            return results.results[0].hint
        phrases = []
        for result in results:
            if result.state != nagiosplugin.Ok:
                phrases.append(self._age_phrase(result))
        return 'Snapshot %s' % ', '.join(phrases)
@nagiosplugin.guarded
def main():
    """
    Parse the command line, assemble the nagiosplugin check and run it
    :return:
    """
    # Declared as data and registered in a loop; the order matches the order
    # in which the options are shown in --help.
    cli_options = (
        (('--sudo',), dict(
            action='store_true',
            help='Use "sudo" when invoking restic (default: %(default)s)')),
        (('--restic-bin',), dict(
            type=str, metavar='RESTIC-BIN', default='restic',
            help='Path to the restic binary, or the name of restic in $PATH '
                 '(default: %(default)s)')),
        (('-w', '--warning'), dict(
            metavar='HOURS', type=int, default=25,
            help='Snapshots older than HOURS are WARNING '
                 '(default: %(default)s)')),
        (('-c', '--critical'), dict(
            metavar='HOURS', type=int, default=49,
            help='Snapshots older than HOURS are CRITICAL '
                 '(default: %(default)s)')),
        (('-H', '--host'), dict(
            metavar='HOST',
            help='only consider snapshots for this host')),
        (('--path',), dict(
            metavar='PATH',
            help='only consider snapshots for this path')),
        (('-r', '--repo'), dict(
            metavar='REPO',
            help='repository to check backups '
                 '(default: $RESTIC_REPOSITORY)')),
        (('-p', '--password-file'), dict(
            metavar='PASSWORD_FILE',
            help='read the repository password from a file (default: '
                 '$RESTIC_PASSWORD_FILE)')),
        (('-v', '--verbose'), dict(
            action='count', default=0,
            help='increase output verbosity (use up to 3 times)')),
        (('-t', '--timeout'), dict(
            metavar='SECONDS', type=int, default=10,
            help='Plugin timeout in seconds (default: %(default)s)')),
    )

    parser = argparse.ArgumentParser(description=__doc__)
    for flags, settings in cli_options:
        parser.add_argument(*flags, **settings)
    options = parser.parse_args()

    # Resource, context and summary are built in the same order as before.
    resource = Restic(
        restic_bin=options.restic_bin,
        host=options.host,
        path=options.path,
        repo=options.repo,
        password_file=options.password_file,
        sudo=options.sudo,
    )
    age_context = nagiosplugin.ScalarContext(
        'last_snapshot_age', options.warning, options.critical)
    check = nagiosplugin.Check(resource, age_context, ResticSummary())
    check.main(verbose=options.verbose, timeout=options.timeout)


if __name__ == '__main__':
    main()