Skip to content

Commit

Permalink
Improve logging (#43)
Browse files Browse the repository at this point in the history
Only log the output of check commands if they actually produced any
output. Also improve the service status logging so that it is easier for
humans to read and track, especially with multiple services. Finally,
fix a copy-and-paste mistake in the down-count bug log message, which
reported up_cnt instead of down_cnt.
  • Loading branch information
corubba authored Jan 20, 2024
1 parent added65 commit 7d21f5c
Showing 1 changed file with 14 additions and 8 deletions.
22 changes: 14 additions & 8 deletions anycast_healthchecker/servicecheck.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,8 +130,10 @@ def _run_check(self):
self.metric_check_duration.labels(**self.labels).set(duration)

if proc.returncode != 0:
self.log.info("stderr from the check %s", errs)
self.log.info("stdout from the check %s", outs)
if errs:
self.log.info("stderr from the check %s", errs)
if outs:
self.log.info("stdout from the check %s", outs)

return proc.returncode

Expand Down Expand Up @@ -305,21 +307,23 @@ def _run(self):
if check_status == 0:
if up_cnt == (self.config['check_rise'] - 1):
self.extra['status'] = 'up'
self.log.info("status UP", extra=self.extra)
self.metric_state.labels(**self.labels).set(check_status)
# Service exceeded all consecutive checks. Set its state
# accordingly and put an item in queue. But do it only if
# previous state was different, to prevent unnecessary bird
# reloads when a service flaps between states.
if check_state != 'UP':
check_state = 'UP'
self.log.info("changed to UP", extra=self.extra)
self.log.info("adding %s in the queue",
self.ip_with_prefixlen,
extra=self.extra)
self.action.put(self.add_operation)
else:
self.log.info("status UP", extra=self.extra)
elif up_cnt < self.config['check_rise']:
up_cnt += 1
self.log.info("going up %s", up_cnt, extra=self.extra)
self.log.info("going up %s/%s", up_cnt, self.config['check_rise'], extra=self.extra)
else:
self.log.error("up_cnt is higher %s, it's a BUG!",
up_cnt,
Expand All @@ -328,7 +332,6 @@ def _run(self):
else:
if down_cnt == (self.config['check_fail'] - 1):
self.extra['status'] = 'down'
self.log.info("status DOWN", extra=self.extra)
# Service exceeded all consecutive checks.
# Set its state accordingly and put an item in queue.
# But do it only if previous state was different, to
Expand All @@ -337,16 +340,19 @@ def _run(self):
self.metric_state.labels(**self.labels).set(check_status)
if check_state != 'DOWN':
check_state = 'DOWN'
self.log.info("changed to DOWN", extra=self.extra)
self.log.info("adding %s in the queue",
self.ip_with_prefixlen,
extra=self.extra)
self.action.put(self.del_operation)
else:
self.log.info("status DOWN", extra=self.extra)
elif down_cnt < self.config['check_fail']:
down_cnt += 1
self.log.info("going down %s", down_cnt, extra=self.extra)
self.log.info("going down %s/%s", down_cnt, self.config['check_fail'], extra=self.extra)
else:
self.log.error("up_cnt is higher %s, it's a BUG!",
up_cnt,
self.log.error("down_cnt is higher %s, it's a BUG!",
down_cnt,
extra=self.extra)
up_cnt = 0

Expand Down

0 comments on commit 7d21f5c

Please sign in to comment.