From 4c411212cd2df91537013a5ed8583a9803c87bc0 Mon Sep 17 00:00:00 2001 From: Idriss Neumann Date: Tue, 24 Dec 2024 14:17:36 +0100 Subject: [PATCH] Add TCP checks --- README.md | 4 +- VERSION | 2 +- src/utils/monitor.py | 100 ++++++++++++++++++++++++++++++++++++------- 3 files changed, 87 insertions(+), 19 deletions(-) diff --git a/README.md b/README.md index 4ae355c..f96e01d 100644 --- a/README.md +++ b/README.md @@ -244,9 +244,9 @@ In order to use that, just override the `/app/imalive.yml` with the following co ```yaml --- monitors: - - type: http + - type: http # only http and tcp are supported name: imalive - url: http://localhost:8081 + url: http://localhost:8081 # if it's a tcp check, it must look like host:port method: POST # optional (GET by default, only POST, PUT and GET are supported) body: '{"foo": "bar"}' # optional (body is ignored if method is GET) check_tls: false # optional (true by default) diff --git a/VERSION b/VERSION index 249b676..ee74734 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -4.0.21 +4.1.0 diff --git a/src/utils/monitor.py b/src/utils/monitor.py index 7ee1614..0afad36 100644 --- a/src/utils/monitor.py +++ b/src/utils/monitor.py @@ -4,6 +4,8 @@ import requests import asyncio import threading +import socket +import time import requests import yaml @@ -22,7 +24,7 @@ def check_status_code_pattern(actual_code, pattern): regexp = "^{}$".format(pattern.replace('*', '[0-9]+')) return bool(re.match(regexp, str(actual_code))) -def check_http_monitor(monitor, gauges): +def init_vars_monitor(monitor): vdate = datetime.now() labels = { @@ -30,37 +32,101 @@ def check_http_monitor(monitor, gauges): 'family': monitor['family'] if is_not_empty_key(monitor, 'family') else monitor['name'] } - if monitor['type'] != 'http': - log_msg("DEBUG", { - "status": "ok", + pmonitor = monitor.copy() + del_key_if_exists(pmonitor, 'username') + del_key_if_exists(pmonitor, 'password') + + timeout = get_or_else(monitor, 'timeout', 30) + level = get_or_else(monitor, 
'level', 'DEBUG') + if level not in ['INFO', 'DEBUG']: + level = 'DEBUG' + + return vdate, labels, pmonitor, level, timeout + +def fail_monitor(monitor, gauges): + vdate, labels, pmonitor, _, _ = init_vars_monitor(monitor) + + type_monitor = monitor['type'] if is_not_empty_key(monitor, 'type') else "undefined" + log_msg("ERROR", { + "status": "ko", + "type": "monitor", + "time": vdate.isoformat(), + "message": "Bad configuration of monitor: name = {}, type = {}".format(monitor['name'], type_monitor), + "monitor": pmonitor + }) + set_gauge(gauges['result'], 0, {**labels, 'kind': 'result'}) + +def check_tcp_monitor(monitor, gauges): + vdate, labels, pmonitor, level, timeout = init_vars_monitor(monitor) + + if is_empty_key(monitor, 'url'): + log_msg("ERROR", { + "status": "ko", + "type": "monitor", + "time": vdate.isoformat(), + "message": "Missing mandatory url", + "monitor": pmonitor + }) + set_gauge(gauges['result'], 0, {**labels, 'kind': 'result'}) + return + + if not re.match(r"^[a-zA-Z0-9.-]+:\d+$", monitor['url']): + log_msg("ERROR", { + "status": "ko", "type": "monitor", "time": vdate.isoformat(), - "message": "Not an http monitor", - "monitor": monitor + "message": "Incorrect url (expected host:port): actual = {}".format(monitor['url']), + "monitor": pmonitor }) set_gauge(gauges['result'], 0, {**labels, 'kind': 'result'}) return + host, port = monitor['url'].split(":") + port = int(port) + + try: + start_time = time.time() + with socket.create_connection((host, port), timeout=timeout): + duration = time.time() - start_time + set_gauge(gauges['result'], 1, {**labels, 'kind': 'result'}) + log_msg(level, { + "status": "ok", + "type": "monitor", + "time": vdate.isoformat(), + "duration": duration, + "message": "Monitor is healthy", + "monitor": pmonitor + }) + except (socket.timeout, ConnectionRefusedError, socket.error) as e: + duration = time.time() - start_time + log_msg("ERROR", { + "status": "ko", + "type": "monitor", + "time": vdate.isoformat(), + 
"message": "Unable to open connection, e.type = {}, e.msg = {}".format(type(e), e), + "monitor": pmonitor + }) + set_gauge(gauges['result'], 0, {**labels, 'kind': 'result'}) + +def check_http_monitor(monitor, gauges): + vdate, labels, pmonitor, level, timeout = init_vars_monitor(monitor) + if is_empty_key(monitor, 'url'): log_msg("ERROR", { "status": "ko", "type": "monitor", "time": vdate.isoformat(), "message": "Missing mandatory url", - "monitor": monitor + "monitor": pmonitor }) set_gauge(gauges['result'], 0, {**labels, 'kind': 'result'}) return method = get_or_else(monitor, 'method', 'GET') - timeout = get_or_else(monitor, 'timeout', 30) expected_http_code = get_or_else(monitor, 'expected_http_code', '20*') expected_contain = get_or_else(monitor, 'expected_contain', None) body = get_or_else(monitor, 'body', None) check_tls = is_true(get_or_else(monitor, 'check_tls', True)) - level = get_or_else(monitor, 'level', 'DEBUG') - if level not in ['INFO', 'DEBUG']: - level = 'DEBUG' duration = None auth = None @@ -74,10 +140,6 @@ def check_http_monitor(monitor, gauges): if is_not_empty_key(header, 'name') and is_not_empty_key(header, 'value'): headers[sanitize_header_name(header['name'])] = header['value'] - pmonitor = monitor.copy() - del_key_if_exists(pmonitor, 'username') - del_key_if_exists(pmonitor, 'password') - try: if method == "GET": response = requests.get(monitor['url'], auth=auth, headers=headers, timeout=timeout, verify=check_tls) @@ -169,7 +231,13 @@ def loop_monitors(): for monitor in loaded_data['monitors']: if is_empty_key(monitor, 'name'): continue - check_http_monitor(monitor, gauges[monitor['name']]) + + if is_not_empty_key(monitor, 'type') and 'http' == monitor['type']: + check_http_monitor(monitor, gauges[monitor['name']]) + elif is_not_empty_key(monitor, 'type') and 'tcp' == monitor['type']: + check_tcp_monitor(monitor, gauges[monitor['name']]) + else: + fail_monitor(monitor, gauges[monitor['name']]) sleep(WAIT_TIME) def start_monitors():