From 49b6e157586eba548e1f6e3446c3d7e2ca252463 Mon Sep 17 00:00:00 2001 From: Joss Whittle Date: Mon, 18 Dec 2023 16:03:57 +0000 Subject: [PATCH] feat(canary): add prometheus push metrics (#36) --- .github/workflows/flake8.yaml | 2 +- .../canary/charts/crds/crds/http-monitor.yaml | 11 ++ charts/canary/templates/_helpers/common.tpl | 10 - .../templates/controller/daemonset.yaml | 93 ++++++++++ .../templates/controller/deployment.yaml | 53 +++++- .../templates/monitors/example-monitor.yaml | 10 +- charts/canary/values.yaml | 28 +++ src/canary.py | 172 +++++------------- src/canary/__init__.py | 0 src/canary/controller.py | 139 ++++++++++++++ src/canary/monitor.py | 164 +++++++++++++++++ src/canary/utils/__init__.py | 0 src/canary/utils/click.py | 17 ++ src/requirements.txt | 3 +- 14 files changed, 561 insertions(+), 141 deletions(-) create mode 100644 charts/canary/templates/controller/daemonset.yaml create mode 100644 src/canary/__init__.py create mode 100644 src/canary/controller.py create mode 100644 src/canary/monitor.py create mode 100644 src/canary/utils/__init__.py create mode 100644 src/canary/utils/click.py diff --git a/.github/workflows/flake8.yaml b/.github/workflows/flake8.yaml index 3268d95..5a340a6 100644 --- a/.github/workflows/flake8.yaml +++ b/.github/workflows/flake8.yaml @@ -35,4 +35,4 @@ jobs: - name: run flake8 run: | - flake8 --max-complexity 10 --ignore E501 src + flake8 --max-complexity 10 --ignore E501,C901 src diff --git a/charts/canary/charts/crds/crds/http-monitor.yaml b/charts/canary/charts/crds/crds/http-monitor.yaml index 166c5b1..395f067 100644 --- a/charts/canary/charts/crds/crds/http-monitor.yaml +++ b/charts/canary/charts/crds/crds/http-monitor.yaml @@ -18,11 +18,22 @@ spec: properties: url: type: string + description: Url to poll over http(s). + proxy: + type: object + properties: + url: + type: string + description: Url to a http proxy to use when polling the target url. Null inherits the controllers default, empty string explicitly disables proxy. interval: type: integer #seconds status: type: integer default: 200 + required: + - url + - interval + - status scope: Namespaced names: plural: canaryhttpmonitors diff --git a/charts/canary/templates/_helpers/common.tpl b/charts/canary/templates/_helpers/common.tpl index 2bfcc18..5f143bf 100644 --- a/charts/canary/templates/_helpers/common.tpl +++ b/charts/canary/templates/_helpers/common.tpl @@ -101,11 +101,6 @@ The list of `env` vars for canary pods EXAMPLE USAGE: {{ include "canary.env" (dict "Release" .Release "Values" .Values "extraEnv" $extraEnv) }} */}} {{- define "canary.env" }} -{{- /* user-defined (global) */ -}} -{{- if .Values.canary.extraEnv }} -{{ toYaml .Values.canary.extraEnv }} -{{- end }} - {{- /* user-defined */ -}} {{- if .extraEnv }} {{ toYaml .extraEnv }} @@ -117,11 +112,6 @@ The list of `envFrom` vars for canary pods EXAMPLE USAGE: {{ include "canary.envFrom" (dict "Release" .Release "Values" .Values "extraEnvFrom" $extraEnvFrom) }} */}} {{- define "canary.envFrom" }} -{{- /* user-defined (global) */ -}} -{{- if .Values.canary.extraEnvFrom }} -{{ toYaml .Values.canary.extraEnvFrom }} -{{- end }} - {{- /* user-defined */ -}} {{- if .extraEnvFrom }} {{ toYaml .extraEnvFrom }} diff --git a/charts/canary/templates/controller/daemonset.yaml b/charts/canary/templates/controller/daemonset.yaml new file mode 100644 index 0000000..9c82aa0 --- /dev/null +++ b/charts/canary/templates/controller/daemonset.yaml @@ -0,0 +1,93 @@ +{{- if and .Values.controller.enabled (eq .Values.controller.mode "daemonset") }} +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: {{ include "canary.fullname" . }}-controller + namespace: {{ .Release.Namespace }} + + labels: + app: {{ include "canary.labels.app" . }} + component: controller + chart: {{ include "canary.labels.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} + {{- range $key, $value := .Values.controller.labels }} + {{ $key }}: {{ $value | quote }} + {{- end }} + annotations: + {{- range $key, $value := .Values.controller.annotations }} + {{ $key }}: {{ $value | quote }} + {{- end }} + +spec: + + selector: + matchLabels: + app: {{ include "canary.labels.app" . }} + component: controller + release: {{ .Release.Name }} + + updateStrategy: + {{- if eq .Values.controller.daemonset.updateStrategy "RollingUpdate" }} + rollingUpdate: + maxUnavailable: {{ .Values.controller.daemonset.maxUnavailable }} + {{- end }} + type: {{ .Values.controller.daemonset.updateStrategy }} + + template: + metadata: + labels: + app: {{ include "canary.labels.app" . }} + component: controller + chart: {{ include "canary.labels.chart" . }} + release: {{ .Release.Name }} + heritage: {{ .Release.Service }} + {{- range $key, $value := .Values.controller.labels }} + {{ $key }}: {{ $value | quote }} + {{- end }} + annotations: + {{- range $key, $value := .Values.controller.podAnnotations }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- range $key, $value := .Values.controller.annotations }} + {{ $key }}: {{ $value | quote }} + {{- end }} + + spec: + + restartPolicy: Always + serviceAccountName: canaryhttpmonitor + + tolerations: {{ toYaml .Values.controller.tolerations | nindent 8 }} + nodeSelector: {{ toYaml .Values.controller.nodeSelector | nindent 8 }} + affinity: {{ toYaml .Values.controller.affinity | nindent 8 }} + terminationGracePeriodSeconds: {{ .Values.controller.terminationGracePeriod }} + + containers: + - name: controller + {{- include "canary.image" (dict "image" .Values.controller.image) | indent 10 }} + + ports: + - containerPort: 8080 + + envFrom: + {{- include "canary.envFrom" (dict "Release" .Release "Values" .Values "extraEnv" .Values.controller.extraEnvFrom) | indent 12 }} + env: + # Make the controller aware of where it is in the cluster + - name: CANARY_K8S_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: CANARY_K8S_POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: CANARY_K8S_POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: CANARY_K8S_RELEASE_NAME + value: {{ .Release.Name }} + {{- include "canary.env" (dict "Release" .Release "Values" .Values "extraEnv" .Values.controller.extraEnv) | indent 12 }} + +{{- end }} diff --git a/charts/canary/templates/controller/deployment.yaml b/charts/canary/templates/controller/deployment.yaml index cc000c9..5a1bfd9 100644 --- a/charts/canary/templates/controller/deployment.yaml +++ b/charts/canary/templates/controller/deployment.yaml @@ -1,4 +1,4 @@ -{{- if .Values.controller.enabled }} +{{- if and .Values.controller.enabled (eq .Values.controller.mode "deployment") }} apiVersion: apps/v1 kind: Deployment metadata: @@ -11,9 +11,16 @@ metadata: chart: {{ include "canary.labels.chart" . }} release: {{ .Release.Name }} heritage: {{ .Release.Service }} + {{- range $key, $value := .Values.controller.labels }} + {{ $key }}: {{ $value | quote }} + {{- end }} + annotations: + {{- range $key, $value := .Values.controller.annotations }} + {{ $key }}: {{ $value | quote }} + {{- end }} spec: - replicas: 1 + replicas: {{ .Values.controller.deployment.replicas }} selector: matchLabels: @@ -26,15 +33,55 @@ spec: labels: app: {{ include "canary.labels.app" . }} component: controller + chart: {{ include "canary.labels.chart" . }} release: {{ .Release.Name }} + heritage: {{ .Release.Service }} + {{- range $key, $value := .Values.controller.labels }} + {{ $key }}: {{ $value | quote }} + {{- end }} + annotations: + {{- range $key, $value := .Values.controller.podAnnotations }} + {{ $key }}: {{ $value | quote }} + {{- end }} + {{- range $key, $value := .Values.controller.annotations }} + {{ $key }}: {{ $value | quote }} + {{- end }} spec: + restartPolicy: Always + serviceAccountName: canaryhttpmonitor + + tolerations: {{ toYaml .Values.controller.tolerations | nindent 8 }} + nodeSelector: {{ toYaml .Values.controller.nodeSelector | nindent 8 }} + affinity: {{ toYaml .Values.controller.affinity | nindent 8 }} + terminationGracePeriodSeconds: {{ .Values.controller.terminationGracePeriod }} containers: - name: controller {{- include "canary.image" (dict "image" .Values.controller.image) | indent 10 }} - serviceAccountName: canaryhttpmonitor + ports: + - containerPort: 8080 + + envFrom: + {{ include "canary.envFrom" (dict "extraEnv" .Values.controller.extraEnvFrom) | indent 12 }} + env: + # Make the controller aware of where it is in the cluster + - name: CANARY_K8S_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: CANARY_K8S_POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: CANARY_K8S_POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: CANARY_K8S_RELEASE_NAME + value: {{ .Release.Name }} + {{- include "canary.env" (dict "extraEnv" .Values.controller.extraEnv) | indent 12 }} {{- end }} diff --git a/charts/canary/templates/monitors/example-monitor.yaml b/charts/canary/templates/monitors/example-monitor.yaml index 71cc287..a29c211 100644 --- a/charts/canary/templates/monitors/example-monitor.yaml +++ b/charts/canary/templates/monitors/example-monitor.yaml @@ -1,9 +1,11 @@ +{{- if .Values.monitor.example.enabled }} apiVersion: "canary.ukserp.ac.uk/v1" kind: CanaryHTTPMonitor metadata: - name: github + name: github-canary namespace: {{ .Release.Namespace }} spec: - url: https://api.github.com/octocat - interval: 30 #seconds - + url: https://github.com/SwanseaUniversityMedical/Canary + interval: 60 #seconds + status: 200 +{{- end }} \ No newline at end of file diff --git a/charts/canary/values.yaml b/charts/canary/values.yaml index d635781..ff5233a 100644 --- a/charts/canary/values.yaml +++ b/charts/canary/values.yaml @@ -4,6 +4,30 @@ crds: controller: enabled: true + tolerations: [] + nodeSelector: {} + affinity: {} + terminationGracePeriod: 15 + + labels: {} + annotations: {} + extraEnv: [] + extraEnvFrom: [] + + podAnnotations: + prometheus.io/scrape: "true" + prometheus.io/path: /metrics + prometheus.io/port: "8080" + + mode: deployment + + deployment: + replicas: 1 + + daemonset: + updateStrategy: RollingUpdate + maxUnavailable: 1 + image: repository: harbor.ukserp.ac.uk/canary/canary tag: 1.5.0 @@ -11,3 +35,7 @@ controller: pullSecret: "" uid: 1001 gid: 1001 + +monitor: + example: + enabled: false diff --git a/src/canary.py b/src/canary.py index 0215699..efdaad3 100644 --- a/src/canary.py +++ b/src/canary.py @@ -1,14 +1,9 @@ -import asyncio -import json - -import aiohttp - import logging import click -import kubernetes_asyncio.watch -from kubernetes_asyncio import client, config -from kubernetes_asyncio.client.api_client import ApiClient -from kubernetes_asyncio.client import Configuration +import asyncio + +from canary.utils.click import URL +from canary.controller import Controller logging.basicConfig( level=logging.DEBUG, @@ -16,121 +11,54 @@ ) -async def monitor_url(name, url, interval, statuses): - """ - Monitors a given url at a regular interval and logs the result to prometheus. - This co-routine loops forever unless it is externally cancelled, such as to recreate it with new settings. - """ - - logging.info(f"starting [{name=}]") - try: - while True: - logging.debug(f"polling [{name=}] [{interval=}] [{url=}]") - - # Spawn a task to track the minimum amount of time to the next iteration and return immediately - interval_task = asyncio.create_task(asyncio.sleep(interval)) - - try: - # Poll the url - async with aiohttp.ClientSession() as session: - async with session.get(url) as response: - status = response.status - - # Check if the status code was acceptible - healthy = status in statuses - logging.info( - f"polled [{name=}] [{interval=}] [{url=}] [{status=}] [{healthy=}]" - ) - - # Write to Prometheus - # TODO export metrics to prometheus - - except Exception as ex: - logging.exception(f"poll error [{name=}]", exc_info=ex) - - # Await the minimum interval, returns immediately if it's already passed - await interval_task - - except asyncio.CancelledError: - logging.info(f"cancelled [{name=}]") - finally: - logging.info(f"halting [{name=}]") - - -async def watch_events(*args, **kwargs): - # conf = Configuration() - # conf.http_proxy_url = "http://192.168.10.15:8080" - # await config.load_kube_config(client_configuration=conf) - config.load_incluster_config() - - logging.info("starting watcher") - logging.debug(args) - logging.debug(kwargs) - - logging.info("listening for events") - tasks = dict() - runningTasks = dict() - - try: - while True: - logging.info("checking for updates on the cluster") - async with ApiClient() as api: - crds = client.CustomObjectsApi(api) - rawmonitors = await crds.list_cluster_custom_object(group="canary.ukserp.ac.uk", version="v1", - plural="canaryhttpmonitors") - rawmonitors = rawmonitors["items"] - monitor_names = [] - for monitor in rawmonitors: - name = monitor["metadata"]["name"] - monitor_names.append(name) - url = monitor["spec"]["url"] - interval = monitor["spec"]["interval"] - if type(monitor["spec"]["status"]) is not list: - statuses = [] - statuses.append(monitor["spec"]["status"]) - else: - statuses = monitor["spec"]["status"] - - if name in tasks and ( - runningTasks[name]['url'] != url or runningTasks[name]['interval'] != interval or runningTasks[name][ - 'statuses'] != statuses): - logging.info(f"cancelling monitor [{name=}]") - tasks[name].cancel() - runningTasks[name].popitem() - await tasks[name] - logging.info(f"spawning monitor [{name=}]") - runningTasks[name] = {'name': name, 'url': url, 'interval': interval, 'statuses': statuses} - tasks[name] = asyncio.create_task( - monitor_url(name, url, interval, statuses)) - - if name not in tasks: - logging.info(f"spawning monitor [{name=}]") - runningTasks[name] = {'name': name, 'url': url, 'interval': interval, 'statuses': statuses} - tasks[name] = asyncio.create_task( - monitor_url(name, url, interval, statuses)) - - if len(rawmonitors) < len(tasks): - for task in runningTasks: - if task['name'] not in monitor_names: - logging.info(f"cancelling monitor [{name=}]") - tasks[task['name']].cancel() - runningTasks[task['name']].popitem() - await asyncio.sleep(30) - - except asyncio.CancelledError: - logging.info("cancelled watcher") - for task in tasks.values(): - task.cancel() - - await asyncio.gather(*tasks.values()) - finally: - logging.info("halting watcher") - - @click.command() +@click.option( + "--k8s-update-interval", + type=click.IntRange(min=5, max_open=True), + default=30, + help="Update interval (seconds) for querying kubernetes api for monitor manifests.", + show_default=True +) +@click.option( + "--k8s-node-name", + type=str, + required=True, + help="Name of the node running the controller pod.", + show_default=True +) +@click.option( + "--k8s-pod-name", + type=str, + required=True, + help="Name of the controller pod.", + show_default=True +) +@click.option( + "--k8s-pod-namespace", + type=str, + required=True, + help="Namespace where the controller is running.", + show_default=True +) +@click.option( + "--k8s-release-name", + type=str, + required=True, + help="Name of the helm release.", + show_default=True +) +@click.option( + "--proxy", + type=URL(), + default=None, + help="URL to a HTTP proxy sever to use by default.", + show_default=True +) def main(*args, **kwargs): - asyncio.run(watch_events(*args, **kwargs)) + logging.info("spawning controller") + asyncio.run(Controller(*args, **kwargs)) + logging.info("halting") if __name__ == "__main__": - main() + main(auto_envvar_prefix='CANARY') diff --git a/src/canary/__init__.py b/src/canary/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/canary/controller.py b/src/canary/controller.py new file mode 100644 index 0000000..4d0d657 --- /dev/null +++ b/src/canary/controller.py @@ -0,0 +1,139 @@ +import logging +import asyncio +# import glob +# import os +# import yaml + +import kubernetes_asyncio as k8s +from prometheus_async.aio.web import start_http_server +from prometheus_client import Gauge + +from .monitor import Monitor + + +async def controller(*args, **kwargs): + + logging.info("controller | starting") + logging.debug(f"controller | {args=}") + logging.debug(f"controller | {kwargs=}") + + update_interval = kwargs["k8s_update_interval"] + proxy = kwargs["proxy"] + labels = dict( + node=kwargs["k8s_node_name"], + pod=kwargs["k8s_pod_name"], + namespace=kwargs["k8s_pod_namespace"], + release=kwargs["k8s_release_name"] + ) + + logging.info("controller | loading kube api config") + k8s.config.load_incluster_config() + + monitors = dict() + + MONITORS_GAUGE = Gauge( + name="canary_monitors", + documentation="Number of monitors being tracked by a canary controller.", + labelnames=list(labels.keys()) + ) + + try: + await start_http_server(port=8080) + + while True: + + logging.info("query kube api for monitors") + async with k8s.client.ApiClient() as api: + crds = k8s.client.CustomObjectsApi(api) + manifests = await crds.list_cluster_custom_object( + group="canary.ukserp.ac.uk", + version="v1", + plural="canaryhttpmonitors" + ) + + # manifest_path = os.path.join( + # os.path.dirname(__file__), + # "../../charts/canary/templates/monitors/*.yaml" + # ) + # manifest_paths = list( + # glob.glob( + # manifest_path + # ) + # ) + # manifests = dict(items=list()) + # for manifest_path in manifest_paths: + # with open(manifest_path, "r") as fp: + # manifest = yaml.safe_load(fp) + # manifests["items"].append(manifest) + # logging.info(manifest) + + # Convert the manifests into a dict keyed on namespace.name + manifests = { + f"{manifest['metadata']['namespace']}." + f"{manifest['metadata']['name']}": manifest + for manifest in manifests["items"] + } + logging.debug(f"discovered {len(manifests)} manifests") + logging.debug(f"running {len(monitors)} monitors") + + # Cancel existing monitors that are not found in the live manifests + for name in list(monitors.keys()): + + if name not in manifests: + logging.info(f"canceling monitor [{name=}]") + try: + monitors[name]["task"].cancel() + await monitors[name]["task"] + finally: + del monitors[name] + + # Create or re-create monitors to match the live manifests + for name, manifest in manifests.items(): + + if (name in monitors) and (monitors[name]["spec"] != manifest["spec"]): + logging.info(f"recreating monitor [{name=}]") + try: + monitors[name]["task"].cancel() + await monitors[name]["task"] + finally: + del monitors[name] + + if name not in monitors: + logging.info(f"spawning monitor [{name=}]") + + # Spawn a coroutine task + task = asyncio.create_task( + Monitor( + name=name, + spec=manifest["spec"], + labels=labels, + proxy=proxy + ) + ) + + # Keep track of the task and the spec that it was spawned from + monitors[name] = dict( + name=name, + spec=manifest["spec"], + task=task + ) + + # Update the metric for how many monitors we are running + MONITORS_GAUGE.labels(**labels).set(len(monitors)) + + # Pause before polling the kube api again + await asyncio.sleep(update_interval) + + except asyncio.CancelledError: + logging.info("cancelled") + + finally: + logging.info("halting") + + for name, monitor in monitors.items(): + logging.info(f"canceling monitor [{name=}]") + monitor["task"].cancel() + + await asyncio.gather(*map(lambda m: m["task"], monitors.values())) + +Controller = controller diff --git a/src/canary/monitor.py b/src/canary/monitor.py new file mode 100644 index 0000000..2bc07ae --- /dev/null +++ b/src/canary/monitor.py @@ -0,0 +1,164 @@ +import logging +import asyncio +import time + +import aiohttp +import urllib.parse +from prometheus_client import Gauge + +MIN_MONITOR_INTERVAL = 5 # seconds + +LABELS = [ + "node", + "pod", + "namespace", + "release", + "monitor", +] + +REQUEST_DURATION_GAUGE = Gauge( + name="canary_request_duration", + documentation="Duration of poll event in seconds.", + labelnames=LABELS, +) +HEALTHY_GAUGE = Gauge( + name="canary_healthy", + documentation="Health of the last poll for a url as a boolean 0 (unhealthy) or 1 (healthy).", + labelnames=LABELS, +) +HEALTHY_LASTSEEN_GAUGE = Gauge( + name="canary_healthy_lastseen", + documentation="Timestamp of the most recent time a monitor was healthy.", + labelnames=LABELS, +) +UNHEALTHY_LASTSEEN_GAUGE = Gauge( + name="canary_unhealthy_lastseen", + documentation="Timestamp of the most recent time a monitor was unhealthy.", + labelnames=LABELS, +) +STATUS_LASTSEEN_GAUGE = Gauge( + name="canary_status_lastseen", + documentation="Timestamp of the most recent time a monitor showed a status code.", + labelnames=LABELS + ["status"], +) + + +async def monitor(name: str, spec: dict, labels: dict, proxy: str): + """ + Monitors a given url at a regular interval and logs the result to prometheus. + This co-routine loops forever unless it is externally cancelled, such as to recreate it with new settings.. + """ + + try: + + # Hard clamp the interval to minimum of 5s to prevent DOS runaway + # Still could be bad if running on a lot of nodes + interval = max(MIN_MONITOR_INTERVAL, int(spec["interval"])) + + # Allow this to throw an exception if the url is invalid + url = spec["url"] + urllib.parse.urlparse(url) + + expected_status = str(spec["status"]) + + # If this monitor specifies its own proxy setting + if "proxy" in spec: + if "url" not in spec["proxy"]: + # If the monitor specifies a null url we explicitly disable using a proxy + proxy = None + else: + # Otherwise we replace the global proxy setting passed to canary with the + # one the monitor specifies + proxy = spec["proxy"]["url"] + + # Add extra labels + labels = labels | dict(monitor=name) + + # Normalize label order to ensure we can remove labels later + labels = {key: labels[key] for key in LABELS} + + header = f"[{name=}] [{interval=}] [{url=}] [{proxy=}]" + logging.info(f"{header} | polling") + + # A set to keep track of unique status codes we've seen so that + # we can clean up the metrics when this monitor is halted + observed_status = set() + + while True: + # Spawn a task to track the minimum amount of time to the next iteration and return immediately + interval_task = asyncio.create_task(asyncio.sleep(interval)) + + start_time = time.time() + healthy = False + try: + # Poll the url + timeout = aiohttp.ClientTimeout(total=interval) + async with aiohttp.ClientSession(timeout=timeout) as session: + async with session.get(url, timeout=interval, proxy=proxy) as response: + status = str(response.status) + + # Check if the status code was acceptable + healthy = status == expected_status + logging.info(f"{header} | poll [{status=}] [{healthy=}]") + + # Update per status code metric + observed_status.add(status) + STATUS_LASTSEEN_GAUGE.labels(**(labels | dict(status=status))).set(time.time() * 1000.) + + except Exception as ex: + logging.exception(f"{header} | poll error", exc_info=ex) + + end_time = time.time() + REQUEST_DURATION_GAUGE.labels(**labels).set(max(0., end_time - start_time)) + + # Update the unhealthy metric + if healthy: + HEALTHY_GAUGE.labels(**labels).set(1) + HEALTHY_LASTSEEN_GAUGE.labels(**labels).set(time.time() * 1000.) + + else: + HEALTHY_GAUGE.labels(**labels).set(0) + UNHEALTHY_LASTSEEN_GAUGE.labels(**labels).set(time.time() * 1000.) + + # Await the minimum interval, returns immediately if it's already passed + await interval_task + + except asyncio.CancelledError: + logging.info(f"{header} | cancelled") + except Exception as ex: + logging.error(f"{header} | error monitoring url", exc_info=ex) + finally: + logging.info(f"{header} | halting") + + try: + logging.debug(f"{header} | removing metric canary_request_duration {labels=}") + REQUEST_DURATION_GAUGE.remove(*labels.values()) + except KeyError: + pass + + try: + logging.debug(f"{header} | removing metric canary_healthy {labels=}") + HEALTHY_GAUGE.remove(*labels.values()) + except KeyError: + pass + + try: + logging.debug(f"{header} | removing metric canary_healthy_lastseen {labels=}") + HEALTHY_LASTSEEN_GAUGE.remove(*labels.values()) + except KeyError: + pass + + try: + logging.debug(f"{header} | removing metric canary_unhealthy_lastseen {labels=}") + UNHEALTHY_LASTSEEN_GAUGE.remove(*labels.values()) + except KeyError: + pass + + for status in observed_status: + try: + logging.debug(f"{header} | removing metric canary_status_lastseen {labels=}") + STATUS_LASTSEEN_GAUGE.remove(*(labels | dict(status=status)).values()) + except KeyError: + pass + +Monitor = monitor diff --git a/src/canary/utils/__init__.py b/src/canary/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/canary/utils/click.py b/src/canary/utils/click.py new file mode 100644 index 0000000..38b5260 --- /dev/null +++ b/src/canary/utils/click.py @@ -0,0 +1,17 @@ +import click +import urllib.parse + + +class URL(click.ParamType): + name = "url" + + def convert(self, value, param, ctx): + if not isinstance(value, tuple): + parsed = urllib.parse.urlparse(value) + if parsed.scheme not in ("http", "https"): + self.fail( + f"invalid URL scheme ({parsed.scheme}) for url ({value}). Only HTTP URLs are allowed", + param, + ctx, + ) + return value diff --git a/src/requirements.txt b/src/requirements.txt index 02986b9..bbcd604 100644 --- a/src/requirements.txt +++ b/src/requirements.txt @@ -1,4 +1,5 @@ aiohttp==3.9.1 click==8.1.7 -prometheus-client==0.19.0 kubernetes-asyncio==28.2.1 +prometheus-client==0.19.0 +prometheus-async[aiohttp]==22.2.0