Skip to content

Commit 6e3ce2a

Browse files
authored
Merge pull request #66 from uc-cdis/feat/common_metrics
Feat/common metrics
2 parents dbae428 + f28e6c1 commit 6e3ce2a

File tree

7 files changed

+562
-220
lines changed

7 files changed

+562
-220
lines changed

.pre-commit-config.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,17 @@
11
repos:
22
- repo: git@github.com:Yelp/detect-secrets
3-
rev: v1.1.0
3+
rev: v1.5.0
44
hooks:
55
- id: detect-secrets
66
args: ['--baseline', '.secrets.baseline']
77
exclude: poetry.lock
88
- repo: https://github.com/pre-commit/pre-commit-hooks
9-
rev: v4.0.1
9+
rev: v5.0.0
1010
hooks:
1111
- id: end-of-file-fixer
1212
- id: no-commit-to-branch
1313
args: [--branch, develop, --branch, master, --pattern, release/.*]
1414
- repo: https://github.com/psf/black
15-
rev: 21.5b2
15+
rev: 24.10.0
1616
hooks:
1717
- id: black

.secrets.baseline

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,7 @@
111111
}
112112
]
113113
},
114-
"version": "1.1.0",
114+
"version": "1.5.0",
115115
"filters_used": [
116116
{
117117
"path": "detect_secrets.filters.allowlist.is_line_allowlisted"

README.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@
33
This package includes several utility Python tools for the Gen3 stack. It is meant to be imported as a means to access
44
supplementary tools and resources that are reusable and not exclusive to any specific repo we use.
55

6+
## metrics
7+
8+
- Prometheus
9+
610

711
## profiling
812

cdispyutils/metrics.py

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,166 @@
1+
"""
2+
Small wrapper around the Prometheus client for metrics gathering in a multi-
3+
process Python environment. This is intended to be extended and instantiated by
4+
services, stored at some application context level, and then used to add metrics
5+
(which are likely later exposed at the /metrics endpoint for Prometheus to scrape).
6+
"""
7+
8+
import os
9+
import pathlib
10+
11+
from cdislogging import get_logger
12+
from prometheus_client import (
13+
CONTENT_TYPE_LATEST,
14+
CollectorRegistry,
15+
Counter,
16+
Gauge,
17+
generate_latest,
18+
multiprocess,
19+
)
20+
21+
# Module-level logger; BaseMetrics uses it to report metric creation (info)
# and every metric update (debug).
logger = get_logger(__name__)
22+
23+
24+
class BaseMetrics(object):
    """
    Class to handle Prometheus metrics.

    Services are expected to subclass this, keep one instance at application
    context level, and expose purpose-specific helpers (e.g. `add_login_event`)
    that call the generic counter/gauge methods below.

    Attributes:
        enabled (bool): If this is false, the class functions will be no-ops (no operations), effectively
                        doing nothing. This is the behavior when metrics are disabled. Why? So application code
                        doesn't have to check, it always tries to log a metric.
        prometheus_metrics (dict): Maps a metric name to the Counter/Gauge object created for it
        _registry (CollectorRegistry): Prometheus registry fed by the multiprocess collector;
                                       None when metrics are disabled
    """

    def __init__(self, enabled=True, prometheus_dir="/var/tmp/prometheus_metrics"):
        """
        Create a metrics class.

        Args:
            enabled (bool): If this is false, the class functions will be no-ops (no operations), effectively
                            doing nothing. This is the behavior when metrics are disabled. Why? So application code
                            doesn't have to check, it always tries to log a metric.
            prometheus_dir (str): Directory to use when setting PROMETHEUS_MULTIPROC_DIR env var (which prometheus requires
                                  for multiprocess metrics collection). The directory is created if missing.
                                  Note that the prometheus client is very finicky about when the ENV var is set.
                                  NOTE(review): the client may read this variable as early as its own import,
                                  in which case setting it here is too late for that process - confirm
                                  against the pinned prometheus_client version.
        """
        self.enabled = enabled
        self.prometheus_metrics = {}
        # Always define the attribute so a disabled instance never raises
        # AttributeError when `_registry` is inspected.
        self._registry = None
        if not enabled:
            return

        pathlib.Path(prometheus_dir).mkdir(parents=True, exist_ok=True)
        os.environ["PROMETHEUS_MULTIPROC_DIR"] = prometheus_dir

        logger.info(
            f"PROMETHEUS_MULTIPROC_DIR is {os.environ['PROMETHEUS_MULTIPROC_DIR']}"
        )

        # A dedicated registry (not the default one) aggregates the per-process
        # metric files found in PROMETHEUS_MULTIPROC_DIR.
        self._registry = CollectorRegistry()
        multiprocess.MultiProcessCollector(self._registry)

    def get_latest_metrics(self):
        """
        Generate the latest Prometheus metrics.

        Returns:
            tuple: `(payload, content_type)`. When metrics are enabled the payload is
                   whatever `generate_latest` returns for the registry (bytes in current
                   prometheus_client releases - verify for the pinned version); when
                   disabled it is the empty string `""`. The content type is always
                   `CONTENT_TYPE_LATEST`.
        """
        # When metrics gathering is not enabled, the metrics endpoint should not error, but it should
        # not return any data.
        if not self.enabled:
            return "", CONTENT_TYPE_LATEST

        return generate_latest(self._registry), CONTENT_TYPE_LATEST

    def increment_counter(self, name, labels, description=""):
        """
        Increment a Prometheus counter metric by one, creating it on first use.

        Note that this function should not be called directly - implement a function like
        `add_login_event` instead. A metric's labels should always be consistent.

        Args:
            name (str): Name of the metric
            labels (dict): Dictionary of labels for the metric (label name -> label value)
            description (str): describing the counter in case it doesn't already exist

        Raises:
            ValueError: if `name` is already registered here as a non-counter metric, or if
                        the label names do not match the ones the counter was created with
        """
        if not self.enabled:
            return

        # create the counter if it doesn't already exist
        if name not in self.prometheus_metrics:
            logger.info(
                f"Creating counter '{name}' with description '{description}' and labels: {labels}"
            )
            self.prometheus_metrics[name] = Counter(
                name, description, list(labels or ())
            )
        elif not isinstance(self.prometheus_metrics[name], Counter):
            raise ValueError(
                f"Trying to create counter '{name}' but a {type(self.prometheus_metrics[name])} with this name already exists"
            )

        logger.debug(f"Incrementing counter '{name}' with labels: {labels}")
        self._labelled_metric(name, labels).inc()

    def dec_gauge(self, name, labels, value, description=""):
        """
        Decrement a Prometheus gauge metric, creating it on first use.

        Note that this function should not be called directly - implement a function like
        `add_signed_url_event` instead. A metric's labels should always be consistent.

        Args:
            name (str): Name of the metric
            labels (dict): Dictionary of labels for the metric (label name -> label value)
            value (int): Amount to decrement the metric by
            description (str): describing the gauge in case it doesn't already exist

        Raises:
            ValueError: if `name` is already registered here as a non-gauge metric, or if
                        the label names do not match the ones the gauge was created with
        """
        if not self.enabled:
            return

        self._create_gauge_if_not_exist(name, labels, value, description)
        logger.debug(f"Decrementing gauge '{name}' by '{value}' with labels: {labels}")
        self._labelled_metric(name, labels).dec(value)

    def inc_gauge(self, name, labels, value, description=""):
        """
        Increment a Prometheus gauge metric, creating it on first use.

        Note that this function should not be called directly - implement a function like
        `add_signed_url_event` instead. A metric's labels should always be consistent.

        Args:
            name (str): Name of the metric
            labels (dict): Dictionary of labels for the metric (label name -> label value)
            value (int): Amount to increment the metric by
            description (str): describing the gauge in case it doesn't already exist

        Raises:
            ValueError: if `name` is already registered here as a non-gauge metric, or if
                        the label names do not match the ones the gauge was created with
        """
        if not self.enabled:
            return

        self._create_gauge_if_not_exist(name, labels, value, description)
        logger.debug(f"Incrementing gauge '{name}' by '{value}' with labels: {labels}")
        self._labelled_metric(name, labels).inc(value)

    def set_gauge(self, name, labels, value, description=""):
        """
        Set a Prometheus gauge metric, creating it on first use.

        Note that this function should not be called directly - implement a function like
        `add_signed_url_event` instead. A metric's labels should always be consistent.

        Args:
            name (str): Name of the metric
            labels (dict): Dictionary of labels for the metric (label name -> label value)
            value (int): Value to set the metric to
            description (str): describing the gauge in case it doesn't already exist

        Raises:
            ValueError: if `name` is already registered here as a non-gauge metric, or if
                        the label names do not match the ones the gauge was created with
        """
        if not self.enabled:
            return

        self._create_gauge_if_not_exist(name, labels, value, description)
        logger.debug(f"Setting gauge '{name}' with '{value}' with labels: {labels}")
        self._labelled_metric(name, labels).set(value)

    def _create_gauge_if_not_exist(self, name, labels, value, description):
        """
        Register a gauge under `name` unless one is already stored.

        Args:
            name (str): Name of the metric
            labels (dict): Dictionary of labels; only its keys are used, as the label names
            value: unused here; kept so existing callers' positional arguments still line up
            description (str): describing the gauge in case it doesn't already exist

        Raises:
            ValueError: if `name` is already registered here as a non-gauge metric
        """
        # create the gauge if it doesn't already exist
        if name not in self.prometheus_metrics:
            logger.info(
                f"Creating gauge '{name}' with description '{description}' and labels: {labels}"
            )
            self.prometheus_metrics[name] = Gauge(name, description, list(labels or ()))
        elif not isinstance(self.prometheus_metrics[name], Gauge):
            raise ValueError(
                f"Trying to create gauge '{name}' but a {type(self.prometheus_metrics[name])} with this name already exists"
            )

    def _labelled_metric(self, name, labels):
        """
        Return the stored metric `name` narrowed to the given label values.

        Labels are bound by name rather than by position, so the order of the keys
        in `labels` does not matter. With no labels the metric itself is returned,
        because the client rejects `.labels()` on a metric that declares no label names.
        """
        metric = self.prometheus_metrics[name]
        if not labels:
            return metric
        return metric.labels(**labels)

0 commit comments

Comments
 (0)