Skip to content

Commit 4009116

Browse files
lubosmj authored and dkliban committed
Include the worker's name in the http.server.duration metric
closes #5844
1 parent dcad800 commit 4009116

File tree

5 files changed

+35
-10
lines changed

5 files changed

+35
-10
lines changed

CHANGES/5844.feature

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Included the worker's name in the ``http.server.duration`` OpenTelemetry metric attributes.

pulpcore/app/util.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import hashlib
22
import zlib
33
import os
4+
import socket
45
import tempfile
56
import gnupg
67

@@ -658,6 +659,11 @@ def init_domain_metrics_exporter():
658659
DomainMetricsEmitter.build(domain)
659660

660661

662+
@lru_cache(maxsize=1)
def get_worker_name():
    """
    Return the name of the current worker in the form ``<pid>@<hostname>``.

    The name is built on the first call and cached by ``lru_cache`` for subsequent calls.
    """
    pid = os.getpid()
    hostname = socket.gethostname()
    return f"{pid}@{hostname}"
665+
666+
661667
class PGAdvisoryLock:
662668
"""
663669
A context manager that will hold a postgres advisory lock non-blocking.

pulpcore/app/wsgi.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,37 @@
99

1010
from django.core.wsgi import get_wsgi_application
1111
from opentelemetry.instrumentation.wsgi import OpenTelemetryMiddleware
12+
from opentelemetry.exporter.otlp.proto.http.metric_exporter import (
13+
OTLPMetricExporter,
14+
)
15+
from opentelemetry.sdk.metrics import MeterProvider
16+
from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader
1217

1318
from pulpcore.app.entrypoint import using_pulp_api_worker
19+
from pulpcore.app.util import get_worker_name
1420

1521
if not using_pulp_api_worker.get(False):
1622
raise RuntimeError("This app must be executed using pulpcore-api entrypoint.")
1723

24+
25+
class WorkerNameMetricsExporter(OTLPMetricExporter):
    """
    An OTLP metric exporter that tags ``http.server.duration`` data points with the worker's name.
    """

    def export(self, metrics_data, timeout_millis=10_000, **kwargs):
        """
        Add the ``worker.process`` attribute to the ``http.server.duration`` metric and export.

        Args:
            metrics_data: The metrics batch to export; it is walked through its
                ``resource_metrics`` -> ``scope_metrics`` -> ``metrics`` hierarchy.
            timeout_millis: Timeout forwarded unchanged to the parent exporter.
            **kwargs: Extra keyword arguments forwarded unchanged to the parent exporter.

        Returns:
            Whatever the parent exporter's ``export`` returns.
        """
        worker_name = get_worker_name()

        for resource_metric in metrics_data.resource_metrics:
            for scope_metric in resource_metric.scope_metrics:
                for metric in scope_metric.metrics:
                    if metric.name != "http.server.duration":
                        continue
                    # Tag every data point, not only the first one: a metric carries one data
                    # point per distinct attribute set, and the list may also be empty.
                    for data_point in metric.data.data_points:
                        data_point.attributes["worker.process"] = worker_name

        return super().export(metrics_data, timeout_millis, **kwargs)
35+
36+
37+
exporter = WorkerNameMetricsExporter()
38+
reader = PeriodicExportingMetricReader(exporter)
39+
provider = MeterProvider(metric_readers=[reader])
40+
1841
application = get_wsgi_application()
19-
application = OpenTelemetryMiddleware(application)
42+
application = OpenTelemetryMiddleware(application, meter_provider=provider)
2043

2144
# Disabling Storage metrics until we find a solution to resource usage.
2245
# https://github.com/pulp/pulpcore/issues/5468

pulpcore/content/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424

2525
from pulpcore.app.apps import pulp_plugin_configs # noqa: E402: module level not at top of file
2626
from pulpcore.app.models import ContentAppStatus # noqa: E402: module level not at top of file
27+
from pulpcore.app.util import get_worker_name # noqa: E402: module level not at top of file
2728

2829
from .handler import Handler # noqa: E402: module level not at top of file
2930
from .authentication import authenticate # noqa: E402: module level not at top of file
@@ -38,7 +39,7 @@
3839

3940
async def _heartbeat():
4041
content_app_status = None
41-
name = "{pid}@{hostname}".format(pid=os.getpid(), hostname=socket.gethostname())
42+
name = get_worker_name()
4243
heartbeat_interval = settings.CONTENT_APP_TTL // 4
4344
msg = "Content App '{name}' heartbeat written, sleeping for '{interarrival}' seconds".format(
4445
name=name, interarrival=heartbeat_interval

pulpcore/content/handler.py

Lines changed: 2 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,7 @@
33
from multidict import CIMultiDict
44
import os
55
import re
6-
import socket
76
from gettext import gettext as _
8-
from functools import lru_cache
97

108
from aiohttp.client_exceptions import ClientResponseError
119
from aiohttp.web import FileResponse, StreamResponse, HTTPOk
@@ -56,6 +54,7 @@
5654
from pulpcore.app.util import ( # noqa: E402: module level not at top of file
5755
MetricsEmitter,
5856
get_domain,
57+
get_worker_name,
5958
cache_key,
6059
)
6160

@@ -67,11 +66,6 @@
6766
log = logging.getLogger(__name__)
6867

6968

70-
@lru_cache(maxsize=1)
71-
def _get_content_app_name():
72-
return f"{os.getpid()}@{socket.gethostname()}"
73-
74-
7569
class PathNotResolved(HTTPNotFound):
7670
"""
7771
The path could not be resolved to a published file.
@@ -1167,6 +1161,6 @@ async def finalize():
11671161
def _report_served_artifact_size(self, size):
11681162
attributes = {
11691163
"domain_name": get_domain().name,
1170-
"content_app_name": _get_content_app_name(),
1164+
"worker_name": get_worker_name(),
11711165
}
11721166
self.artifacts_size_counter.add(size, attributes)

0 commit comments

Comments
 (0)