From cbbca088477e713fec042e8b08e19f9efc87d6a4 Mon Sep 17 00:00:00 2001 From: Lubos Mjachky Date: Thu, 26 Sep 2024 16:45:36 +0200 Subject: [PATCH] Get on par with the current version of dashboards and otel-collector --- profiles/opentelemetry_dev/compose.yaml | 8 +- .../grafana/dashboards/pulp_insights.json | 289 +++++++----------- .../otel-collector/otel-collector-config.yaml | 14 +- 3 files changed, 117 insertions(+), 194 deletions(-) diff --git a/profiles/opentelemetry_dev/compose.yaml b/profiles/opentelemetry_dev/compose.yaml index 9d9c54a..a93b7cb 100644 --- a/profiles/opentelemetry_dev/compose.yaml +++ b/profiles/opentelemetry_dev/compose.yaml @@ -17,16 +17,16 @@ services: otel-collector: container_name: otel-collector - image: otel/opentelemetry-collector + image: otel/opentelemetry-collector-contrib command: [--config=/etc/otel-collector/otel-collector-config.yaml] volumes: - "{OCI_ENV_DIR}/profiles/{COMPOSE_PROFILE}/otel-collector:/etc/otel-collector" ports: - "1888" # pprof extension - - "8888" # Prometheus metrics exposed by the collector - - "8889" # Prometheus exporter metrics + - "8888:8888" # Prometheus metrics exposed by the collector + - "8889:8889" # Prometheus exporter metrics - "13133" # health_check extension - - "4318" # OTLP http receiver + - "4318:4318" # OTLP http receiver - "55679" # zpages extension depends_on: - prometheus diff --git a/profiles/opentelemetry_dev/grafana/dashboards/pulp_insights.json b/profiles/opentelemetry_dev/grafana/dashboards/pulp_insights.json index d0e7f0b..9f45d6d 100644 --- a/profiles/opentelemetry_dev/grafana/dashboards/pulp_insights.json +++ b/profiles/opentelemetry_dev/grafana/dashboards/pulp_insights.json @@ -11,6 +11,12 @@ "hide": true, "iconColor": "rgba(0, 211, 255, 1)", "name": "Annotations & Alerts", + "target": { + "limit": 100, + "matchAny": false, + "tags": [], + "type": "dashboard" + }, "type": "dashboard" } ] @@ -29,15 +35,15 @@ "x": 0, "y": 0 }, - "id": 13, + "id": 19, "panels": [], - "title": "Pulp Content", + "title": "Disk Usage", "type": "row" }, { "datasource": { "type": "prometheus", - "uid": "${datasource}" + "uid": "PDD8BE47D10408F45" }, "fieldConfig": { "defaults": { @@ -52,7 +58,7 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 5, + "fillOpacity": 0, "gradientMode": "none", "hideFrom": { "legend": false, @@ -61,9 +67,6 @@ }, "insertNulls": false, "lineInterpolation": "linear", - "lineStyle": { - "fill": "solid" - }, "lineWidth": 1, "pointSize": 5, "scaleDistribution": { @@ -93,17 +96,17 @@ } ] }, - "unit": "ms" + "unit": "bytes" }, "overrides": [] }, "gridPos": { "h": 8, - "w": 7, + "w": 12, "x": 0, "y": 1 }, - "id": 9, + "id": 21, "options": { "legend": { "calcs": [], @@ -120,60 +123,31 @@ { "datasource": { "type": "prometheus", - "uid": "${datasource}" + "uid": "PDD8BE47D10408F45" }, - "disableTextWrap": false, "editorMode": "builder", - "expr": "histogram_quantile(0.99, sum by(le) (rate(pulp_http_server_duration_milliseconds_bucket{exported_job=\"pulp-content\"}[5m])))", - "fullMetaSearch": false, - "includeNullMetadata": false, - "instant": false, - "interval": "2m", - "legendFormat": "P99", + "expr": "pulp_disk_usage_Bytes", + "legendFormat": "{{domain_name}}", "range": true, - "refId": "A", - "useBackend": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "disableTextWrap": false, - "editorMode": "builder", - "expr": "histogram_quantile(0.95, sum by(le) (rate(pulp_http_server_duration_milliseconds_bucket{exported_job=\"pulp-content\"}[5m])))", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": false, - "instant": false, - "interval": "2m", - "legendFormat": "P95", - "range": true, - "refId": "B", - "useBackend": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "disableTextWrap": false, - "editorMode": "builder", - "expr": "histogram_quantile(0.5, sum by(le) (rate(pulp_http_server_duration_milliseconds_bucket{exported_job=\"pulp-content\"}[5m])))", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": false, - "instant": false, - "interval": "2m", - "legendFormat": "P90", - "range": true, - "refId": "C", - "useBackend": false + "refId": "A" } ], - "title": "[Content] Latency Percentiles (2m step)", + "title": "Disk Usage by Domain", "type": "timeseries" }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 9 + }, + "id": 13, + "panels": [], + "title": "Pulp Content", + "type": "row" + }, { "datasource": { "type": "prometheus", @@ -192,8 +166,8 @@ "axisPlacement": "auto", "barAlignment": 0, "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "opacity", + "fillOpacity": 5, + "gradientMode": "none", "hideFrom": { "legend": false, "tooltip": false, @@ -201,6 +175,9 @@ }, "insertNulls": false, "lineInterpolation": "linear", + "lineStyle": { + "fill": "solid" + }, "lineWidth": 1, "pointSize": 5, "scaleDistribution": { @@ -229,17 +206,18 @@ "value": 80 } ] - } + }, + "unit": "ms" }, "overrides": [] }, "gridPos": { "h": 8, "w": 7, - "x": 7, - "y": 1 + "x": 0, + "y": 10 }, - "id": 14, + "id": 9, "options": { "legend": { "calcs": [], @@ -256,21 +234,58 @@ { "datasource": { "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "uid": "${datasource}" }, "disableTextWrap": false, "editorMode": "builder", - "expr": "rate(pulp_http_server_duration_milliseconds_count{exported_job=\"pulp-content\", http_status_code!=\"5..\"}[5m])", + "expr": "histogram_quantile(0.99, sum by(le) (rate(pulp_http_server_duration_milliseconds_bucket{exported_job=\"pulp-content\"}[$__rate_interval])))", "fullMetaSearch": false, "includeNullMetadata": false, "instant": false, - "legendFormat": "{{http_status_code}}", + "interval": "2m", + "legendFormat": "P99", "range": true, "refId": "A", "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "histogram_quantile(0.95, sum by(le) (rate(pulp_http_server_duration_milliseconds_bucket{exported_job=\"pulp-content\"}[$__rate_interval])))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "interval": "2m", + "legendFormat": "P95", + "range": true, + "refId": "B", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${datasource}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "histogram_quantile(0.5, sum by(le) (rate(pulp_http_server_duration_milliseconds_bucket{exported_job=\"pulp-content\"}[$__rate_interval])))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "interval": "2m", + "legendFormat": "P90", + "range": true, + "refId": "C", + "useBackend": false } ], - "title": "Request rate for status code != 5xx", + "title": "[Content] Latency Percentiles (2m step)", "type": "timeseries" }, { @@ -335,8 +350,8 @@ "gridPos": { "h": 8, "w": 7, - "x": 14, - "y": 1 + "x": 7, + "y": 10 }, "id": 17, "options": { @@ -359,11 +374,12 @@ }, "disableTextWrap": false, "editorMode": "builder", - "expr": "sum(rate(pulp_http_server_duration_milliseconds_count{exported_job=\"pulp-content\", http_status_code!=\"5..\"}[5m]))", + "expr": "sum(rate(pulp_http_server_duration_milliseconds_count{exported_job=\"pulp-content\", http_status_code!=\"5..\"}[$__rate_interval]))", "fullMetaSearch": false, "includeNullMetadata": true, "instant": false, - "legendFormat": "__auto", + "interval": "2m", + "legendFormat": "[2xx, 3xx, 4xx]", "range": true, "refId": "A", "useBackend": false @@ -375,18 +391,19 @@ }, "disableTextWrap": false, "editorMode": "builder", - "expr": "rate(pulp_http_server_duration_milliseconds_count{exported_job=\"pulp-content\", http_status_code=\"5..\"}[5m])", + "expr": "rate(pulp_http_server_duration_milliseconds_count{exported_job=\"pulp-content\", http_status_code=\"5..\"}[$__rate_interval])", "fullMetaSearch": false, "hide": false, "includeNullMetadata": false, "instant": false, + "interval": "2m", "legendFormat": "__auto", "range": true, "refId": "B", "useBackend": false } ], - "title": "Request rate of [2xx,3xx,4xx] against 5xx", + "title": "[Content] Request rate of [2xx,3xx,4xx] against 5xx (2min step)", "type": "timeseries" }, { @@ -395,7 +412,7 @@ "h": 1, "w": 24, "x": 0, - "y": 9 + "y": 18 }, "id": 12, "panels": [], @@ -470,7 +487,7 @@ "h": 8, "w": 7, "x": 0, - "y": 10 + "y": 19 }, "id": 6, "options": { @@ -606,106 +623,7 @@ "h": 8, "w": 7, "x": 7, - "y": 10 - }, - "id": 15, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "PBFA97CFB590B2093" - }, - "disableTextWrap": false, - "editorMode": "builder", - "expr": "rate(pulp_http_server_duration_milliseconds_count{exported_job=\"pulp-api\", http_status_code!=\"5..\"}[5m])", - "fullMetaSearch": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "{{http_status_code}}", - "range": true, - "refId": "A", - "useBackend": false - } - ], - "title": "Request rate for status code != 5xx", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${datasource}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 10, - "gradientMode": "opacity", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 8, - "w": 7, - "x": 14, - "y": 10 + "y": 19 }, "id": 16, "options": { @@ -728,10 +646,11 @@ }, "disableTextWrap": false, "editorMode": "builder", - "expr": "sum(rate(pulp_http_server_duration_milliseconds_count{exported_job=\"pulp-api\", http_status_code!=\"5..\"}[5m]))", + "expr": "sum(rate(pulp_http_server_duration_milliseconds_count{exported_job=\"pulp-api\", http_status_code!=\"5..\"}[$__rate_interval]))", "fullMetaSearch": false, "includeNullMetadata": true, "instant": false, + "interval": "2m", "legendFormat": "[2xx, 3xx, 4xx]", "range": true, "refId": "A", @@ -744,31 +663,32 @@ }, "disableTextWrap": false, "editorMode": "builder", - "expr": "sum(rate(pulp_http_server_duration_milliseconds_count{exported_job=\"pulp-api\", http_status_code=\"5..\"}[5m]))", + "expr": "sum(rate(pulp_http_server_duration_milliseconds_count{exported_job=\"pulp-api\", http_status_code=\"5..\"}[$__rate_interval]))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": false, "instant": false, + "interval": "2m", "legendFormat": "5xx requests", "range": true, "refId": "B", "useBackend": false } ], - "title": "Request rate of [2xx,3xx,4xx] against 5xx", + "title": "[API] Request rate of [2xx,3xx,4xx] against 5xx (2min step)", "type": "timeseries" } ], "refresh": "5s", - "schemaVersion": 38, + "schemaVersion": 39, "tags": [], "templating": { "list": [ { "current": { - "selected": false, - "text": "Prometheus", - "value": "PBFA97CFB590B2093" + "selected": true, + "text": "crcs02ue1-prometheus", + "value": "PDD8BE47D10408F45" }, "hide": 0, "includeAll": false, @@ -776,6 +696,7 @@ "name": "datasource", "options": [], "query": "prometheus", + "queryValue": "crcs", "refresh": 1, "regex": "", "skipUrlSync": false, @@ -789,8 +710,8 @@ }, "timepicker": {}, "timezone": "", - "title": "Pulp Insights", + "title": "Pulp Metrics", "uid": "e50bb9f2-372c-4e94-aa61-fe1f1554812c", - "version": 2, + "version": 1, "weekStart": "" } diff --git a/profiles/opentelemetry_dev/otel-collector/otel-collector-config.yaml b/profiles/opentelemetry_dev/otel-collector/otel-collector-config.yaml index 140f936..952643a 100644 --- a/profiles/opentelemetry_dev/otel-collector/otel-collector-config.yaml +++ b/profiles/opentelemetry_dev/otel-collector/otel-collector-config.yaml @@ -13,6 +13,13 @@ processors: memory_limiter: check_interval: 1s limit_mib: 4000 + transform: + error_mode: ignore + metric_statements: + - context: metric + statements: + - set(description, "Duration of HTTP server requests.") where name == "http.server.duration" + - set(description, "Number of active HTTP server requests.") where name == "http.server.active_requests" exporters: prometheus: @@ -33,11 +40,6 @@ extensions: endpoint: 0.0.0.0:55679 service: -# telemetry: -# metrics: -# level: detailed -# address: 0.0.0.0:8888 - extensions: [] pipelines: traces: @@ -46,7 +48,7 @@ service: exporters: [otlp/jaeger] metrics: receivers: [otlp] - processors: [memory_limiter, batch] + processors: [transform, batch, memory_limiter] exporters: [logging, prometheus] logs: receivers: [otlp]