diff --git a/CHANGELOG.md b/CHANGELOG.md index 51f355cef7a..6a80186545d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -56,6 +56,7 @@ * [CHANGE] Remove backwards compatibility for `thanos_memcached_` prefixed metrics in dashboards and alerts removed in 2.12. #9674 * [ENHANCEMENT] Unify ingester autoscaling panels on 'Mimir / Writes' dashboard to work for both ingest-storage and non-ingest-storage autoscaling. #9617 +* [ENHANCEMENT] Dashboards: visualize the age of source blocks in the "Mimir / Compactor" dashboard. #9697 * [BUGFIX] Dashboards: Fix autoscaling metrics joins when series churn. #9412 #9450 #9432 * [BUGFIX] Alerts: Fix autoscaling metrics joins in `MimirAutoscalerNotActive` when series churn. #9412 * [BUGFIX] Alerts: Exclude failed cache "add" operations from alerting since failures are expected in normal operation. #9658 diff --git a/operations/helm/tests/metamonitoring-values-generated/mimir-distributed/templates/metamonitoring/grafana-dashboards.yaml b/operations/helm/tests/metamonitoring-values-generated/mimir-distributed/templates/metamonitoring/grafana-dashboards.yaml index b56d5301ae2..826a0ad5cbd 100644 --- a/operations/helm/tests/metamonitoring-values-generated/mimir-distributed/templates/metamonitoring/grafana-dashboards.yaml +++ b/operations/helm/tests/metamonitoring-values-generated/mimir-distributed/templates/metamonitoring/grafana-dashboards.yaml @@ -4688,7 +4688,7 @@ data: "sort": "none" } }, - "span": 4, + "span": 3, "targets": [ { "expr": "sum(cortex_bucket_index_estimated_compaction_jobs{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}) and (sum(rate(cortex_bucket_index_estimated_compaction_jobs_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) == 0)", @@ -4700,6 +4700,86 @@ data: "title": "Estimated Compaction Jobs", "type": "timeseries" }, + { + "datasource": "$datasource", + "description": "### Source blocks age\nThe difference between the maximum timestamp of the block being compacted and the current time.\nA steadily increasing value indicates that the compactor cannot keep up with the produced blocks by the ingesters.\nIncrease the number of compactors when this value is consistently increasing.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 6, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_compactor_block_max_time_delta_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_compactor_block_max_time_delta_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_compactor_block_max_time_delta_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_compactor_block_max_time_delta_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Source blocks age", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, { "datasource": "$datasource", "description": "### TSDB compactions / sec\nRate of TSDB compactions. Single TSDB compaction takes one or more input blocks and produces one or more (during \"split\" phase) output blocks.\n\n", @@ -4726,7 +4806,7 @@ data: }, "overrides": [ ] }, - "id": 6, + "id": 7, "links": [ ], "options": { "legend": { @@ -4737,7 +4817,7 @@ data: "sort": "none" } }, - "span": 4, + "span": 3, "targets": [ { "expr": "sum(rate(prometheus_tsdb_compactions_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", @@ -4775,7 +4855,7 @@ data: }, "overrides": [ ] }, - "id": 7, + "id": 8, "links": [ ], "nullPointMode": "null as zero", "options": { @@ -4787,7 +4867,7 @@ data: "sort": "none" } }, - "span": 4, + "span": 3, "targets": [ { "expr": "histogram_quantile(0.99, sum(rate(prometheus_tsdb_compaction_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", @@ -4866,7 +4946,7 @@ data: }, "overrides": [ ] }, - "id": 8, + "id": 9, "links": [ ], "options": { "legend": { @@ -4915,7 +4995,7 @@ data: }, "overrides": [ ] }, - "id": 9, + "id": 10, "links": [ ], "options": { "legend": { @@ -4975,7 +5055,7 @@ data: }, "overrides": [ ] }, - "id": 10, + "id": 11, "links": [ ], "options": { "legend": { @@ -5054,7 +5134,7 @@ data: } ] }, - "id": 11, + "id": 12, "links": [ ], "options": { "legend": { @@ -5151,7 +5231,7 @@ data: } ] }, - "id": 12, + "id": 13, "links": [ ], "options": { "legend": { @@ -5205,7 +5285,7 @@ data: }, "overrides": [ ] }, - "id": 13, + "id": 14, "links": [ ], "nullPointMode": "null as zero", "options": { @@ -5296,7 +5376,7 @@ data: }, "overrides": [ ] }, - "id": 14, + "id": 15, "links": [ ], "options": { "legend": { @@ -5329,7 +5409,7 @@ data: "unit": "percentunit" } }, - "id": 15, + "id": 16, "links": [ ], "options": { "legend": { @@ -5377,7 +5457,7 @@ data: }, "overrides": [ ] }, - "id": 16, + "id": 17, "links": [ ], "nullPointMode": "null as zero", "options": { @@ -5456,7 +5536,7 @@ data: }, "overrides": [ ] }, - "id": 17, + "id": 18, "links": [ ], "nullPointMode": "null as zero", "options": { @@ -5547,7 +5627,7 @@ data: }, "overrides": [ ] }, - "id": 18, + "id": 19, "links": [ ], "nullPointMode": "null as zero", "options": { @@ -5626,7 +5706,7 @@ data: }, "overrides": [ ] }, - "id": 19, + "id": 20, "links": [ ], "nullPointMode": "null as zero", "options": { @@ -5705,7 +5785,7 @@ data: }, "overrides": [ ] }, - "id": 20, + "id": 21, "links": [ ], "nullPointMode": "null as zero", "options": { @@ -5784,7 +5864,7 @@ data: }, "overrides": [ ] }, - "id": 21, + "id": 22, "links": [ ], "nullPointMode": "null as zero", "options": { @@ -6011,7 +6091,7 @@ data: } ] }, - "id": 22, + "id": 23, "links": [ ], "options": { "legend": { @@ -6059,7 +6139,7 @@ data: }, "overrides": [ ] }, - "id": 23, + "id": 24, "links": [ ], "nullPointMode": "null as zero", "options": { diff --git a/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-compactor.json b/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-compactor.json index 795a8c7102d..fa6410dfb54 100644 --- a/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-compactor.json +++ b/operations/mimir-mixin-compiled-baremetal/dashboards/mimir-compactor.json @@ -620,7 +620,7 @@ "sort": "none" } }, - "span": 4, + "span": 3, "targets": [ { "expr": "sum(cortex_bucket_index_estimated_compaction_jobs{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}) and (sum(rate(cortex_bucket_index_estimated_compaction_jobs_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) == 0)", @@ -632,6 +632,86 @@ "title": "Estimated Compaction Jobs", "type": "timeseries" }, + { + "datasource": "$datasource", + "description": "### Source blocks age\nThe difference between the maximum timestamp of the block being compacted and the current time.\nA steadily increasing value indicates that the compactor cannot keep up with the produced blocks by the ingesters.\nIncrease the number of compactors when this value is consistently increasing.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 6, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_compactor_block_max_time_delta_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_compactor_block_max_time_delta_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_compactor_block_max_time_delta_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_compactor_block_max_time_delta_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Source blocks age", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, { "datasource": "$datasource", "description": "### TSDB compactions / sec\nRate of TSDB compactions. Single TSDB compaction takes one or more input blocks and produces one or more (during \"split\" phase) output blocks.\n\n", @@ -658,7 +738,7 @@ }, "overrides": [ ] }, - "id": 6, + "id": 7, "links": [ ], "options": { "legend": { @@ -669,7 +749,7 @@ "sort": "none" } }, - "span": 4, + "span": 3, "targets": [ { "expr": "sum(rate(prometheus_tsdb_compactions_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", @@ -707,7 +787,7 @@ }, "overrides": [ ] }, - "id": 7, + "id": 8, "links": [ ], "nullPointMode": "null as zero", "options": { @@ -719,7 +799,7 @@ "sort": "none" } }, - "span": 4, + "span": 3, "targets": [ { "expr": "histogram_quantile(0.99, sum(rate(prometheus_tsdb_compaction_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", @@ -798,7 +878,7 @@ }, "overrides": [ ] }, - "id": 8, + "id": 9, "links": [ ], "options": { "legend": { @@ -847,7 +927,7 @@ }, "overrides": [ ] }, - "id": 9, + "id": 10, "links": [ ], "options": { "legend": { @@ -907,7 +987,7 @@ }, "overrides": [ ] }, - "id": 10, + "id": 11, "links": [ ], "options": { "legend": { @@ -986,7 +1066,7 @@ } ] }, - "id": 11, + "id": 12, "links": [ ], "options": { "legend": { @@ -1083,7 +1163,7 @@ } ] }, - "id": 12, + "id": 13, "links": [ ], "options": { "legend": { @@ -1137,7 +1217,7 @@ }, "overrides": [ ] }, - "id": 13, + "id": 14, "links": [ ], "nullPointMode": "null as zero", "options": { @@ -1228,7 +1308,7 @@ }, "overrides": [ ] }, - "id": 14, + "id": 15, "links": [ ], "options": { "legend": { @@ -1261,7 +1341,7 @@ "unit": "percentunit" } }, - "id": 15, + "id": 16, "links": [ ], "options": { "legend": { @@ -1309,7 +1389,7 @@ }, "overrides": [ ] }, - "id": 16, + "id": 17, "links": [ ], "nullPointMode": "null as zero", "options": { @@ -1388,7 +1468,7 @@ }, "overrides": [ ] }, - "id": 17, + "id": 18, "links": [ ], "nullPointMode": "null as zero", "options": { @@ -1479,7 +1559,7 @@ }, "overrides": [ ] }, - "id": 18, + "id": 19, "links": [ ], "nullPointMode": "null as zero", "options": { @@ -1558,7 +1638,7 @@ }, "overrides": [ ] }, - "id": 19, + "id": 20, "links": [ ], "nullPointMode": "null as zero", "options": { @@ -1637,7 +1717,7 @@ }, "overrides": [ ] }, - "id": 20, + "id": 21, "links": [ ], "nullPointMode": "null as zero", "options": { @@ -1716,7 +1796,7 @@ }, "overrides": [ ] }, - "id": 21, + "id": 22, "links": [ ], "nullPointMode": "null as zero", "options": { @@ -1943,7 +2023,7 @@ } ] }, - "id": 22, + "id": 23, "links": [ ], "options": { "legend": { @@ -1991,7 +2071,7 @@ }, "overrides": [ ] }, - "id": 23, + "id": 24, "links": [ ], "nullPointMode": "null as zero", "options": { diff --git a/operations/mimir-mixin-compiled/dashboards/mimir-compactor.json b/operations/mimir-mixin-compiled/dashboards/mimir-compactor.json index 059989f4a89..06790f88849 100644 --- a/operations/mimir-mixin-compiled/dashboards/mimir-compactor.json +++ b/operations/mimir-mixin-compiled/dashboards/mimir-compactor.json @@ -620,7 +620,7 @@ "sort": "none" } }, - "span": 4, + "span": 3, "targets": [ { "expr": "sum(cortex_bucket_index_estimated_compaction_jobs{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}) and (sum(rate(cortex_bucket_index_estimated_compaction_jobs_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) == 0)", @@ -632,6 +632,86 @@ "title": "Estimated Compaction Jobs", "type": "timeseries" }, + { + "datasource": "$datasource", + "description": "### Source blocks age\nThe difference between the maximum timestamp of the block being compacted and the current time.\nA steadily increasing value indicates that the compactor cannot keep up with the produced blocks by the ingesters.\nIncrease the number of compactors when this value is consistently increasing.\n\n", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 1, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "min": 0, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "ms" + }, + "overrides": [ ] + }, + "id": 6, + "links": [ ], + "nullPointMode": "null as zero", + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "histogram_quantile(0.99, sum(rate(cortex_compactor_block_max_time_delta_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "99th Percentile", + "refId": "A" + }, + { + "expr": "histogram_quantile(0.50, sum(rate(cortex_compactor_block_max_time_delta_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", + "format": "time_series", + "legendFormat": "50th Percentile", + "refId": "B" + }, + { + "expr": "sum(rate(cortex_compactor_block_max_time_delta_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_compactor_block_max_time_delta_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "Average", + "refId": "C" + } + ], + "title": "Source blocks age", + "type": "timeseries", + "yaxes": [ + { + "format": "ms", + "label": null, + "logBase": 1, + "max": null, + "min": 0, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ] + }, { "datasource": "$datasource", "description": "### TSDB compactions / sec\nRate of TSDB compactions. Single TSDB compaction takes one or more input blocks and produces one or more (during \"split\" phase) output blocks.\n\n", @@ -658,7 +738,7 @@ }, "overrides": [ ] }, - "id": 6, + "id": 7, "links": [ ], "options": { "legend": { @@ -669,7 +749,7 @@ "sort": "none" } }, - "span": 4, + "span": 3, "targets": [ { "expr": "sum(rate(prometheus_tsdb_compactions_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))", @@ -707,7 +787,7 @@ }, "overrides": [ ] }, - "id": 7, + "id": 8, "links": [ ], "nullPointMode": "null as zero", "options": { @@ -719,7 +799,7 @@ "sort": "none" } }, - "span": 4, + "span": 3, "targets": [ { "expr": "histogram_quantile(0.99, sum(rate(prometheus_tsdb_compaction_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3", @@ -798,7 +878,7 @@ }, "overrides": [ ] }, - "id": 8, + "id": 9, "links": [ ], "options": { "legend": { @@ -847,7 +927,7 @@ }, "overrides": [ ] }, - "id": 9, + "id": 10, "links": [ ], "options": { "legend": { @@ -907,7 +987,7 @@ }, "overrides": [ ] }, - "id": 10, + "id": 11, "links": [ ], "options": { "legend": { @@ -986,7 +1066,7 @@ } ] }, - "id": 11, + "id": 12, "links": [ ], "options": { "legend": { @@ -1083,7 +1163,7 @@ } ] }, - "id": 12, + "id": 13, "links": [ ], "options": { "legend": { @@ -1137,7 +1217,7 @@ }, "overrides": [ ] }, - "id": 13, + "id": 14, "links": [ ], "nullPointMode": "null as zero", "options": { @@ -1228,7 +1308,7 @@ }, "overrides": [ ] }, - "id": 14, + "id": 15, "links": [ ], "options": { "legend": { @@ -1261,7 +1341,7 @@ "unit": "percentunit" } }, - "id": 15, + "id": 16, "links": [ ], "options": { "legend": { @@ -1309,7 +1389,7 @@ }, "overrides": [ ] }, - "id": 16, + "id": 17, "links": [ ], "nullPointMode": "null as zero", "options": { @@ -1388,7 +1468,7 @@ }, "overrides": [ ] }, - "id": 17, + "id": 18, "links": [ ], "nullPointMode": "null as zero", "options": { @@ -1479,7 +1559,7 @@ }, "overrides": [ ] }, - "id": 18, + "id": 19, "links": [ ], "nullPointMode": "null as zero", "options": { @@ -1558,7 +1638,7 @@ }, "overrides": [ ] }, - "id": 19, + "id": 20, "links": [ ], "nullPointMode": "null as zero", "options": { @@ -1637,7 +1717,7 @@ }, "overrides": [ ] }, - "id": 20, + "id": 21, "links": [ ], "nullPointMode": "null as zero", "options": { @@ -1716,7 +1796,7 @@ }, "overrides": [ ] }, - "id": 21, + "id": 22, "links": [ ], "nullPointMode": "null as zero", "options": { @@ -1943,7 +2023,7 @@ } ] }, - "id": 22, + "id": 23, "links": [ ], "options": { "legend": { @@ -1991,7 +2071,7 @@ }, "overrides": [ ] }, - "id": 23, + "id": 24, "links": [ ], "nullPointMode": "null as zero", "options": { diff --git a/operations/mimir-mixin/dashboards/compactor.libsonnet b/operations/mimir-mixin/dashboards/compactor.libsonnet index e519228549e..9db9dbcc7e1 100644 --- a/operations/mimir-mixin/dashboards/compactor.libsonnet +++ b/operations/mimir-mixin/dashboards/compactor.libsonnet @@ -255,6 +255,18 @@ local fixTargetsForTransformations(panel, refIds) = panel { ||| ), ) + .addPanel( + $.timeseriesPanel('Source blocks age') + + $.latencyPanel('cortex_compactor_block_max_time_delta_seconds', '{%s}' % $.jobMatcher($._config.job_names.compactor)) + + $.panelDescription( + 'Source blocks age', + ||| + The difference between the maximum timestamp of the block being compacted and the current time. + A steadily increasing value indicates that the compactor cannot keep up with the produced blocks by the ingesters. + Increase the number of compactors when this value is consistently increasing. + ||| + ), + ) .addPanel( $.timeseriesPanel('TSDB compactions / sec') + $.queryPanel('sum(rate(prometheus_tsdb_compactions_total{%s}[$__rate_interval]))' % $.jobMatcher($._config.job_names.compactor), 'compactions') +