Skip to content

Commit

Permalink
dashboards: add block max time delta panel (#9697)
Browse files Browse the repository at this point in the history
* dashboards: add block max time delta panel

This helps identify when the compactor is falling behind

Signed-off-by: Dimitar Dimitrov <dimitar.dimitrov@grafana.com>

* Add CHANGELOG.md entry

Signed-off-by: Dimitar Dimitrov <dimitar.dimitrov@grafana.com>

---------

Signed-off-by: Dimitar Dimitrov <dimitar.dimitrov@grafana.com>
  • Loading branch information
dimitarvdimitrov authored Oct 22, 2024
1 parent 824e0e1 commit a479a81
Show file tree
Hide file tree
Showing 5 changed files with 316 additions and 63 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@

* [CHANGE] Remove backwards compatibility for `thanos_memcached_` prefixed metrics in dashboards and alerts removed in 2.12. #9674
* [ENHANCEMENT] Unify ingester autoscaling panels on 'Mimir / Writes' dashboard to work for both ingest-storage and non-ingest-storage autoscaling. #9617
* [ENHANCEMENT] Dashboards: visualize the age of source blocks in the "Mimir / Compactor" dashboard. #9697
* [BUGFIX] Dashboards: Fix autoscaling metrics joins when series churn. #9412 #9450 #9432
* [BUGFIX] Alerts: Fix autoscaling metrics joins in `MimirAutoscalerNotActive` when series churn. #9412
* [BUGFIX] Alerts: Exclude failed cache "add" operations from alerting since failures are expected in normal operation. #9658
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4688,7 +4688,7 @@ data:
"sort": "none"
}
},
"span": 4,
"span": 3,
"targets": [
{
"expr": "sum(cortex_bucket_index_estimated_compaction_jobs{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}) and (sum(rate(cortex_bucket_index_estimated_compaction_jobs_errors_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) == 0)",
Expand All @@ -4700,6 +4700,86 @@ data:
"title": "Estimated Compaction Jobs",
"type": "timeseries"
},
{
"datasource": "$datasource",
"description": "### Source blocks age\nThe difference between the maximum timestamp of the block being compacted and the current time.\nA steadily increasing value indicates that the compactor cannot keep up with the produced blocks by the ingesters.\nIncrease the number of compactors when this value is consistently increasing.\n\n",
"fieldConfig": {
"defaults": {
"custom": {
"drawStyle": "line",
"fillOpacity": 1,
"lineWidth": 1,
"pointSize": 5,
"showPoints": "never",
"spanNulls": false,
"stacking": {
"group": "A",
"mode": "none"
}
},
"min": 0,
"thresholds": {
"mode": "absolute",
"steps": [ ]
},
"unit": "ms"
},
"overrides": [ ]
},
"id": 6,
"links": [ ],
"nullPointMode": "null as zero",
"options": {
"legend": {
"showLegend": true
},
"tooltip": {
"mode": "multi",
"sort": "none"
}
},
"span": 3,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(cortex_compactor_block_max_time_delta_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3",
"format": "time_series",
"legendFormat": "99th Percentile",
"refId": "A"
},
{
"expr": "histogram_quantile(0.50, sum(rate(cortex_compactor_block_max_time_delta_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3",
"format": "time_series",
"legendFormat": "50th Percentile",
"refId": "B"
},
{
"expr": "sum(rate(cortex_compactor_block_max_time_delta_seconds_sum{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) * 1e3 / sum(rate(cortex_compactor_block_max_time_delta_seconds_count{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))",
"format": "time_series",
"legendFormat": "Average",
"refId": "C"
}
],
"title": "Source blocks age",
"type": "timeseries",
"yaxes": [
{
"format": "ms",
"label": null,
"logBase": 1,
"max": null,
"min": 0,
"show": true
},
{
"format": "short",
"label": null,
"logBase": 1,
"max": null,
"min": null,
"show": false
}
]
},
{
"datasource": "$datasource",
"description": "### TSDB compactions / sec\nRate of TSDB compactions. Single TSDB compaction takes one or more input blocks and produces one or more (during \"split\" phase) output blocks.\n\n",
Expand All @@ -4726,7 +4806,7 @@ data:
},
"overrides": [ ]
},
"id": 6,
"id": 7,
"links": [ ],
"options": {
"legend": {
Expand All @@ -4737,7 +4817,7 @@ data:
"sort": "none"
}
},
"span": 4,
"span": 3,
"targets": [
{
"expr": "sum(rate(prometheus_tsdb_compactions_total{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval]))",
Expand Down Expand Up @@ -4775,7 +4855,7 @@ data:
},
"overrides": [ ]
},
"id": 7,
"id": 8,
"links": [ ],
"nullPointMode": "null as zero",
"options": {
Expand All @@ -4787,7 +4867,7 @@ data:
"sort": "none"
}
},
"span": 4,
"span": 3,
"targets": [
{
"expr": "histogram_quantile(0.99, sum(rate(prometheus_tsdb_compaction_duration_seconds_bucket{cluster=~\"$cluster\", job=~\"($namespace)/((compactor.*|cortex|mimir|mimir-backend.*))\"}[$__rate_interval])) by (le)) * 1e3",
Expand Down Expand Up @@ -4866,7 +4946,7 @@ data:
},
"overrides": [ ]
},
"id": 8,
"id": 9,
"links": [ ],
"options": {
"legend": {
Expand Down Expand Up @@ -4915,7 +4995,7 @@ data:
},
"overrides": [ ]
},
"id": 9,
"id": 10,
"links": [ ],
"options": {
"legend": {
Expand Down Expand Up @@ -4975,7 +5055,7 @@ data:
},
"overrides": [ ]
},
"id": 10,
"id": 11,
"links": [ ],
"options": {
"legend": {
Expand Down Expand Up @@ -5054,7 +5134,7 @@ data:
}
]
},
"id": 11,
"id": 12,
"links": [ ],
"options": {
"legend": {
Expand Down Expand Up @@ -5151,7 +5231,7 @@ data:
}
]
},
"id": 12,
"id": 13,
"links": [ ],
"options": {
"legend": {
Expand Down Expand Up @@ -5205,7 +5285,7 @@ data:
},
"overrides": [ ]
},
"id": 13,
"id": 14,
"links": [ ],
"nullPointMode": "null as zero",
"options": {
Expand Down Expand Up @@ -5296,7 +5376,7 @@ data:
},
"overrides": [ ]
},
"id": 14,
"id": 15,
"links": [ ],
"options": {
"legend": {
Expand Down Expand Up @@ -5329,7 +5409,7 @@ data:
"unit": "percentunit"
}
},
"id": 15,
"id": 16,
"links": [ ],
"options": {
"legend": {
Expand Down Expand Up @@ -5377,7 +5457,7 @@ data:
},
"overrides": [ ]
},
"id": 16,
"id": 17,
"links": [ ],
"nullPointMode": "null as zero",
"options": {
Expand Down Expand Up @@ -5456,7 +5536,7 @@ data:
},
"overrides": [ ]
},
"id": 17,
"id": 18,
"links": [ ],
"nullPointMode": "null as zero",
"options": {
Expand Down Expand Up @@ -5547,7 +5627,7 @@ data:
},
"overrides": [ ]
},
"id": 18,
"id": 19,
"links": [ ],
"nullPointMode": "null as zero",
"options": {
Expand Down Expand Up @@ -5626,7 +5706,7 @@ data:
},
"overrides": [ ]
},
"id": 19,
"id": 20,
"links": [ ],
"nullPointMode": "null as zero",
"options": {
Expand Down Expand Up @@ -5705,7 +5785,7 @@ data:
},
"overrides": [ ]
},
"id": 20,
"id": 21,
"links": [ ],
"nullPointMode": "null as zero",
"options": {
Expand Down Expand Up @@ -5784,7 +5864,7 @@ data:
},
"overrides": [ ]
},
"id": 21,
"id": 22,
"links": [ ],
"nullPointMode": "null as zero",
"options": {
Expand Down Expand Up @@ -6011,7 +6091,7 @@ data:
}
]
},
"id": 22,
"id": 23,
"links": [ ],
"options": {
"legend": {
Expand Down Expand Up @@ -6059,7 +6139,7 @@ data:
},
"overrides": [ ]
},
"id": 23,
"id": 24,
"links": [ ],
"nullPointMode": "null as zero",
"options": {
Expand Down
Loading

0 comments on commit a479a81

Please sign in to comment.