diff --git a/CHANGELOG.md b/CHANGELOG.md index 212b30a8a6c..b17a4a643ec 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -77,6 +77,7 @@ * [BUGFIX] Dashboards: Fix autoscaling metrics joins when series churn. #9412 #9450 #9432 * [BUGFIX] Alerts: Fix autoscaling metrics joins in `MimirAutoscalerNotActive` when series churn. #9412 * [BUGFIX] Alerts: Exclude failed cache "add" operations from alerting since failures are expected in normal operation. #9658 +* [BUGFIX] Alerts: Exclude read-only replicas from `MimirIngesterInstanceHasNoTenants` alert. #9843 ### Jsonnet diff --git a/operations/helm/tests/metamonitoring-values-generated/mimir-distributed/templates/metamonitoring/mixin-alerts.yaml b/operations/helm/tests/metamonitoring-values-generated/mimir-distributed/templates/metamonitoring/mixin-alerts.yaml index 056f25bba7d..b45db52d7ef 100644 --- a/operations/helm/tests/metamonitoring-values-generated/mimir-distributed/templates/metamonitoring/mixin-alerts.yaml +++ b/operations/helm/tests/metamonitoring-values-generated/mimir-distributed/templates/metamonitoring/mixin-alerts.yaml @@ -176,7 +176,11 @@ spec: message: Mimir ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} has no tenants assigned. 
runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterinstancehasnotenants expr: | - (min by(cluster, namespace, pod) (cortex_ingester_memory_users) == 0) + ( + (min by(cluster, namespace, pod) (cortex_ingester_memory_users) == 0) + unless + (max by(cluster, namespace, pod) (cortex_lifecycler_read_only) > 0) + ) and on (cluster, namespace) # Only if there are more timeseries than would be expected due to continuous testing load ( diff --git a/operations/mimir-mixin-compiled-baremetal/alerts.yaml b/operations/mimir-mixin-compiled-baremetal/alerts.yaml index 9b605e56d6d..4f87df5ba22 100644 --- a/operations/mimir-mixin-compiled-baremetal/alerts.yaml +++ b/operations/mimir-mixin-compiled-baremetal/alerts.yaml @@ -164,7 +164,11 @@ groups: message: Mimir ingester {{ $labels.instance }} in {{ $labels.cluster }}/{{ $labels.namespace }} has no tenants assigned. runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterinstancehasnotenants expr: | - (min by(cluster, namespace, instance) (cortex_ingester_memory_users) == 0) + ( + (min by(cluster, namespace, instance) (cortex_ingester_memory_users) == 0) + unless + (max by(cluster, namespace, instance) (cortex_lifecycler_read_only) > 0) + ) and on (cluster, namespace) # Only if there are more timeseries than would be expected due to continuous testing load ( diff --git a/operations/mimir-mixin-compiled/alerts.yaml b/operations/mimir-mixin-compiled/alerts.yaml index 3d1a5d3352d..dc7cdd4e8eb 100644 --- a/operations/mimir-mixin-compiled/alerts.yaml +++ b/operations/mimir-mixin-compiled/alerts.yaml @@ -164,7 +164,11 @@ groups: message: Mimir ingester {{ $labels.pod }} in {{ $labels.cluster }}/{{ $labels.namespace }} has no tenants assigned. 
runbook_url: https://grafana.com/docs/mimir/latest/operators-guide/mimir-runbooks/#mimiringesterinstancehasnotenants expr: | - (min by(cluster, namespace, pod) (cortex_ingester_memory_users) == 0) + ( + (min by(cluster, namespace, pod) (cortex_ingester_memory_users) == 0) + unless + (max by(cluster, namespace, pod) (cortex_lifecycler_read_only) > 0) + ) and on (cluster, namespace) # Only if there are more timeseries than would be expected due to continuous testing load ( diff --git a/operations/mimir-mixin/alerts/alerts.libsonnet b/operations/mimir-mixin/alerts/alerts.libsonnet index 63118434fa8..f66af7829a6 100644 --- a/operations/mimir-mixin/alerts/alerts.libsonnet +++ b/operations/mimir-mixin/alerts/alerts.libsonnet @@ -296,7 +296,11 @@ local utils = import 'mixin-utils/utils.libsonnet'; alert: $.alertName('IngesterInstanceHasNoTenants'), 'for': '1h', expr: ||| - (min by(%(alert_aggregation_labels)s, %(per_instance_label)s) (cortex_ingester_memory_users) == 0) + ( + (min by(%(alert_aggregation_labels)s, %(per_instance_label)s) (cortex_ingester_memory_users) == 0) + unless + (max by(%(alert_aggregation_labels)s, %(per_instance_label)s) (cortex_lifecycler_read_only) > 0) + ) and on (%(alert_aggregation_labels)s) # Only if there are more timeseries than would be expected due to continuous testing load (