From ef0a8f9f2dbdb59e9a972efb18289ec584172455 Mon Sep 17 00:00:00 2001 From: Coleen Iona Quadros Date: Tue, 4 Nov 2025 17:29:38 +0100 Subject: [PATCH 01/14] add multiple sources for inhibition Signed-off-by: Coleen Iona Quadros --- config/config.go | 7 + inhibit/inhibit.go | 55 +++- inhibit/inhibit_bench_test.go | 16 +- inhibit/inhibit_test.go | 24 +- inhibit/metric_test.go | 517 ++++++++++++++++++++++++++++++++++ 5 files changed, 595 insertions(+), 24 deletions(-) create mode 100644 inhibit/metric_test.go diff --git a/config/config.go b/config/config.go index 4f8d803e63..bcc66ff185 100644 --- a/config/config.go +++ b/config/config.go @@ -1002,6 +1002,11 @@ func (r *Route) UnmarshalYAML(unmarshal func(any) error) error { return nil } +type Source struct { + SrcMatchers Matchers `yaml:"matchers,omitempty" json:"matchers,omitempty"` + Equal []string `yaml:"equal,omitempty" json:"equal,omitempty"` +} + // InhibitRule defines an inhibition rule that mutes alerts that match the // target labels if an alert matching the source labels exists. // Both alerts have to have a set of labels being equal. @@ -1016,6 +1021,8 @@ type InhibitRule struct { SourceMatchRE MatchRegexps `yaml:"source_match_re,omitempty" json:"source_match_re,omitempty"` // SourceMatchers defines a set of label matchers that have to be fulfilled for source alerts. SourceMatchers Matchers `yaml:"source_matchers,omitempty" json:"source_matchers,omitempty"` + // Sources defines a set of source matchers and equal labels. + Sources []Source `yaml:"source,omitempty" json:"source,omitempty"` // TargetMatch defines a set of labels that have to equal the given // value for target alerts. Deprecated. Remove before v1.0 release. TargetMatch map[string]string `yaml:"target_match,omitempty" json:"target_match,omitempty"` diff --git a/inhibit/inhibit.go b/inhibit/inhibit.go index 6e0a61f40e..85ae58db5a 100644 --- a/inhibit/inhibit.go +++ b/inhibit/inhibit.go @@ -16,6 +16,7 @@ package inhibit import ( "context" "log/slog" + "strings" "sync" "time" @@ -74,6 +75,7 @@ func NewInhibitor(ap provider.Alerts, rs []config.InhibitRule, mk types.AlertMar ruleNames[cr.Name] = struct{}{} } } + return ih } @@ -114,21 +116,46 @@ func (ih *Inhibitor) processAlert(ctx context.Context, a *types.Alert) { trace.WithSpanKind(trace.SpanKindInternal), ) defer span.End() - - // Update the inhibition rules' cache. - for _, r := range ih.rules { - if r.SourceMatchers.Matches(a.Labels) { - attr := attribute.String("alerting.inhibit_rule.name", r.Name) - span.AddEvent("alert matched rule source", trace.WithAttributes(attr)) - if err := r.scache.Set(a); err != nil { - message := "error on set alert" - ih.logger.Error(message, "err", err) - span.SetStatus(codes.Error, message) - span.RecordError(err) - continue + // Update the inhibition rules' cache. + cachedSum := 0 + indexedSum := 0 + for _, r := range ih.rules { + if len(r.Sources) > 0 { + allSrcMatched := true + for _, src := range r.Sources { + if !src.SrcMatchers.Matches(a.Labels) { + allSrcMatched = false + break + } else { + attr := attribute.String("alerting.inhibit_rule.name", r.Name) + span.AddEvent("alert matched rule source", trace.WithAttributes(attr)) + if err := src.scache.Set(a); err != nil { + message := "error on set alert" + ih.logger.Error(message, "err", err) + span.SetStatus(codes.Error, message) + span.RecordError(err) + continue + } + span.SetAttributes(attr) + } + if allSrcMatched { + if err := r.scache.Set(a); err != nil { + ih.logger.Error("error on set alert", "err", err) + continue + } + r.updateIndex(a) + } + } else { + if r.SourceMatchers.Matches(a.Labels) { + if err := r.scache.Set(a); err != nil { + ih.logger.Error("error on set alert", "err", err) + continue + } + r.updateIndex(a) + } + } + ih.processAlert(a) } - span.SetAttributes(attr) - r.updateIndex(a) } } } diff --git a/inhibit/inhibit_bench_test.go b/inhibit/inhibit_bench_test.go index 8f34443f7a..95832bc6e1 100644 --- a/inhibit/inhibit_bench_test.go +++ b/inhibit/inhibit_bench_test.go @@ -109,8 +109,12 @@ func allRulesMatchBenchmark(b *testing.B, numInhibitionRules, numInhibitingAlert n: numInhibitionRules, newRuleFunc: func(idx int) config.InhibitRule { return config.InhibitRule{ - SourceMatchers: config.Matchers{ - mustNewMatcher(b, labels.MatchEqual, "src", strconv.Itoa(idx)), + Sources: []config.Source{ + { + SrcMatchers: config.Matchers{ + mustNewMatcher(b, labels.MatchEqual, "src", strconv.Itoa(idx)), + }, + }, }, TargetMatchers: config.Matchers{ mustNewMatcher(b, labels.MatchEqual, "dst", "0"), @@ -152,8 +156,12 @@ func lastRuleMatchesBenchmark(b *testing.B, n int) benchmarkOptions { n: n, newRuleFunc: func(idx int) config.InhibitRule { return config.InhibitRule{ - SourceMatchers: config.Matchers{ - mustNewMatcher(b, labels.MatchEqual, "src", strconv.Itoa(idx)), + Sources: []config.Source{ + { + SrcMatchers: config.Matchers{ + mustNewMatcher(b, labels.MatchEqual, "src", strconv.Itoa(idx)), + }, + }, }, TargetMatchers: config.Matchers{ mustNewMatcher(b, labels.MatchEqual, "dst", "0"), diff --git a/inhibit/inhibit_test.go b/inhibit/inhibit_test.go index 9c4cc5fb80..7207785fd7 100644 --- a/inhibit/inhibit_test.go +++ b/inhibit/inhibit_test.go @@ -250,12 +250,20 @@ func TestInhibitRuleMatchers(t *testing.T) { t.Parallel() rule1 := config.InhibitRule{ - SourceMatchers: config.Matchers{&labels.Matcher{Type: labels.MatchEqual, Name: "s1", Value: "1"}}, + Sources: []config.Source{ + { + SrcMatchers: config.Matchers{&labels.Matcher{Type: labels.MatchEqual, Name: "s1", Value: "1"}}, + }, + }, TargetMatchers: config.Matchers{&labels.Matcher{Type: labels.MatchNotEqual, Name: "t1", Value: "1"}}, Equal: []string{"e"}, } rule2 := config.InhibitRule{ - SourceMatchers: config.Matchers{&labels.Matcher{Type: labels.MatchEqual, Name: "s2", Value: "1"}}, + Sources: []config.Source{ + { + SrcMatchers: config.Matchers{&labels.Matcher{Type: labels.MatchEqual, Name: "s2", Value: "1"}}, + }, + }, TargetMatchers: config.Matchers{&labels.Matcher{Type: labels.MatchEqual, Name: "t2", Value: "1"}}, Equal: []string{"e"}, } @@ -352,8 +360,10 @@ func TestInhibitRuleName(t *testing.T) { config1 := config.InhibitRule{ Name: "test-rule", - SourceMatchers: []*labels.Matcher{ - {Type: labels.MatchEqual, Name: "severity", Value: "critical"}, + Sources: []config.Source{ + { + SrcMatchers: config.Matchers{&labels.Matcher{Type: labels.MatchEqual, Name: "severity", Value: "critical"}}, + }, }, TargetMatchers: []*labels.Matcher{ {Type: labels.MatchEqual, Name: "severity", Value: "warning"}, @@ -361,8 +371,10 @@ func TestInhibitRuleName(t *testing.T) { Equal: []string{"instance"}, } config2 := config.InhibitRule{ - SourceMatchers: []*labels.Matcher{ - {Type: labels.MatchEqual, Name: "severity", Value: "critical"}, + Sources: []config.Source{ + { + SrcMatchers: config.Matchers{&labels.Matcher{Type: labels.MatchEqual, Name: "severity", Value: "critical"}}, + }, }, TargetMatchers: []*labels.Matcher{ {Type: labels.MatchEqual, Name: "severity", Value: "warning"}, diff --git a/inhibit/metric_test.go b/inhibit/metric_test.go new file mode 100644 index 0000000000..48007b29de --- /dev/null +++ b/inhibit/metric_test.go @@ -0,0 +1,517 @@ +// Copyright The Prometheus Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package inhibit + +import ( + "testing" + "time" + + "github.com/prometheus/client_golang/prometheus" + io_prometheus_client "github.com/prometheus/client_model/go" + "github.com/prometheus/common/model" + "github.com/stretchr/testify/require" + + "github.com/prometheus/alertmanager/config" + "github.com/prometheus/alertmanager/pkg/labels" + "github.com/prometheus/alertmanager/provider/mem" + "github.com/prometheus/alertmanager/types" +) + +// getMetricValue retrieves a specific metric value from the registry. +func getMetricValue(t *testing.T, reg *prometheus.Registry, metricName string, labels map[string]string) (float64, uint64, bool) { + t.Helper() + metricFamilies, err := reg.Gather() + require.NoError(t, err) + + for _, mf := range metricFamilies { + if mf.GetName() != metricName { + continue + } + for _, metric := range mf.GetMetric() { + if labelsMatch(metric, labels) { + if mf.GetType() == io_prometheus_client.MetricType_GAUGE { + return metric.GetGauge().GetValue(), 0, true + } + if mf.GetType() == io_prometheus_client.MetricType_SUMMARY { + return 0, metric.GetSummary().GetSampleCount(), true + } + } + } + } + return 0, 0, false +} + +func labelsMatch(metric *io_prometheus_client.Metric, wantLabels map[string]string) bool { + for wantKey, wantVal := range wantLabels { + found := false + for _, labelPair := range metric.GetLabel() { + if labelPair.GetName() == wantKey && labelPair.GetValue() == wantVal { + found = true + break + } + } + if !found { + return false + } + } + return true +} + +func TestInhibitorMetrics_RuleMatchesDuration(t *testing.T) { + reg := prometheus.NewRegistry() + metrics := NewInhibitorMetrics(reg) + + rules := []config.InhibitRule{ + { + Name: "test-rule", + Sources: []config.Source{ + { + SrcMatchers: config.Matchers{&labels.Matcher{Type: labels.MatchEqual, Name: "severity", Value: "critical"}}, + }, + }, + TargetMatchers: []*labels.Matcher{ + {Type: labels.MatchEqual, Name: "severity", Value: "warning"}, + }, + }, + } + + marker := types.NewMarker(reg) + inhibitor := NewInhibitor(nil, rules, marker, nopLogger, metrics) + + // Test case 1: Target matches (should record matched="true") + targetAlert := model.LabelSet{ + "severity": "warning", + "instance": "server1", + } + inhibitor.Mutes(targetAlert) + + _, count, found := getMetricValue(t, reg, "alertmanager_inhibit_rule_matches_duration_seconds", + map[string]string{"rule": "test-rule", "matched": "true"}) + require.True(t, found, "Should find matched=true metric") + require.Equal(t, uint64(1), count, "Should have 1 sample for matched=true") + + // Test case 2: Target doesn't match (should record matched="false") + nonMatchingAlert := model.LabelSet{ + "severity": "info", + "instance": "server2", + } + inhibitor.Mutes(nonMatchingAlert) + + _, count, found = getMetricValue(t, reg, "alertmanager_inhibit_rule_matches_duration_seconds", + map[string]string{"rule": "test-rule", "matched": "false"}) + require.True(t, found, "Should find matched=false metric") + require.Equal(t, uint64(1), count, "Should have 1 sample for matched=false") +} + +func TestInhibitorMetrics_RuleMutesDuration_Muted(t *testing.T) { + reg := prometheus.NewRegistry() + metrics := NewInhibitorMetrics(reg) + + rules := []config.InhibitRule{ + { + Name: "test-rule", + Sources: []config.Source{ + { + SrcMatchers: config.Matchers{&labels.Matcher{Type: labels.MatchEqual, Name: "severity", Value: "critical"}}, + }, + }, + TargetMatchers: []*labels.Matcher{ + {Type: labels.MatchEqual, Name: "severity", Value: "warning"}, + }, + }, + } + + marker := types.NewMarker(reg) + inhibitor := NewInhibitor(nil, rules, marker, nopLogger, metrics) + + // Add a source alert that will inhibit + sourceAlert := &types.Alert{ + Alert: model.Alert{ + Labels: model.LabelSet{ + "severity": "critical", + "instance": "server1", + }, + StartsAt: time.Now().Add(-time.Minute), + EndsAt: time.Now().Add(time.Hour), + }, + } + inhibitor.rules[0].scache.Set(sourceAlert) + inhibitor.rules[0].updateIndex(sourceAlert) + + // Test that target alert is muted + targetAlert := model.LabelSet{ + "severity": "warning", + "instance": "server1", + } + muted := inhibitor.Mutes(targetAlert) + require.True(t, muted, "Alert should be muted") + + // Verify per-rule muted="true" metric was recorded + _, count, found := getMetricValue(t, reg, "alertmanager_inhibit_rule_mutes_duration_seconds", + map[string]string{"rule": "test-rule", "muted": "true"}) + require.True(t, found, "Should find per-rule muted=true metric") + require.Equal(t, uint64(1), count, "Should have 1 sample for per-rule muted=true") + + // Verify global muted="true" metric was recorded + _, count, found = getMetricValue(t, reg, "alertmanager_inhibitor_mutes_duration_seconds", + map[string]string{"muted": "true"}) + require.True(t, found, "Should find global muted=true metric") + require.Equal(t, uint64(1), count, "Should have 1 sample for global muted=true") +} + +func TestInhibitorMetrics_RuleMutesDuration_NotMuted(t *testing.T) { + reg := prometheus.NewRegistry() + metrics := NewInhibitorMetrics(reg) + + rules := []config.InhibitRule{ + { + Name: "test-rule", + Sources: []config.Source{ + { + SrcMatchers: config.Matchers{&labels.Matcher{Type: labels.MatchEqual, Name: "severity", Value: "critical"}}, + }, + }, + TargetMatchers: []*labels.Matcher{ + {Type: labels.MatchEqual, Name: "severity", Value: "warning"}, + }, + Equal: []string{"instance"}, + }, + } + + marker := types.NewMarker(reg) + inhibitor := NewInhibitor(nil, rules, marker, nopLogger, metrics) + + // Add a source alert with different instance + sourceAlert := &types.Alert{ + Alert: model.Alert{ + Labels: model.LabelSet{ + "severity": "critical", + "instance": "server1", + }, + StartsAt: time.Now().Add(-time.Minute), + EndsAt: time.Now().Add(time.Hour), + }, + } + inhibitor.rules[0].scache.Set(sourceAlert) + + // Test that target alert with different instance is NOT muted + targetAlert := model.LabelSet{ + "severity": "warning", + "instance": "server2", + } + muted := inhibitor.Mutes(targetAlert) + require.False(t, muted, "Alert should not be muted") + + // Verify per-rule muted="false" metric was recorded + _, count, found := getMetricValue(t, reg, "alertmanager_inhibit_rule_mutes_duration_seconds", + map[string]string{"rule": "test-rule", "muted": "false"}) + require.True(t, found, "Should find per-rule muted=false metric") + require.Equal(t, uint64(1), count, "Should have 1 sample for per-rule muted=false") + + // Verify global muted="false" metric was recorded + _, count, found = getMetricValue(t, reg, "alertmanager_inhibitor_mutes_duration_seconds", + map[string]string{"muted": "false"}) + require.True(t, found, "Should find global muted=false metric") + require.Equal(t, uint64(1), count, "Should have 1 sample for global muted=false") +} + +func TestInhibitorMetrics_NoRuleMatches(t *testing.T) { + reg := prometheus.NewRegistry() + metrics := NewInhibitorMetrics(reg) + + rules := []config.InhibitRule{ + { + Name: "test-rule", + Sources: []config.Source{ + { + SrcMatchers: config.Matchers{&labels.Matcher{Type: labels.MatchEqual, Name: "severity", Value: "critical"}}, + }, + }, + TargetMatchers: []*labels.Matcher{ + {Type: labels.MatchEqual, Name: "severity", Value: "warning"}, + }, + Equal: []string{"instance"}, + }, + } + + marker := types.NewMarker(reg) + inhibitor := NewInhibitor(nil, rules, marker, nopLogger, metrics) + + // Test with alert that doesn't match any rule's target + nonMatchingAlert := model.LabelSet{ + "severity": "info", + "instance": "server1", + } + muted := inhibitor.Mutes(nonMatchingAlert) + require.False(t, muted, "Alert should not be muted") + + // Verify that global muted="false" metric was recorded + _, count, found := getMetricValue(t, reg, "alertmanager_inhibitor_mutes_duration_seconds", + map[string]string{"muted": "false"}) + require.True(t, found, "Should find global muted=false metric") + require.Equal(t, uint64(1), count, "Should have 1 sample for global muted=false") + + // Verify per-rule matched="false" was recorded + _, count, found = getMetricValue(t, reg, "alertmanager_inhibit_rule_matches_duration_seconds", + map[string]string{"rule": "test-rule", "matched": "false"}) + require.True(t, found, "Should find rule matched=false metric") + require.Equal(t, uint64(1), count, "Should have 1 sample for rule matched=false") +} + +func TestInhibitorMetrics_MultipleRules(t *testing.T) { + reg := prometheus.NewRegistry() + metrics := NewInhibitorMetrics(reg) + + rules := []config.InhibitRule{ + { + Name: "rule-1", + Sources: []config.Source{ + { + SrcMatchers: config.Matchers{&labels.Matcher{Type: labels.MatchEqual, Name: "severity", Value: "critical"}}, + }, + }, + TargetMatchers: []*labels.Matcher{ + {Type: labels.MatchEqual, Name: "severity", Value: "warning"}, + }, + Equal: []string{"instance"}, + }, + { + Name: "rule-2", + Sources: []config.Source{ + { + SrcMatchers: config.Matchers{&labels.Matcher{Type: labels.MatchEqual, Name: "team", Value: "sre"}}, + }, + }, + TargetMatchers: []*labels.Matcher{ + {Type: labels.MatchEqual, Name: "team", Value: "dev"}, + }, + Equal: []string{"service"}, + }, + } + + marker := types.NewMarker(reg) + inhibitor := NewInhibitor(nil, rules, marker, nopLogger, metrics) + + // Add source alert for rule-1 + sourceAlert1 := &types.Alert{ + Alert: model.Alert{ + Labels: model.LabelSet{ + "severity": "critical", + "instance": "server1", + }, + StartsAt: time.Now().Add(-time.Minute), + EndsAt: time.Now().Add(time.Hour), + }, + } + inhibitor.rules[0].scache.Set(sourceAlert1) + inhibitor.rules[0].updateIndex(sourceAlert1) + + // Test alert that matches rule-1 + targetAlert1 := model.LabelSet{ + "severity": "warning", + "instance": "server1", + } + muted1 := inhibitor.Mutes(targetAlert1) + require.True(t, muted1, "Alert should be muted by rule-1") + + // Verify metrics for rule-1 + _, count, found := getMetricValue(t, reg, "alertmanager_inhibit_rule_matches_duration_seconds", + map[string]string{"rule": "rule-1", "matched": "true"}) + require.True(t, found, "Should find rule-1 matched=true metric") + require.Equal(t, 1, int(count)) + + _, count, found = getMetricValue(t, reg, "alertmanager_inhibit_rule_mutes_duration_seconds", + map[string]string{"rule": "rule-1", "muted": "true"}) + require.True(t, found, "Should find rule-1 muted=true metric") + require.Equal(t, 1, int(count)) + + // Verify global muted="true" metric + _, count, found = getMetricValue(t, reg, "alertmanager_inhibitor_mutes_duration_seconds", + map[string]string{"muted": "true"}) + require.True(t, found, "Should find global muted=true metric") + require.Equal(t, 1, int(count)) + + // Test alert that matches rule-2 target but has no source + targetAlert2 := model.LabelSet{ + "team": "dev", + "service": "api", + } + muted2 := inhibitor.Mutes(targetAlert2) + require.False(t, muted2, "Alert should not be muted") + + // Verify metrics for rule-2 (both rules process this alert since rule-1 doesn't match target) + _, count, found = getMetricValue(t, reg, "alertmanager_inhibit_rule_matches_duration_seconds", + map[string]string{"rule": "rule-1", "matched": "false"}) + require.True(t, found, "Should find rule-1 matched=false metric") + require.Equal(t, 1, int(count)) + + _, count, found = getMetricValue(t, reg, "alertmanager_inhibit_rule_matches_duration_seconds", + map[string]string{"rule": "rule-2", "matched": "true"}) + require.True(t, found, "Should find rule-2 matched=true metric") + require.Equal(t, 1, int(count)) + + _, count, found = getMetricValue(t, reg, "alertmanager_inhibit_rule_mutes_duration_seconds", + map[string]string{"rule": "rule-2", "muted": "false"}) + require.True(t, found, "Should find rule-2 muted=false metric") + require.Equal(t, 1, int(count)) + + // Verify global muted="false" metric + _, count, found = getMetricValue(t, reg, "alertmanager_inhibitor_mutes_duration_seconds", + map[string]string{"muted": "false"}) + require.True(t, found, "Should find global muted=false metric") + require.Equal(t, 1, int(count), "Should have 1 samples") +} + +func TestInhibitorMetrics_CacheAndIndexItems(t *testing.T) { + reg := prometheus.NewRegistry() + metrics := NewInhibitorMetrics(reg) + + rules := []config.InhibitRule{ + { + Name: "named-rule", + Sources: []config.Source{ + { + SrcMatchers: config.Matchers{&labels.Matcher{Type: labels.MatchEqual, Name: "severity", Value: "critical"}}, + }, + }, + TargetMatchers: []*labels.Matcher{ + {Type: labels.MatchEqual, Name: "severity", Value: "warning"}, + }, + Equal: []string{"instance"}, + }, + { + Sources: []config.Source{ + { + SrcMatchers: config.Matchers{&labels.Matcher{Type: labels.MatchEqual, Name: "severity", Value: "critical"}}, + }, + }, + TargetMatchers: []*labels.Matcher{ + {Type: labels.MatchEqual, Name: "severity", Value: "warning"}, + }, + Equal: []string{"cluster"}, + }, + } + + marker := types.NewMarker(reg) + provider, err := mem.NewAlerts(t.Context(), marker, 15*time.Minute, nil, nopLogger, reg) + require.NoError(t, err) + inhibitor := NewInhibitor(provider, rules, marker, nopLogger, metrics) + go inhibitor.Run() + + // Add multiple source alerts + for i := 1; i <= 3; i++ { + sourceAlert := &types.Alert{ + Alert: model.Alert{ + Labels: model.LabelSet{ + "severity": "critical", + "instance": model.LabelValue("server" + string(rune('0'+i))), + "cluster": model.LabelValue("cluster" + string(rune('0'+i))), + }, + StartsAt: time.Now().Add(-time.Minute), + EndsAt: time.Now().Add(time.Hour), + }, + } + require.NoError(t, provider.Put(sourceAlert)) + } + + // Wait for the inhibitor to process alerts and update metrics + // The Run() goroutine processes alerts asynchronously + require.Eventually(t, func() bool { + value, _, found := getMetricValue(t, reg, "alertmanager_inhibitor_source_alerts_cache_items", + map[string]string{}) + return found && value == 6 + }, 2*time.Second, 50*time.Millisecond, "Cache items metric should reach 6") + + // Stop the inhibitor + inhibitor.Stop() + + // Global metrics (no labels) show the sum across all rules + value, _, found := getMetricValue(t, reg, "alertmanager_inhibitor_source_alerts_cache_items", + map[string]string{}) + require.True(t, found, "Should find global cache items metric") + require.Equal(t, float64(6), value, "Global cache should contain 6 alerts total") + + value, _, found = getMetricValue(t, reg, "alertmanager_inhibitor_source_alerts_index_items", + map[string]string{}) + require.True(t, found, "Should find global index items metric") + require.Equal(t, float64(6), value, "Global index should contain 6 entries total") + + // Per-rule metrics show individual rule values + value, _, found = getMetricValue(t, reg, "alertmanager_inhibit_rule_source_alerts_cache_items", + map[string]string{"rule": "named-rule"}) + require.True(t, found, "Should find per-rule cache items metric") + require.Equal(t, float64(3), value, "Named rule cache should contain 3 alerts") + + value, _, found = getMetricValue(t, reg, "alertmanager_inhibit_rule_source_alerts_index_items", + map[string]string{"rule": "named-rule"}) + require.True(t, found, "Should find per-rule index items metric") + require.Equal(t, float64(3), value, "Named rule index should contain 3 entries") +} + +func TestInhibitorMetrics_Registration(t *testing.T) { + reg := prometheus.NewRegistry() + metrics := NewInhibitorMetrics(reg) + + require.NotNil(t, metrics, "Metrics should be created") + + // Create a rule and use the metrics so they appear in Gather() output + rules := []config.InhibitRule{ + { + Name: "test-rule", + Sources: []config.Source{ + { + SrcMatchers: config.Matchers{&labels.Matcher{Type: labels.MatchEqual, Name: "severity", Value: "critical"}}, + }, + }, + TargetMatchers: []*labels.Matcher{ + {Type: labels.MatchEqual, Name: "severity", Value: "warning"}, + }, + Equal: []string{"instance"}, + }, + } + + marker := types.NewMarker(reg) + inhibitor := NewInhibitor(nil, rules, marker, nopLogger, metrics) + + // Use the metrics to ensure they show up in Gather() + testAlert := model.LabelSet{ + "severity": "warning", + "instance": "server1", + } + inhibitor.Mutes(testAlert) + + // Verify all metrics are registered and have data + metricFamilies, err := reg.Gather() + require.NoError(t, err) + + registeredMetrics := map[string]bool{ + "alertmanager_inhibitor_source_alerts_cache_items": false, + "alertmanager_inhibitor_source_alerts_index_items": false, + "alertmanager_inhibitor_mutes_duration_seconds": false, + "alertmanager_inhibit_rule_source_alerts_cache_items": false, + "alertmanager_inhibit_rule_source_alerts_index_items": false, + "alertmanager_inhibit_rule_matches_duration_seconds": false, + "alertmanager_inhibit_rule_mutes_duration_seconds": false, + } + + for _, mf := range metricFamilies { + if _, exists := registeredMetrics[mf.GetName()]; exists { + registeredMetrics[mf.GetName()] = true + } + } + + for metricName, registered := range registeredMetrics { + require.True(t, registered, "Metric %s should be registered", metricName) + } +} From b643332650bca8bc5cae8d2a93b8136891cddfa8 Mon Sep 17 00:00:00 2001 From: Coleen Iona Quadros Date: Fri, 7 Nov 2025 16:26:22 +0100 Subject: [PATCH 02/14] refactor and update cache Signed-off-by: Coleen Iona Quadros --- inhibit/inhibit.go | 284 ++++++++++++++++++++++++++++++++-------- inhibit/inhibit_test.go | 188 ++++++++++++++++++++++++-- inhibit/metric_test.go | 38 ++---- 3 files changed, 420 insertions(+), 90 deletions(-) diff --git a/inhibit/inhibit.go b/inhibit/inhibit.go index 85ae58db5a..41a943874b 100644 --- a/inhibit/inhibit.go +++ b/inhibit/inhibit.go @@ -15,6 +15,7 @@ package inhibit import ( "context" + "github.com/prometheus/client_golang/prometheus" "log/slog" "strings" "sync" @@ -115,49 +116,54 @@ func (ih *Inhibitor) processAlert(ctx context.Context, a *types.Alert) { ), trace.WithSpanKind(trace.SpanKindInternal), ) - defer span.End() - // Update the inhibition rules' cache. - cachedSum := 0 - indexedSum := 0 - for _, r := range ih.rules { - if len(r.Sources) > 0 { - allSrcMatched := true - for _, src := range r.Sources { - if !src.SrcMatchers.Matches(a.Labels) { - allSrcMatched = false - break - } else { - attr := attribute.String("alerting.inhibit_rule.name", r.Name) - span.AddEvent("alert matched rule source", trace.WithAttributes(attr)) - if err := src.scache.Set(a); err != nil { - message := "error on set alert" - ih.logger.Error(message, "err", err) - span.SetStatus(codes.Error, message) - span.RecordError(err) - continue - } - span.SetAttributes(attr) - } - if allSrcMatched { - if err := r.scache.Set(a); err != nil { - ih.logger.Error("error on set alert", "err", err) - continue - } - r.updateIndex(a) - } - } else { - if r.SourceMatchers.Matches(a.Labels) { - if err := r.scache.Set(a); err != nil { - ih.logger.Error("error on set alert", "err", err) - continue - } - r.updateIndex(a) + // Update the inhibition rules' cache. + cachedSum := 0 + indexedSum := 0 + cached := 0 + indexed := 0 + for _, r := range ih.rules { + if len(r.Sources) > 0 { + cached = 0 + indexed = 0 + for _, src := range r.Sources { + if src.SrcMatchers.Matches(a.Labels) { + attr := attribute.String("alerting.inhibit_rule.name", r.Name) + span.AddEvent("alert matched rule source", trace.WithAttributes(attr)) + if err := src.scache.Set(a); err != nil { + message := "error on set alert" + ih.logger.Error(message, "err", err) + span.SetStatus(codes.Error, message) + span.RecordError(err) + continue } + src.updateIndex(a) + cached += src.scache.Len() + indexed += src.sindex.Len() + break + } + } + + } else { + if r.SourceMatchers.Matches(a.Labels) { + if err := r.scache.Set(a); err != nil { + ih.logger.Error("error on set alert", "err", err) + continue } - ih.processAlert(a) + r.updateIndex(a) } + cached = r.scache.Len() + indexed = r.sindex.Len() + + } + if r.Name != "" { + r.metrics.sourceAlertsCacheItems.With(prometheus.Labels{"rule": r.Name}).Set(float64(cached)) + r.metrics.sourceAlertsIndexItems.With(prometheus.Labels{"rule": r.Name}).Set(float64(indexed)) } + cachedSum += cached + indexedSum += indexed } + ih.metrics.sourceAlertsCacheItems.Set(float64(cachedSum)) + ih.metrics.sourceAlertsIndexItems.Set(float64(indexedSum)) } func (ih *Inhibitor) WaitForLoading() { @@ -216,7 +222,6 @@ func (ih *Inhibitor) Mutes(ctx context.Context, lset model.LabelSet) bool { ) defer span.End() - now := time.Now() for _, r := range ih.rules { if !r.TargetMatchers.Matches(lset) { // If target side of rule doesn't match, we don't need to look any further. @@ -229,14 +234,46 @@ func (ih *Inhibitor) Mutes(ctx context.Context, lset model.LabelSet) bool { ) // If we are here, the target side matches. If the source side matches, too, we // need to exclude inhibiting alerts for which the same is true. - if inhibitedByFP, eq := r.hasEqual(lset, r.SourceMatchers.Matches(lset), now); eq { - ih.marker.SetInhibited(fp, inhibitedByFP.String()) - span.AddEvent("alert inhibited", - trace.WithAttributes( - attribute.String("alerting.inhibit_rule.source.fingerprint", inhibitedByFP.String()), - ), - ) - return true + + if len(r.Sources) > 0 { + var inhibitorIDs []string + for _, source := range r.Sources { + if inhibitedByFP, eq := source.hasEqual(lset, source.SrcMatchers.Matches(lset), ruleStart, r.TargetMatchers); eq && !source.foundMatch { + inhibitorIDs = append(inhibitorIDs, inhibitedByFP.String()) + source.foundMatch = true + } + } + if allSourcesMatched := r.allSourcesSatisfied(); allSourcesMatched { + compositeInhibitorID := strings.Join(inhibitorIDs, ",") + ih.marker.SetInhibited(fp, compositeInhibitorID) + ih.marker.SetInhibited(fp, inhibitedByFP.String()) + span.AddEvent("alert inhibited", + trace.WithAttributes( + attribute.String("alerting.inhibit_rule.source.fingerprint", compositeInhibitorID.String()), + ), + ) + now := time.Now() + sinceStart := now.Sub(start) + sinceRuleStart := now.Sub(ruleStart) + ih.metrics.mutesDurationMuted.Observe(sinceStart.Seconds()) + r.metrics.mutesDurationMuted.Observe(sinceRuleStart.Seconds()) + return true + } + // Reset for next use. + for _, source := range r.Sources { + source.foundMatch = false + } + + } else { + if inhibitedByFP, eq := r.hasEqual(lset, r.SourceMatchers.Matches(lset), ruleStart); eq { + ih.marker.SetInhibited(fp, inhibitedByFP.String()) + now := time.Now() + sinceStart := now.Sub(start) + sinceRuleStart := now.Sub(ruleStart) + ih.metrics.mutesDurationMuted.Observe(sinceStart.Seconds()) + r.metrics.mutesDurationMuted.Observe(sinceRuleStart.Seconds()) + return true + } } } ih.marker.SetInhibited(fp) @@ -245,6 +282,25 @@ func (ih *Inhibitor) Mutes(ctx context.Context, lset model.LabelSet) bool { return false } +type Source struct { + // The set of Filters which define the group of source alerts (which inhibit + // the target alerts). + SrcMatchers labels.Matchers + // A set of label names whose label values need to be identical in source and + // target alerts in order for the inhibition to take effect. + Equal map[model.LabelName]struct{} + // Cache of alerts matching source labels. + scache *store.Alerts + + // Index of fingerprints of source alert equal labels to fingerprint of source alert. + // The index helps speed up source alert lookups from scache significantely in scenarios with 100s of source alerts cached. + // The index items might overwrite eachother if multiple source alerts have exact equal labels. + // Overwrites only happen if the new source alert has bigger EndsAt value. + sindex *index + + foundMatch bool +} + // An InhibitRule specifies that a class of (source) alerts should inhibit // notifications for another class of (target) alerts if all specified matching // labels are equal between the two alerts. This may be used to inhibit alerts @@ -256,6 +312,8 @@ type InhibitRule struct { // The set of Filters which define the group of source alerts (which inhibit // the target alerts). SourceMatchers labels.Matchers + + Sources []*Source // The set of Filters which define the group of target alerts (which are // inhibited by the source alerts). TargetMatchers labels.Matchers @@ -276,18 +334,35 @@ type InhibitRule struct { // NewInhibitRule returns a new InhibitRule based on a configuration definition. func NewInhibitRule(cr config.InhibitRule) *InhibitRule { var ( + + sources []*Source sourcem labels.Matchers targetm labels.Matchers ) - // cr.SourceMatch will be deprecated. This for loop appends regex matchers. - for ln, lv := range cr.SourceMatch { - matcher, err := labels.NewMatcher(labels.MatchEqual, ln, lv) - if err != nil { - // This error must not happen because the config already validates the yaml. - panic(err) + if len(cr.Sources) > 0 { + for _, sm := range cr.Sources { + var sourcesm labels.Matchers + sourcesm = append(sourcesm, sm.SrcMatchers...) + equal := map[model.LabelName]struct{}{} + for _, ln := range sm.Equal { + equal[model.LabelName(ln)] = struct{}{} + } + sources = append(sources, &Source{ + SrcMatchers: sourcesm, + Equal: equal, + scache: store.NewAlerts(), + sindex: newIndex(), + }) } - sourcem = append(sourcem, matcher) + } else { + for ln, lv := range cr.SourceMatch { + matcher, err := labels.NewMatcher(labels.MatchEqual, ln, lv) + if err != nil { + // This error must not happen because the config already validates the yaml. + panic(err) + } + sourcem = append(sourcem, matcher) } // cr.SourceMatchRE will be deprecated. This for loop appends regex matchers. for ln, lv := range cr.SourceMatchRE { @@ -347,6 +422,15 @@ func (r *InhibitRule) fingerprintEquals(lset model.LabelSet) model.Fingerprint { for n := range r.Equal { equalSet[n] = lset[n] } + + return equalSet.Fingerprint() +} + +func (s *Source) fingerprintEquals(lset model.LabelSet) model.Fingerprint { + equalSet := model.LabelSet{} + for n := range s.Equal { + equalSet[n] = lset[n] + } return equalSet.Fingerprint() } @@ -384,6 +468,39 @@ func (r *InhibitRule) updateIndex(alert *types.Alert) { // If the existing alert resolves after the new alert, do nothing. } +func (src *Source) updateIndex(alert *types.Alert) { + fp := alert.Fingerprint() + // Calculate source labelset subset which is in equals. + eq := src.fingerprintEquals(alert.Labels) + + // Check if the equal labelset is already in the index. + indexed, ok := src.sindex.Get(eq) + if !ok { + // If not, add it. + src.sindex.Set(eq, fp) + return + } + // If the indexed fingerprint is the same as the new fingerprint, do nothing. + if indexed == fp { + return + } + + // New alert and existing index are not the same, compare them. + existing, err := src.scache.Get(indexed) + if err != nil { + // failed to get the existing alert, overwrite the index. + src.sindex.Set(eq, fp) + return + } + + // If the new alert resolves after the existing alert, replace the index. + if existing.ResolvedAt(alert.EndsAt) { + src.sindex.Set(eq, fp) + return + } + // If the existing alert resolves after the new alert, do nothing. +} + // findEqualSourceAlert returns the source alert that matches the equal labels of the given label set. func (r *InhibitRule) findEqualSourceAlert(lset model.LabelSet, now time.Time) (*types.Alert, bool) { equalsFP := r.fingerprintEquals(lset) @@ -404,10 +521,40 @@ func (r *InhibitRule) findEqualSourceAlert(lset model.LabelSet, now time.Time) ( return nil, false } +func (s *Source) findEqualSourceAlert(lset model.LabelSet, now time.Time) (*types.Alert, bool) { + equalsFP := s.fingerprintEquals(lset) + sourceFP, ok := s.sindex.Get(equalsFP) + if ok { + alert, err := s.scache.Get(sourceFP) + if err != nil { + return nil, false + } + + if alert.ResolvedAt(now) { + return nil, false + } + + return alert, true + } + + return nil, false +} + func (r *InhibitRule) gcCallback(alerts []types.Alert) { for _, a := range alerts { - fp := r.fingerprintEquals(a.Labels) - r.sindex.Delete(fp) + if len(r.Sources) > 0 { + for _, src := range r.Sources { + if src.SrcMatchers.Matches(a.Labels) { + fp := src.fingerprintEquals(a.Labels) + src.sindex.Delete(fp) + + break + } + } + } else { + fp := r.fingerprintEquals(a.Labels) + r.sindex.Delete(fp) + } } } @@ -426,3 +573,28 @@ func (r *InhibitRule) hasEqual(lset model.LabelSet, excludeTwoSidedMatch bool, n return model.Fingerprint(0), false } + +func (s *Source) hasEqual(lset model.LabelSet, excludeTwoSidedMatch bool, now time.Time, targetMatchers labels.Matchers) (model.Fingerprint, bool) { + equal, found := s.findEqualSourceAlert(lset, now) + if found { + if excludeTwoSidedMatch && targetMatchers.Matches(equal.Labels) { + return model.Fingerprint(0), false + } + return equal.Fingerprint(), found + } + + return model.Fingerprint(0), false +} + +func (r *InhibitRule) allSourcesSatisfied() bool { + for _, source := range r.Sources { + if !source.foundMatch { + return false + } + } + // Reset for next use. + for _, source := range r.Sources { + source.foundMatch = false + } + return true +} diff --git a/inhibit/inhibit_test.go b/inhibit/inhibit_test.go index 7207785fd7..4199a0444d 100644 --- a/inhibit/inhibit_test.go +++ b/inhibit/inhibit_test.go @@ -250,20 +250,12 @@ func TestInhibitRuleMatchers(t *testing.T) { t.Parallel() rule1 := config.InhibitRule{ - Sources: []config.Source{ - { - SrcMatchers: config.Matchers{&labels.Matcher{Type: labels.MatchEqual, Name: "s1", Value: "1"}}, - }, - }, + SourceMatchers: config.Matchers{&labels.Matcher{Type: labels.MatchEqual, Name: "s1", Value: "1"}}, TargetMatchers: config.Matchers{&labels.Matcher{Type: labels.MatchNotEqual, Name: "t1", Value: "1"}}, Equal: []string{"e"}, } rule2 := config.InhibitRule{ - Sources: []config.Source{ - { - SrcMatchers: config.Matchers{&labels.Matcher{Type: labels.MatchEqual, Name: "s2", Value: "1"}}, - }, - }, + SourceMatchers: config.Matchers{&labels.Matcher{Type: labels.MatchEqual, Name: "s2", Value: "1"}}, TargetMatchers: config.Matchers{&labels.Matcher{Type: labels.MatchEqual, Name: "t2", Value: "1"}}, Equal: []string{"e"}, } @@ -569,3 +561,179 @@ func TestInhibit(t *testing.T) { } } } + +func TestInhibitByMultipleSources(t *testing.T) { + t.Parallel() + + now := time.Now() + inhibitRules := func() []config.InhibitRule { + return []config.InhibitRule{ + { + Sources: []config.Source{ + { + SrcMatchers: config.Matchers{ + &labels.Matcher{Type: labels.MatchEqual, Name: "s1", Value: "1"}, + &labels.Matcher{Type: labels.MatchEqual, Name: "s11", Value: "1"}, + }, + Equal: []string{"e"}, + }, + { + SrcMatchers: config.Matchers{ + &labels.Matcher{Type: labels.MatchEqual, Name: "s2", Value: "1"}, + &labels.Matcher{Type: labels.MatchEqual, Name: "s22", Value: "1"}, + }, + Equal: []string{"f"}, + }, + }, + TargetMatchers: config.Matchers{&labels.Matcher{Type: labels.MatchEqual, Name: "t", Value: "1"}}, + }, + } + } + // alertOne is muted by alertTwo and alertThree when it is active. + alertOne := func() *types.Alert { + return &types.Alert{ + Alert: model.Alert{ + Labels: model.LabelSet{"t": "1", "e": "1", "f": "1"}, + StartsAt: now.Add(-time.Minute), + EndsAt: now.Add(time.Hour), + }, + } + } + alertTwo := func(resolved bool) *types.Alert { + var end time.Time + if resolved { + end = now.Add(-time.Second) + } else { + end = now.Add(time.Hour) + } + return &types.Alert{ + Alert: model.Alert{ + Labels: model.LabelSet{"s1": "1", "s11": "1", "e": "1"}, + StartsAt: now.Add(-time.Minute), + EndsAt: end, + }, + } + } + + alertThree := func(resolved bool) *types.Alert { + var end time.Time + if resolved { + end = now.Add(-time.Second) + } else { + end = now.Add(time.Hour) + } + return &types.Alert{ + Alert: model.Alert{ + Labels: model.LabelSet{"s2": "1", "s22": "1", "f": "1"}, + StartsAt: now.Add(-time.Minute), + EndsAt: end, + }, + } + } + + type exp struct { + lbls model.LabelSet + muted bool + } + for i, tc := range []struct { + alerts []*types.Alert + expected []exp + }{ + { + // alertOne shouldn't be muted since alertTwo and alertThree hasn't fired. + alerts: []*types.Alert{alertOne()}, + expected: []exp{ + { + lbls: model.LabelSet{"t": "1", "e": "f"}, + muted: false, + }, + }, + }, + { + // alertOne shouldnt be muted by alertTwo which is active since alertThree is not active. + alerts: []*types.Alert{alertOne(), alertTwo(false), alertThree(true)}, + expected: []exp{ + { + lbls: model.LabelSet{"t": "1", "e": "f"}, + muted: false, + }, + { + lbls: model.LabelSet{"s1": "1", "e": "f"}, + muted: false, + }, + { + lbls: model.LabelSet{"s2": "1", "e": "f"}, + muted: false, + }, + }, + }, + + { + // alertOne shouldn't be muted by alertTwo which is active since alertThree is not active. + alerts: []*types.Alert{alertOne(), alertTwo(true), alertThree(false)}, + expected: []exp{ + { + lbls: model.LabelSet{"t": "1", "e": "1", "f": "1"}, + muted: false, + }, + { + lbls: model.LabelSet{"s1": "1", "e": "1", "f": "1"}, + muted: false, + }, + { + lbls: model.LabelSet{"s2": "1", "e": "1", "f": "1"}, + muted: false, + }, + }, + }, + + { + // alertOne should be muted since alertTwo and alertThree are active. + alerts: []*types.Alert{alertOne(), alertTwo(false), alertThree(false)}, + expected: []exp{ + { + lbls: model.LabelSet{"t": "1", "f": "5"}, + muted: false, + }, + { + lbls: model.LabelSet{"t": "1", "e": "1", "f": "1"}, + muted: true, + }, + { + lbls: model.LabelSet{"t": "1", "e": "2", "f": "1"}, + muted: false, + }, + { + lbls: model.LabelSet{"t": "1", "e": "1", "f": "4"}, + muted: false, + }, + }, + }, + } { + ap := newFakeAlerts(tc.alerts) + mk := types.NewMarker(prometheus.NewRegistry()) + inhibitor := NewInhibitor(ap, inhibitRules(), mk, nopLogger, NewInhibitorMetrics(prometheus.NewRegistry())) + + go func() { + for ap.finished != nil { + select { + case <-ap.finished: + ap.finished = nil + default: + } + } + inhibitor.Stop() + }() + inhibitor.Run() + + for _, expected := range tc.expected { + if inhibitor.Mutes(expected.lbls) != expected.muted { + mute := "unmuted" + if expected.muted { + mute = "muted" + } + t.Errorf("tc: %d, expected alert with labels %q to be %s", i, expected.lbls, mute) + } + } + } +} diff --git a/inhibit/metric_test.go b/inhibit/metric_test.go index 48007b29de..2868f1af99 100644 --- a/inhibit/metric_test.go +++ b/inhibit/metric_test.go @@ -75,14 +75,13 @@ func TestInhibitorMetrics_RuleMatchesDuration(t *testing.T) { rules := []config.InhibitRule{ { Name: "test-rule", - Sources: []config.Source{ - { - SrcMatchers: config.Matchers{&labels.Matcher{Type: labels.MatchEqual, Name: "severity", Value: "critical"}}, - }, + SourceMatchers: []*labels.Matcher{ + {Type: labels.MatchEqual, Name: "severity", Value: "critical"}, }, TargetMatchers: []*labels.Matcher{ {Type: labels.MatchEqual, Name: "severity", Value: "warning"}, }, + Equal: []string{"instance"}, }, } @@ -121,14 +120,13 @@ func TestInhibitorMetrics_RuleMutesDuration_Muted(t *testing.T) { rules := []config.InhibitRule{ { Name: "test-rule", - Sources: []config.Source{ - { - SrcMatchers: config.Matchers{&labels.Matcher{Type: labels.MatchEqual, Name: "severity", Value: "critical"}}, - }, + SourceMatchers: []*labels.Matcher{ + {Type: labels.MatchEqual, Name: "severity", Value: "critical"}, }, TargetMatchers: []*labels.Matcher{ {Type: labels.MatchEqual, Name: "severity", Value: "warning"}, }, + Equal: []string{"instance"}, }, } @@ -276,10 +274,8 @@ func TestInhibitorMetrics_MultipleRules(t *testing.T) { rules := []config.InhibitRule{ { Name: "rule-1", - Sources: []config.Source{ - { - SrcMatchers: config.Matchers{&labels.Matcher{Type: labels.MatchEqual, Name: "severity", Value: "critical"}}, - }, + SourceMatchers: []*labels.Matcher{ + {Type: labels.MatchEqual, Name: "severity", Value: "critical"}, }, TargetMatchers: []*labels.Matcher{ {Type: labels.MatchEqual, Name: "severity", Value: "warning"}, @@ -288,10 +284,8 @@ func TestInhibitorMetrics_MultipleRules(t *testing.T) { }, { Name: "rule-2", - Sources: []config.Source{ - { - SrcMatchers: config.Matchers{&labels.Matcher{Type: labels.MatchEqual, Name: "team", Value: "sre"}}, - }, + SourceMatchers: []*labels.Matcher{ + {Type: labels.MatchEqual, Name: "team", Value: "sre"}, }, TargetMatchers: []*labels.Matcher{ {Type: labels.MatchEqual, Name: "team", Value: "dev"}, @@ -380,10 +374,8 @@ func TestInhibitorMetrics_CacheAndIndexItems(t *testing.T) { rules := []config.InhibitRule{ { Name: "named-rule", - Sources: []config.Source{ - { - SrcMatchers: config.Matchers{&labels.Matcher{Type: labels.MatchEqual, Name: "severity", Value: "critical"}}, - }, + SourceMatchers: []*labels.Matcher{ + {Type: labels.MatchEqual, Name: "severity", Value: "critical"}, }, TargetMatchers: []*labels.Matcher{ {Type: labels.MatchEqual, Name: "severity", Value: "warning"}, @@ -391,10 +383,8 @@ func TestInhibitorMetrics_CacheAndIndexItems(t *testing.T) { Equal: []string{"instance"}, }, { - Sources: []config.Source{ - { - SrcMatchers: config.Matchers{&labels.Matcher{Type: labels.MatchEqual, Name: "severity", Value: "critical"}}, - }, + SourceMatchers: []*labels.Matcher{ + {Type: labels.MatchEqual, Name: "severity", Value: "critical"}, }, TargetMatchers: []*labels.Matcher{ {Type: labels.MatchEqual, Name: "severity", Value: "warning"}, From 8256a2b546b2f15ebd466a30ef4ee1f9e5c9598b Mon Sep 17 00:00:00 2001 From: Coleen Iona Quadros Date: Tue, 11 Nov 2025 12:43:42 +0100 Subject: [PATCH 03/14] Add Benchmark for multiple sources Signed-off-by: Coleen Iona Quadros --- inhibit/inhibit_bench_test.go | 50 +++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/inhibit/inhibit_bench_test.go b/inhibit/inhibit_bench_test.go index 95832bc6e1..1ce624300d 100644 --- a/inhibit/inhibit_bench_test.go +++ b/inhibit/inhibit_bench_test.go @@ -76,6 +76,15 @@ func BenchmarkMutes(b *testing.B) { b.Run("10000 inhibition rules, last rule matches", func(b *testing.B) { benchmarkMutes(b, lastRuleMatchesBenchmark(b, 10000)) }) + b.Run("10 inhibition rules, 5 sources, 100 inhibiting alerts", func(b *testing.B) { + benchmarkMutes(b, manySourcesBenchMark(b, 5, 10, 100)) + }) + b.Run("100 inhibition rules, 10 sources, 1000 inhibiting alerts", func(b *testing.B) { + benchmarkMutes(b, manySourcesBenchMark(b, 10, 100, 1000)) + }) + b.Run("1000 inhibition rules, 20 sources, 100 inhibiting alerts", func(b *testing.B) { + benchmarkMutes(b, manySourcesBenchMark(b, 20, 1000, 1000)) + }) } // benchmarkOptions allows the declaration of a wide range of benchmarks. @@ -189,6 +198,47 @@ func lastRuleMatchesBenchmark(b *testing.B, n int) benchmarkOptions { } } +func manySourcesBenchMark(b *testing.B, numSources, numInhibitionRules, numInhibitingAlerts int) benchmarkOptions { + return benchmarkOptions{ + n: numInhibitionRules, + newRuleFunc: func(idx int) config.InhibitRule { + sources := []config.Source{} + for i := 0; i < numSources; i++ { + sources = append(sources, config.Source{ + SrcMatchers: config.Matchers{ + mustNewMatcher(b, labels.MatchEqual, "src", strconv.Itoa(i)), + }, + }) + } + return config.InhibitRule{ + Sources: sources, + TargetMatchers: config.Matchers{ + mustNewMatcher(b, labels.MatchEqual, "dst", "0"), + }, + } + }, + newAlertsFunc: func(idx int, _ config.InhibitRule) []types.Alert { + var alerts []types.Alert + for i := 0; i < numInhibitingAlerts; i++ { + alerts = append(alerts, types.Alert{ + Alert: model.Alert{ + Labels: model.LabelSet{ + "src": model.LabelValue(strconv.Itoa(idx)), + "idx": model.LabelValue(strconv.Itoa(i)), + }, + }, + }) + } + return alerts + }, benchFunc: func(mutesFunc func(set model.LabelSet) bool) error { + if ok := mutesFunc(model.LabelSet{"dst": "0"}); !ok { + return errors.New("expected dst=0 to be muted") + } + return nil + }, + } +} + func benchmarkMutes(b *testing.B, opts benchmarkOptions) { r := prometheus.NewRegistry() m := types.NewMarker(r) From 93a69bdf81eba5472bbf30f4f45e2e2be3714cb3 Mon Sep 17 00:00:00 2001 From: Coleen Iona Quadros Date: Tue, 11 Nov 2025 12:55:34 +0100 Subject: [PATCH 04/14] update Signed-off-by: Coleen Iona Quadros --- inhibit/inhibit.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/inhibit/inhibit.go b/inhibit/inhibit.go index 41a943874b..9327594f20 100644 --- a/inhibit/inhibit.go +++ b/inhibit/inhibit.go @@ -241,6 +241,8 @@ func (ih *Inhibitor) Mutes(ctx context.Context, lset model.LabelSet) bool { if inhibitedByFP, eq := source.hasEqual(lset, source.SrcMatchers.Matches(lset), ruleStart, r.TargetMatchers); eq && !source.foundMatch { inhibitorIDs = append(inhibitorIDs, inhibitedByFP.String()) source.foundMatch = true + } else { + break } } if allSourcesMatched := r.allSourcesSatisfied(); allSourcesMatched { From b03b1a73c5d50547ef19e4d0ec3afd5c7e007703 Mon Sep 17 00:00:00 2001 From: Coleen Iona Quadros Date: Tue, 11 Nov 2025 15:39:24 +0100 Subject: [PATCH 05/14] refactor test Signed-off-by: Coleen Iona Quadros --- config/config.go | 5 +++-- inhibit/inhibit_bench_test.go | 26 ++++++++++++++------------ 2 files changed, 17 insertions(+), 14 deletions(-) diff --git a/config/config.go b/config/config.go index bcc66ff185..57c12804f4 100644 --- a/config/config.go +++ b/config/config.go @@ -1021,8 +1021,9 @@ type InhibitRule struct { SourceMatchRE MatchRegexps `yaml:"source_match_re,omitempty" json:"source_match_re,omitempty"` // SourceMatchers defines a set of label matchers that have to be fulfilled for source alerts. SourceMatchers Matchers `yaml:"source_matchers,omitempty" json:"source_matchers,omitempty"` - // Sources defines a set of source matchers and equal labels. - Sources []Source `yaml:"source,omitempty" json:"source,omitempty"` + // Sources defines a set of source matchers and equal labels for source alerts. + // All Source entries have to match for the inhibition to take effect. + Sources []Source `yaml:"sources,omitempty" json:"sources,omitempty"` // TargetMatch defines a set of labels that have to equal the given // value for target alerts. Deprecated. Remove before v1.0 release. TargetMatch map[string]string `yaml:"target_match,omitempty" json:"target_match,omitempty"` diff --git a/inhibit/inhibit_bench_test.go b/inhibit/inhibit_bench_test.go index 1ce624300d..cde4829646 100644 --- a/inhibit/inhibit_bench_test.go +++ b/inhibit/inhibit_bench_test.go @@ -77,13 +77,13 @@ func BenchmarkMutes(b *testing.B) { benchmarkMutes(b, lastRuleMatchesBenchmark(b, 10000)) }) b.Run("10 inhibition rules, 5 sources, 100 inhibiting alerts", func(b *testing.B) { - benchmarkMutes(b, manySourcesBenchMark(b, 5, 10, 100)) + benchmarkMutes(b, multipleSourcesBenchMark(b, 5, 10, 100)) }) b.Run("100 inhibition rules, 10 sources, 1000 inhibiting alerts", func(b *testing.B) { - benchmarkMutes(b, manySourcesBenchMark(b, 10, 100, 1000)) + benchmarkMutes(b, multipleSourcesBenchMark(b, 10, 100, 1000)) }) b.Run("1000 inhibition rules, 20 sources, 100 inhibiting alerts", func(b *testing.B) { - benchmarkMutes(b, manySourcesBenchMark(b, 20, 1000, 1000)) + benchmarkMutes(b, multipleSourcesBenchMark(b, 20, 1000, 1000)) }) } @@ -198,7 +198,7 @@ func lastRuleMatchesBenchmark(b *testing.B, n int) benchmarkOptions { } } -func manySourcesBenchMark(b *testing.B, numSources, numInhibitionRules, numInhibitingAlerts int) benchmarkOptions { +func multipleSourcesBenchMark(b *testing.B, numSources, numInhibitionRules, numInhibitingAlerts int) benchmarkOptions { return benchmarkOptions{ n: numInhibitionRules, newRuleFunc: func(idx int) config.InhibitRule { @@ -219,15 +219,17 @@ func manySourcesBenchMark(b *testing.B, numSources, numInhibitionRules, numInhib }, newAlertsFunc: func(idx int, _ config.InhibitRule) []types.Alert { var alerts []types.Alert - for i := 0; i < numInhibitingAlerts; i++ { - alerts = append(alerts, types.Alert{ - Alert: model.Alert{ - Labels: model.LabelSet{ - "src": model.LabelValue(strconv.Itoa(idx)), - "idx": model.LabelValue(strconv.Itoa(i)), + for src := 0; src < numSources; src++ { + for i := 0; i < numInhibitingAlerts; i++ { + alerts = append(alerts, types.Alert{ + Alert: model.Alert{ + Labels: model.LabelSet{ + "src": model.LabelValue(strconv.Itoa(src)), + "idx": model.LabelValue(strconv.Itoa(i)), + }, }, - }, - }) + }) + } } return alerts }, benchFunc: func(mutesFunc func(set model.LabelSet) bool) error { From 7f4e2f20e0fa283aa705489d98f46d0d2573f371 Mon Sep 17 00:00:00 2001 From: Coleen Iona Quadros Date: Wed, 12 Nov 2025 10:00:43 +0100 Subject: [PATCH 06/14] refactor Signed-off-by: Coleen Iona Quadros --- inhibit/inhibit.go | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/inhibit/inhibit.go b/inhibit/inhibit.go index 9327594f20..93248557d0 100644 --- a/inhibit/inhibit.go +++ b/inhibit/inhibit.go @@ -238,9 +238,11 @@ func (ih *Inhibitor) Mutes(ctx context.Context, lset model.LabelSet) bool { if len(r.Sources) > 0 { var inhibitorIDs []string for _, source := range r.Sources { - if inhibitedByFP, eq := source.hasEqual(lset, source.SrcMatchers.Matches(lset), ruleStart, r.TargetMatchers); eq && !source.foundMatch { - inhibitorIDs = append(inhibitorIDs, inhibitedByFP.String()) - source.foundMatch = true + if !source.foundMatch { + if inhibitedByFP, eq := source.hasEqual(lset, source.SrcMatchers.Matches(lset), ruleStart, r.TargetMatchers); eq { + inhibitorIDs = append(inhibitorIDs, inhibitedByFP.String()) + source.foundMatch = true + } } else { break } From 63aa15ebea713c11b0d423249822cd0d06c73634 Mon Sep 17 00:00:00 2001 From: Coleen Iona Quadros Date: Fri, 28 Nov 2025 11:30:53 +0100 Subject: [PATCH 07/14] rebase Signed-off-by: Coleen Iona Quadros --- inhibit/inhibit.go | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/inhibit/inhibit.go b/inhibit/inhibit.go index 93248557d0..6f031d23d5 100644 --- a/inhibit/inhibit.go +++ b/inhibit/inhibit.go @@ -317,7 +317,7 @@ type InhibitRule struct { // the target alerts). SourceMatchers labels.Matchers - Sources []*Source + Sources []*Source // The set of Filters which define the group of target alerts (which are // inhibited by the source alerts). TargetMatchers labels.Matchers @@ -338,7 +338,6 @@ type InhibitRule struct { // NewInhibitRule returns a new InhibitRule based on a configuration definition. func NewInhibitRule(cr config.InhibitRule) *InhibitRule { var ( - sources []*Source sourcem labels.Matchers targetm labels.Matchers @@ -367,19 +366,19 @@ func NewInhibitRule(cr config.InhibitRule) *InhibitRule { panic(err) } sourcem = append(sourcem, matcher) - } - // cr.SourceMatchRE will be deprecated. This for loop appends regex matchers. - for ln, lv := range cr.SourceMatchRE { - matcher, err := labels.NewMatcher(labels.MatchRegexp, ln, lv.String()) - if err != nil { - // This error must not happen because the config already validates the yaml. - panic(err) } - sourcem = append(sourcem, matcher) + // cr.SourceMatchRE will be deprecated. This for loop appends regex matchers. + for ln, lv := range cr.SourceMatchRE { + matcher, err := labels.NewMatcher(labels.MatchRegexp, ln, lv.String()) + if err != nil { + // This error must not happen because the config already validates the yaml. + panic(err) + } + sourcem = append(sourcem, matcher) + } + // We append the new-style matchers. This can be simplified once the deprecated matcher syntax is removed. + sourcem = append(sourcem, cr.SourceMatchers...) } - // We append the new-style matchers. This can be simplified once the deprecated matcher syntax is removed. - sourcem = append(sourcem, cr.SourceMatchers...) - // cr.TargetMatch will be deprecated. This for loop appends regex matchers. for ln, lv := range cr.TargetMatch { matcher, err := labels.NewMatcher(labels.MatchEqual, ln, lv) From d6c3ef8d22b99a91bf213d53a151247243ef3ba9 Mon Sep 17 00:00:00 2001 From: Coleen Iona Quadros Date: Fri, 28 Nov 2025 14:18:15 +0100 Subject: [PATCH 08/14] rebase fix Signed-off-by: Coleen Iona Quadros --- inhibit/inhibit.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/inhibit/inhibit.go b/inhibit/inhibit.go index 6f031d23d5..56601dbf5d 100644 --- a/inhibit/inhibit.go +++ b/inhibit/inhibit.go @@ -316,7 +316,8 @@ type InhibitRule struct { // The set of Filters which define the group of source alerts (which inhibit // the target alerts). SourceMatchers labels.Matchers - + // The set of Sources which define multiple groups of source alerts (which inhibit + // the target alerts). Sources []*Source // The set of Filters which define the group of target alerts (which are // inhibited by the source alerts). @@ -408,6 +409,7 @@ func NewInhibitRule(cr config.InhibitRule) *InhibitRule { rule := &InhibitRule{ Name: cr.Name, SourceMatchers: sourcem, + Sources: sources, TargetMatchers: targetm, Equal: equal, scache: store.NewAlerts(), @@ -550,7 +552,6 @@ func (r *InhibitRule) gcCallback(alerts []types.Alert) { if src.SrcMatchers.Matches(a.Labels) { fp := src.fingerprintEquals(a.Labels) src.sindex.Delete(fp) - break } } From 99c4847b76f746f68dd3e7d1a9ab554bc1bef142 Mon Sep 17 00:00:00 2001 From: Coleen Iona Quadros Date: Fri, 28 Nov 2025 17:18:59 +0100 Subject: [PATCH 09/14] refactor Signed-off-by: Coleen Iona Quadros --- inhibit/inhibit.go | 28 +++++----------------------- inhibit/inhibit_test.go | 2 +- 2 files changed, 6 insertions(+), 24 deletions(-) diff --git a/inhibit/inhibit.go b/inhibit/inhibit.go index 56601dbf5d..7177f50613 100644 --- a/inhibit/inhibit.go +++ b/inhibit/inhibit.go @@ -237,17 +237,16 @@ func (ih *Inhibitor) Mutes(ctx context.Context, lset model.LabelSet) bool { if len(r.Sources) > 0 { var inhibitorIDs []string + sourceHasNoEqual := false for _, source := range r.Sources { - if !source.foundMatch { - if inhibitedByFP, eq := source.hasEqual(lset, source.SrcMatchers.Matches(lset), ruleStart, r.TargetMatchers); eq { - inhibitorIDs = append(inhibitorIDs, inhibitedByFP.String()) - source.foundMatch = true - } + if inhibitedByFP, eq := source.hasEqual(lset, source.SrcMatchers.Matches(lset), ruleStart, r.TargetMatchers); eq { + inhibitorIDs = append(inhibitorIDs, inhibitedByFP.String()) } else { + sourceHasNoEqual = true break } } - if allSourcesMatched := r.allSourcesSatisfied(); allSourcesMatched { + if !sourceHasNoEqual { compositeInhibitorID := strings.Join(inhibitorIDs, ",") ih.marker.SetInhibited(fp, compositeInhibitorID) ih.marker.SetInhibited(fp, inhibitedByFP.String()) @@ -263,10 +262,6 @@ func (ih *Inhibitor) Mutes(ctx context.Context, lset model.LabelSet) bool { r.metrics.mutesDurationMuted.Observe(sinceRuleStart.Seconds()) return true } - // Reset for next use. - for _, source := range r.Sources { - source.foundMatch = false - } } else { if inhibitedByFP, eq := r.hasEqual(lset, r.SourceMatchers.Matches(lset), ruleStart); eq { @@ -589,16 +584,3 @@ func (s *Source) hasEqual(lset model.LabelSet, excludeTwoSidedMatch bool, now ti return model.Fingerprint(0), false } - -func (r *InhibitRule) allSourcesSatisfied() bool { - for _, source := range r.Sources { - if !source.foundMatch { - return false - } - } - // Reset for next use. - for _, source := range r.Sources { - source.foundMatch = false - } - return true -} diff --git a/inhibit/inhibit_test.go b/inhibit/inhibit_test.go index 4199a0444d..f744232bb5 100644 --- a/inhibit/inhibit_test.go +++ b/inhibit/inhibit_test.go @@ -696,7 +696,7 @@ func TestInhibitByMultipleSources(t *testing.T) { muted: false, }, { - lbls: model.LabelSet{"t": "1", "e": "1", "f": "1"}, + lbls: model.LabelSet{"t": "1", "f": "1", "e": "1"}, muted: true, }, { From 45af49724cdbf4c3717ce1be939ceb343f181dce Mon Sep 17 00:00:00 2001 From: Coleen Iona Quadros Date: Fri, 28 Nov 2025 17:21:37 +0100 Subject: [PATCH 10/14] remove foundmatch Signed-off-by: Coleen Iona Quadros --- inhibit/inhibit.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/inhibit/inhibit.go b/inhibit/inhibit.go index 7177f50613..3eca8fcdf1 100644 --- a/inhibit/inhibit.go +++ b/inhibit/inhibit.go @@ -296,8 +296,6 @@ type Source struct { // The index items might overwrite eachother if multiple source alerts have exact equal labels. // Overwrites only happen if the new source alert has bigger EndsAt value. sindex *index - - foundMatch bool } // An InhibitRule specifies that a class of (source) alerts should inhibit From 70cb766d51a89d2eebea0d72452e40420c15e306 Mon Sep 17 00:00:00 2001 From: Coleen Iona Quadros Date: Tue, 2 Dec 2025 09:58:18 +0100 Subject: [PATCH 11/14] add Usage info Signed-off-by: Coleen Iona Quadros --- CHANGELOG.md | 1 + README.md | 17 +++++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 44610e0547..ec720d226b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ * [BUGFIX] Fix duplicate `other` in error messages for config. #4366 * [BUGFIX] Fix logic that considers an alert reopened in Jira. #4478 * [BUGFIX] Fix Jira issue count #4615 +* [FEATURE] Add Multiple Sources support for Inhibition Rules. #4504 ## 0.28.1 / 2025-03-07 diff --git a/README.md b/README.md index 2fdfdd9768..a6854aa24f 100644 --- a/README.md +++ b/README.md @@ -160,6 +160,23 @@ inhibit_rules: # the inhibition rule will apply! equal: ['alertname'] +# Multiple Sources can be defined when setting inhibitions. +# When all source matchers are matched, the inhibition is applied to +# the target alerts. + +inhibit_rules: + - source: + - matchers: + - alertname="instance_down" + - application="abc" + equal: ["cluster"] + - matchers: + - alertname="instance_down" + - application="xyz" + equal: ["severity"] + target_matchers: + - alertname="no_info" + - application="def" receivers: - name: 'team-X-mails' From 52729893ec90eeb70788c7ed1b3a46ea6946916c Mon Sep 17 00:00:00 2001 From: Coleen Iona Quadros Date: Mon, 8 Dec 2025 18:47:12 +0100 Subject: [PATCH 12/14] refactor to new InhibitRule struct with Multiple sources Signed-off-by: Coleen Iona Quadros --- README.md | 2 +- config/config.go | 4 +- inhibit/inhibit.go | 230 +++++++++------------------------- inhibit/inhibit_bench_test.go | 8 +- inhibit/inhibit_test.go | 60 +++++---- inhibit/metric_test.go | 16 +-- 6 files changed, 106 insertions(+), 214 deletions(-) diff --git a/README.md b/README.md index a6854aa24f..5b7dc0e3e4 100644 --- a/README.md +++ b/README.md @@ -165,7 +165,7 @@ inhibit_rules: # the target alerts. inhibit_rules: - - source: + - sources: - matchers: - alertname="instance_down" - application="abc" diff --git a/config/config.go b/config/config.go index 57c12804f4..bf3735e79a 100644 --- a/config/config.go +++ b/config/config.go @@ -1002,7 +1002,7 @@ func (r *Route) UnmarshalYAML(unmarshal func(any) error) error { return nil } -type Source struct { +type InhibitRuleSource struct { SrcMatchers Matchers `yaml:"matchers,omitempty" json:"matchers,omitempty"` Equal []string `yaml:"equal,omitempty" json:"equal,omitempty"` } @@ -1023,7 +1023,7 @@ type InhibitRule struct { SourceMatchers Matchers `yaml:"source_matchers,omitempty" json:"source_matchers,omitempty"` // Sources defines a set of source matchers and equal labels for source alerts. // All Source entries have to match for the inhibition to take effect. - Sources []Source `yaml:"sources,omitempty" json:"sources,omitempty"` + Sources []InhibitRuleSource `yaml:"sources,omitempty" json:"sources,omitempty"` // TargetMatch defines a set of labels that have to equal the given // value for target alerts. Deprecated. Remove before v1.0 release. TargetMatch map[string]string `yaml:"target_match,omitempty" json:"target_match,omitempty"` diff --git a/inhibit/inhibit.go b/inhibit/inhibit.go index 3eca8fcdf1..302a9f4120 100644 --- a/inhibit/inhibit.go +++ b/inhibit/inhibit.go @@ -122,39 +122,24 @@ func (ih *Inhibitor) processAlert(ctx context.Context, a *types.Alert) { cached := 0 indexed := 0 for _, r := range ih.rules { - if len(r.Sources) > 0 { - cached = 0 - indexed = 0 - for _, src := range r.Sources { - if src.SrcMatchers.Matches(a.Labels) { - attr := attribute.String("alerting.inhibit_rule.name", r.Name) - span.AddEvent("alert matched rule source", trace.WithAttributes(attr)) - if err := src.scache.Set(a); err != nil { - message := "error on set alert" - ih.logger.Error(message, "err", err) - span.SetStatus(codes.Error, message) - span.RecordError(err) - continue - } - src.updateIndex(a) - cached += src.scache.Len() - indexed += src.sindex.Len() - break - } - } - - } else { - if r.SourceMatchers.Matches(a.Labels) { - if err := r.scache.Set(a); err != nil { - ih.logger.Error("error on set alert", "err", err) + for _, src := range r.Sources { + if src.SrcMatchers.Matches(a.Labels) { + attr := attribute.String("alerting.inhibit_rule.name", r.Name) + span.AddEvent("alert matched rule source", trace.WithAttributes(attr)) + if err := src.scache.Set(a); err != nil { + message := "error on set alert" + ih.logger.Error(message, "err", err) + span.SetStatus(codes.Error, message) + span.RecordError(err) continue } - r.updateIndex(a) + src.updateIndex(a) + cached += src.scache.Len() + indexed += src.sindex.Len() + break } - cached = r.scache.Len() - indexed = r.sindex.Len() - } + if r.Name != "" { r.metrics.sourceAlertsCacheItems.With(prometheus.Labels{"rule": r.Name}).Set(float64(cached)) r.metrics.sourceAlertsIndexItems.With(prometheus.Labels{"rule": r.Name}).Set(float64(indexed)) @@ -183,7 +168,11 @@ func (ih *Inhibitor) Run() { runCtx, runCancel := context.WithCancel(ctx) for _, rule := range ih.rules { - go rule.scache.Run(runCtx, 15*time.Minute) + // Start GC goroutine for each rule's source cache. + for _, src := range rule.Sources { + go src.scache.Run(runCtx, 15*time.Minute) + } + //go rule.scache.Run(runCtx, 15*time.Minute) } g.Add(func() error { @@ -235,45 +224,23 @@ func (ih *Inhibitor) Mutes(ctx context.Context, lset model.LabelSet) bool { // If we are here, the target side matches. If the source side matches, too, we // need to exclude inhibiting alerts for which the same is true. - if len(r.Sources) > 0 { - var inhibitorIDs []string - sourceHasNoEqual := false - for _, source := range r.Sources { - if inhibitedByFP, eq := source.hasEqual(lset, source.SrcMatchers.Matches(lset), ruleStart, r.TargetMatchers); eq { - inhibitorIDs = append(inhibitorIDs, inhibitedByFP.String()) - } else { - sourceHasNoEqual = true - break - } - } - if !sourceHasNoEqual { - compositeInhibitorID := strings.Join(inhibitorIDs, ",") - ih.marker.SetInhibited(fp, compositeInhibitorID) - ih.marker.SetInhibited(fp, inhibitedByFP.String()) - span.AddEvent("alert inhibited", - trace.WithAttributes( - attribute.String("alerting.inhibit_rule.source.fingerprint", compositeInhibitorID.String()), - ), - ) - now := time.Now() - sinceStart := now.Sub(start) - sinceRuleStart := now.Sub(ruleStart) - ih.metrics.mutesDurationMuted.Observe(sinceStart.Seconds()) - r.metrics.mutesDurationMuted.Observe(sinceRuleStart.Seconds()) - return true - } - - } else { - if inhibitedByFP, eq := r.hasEqual(lset, r.SourceMatchers.Matches(lset), ruleStart); eq { - ih.marker.SetInhibited(fp, inhibitedByFP.String()) - now := time.Now() - sinceStart := now.Sub(start) - sinceRuleStart := now.Sub(ruleStart) - ih.metrics.mutesDurationMuted.Observe(sinceStart.Seconds()) - r.metrics.mutesDurationMuted.Observe(sinceRuleStart.Seconds()) - return true + // If we are here, the target side matches. If the source side matches, too, we + // need to exclude inhibiting alerts for which the same is true. + var inhibitorIDs []string + sourceHasNoEqual := false + for _, source := range r.Sources { + if inhibitedByFP, eq := source.hasEqual(lset, source.SrcMatchers.Matches(lset), ruleStart, r.TargetMatchers); eq { + inhibitorIDs = append(inhibitorIDs, inhibitedByFP.String()) + } else { + sourceHasNoEqual = true + break } } + if !sourceHasNoEqual { + compositeInhibitorID := strings.Join(inhibitorIDs, ",") + ih.marker.SetInhibited(fp, compositeInhibitorID) + return true + } } ih.marker.SetInhibited(fp) span.AddEvent("alert not inhibited") @@ -306,9 +273,6 @@ type Source struct { type InhibitRule struct { // Name is an optional name for the inhibition rule. Name string - // The set of Filters which define the group of source alerts (which inhibit - // the target alerts). - SourceMatchers labels.Matchers // The set of Sources which define multiple groups of source alerts (which inhibit // the target alerts). Sources []*Source @@ -318,15 +282,6 @@ type InhibitRule struct { // A set of label names whose label values need to be identical in source and // target alerts in order for the inhibition to take effect. Equal map[model.LabelName]struct{} - - // Cache of alerts matching source labels. - scache *store.Alerts - - // Index of fingerprints of source alert equal labels to fingerprint of source alert. - // The index helps speed up source alert lookups from scache significantely in scenarios with 100s of source alerts cached. - // The index items might overwrite eachother if multiple source alerts have exact equal labels. - // Overwrites only happen if the new source alert has bigger EndsAt value. - sindex *index } // NewInhibitRule returns a new InhibitRule based on a configuration definition. @@ -345,12 +300,13 @@ func NewInhibitRule(cr config.InhibitRule) *InhibitRule { for _, ln := range sm.Equal { equal[model.LabelName(ln)] = struct{}{} } - sources = append(sources, &Source{ + src := &Source{ SrcMatchers: sourcesm, Equal: equal, scache: store.NewAlerts(), sindex: newIndex(), - }) + } + sources = append(sources, src) } } else { for ln, lv := range cr.SourceMatch { @@ -372,6 +328,18 @@ func NewInhibitRule(cr config.InhibitRule) *InhibitRule { } // We append the new-style matchers. This can be simplified once the deprecated matcher syntax is removed. sourcem = append(sourcem, cr.SourceMatchers...) + + equal := map[model.LabelName]struct{}{} + for _, ln := range cr.Equal { + equal[model.LabelName(ln)] = struct{}{} + } + + sources = append(sources, &Source{ + SrcMatchers: sourcem, + Equal: equal, + scache: store.NewAlerts(), + sindex: newIndex(), + }) } // cr.TargetMatch will be deprecated. This for loop appends regex matchers. for ln, lv := range cr.TargetMatch { @@ -394,22 +362,15 @@ func NewInhibitRule(cr config.InhibitRule) *InhibitRule { // We append the new-style matchers. This can be simplified once the deprecated matcher syntax is removed. targetm = append(targetm, cr.TargetMatchers...) - equal := map[model.LabelName]struct{}{} - for _, ln := range cr.Equal { - equal[model.LabelName(ln)] = struct{}{} - } - rule := &InhibitRule{ Name: cr.Name, - SourceMatchers: sourcem, Sources: sources, TargetMatchers: targetm, - Equal: equal, - scache: store.NewAlerts(), - sindex: newIndex(), } - - rule.scache.SetGCCallback(rule.gcCallback) + // set GC callback for source caches + for _, src := range rule.Sources { + src.scache.SetGCCallback(rule.gcCallback) + } return rule } @@ -432,40 +393,6 @@ func (s *Source) fingerprintEquals(lset model.LabelSet) model.Fingerprint { return equalSet.Fingerprint() } -// updateIndex updates the source alert index if necessary. -func (r *InhibitRule) updateIndex(alert *types.Alert) { - fp := alert.Fingerprint() - // Calculate source labelset subset which is in equals. - eq := r.fingerprintEquals(alert.Labels) - - // Check if the equal labelset is already in the index. - indexed, ok := r.sindex.Get(eq) - if !ok { - // If not, add it. - r.sindex.Set(eq, fp) - return - } - // If the indexed fingerprint is the same as the new fingerprint, do nothing. - if indexed == fp { - return - } - - // New alert and existing index are not the same, compare them. - existing, err := r.scache.Get(indexed) - if err != nil { - // failed to get the existing alert, overwrite the index. - r.sindex.Set(eq, fp) - return - } - - // If the new alert resolves after the existing alert, replace the index. - if existing.ResolvedAt(alert.EndsAt) { - r.sindex.Set(eq, fp) - return - } - // If the existing alert resolves after the new alert, do nothing. -} - func (src *Source) updateIndex(alert *types.Alert) { fp := alert.Fingerprint() // Calculate source labelset subset which is in equals. @@ -499,26 +426,6 @@ func (src *Source) updateIndex(alert *types.Alert) { // If the existing alert resolves after the new alert, do nothing. } -// findEqualSourceAlert returns the source alert that matches the equal labels of the given label set. -func (r *InhibitRule) findEqualSourceAlert(lset model.LabelSet, now time.Time) (*types.Alert, bool) { - equalsFP := r.fingerprintEquals(lset) - sourceFP, ok := r.sindex.Get(equalsFP) - if ok { - alert, err := r.scache.Get(sourceFP) - if err != nil { - return nil, false - } - - if alert.ResolvedAt(now) { - return nil, false - } - - return alert, true - } - - return nil, false -} - func (s *Source) findEqualSourceAlert(lset model.LabelSet, now time.Time) (*types.Alert, bool) { equalsFP := s.fingerprintEquals(lset) sourceFP, ok := s.sindex.Get(equalsFP) @@ -540,35 +447,14 @@ func (s *Source) findEqualSourceAlert(lset model.LabelSet, now time.Time) (*type func (r *InhibitRule) gcCallback(alerts []types.Alert) { for _, a := range alerts { - if len(r.Sources) > 0 { - for _, src := range r.Sources { - if src.SrcMatchers.Matches(a.Labels) { - fp := src.fingerprintEquals(a.Labels) - src.sindex.Delete(fp) - break - } + for _, src := range r.Sources { + if src.SrcMatchers.Matches(a.Labels) { + fp := src.fingerprintEquals(a.Labels) + src.sindex.Delete(fp) + break } - } else { - fp := r.fingerprintEquals(a.Labels) - r.sindex.Delete(fp) - } - } -} - -// hasEqual checks whether the source cache contains alerts matching the equal -// labels for the given label set. If so, the fingerprint of one of those alerts -// is returned. If excludeTwoSidedMatch is true, alerts that match both the -// source and the target side of the rule are disregarded. -func (r *InhibitRule) hasEqual(lset model.LabelSet, excludeTwoSidedMatch bool, now time.Time) (model.Fingerprint, bool) { - equal, found := r.findEqualSourceAlert(lset, now) - if found { - if excludeTwoSidedMatch && r.TargetMatchers.Matches(equal.Labels) { - return model.Fingerprint(0), false } - return equal.Fingerprint(), found } - - return model.Fingerprint(0), false } func (s *Source) hasEqual(lset model.LabelSet, excludeTwoSidedMatch bool, now time.Time, targetMatchers labels.Matchers) (model.Fingerprint, bool) { diff --git a/inhibit/inhibit_bench_test.go b/inhibit/inhibit_bench_test.go index cde4829646..5d6a30c150 100644 --- a/inhibit/inhibit_bench_test.go +++ b/inhibit/inhibit_bench_test.go @@ -118,7 +118,7 @@ func allRulesMatchBenchmark(b *testing.B, numInhibitionRules, numInhibitingAlert n: numInhibitionRules, newRuleFunc: func(idx int) config.InhibitRule { return config.InhibitRule{ - Sources: []config.Source{ + Sources: []config.InhibitRuleSource{ { SrcMatchers: config.Matchers{ mustNewMatcher(b, labels.MatchEqual, "src", strconv.Itoa(idx)), @@ -165,7 +165,7 @@ func lastRuleMatchesBenchmark(b *testing.B, n int) benchmarkOptions { n: n, newRuleFunc: func(idx int) config.InhibitRule { return config.InhibitRule{ - Sources: []config.Source{ + Sources: []config.InhibitRuleSource{ { SrcMatchers: config.Matchers{ mustNewMatcher(b, labels.MatchEqual, "src", strconv.Itoa(idx)), @@ -202,9 +202,9 @@ func multipleSourcesBenchMark(b *testing.B, numSources, numInhibitionRules, numI return benchmarkOptions{ n: numInhibitionRules, newRuleFunc: func(idx int) config.InhibitRule { - sources := []config.Source{} + sources := []config.InhibitRuleSource{} for i := 0; i < numSources; i++ { - sources = append(sources, config.Source{ + sources = append(sources, config.InhibitRuleSource{ SrcMatchers: config.Matchers{ mustNewMatcher(b, labels.MatchEqual, "src", strconv.Itoa(i)), }, diff --git a/inhibit/inhibit_test.go b/inhibit/inhibit_test.go index f744232bb5..864bbbffef 100644 --- a/inhibit/inhibit_test.go +++ b/inhibit/inhibit_test.go @@ -126,19 +126,23 @@ func TestInhibitRuleHasEqual(t *testing.T) { for _, c := range cases { t.Run(c.name, func(t *testing.T) { r := &InhibitRule{ - Equal: map[model.LabelName]struct{}{}, - scache: store.NewAlerts(), - sindex: newIndex(), + Sources: []*Source{ + { + scache: store.NewAlerts(), + sindex: newIndex(), + }, + }, + Equal: map[model.LabelName]struct{}{}, } for _, ln := range c.equal { r.Equal[ln] = struct{}{} } for _, v := range c.initial { - r.scache.Set(v) - r.updateIndex(v) + r.Sources[0].scache.Set(v) + r.Sources[0].updateIndex(v) } - if _, have := r.hasEqual(c.input, false, time.Now()); have != c.result { + if _, have := r.Sources[0].hasEqual(c.input, false, time.Now(), r.TargetMatchers); have != c.result { t.Errorf("Unexpected result %t, expected %t", have, c.result) } }) @@ -179,15 +183,15 @@ func TestInhibitRuleMatches(t *testing.T) { }, } - ih.rules[0].scache = store.NewAlerts() - ih.rules[0].scache.Set(sourceAlert1) - ih.rules[0].sindex = newIndex() - ih.rules[0].updateIndex(sourceAlert1) + ih.rules[0].Sources[0].scache = store.NewAlerts() + ih.rules[0].Sources[0].scache.Set(sourceAlert1) + ih.rules[0].Sources[0].sindex = newIndex() + ih.rules[0].Sources[0].updateIndex(sourceAlert1) - ih.rules[1].scache = store.NewAlerts() - ih.rules[1].scache.Set(sourceAlert2) - ih.rules[1].sindex = newIndex() - ih.rules[1].updateIndex(sourceAlert2) + ih.rules[1].Sources[0].scache = store.NewAlerts() + ih.rules[1].Sources[0].scache.Set(sourceAlert2) + ih.rules[1].Sources[0].sindex = newIndex() + ih.rules[1].Sources[0].updateIndex(sourceAlert2) cases := []struct { target model.LabelSet @@ -280,15 +284,15 @@ func TestInhibitRuleMatchers(t *testing.T) { }, } - ih.rules[0].scache = store.NewAlerts() - ih.rules[0].scache.Set(sourceAlert1) - ih.rules[0].sindex = newIndex() - ih.rules[0].updateIndex(sourceAlert1) + ih.rules[0].Sources[0].scache = store.NewAlerts() + ih.rules[0].Sources[0].scache.Set(sourceAlert1) + ih.rules[0].Sources[0].sindex = newIndex() + ih.rules[0].Sources[0].updateIndex(sourceAlert1) - ih.rules[1].scache = store.NewAlerts() - ih.rules[1].scache.Set(sourceAlert2) - ih.rules[1].sindex = newIndex() - ih.rules[1].updateIndex(sourceAlert2) + ih.rules[1].Sources[0].scache = store.NewAlerts() + ih.rules[1].Sources[0].scache.Set(sourceAlert2) + ih.rules[1].Sources[0].sindex = newIndex() + ih.rules[1].Sources[0].updateIndex(sourceAlert2) cases := []struct { target model.LabelSet @@ -352,7 +356,7 @@ func TestInhibitRuleName(t *testing.T) { config1 := config.InhibitRule{ Name: "test-rule", - Sources: []config.Source{ + Sources: []config.InhibitRuleSource{ { SrcMatchers: config.Matchers{&labels.Matcher{Type: labels.MatchEqual, Name: "severity", Value: "critical"}}, }, @@ -363,7 +367,7 @@ func TestInhibitRuleName(t *testing.T) { Equal: []string{"instance"}, } config2 := config.InhibitRule{ - Sources: []config.Source{ + Sources: []config.InhibitRuleSource{ { SrcMatchers: config.Matchers{&labels.Matcher{Type: labels.MatchEqual, Name: "severity", Value: "critical"}}, }, @@ -569,7 +573,7 @@ func TestInhibitByMultipleSources(t *testing.T) { inhibitRules := func() []config.InhibitRule { return []config.InhibitRule{ { - Sources: []config.Source{ + Sources: []config.InhibitRuleSource{ { SrcMatchers: config.Matchers{ &labels.Matcher{Type: labels.MatchEqual, Name: "s1", Value: "1"}, @@ -667,7 +671,6 @@ func TestInhibitByMultipleSources(t *testing.T) { }, }, }, - { // alertOne shouldn't be muted by alertTwo which is active since alertThree is not active. alerts: []*types.Alert{alertOne(), alertTwo(true), alertThree(false)}, @@ -686,7 +689,6 @@ func TestInhibitByMultipleSources(t *testing.T) { }, }, }, - { // alertOne should be muted since alertTwo and alertThree are active. alerts: []*types.Alert{alertOne(), alertTwo(false), alertThree(false)}, @@ -699,6 +701,10 @@ func TestInhibitByMultipleSources(t *testing.T) { lbls: model.LabelSet{"t": "1", "f": "1", "e": "1"}, muted: true, }, + { + lbls: model.LabelSet{"s3": "1", "t": "1", "s11": "1", "e": "1", "f": "1"}, + muted: true, + }, { lbls: model.LabelSet{"t": "1", "e": "2", "f": "1"}, muted: false, diff --git a/inhibit/metric_test.go b/inhibit/metric_test.go index 2868f1af99..655410a4b9 100644 --- a/inhibit/metric_test.go +++ b/inhibit/metric_test.go @@ -144,8 +144,8 @@ func TestInhibitorMetrics_RuleMutesDuration_Muted(t *testing.T) { EndsAt: time.Now().Add(time.Hour), }, } - inhibitor.rules[0].scache.Set(sourceAlert) - inhibitor.rules[0].updateIndex(sourceAlert) + inhibitor.rules[0].Sources[0].scache.Set(sourceAlert) + inhibitor.rules[0].Sources[0].updateIndex(sourceAlert) // Test that target alert is muted targetAlert := model.LabelSet{ @@ -175,7 +175,7 @@ func TestInhibitorMetrics_RuleMutesDuration_NotMuted(t *testing.T) { rules := []config.InhibitRule{ { Name: "test-rule", - Sources: []config.Source{ + Sources: []config.InhibitRuleSource{ { SrcMatchers: config.Matchers{&labels.Matcher{Type: labels.MatchEqual, Name: "severity", Value: "critical"}}, }, @@ -201,7 +201,7 @@ func TestInhibitorMetrics_RuleMutesDuration_NotMuted(t *testing.T) { EndsAt: time.Now().Add(time.Hour), }, } - inhibitor.rules[0].scache.Set(sourceAlert) + inhibitor.rules[0].Sources[0].scache.Set(sourceAlert) // Test that target alert with different instance is NOT muted targetAlert := model.LabelSet{ @@ -231,7 +231,7 @@ func TestInhibitorMetrics_NoRuleMatches(t *testing.T) { rules := []config.InhibitRule{ { Name: "test-rule", - Sources: []config.Source{ + Sources: []config.InhibitRuleSource{ { SrcMatchers: config.Matchers{&labels.Matcher{Type: labels.MatchEqual, Name: "severity", Value: "critical"}}, }, @@ -308,8 +308,8 @@ func TestInhibitorMetrics_MultipleRules(t *testing.T) { EndsAt: time.Now().Add(time.Hour), }, } - inhibitor.rules[0].scache.Set(sourceAlert1) - inhibitor.rules[0].updateIndex(sourceAlert1) + inhibitor.rules[0].Sources[0].scache.Set(sourceAlert1) + inhibitor.rules[0].Sources[0].updateIndex(sourceAlert1) // Test alert that matches rule-1 targetAlert1 := model.LabelSet{ @@ -459,7 +459,7 @@ func TestInhibitorMetrics_Registration(t *testing.T) { rules := []config.InhibitRule{ { Name: "test-rule", - Sources: []config.Source{ + Sources: []config.InhibitRuleSource{ { SrcMatchers: config.Matchers{&labels.Matcher{Type: labels.MatchEqual, Name: "severity", Value: "critical"}}, }, From 1ed8eb685b66b6309a686fe6236c10c654b48b62 Mon Sep 17 00:00:00 2001 From: Coleen Iona Quadros Date: Mon, 8 Dec 2025 19:42:28 +0100 Subject: [PATCH 13/14] rebase Signed-off-by: Coleen Iona Quadros --- inhibit/inhibit.go | 20 +- inhibit/inhibit_bench_test.go | 4 +- inhibit/inhibit_test.go | 4 +- inhibit/metric_test.go | 507 ---------------------------------- 4 files changed, 8 insertions(+), 527 deletions(-) delete mode 100644 inhibit/metric_test.go diff --git a/inhibit/inhibit.go b/inhibit/inhibit.go index 302a9f4120..ebea70178e 100644 --- a/inhibit/inhibit.go +++ b/inhibit/inhibit.go @@ -15,7 +15,6 @@ package inhibit import ( "context" - "github.com/prometheus/client_golang/prometheus" "log/slog" "strings" "sync" @@ -116,11 +115,9 @@ func (ih *Inhibitor) processAlert(ctx context.Context, a *types.Alert) { ), trace.WithSpanKind(trace.SpanKindInternal), ) + defer span.End() + // Update the inhibition rules' cache. - cachedSum := 0 - indexedSum := 0 - cached := 0 - indexed := 0 for _, r := range ih.rules { for _, src := range r.Sources { if src.SrcMatchers.Matches(a.Labels) { @@ -133,22 +130,12 @@ func (ih *Inhibitor) processAlert(ctx context.Context, a *types.Alert) { span.RecordError(err) continue } + span.SetAttributes(attr) src.updateIndex(a) - cached += src.scache.Len() - indexed += src.sindex.Len() break } } - - if r.Name != "" { - r.metrics.sourceAlertsCacheItems.With(prometheus.Labels{"rule": r.Name}).Set(float64(cached)) - r.metrics.sourceAlertsIndexItems.With(prometheus.Labels{"rule": r.Name}).Set(float64(indexed)) - } - cachedSum += cached - indexedSum += indexed } - ih.metrics.sourceAlertsCacheItems.Set(float64(cachedSum)) - ih.metrics.sourceAlertsIndexItems.Set(float64(indexedSum)) } func (ih *Inhibitor) WaitForLoading() { @@ -211,6 +198,7 @@ func (ih *Inhibitor) Mutes(ctx context.Context, lset model.LabelSet) bool { ) defer span.End() + ruleStart := time.Now() for _, r := range ih.rules { if !r.TargetMatchers.Matches(lset) { // If target side of rule doesn't match, we don't need to look any further. diff --git a/inhibit/inhibit_bench_test.go b/inhibit/inhibit_bench_test.go index 5d6a30c150..7dab6676ee 100644 --- a/inhibit/inhibit_bench_test.go +++ b/inhibit/inhibit_bench_test.go @@ -232,8 +232,8 @@ func multipleSourcesBenchMark(b *testing.B, numSources, numInhibitionRules, numI } } return alerts - }, benchFunc: func(mutesFunc func(set model.LabelSet) bool) error { - if ok := mutesFunc(model.LabelSet{"dst": "0"}); !ok { + }, benchFunc: func(mutesFunc func(context.Context, model.LabelSet) bool) error { + if ok := mutesFunc(context.Background(), model.LabelSet{"dst": "0"}); !ok { return errors.New("expected dst=0 to be muted") } return nil diff --git a/inhibit/inhibit_test.go b/inhibit/inhibit_test.go index 864bbbffef..ee62c208d0 100644 --- a/inhibit/inhibit_test.go +++ b/inhibit/inhibit_test.go @@ -718,7 +718,7 @@ func TestInhibitByMultipleSources(t *testing.T) { } { ap := newFakeAlerts(tc.alerts) mk := types.NewMarker(prometheus.NewRegistry()) - inhibitor := NewInhibitor(ap, inhibitRules(), mk, nopLogger, NewInhibitorMetrics(prometheus.NewRegistry())) + inhibitor := NewInhibitor(ap, inhibitRules(), mk, nopLogger) go func() { for ap.finished != nil { @@ -733,7 +733,7 @@ func TestInhibitByMultipleSources(t *testing.T) { inhibitor.Run() for _, expected := range tc.expected { - if inhibitor.Mutes(expected.lbls) != expected.muted { + if inhibitor.Mutes(context.Background(), expected.lbls) != expected.muted { mute := "unmuted" if expected.muted { mute = "muted" diff --git a/inhibit/metric_test.go b/inhibit/metric_test.go deleted file mode 100644 index 655410a4b9..0000000000 --- a/inhibit/metric_test.go +++ /dev/null @@ -1,507 +0,0 @@ -// Copyright The Prometheus Authors -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package inhibit - -import ( - "testing" - "time" - - "github.com/prometheus/client_golang/prometheus" - io_prometheus_client "github.com/prometheus/client_model/go" - "github.com/prometheus/common/model" - "github.com/stretchr/testify/require" - - "github.com/prometheus/alertmanager/config" - "github.com/prometheus/alertmanager/pkg/labels" - "github.com/prometheus/alertmanager/provider/mem" - "github.com/prometheus/alertmanager/types" -) - -// getMetricValue retrieves a specific metric value from the registry. -func getMetricValue(t *testing.T, reg *prometheus.Registry, metricName string, labels map[string]string) (float64, uint64, bool) { - t.Helper() - metricFamilies, err := reg.Gather() - require.NoError(t, err) - - for _, mf := range metricFamilies { - if mf.GetName() != metricName { - continue - } - for _, metric := range mf.GetMetric() { - if labelsMatch(metric, labels) { - if mf.GetType() == io_prometheus_client.MetricType_GAUGE { - return metric.GetGauge().GetValue(), 0, true - } - if mf.GetType() == io_prometheus_client.MetricType_SUMMARY { - return 0, metric.GetSummary().GetSampleCount(), true - } - } - } - } - return 0, 0, false -} - -func labelsMatch(metric *io_prometheus_client.Metric, wantLabels map[string]string) bool { - for wantKey, wantVal := range wantLabels { - found := false - for _, labelPair := range metric.GetLabel() { - if labelPair.GetName() == wantKey && labelPair.GetValue() == wantVal { - found = true - break - } - } - if !found { - return false - } - } - return true -} - -func TestInhibitorMetrics_RuleMatchesDuration(t *testing.T) { - reg := prometheus.NewRegistry() - metrics := NewInhibitorMetrics(reg) - - rules := []config.InhibitRule{ - { - Name: "test-rule", - SourceMatchers: []*labels.Matcher{ - {Type: labels.MatchEqual, Name: "severity", Value: "critical"}, - }, - TargetMatchers: []*labels.Matcher{ - {Type: labels.MatchEqual, Name: "severity", Value: "warning"}, - }, - Equal: []string{"instance"}, - }, - } - - marker := types.NewMarker(reg) - inhibitor := NewInhibitor(nil, rules, marker, nopLogger, metrics) - - // Test case 1: Target matches (should record matched="true") - targetAlert := model.LabelSet{ - "severity": "warning", - "instance": "server1", - } - inhibitor.Mutes(targetAlert) - - _, count, found := getMetricValue(t, reg, "alertmanager_inhibit_rule_matches_duration_seconds", - map[string]string{"rule": "test-rule", "matched": "true"}) - require.True(t, found, "Should find matched=true metric") - require.Equal(t, uint64(1), count, "Should have 1 sample for matched=true") - - // Test case 2: Target doesn't match (should record matched="false") - nonMatchingAlert := model.LabelSet{ - "severity": "info", - "instance": "server2", - } - inhibitor.Mutes(nonMatchingAlert) - - _, count, found = getMetricValue(t, reg, "alertmanager_inhibit_rule_matches_duration_seconds", - map[string]string{"rule": "test-rule", "matched": "false"}) - require.True(t, found, "Should find matched=false metric") - require.Equal(t, uint64(1), count, "Should have 1 sample for matched=false") -} - -func TestInhibitorMetrics_RuleMutesDuration_Muted(t *testing.T) { - reg := prometheus.NewRegistry() - metrics := NewInhibitorMetrics(reg) - - rules := []config.InhibitRule{ - { - Name: "test-rule", - SourceMatchers: []*labels.Matcher{ - {Type: labels.MatchEqual, Name: "severity", Value: "critical"}, - }, - TargetMatchers: []*labels.Matcher{ - {Type: labels.MatchEqual, Name: "severity", Value: "warning"}, - }, - Equal: []string{"instance"}, - }, - } - - marker := types.NewMarker(reg) - inhibitor := NewInhibitor(nil, rules, marker, nopLogger, metrics) - - // Add a source alert that will inhibit - sourceAlert := &types.Alert{ - Alert: model.Alert{ - Labels: model.LabelSet{ - "severity": "critical", - "instance": "server1", - }, - StartsAt: time.Now().Add(-time.Minute), - EndsAt: time.Now().Add(time.Hour), - }, - } - inhibitor.rules[0].Sources[0].scache.Set(sourceAlert) - inhibitor.rules[0].Sources[0].updateIndex(sourceAlert) - - // Test that target alert is muted - targetAlert := model.LabelSet{ - "severity": "warning", - "instance": "server1", - } - muted := inhibitor.Mutes(targetAlert) - require.True(t, muted, "Alert should be muted") - - // Verify per-rule muted="true" metric was recorded - _, count, found := getMetricValue(t, reg, "alertmanager_inhibit_rule_mutes_duration_seconds", - map[string]string{"rule": "test-rule", "muted": "true"}) - require.True(t, found, "Should find per-rule muted=true metric") - require.Equal(t, uint64(1), count, "Should have 1 sample for per-rule muted=true") - - // Verify global muted="true" metric was recorded - _, count, found = getMetricValue(t, reg, "alertmanager_inhibitor_mutes_duration_seconds", - map[string]string{"muted": "true"}) - require.True(t, found, "Should find global muted=true metric") - require.Equal(t, uint64(1), count, "Should have 1 sample for global muted=true") -} - -func TestInhibitorMetrics_RuleMutesDuration_NotMuted(t *testing.T) { - reg := prometheus.NewRegistry() - metrics := NewInhibitorMetrics(reg) - - rules := []config.InhibitRule{ - { - Name: "test-rule", - Sources: []config.InhibitRuleSource{ - { - SrcMatchers: config.Matchers{&labels.Matcher{Type: labels.MatchEqual, Name: "severity", Value: "critical"}}, - }, - }, - TargetMatchers: []*labels.Matcher{ - {Type: labels.MatchEqual, Name: "severity", Value: "warning"}, - }, - Equal: []string{"instance"}, - }, - } - - marker := types.NewMarker(reg) - inhibitor := NewInhibitor(nil, rules, marker, nopLogger, metrics) - - // Add a source alert with different instance - sourceAlert := &types.Alert{ - Alert: model.Alert{ - Labels: model.LabelSet{ - "severity": "critical", - "instance": "server1", - }, - StartsAt: time.Now().Add(-time.Minute), - EndsAt: time.Now().Add(time.Hour), - }, - } - inhibitor.rules[0].Sources[0].scache.Set(sourceAlert) - - // Test that target alert with different instance is NOT muted - targetAlert := model.LabelSet{ - "severity": "warning", - "instance": "server2", - } - muted := inhibitor.Mutes(targetAlert) - require.False(t, muted, "Alert should not be muted") - - // Verify per-rule muted="false" metric was recorded - _, count, found := getMetricValue(t, reg, "alertmanager_inhibit_rule_mutes_duration_seconds", - map[string]string{"rule": "test-rule", "muted": "false"}) - require.True(t, found, "Should find per-rule muted=false metric") - require.Equal(t, uint64(1), count, "Should have 1 sample for per-rule muted=false") - - // Verify global muted="false" metric was recorded - _, count, found = getMetricValue(t, reg, "alertmanager_inhibitor_mutes_duration_seconds", - map[string]string{"muted": "false"}) - require.True(t, found, "Should find global muted=false metric") - require.Equal(t, uint64(1), count, "Should have 1 sample for global muted=false") -} - -func TestInhibitorMetrics_NoRuleMatches(t *testing.T) { - reg := prometheus.NewRegistry() - metrics := NewInhibitorMetrics(reg) - - rules := []config.InhibitRule{ - { - Name: "test-rule", - Sources: []config.InhibitRuleSource{ - { - SrcMatchers: config.Matchers{&labels.Matcher{Type: labels.MatchEqual, Name: "severity", Value: "critical"}}, - }, - }, - TargetMatchers: []*labels.Matcher{ - {Type: labels.MatchEqual, Name: "severity", Value: "warning"}, - }, - Equal: []string{"instance"}, - }, - } - - marker := types.NewMarker(reg) - inhibitor := NewInhibitor(nil, rules, marker, nopLogger, metrics) - - // Test with alert that doesn't match any rule's target - nonMatchingAlert := model.LabelSet{ - "severity": "info", - "instance": "server1", - } - muted := inhibitor.Mutes(nonMatchingAlert) - require.False(t, muted, "Alert should not be muted") - - // Verify that global muted="false" metric was recorded - _, count, found := getMetricValue(t, reg, "alertmanager_inhibitor_mutes_duration_seconds", - map[string]string{"muted": "false"}) - require.True(t, found, "Should find global muted=false metric") - require.Equal(t, uint64(1), count, "Should have 1 sample for global muted=false") - - // Verify per-rule matched="false" was recorded - _, count, found = getMetricValue(t, reg, "alertmanager_inhibit_rule_matches_duration_seconds", - map[string]string{"rule": "test-rule", "matched": "false"}) - require.True(t, found, "Should find rule matched=false metric") - require.Equal(t, uint64(1), count, "Should have 1 sample for rule matched=false") -} - -func TestInhibitorMetrics_MultipleRules(t *testing.T) { - reg := prometheus.NewRegistry() - metrics := NewInhibitorMetrics(reg) - - rules := []config.InhibitRule{ - { - Name: "rule-1", - SourceMatchers: []*labels.Matcher{ - {Type: labels.MatchEqual, Name: "severity", Value: "critical"}, - }, - TargetMatchers: []*labels.Matcher{ - {Type: labels.MatchEqual, Name: "severity", Value: "warning"}, - }, - Equal: []string{"instance"}, - }, - { - Name: "rule-2", - SourceMatchers: []*labels.Matcher{ - {Type: labels.MatchEqual, Name: "team", Value: "sre"}, - }, - TargetMatchers: []*labels.Matcher{ - {Type: labels.MatchEqual, Name: "team", Value: "dev"}, - }, - Equal: []string{"service"}, - }, - } - - marker := types.NewMarker(reg) - inhibitor := NewInhibitor(nil, rules, marker, nopLogger, metrics) - - // Add source alert for rule-1 - sourceAlert1 := &types.Alert{ - Alert: model.Alert{ - Labels: model.LabelSet{ - "severity": "critical", - "instance": "server1", - }, - StartsAt: time.Now().Add(-time.Minute), - EndsAt: time.Now().Add(time.Hour), - }, - } - inhibitor.rules[0].Sources[0].scache.Set(sourceAlert1) - inhibitor.rules[0].Sources[0].updateIndex(sourceAlert1) - - // Test alert that matches rule-1 - targetAlert1 := model.LabelSet{ - "severity": "warning", - "instance": "server1", - } - muted1 := inhibitor.Mutes(targetAlert1) - require.True(t, muted1, "Alert should be muted by rule-1") - - // Verify metrics for rule-1 - _, count, found := getMetricValue(t, reg, "alertmanager_inhibit_rule_matches_duration_seconds", - map[string]string{"rule": "rule-1", "matched": "true"}) - require.True(t, found, "Should find rule-1 matched=true metric") - require.Equal(t, 1, int(count)) - - _, count, found = getMetricValue(t, reg, "alertmanager_inhibit_rule_mutes_duration_seconds", - map[string]string{"rule": "rule-1", "muted": "true"}) - require.True(t, found, "Should find rule-1 muted=true metric") - require.Equal(t, 1, int(count)) - - // Verify global muted="true" metric - _, count, found = getMetricValue(t, reg, "alertmanager_inhibitor_mutes_duration_seconds", - map[string]string{"muted": "true"}) - require.True(t, found, "Should find global muted=true metric") - require.Equal(t, 1, int(count)) - - // Test alert that matches rule-2 target but has no source - targetAlert2 := model.LabelSet{ - "team": "dev", - "service": "api", - } - muted2 := inhibitor.Mutes(targetAlert2) - require.False(t, muted2, "Alert should not be muted") - - // Verify metrics for rule-2 (both rules process this alert since rule-1 doesn't match target) - _, count, found = getMetricValue(t, reg, "alertmanager_inhibit_rule_matches_duration_seconds", - map[string]string{"rule": "rule-1", "matched": "false"}) - require.True(t, found, "Should find rule-1 matched=false metric") - require.Equal(t, 1, int(count)) - - _, count, found = getMetricValue(t, reg, "alertmanager_inhibit_rule_matches_duration_seconds", - map[string]string{"rule": "rule-2", "matched": "true"}) - require.True(t, found, "Should find rule-2 matched=true metric") - require.Equal(t, 1, int(count)) - - _, count, found = getMetricValue(t, reg, "alertmanager_inhibit_rule_mutes_duration_seconds", - map[string]string{"rule": "rule-2", "muted": "false"}) - require.True(t, found, "Should find rule-2 muted=false metric") - require.Equal(t, 1, int(count)) - - // Verify global muted="false" metric - _, count, found = getMetricValue(t, reg, "alertmanager_inhibitor_mutes_duration_seconds", - map[string]string{"muted": "false"}) - require.True(t, found, "Should find global muted=false metric") - require.Equal(t, 1, int(count), "Should have 1 samples") -} - -func TestInhibitorMetrics_CacheAndIndexItems(t *testing.T) { - reg := prometheus.NewRegistry() - metrics := NewInhibitorMetrics(reg) - - rules := []config.InhibitRule{ - { - Name: "named-rule", - SourceMatchers: []*labels.Matcher{ - {Type: labels.MatchEqual, Name: "severity", Value: "critical"}, - }, - TargetMatchers: []*labels.Matcher{ - {Type: labels.MatchEqual, Name: "severity", Value: "warning"}, - }, - Equal: []string{"instance"}, - }, - { - SourceMatchers: []*labels.Matcher{ - {Type: labels.MatchEqual, Name: "severity", Value: "critical"}, - }, - TargetMatchers: []*labels.Matcher{ - {Type: labels.MatchEqual, Name: "severity", Value: "warning"}, - }, - Equal: []string{"cluster"}, - }, - } - - marker := types.NewMarker(reg) - provider, err := mem.NewAlerts(t.Context(), marker, 15*time.Minute, nil, nopLogger, reg) - require.NoError(t, err) - inhibitor := NewInhibitor(provider, rules, marker, nopLogger, metrics) - go inhibitor.Run() - - // Add multiple source alerts - for i := 1; i <= 3; i++ { - sourceAlert := &types.Alert{ - Alert: model.Alert{ - Labels: model.LabelSet{ - "severity": "critical", - "instance": model.LabelValue("server" + string(rune('0'+i))), - "cluster": model.LabelValue("cluster" + string(rune('0'+i))), - }, - StartsAt: time.Now().Add(-time.Minute), - EndsAt: time.Now().Add(time.Hour), - }, - } - require.NoError(t, provider.Put(sourceAlert)) - } - - // Wait for the inhibitor to process alerts and update metrics - // The Run() goroutine processes alerts asynchronously - require.Eventually(t, func() bool { - value, _, found := getMetricValue(t, reg, "alertmanager_inhibitor_source_alerts_cache_items", - map[string]string{}) - return found && value == 6 - }, 2*time.Second, 50*time.Millisecond, "Cache items metric should reach 6") - - // Stop the inhibitor - inhibitor.Stop() - - // Global metrics (no labels) show the sum across all rules - value, _, found := getMetricValue(t, reg, "alertmanager_inhibitor_source_alerts_cache_items", - map[string]string{}) - require.True(t, found, "Should find global cache items metric") - require.Equal(t, float64(6), value, "Global cache should contain 6 alerts total") - - value, _, found = getMetricValue(t, reg, "alertmanager_inhibitor_source_alerts_index_items", - map[string]string{}) - require.True(t, found, "Should find global index items metric") - require.Equal(t, float64(6), value, "Global index should contain 6 entries total") - - // Per-rule metrics show individual rule values - value, _, found = getMetricValue(t, reg, "alertmanager_inhibit_rule_source_alerts_cache_items", - map[string]string{"rule": "named-rule"}) - require.True(t, found, "Should find per-rule cache items metric") - require.Equal(t, float64(3), value, "Named rule cache should contain 3 alerts") - - value, _, found = getMetricValue(t, reg, "alertmanager_inhibit_rule_source_alerts_index_items", - map[string]string{"rule": "named-rule"}) - require.True(t, found, "Should find per-rule index items metric") - require.Equal(t, float64(3), value, "Named rule index should contain 3 entries") -} - -func TestInhibitorMetrics_Registration(t *testing.T) { - reg := prometheus.NewRegistry() - metrics := NewInhibitorMetrics(reg) - - require.NotNil(t, metrics, "Metrics should be created") - - // Create a rule and use the metrics so they appear in Gather() output - rules := []config.InhibitRule{ - { - Name: "test-rule", - Sources: []config.InhibitRuleSource{ - { - SrcMatchers: config.Matchers{&labels.Matcher{Type: labels.MatchEqual, Name: "severity", Value: "critical"}}, - }, - }, - TargetMatchers: []*labels.Matcher{ - {Type: labels.MatchEqual, Name: "severity", Value: "warning"}, - }, - Equal: []string{"instance"}, - }, - } - - marker := types.NewMarker(reg) - inhibitor := NewInhibitor(nil, rules, marker, nopLogger, metrics) - - // Use the metrics to ensure they show up in Gather() - testAlert := model.LabelSet{ - "severity": "warning", - "instance": "server1", - } - inhibitor.Mutes(testAlert) - - // Verify all metrics are registered and have data - metricFamilies, err := reg.Gather() - require.NoError(t, err) - - registeredMetrics := map[string]bool{ - "alertmanager_inhibitor_source_alerts_cache_items": false, - "alertmanager_inhibitor_source_alerts_index_items": false, - "alertmanager_inhibitor_mutes_duration_seconds": false, - "alertmanager_inhibit_rule_source_alerts_cache_items": false, - "alertmanager_inhibit_rule_source_alerts_index_items": false, - "alertmanager_inhibit_rule_matches_duration_seconds": false, - "alertmanager_inhibit_rule_mutes_duration_seconds": false, - } - - for _, mf := range metricFamilies { - if _, exists := registeredMetrics[mf.GetName()]; exists { - registeredMetrics[mf.GetName()] = true - } - } - - for metricName, registered := range registeredMetrics { - require.True(t, registered, "Metric %s should be registered", metricName) - } -} From cce060c572b4b39669f58df6c074e11f53ad4826 Mon Sep 17 00:00:00 2001 From: Coleen Iona Quadros Date: Mon, 8 Dec 2025 19:46:07 +0100 Subject: [PATCH 14/14] add span back Signed-off-by: Coleen Iona Quadros --- inhibit/inhibit.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/inhibit/inhibit.go b/inhibit/inhibit.go index ebea70178e..0f25bc5508 100644 --- a/inhibit/inhibit.go +++ b/inhibit/inhibit.go @@ -227,6 +227,11 @@ func (ih *Inhibitor) Mutes(ctx context.Context, lset model.LabelSet) bool { if !sourceHasNoEqual { compositeInhibitorID := strings.Join(inhibitorIDs, ",") ih.marker.SetInhibited(fp, compositeInhibitorID) + span.AddEvent("alert inhibited", + trace.WithAttributes( + attribute.String("alerting.inhibit_rule.source.fingerprint", compositeInhibitorID), + ), + ) return true } }