Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,15 @@ package operatorstateanalyzer

import (
"context"
"fmt"
"path/filepath"
"sort"
"time"

"github.com/openshift/origin/pkg/monitortestframework"
"github.com/sirupsen/logrus"

"github.com/openshift/origin/pkg/dataloader"
"github.com/openshift/origin/pkg/monitor/monitorapi"
"github.com/openshift/origin/pkg/test/ginkgo/junitapi"
"k8s.io/client-go/rest"
Expand All @@ -14,6 +19,16 @@ import (
// operatorStateChecker is the field-less receiver type for this package's
// analyzer methods; NewAnalyzer returns a pointer to it.
type operatorStateChecker struct{}

// OperatorStateMetrics is the per-operator summary built by
// calculateOperatorStateMetrics from operator-state intervals. The
// Progressing* and Degraded* fields hold the same three aggregates for each
// of the two tracked conditions.
type OperatorStateMetrics struct {
	// OperatorName is the cluster operator name read from the interval locator.
	OperatorName string
	// ProgressingCount is the number of "Progressing" intervals seen.
	ProgressingCount int
	// TotalProgressingSeconds is the summed length, in seconds, of all "Progressing" intervals.
	TotalProgressingSeconds float64
	// MaxIndividualProgressingSeconds is the length, in seconds, of the longest single "Progressing" interval.
	MaxIndividualProgressingSeconds float64
	// DegradedCount is the number of "Degraded" intervals seen.
	DegradedCount int
	// TotalDegradedSeconds is the summed length, in seconds, of all "Degraded" intervals.
	TotalDegradedSeconds float64
	// MaxIndividualDegradedSeconds is the length, in seconds, of the longest single "Degraded" interval.
	MaxIndividualDegradedSeconds float64
}

// NewAnalyzer constructs the operator state analyzer and hands it back as a
// monitortestframework.MonitorTest.
func NewAnalyzer() monitortestframework.MonitorTest {
	checker := new(operatorStateChecker)
	return checker
}
Expand Down Expand Up @@ -44,9 +59,102 @@ func (*operatorStateChecker) EvaluateTestsFromConstructedIntervals(ctx context.C
}

func (*operatorStateChecker) WriteContentToStorage(ctx context.Context, storageDir, timeSuffix string, finalIntervals monitorapi.Intervals, finalResourceState monitorapi.ResourcesMap) error {
metrics := calculateOperatorStateMetrics(finalIntervals)
if len(metrics) > 0 {
rows := generateRowsFromMetrics(metrics)
dataFile := dataloader.DataFile{
TableName: "operator_state_metrics",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm wondering if instead of the generic "Metric" we should have defined "Count", "TotalSeconds" and maybe "MinSeconds" and "MaxSeconds" instead of "IndividualDurationSeconds". Will see if others have thoughts on this.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the consensus was to make this a single row per operator/condition tracking

"Count", "TotalSeconds" and "MaxIndividualDurationSeconds"

Schema: map[string]dataloader.DataType{
"Operator": dataloader.DataTypeString,
"State": dataloader.DataTypeString,
"Count": dataloader.DataTypeInteger,
"TotalSeconds": dataloader.DataTypeFloat64,
"MaxIndividualDurationSeconds": dataloader.DataTypeFloat64,
},
Rows: rows,
}
fileName := filepath.Join(storageDir, fmt.Sprintf("operator-state-metrics%s-%s", timeSuffix, dataloader.AutoDataLoaderSuffix))
if err := dataloader.WriteDataFile(fileName, dataFile); err != nil {
return fmt.Errorf("failed to write operator state metrics: %w", err)
}
logrus.Infof("Write operator state metrics to %s successfully.", fileName)
}

return nil
}

// calculateOperatorStateMetrics folds operator-state intervals into one
// OperatorStateMetrics summary per cluster operator, keyed by operator name.
//
// Only intervals whose Source is monitorapi.SourceOperatorState and whose
// locator type is monitorapi.LocatorTypeClusterOperator are considered. For
// the "Progressing" and "Degraded" conditions the summary records how many
// intervals were seen, their combined length in seconds, and the longest
// single interval in seconds. An interval carrying any other condition still
// creates an entry for its operator but adds nothing to the counters.
func calculateOperatorStateMetrics(finalIntervals monitorapi.Intervals) map[string]*OperatorStateMetrics {
	summaries := map[string]*OperatorStateMetrics{}

	for _, iv := range finalIntervals {
		relevant := iv.Source == monitorapi.SourceOperatorState &&
			iv.Locator.Type == monitorapi.LocatorTypeClusterOperator
		if !relevant {
			continue
		}

		// Fetch the operator's summary once, creating it on first sight.
		name := iv.Locator.Keys[monitorapi.LocatorClusterOperatorKey]
		entry, seen := summaries[name]
		if !seen {
			entry = &OperatorStateMetrics{OperatorName: name}
			summaries[name] = entry
		}

		seconds := iv.To.Sub(iv.From).Seconds()

		switch iv.Message.Annotations[monitorapi.AnnotationCondition] {
		case "Progressing":
			entry.ProgressingCount++
			entry.TotalProgressingSeconds += seconds
			if seconds > entry.MaxIndividualProgressingSeconds {
				entry.MaxIndividualProgressingSeconds = seconds
			}
		case "Degraded":
			entry.DegradedCount++
			entry.TotalDegradedSeconds += seconds
			if seconds > entry.MaxIndividualDegradedSeconds {
				entry.MaxIndividualDegradedSeconds = seconds
			}
		}
	}

	return summaries
}

// generateRowsFromMetrics flattens the per-operator summaries into dataloader
// rows. Operators are emitted in ascending name order, and within an operator
// the "Progressing" row precedes the "Degraded" row. A row is produced only
// for a condition whose count is greater than zero. The result is always a
// non-nil slice, even when it is empty.
func generateRowsFromMetrics(metrics map[string]*OperatorStateMetrics) []map[string]string {
	// Map iteration order is random, so walk the keys in sorted order to keep
	// the output stable.
	names := make([]string, 0, len(metrics))
	for n := range metrics {
		names = append(names, n)
	}
	sort.Strings(names)

	// newRow renders one operator/condition aggregate as string columns.
	newRow := func(operator, state string, count int, total, longest float64) map[string]string {
		return map[string]string{
			"Operator":                     operator,
			"State":                        state,
			"Count":                        fmt.Sprintf("%d", count),
			"TotalSeconds":                 fmt.Sprintf("%f", total),
			"MaxIndividualDurationSeconds": fmt.Sprintf("%f", longest),
		}
	}

	// At most two rows (one per condition) can come from each operator.
	out := make([]map[string]string, 0, 2*len(names))
	for _, n := range names {
		m := metrics[n]
		if m.ProgressingCount > 0 {
			out = append(out, newRow(n, "Progressing", m.ProgressingCount, m.TotalProgressingSeconds, m.MaxIndividualProgressingSeconds))
		}
		if m.DegradedCount > 0 {
			out = append(out, newRow(n, "Degraded", m.DegradedCount, m.TotalDegradedSeconds, m.MaxIndividualDegradedSeconds))
		}
	}
	return out
}

func (*operatorStateChecker) Cleanup(ctx context.Context) error {
// TODO wire up the start to a context we can kill here
return nil
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
package operatorstateanalyzer

import (
"testing"
"time"

"github.com/openshift/origin/pkg/monitor/monitorapi"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)

// TestOperatorStateAnalyzer is a table-driven test that runs each case through
// calculateOperatorStateMetrics and then feeds the result into
// generateRowsFromMetrics. Metrics are compared field by field (float fields
// with a small tolerance), and rows are compared in order.
func TestOperatorStateAnalyzer(t *testing.T) {
	tests := []struct {
		name            string
		intervals       monitorapi.Intervals
		expectedMetrics map[string]*OperatorStateMetrics
		expectedRows    []map[string]string
	}{
		{
			name: "single operator, progressing and degraded",
			intervals: monitorapi.Intervals{
				makeTestInterval("operator-a", "Progressing", 10),
				makeTestInterval("operator-a", "Progressing", 5),
				makeTestInterval("operator-a", "Degraded", 15),
			},
			expectedMetrics: map[string]*OperatorStateMetrics{
				"operator-a": {
					OperatorName:                    "operator-a",
					ProgressingCount:                2,
					TotalProgressingSeconds:         15,
					MaxIndividualProgressingSeconds: 10,
					DegradedCount:                   1,
					TotalDegradedSeconds:            15,
					MaxIndividualDegradedSeconds:    15,
				},
			},
			expectedRows: []map[string]string{
				{
					"Operator":                     "operator-a",
					"State":                        "Progressing",
					"Count":                        "2",
					"TotalSeconds":                 "15.000000",
					"MaxIndividualDurationSeconds": "10.000000",
				},
				{
					"Operator":                     "operator-a",
					"State":                        "Degraded",
					"Count":                        "1",
					"TotalSeconds":                 "15.000000",
					"MaxIndividualDurationSeconds": "15.000000",
				},
			},
		},
		{
			name: "multiple operators",
			intervals: monitorapi.Intervals{
				makeTestInterval("operator-a", "Progressing", 10),
				makeTestInterval("operator-b", "Degraded", 20),
			},
			expectedMetrics: map[string]*OperatorStateMetrics{
				"operator-a": {
					OperatorName:                    "operator-a",
					ProgressingCount:                1,
					TotalProgressingSeconds:         10,
					MaxIndividualProgressingSeconds: 10,
				},
				"operator-b": {
					OperatorName:                 "operator-b",
					DegradedCount:                1,
					TotalDegradedSeconds:         20,
					MaxIndividualDegradedSeconds: 20,
				},
			},
			expectedRows: []map[string]string{
				{
					"Operator":                     "operator-a",
					"State":                        "Progressing",
					"Count":                        "1",
					"TotalSeconds":                 "10.000000",
					"MaxIndividualDurationSeconds": "10.000000",
				},
				{
					"Operator":                     "operator-b",
					"State":                        "Degraded",
					"Count":                        "1",
					"TotalSeconds":                 "20.000000",
					"MaxIndividualDurationSeconds": "20.000000",
				},
			},
		},
		{
			name:            "no relevant intervals",
			intervals:       monitorapi.Intervals{},
			expectedMetrics: map[string]*OperatorStateMetrics{},
			expectedRows:    []map[string]string{},
		},
		{
			name: "operator with only degraded state",
			intervals: monitorapi.Intervals{
				makeTestInterval("operator-c", "Degraded", 30),
			},
			expectedMetrics: map[string]*OperatorStateMetrics{
				"operator-c": {
					OperatorName:                 "operator-c",
					DegradedCount:                1,
					TotalDegradedSeconds:         30,
					MaxIndividualDegradedSeconds: 30,
				},
			},
			expectedRows: []map[string]string{
				{
					"Operator":                     "operator-c",
					"State":                        "Degraded",
					"Count":                        "1",
					"TotalSeconds":                 "30.000000",
					"MaxIndividualDurationSeconds": "30.000000",
				},
			},
		},
	}

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			// Test calculateOperatorStateMetrics
			metrics := calculateOperatorStateMetrics(tc.intervals)
			require.Equal(t, len(tc.expectedMetrics), len(metrics), "number of operators should match")
			for op, expected := range tc.expectedMetrics {
				actual, ok := metrics[op]
				require.True(t, ok, "operator %s not found in metrics", op)
				assert.Equal(t, expected.OperatorName, actual.OperatorName, "OperatorName should match")
				assert.Equal(t, expected.ProgressingCount, actual.ProgressingCount, "ProgressingCount should match")
				assert.InDelta(t, expected.TotalProgressingSeconds, actual.TotalProgressingSeconds, 0.001, "TotalProgressingSeconds should match")
				assert.InDelta(t, expected.MaxIndividualProgressingSeconds, actual.MaxIndividualProgressingSeconds, 0.001, "MaxIndividualProgressingSeconds should match")
				assert.Equal(t, expected.DegradedCount, actual.DegradedCount, "DegradedCount should match")
				assert.InDelta(t, expected.TotalDegradedSeconds, actual.TotalDegradedSeconds, 0.001, "TotalDegradedSeconds should match")
				assert.InDelta(t, expected.MaxIndividualDegradedSeconds, actual.MaxIndividualDegradedSeconds, 0.001, "MaxIndividualDegradedSeconds should match")
			}

			// Test generateRowsFromMetrics. The rows are produced in sorted
			// operator order with Progressing before Degraded, so compare
			// order-sensitively rather than as an unordered set.
			rows := generateRowsFromMetrics(metrics)
			assert.Equal(t, tc.expectedRows, rows, "generated rows should match expected rows")
		})
	}
}

// makeTestInterval builds an operator-state interval for the named cluster
// operator, annotated with the given condition. The interval starts at Unix
// time 1s and lasts durationSeconds.
func makeTestInterval(operatorName, condition string, durationSeconds float64) monitorapi.Interval {
	start := time.Unix(1, 0)
	length := time.Duration(durationSeconds * float64(time.Second))

	locator := monitorapi.Locator{
		Type: monitorapi.LocatorTypeClusterOperator,
		Keys: map[monitorapi.LocatorKey]string{
			monitorapi.LocatorClusterOperatorKey: operatorName,
		},
	}
	message := monitorapi.Message{
		Annotations: map[monitorapi.AnnotationKey]string{
			monitorapi.AnnotationCondition: condition,
		},
	}

	return monitorapi.Interval{
		Source: monitorapi.SourceOperatorState,
		Condition: monitorapi.Condition{
			Locator: locator,
			Message: message,
		},
		From: start,
		To:   start.Add(length),
	}
}