Skip to content

Commit

Permalink
feat: PC-13724 Add custom thresholds for SHR (#530)
Browse files Browse the repository at this point in the history
## Motivation

Currently, System Health Review report is built using error budget
remaining metric. Whether an SLO is marked as Red (Exhausted), Green
(Healthy), or Yellow (At Risk) depends upon a static value. This PR
opens up a window towards user-defined thresholds. If user decides that
SLO should be marked as Healthy when it has more than 50% budget
remaining, they will set `spec.systemHealthReview.thresholds.greenGt`
field to `0.5`. If user wants to remove AtRisk threshold, they will set
`spec.systemHealthReview.thresholds.redLte` to the same value as
`greenGt` (`0.5`). Additionally, if users decides that NoData SLOs are
supposed to be displayed on their report, a field `ShowNoData` is
provided that can be set to true in such case.

## Summary

`Thresholds` object with three fields (`GreenGreaterThan`,
`RedLessThanOrEqualTo` and `ShowNoData`) was added to
`SystemHealthReviewConfig` along with validation of said fields.

## Release Notes

`Thresholds` object with three fields (`GreenGreaterThan`,
`RedLessThanOrEqualTo` and `ShowNoData`) was added to
`SystemHealthReviewConfig`. It allows users to define custom values for
`Healthy` (`Green`), `AtRisk` (`Yellow`) and `Exhausted` (`Red`)
thresholds and to decide whether to display or hide `NoData` SLOs on
System Health Review report. It will be available once Nobl9 platform
version 1.97.2 is released.
  • Loading branch information
lukasz-dobek authored Sep 6, 2024
1 parent 3af59f1 commit 107fd6f
Show file tree
Hide file tree
Showing 7 changed files with 182 additions and 1 deletion.
4 changes: 4 additions & 0 deletions internal/manifest/v1alpha/examples/report.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,10 @@ func Report() []Example {
},
},
},
Thresholds: report.Thresholds{
RedLessThanOrEqual: ptr(0.8),
GreenGreaterThan: ptr(0.95),
},
},
},
),
Expand Down
9 changes: 9 additions & 0 deletions manifest/v1alpha/report/example_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,11 @@ func ExampleReport_systemHealthReview() {
},
},
},
Thresholds: report.Thresholds{
RedLessThanOrEqual: ptr(0.8),
GreenGreaterThan: ptr(0.95),
ShowNoData: false,
},
},
},
)
Expand Down Expand Up @@ -135,6 +140,10 @@ func ExampleReport_systemHealthReview() {
// labels:
// key3:
// - value1
// thresholds:
// redLte: 0.8
// greenGt: 0.95
// showNoData: false
}

func ExampleReport_sloHistory() {
Expand Down
4 changes: 4 additions & 0 deletions manifest/v1alpha/report/examples.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@
key2:
- value2
- value3
thresholds:
redLte: 0.8
greenGt: 0.95
showNoData: false
- apiVersion: n9/v1alpha
kind: Report
metadata:
Expand Down
10 changes: 10 additions & 0 deletions manifest/v1alpha/report/system_health_review.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,16 @@ type SystemHealthReviewConfig struct {
TimeFrame SystemHealthReviewTimeFrame `json:"timeFrame" validate:"required"`
RowGroupBy RowGroupBy `json:"rowGroupBy" validate:"required" example:"project"`
Columns []ColumnSpec `json:"columns" validate:"min=1,max=30"`
Thresholds Thresholds `json:"thresholds" validate:"required"`
}

type Thresholds struct {
RedLessThanOrEqual *float64 `json:"redLte" validate:"required" example:"0.8"`
// Yellow is calculated as the difference between Red and Green
// thresholds. If Red and Green are the same, Yellow is not used on the report.
GreenGreaterThan *float64 `json:"greenGt" validate:"required" example:"0.95"`
// ShowNoData customizes the report to either show or hide rows with no data.
ShowNoData bool `json:"showNoData"`
}

type ColumnSpec struct {
Expand Down
29 changes: 29 additions & 0 deletions manifest/v1alpha/report/validation_system_health_review.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@ var systemHealthReviewValidation = govy.New[SystemHealthReviewConfig](
WithName("timeFrame").
Required().
Include(timeFrameValidation),
govy.For(func(s SystemHealthReviewConfig) Thresholds { return s.Thresholds }).
WithName("thresholds").
Required().
Include(reportThresholdsValidation),
)

var columnValidation = govy.New[ColumnSpec](
Expand Down Expand Up @@ -53,6 +57,31 @@ var timeFrameValidation = govy.New[SystemHealthReviewTimeFrame](
Include(snapshotTimeFrameLatestPointValidation),
)

var reportThresholdsValidation = govy.New[Thresholds](
govy.For(govy.GetSelf[Thresholds]()).
Rules(redLteValidation),
govy.ForPointer(func(s Thresholds) *float64 { return s.RedLessThanOrEqual }).
WithName("redLte").
Required().
Rules(rules.GTE(0.0), rules.LTE(1.0)),
govy.ForPointer(func(s Thresholds) *float64 { return s.GreenGreaterThan }).
WithName("greenGt").
Required().
Rules(rules.GTE(0.0), rules.LTE(1.0)),
)

var redLteValidation = govy.NewRule(func(v Thresholds) error {
if v.RedLessThanOrEqual != nil && v.GreenGreaterThan != nil {
if *v.RedLessThanOrEqual > *v.GreenGreaterThan {
return govy.NewPropertyError(
"redLte",
v.RedLessThanOrEqual,
errors.Errorf("must be less than or equal to 'greenGt' (%v)", *v.GreenGreaterThan))
}
}
return nil
})

var snapshotValidation = govy.New[SnapshotTimeFrame](
govy.For(func(s SnapshotTimeFrame) SnapshotPoint { return s.Point }).
WithName("point").
Expand Down
127 changes: 126 additions & 1 deletion manifest/v1alpha/report/validation_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ func TestValidate_Spec(t *testing.T) {
},
}
err := validate(report)
testutils.AssertContainsErrors(t, report, err, 1, testutils.ExpectedError{
testutils.AssertContainsErrors(t, report, err, 2, testutils.ExpectedError{
Prop: "spec",
Message: "exactly one report type configuration is required",
})
Expand Down Expand Up @@ -512,6 +512,10 @@ func TestValidate_Spec_SystemHealthReview(t *testing.T) {
Columns: []ColumnSpec{
{DisplayName: "Column 1", Labels: properLabel},
},
Thresholds: Thresholds{
RedLessThanOrEqual: func(f float64) *float64 { return &f }(0.0),
GreenGreaterThan: func(f float64) *float64 { return &f }(0.2),
},
},
},
"fails with empty columns": {
Expand All @@ -531,6 +535,10 @@ func TestValidate_Spec_SystemHealthReview(t *testing.T) {
},
RowGroupBy: RowGroupByProject,
Columns: []ColumnSpec{},
Thresholds: Thresholds{
RedLessThanOrEqual: func(f float64) *float64 { return &f }(0.0),
GreenGreaterThan: func(f float64) *float64 { return &f }(0.2),
},
},
},
"fails with too many columns": {
Expand Down Expand Up @@ -582,6 +590,10 @@ func TestValidate_Spec_SystemHealthReview(t *testing.T) {
{DisplayName: "Column 30", Labels: properLabel},
{DisplayName: "Column 31", Labels: properLabel},
},
Thresholds: Thresholds{
RedLessThanOrEqual: func(f float64) *float64 { return &f }(0.0),
GreenGreaterThan: func(f float64) *float64 { return &f }(0.2),
},
},
},
"fails with empty labels": {
Expand All @@ -603,6 +615,10 @@ func TestValidate_Spec_SystemHealthReview(t *testing.T) {
Columns: []ColumnSpec{
{DisplayName: "Column 1", Labels: map[LabelKey][]LabelValue{}},
},
Thresholds: Thresholds{
RedLessThanOrEqual: func(f float64) *float64 { return &f }(0.0),
GreenGreaterThan: func(f float64) *float64 { return &f }(0.2),
},
},
},
"fails with empty displayName": {
Expand All @@ -624,6 +640,60 @@ func TestValidate_Spec_SystemHealthReview(t *testing.T) {
Columns: []ColumnSpec{
{Labels: properLabel},
},
Thresholds: Thresholds{
RedLessThanOrEqual: func(f float64) *float64 { return &f }(0.0),
GreenGreaterThan: func(f float64) *float64 { return &f }(0.2),
},
},
},
"fails with empty thresholds": {
ExpectedErrorsCount: 1,
ExpectedErrors: []testutils.ExpectedError{
{
Prop: "spec.systemHealthReview.thresholds",
Code: rules.ErrorCodeRequired,
},
},
Config: SystemHealthReviewConfig{
TimeFrame: SystemHealthReviewTimeFrame{
Snapshot: SnapshotTimeFrame{
Point: SnapshotPointLatest,
},
TimeZone: "Europe/Warsaw",
},
RowGroupBy: RowGroupByProject,
Columns: []ColumnSpec{
{DisplayName: "Column 1", Labels: properLabel},
},
},
},
"fails with invalid thresholds": {
ExpectedErrorsCount: 2,
ExpectedErrors: []testutils.ExpectedError{
{
Prop: "spec.systemHealthReview.thresholds.redLte",
Code: rules.ErrorCodeGreaterThanOrEqualTo,
},
{
Prop: "spec.systemHealthReview.thresholds.greenGt",
Code: rules.ErrorCodeLessThanOrEqualTo,
},
},
Config: SystemHealthReviewConfig{
TimeFrame: SystemHealthReviewTimeFrame{
Snapshot: SnapshotTimeFrame{
Point: SnapshotPointLatest,
},
TimeZone: "Europe/Warsaw",
},
RowGroupBy: RowGroupByProject,
Columns: []ColumnSpec{
{DisplayName: "Column 1", Labels: properLabel},
},
Thresholds: Thresholds{
RedLessThanOrEqual: func(f float64) *float64 { return &f }(-0.1),
GreenGreaterThan: func(f float64) *float64 { return &f }(1.1),
},
},
},
} {
Expand All @@ -634,6 +704,32 @@ func TestValidate_Spec_SystemHealthReview(t *testing.T) {
testutils.AssertContainsErrors(t, report, err, test.ExpectedErrorsCount, test.ExpectedErrors...)
})
}

t.Run("fails when red is greater than green", func(t *testing.T) {
report := validReport()
report.Spec.SystemHealthReview = &SystemHealthReviewConfig{
TimeFrame: SystemHealthReviewTimeFrame{
Snapshot: SnapshotTimeFrame{
Point: SnapshotPointLatest,
},
TimeZone: "Europe/Warsaw",
},
RowGroupBy: RowGroupByProject,
Columns: []ColumnSpec{
{DisplayName: "Column 1", Labels: properLabel},
},
Thresholds: Thresholds{
RedLessThanOrEqual: func(f float64) *float64 { return &f }(0.2),
GreenGreaterThan: func(f float64) *float64 { return &f }(0.1),
},
}
err := validate(report)
testutils.AssertContainsErrors(t, report, err, 1, testutils.ExpectedError{
Prop: "spec.systemHealthReview.thresholds.redLte",
Message: "must be less than or equal to 'greenGt' (0.1)",
},
)
})
}

func TestValidate_Spec_SystemHealthReview_TimeFrame(t *testing.T) {
Expand All @@ -655,6 +751,10 @@ func TestValidate_Spec_SystemHealthReview_TimeFrame(t *testing.T) {
Columns: []ColumnSpec{
{DisplayName: "Column 1", Labels: map[LabelKey][]LabelValue{"key1": {"value1"}}},
},
Thresholds: Thresholds{
RedLessThanOrEqual: func(f float64) *float64 { return &f }(0.0),
GreenGreaterThan: func(f float64) *float64 { return &f }(0.2),
},
},
},
"fails with empty point in snapshot": {
Expand All @@ -674,6 +774,10 @@ func TestValidate_Spec_SystemHealthReview_TimeFrame(t *testing.T) {
Columns: []ColumnSpec{
{DisplayName: "Column 1", Labels: map[LabelKey][]LabelValue{"key1": {"value1"}}},
},
Thresholds: Thresholds{
RedLessThanOrEqual: func(f float64) *float64 { return &f }(0.0),
GreenGreaterThan: func(f float64) *float64 { return &f }(0.2),
},
},
},
"fails with empty data in past point snapshot": {
Expand All @@ -695,6 +799,10 @@ func TestValidate_Spec_SystemHealthReview_TimeFrame(t *testing.T) {
Columns: []ColumnSpec{
{DisplayName: "Column 1", Labels: map[LabelKey][]LabelValue{"key1": {"value1"}}},
},
Thresholds: Thresholds{
RedLessThanOrEqual: func(f float64) *float64 { return &f }(0.0),
GreenGreaterThan: func(f float64) *float64 { return &f }(0.2),
},
},
},
"fails with wrong rrule format in past point snapshot": {
Expand All @@ -721,6 +829,10 @@ func TestValidate_Spec_SystemHealthReview_TimeFrame(t *testing.T) {
Columns: []ColumnSpec{
{DisplayName: "Column 1", Labels: map[LabelKey][]LabelValue{"key1": {"value1"}}},
},
Thresholds: Thresholds{
RedLessThanOrEqual: func(f float64) *float64 { return &f }(0.0),
GreenGreaterThan: func(f float64) *float64 { return &f }(0.2),
},
},
},
"fails with invalid rrule in past point snapshot": {
Expand All @@ -747,6 +859,10 @@ func TestValidate_Spec_SystemHealthReview_TimeFrame(t *testing.T) {
Columns: []ColumnSpec{
{DisplayName: "Column 1", Labels: map[LabelKey][]LabelValue{"key1": {"value1"}}},
},
Thresholds: Thresholds{
RedLessThanOrEqual: func(f float64) *float64 { return &f }(0.0),
GreenGreaterThan: func(f float64) *float64 { return &f }(0.2),
},
},
},
"fails with forbidden fields provided in latest point snapshot": {
Expand Down Expand Up @@ -774,6 +890,10 @@ func TestValidate_Spec_SystemHealthReview_TimeFrame(t *testing.T) {
Columns: []ColumnSpec{
{DisplayName: "Column 1", Labels: map[LabelKey][]LabelValue{"key1": {"value1"}}},
},
Thresholds: Thresholds{
RedLessThanOrEqual: func(f float64) *float64 { return &f }(0.0),
GreenGreaterThan: func(f float64) *float64 { return &f }(0.2),
},
},
},
} {
Expand Down Expand Up @@ -853,6 +973,11 @@ func validReport() Report {
},
},
},
Thresholds: Thresholds{
RedLessThanOrEqual: func(f float64) *float64 { return &f }(0.8),
GreenGreaterThan: func(f float64) *float64 { return &f }(0.95),
ShowNoData: true,
},
},
},
}
Expand Down
Binary file modified sdk/config_activity.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.

0 comments on commit 107fd6f

Please sign in to comment.