diff --git a/DESIGN.md b/DESIGN.md index 9e80114..f0f7b63 100644 --- a/DESIGN.md +++ b/DESIGN.md @@ -31,3 +31,57 @@ If the target system is unable to reconcile the created [`PrometheusRule`](https - We should look into how to implement [Multiwindow, Multi-Burn-Rate Alerts](https://sre.google/workbook/alerting-on-slos/#6-multiwindow-multi-burn-rate-alerts) based on the OpenSLO spec - [Objectives](https://github.com/OpenSLO/OpenSLO#objectives) are cool + +## Design + +```mermaid +--- +title: OSKO Dependency Graph +--- +flowchart LR; +subgraph userspace +sloObject(SLO) +sliObject(SLI) +dataSourceObject(DataSource) +end +subgraph controllerspace +prometheusRuleObject(PrometheusRule) +end + +sloController(SLO Controller) +mimirRuleController(Mimir Rule Controller) +sliController(SLI Controller) +dataSourceController(DataSource Controller) + +subgraph external +mimir[Mimir] +cortex[Cortex] +end + +cortexRuleController(Optional: Cortex Rule Controller) +cortexRuleController --> |Watch| prometheusRuleObject +cortexRuleController --> |Updates| cortex + +mimirRuleController --> |Watch| prometheusRuleObject +mimirRuleController --> |Updates| mimir + +sloController --> |Own| sloObject +sloController --> |Watch| sliObject +sloController --> |Watch| dataSourceObject +sloController --> |Own| prometheusRuleObject + +sliController --> |Own| sliObject +sliController --> |Watch| dataSourceObject + +dataSourceController --> |Own| dataSourceObject + +sloObject --> |Reference| sliObject +sliObject --> |Reference| dataSourceObject +%% a direct SLO -> DataSource reference is probably unnecessary, since we resolve it through the SLIs, right? 
It also makes +%% the graph ugly :D, uncomment the lines below if you want to try it +%% sloObject --> |Reference| dataSourceObject +%% prometheusRuleObject --> |Reference| dataSourceObject + + + +``` diff --git a/Dockerfile b/Dockerfile index 1306111..cab2a5b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -13,7 +13,7 @@ RUN go mod download # Copy the go source COPY cmd/main.go cmd/main.go -COPY apis/ apis/ +COPY api/ api/ COPY internal/ internal/ # Build diff --git a/Makefile b/Makefile index e0e1ee3..428cf90 100644 --- a/Makefile +++ b/Makefile @@ -74,6 +74,10 @@ build: manifests generate fmt vet ## Build manager binary. run: manifests generate fmt vet ## Run a controller from your host. go run ./cmd/main.go +.PHONY: run-pretty-debug +run-pretty-debug: manifests generate fmt vet ## Run a controller from your host with pretty debug output. + go run ./cmd/main.go --zap-log-level=debug 2>&1 | zap-pretty + # If you wish built the manager image targeting other platforms you can use the --platform flag. # (i.e. docker build --platform linux/arm64 ). However, you must enable docker buildKit for it. 
# More info: https://docs.docker.com/develop/develop-images/build_enhancements/ diff --git a/PROJECT b/PROJECT index 6d0a21b..63d8da3 100644 --- a/PROJECT +++ b/PROJECT @@ -16,7 +16,7 @@ resources: domain: openslo group: openslo kind: Datasource - path: github.com/oskoperator/osko/apis/openslo/v1 + path: github.com/oskoperator/osko/api/openslo/v1 version: v1 - api: crdVersion: v1 @@ -25,7 +25,7 @@ resources: domain: openslo group: openslo kind: SLO - path: github.com/oskoperator/osko/apis/openslo/v1 + path: github.com/oskoperator/osko/api/openslo/v1 version: v1 - api: crdVersion: v1 @@ -34,7 +34,7 @@ resources: domain: openslo group: openslo kind: SLI - path: github.com/oskoperator/osko/apis/openslo/v1 + path: github.com/oskoperator/osko/api/openslo/v1 version: v1 - api: crdVersion: v1 @@ -43,7 +43,7 @@ resources: domain: openslo group: openslo kind: AlertPolicy - path: github.com/oskoperator/osko/apis/openslo/v1 + path: github.com/oskoperator/osko/api/openslo/v1 version: v1 - api: crdVersion: v1 @@ -52,7 +52,7 @@ resources: domain: openslo group: openslo kind: AlertCondition - path: github.com/oskoperator/osko/apis/openslo/v1 + path: github.com/oskoperator/osko/api/openslo/v1 version: v1 - api: crdVersion: v1 @@ -61,7 +61,7 @@ resources: domain: openslo group: openslo kind: AlertNotificationTarget - path: github.com/oskoperator/osko/apis/openslo/v1 + path: github.com/oskoperator/osko/api/openslo/v1 version: v1 - api: crdVersion: v1 @@ -69,7 +69,7 @@ resources: domain: openslo group: openslo kind: Service - path: github.com/oskoperator/osko/apis/openslo/v1 + path: github.com/oskoperator/osko/api/openslo/v1 version: v1 - controller: true domain: openslo @@ -81,4 +81,13 @@ resources: group: monitoring.coreos.com kind: PrometheusRule version: v1 +- api: + crdVersion: v1 + namespaced: true + controller: true + domain: openslo + group: osko + kind: MimirRule + path: github.com/oskoperator/osko/api/osko/v1alpha1 + version: v1alpha1 version: "3" diff --git 
a/apis/openslo/v1/alertcondition_types.go b/api/openslo/v1/alertcondition_types.go similarity index 100% rename from apis/openslo/v1/alertcondition_types.go rename to api/openslo/v1/alertcondition_types.go diff --git a/apis/openslo/v1/alertnotificationtarget_types.go b/api/openslo/v1/alertnotificationtarget_types.go similarity index 100% rename from apis/openslo/v1/alertnotificationtarget_types.go rename to api/openslo/v1/alertnotificationtarget_types.go diff --git a/apis/openslo/v1/alertpolicy_types.go b/api/openslo/v1/alertpolicy_types.go similarity index 100% rename from apis/openslo/v1/alertpolicy_types.go rename to api/openslo/v1/alertpolicy_types.go diff --git a/apis/openslo/v1/common_types.go b/api/openslo/v1/common_types.go similarity index 100% rename from apis/openslo/v1/common_types.go rename to api/openslo/v1/common_types.go diff --git a/apis/openslo/v1/datasource_types.go b/api/openslo/v1/datasource_types.go similarity index 96% rename from apis/openslo/v1/datasource_types.go rename to api/openslo/v1/datasource_types.go index ef9b927..8d68bec 100644 --- a/apis/openslo/v1/datasource_types.go +++ b/api/openslo/v1/datasource_types.go @@ -1,7 +1,7 @@ package v1 import ( - osko "github.com/oskoperator/osko/apis/osko/v1alpha1" + osko "github.com/oskoperator/osko/api/osko/v1alpha1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) diff --git a/apis/openslo/v1/groupversion_info.go b/api/openslo/v1/groupversion_info.go similarity index 100% rename from apis/openslo/v1/groupversion_info.go rename to api/openslo/v1/groupversion_info.go diff --git a/apis/openslo/v1/service_types.go b/api/openslo/v1/service_types.go similarity index 100% rename from apis/openslo/v1/service_types.go rename to api/openslo/v1/service_types.go diff --git a/apis/openslo/v1/sli_types.go b/api/openslo/v1/sli_types.go similarity index 100% rename from apis/openslo/v1/sli_types.go rename to api/openslo/v1/sli_types.go diff --git a/apis/openslo/v1/slo_types.go b/api/openslo/v1/slo_types.go 
similarity index 96% rename from apis/openslo/v1/slo_types.go rename to api/openslo/v1/slo_types.go index 8d49540..43c3fe5 100644 --- a/apis/openslo/v1/slo_types.go +++ b/api/openslo/v1/slo_types.go @@ -77,7 +77,7 @@ type SLOStatus struct { //+kubebuilder:object:root=true //+kubebuilder:subresource:status -//+kubebuilder:printcolumn:name="Status",type=string,JSONPath=.status.ready,description="The reason for the current status of the SLO resource" +//+kubebuilder:printcolumn:name="Ready",type=string,JSONPath=.status.ready,description="The reason for the current status of the SLO resource" //+kubebuilder:printcolumn:name="Window",type=string,JSONPath=.spec.timeWindow[0].duration,description="The time window for the SLO resource" //+kubebuilder:printcolumn:name="Age",type=date,JSONPath=.metadata.creationTimestamp,description="The time when the SLO resource was created" diff --git a/apis/openslo/v1/zz_generated.deepcopy.go b/api/openslo/v1/zz_generated.deepcopy.go similarity index 99% rename from apis/openslo/v1/zz_generated.deepcopy.go rename to api/openslo/v1/zz_generated.deepcopy.go index 89cbc48..719fb01 100644 --- a/apis/openslo/v1/zz_generated.deepcopy.go +++ b/api/openslo/v1/zz_generated.deepcopy.go @@ -6,7 +6,7 @@ package v1 import ( - "github.com/oskoperator/osko/apis/osko/v1alpha1" + "github.com/oskoperator/osko/api/osko/v1alpha1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" runtime "k8s.io/apimachinery/pkg/runtime" ) diff --git a/apis/osko/v1alpha1/common_types.go b/api/osko/v1alpha1/common_types.go similarity index 100% rename from apis/osko/v1alpha1/common_types.go rename to api/osko/v1alpha1/common_types.go diff --git a/apis/osko/v1alpha1/connection_details.go b/api/osko/v1alpha1/connection_details.go similarity index 100% rename from apis/osko/v1alpha1/connection_details.go rename to api/osko/v1alpha1/connection_details.go diff --git a/apis/osko/v1alpha1/cortex_types.go b/api/osko/v1alpha1/cortex_types.go similarity index 100% rename from 
apis/osko/v1alpha1/cortex_types.go rename to api/osko/v1alpha1/cortex_types.go diff --git a/apis/osko/v1alpha1/groupversion_info.go b/api/osko/v1alpha1/groupversion_info.go similarity index 80% rename from apis/osko/v1alpha1/groupversion_info.go rename to api/osko/v1alpha1/groupversion_info.go index 7f4ee47..08a7c18 100644 --- a/apis/osko/v1alpha1/groupversion_info.go +++ b/api/osko/v1alpha1/groupversion_info.go @@ -1,6 +1,6 @@ // Package v1alpha1 contains API Schema definitions for the slo-kubernetes-operator v1alpha1 API group // +kubebuilder:object:generate=true -// +groupName=slo-kubernetes-operator.openslo +// +groupName=osko.dev package v1alpha1 import ( @@ -10,7 +10,7 @@ import ( var ( // GroupVersion is group version used to register these objects - GroupVersion = schema.GroupVersion{Group: "slo-kubernetes-operator.openslo", Version: "v1alpha1"} + GroupVersion = schema.GroupVersion{Group: "osko.dev", Version: "v1alpha1"} // SchemeBuilder is used to add go types to the GroupVersionKind scheme SchemeBuilder = &scheme.Builder{GroupVersion: GroupVersion} diff --git a/apis/osko/v1alpha1/mimir_types.go b/api/osko/v1alpha1/mimir_types.go similarity index 100% rename from apis/osko/v1alpha1/mimir_types.go rename to api/osko/v1alpha1/mimir_types.go diff --git a/api/osko/v1alpha1/mimirrule_types.go b/api/osko/v1alpha1/mimirrule_types.go new file mode 100644 index 0000000..97a45b2 --- /dev/null +++ b/api/osko/v1alpha1/mimirrule_types.go @@ -0,0 +1,69 @@ +package v1alpha1 + +import ( + "github.com/prometheus/common/model" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// EDIT THIS FILE! THIS IS SCAFFOLDING FOR YOU TO OWN! +// NOTE: json tags are required. Any new fields you add must have json tags for the fields to be serialized. + +// MimirRuleSpec defines the desired state of MimirRule +type MimirRuleSpec struct { + // Groups is an example field of MimirRule. 
Edit mimirrule_types.go to remove/update + Groups []RuleGroup `json:"groups"` +} + +// MimirRuleStatus defines the observed state of MimirRule +type MimirRuleStatus struct { + Conditions []metav1.Condition `json:"conditions,omitempty"` + LastEvaluationTime metav1.Time `json:"lastEvaluationTime,omitempty"` + Ready string `json:"ready,omitempty"` +} + +type RuleGroup struct { + Name string `json:"name"` + SourceTenants []string `json:"source_tenants,omitempty"` + Rules []Rule `json:"rules"` + Interval model.Duration `json:"interval,omitempty"` + EvaluationDelay *model.Duration `json:"evaluation_delay,omitempty"` + Limit int `json:"limit,omitempty"` + AlignEvaluationTimeOnInterval bool `json:"align_evaluation_time_on_interval,omitempty"` +} + +type Rule struct { + Record string `json:"record,omitempty"` + Alert string `json:"alert,omitempty"` + Expr string `json:"expr"` + For model.Duration `json:"for,omitempty"` + KeepFiringFor model.Duration `json:"keep_firing_for,omitempty"` + Labels map[string]string `json:"labels,omitempty"` + Annotations map[string]string `json:"annotations,omitempty"` +} + +//+kubebuilder:object:root=true +//+kubebuilder:subresource:status +//+kubebuilder:printcolumn:name="Ready",type=string,JSONPath=.status.ready,description="The reason for the current status of the MimirRule resource" +//+kubebuilder:printcolumn:name="Age",type=date,JSONPath=`.metadata.creationTimestamp` + +// MimirRule is the Schema for the mimirrules API +type MimirRule struct { + metav1.TypeMeta `json:",inline"` + metav1.ObjectMeta `json:"metadata,omitempty"` + + Spec MimirRuleSpec `json:"spec,omitempty"` + Status MimirRuleStatus `json:"status,omitempty"` +} + +//+kubebuilder:object:root=true + +// MimirRuleList contains a list of MimirRule +type MimirRuleList struct { + metav1.TypeMeta `json:",inline"` + metav1.ListMeta `json:"metadata,omitempty"` + Items []MimirRule `json:"items"` +} + +func init() { + SchemeBuilder.Register(&MimirRule{}, &MimirRuleList{}) +} diff --git 
a/api/osko/v1alpha1/zz_generated.deepcopy.go b/api/osko/v1alpha1/zz_generated.deepcopy.go new file mode 100644 index 0000000..48ea7dc --- /dev/null +++ b/api/osko/v1alpha1/zz_generated.deepcopy.go @@ -0,0 +1,266 @@ +//go:build !ignore_autogenerated +// +build !ignore_autogenerated + +// Code generated by controller-gen. DO NOT EDIT. + +package v1alpha1 + +import ( + "github.com/prometheus/common/model" + "k8s.io/apimachinery/pkg/apis/meta/v1" + runtime "k8s.io/apimachinery/pkg/runtime" +) + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *ConnectionDetails) DeepCopyInto(out *ConnectionDetails) { + *out = *in + if in.SourceTenants != nil { + in, out := &in.SourceTenants, &out.SourceTenants + *out = make([]string, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ConnectionDetails. +func (in *ConnectionDetails) DeepCopy() *ConnectionDetails { + if in == nil { + return nil + } + out := new(ConnectionDetails) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Cortex) DeepCopyInto(out *Cortex) { + *out = *in + out.Ruler = in.Ruler + in.Multitenancy.DeepCopyInto(&out.Multitenancy) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Cortex. +func (in *Cortex) DeepCopy() *Cortex { + if in == nil { + return nil + } + out := new(Cortex) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Mimir) DeepCopyInto(out *Mimir) { + *out = *in + out.Ruler = in.Ruler + in.Multitenancy.DeepCopyInto(&out.Multitenancy) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Mimir. 
+func (in *Mimir) DeepCopy() *Mimir { + if in == nil { + return nil + } + out := new(Mimir) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MimirRule) DeepCopyInto(out *MimirRule) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) + in.Spec.DeepCopyInto(&out.Spec) + in.Status.DeepCopyInto(&out.Status) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MimirRule. +func (in *MimirRule) DeepCopy() *MimirRule { + if in == nil { + return nil + } + out := new(MimirRule) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *MimirRule) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MimirRuleList) DeepCopyInto(out *MimirRuleList) { + *out = *in + out.TypeMeta = in.TypeMeta + in.ListMeta.DeepCopyInto(&out.ListMeta) + if in.Items != nil { + in, out := &in.Items, &out.Items + *out = make([]MimirRule, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MimirRuleList. +func (in *MimirRuleList) DeepCopy() *MimirRuleList { + if in == nil { + return nil + } + out := new(MimirRuleList) + in.DeepCopyInto(out) + return out +} + +// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object. +func (in *MimirRuleList) DeepCopyObject() runtime.Object { + if c := in.DeepCopy(); c != nil { + return c + } + return nil +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *MimirRuleSpec) DeepCopyInto(out *MimirRuleSpec) { + *out = *in + if in.Groups != nil { + in, out := &in.Groups, &out.Groups + *out = make([]RuleGroup, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MimirRuleSpec. +func (in *MimirRuleSpec) DeepCopy() *MimirRuleSpec { + if in == nil { + return nil + } + out := new(MimirRuleSpec) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MimirRuleStatus) DeepCopyInto(out *MimirRuleStatus) { + *out = *in + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]v1.Condition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + in.LastEvaluationTime.DeepCopyInto(&out.LastEvaluationTime) +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MimirRuleStatus. +func (in *MimirRuleStatus) DeepCopy() *MimirRuleStatus { + if in == nil { + return nil + } + out := new(MimirRuleStatus) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Multitenancy) DeepCopyInto(out *Multitenancy) { + *out = *in + if in.SourceTenants != nil { + in, out := &in.SourceTenants, &out.SourceTenants + *out = make([]string, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Multitenancy. +func (in *Multitenancy) DeepCopy() *Multitenancy { + if in == nil { + return nil + } + out := new(Multitenancy) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. 
+func (in *Rule) DeepCopyInto(out *Rule) { + *out = *in + if in.Labels != nil { + in, out := &in.Labels, &out.Labels + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } + if in.Annotations != nil { + in, out := &in.Annotations, &out.Annotations + *out = make(map[string]string, len(*in)) + for key, val := range *in { + (*out)[key] = val + } + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Rule. +func (in *Rule) DeepCopy() *Rule { + if in == nil { + return nil + } + out := new(Rule) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *RuleGroup) DeepCopyInto(out *RuleGroup) { + *out = *in + if in.SourceTenants != nil { + in, out := &in.SourceTenants, &out.SourceTenants + *out = make([]string, len(*in)) + copy(*out, *in) + } + if in.Rules != nil { + in, out := &in.Rules, &out.Rules + *out = make([]Rule, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } + if in.EvaluationDelay != nil { + in, out := &in.EvaluationDelay, &out.EvaluationDelay + *out = new(model.Duration) + **out = **in + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RuleGroup. +func (in *RuleGroup) DeepCopy() *RuleGroup { + if in == nil { + return nil + } + out := new(RuleGroup) + in.DeepCopyInto(out) + return out +} + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Ruler) DeepCopyInto(out *Ruler) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Ruler. 
+func (in *Ruler) DeepCopy() *Ruler { + if in == nil { + return nil + } + out := new(Ruler) + in.DeepCopyInto(out) + return out +} diff --git a/apis/osko/v1alpha1/zz_generated.deepcopy.go b/apis/osko/v1alpha1/zz_generated.deepcopy.go deleted file mode 100644 index a067698..0000000 --- a/apis/osko/v1alpha1/zz_generated.deepcopy.go +++ /dev/null @@ -1,97 +0,0 @@ -//go:build !ignore_autogenerated -// +build !ignore_autogenerated - -// Code generated by controller-gen. DO NOT EDIT. - -package v1alpha1 - -import () - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *ConnectionDetails) DeepCopyInto(out *ConnectionDetails) { - *out = *in - if in.SourceTenants != nil { - in, out := &in.SourceTenants, &out.SourceTenants - *out = make([]string, len(*in)) - copy(*out, *in) - } -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ConnectionDetails. -func (in *ConnectionDetails) DeepCopy() *ConnectionDetails { - if in == nil { - return nil - } - out := new(ConnectionDetails) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *Cortex) DeepCopyInto(out *Cortex) { - *out = *in - out.Ruler = in.Ruler - in.Multitenancy.DeepCopyInto(&out.Multitenancy) -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Cortex. -func (in *Cortex) DeepCopy() *Cortex { - if in == nil { - return nil - } - out := new(Cortex) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *Mimir) DeepCopyInto(out *Mimir) { - *out = *in - out.Ruler = in.Ruler - in.Multitenancy.DeepCopyInto(&out.Multitenancy) -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Mimir. 
-func (in *Mimir) DeepCopy() *Mimir { - if in == nil { - return nil - } - out := new(Mimir) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *Multitenancy) DeepCopyInto(out *Multitenancy) { - *out = *in - if in.SourceTenants != nil { - in, out := &in.SourceTenants, &out.SourceTenants - *out = make([]string, len(*in)) - copy(*out, *in) - } -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Multitenancy. -func (in *Multitenancy) DeepCopy() *Multitenancy { - if in == nil { - return nil - } - out := new(Multitenancy) - in.DeepCopyInto(out) - return out -} - -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *Ruler) DeepCopyInto(out *Ruler) { - *out = *in -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Ruler. -func (in *Ruler) DeepCopy() *Ruler { - if in == nil { - return nil - } - out := new(Ruler) - in.DeepCopyInto(out) - return out -} diff --git a/cmd/main.go b/cmd/main.go index 5cd628b..2f569d2 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -18,11 +18,11 @@ import ( "sigs.k8s.io/controller-runtime/pkg/log/zap" metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server" - openslov1 "github.com/oskoperator/osko/apis/openslo/v1" - oskov1alpha1 "github.com/oskoperator/osko/apis/osko/v1alpha1" + openslov1 "github.com/oskoperator/osko/api/openslo/v1" + oskov1alpha1 "github.com/oskoperator/osko/api/osko/v1alpha1" - monitoringcoreoscomcontroller "github.com/oskoperator/osko/internal/controller/monitoring.coreos.com" openslov1controller "github.com/oskoperator/osko/internal/controller/openslo" + oskocontroller "github.com/oskoperator/osko/internal/controller/osko" //+kubebuilder:scaffold:imports ) @@ -124,11 +124,12 @@ func main() { setupLog.Error(err, "unable to create controller", 
"controller", "AlertNotificationTarget") os.Exit(1) } - if err = (&monitoringcoreoscomcontroller.PrometheusRuleReconciler{ - Client: mgr.GetClient(), - Scheme: mgr.GetScheme(), + if err = (&oskocontroller.MimirRuleReconciler{ + Client: mgr.GetClient(), + Scheme: mgr.GetScheme(), + Recorder: mgr.GetEventRecorderFor("mimirrule-controller"), }).SetupWithManager(mgr); err != nil { - setupLog.Error(err, "unable to create controller", "controller", "PrometheusRule") + setupLog.Error(err, "unable to create controller", "controller", "MimirRule") os.Exit(1) } //+kubebuilder:scaffold:builder diff --git a/config/crd/bases/openslo.com_slos.yaml b/config/crd/bases/openslo.com_slos.yaml index 69b31eb..26ac106 100644 --- a/config/crd/bases/openslo.com_slos.yaml +++ b/config/crd/bases/openslo.com_slos.yaml @@ -17,7 +17,7 @@ spec: - additionalPrinterColumns: - description: The reason for the current status of the SLO resource jsonPath: .status.ready - name: Status + name: Ready type: string - description: The time window for the SLO resource jsonPath: .spec.timeWindow[0].duration diff --git a/config/crd/bases/osko.dev_mimirrules.yaml b/config/crd/bases/osko.dev_mimirrules.yaml new file mode 100644 index 0000000..c3a5807 --- /dev/null +++ b/config/crd/bases/osko.dev_mimirrules.yaml @@ -0,0 +1,196 @@ +--- +apiVersion: apiextensions.k8s.io/v1 +kind: CustomResourceDefinition +metadata: + annotations: + controller-gen.kubebuilder.io/version: v0.12.0 + name: mimirrules.osko.dev +spec: + group: osko.dev + names: + kind: MimirRule + listKind: MimirRuleList + plural: mimirrules + singular: mimirrule + scope: Namespaced + versions: + - additionalPrinterColumns: + - description: The reason for the current status of the MimirRule resource + jsonPath: .status.ready + name: Ready + type: string + - jsonPath: .metadata.creationTimestamp + name: Age + type: date + name: v1alpha1 + schema: + openAPIV3Schema: + description: MimirRule is the Schema for the mimirrules API + properties: + apiVersion: 
+ description: 'APIVersion defines the versioned schema of this representation + of an object. Servers should convert recognized schemas to the latest + internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' + type: string + kind: + description: 'Kind is a string value representing the REST resource this + object represents. Servers may infer this from the endpoint the client + submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' + type: string + metadata: + type: object + spec: + description: MimirRuleSpec defines the desired state of MimirRule + properties: + groups: + description: Groups is an example field of MimirRule. Edit mimirrule_types.go + to remove/update + items: + properties: + align_evaluation_time_on_interval: + type: boolean + evaluation_delay: + description: Duration wraps time.Duration. It is used to parse + the custom duration format from YAML. This type should not + propagate beyond the scope of input/output processing. + format: int64 + type: integer + interval: + description: Duration wraps time.Duration. It is used to parse + the custom duration format from YAML. This type should not + propagate beyond the scope of input/output processing. + format: int64 + type: integer + limit: + type: integer + name: + type: string + rules: + items: + properties: + alert: + type: string + annotations: + additionalProperties: + type: string + type: object + expr: + type: string + for: + description: Duration wraps time.Duration. It is used + to parse the custom duration format from YAML. This + type should not propagate beyond the scope of input/output + processing. + format: int64 + type: integer + keep_firing_for: + description: Duration wraps time.Duration. It is used + to parse the custom duration format from YAML. 
This + type should not propagate beyond the scope of input/output + processing. + format: int64 + type: integer + labels: + additionalProperties: + type: string + type: object + record: + type: string + required: + - expr + type: object + type: array + source_tenants: + items: + type: string + type: array + required: + - name + - rules + type: object + type: array + required: + - groups + type: object + status: + description: MimirRuleStatus defines the observed state of MimirRule + properties: + conditions: + items: + description: "Condition contains details for one aspect of the current + state of this API Resource. --- This struct is intended for direct + use as an array at the field path .status.conditions. For example, + \n type FooStatus struct{ // Represents the observations of a + foo's current state. // Known .status.conditions.type are: \"Available\", + \"Progressing\", and \"Degraded\" // +patchMergeKey=type // +patchStrategy=merge + // +listType=map // +listMapKey=type Conditions []metav1.Condition + `json:\"conditions,omitempty\" patchStrategy:\"merge\" patchMergeKey:\"type\" + protobuf:\"bytes,1,rep,name=conditions\"` \n // other fields }" + properties: + lastTransitionTime: + description: lastTransitionTime is the last time the condition + transitioned from one status to another. This should be when + the underlying condition changed. If that is not known, then + using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: message is a human readable message indicating + details about the transition. This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: observedGeneration represents the .metadata.generation + that the condition was set based upon. For instance, if .metadata.generation + is currently 12, but the .status.conditions[x].observedGeneration + is 9, the condition is out of date with respect to the current + state of the instance. 
+ format: int64 + minimum: 0 + type: integer + reason: + description: reason contains a programmatic identifier indicating + the reason for the condition's last transition. Producers + of specific condition types may define expected values and + meanings for this field, and whether the values are considered + a guaranteed API. The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + --- Many .condition.type values are consistent across resources + like Available, but because arbitrary conditions can be useful + (see .node.status.conditions), the ability to deconflict is + important. The regex it matches is (dns1123SubdomainFmt/)?(qualifiedNameFmt) + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + lastEvaluationTime: + format: date-time + type: string + ready: + type: string + type: object + type: object + served: true + storage: true + subresources: + status: {} diff --git a/config/crd/bases/slo-kubernetes-operator.openslo_slos.yaml b/config/crd/bases/slo-kubernetes-operator.openslo_slos.yaml deleted file mode 100644 index 94cacbf..0000000 --- a/config/crd/bases/slo-kubernetes-operator.openslo_slos.yaml +++ /dev/null @@ -1,435 +0,0 @@ ---- -apiVersion: apiextensions.k8s.io/v1 -kind: CustomResourceDefinition -metadata: - annotations: - controller-gen.kubebuilder.io/version: v0.12.0 - name: slos.slo-kubernetes-operator.openslo -spec: - group: slo-kubernetes-operator.openslo - names: - kind: SLO - listKind: SLOList - plural: 
slos - singular: slo - scope: Namespaced - versions: - - name: v1alpha1 - schema: - openAPIV3Schema: - description: SLO is the Schema for the slos API - properties: - apiVersion: - description: 'APIVersion defines the versioned schema of this representation - of an object. Servers should convert recognized schemas to the latest - internal value, and may reject unrecognized values. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources' - type: string - kind: - description: 'Kind is a string value representing the REST resource this - object represents. Servers may infer this from the endpoint the client - submits requests to. Cannot be updated. In CamelCase. More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds' - type: string - metadata: - type: object - spec: - description: SLOSpec defines the desired state of SLO - properties: - dataSink: - type: string - spec: - description: SLOSpec defines the desired state of SLO - properties: - alertPolicies: - items: - properties: - alertPolicyRef: - type: string - kind: - enum: - - AlertPolicy - type: string - metadata: - properties: - annotations: - additionalProperties: - type: string - type: object - finalizers: - items: - type: string - type: array - labels: - additionalProperties: - type: string - type: object - name: - type: string - namespace: - type: string - type: object - spec: - description: AlertPolicySpec defines the desired state of - AlertPolicy - properties: - alertWhenBreaching: - type: boolean - alertWhenNoData: - type: boolean - alertWhenResolved: - type: boolean - conditions: - items: - properties: - conditionRef: - type: string - kind: - enum: - - AlertCondition - type: string - metadata: - properties: - annotations: - additionalProperties: - type: string - type: object - finalizers: - items: - type: string - type: array - labels: - additionalProperties: - type: string - type: object - name: - type: 
string - namespace: - type: string - type: object - spec: - description: AlertConditionSpec defines the desired - state of AlertCondition - properties: - condition: - properties: - alertAfter: - pattern: ^[1-9]\d*[s m h d]$ - type: string - kind: - enum: - - Burnrate - type: string - lookbackWindow: - pattern: ^[1-9]\d*[s m h d]$ - type: string - op: - enum: - - lte - - gte - - lt - - gt - type: string - threshold: - type: string - type: object - description: - maxLength: 1050 - type: string - severity: - type: string - type: object - type: object - maxItems: 1 - type: array - description: - maxLength: 1050 - type: string - notificationTargets: - items: - properties: - kind: - enum: - - AlertNotificationTarget - type: string - metadata: - properties: - annotations: - additionalProperties: - type: string - type: object - finalizers: - items: - type: string - type: array - labels: - additionalProperties: - type: string - type: object - name: - type: string - namespace: - type: string - type: object - spec: - description: AlertNotificationTargetSpec defines - the desired state of AlertNotificationTarget - properties: - description: - maxLength: 1050 - type: string - target: - type: string - type: object - targetRef: - type: string - type: object - type: array - type: object - type: object - type: array - budgetingMethod: - enum: - - Occurrences - - Timeslices - - RatioTimeslices - type: string - description: - maxLength: 1050 - type: string - indicator: - description: SLISpec defines the desired state of SLI - properties: - description: - maxLength: 1050 - type: string - ratioMetric: - properties: - bad: - additionalProperties: - properties: - metricSourceRef: - type: string - spec: - type: string - type: - type: string - type: object - type: object - counter: - type: boolean - good: - additionalProperties: - properties: - metricSourceRef: - type: string - spec: - type: string - type: - type: string - type: object - type: object - raw: - additionalProperties: - 
properties: - metricSourceRef: - type: string - spec: - type: string - type: - type: string - type: object - type: object - rawType: - enum: - - success - - failure - type: string - total: - additionalProperties: - properties: - metricSourceRef: - type: string - spec: - type: string - type: - type: string - type: object - type: object - type: object - thresholdMetric: - properties: - metricSource: - properties: - metricSourceRef: - type: string - spec: - type: string - type: - type: string - type: object - type: object - type: object - indicatorRef: - type: string - objectives: - items: - properties: - compositeWeight: - anyOf: - - type: integer - - type: string - pattern: ^(\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))(([KMGTPE]i)|[numkMGTPE]|([eE](\+|-)?(([0-9]+(\.[0-9]*)?)|(\.[0-9]+))))?$ - x-kubernetes-int-or-string: true - displayName: - type: string - indicator: - properties: - metadata: - properties: - annotations: - additionalProperties: - type: string - type: object - finalizers: - items: - type: string - type: array - labels: - additionalProperties: - type: string - type: object - name: - type: string - namespace: - type: string - type: object - spec: - description: SLISpec defines the desired state of SLI - properties: - description: - maxLength: 1050 - type: string - ratioMetric: - properties: - bad: - additionalProperties: - properties: - metricSourceRef: - type: string - spec: - type: string - type: - type: string - type: object - type: object - counter: - type: boolean - good: - additionalProperties: - properties: - metricSourceRef: - type: string - spec: - type: string - type: - type: string - type: object - type: object - raw: - additionalProperties: - properties: - metricSourceRef: - type: string - spec: - type: string - type: - type: string - type: object - type: object - rawType: - enum: - - success - - failure - type: string - total: - additionalProperties: - properties: - metricSourceRef: - type: string - spec: - type: string - type: - type: string - 
type: object - type: object - type: object - thresholdMetric: - properties: - metricSource: - properties: - metricSourceRef: - type: string - spec: - type: string - type: - type: string - type: object - type: object - type: object - type: object - indicatorRef: - type: string - op: - enum: - - lte - - gte - - lt - - gt - type: string - target: - type: string - targetPercent: - type: string - timeSliceTarget: - type: string - timeSliceWindow: - pattern: ^[1-9]\d*[s m h d]$ - type: string - value: - type: string - type: object - type: array - service: - type: string - timeWindow: - items: - properties: - calendar: - properties: - startTime: - description: Date with time in 24h format, format without - time zone - example: "2020-01-21 12:30:00" - type: string - timeZone: - description: Name as in IANA Time Zone Database - example: America/New_York - type: string - type: object - duration: - pattern: ^[1-9]\d*[s m h d]$ - type: string - isRolling: - type: boolean - type: object - maxItems: 1 - type: array - type: object - type: object - status: - description: SLOStatus defines the observed state of SLO - type: object - type: object - served: true - storage: true - subresources: - status: {} diff --git a/config/crd/kustomization.yaml b/config/crd/kustomization.yaml index 74089d1..74b152a 100644 --- a/config/crd/kustomization.yaml +++ b/config/crd/kustomization.yaml @@ -9,7 +9,7 @@ resources: - bases/openslo.com_alertconditions.yaml - bases/openslo.com_alertnotificationtargets.yaml - bases/openslo.com_services.yaml -- bases/slo-kubernetes-operator.openslo_slos.yaml +- bases/osko.openslo_mimirrules.yaml #+kubebuilder:scaffold:crdkustomizeresource patches: [] @@ -22,6 +22,7 @@ patches: [] #- patches/webhook_in_alertconditions.yaml #- patches/webhook_in_alertnotificationtargets.yaml #- patches/webhook_in_services.yaml +#- path: patches/webhook_in_osko_mimirrules.yaml #+kubebuilder:scaffold:crdkustomizewebhookpatch # [CERTMANAGER] To enable cert-manager, uncomment all the 
sections with [CERTMANAGER] prefix. @@ -33,6 +34,7 @@ patches: [] #- patches/cainjection_in_alertconditions.yaml #- patches/cainjection_in_alertnotificationtargets.yaml #- patches/cainjection_in_services.yaml +#- path: patches/cainjection_in_osko_mimirrules.yaml #+kubebuilder:scaffold:crdkustomizecainjectionpatch # the following config is for teaching kustomize how to do kustomization for CRDs. diff --git a/config/rbac/osko_mimirrule_editor_role.yaml b/config/rbac/osko_mimirrule_editor_role.yaml new file mode 100644 index 0000000..1f254a1 --- /dev/null +++ b/config/rbac/osko_mimirrule_editor_role.yaml @@ -0,0 +1,31 @@ +# permissions for end users to edit mimirrules. +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/name: clusterrole + app.kubernetes.io/instance: mimirrule-editor-role + app.kubernetes.io/component: rbac + app.kubernetes.io/created-by: osko + app.kubernetes.io/part-of: osko + app.kubernetes.io/managed-by: kustomize + name: mimirrule-editor-role +rules: +- apiGroups: + - osko.openslo + resources: + - mimirrules + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - osko.openslo + resources: + - mimirrules/status + verbs: + - get diff --git a/config/rbac/osko_mimirrule_viewer_role.yaml b/config/rbac/osko_mimirrule_viewer_role.yaml new file mode 100644 index 0000000..80d6f79 --- /dev/null +++ b/config/rbac/osko_mimirrule_viewer_role.yaml @@ -0,0 +1,27 @@ +# permissions for end users to view mimirrules. 
+apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + labels: + app.kubernetes.io/name: clusterrole + app.kubernetes.io/instance: mimirrule-viewer-role + app.kubernetes.io/component: rbac + app.kubernetes.io/created-by: osko + app.kubernetes.io/part-of: osko + app.kubernetes.io/managed-by: kustomize + name: mimirrule-viewer-role +rules: +- apiGroups: + - osko.openslo + resources: + - mimirrules + verbs: + - get + - list + - watch +- apiGroups: + - osko.openslo + resources: + - mimirrules/status + verbs: + - get diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index edf35cc..3414989 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -193,3 +193,29 @@ rules: - get - patch - update +- apiGroups: + - osko.openslo + resources: + - mimirrules + verbs: + - create + - delete + - get + - list + - patch + - update + - watch +- apiGroups: + - osko.openslo + resources: + - mimirrules/finalizers + verbs: + - update +- apiGroups: + - osko.openslo + resources: + - mimirrules/status + verbs: + - get + - patch + - update diff --git a/config/samples/kustomization.yaml b/config/samples/kustomization.yaml index ac7fe54..7c393f1 100644 --- a/config/samples/kustomization.yaml +++ b/config/samples/kustomization.yaml @@ -7,5 +7,5 @@ resources: - openslo_v1_alertcondition.yaml - openslo_v1_alertnotificationtarget.yaml - openslo_v1_service.yaml -- slo-kubernetes-operator_v1alpha1_slo.yaml +- osko_v1alpha1_mimirrule.yaml #+kubebuilder:scaffold:manifestskustomizesamples diff --git a/config/samples/osko_v1alpha1_mimirrule.yaml b/config/samples/osko_v1alpha1_mimirrule.yaml new file mode 100644 index 0000000..79ab404 --- /dev/null +++ b/config/samples/osko_v1alpha1_mimirrule.yaml @@ -0,0 +1,12 @@ +apiVersion: osko.openslo/v1alpha1 +kind: MimirRule +metadata: + labels: + app.kubernetes.io/name: mimirrule + app.kubernetes.io/instance: mimirrule-sample + app.kubernetes.io/part-of: osko + app.kubernetes.io/managed-by: kustomize + 
app.kubernetes.io/created-by: osko + name: mimirrule-sample +spec: + # TODO(user): Add fields here diff --git a/config/samples/slo-kubernetes-operator_v1alpha1_slo.yaml b/config/samples/slo-kubernetes-operator_v1alpha1_slo.yaml deleted file mode 100644 index e23afdf..0000000 --- a/config/samples/slo-kubernetes-operator_v1alpha1_slo.yaml +++ /dev/null @@ -1,12 +0,0 @@ -apiVersion: slo-kubernetes-operator.openslo/v1alpha1 -kind: SLO -metadata: - labels: - app.kubernetes.io/name: slo - app.kubernetes.io/instance: slo-sample - app.kubernetes.io/part-of: slo-kubernetes-operator - app.kubernetes.io/managed-by: kustomize - app.kubernetes.io/created-by: slo-kubernetes-operator - name: slo-sample -spec: - # TODO(user): Add fields here diff --git a/go.mod b/go.mod index 433e621..6c163db 100644 --- a/go.mod +++ b/go.mod @@ -7,7 +7,6 @@ require ( github.com/grafana/mimir v0.0.0-20231101181902-68d120862184 github.com/onsi/ginkgo/v2 v2.11.0 github.com/onsi/gomega v1.27.10 - github.com/opentracing/opentracing-go v1.2.1-0.20220228012449-10b1cf09e00b github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.68.0 github.com/prometheus/client_golang v1.17.0 github.com/prometheus/prometheus v1.99.0 @@ -95,6 +94,7 @@ require ( github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect github.com/oklog/ulid v1.3.1 // indirect + github.com/opentracing/opentracing-go v1.2.1-0.20220228012449-10b1cf09e00b // indirect github.com/pkg/browser v0.0.0-20210911075715-681adbf594b8 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect diff --git a/internal/controller/monitoring.coreos.com/prometheusrule_controller.go b/internal/controller/monitoring.coreos.com/prometheusrule_controller.go index b4624f0..8c9b900 100644 --- a/internal/controller/monitoring.coreos.com/prometheusrule_controller.go +++ 
b/internal/controller/monitoring.coreos.com/prometheusrule_controller.go @@ -2,30 +2,39 @@ package monitoringcoreoscom import ( "context" - "github.com/go-logr/logr" - "github.com/grafana/mimir/pkg/mimirtool/rules/rwrulefmt" - openslov1 "github.com/oskoperator/osko/apis/openslo/v1" - "github.com/oskoperator/osko/internal/mimirtool" + openslov1 "github.com/oskoperator/osko/api/openslo/v1" + "github.com/oskoperator/osko/internal/helpers" "github.com/oskoperator/osko/internal/utils" monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" - "github.com/prometheus/prometheus/model/rulefmt" - "gopkg.in/yaml.v3" apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/fields" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/tools/record" + "reflect" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/handler" + ctrllog "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/reconcile" +) + +const ( + objectiveRef = ".metaData.ownerReferences.name" ) // PrometheusRuleReconciler reconciles a PrometheusRule object type PrometheusRuleReconciler struct { client.Client - Scheme *runtime.Scheme + Scheme *runtime.Scheme + Recorder record.EventRecorder } -//+kubebuilder:rbac:groups=monitoring.coreos.com,resources=prometheusrules,verbs=get;list;watch;create;update;patch;delete -//+kubebuilder:rbac:groups=monitoring.coreos.com,resources=prometheusrules/status,verbs=get;update;patch -//+kubebuilder:rbac:groups=monitoring.coreos.com,resources=prometheusrules/finalizers,verbs=update +// +kubebuilder:rbac:groups=monitoring.coreos.com,resources=prometheusrules,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=monitoring.coreos.com,resources=prometheusrules/status,verbs=get;update;patch +// 
+kubebuilder:rbac:groups=monitoring.coreos.com,resources=prometheusrules/finalizers,verbs=update +// +kubebuilder:rbac:groups=core,resources=events,verbs=create;patch // Reconcile is part of the main kubernetes reconciliation loop which aims to // move the current state of the cluster closer to the desired state. @@ -37,104 +46,177 @@ type PrometheusRuleReconciler struct { // For more details, check Reconcile and its Result here: // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.16.0/pkg/reconcile func (r *PrometheusRuleReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { - log := log.FromContext(ctx) + log := ctrllog.FromContext(ctx) + + slo := &openslov1.SLO{} + sli := &openslov1.SLI{} prometheusRule := &monitoringv1.PrometheusRule{} + newPrometheusRule := &monitoringv1.PrometheusRule{} err := r.Get(ctx, req.NamespacedName, prometheusRule) if err != nil { if apierrors.IsNotFound(err) { - log.Info("PrometheusRule deleted") - log.Info("PrometheusRule deleted") + log.Info("PrometheusRule resource not found. Ignoring since object mus be deleted") return ctrl.Result{}, nil } log.Error(err, "Failed to get PrometheusRule") return ctrl.Result{}, err } + for _, ref := range prometheusRule.ObjectMeta.OwnerReferences { + if ref.Kind == "SLO" { + sloNamespacedName := types.NamespacedName{ + Name: ref.Name, + Namespace: req.Namespace, + } + + if err := r.Get(ctx, sloNamespacedName, slo); err != nil { + log.Error(err, "Failed to get SLO") + return ctrl.Result{}, err + } + } + } + ds := &openslov1.Datasource{} - if err := r.Get(context.TODO(), client.ObjectKey{ + if err := r.Get(ctx, client.ObjectKey{ Namespace: prometheusRule.Namespace, Name: "logging-ds", }, ds); err != nil { log.Error(err, "Failed to get Datasource") return ctrl.Result{}, err } - err = r.createMimirRule(log, prometheusRule, ds) + + if apierrors.IsNotFound(err) { + log.Info("PrometheusRule not found. 
Let's make one.") + prometheusRule, err = helpers.CreatePrometheusRule(slo, sli) + if err != nil { + err = utils.UpdateStatus( + ctx, + slo, + r.Client, + "Ready", + metav1.ConditionFalse, + "Failed to create Prometheus Rule", + ) + if err != nil { + log.Error(err, "Failed to update SLO status") + return ctrl.Result{}, err + } + log.Error(err, "Failed to create new PrometheusRule") + return ctrl.Result{}, err + } + if err := r.Create(ctx, prometheusRule); err != nil { + r.Recorder.Event(slo, "Error", "FailedToCreatePrometheusRule", "Failed to create Prometheus Rule") + if err := r.Status().Update(ctx, prometheusRule); err != nil { + log.Error(err, "Failed to update SLO status") + slo.Status.Ready = "Failed" + if err := r.Status().Update(ctx, slo); err != nil { + log.Error(err, "Failed to update SLO ready status") + return ctrl.Result{}, err + } + return ctrl.Result{}, err + } + } else { + log.Info("PrometheusRule created successfully") + r.Recorder.Event(slo, "Normal", "PrometheusRuleCreated", "PrometheusRule created successfully") + slo.Status.Ready = "True" + if err := r.Status().Update(ctx, slo); err != nil { + log.Error(err, "Failed to update SLO ready status") + return ctrl.Result{}, nil + } + } + } + + // Update PrometheusRule + // This is the main logic for the PrometheusRule update + // Here we should take the existing PrometheusRule and update it with the new one + log.Info("PrometheusRule already exists, we should update it") + newPrometheusRule, err = helpers.CreatePrometheusRule(slo, sli) if err != nil { - log.Error(err, "Failed to create Mimir rule") + log.Error(err, "Failed to create new PrometheusRule") + return ctrl.Result{}, err + } + + compareResult := reflect.DeepEqual(prometheusRule, newPrometheusRule) + if compareResult { + log.Info("PrometheusRule is already up to date") + return ctrl.Result{}, nil + } + + // has to be the same as for previous object, otherwise it will not be updated and throw an error + newPrometheusRule.ResourceVersion = 
prometheusRule.ResourceVersion + + log.Info("Updating PrometheusRule", "PrometheusRule Name", newPrometheusRule.Name, "PrometheusRule Namespace", newPrometheusRule.Namespace) + if err := r.Update(ctx, newPrometheusRule); err != nil { + log.Error(err, "Failed to update PrometheusRule") + return ctrl.Result{}, err + } + if err := r.Status().Update(ctx, slo); err != nil { + log.Error(err, "Failed to update SLO status") + slo.Status.Ready = "Failed" + if err := r.Status().Update(ctx, slo); err != nil { + log.Error(err, "Failed to update SLO ready status") + return ctrl.Result{}, err + } return ctrl.Result{}, err } - log.Info("Mimir rule created") log.Info("PrometheusRule reconciled") return ctrl.Result{}, nil } -func (r *PrometheusRuleReconciler) createMimirRule(log logr.Logger, rule *monitoringv1.PrometheusRule, ds *openslov1.Datasource) error { +func (r *PrometheusRuleReconciler) createIndices(mgr ctrl.Manager) error { + return mgr.GetFieldIndexer().IndexField( + context.TODO(), + &monitoringv1.PrometheusRule{}, + objectiveRef, + func(object client.Object) []string { + pr := object.(*monitoringv1.PrometheusRule) + if pr.ObjectMeta.OwnerReferences == nil { + return nil + } + return []string{pr.ObjectMeta.OwnerReferences[0].Name} + }) +} - var mimirRuleNodes []rulefmt.RuleNode +func (r *PrometheusRuleReconciler) findObjectsForSlo() func(ctx context.Context, a client.Object) []reconcile.Request { + return func(ctx context.Context, a client.Object) []reconcile.Request { + attachedSLOs := &openslov1.SLOList{} + listOpts := &client.ListOptions{ + FieldSelector: fields.OneTermEqualSelector(objectiveRef, a.GetName()), + Namespace: a.GetNamespace(), + } + err := r.List(ctx, attachedSLOs, listOpts) + if err != nil { + return []reconcile.Request{} + } - for _, group := range rule.Spec.Groups { - for _, r := range group.Rules { - mimirRuleNode := rulefmt.RuleNode{ - Record: yaml.Node{ - Kind: 8, - Value: r.Record, - }, - Alert: yaml.Node{}, - Expr: yaml.Node{ - Kind: 8, - Value: 
r.Expr.StrVal, + requests := make([]reconcile.Request, len(attachedSLOs.Items)) + for i, item := range attachedSLOs.Items { + requests[i] = reconcile.Request{ + NamespacedName: types.NamespacedName{ + Name: item.Name, + Namespace: item.Namespace, }, - Labels: rule.Labels, } - mimirRuleNodes = append(mimirRuleNodes, mimirRuleNode) } + return requests } - - dsConfig := utils.DataSourceConfig{DataSource: ds} - sourceTenants := dsConfig.ParseTenantAnnotation() - - mimirRuleGroup := rwrulefmt.RuleGroup{ - RuleGroup: rulefmt.RuleGroup{ - Name: rule.Name, - SourceTenants: sourceTenants, - Rules: mimirRuleNodes, - }, - RWConfigs: []rwrulefmt.RemoteWriteConfig{}, - } - - dataSource := &openslov1.Datasource{} - if err := r.Get(context.TODO(), client.ObjectKey{ - Namespace: ds.Namespace, - Name: ds.Name, - }, dataSource); err != nil { - log.Error(err, "Failed to get Datasource") - return err - } - - mClient := mimirtool.MimirClientConfig{ - Address: ds.Spec.ConnectionDetails.Address, - TenantId: ds.Spec.ConnectionDetails.TargetTenant, - } - - mimirClient, err := mClient.NewMimirClient() - if err != nil { - log.Error(err, "Failed to create Mimir client") - return err - } - - if err := mimirClient.CreateRuleGroup(context.Background(), "osko", mimirRuleGroup); err != nil { - log.Error(err, "Failed to create rule group") - return err - } - - return nil } // SetupWithManager sets up the controller with the Manager. func (r *PrometheusRuleReconciler) SetupWithManager(mgr ctrl.Manager) error { + if err := r.createIndices(mgr); err != nil { + return err + } return ctrl.NewControllerManagedBy(mgr). For(&monitoringv1.PrometheusRule{}). + Watches( + &openslov1.SLO{}, + handler.EnqueueRequestsFromMapFunc(r.findObjectsForSlo()), + ). + Watches( + &openslov1.Datasource{}, + handler.EnqueueRequestsFromMapFunc(r.findObjectsForSlo())). 
Complete(r) } diff --git a/internal/controller/openslo/alertcondition_controller.go b/internal/controller/openslo/alertcondition_controller.go index 012abb3..28488d6 100644 --- a/internal/controller/openslo/alertcondition_controller.go +++ b/internal/controller/openslo/alertcondition_controller.go @@ -8,7 +8,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/log" - openslov1 "github.com/oskoperator/osko/apis/openslo/v1" + openslov1 "github.com/oskoperator/osko/api/openslo/v1" ) // AlertConditionReconciler reconciles a AlertCondition object diff --git a/internal/controller/openslo/alertnotificationtarget_controller.go b/internal/controller/openslo/alertnotificationtarget_controller.go index 73567f9..04480c4 100644 --- a/internal/controller/openslo/alertnotificationtarget_controller.go +++ b/internal/controller/openslo/alertnotificationtarget_controller.go @@ -8,7 +8,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/log" - openslov1 "github.com/oskoperator/osko/apis/openslo/v1" + openslov1 "github.com/oskoperator/osko/api/openslo/v1" ) // AlertNotificationTargetReconciler reconciles a AlertNotificationTarget object diff --git a/internal/controller/openslo/alertpolicy_controller.go b/internal/controller/openslo/alertpolicy_controller.go index 06b6d30..a132bf2 100644 --- a/internal/controller/openslo/alertpolicy_controller.go +++ b/internal/controller/openslo/alertpolicy_controller.go @@ -8,7 +8,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/log" - openslov1 "github.com/oskoperator/osko/apis/openslo/v1" + openslov1 "github.com/oskoperator/osko/api/openslo/v1" ) // AlertPolicyReconciler reconciles a AlertPolicy object diff --git a/internal/controller/openslo/datasource_controller.go b/internal/controller/openslo/datasource_controller.go index fdfb671..dfc4e37 100644 --- a/internal/controller/openslo/datasource_controller.go +++ 
b/internal/controller/openslo/datasource_controller.go @@ -13,7 +13,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/log" "time" - openslov1 "github.com/oskoperator/osko/apis/openslo/v1" + openslov1 "github.com/oskoperator/osko/api/openslo/v1" ) const ( diff --git a/internal/controller/openslo/sli_controller.go b/internal/controller/openslo/sli_controller.go index c3ea31a..97bf1b0 100644 --- a/internal/controller/openslo/sli_controller.go +++ b/internal/controller/openslo/sli_controller.go @@ -8,7 +8,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/log" - openslov1 "github.com/oskoperator/osko/apis/openslo/v1" + openslov1 "github.com/oskoperator/osko/api/openslo/v1" ) const ( diff --git a/internal/controller/openslo/slo_controller.go b/internal/controller/openslo/slo_controller.go index a964259..3583b01 100644 --- a/internal/controller/openslo/slo_controller.go +++ b/internal/controller/openslo/slo_controller.go @@ -2,8 +2,10 @@ package controller import ( "context" - "fmt" - openslov1 "github.com/oskoperator/osko/apis/openslo/v1" + "github.com/go-logr/logr" + openslov1 "github.com/oskoperator/osko/api/openslo/v1" + oskov1alpha1 "github.com/oskoperator/osko/api/osko/v1alpha1" + "github.com/oskoperator/osko/internal/helpers" "github.com/oskoperator/osko/internal/utils" monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" @@ -11,19 +13,19 @@ import ( "k8s.io/apimachinery/pkg/fields" "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" - "k8s.io/apimachinery/pkg/util/intstr" "k8s.io/client-go/tools/record" - "reflect" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "sigs.k8s.io/controller-runtime/pkg/handler" - "sigs.k8s.io/controller-runtime/pkg/log" + ctrllog "sigs.k8s.io/controller-runtime/pkg/log" 
"sigs.k8s.io/controller-runtime/pkg/reconcile" ) const ( - indicatorRef = ".spec.indicatorRef" - errGetSLO = "could not get SLO Object" + indicatorRef = ".spec.indicatorRef" + errGetSLO = "could not get SLO Object" + mimirRuleFinalizer = "finalizer.mimir.osko.dev" ) // SLOReconciler reconciles a SLO object @@ -49,16 +51,15 @@ type SLOReconciler struct { // For more details, check Reconcile and its Result here: // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.15.0/pkg/reconcile func (r *SLOReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { - log := log.FromContext(ctx) + log := ctrllog.FromContext(ctx) + log.Info("Reconciling SLO") sli := &openslov1.SLI{} slo := &openslov1.SLO{} - newPromRule := &monitoringv1.PrometheusRule{} - err := r.Get(ctx, req.NamespacedName, slo) if err != nil { if apierrors.IsNotFound(err) { - log.Info("SLO resource not found. Object must have been deleted.") + log.V(1).Info("SLO resource not found. Object must have been deleted.") return ctrl.Result{}, nil } log.Error(err, errGetSLO) @@ -72,15 +73,7 @@ func (r *SLOReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.R apierrors.IsNotFound(err) { log.Error(err, errGetSLI) - err = utils.UpdateStatus( - ctx, - slo, - r.Client, - "Ready", - metav1.ConditionFalse, - "SLIObjectNotFound", - "SLI Object not found", - ) + err = utils.UpdateStatus(ctx, slo, r.Client, "Ready", metav1.ConditionFalse, "SLI Object not found") if err != nil { log.Error(err, "Failed to update SLO status") return ctrl.Result{}, err @@ -89,24 +82,14 @@ func (r *SLOReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.R } } } else if slo.Spec.Indicator != nil { - log.Info("SLO has an inline SLI") + log.V(1).Info("SLO has an inline SLI") sli.Name = slo.Spec.Indicator.Metadata.Name sli.Spec.Description = slo.Spec.Indicator.Spec.Description if slo.Spec.Indicator.Spec.RatioMetric != (openslov1.RatioMetricSpec{}) { sli.Spec.RatioMetric = 
slo.Spec.Indicator.Spec.RatioMetric } - log.Info("SLI created", "SLI Name", sli.Name, "SLI Namespace", sli.Namespace) - r.Recorder.Event(slo, "Normal", "SLICreated", fmt.Sprintf("SLI %s created", sli.Name)) } else { - err = utils.UpdateStatus( - ctx, - slo, - r.Client, - "Ready", - metav1.ConditionFalse, - "SLIObjectNotFound", - "SLI Object not found", - ) + err = utils.UpdateStatus(ctx, slo, r.Client, "Ready", metav1.ConditionFalse, "SLI Object not found") if err != nil { log.Error(err, "Failed to update SLO status") r.Recorder.Event(slo, "Error", "SLIObjectNotFound", "SLI Object not found") @@ -124,238 +107,89 @@ func (r *SLOReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.R if apierrors.IsNotFound(err) { log.Info("PrometheusRule not found. Let's make one.") - promRule, err = r.createPrometheusRule(slo, sli) + promRule, err = helpers.CreatePrometheusRule(slo, sli) if err != nil { - err = utils.UpdateStatus( - ctx, - slo, - r.Client, - "Ready", - metav1.ConditionFalse, - "FailedToCreatePrometheusRule", - "Failed to create Prometheus Rule", - ) + err = utils.UpdateStatus(ctx, slo, r.Client, "Ready", metav1.ConditionFalse, "Failed to create Prometheus Rule") if err != nil { log.Error(err, "Failed to update SLO status") return ctrl.Result{}, err } log.Error(err, "Failed to create new PrometheusRule") - r.Recorder.Event(slo, "Error", "FailedToCreatePrometheusRule", "Failed to create Prometheus Rule") return ctrl.Result{}, err } if err := r.Create(ctx, promRule); err != nil { - if err := r.Status().Update(ctx, slo); err != nil { + r.Recorder.Event(slo, "Error", "FailedToCreatePrometheusRule", "Failed to create Prometheus Rule") + if err := r.Status().Update(ctx, promRule); err != nil { log.Error(err, "Failed to update SLO status") - slo.Status.Ready = "Failed" - if err := r.Status().Update(ctx, slo); err != nil { + if err = utils.UpdateStatus(ctx, slo, r.Client, "Ready", metav1.ConditionFalse, "Failed to create Prometheus Rule"); err != nil { 
log.Error(err, "Failed to update SLO ready status") return ctrl.Result{}, err } return ctrl.Result{}, err } - } - } else { - // This is the main logic for the PrometheusRule update - // Here we should take the existing PrometheusRule and update it with the new one - log.Info("PrometheusRule already exists, we should update it") - newPromRule, err = r.createPrometheusRule(slo, sli) - if err != nil { - log.Error(err, "Failed to create new PrometheusRule") - r.Recorder.Event(slo, "Error", "FailedToCreatePrometheusRule", "Failed to create Prometheus Rule") - return ctrl.Result{}, err - } - - compareResult := reflect.DeepEqual(promRule, newPromRule) - if compareResult { - log.Info("PrometheusRule is already up to date") + } else { + log.Info("PrometheusRule created successfully") + r.Recorder.Event(slo, "Normal", "PrometheusRuleCreated", "PrometheusRule created successfully") + slo.Status.Ready = "True" + if err := r.Status().Update(ctx, slo); err != nil { + log.Error(err, "Failed to update SLO ready status") + return ctrl.Result{}, err + } return ctrl.Result{}, nil } + } - // has to be the same as for previous object, otherwise it will not be updated and throw an error - newPromRule.ResourceVersion = promRule.ResourceVersion + mimirRule := &oskov1alpha1.MimirRule{} + err = r.Get(ctx, types.NamespacedName{ + Name: slo.Name, + Namespace: slo.Namespace, + }, mimirRule) - log.Info("Updating PrometheusRule", "PrometheusRule Name", newPromRule.Name, "PrometheusRule Namespace", newPromRule.Namespace) - if err := r.Update(ctx, newPromRule); err != nil { - log.Error(err, "Failed to update PrometheusRule") + if apierrors.IsNotFound(err) { + log.Info("MimirRule not found. 
Let's make one.") + mimirRule, err = helpers.NewMimirRule(slo, promRule) + if err != nil { + if err = utils.UpdateStatus(ctx, slo, r.Client, "Ready", metav1.ConditionFalse, "Failed to create Mimir Rule Object"); err != nil { + log.Error(err, "Failed to update SLO status") + return ctrl.Result{}, err + } + log.Error(err, "Failed to create new PrometheusRule") return ctrl.Result{}, err } - if err := r.Status().Update(ctx, slo); err != nil { - log.Error(err, "Failed to update SLO status") - slo.Status.Ready = "Failed" + if err = r.Create(ctx, mimirRule); err != nil { + r.Recorder.Event(slo, "Error", "FailedToCreateMimirRule", "Failed to create Mimir Rule") + if err = r.Status().Update(ctx, slo); err != nil { + log.Error(err, "Failed to update SLO status") + if err = utils.UpdateStatus(ctx, slo, r.Client, "Ready", metav1.ConditionFalse, "Failed to create Mimir Rule"); err != nil { + log.Error(err, "Failed to update SLO ready status") + return ctrl.Result{}, err + } + return ctrl.Result{}, err + } + } else { + log.Info("MimirRule created successfully") + r.Recorder.Event(slo, "Normal", "MimirRuleCreated", "MimirRule created successfully") + slo.Status.Ready = "True" if err := r.Status().Update(ctx, slo); err != nil { log.Error(err, "Failed to update SLO ready status") return ctrl.Result{}, err } - return ctrl.Result{}, err + if !utils.ContainString(mimirRule.GetFinalizers(), mimirRuleFinalizer) { + if err := r.addFinalizer(log, mimirRule); err != nil { + return ctrl.Result{}, err + } + } + return ctrl.Result{}, nil } } - err = utils.UpdateStatus( - ctx, - slo, - r.Client, - "Ready", - metav1.ConditionTrue, - "PrometheusRuleCreated", - "PrometheusRule created", - ) - if err != nil { + if err = utils.UpdateStatus(ctx, slo, r.Client, "Ready", metav1.ConditionTrue, "PrometheusRule created"); err != nil { return ctrl.Result{}, err } - r.Recorder.Event(slo, "Normal", "PrometheusRuleCreated", "PrometheusRule created successfully") - log.Info("Reconciling SLO") - return 
ctrl.Result{}, nil } -func (r *SLOReconciler) createPrometheusRule(slo *openslov1.SLO, sli *openslov1.SLI) (*monitoringv1.PrometheusRule, error) { - var monitoringRules []monitoringv1.Rule - var targetVector monitoringv1.Rule - defaultRateWindow := "1m" - //burnRateTimeWindows := []string{"1h", "6h", "3d"} - sloTimeWindowDuration := string(slo.Spec.TimeWindow[0].Duration) - m := utils.MetricLabelParams{Slo: slo, Sli: sli} - - targetVector.Record = "osko_slo_target" - targetVector.Expr = intstr.Parse(fmt.Sprintf("vector(%s)", slo.Spec.Objectives[0].Value)) - m.TimeWindow = sloTimeWindowDuration - targetVector.Labels = m.NewMetricLabelGenerator() - - // for now, total and good are required. bad is optional and is calculated as (total - good) if not provided - // TODO: validate that the SLO budgeting method is Occurrences and that the SLIs are all ratio metrics in other case throw an error - targetVectorConfig := utils.RuleConfig{ - Record: "slo_target", - Expr: "", - TimeWindow: sloTimeWindowDuration, - Slo: slo, - Sli: sli, - MetricLabelCompiler: &m, - } - - totalRule28Config := utils.RuleConfig{ - RuleType: "total", - Record: "sli_ratio_total", - Expr: "sum(increase(%s[%s]))", - TimeWindow: sloTimeWindowDuration, - Slo: slo, - Sli: sli, - MetricLabelCompiler: &m, - } - - goodRule28Config := utils.RuleConfig{ - RuleType: "good", - Record: "sli_ratio_total", - Expr: "sum(increase(%s[%s]))", - TimeWindow: sloTimeWindowDuration, - Slo: slo, - Sli: sli, - MetricLabelCompiler: &m, - } - - badRule28Config := utils.RuleConfig{ - RuleType: "bad", - Record: "sli_ratio_total", - Expr: "sum(increase(%s[%s]))", - TimeWindow: sloTimeWindowDuration, - Slo: slo, - Sli: sli, - MetricLabelCompiler: &m, - } - - totalRuleConfig := utils.RuleConfig{ - RuleType: "total", - Record: "sli_ratio_total", - Expr: "sum(increase(%s[%s]))", - TimeWindow: defaultRateWindow, - Slo: slo, - Sli: sli, - SupportiveRule: &totalRule28Config, - MetricLabelCompiler: &m, - } - - goodRuleConfig := 
utils.RuleConfig{ - RuleType: "good", - Record: "sli_ratio_good", - Expr: "sum(increase(%s[%s]))", - TimeWindow: defaultRateWindow, - Slo: slo, - Sli: sli, - SupportiveRule: &goodRule28Config, - MetricLabelCompiler: &m, - } - - badRuleConfig := utils.RuleConfig{ - RuleType: "bad", - Record: "sli_ratio_bad", - Expr: "sum(increase(%s[%s]))", - TimeWindow: defaultRateWindow, - Slo: slo, - Sli: sli, - SupportiveRule: &badRule28Config, - MetricLabelCompiler: &m, - } - - errorBudgetRuleConfig := utils.BudgetRuleConfig{ - Record: "error_budget_available", - Slo: slo, - Sli: sli, - TargetRuleConfig: &targetVectorConfig, - TotalRuleConfig: &totalRuleConfig, - BadRuleConfig: &badRuleConfig, - } - - configs := []utils.RuleConfig{ - totalRuleConfig, - goodRuleConfig, - badRuleConfig, - } - - for _, config := range configs { - rule, supportiveRule := config.NewRatioRule(config.TimeWindow) - if rule == nil || supportiveRule == nil { - continue - } - monitoringRules = append(monitoringRules, *rule) - monitoringRules = append(monitoringRules, *supportiveRule) - } - - monitoringRules = append(monitoringRules, targetVectorConfig.NewTargetRule()) - monitoringRules = append(monitoringRules, errorBudgetRuleConfig.NewBudgetRule()) - - objectMeta := metav1.ObjectMeta{ - Name: slo.Name, - Namespace: slo.Namespace, - Labels: slo.Labels, - Annotations: slo.Annotations, - OwnerReferences: slo.OwnerReferences, - } - - ruleGroup := []monitoringv1.RuleGroup{ - { - Name: slo.Name, - Rules: monitoringRules, - }, - } - - rule := &monitoringv1.PrometheusRule{ - TypeMeta: metav1.TypeMeta{ - APIVersion: "monitoring.coreos.com/v1", - Kind: "PrometheusRule", - }, - ObjectMeta: objectMeta, - Spec: monitoringv1.PrometheusRuleSpec{ - Groups: ruleGroup, - }, - } - // Set SLO instance as the owner and controller. 
- if err := ctrl.SetControllerReference(slo, rule, r.Scheme); err != nil { - return nil, err - } - - return rule, nil -} - func (r *SLOReconciler) createIndices(mgr ctrl.Manager) error { return mgr.GetFieldIndexer().IndexField( context.TODO(), @@ -395,6 +229,18 @@ func (r *SLOReconciler) findObjectsForSli() func(ctx context.Context, a client.O } } +func (r *SLOReconciler) addFinalizer(log logr.Logger, rule *oskov1alpha1.MimirRule) error { + log.Info("Adding Finalizer for the MimirRule") + controllerutil.AddFinalizer(rule, mimirRuleFinalizer) + + err := r.Update(context.Background(), rule) + if err != nil { + log.Error(err, "Failed to update MimirRule with finalizer") + return err + } + return nil +} + // SetupWithManager sets up the controller with the Manager. func (r *SLOReconciler) SetupWithManager(mgr ctrl.Manager) error { if err := r.createIndices(mgr); err != nil { @@ -403,6 +249,7 @@ func (r *SLOReconciler) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). For(&openslov1.SLO{}). Owns(&monitoringv1.PrometheusRule{}). + Owns(&oskov1alpha1.MimirRule{}). 
Watches( &openslov1.SLI{}, handler.EnqueueRequestsFromMapFunc(r.findObjectsForSli()), diff --git a/internal/controller/openslo/suite_test.go b/internal/controller/openslo/suite_test.go index 7d246a8..58f9830 100644 --- a/internal/controller/openslo/suite_test.go +++ b/internal/controller/openslo/suite_test.go @@ -14,7 +14,7 @@ import ( logf "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/log/zap" - openslov1 "github.com/oskoperator/osko/apis/openslo/v1" + openslov1 "github.com/oskoperator/osko/api/openslo/v1" //+kubebuilder:scaffold:imports ) diff --git a/internal/controller/osko/mimirrule_controller.go b/internal/controller/osko/mimirrule_controller.go new file mode 100644 index 0000000..3dc50be --- /dev/null +++ b/internal/controller/osko/mimirrule_controller.go @@ -0,0 +1,296 @@ +package osko + +import ( + "context" + "github.com/go-logr/logr" + mimirclient "github.com/grafana/mimir/pkg/mimirtool/client" + "github.com/grafana/mimir/pkg/mimirtool/rules/rwrulefmt" + openslov1 "github.com/oskoperator/osko/api/openslo/v1" + "github.com/oskoperator/osko/internal/helpers" + "github.com/oskoperator/osko/internal/utils" + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + "github.com/prometheus/prometheus/model/rulefmt" + "gopkg.in/yaml.v3" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/types" + "k8s.io/client-go/tools/record" + "reflect" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" + ctrllog "sigs.k8s.io/controller-runtime/pkg/log" + + oskov1alpha1 "github.com/oskoperator/osko/api/osko/v1alpha1" + "k8s.io/apimachinery/pkg/runtime" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" +) + +// MimirRuleReconciler reconciles a MimirRule object +type MimirRuleReconciler struct { + client.Client + Scheme *runtime.Scheme + Recorder record.EventRecorder + MimirClient *mimirclient.MimirClient +} + +const ( + mimirRuleFinalizer = 
"finalizer.mimir.osko.dev" + mimirRuleNamespace = "osko" +) + +// +kubebuilder:rbac:groups=osko.openslo,resources=mimirrules,verbs=get;list;watch;create;update;patch;delete +// +kubebuilder:rbac:groups=osko.openslo,resources=mimirrules/status,verbs=get;update;patch +// +kubebuilder:rbac:groups=osko.openslo,resources=mimirrules/finalizers,verbs=update +// +kubebuilder:rbac:groups=core,resources=events,verbs=create;patch + +// Reconcile is part of the main kubernetes reconciliation loop which aims to +// move the current state of the cluster closer to the desired state. +// TODO(user): Modify the Reconcile function to compare the state specified by +// the MimirRule object against the actual cluster state, and then +// perform operations to make the cluster state reflect the state specified by +// the user. +// +// For more details, check Reconcile and its Result here: +// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.16.3/pkg/reconcile +func (r *MimirRuleReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { + log := ctrllog.FromContext(ctx) + + ds := &openslov1.Datasource{} + slo := &openslov1.SLO{} + prometheusRule := &monitoringv1.PrometheusRule{} + mimirRule := &oskov1alpha1.MimirRule{} + newMimirRule := &oskov1alpha1.MimirRule{} + + err := r.Get(ctx, req.NamespacedName, prometheusRule) + if err != nil { + if apierrors.IsNotFound(err) { + log.Info("PrometheusRule resource not found. 
Ignoring since object must be deleted") + return ctrl.Result{}, nil + } + log.Error(err, "Failed to get PrometheusRule") + return ctrl.Result{}, err + } + + if err := r.Get(ctx, client.ObjectKey{ + Namespace: prometheusRule.Namespace, + Name: "logging-ds", + }, ds); err != nil { + log.Error(err, "Failed to get Datasource") + return ctrl.Result{}, err + } + + log.Info("Datasource found", "Datasource", ds) + + if err := r.newMimirClient(ds); err != nil { + log.Error(err, "Failed to create MimirClient") + return ctrl.Result{}, err + } + + rgs, err := helpers.NewMimirRuleGroup(prometheusRule) + if err != nil { + log.Error(err, "Failed to convert MimirRuleGroup") + return ctrl.Result{}, err + } + + if err := r.createMimirRuleGroupAPI(log, rgs); err != nil { + log.Error(err, "Failed to create MimirRuleGroup") + return ctrl.Result{}, err + } + + err = r.Get(ctx, req.NamespacedName, mimirRule) + if err != nil { + if apierrors.IsNotFound(err) { + log.Info("MimirRule resource not found. Ignoring since object must be deleted") + return ctrl.Result{}, nil + } + log.Error(err, "Failed to get MimirRule") + return ctrl.Result{}, err + } + + if apierrors.IsNotFound(err) { + log.Info("MimirRule not found. 
Let's make one.") + mimirRule, err = helpers.NewMimirRule(slo, prometheusRule) + + if err = r.Create(ctx, mimirRule); err != nil { + r.Recorder.Event(mimirRule, "Error", "FailedToCreateMimirRule", "Failed to create Mimir Rule") + if err = r.Status().Update(ctx, mimirRule); err != nil { + log.Error(err, "Failed to update MimirRule status") + return ctrl.Result{}, err + } else { + log.Info("MimirRule created successfully") + r.Recorder.Event(mimirRule, "Normal", "MimirRuleCreated", "MimirRule created successfully") + mimirRule.Status.Ready = "True" + if err := r.Status().Update(ctx, mimirRule); err != nil { + log.Error(err, "Failed to update MimirRule ready status") + return ctrl.Result{}, err + } + return ctrl.Result{}, nil + } + } + } + + for _, ref := range mimirRule.ObjectMeta.OwnerReferences { + if ref.Kind == "SLO" { + sloNamespacedName := types.NamespacedName{ + Name: ref.Name, + Namespace: req.Namespace, + } + + if err := r.Get(ctx, sloNamespacedName, slo); err != nil { + log.Error(err, "Failed to get SLO") + return ctrl.Result{}, err + } + } + } + + if !utils.ContainString(mimirRule.GetFinalizers(), mimirRuleFinalizer) { + if err := r.addFinalizer(log, mimirRule); err != nil { + return ctrl.Result{}, err + } + } + + log.Info("MmimirRule already exists, we should update it.") + newMimirRule, err = helpers.NewMimirRule(slo, prometheusRule) + if err != nil { + log.Error(err, "Failed to create new MimirRule") + return ctrl.Result{}, err + } + + compareResult := reflect.DeepEqual(mimirRule.Spec, newMimirRule.Spec) + if compareResult { + log.Info("MimirRule is up to date") + return ctrl.Result{}, nil + } + + newMimirRule.ResourceVersion = mimirRule.ResourceVersion + + if err := r.Update(ctx, newMimirRule); err != nil { + log.Error(err, "Failed to update MimirRule") + mimirRule.Status.Ready = "False" + if err := r.Status().Update(ctx, mimirRule); err != nil { + log.Error(err, "Failed to update SLO status") + return ctrl.Result{}, err + } + return ctrl.Result{}, err 
+ } + + log.Info("MimirRule reconciled") + return ctrl.Result{}, nil +} + +func (r *MimirRuleReconciler) newMimirClient(ds *openslov1.Datasource) error { + mClientConfig := helpers.MimirClientConfig{ + Address: ds.Spec.ConnectionDetails.Address, + TenantId: ds.Spec.ConnectionDetails.TargetTenant, + } + + mimirClient, err := mClientConfig.NewMimirClient() + if err != nil { + return err + } + + r.MimirClient = mimirClient + + return nil +} + +func (r *MimirRuleReconciler) createMimirRuleGroupAPI(log logr.Logger, rule *oskov1alpha1.RuleGroup) error { + var mimirRuleNodes []rulefmt.RuleNode + for _, r := range rule.Rules { + mimirRuleNode := rulefmt.RuleNode{ + Record: yaml.Node{ + Kind: 8, + Value: r.Record, + }, + Alert: yaml.Node{}, + Expr: yaml.Node{ + Kind: 8, + Value: r.Expr, + }, + Labels: r.Labels, + } + mimirRuleNodes = append(mimirRuleNodes, mimirRuleNode) + } + + log.Info("Source tenants", "SourceTenants", rule.SourceTenants) + + mimirRule := rwrulefmt.RuleGroup{ + RuleGroup: rulefmt.RuleGroup{ + Name: rule.Name, + Rules: mimirRuleNodes, + SourceTenants: rule.SourceTenants, + }, + } + + if err := r.MimirClient.CreateRuleGroup(context.Background(), mimirRuleNamespace, mimirRule); err != nil { + log.Error(err, "Failed to create rule group") + return err + } + + return nil +} + +func (r *MimirRuleReconciler) getMimirRuleGroupAPI(log logr.Logger, rule *monitoringv1.PrometheusRule) *rwrulefmt.RuleGroup { + mimirRuleGroup, err := r.MimirClient.GetRuleGroup(context.Background(), mimirRuleNamespace, rule.Name) + if err != nil { + log.Error(err, "Failed to get rule group") + return nil + } + + return mimirRuleGroup +} + +//func (r *MimirRuleReconciler) createMimirRuleGroup(log logr.Logger, mimirClient *mimirclient.MimirClient, rule *monitoringv1.PrometheusRule, ds *openslov1.Datasource) error { +// mimirRuleGroup, err := helpers.NewMimirRuleGroup(rule) +// if err != nil { +// log.Error(err, "Failed to create Mimir rule group") +// return err +// } +// +// if err := 
mimirClient.CreateRuleGroup(context.Background(), mimirRuleNamespace, *mimirRuleGroup); err != nil { +// log.Error(err, "Failed to create rule group") +// return err +// } +// +// return nil +//} + +func (r *MimirRuleReconciler) deleteMimirRuleGroup(log logr.Logger, mimirClient *mimirclient.MimirClient, ruleGroup *rwrulefmt.RuleGroup) error { + if err := mimirClient.DeleteRuleGroup(context.Background(), mimirRuleNamespace, ruleGroup.Name); err != nil { + log.Error(err, "Failed to delete rule group") + return err + } + + return nil +} + +func (r *MimirRuleReconciler) updateMimirRuleGroup(log logr.Logger, mimirClient *mimirclient.MimirClient, existingGroup *rwrulefmt.RuleGroup, desiredGroup *rwrulefmt.RuleGroup) error { + log.Info("Updating Mimir rule group") + if reflect.DeepEqual(existingGroup, desiredGroup) { + log.Info("Mimir rule group is already up to date") + return nil + } + err := r.deleteMimirRuleGroup(log, mimirClient, existingGroup) + if err != nil { + return err + } + return nil +} + +func (r *MimirRuleReconciler) addFinalizer(log logr.Logger, rule *oskov1alpha1.MimirRule) error { + log.Info("Adding Finalizer for the MimirRule") + controllerutil.AddFinalizer(rule, mimirRuleFinalizer) + + err := r.Update(context.Background(), rule) + if err != nil { + log.Error(err, "Failed to update MimirRule with finalizer") + return err + } + return nil +} + +// SetupWithManager sets up the controller with the Manager. +func (r *MimirRuleReconciler) SetupWithManager(mgr ctrl.Manager) error { + return ctrl.NewControllerManagedBy(mgr). + For(&oskov1alpha1.MimirRule{}). + Complete(r) +} diff --git a/internal/controller/osko/suite_test.go b/internal/controller/osko/suite_test.go new file mode 100644 index 0000000..7d76345 --- /dev/null +++ b/internal/controller/osko/suite_test.go @@ -0,0 +1,74 @@ +package osko + +import ( + "fmt" + "path/filepath" + "runtime" + "testing" + + . "github.com/onsi/ginkgo/v2" + . 
"github.com/onsi/gomega" + + "k8s.io/client-go/kubernetes/scheme" + "k8s.io/client-go/rest" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/envtest" + logf "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/log/zap" + + oskov1alpha1 "github.com/oskoperator/osko/api/osko/v1alpha1" + //+kubebuilder:scaffold:imports +) + +// These tests use Ginkgo (BDD-style Go testing framework). Refer to +// http://onsi.github.io/ginkgo/ to learn more about Ginkgo. + +var cfg *rest.Config +var k8sClient client.Client +var testEnv *envtest.Environment + +func TestControllers(t *testing.T) { + RegisterFailHandler(Fail) + + RunSpecs(t, "Controller Suite") +} + +var _ = BeforeSuite(func() { + logf.SetLogger(zap.New(zap.WriteTo(GinkgoWriter), zap.UseDevMode(true))) + + By("bootstrapping test environment") + testEnv = &envtest.Environment{ + CRDDirectoryPaths: []string{filepath.Join("..", "..", "..", "config", "crd", "bases")}, + ErrorIfCRDPathMissing: true, + + // The BinaryAssetsDirectory is only required if you want to run the tests directly + // without call the makefile target test. If not informed it will look for the + // default path defined in controller-runtime which is /usr/local/kubebuilder/. + // Note that you must have the required binaries setup under the bin directory to perform + // the tests directly. When we run make test it will be setup and used automatically. + BinaryAssetsDirectory: filepath.Join("..", "..", "..", "bin", "k8s", + fmt.Sprintf("1.28.3-%s-%s", runtime.GOOS, runtime.GOARCH)), + } + + var err error + // cfg is defined in this file globally. 
+ cfg, err = testEnv.Start() + Expect(err).NotTo(HaveOccurred()) + Expect(cfg).NotTo(BeNil()) + + err = oskov1alpha1.AddToScheme(scheme.Scheme) + Expect(err).NotTo(HaveOccurred()) + + //+kubebuilder:scaffold:scheme + + k8sClient, err = client.New(cfg, client.Options{Scheme: scheme.Scheme}) + Expect(err).NotTo(HaveOccurred()) + Expect(k8sClient).NotTo(BeNil()) + +}) + +var _ = AfterSuite(func() { + By("tearing down the test environment") + err := testEnv.Stop() + Expect(err).NotTo(HaveOccurred()) +}) diff --git a/internal/helpers/mimirtool_helper.go b/internal/helpers/mimirtool_helper.go new file mode 100644 index 0000000..d833868 --- /dev/null +++ b/internal/helpers/mimirtool_helper.go @@ -0,0 +1,140 @@ +package helpers + +import ( + "context" + "github.com/go-logr/logr" + mimirclient "github.com/grafana/mimir/pkg/mimirtool/client" + "github.com/grafana/mimir/pkg/mimirtool/rules/rwrulefmt" + openslov1 "github.com/oskoperator/osko/api/openslo/v1" + oskov1alpha1 "github.com/oskoperator/osko/api/osko/v1alpha1" + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "reflect" +) + +const ( + mimirRuleNamespace = "osko" +) + +type MimirClientConfig struct { + Address string + TenantId string +} + +func (m *MimirClientConfig) NewMimirClient() (*mimirclient.MimirClient, error) { + return mimirclient.New( + mimirclient.Config{ + Address: m.Address, + ID: m.TenantId, + }, + ) +} + +func NewMimirRule(slo *openslov1.SLO, rule *monitoringv1.PrometheusRule) (mimirRule *oskov1alpha1.MimirRule, err error) { + ownerRef := []metav1.OwnerReference{ + *metav1.NewControllerRef( + slo, + openslov1.GroupVersion.WithKind("SLO"), + ), + } + + objectMeta := metav1.ObjectMeta{ + Name: rule.Name, + Namespace: rule.Namespace, + Labels: rule.Labels, + Annotations: rule.Annotations, + OwnerReferences: ownerRef, + } + + var mimirRules []oskov1alpha1.Rule + + for _, group := range rule.Spec.Groups { + for _, r := 
range group.Rules { + mimirRuleNode := oskov1alpha1.Rule{ + Record: r.Record, + Expr: r.Expr.String(), + Labels: r.Labels, + } + mimirRules = append(mimirRules, mimirRuleNode) + } + } + + mimirRule = &oskov1alpha1.MimirRule{ + ObjectMeta: objectMeta, + Spec: oskov1alpha1.MimirRuleSpec{ + Groups: []oskov1alpha1.RuleGroup{ + { + Name: rule.Name, + SourceTenants: []string{ + "infra", + "logging", + }, + Rules: mimirRules, + }, + }, + }, + } + return mimirRule, nil +} + +func NewMimirRuleGroup(rule *monitoringv1.PrometheusRule) (*oskov1alpha1.RuleGroup, error) { + var mimirRules []oskov1alpha1.Rule + + for _, group := range rule.Spec.Groups { + for _, r := range group.Rules { + mimirRuleNode := oskov1alpha1.Rule{ + Record: r.Record, + Expr: r.Expr.String(), + Labels: r.Labels, + } + mimirRules = append(mimirRules, mimirRuleNode) + } + } + + //dsConfig := utils.DataSourceConfig{DataSource: ds} + sourceTenants := []string{ + "infra", + "logging", + "billing", + } + + mimirRuleGroup := &oskov1alpha1.RuleGroup{ + Name: rule.Name, + Rules: mimirRules, + SourceTenants: sourceTenants, + } + + return mimirRuleGroup, nil +} + +func GetMimirRuleGroup(log logr.Logger, mimirClient *mimirclient.MimirClient, rule *monitoringv1.PrometheusRule) *rwrulefmt.RuleGroup { + mimirRuleGroup, err := mimirClient.GetRuleGroup(context.Background(), mimirRuleNamespace, rule.Name) + if err != nil { + log.Error(err, "Failed to get rule group") + return nil + } + + return mimirRuleGroup +} + +func UpdateMimirRuleGroup(log logr.Logger, mimirClient *mimirclient.MimirClient, existingGroup *rwrulefmt.RuleGroup, desiredGroup *rwrulefmt.RuleGroup) error { + log.Info("Updating Mimir rule group") + if reflect.DeepEqual(existingGroup, desiredGroup) { + log.Info("Mimir rule group is already up to date") + return nil + } + err := DeleteMimirRuleGroup(log, mimirClient, existingGroup) + if err != nil { + return err + } + return nil +} + +func DeleteMimirRuleGroup(log logr.Logger, mimirClient 
*mimirclient.MimirClient, ruleGroup *rwrulefmt.RuleGroup) error { + if err := mimirClient.DeleteRuleGroup(context.Background(), mimirRuleNamespace, ruleGroup.Name); err != nil { + log.Error(err, "Failed to delete rule group") + return err + } + + return nil +} diff --git a/internal/helpers/prometheus_helper.go b/internal/helpers/prometheus_helper.go new file mode 100644 index 0000000..adfb864 --- /dev/null +++ b/internal/helpers/prometheus_helper.go @@ -0,0 +1,165 @@ +package helpers + +import ( + "fmt" + openslov1 "github.com/oskoperator/osko/api/openslo/v1" + "github.com/oskoperator/osko/internal/utils" + monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/util/intstr" +) + +const ( + RecordPrefix = "osko" +) + +func CreatePrometheusRule(slo *openslov1.SLO, sli *openslov1.SLI) (*monitoringv1.PrometheusRule, error) { + var monitoringRules []monitoringv1.Rule + var targetVector monitoringv1.Rule + defaultRateWindow := "1m" + //burnRateTimeWindows := []string{"1h", "6h", "3d"} + sloTimeWindowDuration := string(slo.Spec.TimeWindow[0].Duration) + m := utils.MetricLabel{Slo: slo, Sli: sli} + + targetVector.Record = "osko_slo_target" + targetVector.Expr = intstr.Parse(fmt.Sprintf("vector(%s)", slo.Spec.Objectives[0].Value)) + m.TimeWindow = sloTimeWindowDuration + targetVector.Labels = m.NewMetricLabelGenerator() + + // for now, total and good are required. 
bad is optional and is calculated as (total - good) if not provided + // TODO: validate that the SLO budgeting method is Occurrences and that the SLIs are all ratio metrics in other case throw an error + targetVectorConfig := utils.Rule{ + Record: "slo_target", + Expr: "", + TimeWindow: sloTimeWindowDuration, + Slo: slo, + Sli: sli, + MetricLabelCompiler: &m, + } + + totalRule28Config := utils.Rule{ + RuleType: "total", + Record: "sli_ratio_total", + Expr: "sum(increase(%s[%s]))", + TimeWindow: sloTimeWindowDuration, + Slo: slo, + Sli: sli, + MetricLabelCompiler: &m, + } + + goodRule28Config := utils.Rule{ + RuleType: "good", + Record: "sli_ratio_total", + Expr: "sum(increase(%s[%s]))", + TimeWindow: sloTimeWindowDuration, + Slo: slo, + Sli: sli, + MetricLabelCompiler: &m, + } + + badRule28Config := utils.Rule{ + RuleType: "bad", + Record: "sli_ratio_total", + Expr: "sum(increase(%s[%s]))", + TimeWindow: sloTimeWindowDuration, + Slo: slo, + Sli: sli, + MetricLabelCompiler: &m, + } + + totalRuleConfig := utils.Rule{ + RuleType: "total", + Record: "sli_ratio_total", + Expr: "sum(increase(%s[%s]))", + TimeWindow: defaultRateWindow, + Slo: slo, + Sli: sli, + SupportiveRule: &totalRule28Config, + MetricLabelCompiler: &m, + } + + goodRuleConfig := utils.Rule{ + RuleType: "good", + Record: "sli_ratio_good", + Expr: "sum(increase(%s[%s]))", + TimeWindow: defaultRateWindow, + Slo: slo, + Sli: sli, + SupportiveRule: &goodRule28Config, + MetricLabelCompiler: &m, + } + + badRuleConfig := utils.Rule{ + RuleType: "bad", + Record: "sli_ratio_bad", + Expr: "sum(increase(%s[%s]))", + TimeWindow: defaultRateWindow, + Slo: slo, + Sli: sli, + SupportiveRule: &badRule28Config, + MetricLabelCompiler: &m, + } + + errorBudgetRuleConfig := utils.BudgetRule{ + Record: "error_budget_available", + Slo: slo, + Sli: sli, + TargetRuleConfig: &targetVectorConfig, + TotalRuleConfig: &totalRuleConfig, + BadRuleConfig: &badRuleConfig, + GoodRuleConfig: &goodRuleConfig, + } + + configs := 
[]utils.Rule{ + totalRuleConfig, + goodRuleConfig, + badRuleConfig, + } + + for _, config := range configs { + rule, supportiveRule := config.NewRatioRule(config.TimeWindow) + if rule == nil || supportiveRule == nil { + continue + } + monitoringRules = append(monitoringRules, *rule) + monitoringRules = append(monitoringRules, *supportiveRule) + } + + monitoringRules = append(monitoringRules, targetVectorConfig.NewTargetRule()) + monitoringRules = append(monitoringRules, errorBudgetRuleConfig.NewBudgetRule()) + + ownerRef := []metav1.OwnerReference{ + *metav1.NewControllerRef( + slo, + openslov1.GroupVersion.WithKind("SLO"), + ), + } + + objectMeta := metav1.ObjectMeta{ + Name: slo.Name, + Namespace: slo.Namespace, + Labels: slo.Labels, + Annotations: slo.Annotations, + OwnerReferences: ownerRef, + } + + ruleGroup := []monitoringv1.RuleGroup{ + { + Name: slo.Name, + Rules: monitoringRules, + }, + } + + rule := &monitoringv1.PrometheusRule{ + TypeMeta: metav1.TypeMeta{ + APIVersion: "monitoring.coreos.com/v1", + Kind: "PrometheusRule", + }, + ObjectMeta: objectMeta, + Spec: monitoringv1.PrometheusRuleSpec{ + Groups: ruleGroup, + }, + } + + return rule, nil +} diff --git a/internal/mimirtool/mimirtool.go b/internal/mimirtool/mimirtool.go deleted file mode 100644 index e2fbd12..0000000 --- a/internal/mimirtool/mimirtool.go +++ /dev/null @@ -1,19 +0,0 @@ -package mimirtool - -import ( - mimirtool "github.com/grafana/mimir/pkg/mimirtool/client" -) - -type MimirClientConfig struct { - Address string - TenantId string -} - -func (m *MimirClientConfig) NewMimirClient() (*mimirtool.MimirClient, error) { - return mimirtool.New( - mimirtool.Config{ - Address: m.Address, - ID: m.TenantId, - }, - ) -} diff --git a/internal/utils/common_utils.go b/internal/utils/common_utils.go index b26e4a2..90e6235 100644 --- a/internal/utils/common_utils.go +++ b/internal/utils/common_utils.go @@ -3,23 +3,24 @@ package utils import ( "context" "fmt" - openslov1 
"github.com/oskoperator/osko/apis/openslo/v1" + openslov1 "github.com/oskoperator/osko/api/openslo/v1" monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/intstr" "reflect" "sigs.k8s.io/controller-runtime/pkg/client" + ctrllog "sigs.k8s.io/controller-runtime/pkg/log" "time" ) -type MetricLabelParams struct { +type MetricLabel struct { Slo *openslov1.SLO Sli *openslov1.SLI TimeWindow string Labels map[string]string } -type RuleConfig struct { +type Rule struct { Sli *openslov1.SLI Slo *openslov1.SLO BaseRule *monitoringv1.Rule @@ -28,17 +29,18 @@ type RuleConfig struct { Expr string RateWindow string TimeWindow string - SupportiveRule *RuleConfig - MetricLabelCompiler *MetricLabelParams + SupportiveRule *Rule + MetricLabelCompiler *MetricLabel } -type BudgetRuleConfig struct { +type BudgetRule struct { Record string Sli *openslov1.SLI Slo *openslov1.SLO - TotalRuleConfig *RuleConfig - BadRuleConfig *RuleConfig - TargetRuleConfig *RuleConfig + TotalRuleConfig *Rule + BadRuleConfig *Rule + GoodRuleConfig *Rule + TargetRuleConfig *Rule } type DataSourceConfig struct { @@ -46,10 +48,11 @@ type DataSourceConfig struct { } const ( - TypeTotal = "total" - TypeBad = "bad" - TypeGood = "good" - ExprFmt = "sum(increase(%s[%s]))" + RecordPrefix = "osko" + TypeTotal = "total" + TypeBad = "bad" + TypeGood = "good" + ExprFmt = "sum(increase(%s[%s]))" ) // UpdateCondition checks if the condition of the given type is already in the slice @@ -73,7 +76,7 @@ func updateCondition(conditions []metav1.Condition, newCondition metav1.Conditio } // Filter the existing condition (if it exists) - updatedConditions := []metav1.Condition{} + var updatedConditions []metav1.Condition for _, condition := range conditions { if condition.Type != newCondition.Type { updatedConditions = append(updatedConditions, condition) @@ -82,27 +85,26 @@ func updateCondition(conditions 
[]metav1.Condition, newCondition metav1.Conditio // Append the new condition newCondition.LastTransitionTime = metav1.NewTime(time.Now()) - updatedConditions = append(updatedConditions, newCondition) return updatedConditions } -func UpdateStatus(ctx context.Context, slo *openslov1.SLO, r client.Client, conditionType string, status metav1.ConditionStatus, reason string, message string) error { +func UpdateStatus(ctx context.Context, slo *openslov1.SLO, r client.Client, conditionType string, status metav1.ConditionStatus, message string) error { // Update the conditions based on provided arguments condition := metav1.Condition{ Type: conditionType, Status: status, - Reason: reason, + Reason: string(status), Message: message, LastTransitionTime: metav1.NewTime(time.Now()), } slo.Status.Conditions = updateCondition(slo.Status.Conditions, condition) - slo.Status.Ready = reason + slo.Status.Ready = string(status) return r.Status().Update(ctx, slo) } -func (m MetricLabelParams) NewMetricLabelCompiler(rule *monitoringv1.Rule, window string) string { +func (m MetricLabel) NewMetricLabelCompiler(rule *monitoringv1.Rule, window string) string { labelString := "" emptyRule := monitoringv1.Rule{} if !reflect.DeepEqual(rule, emptyRule) { @@ -123,7 +125,7 @@ func (m MetricLabelParams) NewMetricLabelCompiler(rule *monitoringv1.Rule, windo return labelString } -func (m MetricLabelParams) NewMetricLabelGenerator() map[string]string { +func (m MetricLabel) NewMetricLabelGenerator() map[string]string { window := string(m.Slo.Spec.TimeWindow[0].Duration) if m.TimeWindow != "" { window = m.TimeWindow @@ -136,7 +138,7 @@ func (m MetricLabelParams) NewMetricLabelGenerator() map[string]string { } } -func (c RuleConfig) getFieldsByType() (string, error) { +func (c Rule) getFieldsByType() (string, error) { switch c.RuleType { case TypeTotal: return c.Sli.Spec.RatioMetric.Total.MetricSource.Spec, nil @@ -149,8 +151,8 @@ func (c RuleConfig) getFieldsByType() (string, error) { } } -func (c 
RuleConfig) NewRatioRule(window string) (*monitoringv1.Rule, *monitoringv1.Rule) { - +func (c Rule) NewRatioRule(window string) (*monitoringv1.Rule, *monitoringv1.Rule) { + // field, err := c.getFieldsByType() if err != nil || field == "" { return nil, nil @@ -159,7 +161,7 @@ func (c RuleConfig) NewRatioRule(window string) (*monitoringv1.Rule, *monitoring expr := fmt.Sprintf(ExprFmt, field, window) rule := monitoringv1.Rule{ - Record: fmt.Sprintf("osko_%s", c.Record), + Record: fmt.Sprintf("%s_%s", RecordPrefix, c.Record), Expr: intstr.Parse(expr), } @@ -172,8 +174,8 @@ func (c RuleConfig) NewRatioRule(window string) (*monitoringv1.Rule, *monitoring return &rule, &supportiveRule } -func (c RuleConfig) NewSupportiveRule(baseRule monitoringv1.Rule) (rule monitoringv1.Rule) { - rule.Record = fmt.Sprintf("osko_%s", c.Record) +func (c Rule) NewSupportiveRule(baseRule monitoringv1.Rule) (rule monitoringv1.Rule) { + rule.Record = fmt.Sprintf("%s_%s", RecordPrefix, c.Record) labels := c.SupportiveRule.MetricLabelCompiler.NewMetricLabelCompiler(&baseRule, baseRule.Labels["window"]) expr := fmt.Sprintf("sum(increase(%s{%s}[%s])) by (service, sli_name, slo_name)", baseRule.Record, labels, c.SupportiveRule.TimeWindow) rule.Expr = intstr.Parse(expr) @@ -184,31 +186,53 @@ func (c RuleConfig) NewSupportiveRule(baseRule monitoringv1.Rule) (rule monitori return rule } -func (c RuleConfig) NewTargetRule() (rule monitoringv1.Rule) { - rule.Record = fmt.Sprintf("osko_%s", c.Record) +func (c Rule) NewTargetRule() (rule monitoringv1.Rule) { + rule.Record = fmt.Sprintf("%s_%s", RecordPrefix, c.Record) rule.Expr = intstr.Parse(fmt.Sprintf("vector(%s)", c.Slo.Spec.Objectives[0].Target)) + rule.Labels = c.MetricLabelCompiler.NewMetricLabelGenerator() return rule } -func (b BudgetRuleConfig) NewBudgetRule() (rule monitoringv1.Rule) { - rule.Record = fmt.Sprintf("osko_%s", b.Record) - expr := fmt.Sprintf("(1 - %s{%s}) * (%s{%s} - %s{%s})", +func (b BudgetRule) NewBudgetRule() (rule 
monitoringv1.Rule) { + log := ctrllog.FromContext(context.Background()) + gbRule := &Rule{} + if b.BadRuleConfig.Sli.Spec.RatioMetric.Bad.MetricSource.Spec == "" || b.BadRuleConfig.Slo.Spec.Indicator.Spec.RatioMetric.Bad.MetricSource.Spec == "" { + log.Info("Bad rule not provided, calculating bad as (total - good)") + gbRule = b.GoodRuleConfig + } else { + log.Info("Bad rule provided") + gbRule = b.BadRuleConfig + } + rule.Record = fmt.Sprintf("%s_%s", RecordPrefix, b.Record) + expr := fmt.Sprintf("(1 - %s_%s{%s}) * (%s_%s{%s} - %s_%s{%s})", + RecordPrefix, b.TargetRuleConfig.Record, b.TargetRuleConfig.MetricLabelCompiler.NewMetricLabelCompiler(nil, ""), + RecordPrefix, b.TotalRuleConfig.Record, b.TotalRuleConfig.MetricLabelCompiler.NewMetricLabelCompiler(nil, ""), - b.BadRuleConfig.Record, - b.BadRuleConfig.MetricLabelCompiler.NewMetricLabelCompiler(nil, ""), + RecordPrefix, + gbRule.Record, + gbRule.MetricLabelCompiler.NewMetricLabelCompiler(nil, ""), ) rule.Expr = intstr.Parse(expr) return rule } func (d DataSourceConfig) ParseTenantAnnotation() (tenants []string) { - if d.DataSource.Annotations["osko.dev/source-tenants"] != "" { - for _, tenant := range d.DataSource.Annotations["osko.dev/source-tenants"] { - tenants = append(tenants, string(tenant)) + if len(d.DataSource.Spec.ConnectionDetails.SourceTenants) != 0 { + for _, tenant := range d.DataSource.Spec.ConnectionDetails.SourceTenants { + tenants = append(tenants, tenant) } } return tenants } + +func ContainString(slice []string, s string) bool { + for _, v := range slice { + if v == s { + return true + } + } + return false +}