Improve alert updates #191

Open · wants to merge 2 commits into base: main
28 changes: 20 additions & 8 deletions controllers/alertmanagerconfig_controller.go
@@ -3,6 +3,7 @@ package controllers
import (
"context"
"fmt"
"reflect"
"regexp"
"time"

@@ -237,16 +238,25 @@ func (r *AlertmanagerConfigReconciler) linkCxAlertToCxIntegrations(ctx context.C
}

matchedReceiversMap := matchedRoutesToMatchedReceiversMap(matchRoutes, config.Spec.Receivers)
alert.Spec.NotificationGroups, err = generateNotificationGroupFromRoutes(matchRoutes, matchedReceiversMap)
notificationGroups, err := generateNotificationGroupFromRoutes(matchRoutes, matchedReceiversMap)
if err != nil {
succeed = false
log.Error(err, "Received an error while trying to generate NotificationGroup from routes")
continue
}
if err = r.Update(ctx, &alert); err != nil {
succeed = false
log.Error(err, "Received an error while trying to update Alert CRD from AlertmanagerConfig")
continue

if !reflect.DeepEqual(alert.Spec.NotificationGroups, notificationGroups) {
if err = r.Client.Get(ctx, client.ObjectKey{Namespace: alert.Namespace, Name: alert.Name}, &alert); err != nil {
succeed = false
log.Error(err, "Received an error while trying to get Alert CRD from AlertmanagerConfig")
}

alert.Spec.NotificationGroups = notificationGroups
if err = r.Update(ctx, &alert); err != nil {
succeed = false
log.Error(err, "Received an error while trying to update Alert CRD from AlertmanagerConfig")
continue
}
}
}

@@ -260,9 +270,11 @@ func (r *AlertmanagerConfigReconciler) deleteWebhooksFromRelatedAlerts(ctx conte
}

for _, alert := range alerts.Items {
alert.Spec.NotificationGroups = nil
if err := r.Update(ctx, &alert); err != nil {
return fmt.Errorf("received an error while trying to update Alert CRD from AlertmanagerConfig: %w", err)
if alert.Spec.NotificationGroups != nil {
alert.Spec.NotificationGroups = nil
if err := r.Update(ctx, &alert); err != nil {
return fmt.Errorf("received an error while trying to update Alert CRD from AlertmanagerConfig: %w", err)
}
}
}

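Note on the hunk above: the unconditional `r.Update` becomes a compare-then-update flow. The generated notification groups are diffed against the Alert's current spec, the Alert is re-fetched so the write carries the latest resourceVersion, and only then is the spec updated. The sketch below restates that pattern in isolation; the helper name `updateIfChanged` and its exact signature are illustrative, not code from this PR.

```go
package controllers

import (
	"context"
	"fmt"
	"reflect"

	"sigs.k8s.io/controller-runtime/pkg/client"

	coralogixv1alpha1 "github.com/coralogix/coralogix-operator/apis/coralogix/v1alpha1"
)

// updateIfChanged is an illustrative helper (not part of this PR): it writes
// the Alert only when the desired notification groups differ from what is
// already in the spec, and refreshes the object first so the Update call
// carries the latest resourceVersion.
func updateIfChanged(ctx context.Context, c client.Client, alert *coralogixv1alpha1.Alert,
	desired []coralogixv1alpha1.NotificationGroup) error {
	if reflect.DeepEqual(alert.Spec.NotificationGroups, desired) {
		return nil // nothing changed; skip the no-op API write
	}
	if err := c.Get(ctx, client.ObjectKeyFromObject(alert), alert); err != nil {
		return fmt.Errorf("failed to refresh Alert before update: %w", err)
	}
	alert.Spec.NotificationGroups = desired
	return c.Update(ctx, alert)
}
```

The change to deleteWebhooksFromRelatedAlerts follows the same idea: NotificationGroups is cleared, and an update issued, only when it is currently non-nil.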
82 changes: 20 additions & 62 deletions controllers/alphacontrollers/alert_controller.go
@@ -18,8 +18,8 @@ package alphacontrollers

import (
"context"
stdErr "errors"
"fmt"

"github.com/go-logr/logr"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
@@ -106,9 +106,7 @@ func (r *AlertReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl
return ctrl.Result{}, nil
}

func (r *AlertReconciler) update(ctx context.Context,
log logr.Logger,
alert *coralogixv1alpha1.Alert) error {
func (r *AlertReconciler) update(ctx context.Context, log logr.Logger, alert *coralogixv1alpha1.Alert) error {
alertRequest, err := alert.Spec.ExtractUpdateAlertRequest(ctx, log, *alert.Status.ID)
if err != nil {
return fmt.Errorf("error to parse alert request: %w", err)
@@ -123,33 +121,15 @@ func (r *AlertReconciler) update(ctx context.Context,
if err = r.Status().Update(ctx, alert); err != nil {
return fmt.Errorf("error on updating alert status: %w", err)
}
return fmt.Errorf("alert not found on remote, recreating it: %w", err)
return fmt.Errorf("alert not found on remote: %w", err)
}
return fmt.Errorf("error on updating alert: %w", err)
}
log.V(1).Info("Remote alert updated", "alert", protojson.Format(remoteUpdatedAlert))

status, err := getStatus(ctx, log, remoteUpdatedAlert.GetAlert(), alert.Spec)
if err != nil {
return fmt.Errorf("error on getting status: %w", err)
}

if err = r.Get(ctx, client.ObjectKeyFromObject(alert), alert); err != nil {
return fmt.Errorf("error on getting alert: %w", err)
}
alert.Status = status

if err = r.Status().Update(ctx, alert); err != nil {
return fmt.Errorf("error on updating alert status: %w", err)
}

return nil
}

func (r *AlertReconciler) delete(ctx context.Context,
log logr.Logger,
alert *coralogixv1alpha1.Alert) error {

func (r *AlertReconciler) delete(ctx context.Context, log logr.Logger, alert *coralogixv1alpha1.Alert) error {
log.V(1).Info("Deleting remote alert", "alert", *alert.Status.ID)
_, err := r.CoralogixClientSet.Alerts().DeleteAlert(ctx, &alerts.DeleteAlertByUniqueIdRequest{
Id: wrapperspb.String(*alert.Status.ID),
@@ -167,23 +147,7 @@ func (r *AlertReconciler) delete(ctx context.Context,
return nil
}

func (r *AlertReconciler) create(
ctx context.Context,
log logr.Logger,
alert *coralogixv1alpha1.Alert) error {

if alert.Spec.Labels == nil {
alert.Spec.Labels = make(map[string]string)
}

if value, ok := alert.Spec.Labels["managed-by"]; !ok || value == "" {
alert.Spec.Labels["managed-by"] = "coralogix-operator"
}

if err := r.Update(ctx, alert); err != nil {
return fmt.Errorf("error on updating alert: %w", err)
}

func (r *AlertReconciler) create(ctx context.Context, log logr.Logger, alert *coralogixv1alpha1.Alert) error {
alertRequest, err := alert.ExtractCreateAlertRequest(ctx, log)
if err != nil {
return fmt.Errorf("error to parse alert request: %w", err)
@@ -201,38 +165,32 @@ func (r *AlertReconciler) create(
}

alert.Status.ID = pointer.String(response.GetAlert().GetUniqueIdentifier().GetValue())
if err = r.Update(ctx, alert); err != nil {
return fmt.Errorf("error on updating alert: %w", err)
if err = r.Status().Update(ctx, alert); err != nil {
return fmt.Errorf("error on updating alert status: %w", err)
}

if alert.Status, err = getStatus(ctx, log, response.GetAlert(), alert.Spec); err != nil {
return fmt.Errorf("error on getting status: %w", err)
updated := false
if alert.Spec.Labels == nil {
alert.Spec.Labels = make(map[string]string)
}
if err = r.Status().Update(ctx, alert); err != nil {
return fmt.Errorf("error on updating alert status: %w", err)

if value, ok := alert.Spec.Labels["managed-by"]; !ok || value == "" {
alert.Spec.Labels["managed-by"] = "coralogix-operator"
updated = true
}

if !controllerutil.ContainsFinalizer(alert, alertFinalizerName) {
controllerutil.AddFinalizer(alert, alertFinalizerName)
}
if err = r.Client.Update(ctx, alert); err != nil {
return fmt.Errorf("error on updating alert: %w", err)
updated = true
}

return nil
}

func getStatus(ctx context.Context, log logr.Logger, actualAlert *alerts.Alert, spec coralogixv1alpha1.AlertSpec) (coralogixv1alpha1.AlertStatus, error) {
if actualAlert == nil {
return coralogixv1alpha1.AlertStatus{}, stdErr.New("alert is nil")
if updated {
if err = r.Client.Update(ctx, alert); err != nil {
return fmt.Errorf("error on updating alert: %w", err)
}
}

var status coralogixv1alpha1.AlertStatus
var err error

status.ID = utils.WrapperspbStringToStringPointer(actualAlert.GetUniqueIdentifier())

return status, err
return nil
}

// SetupWithManager sets up the controller with the Manager.
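In this file the create path now separates what goes through the status subresource from what goes through a spec update: the remote alert ID is persisted with `r.Status().Update`, while the `managed-by` label and the finalizer only trigger `r.Client.Update` when something actually changed, and the old `getStatus` helper is removed. A rough sketch of that bookkeeping, under assumed names (`persistCreateResult`, `remoteID`), might look like:

```go
package alphacontrollers

import (
	"context"
	"fmt"

	"k8s.io/utils/pointer"
	"sigs.k8s.io/controller-runtime/pkg/client"
	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"

	coralogixv1alpha1 "github.com/coralogix/coralogix-operator/apis/coralogix/v1alpha1"
)

// persistCreateResult is a sketch, not the PR's code: the remote ID goes to
// the status subresource, and the spec/metadata update is issued only when
// the managed-by label or the finalizer was actually added.
func persistCreateResult(ctx context.Context, c client.Client,
	alert *coralogixv1alpha1.Alert, remoteID, finalizer string) error {
	alert.Status.ID = pointer.String(remoteID)
	if err := c.Status().Update(ctx, alert); err != nil {
		return fmt.Errorf("error on updating alert status: %w", err)
	}

	updated := false
	if alert.Spec.Labels == nil {
		alert.Spec.Labels = map[string]string{}
	}
	if v, ok := alert.Spec.Labels["managed-by"]; !ok || v == "" {
		alert.Spec.Labels["managed-by"] = "coralogix-operator"
		updated = true
	}
	if !controllerutil.ContainsFinalizer(alert, finalizer) {
		controllerutil.AddFinalizer(alert, finalizer)
		updated = true
	}
	if !updated {
		return nil // avoid a spec write when nothing changed
	}
	return c.Update(ctx, alert)
}
```

The update path applies the same split: when the remote alert is missing, only the status is refreshed before returning the error, and the extra Get plus Status().Update round-trip after a successful remote update is dropped.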
13 changes: 2 additions & 11 deletions controllers/alphacontrollers/alert_controller_test.go
@@ -17,7 +17,6 @@ import (
"k8s.io/utils/pointer"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/log/zap"

cxsdk "github.com/coralogix/coralogix-management-sdk/go"
@@ -761,14 +760,7 @@ func TestFlattenAlerts(t *testing.T) {
},
}

spec := coralogixv1alpha1.AlertSpec{
Scheduling: &coralogixv1alpha1.Scheduling{
TimeZone: coralogixv1alpha1.TimeZone("UTC+02"),
},
}

ctx := context.Background()
log := log.FromContext(ctx)

controller := gomock.NewController(t)
defer controller.Finish()
@@ -777,12 +769,11 @@ func TestFlattenAlerts(t *testing.T) {
webhookMock.EXPECT().List(ctx, gomock.Any()).Return(&cxsdk.ListAllOutgoingWebhooksResponse{}, nil).AnyTimes()
coralogixv1alpha1.WebhooksClient = webhookMock

status, err := getStatus(ctx, log, alert, spec)
assert.NoError(t, err)
alertStatus := coralogixv1alpha1.AlertStatus{ID: utils.WrapperspbStringToStringPointer(alert.GetUniqueIdentifier())}

expected := &coralogixv1alpha1.AlertStatus{
ID: pointer.String("id1"),
}

assert.EqualValues(t, expected, &status)
assert.EqualValues(t, expected, &alertStatus)
}
2 changes: 1 addition & 1 deletion controllers/clientset/recording-rules-groups-client.go
@@ -14,4 +14,4 @@ type RecordingRulesGroupsClientInterface interface {
Get(ctx context.Context, req *cxsdk.GetRuleGroupSetRequest) (*cxsdk.GetRuleGroupSetResponse, error)
Update(ctx context.Context, req *cxsdk.UpdateRuleGroupSetRequest) (*emptypb.Empty, error)
Delete(ctx context.Context, req *cxsdk.DeleteRuleGroupSetRequest) (*emptypb.Empty, error)
}
}
82 changes: 45 additions & 37 deletions controllers/prometheusrule_controller.go
@@ -3,16 +3,14 @@ package controllers
import (
"context"
"fmt"
"reflect"
"strconv"
"strings"
"time"

coralogixv1alpha1 "github.com/coralogix/coralogix-operator/apis/coralogix/v1alpha1"
"github.com/coralogix/coralogix-operator/controllers/clientset"
"github.com/go-logr/logr"
"go.uber.org/zap/zapcore"

prometheus "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
"go.uber.org/zap/zapcore"
"k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -25,6 +23,9 @@ import (
"sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/predicate"
"sigs.k8s.io/controller-runtime/pkg/reconcile"

coralogixv1alpha1 "github.com/coralogix/coralogix-operator/apis/coralogix/v1alpha1"
"github.com/coralogix/coralogix-operator/controllers/clientset"
)

const (
@@ -85,22 +86,14 @@ func (r *PrometheusRuleReconciler) Reconcile(ctx context.Context, req ctrl.Reque
func (r *PrometheusRuleReconciler) convertPrometheusRuleRecordingRuleToCxRecordingRule(ctx context.Context, log logr.Logger, prometheusRule *prometheus.PrometheusRule, req reconcile.Request) error {
recordingRuleGroupSetSpec := prometheusRuleToRecordingRuleToRuleGroupSet(log, prometheusRule)
if len(recordingRuleGroupSetSpec.Groups) == 0 {
log.V(int(zapcore.DebugLevel)).Info("No recording rules found in PrometheusRule")
return nil
}

recordingRuleGroupSet := &coralogixv1alpha1.RecordingRuleGroupSet{
ObjectMeta: metav1.ObjectMeta{
Namespace: prometheusRule.Namespace,
Name: prometheusRule.Name,
OwnerReferences: []metav1.OwnerReference{
{
APIVersion: prometheusRule.APIVersion,
Kind: prometheusRule.Kind,
Name: prometheusRule.Name,
UID: prometheusRule.UID,
},
},
Namespace: prometheusRule.Namespace,
Name: prometheusRule.Name,
OwnerReferences: []metav1.OwnerReference{getOwnerReference(prometheusRule)},
},
Spec: recordingRuleGroupSetSpec,
}
@@ -176,14 +169,7 @@ func (r *PrometheusRuleReconciler) convertPrometheusRuleAlertToCxAlert(ctx conte
alertCRD.Spec = prometheusRuleToCoralogixAlertSpec(rule)
alertCRD.Namespace = prometheusRule.Namespace
alertCRD.Name = alertCRDName
alertCRD.OwnerReferences = []metav1.OwnerReference{
{
APIVersion: prometheusRule.APIVersion,
Kind: prometheusRule.Kind,
Name: prometheusRule.Name,
UID: prometheusRule.UID,
},
}
alertCRD.OwnerReferences = []metav1.OwnerReference{getOwnerReference(prometheusRule)}
alertCRD.Labels = map[string]string{"app.kubernetes.io/managed-by": prometheusRule.Name}
if val, ok := prometheusRule.Labels["app.coralogix.com/managed-by-alertmanger-config"]; ok {
alertCRD.Labels["app.coralogix.com/managed-by-alertmanger-config"] = val
@@ -197,21 +183,32 @@ func (r *PrometheusRuleReconciler) convertPrometheusRuleAlertToCxAlert(ctx conte
}
}

//Converting the PrometheusRule to the desired Alert.
alertCRD.Spec = prometheusRuleToCoralogixAlertSpec(rule)
alertCRD.OwnerReferences = []metav1.OwnerReference{
{
APIVersion: prometheusRule.APIVersion,
Kind: prometheusRule.Kind,
Name: prometheusRule.Name,
UID: prometheusRule.UID,
},
updated := false
desiredSpec := prometheusRuleToCoralogixAlertSpec(rule)
// We keep NotificationGroups on update, to not override AlertmanagerConfig controller settings
desiredSpec.NotificationGroups = alertCRD.Spec.NotificationGroups
if !reflect.DeepEqual(alertCRD.Spec, desiredSpec) {
desiredSpec.DeepCopyInto(&alertCRD.Spec)
updated = true
}
if val, ok := prometheusRule.Labels["app.coralogix.com/managed-by-alertmanger-config"]; ok {
alertCRD.Labels["app.coralogix.com/managed-by-alertmanger-config"] = val

desiredOwnerReferences := []metav1.OwnerReference{getOwnerReference(prometheusRule)}
if !reflect.DeepEqual(alertCRD.OwnerReferences, desiredOwnerReferences) {
alertCRD.OwnerReferences = desiredOwnerReferences
updated = true
}
if err := r.Update(ctx, alertCRD); err != nil {
return fmt.Errorf("received an error while trying to update Alert CRD: %w", err)

if promRuleVal, ok := prometheusRule.Labels["app.coralogix.com/managed-by-alertmanger-config"]; ok {
if alertVal, ok := alertCRD.Labels["app.coralogix.com/managed-by-alertmanger-config"]; !ok || alertVal != promRuleVal {
alertCRD.Labels["app.coralogix.com/managed-by-alertmanger-config"] = promRuleVal
updated = true
}
}

if updated {
if err := r.Update(ctx, alertCRD); err != nil {
return fmt.Errorf("received an error while trying to update Alert CRD: %w", err)
}
}
}
}
@@ -298,7 +295,7 @@ func prometheusInnerRulesToCoralogixInnerRules(rules []prometheus.Rule) []coralo
}

func prometheusRuleToCoralogixAlertSpec(rule prometheus.Rule) coralogixv1alpha1.AlertSpec {
return coralogixv1alpha1.AlertSpec{
alertSpec := coralogixv1alpha1.AlertSpec{
Description: rule.Annotations["description"],
Severity: getSeverity(rule),
NotificationGroups: []coralogixv1alpha1.NotificationGroup{
Expand Down Expand Up @@ -328,6 +325,8 @@ func prometheusRuleToCoralogixAlertSpec(rule prometheus.Rule) coralogixv1alpha1.
},
Labels: rule.Labels,
}

return alertSpec
}

func getSeverity(rule prometheus.Rule) coralogixv1alpha1.AlertSeverity {
@@ -388,6 +387,15 @@ var prometheusAlertForToCoralogixPromqlAlertTimeWindow = map[prometheus.Duration
"24h": coralogixv1alpha1.MetricTimeWindow(coralogixv1alpha1.TimeWindowTwentyFourHours),
}

func getOwnerReference(promRule *prometheus.PrometheusRule) metav1.OwnerReference {
return metav1.OwnerReference{
APIVersion: promRule.APIVersion,
Kind: promRule.Kind,
Name: promRule.Name,
UID: promRule.UID,
}
}

// SetupWithManager sets up the controller with the Manager.
func (r *PrometheusRuleReconciler) SetupWithManager(mgr ctrl.Manager) error {
shouldTrackPrometheusRules := func(labels map[string]string) bool {
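Two smaller refactors in this file: the duplicated OwnerReference literals collapse into the new `getOwnerReference` helper, and the alert-sync loop now compares a desired spec against the existing Alert before writing, deliberately carrying over `NotificationGroups` so the AlertmanagerConfig controller's settings are not overwritten. A compact restatement of that guard, assuming the package's existing `prometheusRuleToCoralogixAlertSpec` helper and an illustrative function name, could be:

```go
package controllers

import (
	"reflect"

	prometheus "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"

	coralogixv1alpha1 "github.com/coralogix/coralogix-operator/apis/coralogix/v1alpha1"
)

// needsSpecUpdate is illustrative only (the name is not from this PR). It
// derives the desired spec from the PrometheusRule rule, copies the Alert's
// current notification groups into it so they are never overwritten here,
// and reports whether anything else actually differs.
func needsSpecUpdate(alertCRD *coralogixv1alpha1.Alert, rule prometheus.Rule) bool {
	desired := prometheusRuleToCoralogixAlertSpec(rule)
	desired.NotificationGroups = alertCRD.Spec.NotificationGroups
	return !reflect.DeepEqual(alertCRD.Spec, desired)
}
```

When the comparison shows a difference, the PR copies the desired spec in with DeepCopyInto and sets `updated = true`, batching the spec, owner-reference, and label changes into a single `r.Update` at the end of the loop body.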
2 changes: 1 addition & 1 deletion kuttl-test.yaml
@@ -4,4 +4,4 @@ testDirs:
- tests/integration/prometheusrules
- tests/integration/alertmangerconfigs
namespace: default
timeout: 60
timeout: 360