From 4e80b3de1a7eadafbb0cc14b2e492657e59eea56 Mon Sep 17 00:00:00 2001 From: Eric Weber Date: Fri, 28 Jun 2024 13:50:29 -0500 Subject: [PATCH] fix(snapshot): don't emit a warning event for normal operations Longhorn 2187 Signed-off-by: Eric Weber (cherry picked from commit d586cb714684e4de4334cdd962d4257424799513) --- constant/events.go | 5 ----- controller/snapshot_controller.go | 22 +++++++++++++++++----- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/constant/events.go b/constant/events.go index 7240bece12..12dc2305e1 100644 --- a/constant/events.go +++ b/constant/events.go @@ -62,10 +62,5 @@ const ( EventReasonReady = "Ready" EventReasonUploaded = "Uploaded" -<<<<<<< HEAD -======= - EventReasonUpgrade = "Upgrade" - ->>>>>>> 147bdb67 (style(snapshot): use event reason constants in controller) EventReasonRolloutSkippedFmt = "RolloutSkipped: %v %v" ) diff --git a/controller/snapshot_controller.go b/controller/snapshot_controller.go index a1a79f9dbf..343d938a62 100644 --- a/controller/snapshot_controller.go +++ b/controller/snapshot_controller.go @@ -31,6 +31,10 @@ import ( longhorn "github.com/longhorn/longhorn-manager/k8s/pkg/apis/longhorn/v1beta2" ) +const ( + snapshotErrorLost = "lost track of the corresponding snapshot info inside volume engine" +) + type SnapshotController struct { *baseController @@ -466,9 +470,10 @@ func (sc *SnapshotController) reconcile(snapshotName string) (err error) { snapshotInfo, ok := engine.Status.Snapshots[snapshot.Name] if !ok { if !requestCreateNewSnapshot || alreadyCreatedBefore { - // The snapshotInfo exists inside engine.Status.Snapshots before but disappears now. - // Mark snapshotCR as lost track of the corresponding snapshotInfo - snapshot.Status.Error = "lost track of the corresponding snapshot info inside volume engine" + // The snapshotInfo existed inside engine.Status.Snapshots before but is gone now. This often doesn't + // signify an actual problem (e.g. if the snapshot is deleted by the engine process itself during a purge), + // but the snapshot controller can't reconcile the status anymore. Add a message to the CR. + snapshot.Status.Error = snapshotErrorLost } // Newly created snapshotCR, wait for the snapshotInfo to be appeared inside engine.Status.Snapshot snapshot.Status.ReadyToUse = false @@ -554,13 +559,20 @@ func (sc *SnapshotController) handleAttachmentTicketCreation(snap *longhorn.Snap func (sc *SnapshotController) generatingEventsForSnapshot(existingSnapshot, snapshot *longhorn.Snapshot) { if !existingSnapshot.Status.MarkRemoved && snapshot.Status.MarkRemoved { - sc.eventRecorder.Event(snapshot, corev1.EventTypeWarning, constant.EventReasonDelete, "snapshot is marked as removed") + sc.eventRecorder.Event(snapshot, corev1.EventTypeNormal, constant.EventReasonDelete, "snapshot is marked as removed") } if snapshot.Spec.CreateSnapshot && existingSnapshot.Status.CreationTime == "" && snapshot.Status.CreationTime != "" { sc.eventRecorder.Event(snapshot, corev1.EventTypeNormal, constant.EventReasonCreate, "successfully provisioned the snapshot") } if snapshot.Status.Error != "" && existingSnapshot.Status.Error != snapshot.Status.Error { - sc.eventRecorder.Eventf(snapshot, corev1.EventTypeWarning, constant.EventReasonFailed, "%v", snapshot.Status.Error) + if snapshot.Status.Error == snapshotErrorLost { + // There are probably scenarios when this is an actual problem, so we want to continue to emit the event. + // However, it most often occurs in scenarios like https://github.com/longhorn/longhorn/issues/4126, so we + // want to use EventTypeNormal instead of EventTypeWarning. + sc.eventRecorder.Event(snapshot, corev1.EventTypeNormal, constant.EventReasonDelete, "snapshot was removed from engine") + } else { + sc.eventRecorder.Eventf(snapshot, corev1.EventTypeWarning, constant.EventReasonFailed, "%v", snapshot.Status.Error) + } } if existingSnapshot.Status.ReadyToUse != snapshot.Status.ReadyToUse { if snapshot.Status.ReadyToUse {