From 0d83322c2a7e12108bff4ee78235f1278ddb98e2 Mon Sep 17 00:00:00 2001 From: Nir Soffer Date: Mon, 2 Dec 2024 20:13:54 +0200 Subject: [PATCH 1/3] Make kubeobjects.RequestProcessingError reusable This error is used to signal that an operation is in progress, and the caller should log the error in normal log level and try again later. This is a common concept for reconciling, and we can sue the same error in other packages. Changes: - Rename the error to OperationInProgress - Simplify to a string instead of embedding a string - update the tests to reflect the common usage with errors.Is(). Signed-off-by: Nir Soffer --- internal/controller/kubeobjects/requests.go | 13 +++++++------ internal/controller/kubeobjects/requests_test.go | 8 ++++---- internal/controller/kubeobjects/velero/requests.go | 12 ++++++------ internal/controller/vrg_kubeobjects.go | 4 ++-- 4 files changed, 19 insertions(+), 18 deletions(-) diff --git a/internal/controller/kubeobjects/requests.go b/internal/controller/kubeobjects/requests.go index f9b437037..1c5e88175 100644 --- a/internal/controller/kubeobjects/requests.go +++ b/internal/controller/kubeobjects/requests.go @@ -43,8 +43,6 @@ func RequestsMapKeyedByName(requestsStruct Requests) map[string]Request { return requests } -type RequestProcessingError struct{ string } - type CaptureSpec struct { //+optional Name string `json:"name,omitempty"` @@ -138,12 +136,15 @@ type Operation struct { InverseOp string `json:"inverseOp,omitempty"` } -func RequestProcessingErrorCreate(s string) RequestProcessingError { return RequestProcessingError{s} } -func (e RequestProcessingError) Error() string { return e.string } +// OperationInProgress error is return when an operation is in progress and we wait for the desired state. The error +// string should describe the operation for logging the error. +type OperationInProgress string + +func (e OperationInProgress) Error() string { return string(e) } // Called by errors.Is() to match target. -func (RequestProcessingError) Is(target error) bool { - _, ok := target.(RequestProcessingError) +func (OperationInProgress) Is(target error) bool { + _, ok := target.(OperationInProgress) return ok } diff --git a/internal/controller/kubeobjects/requests_test.go b/internal/controller/kubeobjects/requests_test.go index 21a1cc4ae..12fdd15f1 100644 --- a/internal/controller/kubeobjects/requests_test.go +++ b/internal/controller/kubeobjects/requests_test.go @@ -14,14 +14,14 @@ import ( var _ = Describe("kubeobjects", func() { Context("comparing errors", func() { - err := kubeobjects.RequestProcessingErrorCreate("error") - other := kubeobjects.RequestProcessingErrorCreate("other") + err := kubeobjects.OperationInProgress("error") + target := kubeobjects.OperationInProgress("") It("is not equal", func() { - Expect(other == err).To(Equal(false)) + Expect(err == target).To(Equal(false)) }) It("is same error", func() { - Expect(errors.Is(other, err)).To(Equal(true)) + Expect(errors.Is(err, target)).To(Equal(true)) }) It("is not same error", func() { Expect(errors.Is(err, errors.New("error"))).To(Equal(false)) diff --git a/internal/controller/kubeobjects/velero/requests.go b/internal/controller/kubeobjects/velero/requests.go index 919ea225c..e39dac1fa 100644 --- a/internal/controller/kubeobjects/velero/requests.go +++ b/internal/controller/kubeobjects/velero/requests.go @@ -249,11 +249,11 @@ func backupDummyStatusProcessAndRestore( velero.BackupPhaseUploading, velero.BackupPhaseUploadingPartialFailure, velero.BackupPhaseDeleting: - return nil, kubeobjects.RequestProcessingErrorCreate("backup" + string(backup.Status.Phase)) + return nil, kubeobjects.OperationInProgress("backup" + string(backup.Status.Phase)) case velero.BackupPhaseFailedValidation: return nil, errors.New("backup" + string(backup.Status.Phase)) default: - return nil, kubeobjects.RequestProcessingErrorCreate("backup.status.phase absent") + return nil, kubeobjects.OperationInProgress("backup.status.phase absent") } } @@ -283,13 +283,13 @@ func restoreStatusProcess( return nil case velero.RestorePhaseNew, velero.RestorePhaseInProgress: - return kubeobjects.RequestProcessingErrorCreate("restore" + string(restore.Status.Phase)) + return kubeobjects.OperationInProgress("restore" + string(restore.Status.Phase)) case velero.RestorePhaseFailed, velero.RestorePhaseFailedValidation, velero.RestorePhasePartiallyFailed: return errors.New("restore" + string(restore.Status.Phase)) default: - return kubeobjects.RequestProcessingErrorCreate("restore.status.phase absent") + return kubeobjects.OperationInProgress("restore.status.phase absent") } } @@ -399,13 +399,13 @@ func backupRealStatusProcess( velero.BackupPhaseUploading, velero.BackupPhaseUploadingPartialFailure, velero.BackupPhaseDeleting: - return kubeobjects.RequestProcessingErrorCreate("backup" + string(backup.Status.Phase)) + return kubeobjects.OperationInProgress("backup" + string(backup.Status.Phase)) case velero.BackupPhaseFailedValidation, velero.BackupPhasePartiallyFailed, velero.BackupPhaseFailed: return errors.New("backup" + string(backup.Status.Phase)) default: - return kubeobjects.RequestProcessingErrorCreate("backup.status.phase absent") + return kubeobjects.OperationInProgress("backup.status.phase absent") } } diff --git a/internal/controller/vrg_kubeobjects.go b/internal/controller/vrg_kubeobjects.go index 766ccb8e6..61d36614a 100644 --- a/internal/controller/vrg_kubeobjects.go +++ b/internal/controller/vrg_kubeobjects.go @@ -361,7 +361,7 @@ func (v *VRGInstance) kubeObjectsGroupCapture( continue } - if errors.Is(err, kubeobjects.RequestProcessingError{}) { + if errors.Is(err, kubeobjects.OperationInProgress("")) { log1.Info("Kube objects group capturing", "state", err.Error()) continue @@ -695,7 +695,7 @@ func (v *VRGInstance) executeRecoverGroup(result *ctrl.Result, s3StoreAccessor s } } - if errors.Is(err, kubeobjects.RequestProcessingError{}) { + if errors.Is(err, kubeobjects.OperationInProgress("")) { log1.Info("Kube objects group recovering", "state", err.Error()) return err From 43ff7faa9051b17275e634060db28fb4c47ebea3 Mon Sep 17 00:00:00 2001 From: Nir Soffer Date: Mon, 2 Dec 2024 20:30:35 +0200 Subject: [PATCH 2/3] Move OperationInProgress to util We want to use it more in ramen, to remove bugus errors for logs. To encourage use this this pattern and make it easier to use correctly, add util.IsOperationInProgress() helper. Improve the test to use the new IsOperationInProgress() and test also wrapped OperationInProgress error. Signed-off-by: Nir Soffer --- Makefile | 3 -- internal/controller/kubeobjects/requests.go | 13 -------- .../controller/kubeobjects/requests_test.go | 30 ----------------- .../controller/kubeobjects/velero/requests.go | 13 ++++---- internal/controller/util/errors.go | 24 ++++++++++++++ internal/controller/util/errors_test.go | 33 +++++++++++++++++++ internal/controller/vrg_kubeobjects.go | 4 +-- 7 files changed, 66 insertions(+), 54 deletions(-) delete mode 100644 internal/controller/kubeobjects/requests_test.go create mode 100644 internal/controller/util/errors.go create mode 100644 internal/controller/util/errors_test.go diff --git a/Makefile b/Makefile index e9bd192bc..e0a67f56d 100644 --- a/Makefile +++ b/Makefile @@ -190,9 +190,6 @@ test-util: generate manifests envtest ## Run util tests. test-util-pvc: generate manifests envtest ## Run util-pvc tests. go test ./internal/controller/util -coverprofile cover.out -ginkgo.focus PVCS_Util -test-kubeobjects: ## Run kubeobjects tests. - go test ./internal/controller/kubeobjects -coverprofile cover.out -ginkgo.focus Kubeobjects - test-drenv: ## Run drenv tests. $(MAKE) -C test diff --git a/internal/controller/kubeobjects/requests.go b/internal/controller/kubeobjects/requests.go index 1c5e88175..73b5f845c 100644 --- a/internal/controller/kubeobjects/requests.go +++ b/internal/controller/kubeobjects/requests.go @@ -136,19 +136,6 @@ type Operation struct { InverseOp string `json:"inverseOp,omitempty"` } -// OperationInProgress error is return when an operation is in progress and we wait for the desired state. The error -// string should describe the operation for logging the error. -type OperationInProgress string - -func (e OperationInProgress) Error() string { return string(e) } - -// Called by errors.Is() to match target. -func (OperationInProgress) Is(target error) bool { - _, ok := target.(OperationInProgress) - - return ok -} - type RequestsManager interface { ProtectsPath() string RecoversPath() string diff --git a/internal/controller/kubeobjects/requests_test.go b/internal/controller/kubeobjects/requests_test.go deleted file mode 100644 index 12fdd15f1..000000000 --- a/internal/controller/kubeobjects/requests_test.go +++ /dev/null @@ -1,30 +0,0 @@ -// SPDX-FileCopyrightText: The RamenDR authors -// SPDX-License-Identifier: Apache-2.0 - -package kubeobjects_test - -import ( - "errors" - - . "github.com/onsi/ginkgo/v2" - . "github.com/onsi/gomega" - - "github.com/ramendr/ramen/internal/controller/kubeobjects" -) - -var _ = Describe("kubeobjects", func() { - Context("comparing errors", func() { - err := kubeobjects.OperationInProgress("error") - target := kubeobjects.OperationInProgress("") - - It("is not equal", func() { - Expect(err == target).To(Equal(false)) - }) - It("is same error", func() { - Expect(errors.Is(err, target)).To(Equal(true)) - }) - It("is not same error", func() { - Expect(errors.Is(err, errors.New("error"))).To(Equal(false)) - }) - }) -}) diff --git a/internal/controller/kubeobjects/velero/requests.go b/internal/controller/kubeobjects/velero/requests.go index e39dac1fa..c15750bc8 100644 --- a/internal/controller/kubeobjects/velero/requests.go +++ b/internal/controller/kubeobjects/velero/requests.go @@ -17,6 +17,7 @@ import ( "github.com/go-logr/logr" "github.com/ramendr/ramen/internal/controller/kubeobjects" + "github.com/ramendr/ramen/internal/controller/util" velero "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" corev1 "k8s.io/api/core/v1" k8serrors "k8s.io/apimachinery/pkg/api/errors" @@ -249,11 +250,11 @@ func backupDummyStatusProcessAndRestore( velero.BackupPhaseUploading, velero.BackupPhaseUploadingPartialFailure, velero.BackupPhaseDeleting: - return nil, kubeobjects.OperationInProgress("backup" + string(backup.Status.Phase)) + return nil, util.OperationInProgress("backup" + string(backup.Status.Phase)) case velero.BackupPhaseFailedValidation: return nil, errors.New("backup" + string(backup.Status.Phase)) default: - return nil, kubeobjects.OperationInProgress("backup.status.phase absent") + return nil, util.OperationInProgress("backup.status.phase absent") } } @@ -283,13 +284,13 @@ func restoreStatusProcess( return nil case velero.RestorePhaseNew, velero.RestorePhaseInProgress: - return kubeobjects.OperationInProgress("restore" + string(restore.Status.Phase)) + return util.OperationInProgress("restore" + string(restore.Status.Phase)) case velero.RestorePhaseFailed, velero.RestorePhaseFailedValidation, velero.RestorePhasePartiallyFailed: return errors.New("restore" + string(restore.Status.Phase)) default: - return kubeobjects.OperationInProgress("restore.status.phase absent") + return util.OperationInProgress("restore.status.phase absent") } } @@ -399,13 +400,13 @@ func backupRealStatusProcess( velero.BackupPhaseUploading, velero.BackupPhaseUploadingPartialFailure, velero.BackupPhaseDeleting: - return kubeobjects.OperationInProgress("backup" + string(backup.Status.Phase)) + return util.OperationInProgress("backup" + string(backup.Status.Phase)) case velero.BackupPhaseFailedValidation, velero.BackupPhasePartiallyFailed, velero.BackupPhaseFailed: return errors.New("backup" + string(backup.Status.Phase)) default: - return kubeobjects.OperationInProgress("backup.status.phase absent") + return util.OperationInProgress("backup.status.phase absent") } } diff --git a/internal/controller/util/errors.go b/internal/controller/util/errors.go new file mode 100644 index 000000000..a751bff18 --- /dev/null +++ b/internal/controller/util/errors.go @@ -0,0 +1,24 @@ +// SPDX-FileCopyrightText: The RamenDR authors +// SPDX-License-Identifier: Apache-2.0package util + +package util + +import "errors" + +// OperationInProgress error is return when an operation is in progress and we wait for the desired state. The error +// string should describe the operation for logging the error. +type OperationInProgress string + +func (e OperationInProgress) Error() string { return string(e) } + +// Called by errors.Is() to match target. +func (OperationInProgress) Is(target error) bool { + _, ok := target.(OperationInProgress) + + return ok +} + +// IsOperationInProgress returns true if err or error wrapped by it is an OperationInProgress error. +func IsOperationInProgress(err error) bool { + return errors.Is(err, OperationInProgress("")) +} diff --git a/internal/controller/util/errors_test.go b/internal/controller/util/errors_test.go new file mode 100644 index 000000000..3274daa6b --- /dev/null +++ b/internal/controller/util/errors_test.go @@ -0,0 +1,33 @@ +// SPDX-FileCopyrightText: The RamenDR authors +// SPDX-License-Identifier: Apache-2.0 + +package util_test + +import ( + "errors" + "fmt" + + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "github.com/ramendr/ramen/internal/controller/util" +) + +var _ = Describe("OperationInProgress", func() { + Context("comparing errors", func() { + err := util.OperationInProgress("this operation") + + It("is not equal", func() { + Expect(err == util.OperationInProgress("other operation")).To(Equal(false)) + }) + It("match error", func() { + Expect(util.IsOperationInProgress(err)).To(Equal(true)) + }) + It("match wrapped error", func() { + wrapped := fmt.Errorf("wrapping operation in progress: %w", err) + Expect(util.IsOperationInProgress(wrapped)).To(Equal(true)) + }) + It("does not match other errors", func() { + Expect(util.IsOperationInProgress(errors.New("other error"))).To(Equal(false)) + }) + }) +}) diff --git a/internal/controller/vrg_kubeobjects.go b/internal/controller/vrg_kubeobjects.go index 61d36614a..e5c337f77 100644 --- a/internal/controller/vrg_kubeobjects.go +++ b/internal/controller/vrg_kubeobjects.go @@ -361,7 +361,7 @@ func (v *VRGInstance) kubeObjectsGroupCapture( continue } - if errors.Is(err, kubeobjects.OperationInProgress("")) { + if util.IsOperationInProgress(err) { log1.Info("Kube objects group capturing", "state", err.Error()) continue @@ -695,7 +695,7 @@ func (v *VRGInstance) executeRecoverGroup(result *ctrl.Result, s3StoreAccessor s } } - if errors.Is(err, kubeobjects.OperationInProgress("")) { + if util.IsOperationInProgress(err) { log1.Info("Kube objects group recovering", "state", err.Error()) return err From ed177134fd5e5701abda0742668dc6d0f5b2aff2 Mon Sep 17 00:00:00 2001 From: Nir Soffer Date: Sun, 1 Dec 2024 23:49:22 +0200 Subject: [PATCH 3/3] Avoid bogus errors during deletion When deleting the DRPC we may need to adopt the VRG, delete the secondary VRG, wait until the secondary VRG is deleted, delete the primary VRG, and wait until the primary VRG is deleted. This takes 60-90 seconds and many reconciles (18 seen in e2e test), and creates huge amount of noise in the log. Suppress the noise using util.OperationInProgress error. When the reconcile is successful but it is still in progress, we return a util.OperationInProgress error describing the current progression. The top level error handler logs an INFO message and requeue the request. With this change we will see multiple logs for the secondary VRG: INFO Deleting DRPC in progress {"reason", "secondary VRG deletion in progress"} ... And finally more logs for the primary VRG: INFO Deleting DRPC in progress {"reason", "primary VRG deletion in progress"} ... Notes: - We logged errors during finalizeDRPC twice; once as INFO log, and once as ERROR with a stacktrace when we return error from the reconcile. Remove the duplicate INFO log. - The linter is not happy about the new nested if. We can avoid this by extracting a helper to handle finalize errors, but I want to keep the change minimal for easy backport. We can improve this later upstream. Signed-off-by: Nir Soffer --- .../drplacementcontrol_controller.go | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/internal/controller/drplacementcontrol_controller.go b/internal/controller/drplacementcontrol_controller.go index f3399490a..3cf2a730f 100644 --- a/internal/controller/drplacementcontrol_controller.go +++ b/internal/controller/drplacementcontrol_controller.go @@ -119,7 +119,7 @@ func (r *DRPlacementControlReconciler) SetupWithManager(mgr ctrl.Manager) error // For more details, check Reconcile and its Result here: // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.7.0/pkg/reconcile // -//nolint:funlen,gocognit,gocyclo,cyclop +//nolint:funlen,gocognit,gocyclo,cyclop,nestif func (r *DRPlacementControlReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { logger := r.Log.WithValues("DRPC", req.NamespacedName, "rid", uuid.New()) @@ -166,13 +166,21 @@ func (r *DRPlacementControlReconciler) Reconcile(ctx context.Context, req ctrl.R // then the DRPC should be deleted as well. The least we should do here is to clean up DPRC. err := r.processDeletion(ctx, drpc, placementObj, logger) if err != nil { - logger.Info(fmt.Sprintf("Error in deleting DRPC: (%v)", err)) - statusErr := r.setDeletionStatusAndUpdate(ctx, drpc) if statusErr != nil { err = fmt.Errorf("drpc deletion failed: %w and status update failed: %w", err, statusErr) + + return ctrl.Result{}, err + } + + // Is this an expected condition? + if rmnutil.IsOperationInProgress(err) { + logger.Info("Deleting DRPC in progress", "reason", err) + + return ctrl.Result{Requeue: true}, nil } + // Unexpected error. return ctrl.Result{}, err } @@ -736,7 +744,7 @@ func (r *DRPlacementControlReconciler) cleanupVRGs( } if len(vrgs) != 0 { - return fmt.Errorf("waiting for VRGs count to go to zero") + return rmnutil.OperationInProgress("waiting for VRGs count to go to zero") } // delete MCVs @@ -761,7 +769,7 @@ func (r *DRPlacementControlReconciler) ensureVRGsDeleted( for cluster, vrg := range vrgs { if vrg.Spec.ReplicationState == replicationState { if !ensureVRGsManagedByDRPC(r.Log, mwu, vrgs, drpc, vrgNamespace) { - return fmt.Errorf("%s VRG adoption in progress", replicationState) + return rmnutil.OperationInProgress(fmt.Sprintf("%s VRG adoption in progress", replicationState)) } if err := mwu.DeleteManifestWork(mwu.BuildManifestWorkName(rmnutil.MWTypeVRG), cluster); err != nil { @@ -773,7 +781,7 @@ func (r *DRPlacementControlReconciler) ensureVRGsDeleted( } if inProgress { - return fmt.Errorf("%s VRG manifestwork deletion in progress", replicationState) + return rmnutil.OperationInProgress(fmt.Sprintf("%s VRG manifestwork deletion in progress", replicationState)) } return nil