From 2d6d2cc2258a73c5f3df73a39aa876a54f89d6cf Mon Sep 17 00:00:00 2001 From: Nir Soffer Date: Sun, 1 Dec 2024 23:49:22 +0200 Subject: [PATCH] Avoid bogus errors during deletion When deleting the DRPC we may need to adopt the VRG, delete the secondary VRG, wait until the secondary VRG is deleted, delete the primary VRG, and wait until the primary VRG is deleted. This takes 60-90 seconds and many reconciles (18 seen in an e2e test), and creates a huge amount of noise in the log. Suppress the noise using the util.OperationInProgress error. When the reconcile is successful but it is still in progress, we return a util.OperationInProgress error describing the current progression. The top-level error handler logs an INFO message and requeues the request. With this change we will see multiple logs for the secondary VRG: INFO Deleting DRPC in progress {"reason", "secondary VRG deletion in progress"} ... And finally more logs for the primary VRG: INFO Deleting DRPC in progress {"reason", "primary VRG deletion in progress"} ... Notes: - We logged errors during finalizeDRPC twice: once as an INFO log, and once as an ERROR with a stacktrace when we return an error from the reconcile. Remove the duplicate INFO log. - The linter is not happy about the new nested if. We can avoid this by extracting a helper to handle finalize errors, but I want to keep the change minimal for easy backport. We can improve this later upstream. 
Signed-off-by: Nir Soffer --- .../drplacementcontrol_controller.go | 20 +++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/internal/controller/drplacementcontrol_controller.go b/internal/controller/drplacementcontrol_controller.go index 0dc142dbd..c28cb56cf 100644 --- a/internal/controller/drplacementcontrol_controller.go +++ b/internal/controller/drplacementcontrol_controller.go @@ -119,7 +119,7 @@ func (r *DRPlacementControlReconciler) SetupWithManager(mgr ctrl.Manager) error // For more details, check Reconcile and its Result here: // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.7.0/pkg/reconcile // -//nolint:funlen,gocognit,gocyclo,cyclop +//nolint:funlen,gocognit,gocyclo,cyclop,nestif func (r *DRPlacementControlReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { logger := r.Log.WithValues("DRPC", req.NamespacedName, "rid", uuid.New()) @@ -166,13 +166,21 @@ func (r *DRPlacementControlReconciler) Reconcile(ctx context.Context, req ctrl.R // then the DRPC should be deleted as well. The least we should do here is to clean up DPRC. err := r.processDeletion(ctx, drpc, placementObj, logger) if err != nil { - logger.Info(fmt.Sprintf("Error in deleting DRPC: (%v)", err)) - statusErr := r.setDeletionStatusAndUpdate(ctx, drpc) if statusErr != nil { err = fmt.Errorf("drpc deletion failed: %w and status update failed: %w", err, statusErr) + + return ctrl.Result{}, err + } + + // Is this an expected condition? + if rmnutil.IsOperationInProgress(err) { + logger.Info("Deleting DRPC in progress", "reason", err) + + return ctrl.Result{Requeue: true}, nil } + // Unexpected error. 
return ctrl.Result{}, err } @@ -736,7 +744,7 @@ func (r *DRPlacementControlReconciler) cleanupVRGs( } if len(vrgs) != 0 { - return fmt.Errorf("waiting for VRGs count to go to zero") + return rmnutil.OperationInProgress("waiting for VRGs count to go to zero") } // delete MCVs @@ -761,7 +769,7 @@ func (r *DRPlacementControlReconciler) ensureVRGsDeleted( for cluster, vrg := range vrgs { if vrg.Spec.ReplicationState == replicationState { if !ensureVRGsManagedByDRPC(r.Log, mwu, vrgs, drpc, vrgNamespace) { - return fmt.Errorf("%s VRG adoption in progress", replicationState) + return rmnutil.OperationInProgress(fmt.Sprintf("%s VRG adoption in progress", replicationState)) } if err := mwu.DeleteManifestWork(mwu.BuildManifestWorkName(rmnutil.MWTypeVRG), cluster); err != nil { @@ -773,7 +781,7 @@ func (r *DRPlacementControlReconciler) ensureVRGsDeleted( } if inProgress { - return fmt.Errorf("%s VRG manifestwork deletion in progress", replicationState) + return rmnutil.OperationInProgress(fmt.Sprintf("%s VRG manifestwork deletion in progress", replicationState)) } return nil