From 09771d4ce208ba6953e9fdd859afd5bd5a205544 Mon Sep 17 00:00:00 2001 From: changzhen Date: Mon, 19 Aug 2024 16:34:50 +0800 Subject: [PATCH] fix error of when cluster status condition update Signed-off-by: changzhen --- .../status/cluster_status_controller.go | 65 ++++++++++++++----- .../status/cluster_status_controller_test.go | 20 ++---- 2 files changed, 53 insertions(+), 32 deletions(-) diff --git a/pkg/controllers/status/cluster_status_controller.go b/pkg/controllers/status/cluster_status_controller.go index 85858384d0e2..90627eb562d9 100644 --- a/pkg/controllers/status/cluster_status_controller.go +++ b/pkg/controllers/status/cluster_status_controller.go @@ -153,7 +153,11 @@ func (c *ClusterStatusController) Reconcile(ctx context.Context, req controllerr return controllerruntime.Result{Requeue: true}, nil } - return c.syncClusterStatus(cluster) + err := c.syncClusterStatus(cluster) + if err != nil { + return controllerruntime.Result{}, err + } + return controllerruntime.Result{RequeueAfter: c.ClusterStatusUpdateFrequency.Duration}, nil } // SetupWithManager creates a controller and register to controller manager. @@ -169,7 +173,7 @@ func (c *ClusterStatusController) SetupWithManager(mgr controllerruntime.Manager }).Complete(c) } -func (c *ClusterStatusController) syncClusterStatus(cluster *clusterv1alpha1.Cluster) (controllerruntime.Result, error) { +func (c *ClusterStatusController) syncClusterStatus(cluster *clusterv1alpha1.Cluster) error { start := time.Now() defer func() { metrics.RecordClusterStatus(cluster) @@ -182,7 +186,7 @@ func (c *ClusterStatusController) syncClusterStatus(cluster *clusterv1alpha1.Clu clusterClient, err := c.ClusterClientSetFunc(cluster.Name, c.Client, c.ClusterClientOption) if err != nil { klog.Errorf("Failed to create a ClusterClient for the given member cluster: %v, err is : %v", cluster.Name, err) - return c.setStatusCollectionFailedCondition(cluster, currentClusterStatus, fmt.Sprintf("failed to create a ClusterClient: %v", err)) + return setStatusCollectionFailedCondition(c.Client, cluster, fmt.Sprintf("failed to create a ClusterClient: %v", err)) } online, healthy := getClusterHealthStatus(clusterClient) @@ -193,8 +197,7 @@ func (c *ClusterStatusController) syncClusterStatus(cluster *clusterv1alpha1.Clu if !online && readyCondition.Status != metav1.ConditionTrue { klog.V(2).Infof("Cluster(%s) still offline after %s, ensuring offline is set.", cluster.Name, c.ClusterFailureThreshold.Duration) - meta.SetStatusCondition(¤tClusterStatus.Conditions, *readyCondition) - return c.updateStatusIfNeeded(cluster, currentClusterStatus) + return updateStatusCondition(c.Client, cluster, *readyCondition) } // skip collecting cluster status if not ready @@ -211,15 +214,13 @@ func (c *ClusterStatusController) syncClusterStatus(cluster *clusterv1alpha1.Clu // can be safely removed from current controller. c.initializeGenericInformerManagerForCluster(clusterClient) - err := c.setCurrentClusterStatus(clusterClient, cluster, ¤tClusterStatus) + err = c.setCurrentClusterStatus(clusterClient, cluster, ¤tClusterStatus) if err != nil { - return controllerruntime.Result{Requeue: true}, err + return err } } - meta.SetStatusCondition(¤tClusterStatus.Conditions, *readyCondition) - - return c.updateStatusIfNeeded(cluster, currentClusterStatus) + return c.updateStatusIfNeeded(cluster, currentClusterStatus, *readyCondition) } func (c *ClusterStatusController) setCurrentClusterStatus(clusterClient *util.ClusterClient, cluster *clusterv1alpha1.Cluster, currentClusterStatus *clusterv1alpha1.ClusterStatus) error { @@ -266,22 +267,26 @@ func (c *ClusterStatusController) setCurrentClusterStatus(clusterClient *util.Cl return nil } -func (c *ClusterStatusController) setStatusCollectionFailedCondition(cluster *clusterv1alpha1.Cluster, currentClusterStatus clusterv1alpha1.ClusterStatus, message string) (controllerruntime.Result, error) { +func setStatusCollectionFailedCondition(c client.Client, cluster *clusterv1alpha1.Cluster, message string) error { readyCondition := util.NewCondition(clusterv1alpha1.ClusterConditionReady, statusCollectionFailed, message, metav1.ConditionFalse) - meta.SetStatusCondition(¤tClusterStatus.Conditions, readyCondition) - return c.updateStatusIfNeeded(cluster, currentClusterStatus) + return updateStatusCondition(c, cluster, readyCondition) } // updateStatusIfNeeded calls updateStatus only if the status of the member cluster is not the same as the old status -func (c *ClusterStatusController) updateStatusIfNeeded(cluster *clusterv1alpha1.Cluster, currentClusterStatus clusterv1alpha1.ClusterStatus) (controllerruntime.Result, error) { +func (c *ClusterStatusController) updateStatusIfNeeded(cluster *clusterv1alpha1.Cluster, currentClusterStatus clusterv1alpha1.ClusterStatus, conditions ...metav1.Condition) error { + for _, condition := range conditions { + meta.SetStatusCondition(¤tClusterStatus.Conditions, condition) + } if !equality.Semantic.DeepEqual(cluster.Status, currentClusterStatus) { klog.V(4).Infof("Start to update cluster status: %s", cluster.Name) err := retry.RetryOnConflict(retry.DefaultRetry, func() (err error) { cluster.Status.KubernetesVersion = currentClusterStatus.KubernetesVersion cluster.Status.APIEnablements = currentClusterStatus.APIEnablements - cluster.Status.Conditions = currentClusterStatus.Conditions cluster.Status.NodeSummary = currentClusterStatus.NodeSummary cluster.Status.ResourceSummary = currentClusterStatus.ResourceSummary + for _, condition := range conditions { + meta.SetStatusCondition(&cluster.Status.Conditions, condition) + } updateErr := c.Status().Update(context.TODO(), cluster) if updateErr == nil { return nil @@ -297,11 +302,37 @@ func (c *ClusterStatusController) updateStatusIfNeeded(cluster *clusterv1alpha1. }) if err != nil { klog.Errorf("Failed to update health status of the member cluster: %v, err is : %v", cluster.Name, err) - return controllerruntime.Result{Requeue: true}, err + return err } } - return controllerruntime.Result{RequeueAfter: c.ClusterStatusUpdateFrequency.Duration}, nil + return nil +} + +func updateStatusCondition(c client.Client, cluster *clusterv1alpha1.Cluster, conditions ...metav1.Condition) error { + klog.V(4).Infof("Start to update cluster(%s) status condition", cluster.Name) + err := retry.RetryOnConflict(retry.DefaultRetry, func() (err error) { + for _, condition := range conditions { + meta.SetStatusCondition(&cluster.Status.Conditions, condition) + } + updateErr := c.Status().Update(context.TODO(), cluster) + if updateErr == nil { + return nil + } + + updated := &clusterv1alpha1.Cluster{} + if err = c.Get(context.TODO(), client.ObjectKey{Namespace: cluster.Namespace, Name: cluster.Name}, updated); err == nil { + cluster = updated + } else { + klog.Errorf("Failed to get updated cluster %s: %v", cluster.Name, err) + } + return updateErr + }) + if err != nil { + klog.Errorf("Failed to update status condition of the member cluster: %v, err is : %v", cluster.Name, err) + return err + } + return nil } func (c *ClusterStatusController) initializeGenericInformerManagerForCluster(clusterClient *util.ClusterClient) { diff --git a/pkg/controllers/status/cluster_status_controller_test.go b/pkg/controllers/status/cluster_status_controller_test.go index f9b9bd1bbedf..fb942125c359 100644 --- a/pkg/controllers/status/cluster_status_controller_test.go +++ b/pkg/controllers/status/cluster_status_controller_test.go @@ -228,9 +228,7 @@ func TestClusterStatusController_syncClusterStatus(t *testing.T) { if err := c.Client.Create(context.Background(), cluster); err != nil { t.Fatalf("Failed to create cluster: %v", err) } - res, err := c.syncClusterStatus(cluster) - expect := controllerruntime.Result{} - assert.Equal(t, expect, res) + err := c.syncClusterStatus(cluster) assert.Empty(t, err) }) t.Run("online is false, readyCondition.Status isn't true", func(t *testing.T) { @@ -274,10 +272,7 @@ func TestClusterStatusController_syncClusterStatus(t *testing.T) { if err := c.Client.Create(context.Background(), cluster); err != nil { t.Fatalf("Failed to create cluster: %v", err) } - - res, err := c.syncClusterStatus(cluster) - expect := controllerruntime.Result{} - assert.Equal(t, expect, res) + err := c.syncClusterStatus(cluster) assert.Empty(t, err) }) @@ -322,9 +317,7 @@ func TestClusterStatusController_syncClusterStatus(t *testing.T) { if err := c.Client.Create(context.Background(), cluster); err != nil { t.Fatalf("Failed to create cluster: %v", err) } - res, err := c.syncClusterStatus(cluster) - expect := controllerruntime.Result{} - assert.Equal(t, expect, res) + err := c.syncClusterStatus(cluster) assert.Empty(t, err) }) } @@ -913,8 +906,7 @@ func TestClusterStatusController_updateStatusIfNeeded(t *testing.T) { ClusterClientSetFunc: util.NewClusterClientSet, } - actual, err := c.updateStatusIfNeeded(cluster, currentClusterStatus) - assert.Equal(t, controllerruntime.Result{}, actual) + err := c.updateStatusIfNeeded(cluster, currentClusterStatus) assert.Empty(t, err, "updateStatusIfNeeded returns error") }) @@ -978,9 +970,7 @@ func TestClusterStatusController_updateStatusIfNeeded(t *testing.T) { ClusterClientSetFunc: util.NewClusterClientSet, } - actual, err := c.updateStatusIfNeeded(cluster, currentClusterStatus) - expect := controllerruntime.Result{Requeue: true} - assert.Equal(t, expect, actual) + err := c.updateStatusIfNeeded(cluster, currentClusterStatus) assert.NotEmpty(t, err, "updateStatusIfNeeded doesn't return error") }) }