From fb33f5e38b5f10714a30b913b1f8f01d91eb447c Mon Sep 17 00:00:00 2001 From: Jestin Woods Date: Fri, 15 Mar 2024 10:43:05 -0700 Subject: [PATCH] Fix bug where orphaned node pool statuses block cluster upgrade operations --- controllers/humiocluster_controller.go | 29 ++++++++++++++++++ controllers/humiocluster_services.go | 3 +- controllers/humiocluster_status.go | 30 ++++++++++++++++++- .../clusters/humiocluster_controller_test.go | 12 ++++++++ 4 files changed, 71 insertions(+), 3 deletions(-) diff --git a/controllers/humiocluster_controller.go b/controllers/humiocluster_controller.go index 6a940409..e4b0141a 100644 --- a/controllers/humiocluster_controller.go +++ b/controllers/humiocluster_controller.go @@ -108,6 +108,14 @@ func (r *HumioClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request emptyResult := reconcile.Result{} + if ok, idx := r.hasNoUnusedNodePoolStatus(hc, &humioNodePools); !ok { + r.cleanupUnusedNodePoolStatus(hc, idx) + if result, err := r.updateStatus(ctx, r.Client.Status(), hc, statusOptions(). + withNodePoolStatusList(hc.Status.NodePoolStatus)); err != nil { + return result, r.logErrorAndReturn(err, "unable to set cluster state") + } + } + defer func(ctx context.Context, humioClient humio.Client, hc *humiov1alpha1.HumioCluster) { _, _ = r.updateStatus(ctx, r.Client.Status(), hc, statusOptions(). withObservedGeneration(hc.GetGeneration())) @@ -416,6 +424,27 @@ func (r *HumioClusterReconciler) nodePoolsInMaintenance(hc *humiov1alpha1.HumioC return poolsInMaintenance } +func (r *HumioClusterReconciler) cleanupUnusedNodePoolStatus(hc *humiov1alpha1.HumioCluster, idx int) { + r.Log.Info(fmt.Sprintf("removing node pool %s from node pool status list", hc.Status.NodePoolStatus[idx].Name)) + hc.Status.NodePoolStatus = append(hc.Status.NodePoolStatus[:idx], hc.Status.NodePoolStatus[idx+1:]...) +} + +func (r *HumioClusterReconciler) hasNoUnusedNodePoolStatus(hc *humiov1alpha1.HumioCluster, hnps *HumioNodePoolList) (bool, int) { + for idx, poolStatus := range hc.Status.NodePoolStatus { + var validPool bool + for _, pool := range hnps.Items { + if poolStatus.Name == pool.GetNodePoolName() && pool.GetNodeCount() > 0 { + validPool = true + } + } + if !validPool { + r.Log.Info(fmt.Sprintf("node pool %s is not valid", poolStatus.Name)) + return false, idx + } + } + return true, 0 +} + func (r *HumioClusterReconciler) ensurePodRevisionAnnotation(ctx context.Context, hc *humiov1alpha1.HumioCluster, hnp *HumioNodePool) (string, error) { revisionKey, revisionValue := hnp.GetHumioClusterNodePoolRevisionAnnotation() if revisionValue == 0 { diff --git a/controllers/humiocluster_services.go b/controllers/humiocluster_services.go index f518ebc3..5c74e4dd 100644 --- a/controllers/humiocluster_services.go +++ b/controllers/humiocluster_services.go @@ -19,9 +19,8 @@ package controllers import ( "fmt" - "github.com/humio/humio-operator/pkg/helpers" - humiov1alpha1 "github.com/humio/humio-operator/api/v1alpha1" + "github.com/humio/humio-operator/pkg/helpers" "github.com/humio/humio-operator/pkg/kubernetes" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" diff --git a/controllers/humiocluster_status.go b/controllers/humiocluster_status.go index 0fd3bfac..990c353e 100644 --- a/controllers/humiocluster_status.go +++ b/controllers/humiocluster_status.go @@ -52,6 +52,10 @@ type stateOption struct { nodePoolName string } +type stateOptionList struct { + statesList []stateOption +} + type versionOption struct { version string } @@ -104,6 +108,17 @@ func (o *optionBuilder) withNodePoolState(state string, nodePoolName string) *op return o } +func (o *optionBuilder) withNodePoolStatusList(humioNodePoolStatusList humiov1alpha1.HumioNodePoolStatusList) *optionBuilder { + var statesList []stateOption + for _, poolStatus := range humioNodePoolStatusList { + statesList = append(statesList, stateOption{nodePoolName: poolStatus.Name, state: poolStatus.State}) + } + o.options = append(o.options, stateOptionList{ + statesList: statesList, + }) + return o +} + func (o *optionBuilder) withVersion(version string) *optionBuilder { o.options = append(o.options, versionOption{ version: version, @@ -159,7 +174,6 @@ func (s stateOption) Apply(hc *humiov1alpha1.HumioCluster) { hc.Status.NodePoolStatus[idx] = nodePoolStatus return } - } hc.Status.NodePoolStatus = append(hc.Status.NodePoolStatus, humiov1alpha1.HumioNodePoolStatus{ @@ -180,6 +194,20 @@ func (s stateOption) GetResult() (reconcile.Result, error) { return reconcile.Result{RequeueAfter: time.Second * 15}, nil } +func (s stateOptionList) Apply(hc *humiov1alpha1.HumioCluster) { + hc.Status.NodePoolStatus = humiov1alpha1.HumioNodePoolStatusList{} + for _, poolStatus := range s.statesList { + hc.Status.NodePoolStatus = append(hc.Status.NodePoolStatus, humiov1alpha1.HumioNodePoolStatus{ + Name: poolStatus.nodePoolName, + State: poolStatus.state, + }) + } +} + +func (s stateOptionList) GetResult() (reconcile.Result, error) { + return reconcile.Result{}, nil +} + func (v versionOption) Apply(hc *humiov1alpha1.HumioCluster) { hc.Status.Version = v.version } diff --git a/controllers/suite/clusters/humiocluster_controller_test.go b/controllers/suite/clusters/humiocluster_controller_test.go index de597c5a..227cef0a 100644 --- a/controllers/suite/clusters/humiocluster_controller_test.go +++ b/controllers/suite/clusters/humiocluster_controller_test.go @@ -831,6 +831,18 @@ var _ = Describe("HumioCluster Controller", func() { revisionKey, _ := mainNodePoolManager.GetHumioClusterNodePoolRevisionAnnotation() var updatedHumioCluster humiov1alpha1.HumioCluster + + suite.UsingClusterBy(key.Name, "Simulating migration from non-node pools or orphaned node pools") + Eventually(func() error { + updatedHumioCluster = humiov1alpha1.HumioCluster{} + err := k8sClient.Get(ctx, key, &updatedHumioCluster) + if err != nil { + return err + } + updatedHumioCluster.Status.NodePoolStatus = append(updatedHumioCluster.Status.NodePoolStatus, humiov1alpha1.HumioNodePoolStatus{Name: "orphaned", State: humiov1alpha1.HumioClusterStateUpgrading}) + return k8sClient.Status().Update(ctx, &updatedHumioCluster) + }, testTimeout, suite.TestInterval).Should(Succeed()) + clusterPods, _ := kubernetes.ListPods(ctx, k8sClient, key.Namespace, mainNodePoolManager.GetPodLabels()) for _, pod := range clusterPods { humioIndex, _ := kubernetes.GetContainerIndexByName(pod, controllers.HumioContainerName)