Skip to content

Commit

Permalink
Simplify the confirmRemoval code (#2148)
Browse files Browse the repository at this point in the history
  • Loading branch information
johscheuer authored Oct 20, 2024
1 parent be79b33 commit df5775a
Show file tree
Hide file tree
Showing 3 changed files with 76 additions and 77 deletions.
36 changes: 17 additions & 19 deletions controllers/remove_process_groups.go
Original file line number Diff line number Diff line change
Expand Up @@ -198,21 +198,21 @@ func removeProcessGroup(ctx context.Context, r *FoundationDBClusterReconciler, c
return deletionError
}

func confirmRemoval(ctx context.Context, logger logr.Logger, r *FoundationDBClusterReconciler, cluster *fdbv1beta2.FoundationDBCluster, processGroup *fdbv1beta2.ProcessGroupStatus) (bool, bool, error) {
func confirmRemoval(ctx context.Context, logger logr.Logger, r *FoundationDBClusterReconciler, cluster *fdbv1beta2.FoundationDBCluster, processGroup *fdbv1beta2.ProcessGroupStatus) (bool, error) {
canBeIncluded := true

podName := processGroup.GetPodName(cluster)
pod, err := r.PodLifecycleManager.GetPod(ctx, r, cluster, podName)
// If we get an error different from not found we will return the error.
if err != nil && !k8serrors.IsNotFound(err) {
return false, false, err
return false, err
}

// The Pod resource still exists, so we have to validate the deletion timestamp.
if err == nil {
if pod.DeletionTimestamp.IsZero() {
logger.Info("Waiting for process group to get torn down", "processGroupID", processGroup.ProcessGroupID, "pod", podName)
return false, false, nil
return false, internal.ResourceNotDeleted{Resource: pod}
}

// Pod is in terminating state so we don't want to block, but we also don't want to include it
Expand All @@ -223,40 +223,41 @@ func confirmRemoval(ctx context.Context, logger logr.Logger, r *FoundationDBClus
pvcs := &corev1.PersistentVolumeClaimList{}
err = r.List(ctx, pvcs, internal.GetSinglePodListOptions(cluster, processGroup.ProcessGroupID)...)
if err != nil {
return false, canBeIncluded, err
return false, err
}

if len(pvcs.Items) == 1 {
if pvcs.Items[0].DeletionTimestamp == nil {
logger.Info("Waiting for volume claim to get torn down", "processGroupID", processGroup.ProcessGroupID, "pvc", pvcs.Items[0].Name)
return false, false, nil
pvc := pvcs.Items[0]
if pvc.DeletionTimestamp == nil {
logger.Info("Waiting for volume claim to get torn down", "processGroupID", processGroup.ProcessGroupID, "pvc", pvc.Name)
return false, internal.ResourceNotDeleted{Resource: &pvc}
}

// PVC is in terminating state so we don't want to block, but we also don't want to include it
canBeIncluded = false
} else if len(pvcs.Items) > 1 {
return false, false, fmt.Errorf("multiple PVCs found for cluster %s, processGroupID %s", cluster.Name, processGroup.ProcessGroupID)
return false, fmt.Errorf("multiple PVCs found for cluster %s, processGroupID %s", cluster.Name, processGroup.ProcessGroupID)
}

service := &corev1.Service{}
err = r.Get(ctx, client.ObjectKey{Name: podName, Namespace: cluster.Namespace}, service)
// If we get an error different from not found we will return the error.
if err != nil && !k8serrors.IsNotFound(err) {
return false, false, err
return false, err
}

// The Pod resource still exists, so we have to validate the deletion timestamp.
if err == nil {
if service.DeletionTimestamp.IsZero() {
logger.Info("Waiting for process group to get torn down", "processGroupID", processGroup.ProcessGroupID, "service", podName)
return false, false, nil
return false, internal.ResourceNotDeleted{Resource: service}
}

// Service is in terminating state so we don't want to block, but we also don't want to include it
canBeIncluded = false
}

return true, canBeIncluded, nil
return canBeIncluded, nil
}

func includeProcessGroup(ctx context.Context, logger logr.Logger, r *FoundationDBClusterReconciler, cluster *fdbv1beta2.FoundationDBCluster, removedProcessGroups map[fdbv1beta2.ProcessGroupID]bool, status *fdbv1beta2.FoundationDBStatus, adminClient fdbadminclient.AdminClient) error {
Expand Down Expand Up @@ -420,18 +421,15 @@ func (r *FoundationDBClusterReconciler) removeProcessGroups(ctx context.Context,
// We have to check if the currently removed process groups are completely removed.
// In addition, we have to check if one of the terminating process groups has been cleaned up.
for _, processGroup := range processGroups {
removed, include, err := confirmRemoval(ctx, logger, r, cluster, processGroup)
if err != nil {
include, err := confirmRemoval(ctx, logger, r, cluster, processGroup)
if err != nil && !internal.IsResourceNotDeleted(err) {
logger.Error(err, "Error during confirm process group removal", "processGroupID", processGroup.ProcessGroupID)
continue
}

if removed {
// Pods that are stuck in terminating shouldn't block reconciliation, but we also
// don't want to include them since they have an unknown state.
removedProcessGroups[processGroup.ProcessGroupID] = include
continue
}
// Pods that are stuck in terminating shouldn't block reconciliation, but we also
// don't want to include them since they have an unknown state.
removedProcessGroups[processGroup.ProcessGroupID] = include
}

return removedProcessGroups
Expand Down
97 changes: 40 additions & 57 deletions controllers/remove_process_groups_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -130,9 +130,9 @@ var _ = Describe("remove_process_groups", func() {
It("should not remove that process group", func() {
Expect(result).To(BeNil())
// Ensure resources are not deleted
removed, include, err := confirmRemoval(context.Background(), globalControllerLogger, clusterReconciler, cluster, removedProcessGroup)
Expect(err).To(BeNil())
Expect(removed).To(BeFalse())
include, err := confirmRemoval(context.Background(), globalControllerLogger, clusterReconciler, cluster, removedProcessGroup)
Expect(err).NotTo(BeNil())
Expect(internal.IsResourceNotDeleted(err)).To(BeTrue())
Expect(include).To(BeFalse())
})
})
Expand All @@ -149,9 +149,8 @@ var _ = Describe("remove_process_groups", func() {
It("should successfully remove that process group", func() {
Expect(result).To(BeNil())
// Ensure resources are deleted
removed, include, err := confirmRemoval(context.Background(), globalControllerLogger, clusterReconciler, cluster, removedProcessGroup)
include, err := confirmRemoval(context.Background(), globalControllerLogger, clusterReconciler, cluster, removedProcessGroup)
Expect(err).To(BeNil())
Expect(removed).To(BeTrue())
Expect(include).To(BeTrue())
})
})
Expand All @@ -161,9 +160,8 @@ var _ = Describe("remove_process_groups", func() {
It("should successfully remove that process group", func() {
Expect(result).To(BeNil())
// Ensure resources are deleted
removed, include, err := confirmRemoval(context.Background(), globalControllerLogger, clusterReconciler, cluster, removedProcessGroup)
include, err := confirmRemoval(context.Background(), globalControllerLogger, clusterReconciler, cluster, removedProcessGroup)
Expect(err).To(BeNil())
Expect(removed).To(BeTrue())
Expect(include).To(BeTrue())
})
})
Expand All @@ -187,9 +185,9 @@ var _ = Describe("remove_process_groups", func() {
Expect(result).NotTo(BeNil())
Expect(result.message).To(Equal("Removals cannot proceed because cluster has degraded fault tolerance"))
// Ensure resources are not deleted
removed, include, err := confirmRemoval(context.Background(), globalControllerLogger, clusterReconciler, cluster, removedProcessGroup)
Expect(err).To(BeNil())
Expect(removed).To(BeFalse())
include, err := confirmRemoval(context.Background(), globalControllerLogger, clusterReconciler, cluster, removedProcessGroup)
Expect(err).NotTo(BeNil())
Expect(internal.IsResourceNotDeleted(err)).To(BeTrue())
Expect(include).To(BeFalse())
})
})
Expand All @@ -211,9 +209,9 @@ var _ = Describe("remove_process_groups", func() {
Expect(result).NotTo(BeNil())
Expect(result.message).To(Equal("Removals cannot proceed because cluster has degraded fault tolerance"))
// Ensure resources are not deleted
removed, include, err := confirmRemoval(context.Background(), globalControllerLogger, clusterReconciler, cluster, removedProcessGroup)
Expect(err).To(BeNil())
Expect(removed).To(BeFalse())
include, err := confirmRemoval(context.Background(), globalControllerLogger, clusterReconciler, cluster, removedProcessGroup)
Expect(err).NotTo(BeNil())
Expect(internal.IsResourceNotDeleted(err)).To(BeTrue())
Expect(include).To(BeFalse())
})
})
Expand All @@ -235,9 +233,9 @@ var _ = Describe("remove_process_groups", func() {
Expect(result).NotTo(BeNil())
Expect(result.curError).To(HaveOccurred())
// Ensure resources are not deleted
removed, include, err := confirmRemoval(context.Background(), globalControllerLogger, clusterReconciler, cluster, removedProcessGroup)
Expect(err).To(BeNil())
Expect(removed).To(BeFalse())
include, err := confirmRemoval(context.Background(), globalControllerLogger, clusterReconciler, cluster, removedProcessGroup)
Expect(err).NotTo(BeNil())
Expect(internal.IsResourceNotDeleted(err)).To(BeTrue())
Expect(include).To(BeFalse())
})
})
Expand Down Expand Up @@ -267,9 +265,8 @@ var _ = Describe("remove_process_groups", func() {
It("should successfully remove that process group", func() {
Expect(result).To(BeNil())
// Ensure resources are deleted
removed, include, err := confirmRemoval(context.Background(), globalControllerLogger, clusterReconciler, cluster, removedProcessGroup)
include, err := confirmRemoval(context.Background(), globalControllerLogger, clusterReconciler, cluster, removedProcessGroup)
Expect(err).To(BeNil())
Expect(removed).To(BeTrue())
Expect(include).To(BeTrue())
})
})
Expand All @@ -291,9 +288,9 @@ var _ = Describe("remove_process_groups", func() {
Expect(result).NotTo(BeNil())
Expect(result.message).To(Equal("Removals cannot proceed because cluster has degraded fault tolerance"))
// Ensure resources are not deleted
removed, include, err := confirmRemoval(context.Background(), globalControllerLogger, clusterReconciler, cluster, removedProcessGroup)
Expect(err).To(BeNil())
Expect(removed).To(BeFalse())
include, err := confirmRemoval(context.Background(), globalControllerLogger, clusterReconciler, cluster, removedProcessGroup)
Expect(err).NotTo(BeNil())
Expect(internal.IsResourceNotDeleted(err)).To(BeTrue())
Expect(include).To(BeFalse())
})
})
Expand Down Expand Up @@ -323,13 +320,11 @@ var _ = Describe("remove_process_groups", func() {
Expect(result).To(BeNil())
Expect(initialCnt - len(cluster.Status.ProcessGroups)).To(BeNumerically("==", 1))
// Check if resources are deleted
removed, include, err := confirmRemoval(context.Background(), globalControllerLogger, clusterReconciler, cluster, removedProcessGroup)
Expect(err).To(BeNil())
include, errFirst := confirmRemoval(context.Background(), globalControllerLogger, clusterReconciler, cluster, removedProcessGroup)
// Check if resources are deleted
removedSecondary, includeSecondary, err := confirmRemoval(context.Background(), globalControllerLogger, clusterReconciler, cluster, secondRemovedProcessGroup)
Expect(err).To(BeNil())
includeSecondary, errSecond := confirmRemoval(context.Background(), globalControllerLogger, clusterReconciler, cluster, secondRemovedProcessGroup)
// Make sure only one of the process groups was deleted.
Expect(removed).NotTo(Equal(removedSecondary))
Expect(errFirst).NotTo(Equal(errSecond))
Expect(include).NotTo(Equal(includeSecondary))
})

Expand All @@ -343,14 +338,12 @@ var _ = Describe("remove_process_groups", func() {
Expect(result).To(BeNil())
Expect(initialCnt - len(cluster.Status.ProcessGroups)).To(BeNumerically("==", 2))
// Ensure resources are deleted
removed, include, err := confirmRemoval(context.Background(), globalControllerLogger, clusterReconciler, cluster, secondRemovedProcessGroup)
include, err := confirmRemoval(context.Background(), globalControllerLogger, clusterReconciler, cluster, secondRemovedProcessGroup)
Expect(err).To(BeNil())
Expect(removed).To(BeTrue())
Expect(include).To(BeTrue())
// Ensure resources are deleted
removed, include, err = confirmRemoval(context.Background(), globalControllerLogger, clusterReconciler, cluster, removedProcessGroup)
include, err = confirmRemoval(context.Background(), globalControllerLogger, clusterReconciler, cluster, removedProcessGroup)
Expect(err).To(BeNil())
Expect(removed).To(BeTrue())
Expect(include).To(BeTrue())
})
})
Expand All @@ -364,14 +357,12 @@ var _ = Describe("remove_process_groups", func() {
Expect(result).To(BeNil())
Expect(initialCnt - len(cluster.Status.ProcessGroups)).To(BeNumerically("==", 2))
// Ensure resources are deleted
removed, include, err := confirmRemoval(context.Background(), globalControllerLogger, clusterReconciler, cluster, secondRemovedProcessGroup)
include, err := confirmRemoval(context.Background(), globalControllerLogger, clusterReconciler, cluster, secondRemovedProcessGroup)
Expect(err).To(BeNil())
Expect(removed).To(BeTrue())
Expect(include).To(BeTrue())
// Ensure resources are deleted
removed, include, err = confirmRemoval(context.Background(), globalControllerLogger, clusterReconciler, cluster, removedProcessGroup)
include, err = confirmRemoval(context.Background(), globalControllerLogger, clusterReconciler, cluster, removedProcessGroup)
Expect(err).To(BeNil())
Expect(removed).To(BeTrue())
Expect(include).To(BeTrue())
})
})
Expand All @@ -388,14 +379,14 @@ var _ = Describe("remove_process_groups", func() {
Expect(result.message).To(HavePrefix("not allowed to remove process groups, waiting:"))
Expect(initialCnt - len(cluster.Status.ProcessGroups)).To(BeNumerically("==", 0))
// Ensure resources are not deleted
removed, include, err := confirmRemoval(context.Background(), globalControllerLogger, clusterReconciler, cluster, removedProcessGroup)
Expect(err).To(BeNil())
Expect(removed).To(BeFalse())
include, err := confirmRemoval(context.Background(), globalControllerLogger, clusterReconciler, cluster, removedProcessGroup)
Expect(err).NotTo(BeNil())
Expect(internal.IsResourceNotDeleted(err)).To(BeTrue())
Expect(include).To(BeFalse())
// Ensure resources are deleted
removed, include, err = confirmRemoval(context.Background(), globalControllerLogger, clusterReconciler, cluster, secondRemovedProcessGroup)
Expect(err).To(BeNil())
Expect(removed).To(BeFalse())
// Ensure resources are not deleted
include, err = confirmRemoval(context.Background(), globalControllerLogger, clusterReconciler, cluster, secondRemovedProcessGroup)
Expect(err).NotTo(BeNil())
Expect(internal.IsResourceNotDeleted(err)).To(BeTrue())
Expect(include).To(BeFalse())
})
})
Expand Down Expand Up @@ -425,14 +416,12 @@ var _ = Describe("remove_process_groups", func() {
Expect(result).To(BeNil())
Expect(initialCnt - len(cluster.Status.ProcessGroups)).To(BeNumerically("==", 2))
// Ensure resources are deleted
removed, include, err := confirmRemoval(context.Background(), globalControllerLogger, clusterReconciler, cluster, removedProcessGroup)
include, err := confirmRemoval(context.Background(), globalControllerLogger, clusterReconciler, cluster, removedProcessGroup)
Expect(err).To(BeNil())
Expect(removed).To(BeTrue())
Expect(include).To(BeTrue())
// Ensure resources are deleted as the RemovalMode is PodUpdateModeAll
removed, include, err = confirmRemoval(context.Background(), globalControllerLogger, clusterReconciler, cluster, secondRemovedProcessGroup)
include, err = confirmRemoval(context.Background(), globalControllerLogger, clusterReconciler, cluster, secondRemovedProcessGroup)
Expect(err).To(BeNil())
Expect(removed).To(BeTrue())
Expect(include).To(BeTrue())
})

Expand All @@ -446,14 +435,12 @@ var _ = Describe("remove_process_groups", func() {
Expect(result).To(BeNil())
Expect(initialCnt - len(cluster.Status.ProcessGroups)).To(BeNumerically("==", 2))
// Ensure resources are deleted
removed, include, err := confirmRemoval(context.Background(), globalControllerLogger, clusterReconciler, cluster, secondRemovedProcessGroup)
include, err := confirmRemoval(context.Background(), globalControllerLogger, clusterReconciler, cluster, secondRemovedProcessGroup)
Expect(err).To(BeNil())
Expect(removed).To(BeTrue())
Expect(include).To(BeTrue())
// Ensure resources are deleted
removed, include, err = confirmRemoval(context.Background(), globalControllerLogger, clusterReconciler, cluster, removedProcessGroup)
include, err = confirmRemoval(context.Background(), globalControllerLogger, clusterReconciler, cluster, removedProcessGroup)
Expect(err).To(BeNil())
Expect(removed).To(BeTrue())
Expect(include).To(BeTrue())
})
})
Expand All @@ -467,14 +454,12 @@ var _ = Describe("remove_process_groups", func() {
Expect(result).To(BeNil())
Expect(initialCnt - len(cluster.Status.ProcessGroups)).To(BeNumerically("==", 2))
// Ensure resources are deleted
removed, include, err := confirmRemoval(context.Background(), globalControllerLogger, clusterReconciler, cluster, secondRemovedProcessGroup)
include, err := confirmRemoval(context.Background(), globalControllerLogger, clusterReconciler, cluster, secondRemovedProcessGroup)
Expect(err).To(BeNil())
Expect(removed).To(BeTrue())
Expect(include).To(BeTrue())
// Ensure resources are deleted
removed, include, err = confirmRemoval(context.Background(), globalControllerLogger, clusterReconciler, cluster, removedProcessGroup)
include, err = confirmRemoval(context.Background(), globalControllerLogger, clusterReconciler, cluster, removedProcessGroup)
Expect(err).To(BeNil())
Expect(removed).To(BeTrue())
Expect(include).To(BeTrue())
})
})
Expand All @@ -490,14 +475,12 @@ var _ = Describe("remove_process_groups", func() {
Expect(result).To(BeNil())
Expect(initialCnt - len(cluster.Status.ProcessGroups)).To(BeNumerically("==", 2))
// Ensure resources are not deleted
removed, include, err := confirmRemoval(context.Background(), globalControllerLogger, clusterReconciler, cluster, removedProcessGroup)
include, err := confirmRemoval(context.Background(), globalControllerLogger, clusterReconciler, cluster, removedProcessGroup)
Expect(err).To(BeNil())
Expect(removed).To(BeTrue())
Expect(include).To(BeTrue())
// Ensure resources are deleted
removed, include, err = confirmRemoval(context.Background(), globalControllerLogger, clusterReconciler, cluster, secondRemovedProcessGroup)
include, err = confirmRemoval(context.Background(), globalControllerLogger, clusterReconciler, cluster, secondRemovedProcessGroup)
Expect(err).To(BeNil())
Expect(removed).To(BeTrue())
Expect(include).To(BeTrue())
})
})
Expand Down
20 changes: 19 additions & 1 deletion internal/error_helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,32 @@ package internal

import (
"errors"
"fmt"
"net"
"strings"

fdbv1beta2 "github.com/FoundationDB/fdb-kubernetes-operator/api/v1beta2"
"sigs.k8s.io/controller-runtime/pkg/client"

fdbv1beta2 "github.com/FoundationDB/fdb-kubernetes-operator/api/v1beta2"
k8serrors "k8s.io/apimachinery/pkg/api/errors"
)

// ResourceNotDeleted can be returned if the resource is not yet deleted.
type ResourceNotDeleted struct {
Resource client.Object
}

// Error returns the error message of the internal timeout error.
func (err ResourceNotDeleted) Error() string {
return fmt.Sprintf("resource: %s is missing the deletion timestamp.", err.Resource.GetName())
}

// IsResourceNotDeleted returns true if provided error is of the type IsResourceNotDeleted.
func IsResourceNotDeleted(err error) bool {
var targetErr ResourceNotDeleted
return errors.As(err, &targetErr)
}

// IsNetworkError returns true if the network is a network error net.Error
func IsNetworkError(err error) bool {
var netError net.Error
Expand Down

0 comments on commit df5775a

Please sign in to comment.