From 0a13825ff4be4384ac4c1f25e9a17da445f91488 Mon Sep 17 00:00:00 2001
From: subhamkrai
Date: Wed, 7 Feb 2024 21:20:22 +0530
Subject: [PATCH] core: remove namespace/ownerRef from networkFence

networkFence is a cluster-scoped resource, so the namespace and ownerReferences are not needed; setting them causes garbage-collector errors. Also, the networkFence is now created with a clusterUID label so that, during cleanup, the cephCluster UID can be matched against the networkFence's clusterUID label.

Signed-off-by: subhamkrai
(cherry picked from commit 5bff860380f34d1aa1ee9dceb9bcb2ed5d64da58)
---
 .../rook-ceph/templates/clusterrole.yaml | 2 +-
 deploy/examples/common.yaml | 2 +-
 pkg/operator/ceph/cluster/controller.go | 34 ++++++++++++++++++-
 pkg/operator/ceph/cluster/controller_test.go | 8 ++++-
 pkg/operator/ceph/cluster/watcher.go | 19 +++++++----
 pkg/operator/ceph/cluster/watcher_test.go | 22 ++++++------
 pkg/operator/ceph/csi/csi.go | 2 +-
 7 files changed, 65 insertions(+), 24 deletions(-)
diff --git a/deploy/charts/rook-ceph/templates/clusterrole.yaml b/deploy/charts/rook-ceph/templates/clusterrole.yaml index 58430cd9b3a2..cd2b8bb7656c 100644 --- a/deploy/charts/rook-ceph/templates/clusterrole.yaml +++ b/deploy/charts/rook-ceph/templates/clusterrole.yaml @@ -20,7 +20,7 @@ rules: verbs: ["create"] - apiGroups: ["csiaddons.openshift.io"] resources: ["networkfences"] - verbs: ["create", "get", "update", "delete", "watch", "list"] + verbs: ["create", "get", "update", "delete", "watch", "list", "deletecollection"] - apiGroups: ["apiextensions.k8s.io"] resources: ["customresourcedefinitions"] verbs: ["get"]
diff --git a/deploy/examples/common.yaml b/deploy/examples/common.yaml index 565d6ce5535e..84ee3953f11d 100644 --- a/deploy/examples/common.yaml +++ b/deploy/examples/common.yaml @@ -563,7 +563,7 @@ rules: verbs: ["create"] - apiGroups: ["csiaddons.openshift.io"] resources: ["networkfences"] - verbs: ["create", "get", "update", "delete", "watch", "list"] + verbs: ["create", "get", "update", "delete", "watch", "list", "deletecollection"] - apiGroups: ["apiextensions.k8s.io"] resources: ["customresourcedefinitions"] verbs: ["get"]
diff --git a/pkg/operator/ceph/cluster/controller.go b/pkg/operator/ceph/cluster/controller.go index 04dc765849f8..0112554fe210 100644 --- a/pkg/operator/ceph/cluster/controller.go +++ b/pkg/operator/ceph/cluster/controller.go @@ -38,6 +38,7 @@ import ( corev1 "k8s.io/api/core/v1" kerrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" apituntime "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/tools/record" @@ -387,9 +388,40 @@ func (c *ClusterController) requestClusterDelete(cluster *cephv1.CephCluster) (r nsName, existing.namespacedName.Name) return reconcile.Result{}, nil // do not requeue the delete } + _, err := c.context.ApiExtensionsClient.ApiextensionsV1().CustomResourceDefinitions().Get(c.OpManagerCtx, "networkfences.csiaddons.openshift.io", metav1.GetOptions{}) + if err == nil { + logger.Info("removing networkFence if matching cephCluster UID found") + networkFenceList := &addonsv1alpha1.NetworkFenceList{} + labelSelector := labels.SelectorFromSet(map[string]string{ + networkFenceLabel: string(cluster.GetUID()), + })

- logger.Infof("cleaning up CephCluster %q", nsName) + opts := []client.DeleteAllOfOption{ + client.MatchingLabels{ + networkFenceLabel: string(cluster.GetUID()), + }, + client.GracePeriodSeconds(0), + } + err = c.client.DeleteAllOf(c.OpManagerCtx, 
&addonsv1alpha1.NetworkFence{}, opts...) + if err != nil && !kerrors.IsNotFound(err) { + return reconcile.Result{}, errors.Wrapf(err, "failed to delete networkFence with label %s", networkFenceLabel) + } + + err = c.client.List(c.OpManagerCtx, networkFenceList, &client.MatchingLabelsSelector{Selector: labelSelector}) + if err != nil && !kerrors.IsNotFound(err) { + return reconcile.Result{}, errors.Wrap(err, "failed to list networkFence") + } + if len(networkFenceList.Items) > 0 { + for i := range networkFenceList.Items { + err = opcontroller.RemoveFinalizerWithName(c.OpManagerCtx, c.client, &networkFenceList.Items[i], "csiaddons.openshift.io/network-fence") + if err != nil { + return reconcile.Result{}, errors.Wrap(err, "failed to remove finalizer") + } + } + } + } + logger.Infof("cleaning up CephCluster %q", nsName) if cluster, ok := c.clusterMap[cluster.Namespace]; ok { // We used to stop the bucket controller here but when we get a DELETE event for the CephCluster // we will reload the CRD manager anyway so the bucket controller go routine will be stopped diff --git a/pkg/operator/ceph/cluster/controller_test.go b/pkg/operator/ceph/cluster/controller_test.go index a7adcefbe0ae..052030f4580c 100644 --- a/pkg/operator/ceph/cluster/controller_test.go +++ b/pkg/operator/ceph/cluster/controller_test.go @@ -21,11 +21,14 @@ import ( "testing" "time" + addonsv1alpha1 "github.com/csi-addons/kubernetes-csi-addons/apis/csiaddons/v1alpha1" cephv1 "github.com/rook/rook/pkg/apis/ceph.rook.io/v1" + rookclient "github.com/rook/rook/pkg/client/clientset/versioned/fake" "github.com/rook/rook/pkg/client/clientset/versioned/scheme" "github.com/rook/rook/pkg/clusterd" "github.com/stretchr/testify/assert" corev1 "k8s.io/api/core/v1" + apifake "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset/fake" kerrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -71,11 +74,14 @@ func TestReconcileDeleteCephCluster(t *testing.T) { // create a Rook-Ceph scheme to use for our tests scheme := runtime.NewScheme() assert.NoError(t, cephv1.AddToScheme(scheme)) + assert.NoError(t, addonsv1alpha1.AddToScheme(scheme)) t.Run("deletion blocked while dependencies exist", func(t *testing.T) { // set up clusterd.Context clusterdCtx := &clusterd.Context{ - Clientset: k8sfake.NewSimpleClientset(), + Clientset: k8sfake.NewSimpleClientset(), + RookClientset: rookclient.NewSimpleClientset(), + ApiExtensionsClient: apifake.NewSimpleClientset(), } // create the cluster controller and tell it that the cluster has been deleted diff --git a/pkg/operator/ceph/cluster/watcher.go b/pkg/operator/ceph/cluster/watcher.go index ab1e3f7b81c7..6b8f08d40567 100644 --- a/pkg/operator/ceph/cluster/watcher.go +++ b/pkg/operator/ceph/cluster/watcher.go @@ -51,7 +51,10 @@ type clientCluster struct { context *clusterd.Context } -var nodesCheckedForReconcile = sets.New[string]() +var ( + nodesCheckedForReconcile = sets.New[string]() + networkFenceLabel = "cephClusterUID" +) // drivers that supports fencing, used in naming networkFence object const ( @@ -511,8 +514,8 @@ func concatenateWatcherIp(address string) string { return watcherIP } -func fenceResourceName(nodeName, driver string) string { - return fmt.Sprintf("%s-%s", nodeName, driver) +func fenceResourceName(nodeName, driver, namespace string) string { + return fmt.Sprintf("%s-%s-%s", nodeName, driver, namespace) } func (c *clientCluster) createNetworkFence(ctx context.Context, pv corev1.PersistentVolume, node 
*corev1.Node, cluster *cephv1.CephCluster, cidr []string, driver string) error { @@ -531,8 +534,10 @@ func (c *clientCluster) createNetworkFence(ctx context.Context, pv corev1.Persis networkFence := &addonsv1alpha1.NetworkFence{ ObjectMeta: metav1.ObjectMeta{ - Name: fenceResourceName(node.Name, driver), - Namespace: cluster.Namespace, + Name: fenceResourceName(node.Name, driver, cluster.Namespace), + Labels: map[string]string{ + networkFenceLabel: string(cluster.GetUID()), + }, }, Spec: addonsv1alpha1.NetworkFenceSpec{ Driver: pv.Spec.CSI.Driver, @@ -560,7 +565,7 @@ func (c *clientCluster) createNetworkFence(ctx context.Context, pv corev1.Persis func (c *clientCluster) unfenceAndDeleteNetworkFence(ctx context.Context, node corev1.Node, cluster *cephv1.CephCluster, driver string) error { networkFence := &addonsv1alpha1.NetworkFence{} - err := c.client.Get(ctx, types.NamespacedName{Name: fenceResourceName(node.Name, driver), Namespace: cluster.Namespace}, networkFence) + err := c.client.Get(ctx, types.NamespacedName{Name: fenceResourceName(node.Name, driver, cluster.Namespace)}, networkFence) if err != nil && !errors.IsNotFound(err) { return err } else if errors.IsNotFound(err) { @@ -577,7 +582,7 @@ func (c *clientCluster) unfenceAndDeleteNetworkFence(ctx context.Context, node c } err = wait.PollUntilContextTimeout(ctx, 2*time.Second, 60*time.Second, true, func(ctx context.Context) (bool, error) { - err = c.client.Get(ctx, types.NamespacedName{Name: fenceResourceName(node.Name, driver), Namespace: cluster.Namespace}, networkFence) + err = c.client.Get(ctx, types.NamespacedName{Name: fenceResourceName(node.Name, driver, cluster.Namespace)}, networkFence) if err != nil && !errors.IsNotFound(err) { return false, err } diff --git a/pkg/operator/ceph/cluster/watcher_test.go b/pkg/operator/ceph/cluster/watcher_test.go index 5f467d5f84e3..6e527b9108aa 100644 --- a/pkg/operator/ceph/cluster/watcher_test.go +++ b/pkg/operator/ceph/cluster/watcher_test.go @@ -353,11 +353,11 @@ func TestHandleNodeFailure(t *testing.T) { assert.NoError(t, err) networkFenceRbd := &addonsv1alpha1.NetworkFence{} - err = c.client.Get(ctx, types.NamespacedName{Name: fenceResourceName(node.Name, rbdDriver), Namespace: cephCluster.Namespace}, networkFenceRbd) + err = c.client.Get(ctx, types.NamespacedName{Name: fenceResourceName(node.Name, rbdDriver, ns)}, networkFenceRbd) assert.NoError(t, err) networkFenceCephFs := &addonsv1alpha1.NetworkFence{} - err = c.client.Get(ctx, types.NamespacedName{Name: fenceResourceName(node.Name, cephfsDriver), Namespace: cephCluster.Namespace}, networkFenceCephFs) + err = c.client.Get(ctx, types.NamespacedName{Name: fenceResourceName(node.Name, cephfsDriver, ns)}, networkFenceCephFs) assert.NoError(t, err) networkFences := &addonsv1alpha1.NetworkFenceList{} @@ -367,12 +367,10 @@ func TestHandleNodeFailure(t *testing.T) { for _, fence := range networkFences.Items { // Check if the resource is in the desired namespace - if fence.Namespace == cephCluster.Namespace { - if strings.Contains(fence.Name, rbdDriver) { - rbdCount++ - } else if strings.Contains(fence.Name, cephfsDriver) { - cephFsCount++ - } + if strings.Contains(fence.Name, rbdDriver) { + rbdCount++ + } else if strings.Contains(fence.Name, cephfsDriver) { + cephFsCount++ } } @@ -431,10 +429,10 @@ func TestHandleNodeFailure(t *testing.T) { err = c.handleNodeFailure(ctx, cephCluster, node) assert.NoError(t, err) - err = c.client.Get(ctx, types.NamespacedName{Name: fenceResourceName(node.Name, rbdDriver), Namespace: cephCluster.Namespace}, 
networkFenceRbd) + err = c.client.Get(ctx, types.NamespacedName{Name: fenceResourceName(node.Name, rbdDriver, ns), Namespace: cephCluster.Namespace}, networkFenceRbd) assert.Error(t, err, kerrors.IsNotFound(err)) - err = c.client.Get(ctx, types.NamespacedName{Name: fenceResourceName(node.Name, cephfsDriver), Namespace: cephCluster.Namespace}, networkFenceCephFs) + err = c.client.Get(ctx, types.NamespacedName{Name: fenceResourceName(node.Name, cephfsDriver, ns), Namespace: cephCluster.Namespace}, networkFenceCephFs) assert.Error(t, err, kerrors.IsNotFound(err)) } @@ -487,8 +485,8 @@ func TestConcatenateWatcherIp(t *testing.T) { } func TestFenceResourceName(t *testing.T) { - FenceName := fenceResourceName("fakenode", "rbd") - assert.Equal(t, FenceName, "fakenode-rbd") + FenceName := fenceResourceName("fakenode", "rbd", "rook-ceph") + assert.Equal(t, FenceName, "fakenode-rbd-rook-ceph") } func TestOnDeviceCMUpdate(t *testing.T) { diff --git a/pkg/operator/ceph/csi/csi.go b/pkg/operator/ceph/csi/csi.go index 49d79423b4e3..6b6847d64ba6 100644 --- a/pkg/operator/ceph/csi/csi.go +++ b/pkg/operator/ceph/csi/csi.go @@ -161,7 +161,7 @@ func (r *ReconcileCSI) setParams(ver *version.Info) error { } CSIParam.EnableCSIAddonsSideCar = false - _, err = r.context.ApiExtensionsClient.ApiextensionsV1().CustomResourceDefinitions().Get(r.opManagerContext, "csiaddonsnode.csiaddons.openshift.io", metav1.GetOptions{}) + _, err = r.context.ApiExtensionsClient.ApiextensionsV1().CustomResourceDefinitions().Get(r.opManagerContext, "csiaddonsnodes.csiaddons.openshift.io", metav1.GetOptions{}) if err == nil { CSIParam.EnableCSIAddonsSideCar = true }
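
Note (not part of the patch): the controller.go hunk above deletes every NetworkFence that carries the deleting CephCluster's UID in its cephClusterUID label, which is why the RBAC hunks add the "deletecollection" verb on networkfences. Below is a minimal sketch of that label-based cleanup, assuming a controller-runtime client and the csi-addons v1alpha1 API imported in the patch; the package and helper name (cleanupNetworkFences) are hypothetical.

// Sketch only: remove every cluster-scoped NetworkFence labeled with the given
// CephCluster UID in a single DeleteAllOf call. No namespace is involved,
// because NetworkFence is cluster-scoped.
package sketch

import (
	"context"

	addonsv1alpha1 "github.com/csi-addons/kubernetes-csi-addons/apis/csiaddons/v1alpha1"
	"sigs.k8s.io/controller-runtime/pkg/client"
)

// networkFenceLabel mirrors the label key defined in watcher.go above.
const networkFenceLabel = "cephClusterUID"

// cleanupNetworkFences deletes all NetworkFences whose cephClusterUID label
// matches clusterUID, e.g. string(cluster.GetUID()).
func cleanupNetworkFences(ctx context.Context, c client.Client, clusterUID string) error {
	opts := []client.DeleteAllOfOption{
		client.MatchingLabels{networkFenceLabel: clusterUID},
		client.GracePeriodSeconds(0),
	}
	return c.DeleteAllOf(ctx, &addonsv1alpha1.NetworkFence{}, opts...)
}

After the collection delete, requestClusterDelete in the patch also lists any remaining fences by the same label and removes the csiaddons.openshift.io/network-fence finalizer so that their deletion is not blocked.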