From da51433067931d20466942b332b67c355ca509e2 Mon Sep 17 00:00:00 2001 From: Christian Schlotter Date: Mon, 9 Dec 2024 14:42:45 +0100 Subject: [PATCH 1/3] supervisor: add watches for machinedeployemnts to VSphereCluster controller --- .../vmware/vspherecluster_reconciler.go | 29 ++++++++++++++++++ controllers/vspherecluster_controller.go | 4 +++ pkg/util/cluster.go | 30 +++++++++++++++++++ 3 files changed, 63 insertions(+) diff --git a/controllers/vmware/vspherecluster_reconciler.go b/controllers/vmware/vspherecluster_reconciler.go index 397663de19..50a5be56bf 100644 --- a/controllers/vmware/vspherecluster_reconciler.go +++ b/controllers/vmware/vspherecluster_reconciler.go @@ -69,6 +69,7 @@ type ClusterReconciler struct { // +kubebuilder:rbac:groups=netoperator.vmware.com,resources=networks,verbs=get;list;watch // +kubebuilder:rbac:groups="",resources=persistentvolumeclaims,verbs=get;list;watch;update;create;delete // +kubebuilder:rbac:groups="",resources=persistentvolumeclaims/status,verbs=get;update;patch +// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinedeployments,verbs=get;list;watch func (r *ClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Result, reterr error) { log := ctrl.LoggerFrom(ctx) @@ -370,6 +371,34 @@ func (r *ClusterReconciler) VSphereMachineToCluster(ctx context.Context, o clien }} } +// MachineDeploymentToCluster adds reconcile requests for a Cluster when one of its machineDeployments has an event. +func (r *ClusterReconciler) MachineDeploymentToCluster(ctx context.Context, o client.Object) []reconcile.Request { + log := ctrl.LoggerFrom(ctx) + + machineDeployment, ok := o.(*clusterv1.MachineDeployment) + if !ok { + log.Error(nil, fmt.Sprintf("Expected a MachineDeployment but got a %T", o)) + return nil + } + log = log.WithValues("MachineDeployment", klog.KObj(machineDeployment)) + ctx = ctrl.LoggerInto(ctx, log) + + vsphereCluster, err := util.GetVMwareVSphereClusterFromMachineDeployment(ctx, r.Client, machineDeployment) + if err != nil { + log.V(4).Error(err, "Failed to get VSphereCluster from MachineDeployment") + return nil + } + + // Can add further filters on Cluster state so that we don't keep reconciling Cluster + log.V(6).Info("Triggering VSphereCluster reconcile from MachineDeployment") + return []ctrl.Request{{ + NamespacedName: types.NamespacedName{ + Namespace: vsphereCluster.Namespace, + Name: vsphereCluster.Name, + }, + }} +} + // ZoneToVSphereClusters adds reconcile requests for VSphereClusters when Zone has an event. func (r *ClusterReconciler) ZoneToVSphereClusters(ctx context.Context, o client.Object) []reconcile.Request { log := ctrl.LoggerFrom(ctx) diff --git a/controllers/vspherecluster_controller.go b/controllers/vspherecluster_controller.go index 47d1ee9ce4..47430f3560 100644 --- a/controllers/vspherecluster_controller.go +++ b/controllers/vspherecluster_controller.go @@ -83,6 +83,10 @@ func AddClusterControllerToManager(ctx context.Context, controllerManagerCtx *ca &vmwarev1.VSphereMachine{}, handler.EnqueueRequestsFromMapFunc(reconciler.VSphereMachineToCluster), ). + Watches( + &clusterv1.MachineDeployment{}, + handler.EnqueueRequestsFromMapFunc(reconciler.MachineDeploymentToCluster), + ). WithEventFilter(predicates.ResourceNotPausedAndHasFilterLabel(mgr.GetScheme(), predicateLog, controllerManagerCtx.WatchFilterValue)) // Conditionally add a Watch for topologyv1.Zone when the feature gate is enabled diff --git a/pkg/util/cluster.go b/pkg/util/cluster.go index e0fcce7efd..1d6661081e 100644 --- a/pkg/util/cluster.go +++ b/pkg/util/cluster.go @@ -59,6 +59,36 @@ func GetVSphereClusterFromVMwareMachine(ctx context.Context, c client.Client, ma return vsphereCluster, err } +// GetVMwareVSphereClusterFromMachineDeployment gets the vmware.infrastructure.cluster.x-k8s.io.VSphereCluster resource for the given MachineDeployment$. +func GetVMwareVSphereClusterFromMachineDeployment(ctx context.Context, c client.Client, machineDeployment *clusterv1.MachineDeployment) (*vmwarev1.VSphereCluster, error) { + clusterName := machineDeployment.Labels[clusterv1.ClusterNameLabel] + if clusterName == "" { + return nil, errors.Errorf("error getting VSphereCluster name from MachineDeployment %s/%s", + machineDeployment.Namespace, machineDeployment.Name) + } + namespacedName := apitypes.NamespacedName{ + Namespace: machineDeployment.Namespace, + Name: clusterName, + } + cluster := &clusterv1.Cluster{} + if err := c.Get(ctx, namespacedName, cluster); err != nil { + return nil, err + } + + if cluster.Spec.InfrastructureRef == nil { + return nil, errors.Errorf("error getting VSphereCluster name from MachineDeployment %s/%s: Cluster.spec.infrastructureRef not yet set", + machineDeployment.Namespace, machineDeployment.Name) + } + + vsphereClusterKey := apitypes.NamespacedName{ + Namespace: machineDeployment.Namespace, + Name: cluster.Spec.InfrastructureRef.Name, + } + vsphereCluster := &vmwarev1.VSphereCluster{} + err := c.Get(ctx, vsphereClusterKey, vsphereCluster) + return vsphereCluster, err +} + // GetVSphereClusterFromVSphereMachine gets the infrastructure.cluster.x-k8s.io.VSphereCluster resource for the given VSphereMachine. func GetVSphereClusterFromVSphereMachine(ctx context.Context, c client.Client, machine *infrav1.VSphereMachine) (*infrav1.VSphereCluster, error) { clusterName := machine.Labels[clusterv1.ClusterNameLabel] From 6adac7a3c712b53734ec240238b7bbf4c3d6c236 Mon Sep 17 00:00:00 2001 From: Christian Schlotter Date: Mon, 9 Dec 2024 14:44:17 +0100 Subject: [PATCH 2/3] supervisor: create ClusterModule per MachineDeployment and re-reconcile VirtualMachineResourceSetPolicy to update VirtualMachineSetResourcePolicy --- pkg/services/vmoperator/resource_policy.go | 69 +++++++++++++++---- .../vmoperator/resource_policy_test.go | 2 +- pkg/services/vmoperator/vmopmachine.go | 42 +++++++++-- 3 files changed, 93 insertions(+), 20 deletions(-) diff --git a/pkg/services/vmoperator/resource_policy.go b/pkg/services/vmoperator/resource_policy.go index 61fd255e02..e495ae8dec 100644 --- a/pkg/services/vmoperator/resource_policy.go +++ b/pkg/services/vmoperator/resource_policy.go @@ -18,11 +18,14 @@ package vmoperator import ( "context" + "sort" "github.com/pkg/errors" vmoprv1 "github.com/vmware-tanzu/vm-operator/api/v1alpha2" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + "sigs.k8s.io/cluster-api/util/patch" "sigs.k8s.io/controller-runtime/pkg/client" ctrlutil "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" @@ -37,15 +40,31 @@ type RPService struct { // ReconcileResourcePolicy ensures that a VirtualMachineSetResourcePolicy exists for the cluster // Returns the name of a policy if it exists, otherwise returns an error. func (s *RPService) ReconcileResourcePolicy(ctx context.Context, clusterCtx *vmware.ClusterContext) (string, error) { - resourcePolicy, err := s.getVirtualMachineSetResourcePolicy(ctx, clusterCtx) + clusterModuleGroups, err := getClusterModuleGroups(ctx, s.Client, clusterCtx.Cluster) + if err != nil { + return "", err + } + + resourcePolicy, err := getVirtualMachineSetResourcePolicy(ctx, s.Client, clusterCtx.Cluster) if err != nil { if !apierrors.IsNotFound(err) { return "", errors.Errorf("unexpected error in getting the Resource policy: %+v", err) } - resourcePolicy, err = s.createVirtualMachineSetResourcePolicy(ctx, clusterCtx) + resourcePolicy, err = s.createVirtualMachineSetResourcePolicy(ctx, clusterCtx, clusterModuleGroups) if err != nil { return "", errors.Errorf("failed to create Resource Policy: %+v", err) } + return resourcePolicy.Name, nil + } + + // Ensure .spec.clusterModuleGroups is up to date. + helper, err := patch.NewHelper(resourcePolicy, s.Client) + if err != nil { + return "", err + } + resourcePolicy.Spec.ClusterModuleGroups = clusterModuleGroups + if err := helper.Patch(ctx, resourcePolicy); err != nil { + return "", err } return resourcePolicy.Name, nil @@ -60,17 +79,17 @@ func (s *RPService) newVirtualMachineSetResourcePolicy(clusterCtx *vmware.Cluste } } -func (s *RPService) getVirtualMachineSetResourcePolicy(ctx context.Context, clusterCtx *vmware.ClusterContext) (*vmoprv1.VirtualMachineSetResourcePolicy, error) { +func getVirtualMachineSetResourcePolicy(ctx context.Context, ctrlClient client.Client, cluster *clusterv1.Cluster) (*vmoprv1.VirtualMachineSetResourcePolicy, error) { vmResourcePolicy := &vmoprv1.VirtualMachineSetResourcePolicy{} vmResourcePolicyName := client.ObjectKey{ - Namespace: clusterCtx.Cluster.Namespace, - Name: clusterCtx.Cluster.Name, + Namespace: cluster.Namespace, + Name: cluster.Name, } - err := s.Client.Get(ctx, vmResourcePolicyName, vmResourcePolicy) + err := ctrlClient.Get(ctx, vmResourcePolicyName, vmResourcePolicy) return vmResourcePolicy, err } -func (s *RPService) createVirtualMachineSetResourcePolicy(ctx context.Context, clusterCtx *vmware.ClusterContext) (*vmoprv1.VirtualMachineSetResourcePolicy, error) { +func (s *RPService) createVirtualMachineSetResourcePolicy(ctx context.Context, clusterCtx *vmware.ClusterContext, clusterModuleGroups []string) (*vmoprv1.VirtualMachineSetResourcePolicy, error) { vmResourcePolicy := s.newVirtualMachineSetResourcePolicy(clusterCtx) _, err := ctrlutil.CreateOrPatch(ctx, s.Client, vmResourcePolicy, func() error { @@ -78,11 +97,8 @@ func (s *RPService) createVirtualMachineSetResourcePolicy(ctx context.Context, c ResourcePool: vmoprv1.ResourcePoolSpec{ Name: clusterCtx.Cluster.Name, }, - Folder: clusterCtx.Cluster.Name, - ClusterModuleGroups: []string{ - ControlPlaneVMClusterModuleGroupName, - getMachineDeploymentNameForCluster(clusterCtx.Cluster), - }, + Folder: clusterCtx.Cluster.Name, + ClusterModuleGroups: clusterModuleGroups, } // Ensure that the VirtualMachineSetResourcePolicy is owned by the VSphereCluster if err := ctrlutil.SetOwnerReference( @@ -106,3 +122,32 @@ func (s *RPService) createVirtualMachineSetResourcePolicy(ctx context.Context, c } return vmResourcePolicy, nil } + +func getClusterModuleGroups(ctx context.Context, ctrlClient client.Client, cluster *clusterv1.Cluster) ([]string, error) { + machineDeploymentNames, err := getMachineDeploymentNamesForCluster(ctx, ctrlClient, cluster) + if err != nil { + return nil, err + } + + clusterModuleGroups := append([]string{ControlPlaneVMClusterModuleGroupName}, machineDeploymentNames...) + + // sort elements to have deterministic output. + sort.Strings(clusterModuleGroups) + + return clusterModuleGroups, nil +} + +func checkClusterModuleGroup(ctx context.Context, ctrlClient client.Client, cluster *clusterv1.Cluster, clusterModuleGroupName string) error { + resourcePolicy, err := getVirtualMachineSetResourcePolicy(ctx, ctrlClient, cluster) + if err != nil { + return err + } + + for _, cm := range resourcePolicy.Status.ClusterModules { + if cm.GroupName == clusterModuleGroupName { + return nil + } + } + + return errors.Errorf("VirtualMachineSetResourcePolicy's .status.clusterModules does not yet contain %s", clusterModuleGroupName) +} diff --git a/pkg/services/vmoperator/resource_policy_test.go b/pkg/services/vmoperator/resource_policy_test.go index b2f0529057..dfac7a1528 100644 --- a/pkg/services/vmoperator/resource_policy_test.go +++ b/pkg/services/vmoperator/resource_policy_test.go @@ -44,7 +44,7 @@ func TestRPService(t *testing.T) { g.Expect(err).NotTo(HaveOccurred()) g.Expect(name).To(Equal(clusterName)) - resourcePolicy, err := rpService.getVirtualMachineSetResourcePolicy(ctx, clusterCtx) + resourcePolicy, err := getVirtualMachineSetResourcePolicy(ctx, controllerCtx.Client, clusterCtx.Cluster) g.Expect(err).NotTo(HaveOccurred()) g.Expect(resourcePolicy.Spec.ResourcePool.Name).To(Equal(clusterName)) g.Expect(resourcePolicy.Spec.Folder).To(Equal(clusterName)) diff --git a/pkg/services/vmoperator/vmopmachine.go b/pkg/services/vmoperator/vmopmachine.go index de72429b9b..99076eafd3 100644 --- a/pkg/services/vmoperator/vmopmachine.go +++ b/pkg/services/vmoperator/vmopmachine.go @@ -475,7 +475,9 @@ func (v *VmopMachineService) reconcileVMOperatorVM(ctx context.Context, supervis // Assign the VM's labels. vmOperatorVM.Labels = getVMLabels(supervisorMachineCtx, vmOperatorVM.Labels) - addResourcePolicyAnnotations(supervisorMachineCtx, vmOperatorVM) + if err := addResourcePolicyAnnotations(ctx, v.Client, supervisorMachineCtx, vmOperatorVM); err != nil { + return err + } if err := v.addVolumes(ctx, supervisorMachineCtx, vmOperatorVM); err != nil { return err @@ -580,7 +582,7 @@ func (v *VmopMachineService) getVirtualMachinesInCluster(ctx context.Context, su // Helper function to add annotations to indicate which tag vm-operator should add as well as which clusterModule VM // should be associated. -func addResourcePolicyAnnotations(supervisorMachineCtx *vmware.SupervisorMachineContext, vm *vmoprv1.VirtualMachine) { +func addResourcePolicyAnnotations(ctx context.Context, ctrlClient client.Client, supervisorMachineCtx *vmware.SupervisorMachineContext, vm *vmoprv1.VirtualMachine) error { annotations := vm.ObjectMeta.GetAnnotations() if annotations == nil { annotations = make(map[string]string) @@ -591,10 +593,17 @@ func addResourcePolicyAnnotations(supervisorMachineCtx *vmware.SupervisorMachine annotations[ClusterModuleNameAnnotationKey] = ControlPlaneVMClusterModuleGroupName } else { annotations[ProviderTagsAnnotationKey] = WorkerVMVMAntiAffinityTagValue - annotations[ClusterModuleNameAnnotationKey] = getMachineDeploymentNameForCluster(supervisorMachineCtx.Cluster) + clusterModuleName := getMachineDeploymentNameForMachine(supervisorMachineCtx.Machine) + + if err := checkClusterModuleGroup(ctx, ctrlClient, supervisorMachineCtx.Cluster, clusterModuleName); err != nil { + return err + } + + annotations[ClusterModuleNameAnnotationKey] = clusterModuleName } vm.ObjectMeta.SetAnnotations(annotations) + return nil } func volumeName(machine *vmwarev1.VSphereMachine, volume vmwarev1.VSphereMachineVolume) string { @@ -742,8 +751,27 @@ func getTopologyLabels(supervisorMachineCtx *vmware.SupervisorMachineContext) ma return nil } -// getMachineDeploymentName returns the MachineDeployment name for a Cluster. -// This is also the name used by VSphereMachineTemplate and KubeadmConfigTemplate. -func getMachineDeploymentNameForCluster(cluster *clusterv1.Cluster) string { - return fmt.Sprintf("%s-workers-0", cluster.Name) +func getMachineDeploymentNameForMachine(machine *clusterv1.Machine) string { + if mdName, ok := machine.Labels[clusterv1.MachineDeploymentNameLabel]; ok { + return mdName + } + return "" +} + +func getMachineDeploymentNamesForCluster(ctx context.Context, ctrlClient client.Client, cluster *clusterv1.Cluster) ([]string, error) { + mdNames := []string{} + labels := map[string]string{clusterv1.ClusterNameLabel: cluster.GetName()} + mdList := &clusterv1.MachineDeploymentList{} + if err := ctrlClient.List( + ctx, mdList, + client.InNamespace(cluster.GetNamespace()), + client.MatchingLabels(labels)); err != nil { + return nil, errors.Wrapf(err, "failed to list MachineDeployment objects") + } + for _, md := range mdList.Items { + if md.DeletionTimestamp.IsZero() { + mdNames = append(mdNames, md.Name) + } + } + return mdNames, nil } From e9836c6f67c4069c5c0243c93ceb3655c344cbf8 Mon Sep 17 00:00:00 2001 From: Christian Schlotter Date: Mon, 9 Dec 2024 15:14:30 +0100 Subject: [PATCH 3/3] fix test to expect only one cluster module due to md modules created on demand --- controllers/vmware/test/controllers_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/controllers/vmware/test/controllers_test.go b/controllers/vmware/test/controllers_test.go index 08a075b6c6..e871d0c8b6 100644 --- a/controllers/vmware/test/controllers_test.go +++ b/controllers/vmware/test/controllers_test.go @@ -473,7 +473,7 @@ var _ = Describe("Reconciliation tests", func() { Eventually(func() error { return k8sClient.Get(ctx, rpKey, resourcePolicy) }, time.Second*30).Should(Succeed()) - Expect(len(resourcePolicy.Spec.ClusterModuleGroups)).To(BeEquivalentTo(2)) + Expect(len(resourcePolicy.Spec.ClusterModuleGroups)).To(BeEquivalentTo(1)) By("Create the CAPI Machine and wait for it to exist") machineKey, machine := deployCAPIMachine(ns.Name, cluster, k8sClient)