diff --git a/exp/controllers/awsmachinepool_controller.go b/exp/controllers/awsmachinepool_controller.go index 275f1b07a2..f85d6a37c3 100644 --- a/exp/controllers/awsmachinepool_controller.go +++ b/exp/controllers/awsmachinepool_controller.go @@ -19,10 +19,7 @@ package controllers import ( "context" "fmt" - "time" - "github.com/aws/aws-sdk-go/aws/awserr" - "github.com/aws/aws-sdk-go/service/autoscaling" "github.com/google/go-cmp/cmp" "github.com/google/go-cmp/cmp/cmpopts" "github.com/pkg/errors" @@ -195,13 +192,13 @@ func (r *AWSMachinePoolReconciler) Reconcile(ctx context.Context, req ctrl.Reque return ctrl.Result{}, r.reconcileDelete(machinePoolScope, infraScope, infraScope) } - return ctrl.Result{}, r.reconcileNormal(ctx, machinePoolScope, infraScope, infraScope, s3Scope) + return r.reconcileNormal(ctx, machinePoolScope, infraScope, infraScope, s3Scope) case *scope.ClusterScope: if !awsMachinePool.ObjectMeta.DeletionTimestamp.IsZero() { return ctrl.Result{}, r.reconcileDelete(machinePoolScope, infraScope, infraScope) } - return ctrl.Result{}, r.reconcileNormal(ctx, machinePoolScope, infraScope, infraScope, s3Scope) + return r.reconcileNormal(ctx, machinePoolScope, infraScope, infraScope, s3Scope) default: return ctrl.Result{}, errors.New("infraCluster has unknown type") } @@ -219,7 +216,7 @@ func (r *AWSMachinePoolReconciler) SetupWithManager(ctx context.Context, mgr ctr Complete(r) } -func (r *AWSMachinePoolReconciler) reconcileNormal(ctx context.Context, machinePoolScope *scope.MachinePoolScope, clusterScope cloud.ClusterScoper, ec2Scope scope.EC2Scope, s3Scope scope.S3Scope) error { +func (r *AWSMachinePoolReconciler) reconcileNormal(ctx context.Context, machinePoolScope *scope.MachinePoolScope, clusterScope cloud.ClusterScoper, ec2Scope scope.EC2Scope, s3Scope scope.S3Scope) (ctrl.Result, error) { clusterScope.Info("Reconciling AWSMachinePool") // If the AWSMachine is in an error state, return early. @@ -228,28 +225,28 @@ func (r *AWSMachinePoolReconciler) reconcileNormal(ctx context.Context, machineP // TODO: If we are in a failed state, delete the secret regardless of instance state - return nil + return ctrl.Result{}, nil } // If the AWSMachinepool doesn't have our finalizer, add it if controllerutil.AddFinalizer(machinePoolScope.AWSMachinePool, expinfrav1.MachinePoolFinalizer) { // Register finalizer immediately to avoid orphaning AWS resources if err := machinePoolScope.PatchObject(); err != nil { - return err + return ctrl.Result{}, err } } if !machinePoolScope.Cluster.Status.InfrastructureReady { machinePoolScope.Info("Cluster infrastructure is not ready yet") conditions.MarkFalse(machinePoolScope.AWSMachinePool, expinfrav1.ASGReadyCondition, infrav1.WaitingForClusterInfrastructureReason, clusterv1.ConditionSeverityInfo, "") - return nil + return ctrl.Result{}, nil } // Make sure bootstrap data is available and populated if machinePoolScope.MachinePool.Spec.Template.Spec.Bootstrap.DataSecretName == nil { machinePoolScope.Info("Bootstrap data secret reference is not yet available") conditions.MarkFalse(machinePoolScope.AWSMachinePool, expinfrav1.ASGReadyCondition, infrav1.WaitingForBootstrapDataReason, clusterv1.ConditionSeverityInfo, "") - return nil + return ctrl.Result{}, nil } ec2Svc := r.getEC2Service(ec2Scope) @@ -261,7 +258,7 @@ func (r *AWSMachinePoolReconciler) reconcileNormal(ctx context.Context, machineP asg, err := r.findASG(machinePoolScope, asgsvc) if err != nil { conditions.MarkUnknown(machinePoolScope.AWSMachinePool, expinfrav1.ASGReadyCondition, expinfrav1.ASGNotFoundReason, err.Error()) - return err + return ctrl.Result{}, err } canUpdateLaunchTemplate := func() (bool, error) { @@ -279,16 +276,16 @@ func (r *AWSMachinePoolReconciler) reconcileNormal(ctx context.Context, machineP machinePoolScope.Info("cancelling instance refresh") return asgsvc.CancelASGInstanceRefresh(machinePoolScope) } - runPostLaunchTemplateUpdateOperation := func() (*ctrl.Result, error) { + runPostLaunchTemplateUpdateOperation := func() error { // skip instance refresh if ASG is not created yet if asg == nil { machinePoolScope.Debug("ASG does not exist yet, skipping instance refresh") - return nil, nil + return nil } // skip instance refresh if explicitly disabled if machinePoolScope.AWSMachinePool.Spec.RefreshPreferences != nil && machinePoolScope.AWSMachinePool.Spec.RefreshPreferences.Disable { machinePoolScope.Debug("instance refresh disabled, skipping instance refresh") - return nil, nil + return nil } // After creating a new version of launch template, instance refresh is required // to trigger a rolling replacement of all previously launched instances. @@ -300,22 +297,16 @@ func (r *AWSMachinePoolReconciler) reconcileNormal(ctx context.Context, machineP // Launch Template version, and the difference between the older and current versions is _more_ // than userdata, we should start an Instance Refresh. machinePoolScope.Info("starting instance refresh", "number of instances", machinePoolScope.MachinePool.Spec.Replicas) - err := asgsvc.StartASGInstanceRefresh(machinePoolScope) - var aerr awserr.Error - if err != nil && errors.As(err, &aerr) && aerr.Code() == autoscaling.ErrCodeInstanceRefreshInProgressFault { - // This can happen, for instance, if we cancelled the previous - // instance refresh just now, and it's not in `Cancelled` state yet, - // meaning no new refresh can be started. Delay reconciliation - // for a bit. - machinePoolScope.Info("Previous instance refresh not finished/cancelled yet, cannot start another one") - return &ctrl.Result{RequeueAfter: 30 * time.Second}, nil - } - return nil, err + return asgsvc.StartASGInstanceRefresh(machinePoolScope) } - if err := reconSvc.ReconcileLaunchTemplate(machinePoolScope, machinePoolScope, s3Scope, ec2Svc, objectStoreSvc, canUpdateLaunchTemplate, cancelInstanceRefresh, runPostLaunchTemplateUpdateOperation); err != nil { + res, err := reconSvc.ReconcileLaunchTemplate(machinePoolScope, machinePoolScope, s3Scope, ec2Svc, objectStoreSvc, canUpdateLaunchTemplate, cancelInstanceRefresh, runPostLaunchTemplateUpdateOperation) + if err != nil { r.Recorder.Eventf(machinePoolScope.AWSMachinePool, corev1.EventTypeWarning, "FailedLaunchTemplateReconcile", "Failed to reconcile launch template: %v", err) machinePoolScope.Error(err, "failed to reconcile launch template") - return err + return ctrl.Result{}, err + } + if res != nil { + return *res, nil } // set the LaunchTemplateReady condition @@ -325,9 +316,9 @@ func (r *AWSMachinePoolReconciler) reconcileNormal(ctx context.Context, machineP // Create new ASG if err := r.createPool(machinePoolScope, clusterScope); err != nil { conditions.MarkFalse(machinePoolScope.AWSMachinePool, expinfrav1.ASGReadyCondition, expinfrav1.ASGProvisionFailedReason, clusterv1.ConditionSeverityError, err.Error()) - return err + return ctrl.Result{}, err } - return nil + return ctrl.Result{}, nil } if annotations.ReplicasManagedByExternalAutoscaler(machinePoolScope.MachinePool) { @@ -338,14 +329,14 @@ func (r *AWSMachinePoolReconciler) reconcileNormal(ctx context.Context, machineP "external", asg.DesiredCapacity) machinePoolScope.MachinePool.Spec.Replicas = asg.DesiredCapacity if err := machinePoolScope.PatchCAPIMachinePoolObject(ctx); err != nil { - return err + return ctrl.Result{}, err } } } if err := r.updatePool(machinePoolScope, clusterScope, asg); err != nil { machinePoolScope.Error(err, "error updating AWSMachinePool") - return err + return ctrl.Result{}, err } launchTemplateID := machinePoolScope.GetLaunchTemplateIDStatus() @@ -362,7 +353,7 @@ func (r *AWSMachinePoolReconciler) reconcileNormal(ctx context.Context, machineP } err = reconSvc.ReconcileTags(machinePoolScope, resourceServiceToUpdate) if err != nil { - return errors.Wrap(err, "error updating tags") + return ctrl.Result{}, errors.Wrap(err, "error updating tags") } // Make sure Spec.ProviderID is always set. @@ -385,7 +376,7 @@ func (r *AWSMachinePoolReconciler) reconcileNormal(ctx context.Context, machineP machinePoolScope.Error(err, "failed updating instances", "instances", asg.Instances) } - return nil + return ctrl.Result{}, nil } func (r *AWSMachinePoolReconciler) reconcileDelete(machinePoolScope *scope.MachinePoolScope, clusterScope cloud.ClusterScoper, ec2Scope scope.EC2Scope) error { diff --git a/exp/controllers/awsmachinepool_controller_test.go b/exp/controllers/awsmachinepool_controller_test.go index eb78652cc8..abdd0a5584 100644 --- a/exp/controllers/awsmachinepool_controller_test.go +++ b/exp/controllers/awsmachinepool_controller_test.go @@ -211,7 +211,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { buf := new(bytes.Buffer) klog.SetOutput(buf) - _ = reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) + _, _ = reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(buf).To(ContainSubstring("Error state detected, skipping reconciliation")) }) t.Run("should add our finalizer to the machinepool", func(t *testing.T) { @@ -220,7 +220,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { defer teardown(t, g) getASG(t, g) - _ = reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) + _, _ = reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(ms.AWSMachinePool.Finalizers).To(ContainElement(expinfrav1.MachinePoolFinalizer)) }) @@ -235,7 +235,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { buf := new(bytes.Buffer) klog.SetOutput(buf) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) + _, err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(err).To(BeNil()) g.Expect(buf.String()).To(ContainSubstring("Cluster infrastructure is not ready yet")) expectConditions(g, ms.AWSMachinePool, []conditionAssertion{{expinfrav1.ASGReadyCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityInfo, infrav1.WaitingForClusterInfrastructureReason}}) @@ -250,7 +250,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { buf := new(bytes.Buffer) klog.SetOutput(buf) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) + _, err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(err).To(BeNil()) g.Expect(buf.String()).To(ContainSubstring("Bootstrap data secret reference is not yet available")) @@ -277,8 +277,8 @@ func TestAWSMachinePoolReconciler(t *testing.T) { getASG(t, g) expectedErr := errors.New("no connection available ") - reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(expectedErr) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) + reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, expectedErr) + _, err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(errors.Cause(err)).To(MatchError(expectedErr)) }) }) @@ -298,14 +298,14 @@ func TestAWSMachinePoolReconciler(t *testing.T) { defer teardown(t, g) setSuspendedProcesses(t, g) - reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil) + reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, nil) asgSvc.EXPECT().GetASGByName(gomock.Any()).Return(nil, nil) asgSvc.EXPECT().CreateASG(gomock.Any()).Return(&expinfrav1.AutoScalingGroup{ Name: "name", }, nil) asgSvc.EXPECT().SuspendProcesses("name", []string{"Launch", "Terminate"}).Return(nil).AnyTimes().Times(0) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) + _, err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(err).To(Succeed()) }) }) @@ -322,7 +322,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { defer teardown(t, g) setSuspendedProcesses(t, g) ms.AWSMachinePool.Spec.SuspendProcesses.All = true - reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil) + reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, nil) reconSvc.EXPECT().ReconcileTags(gomock.Any(), gomock.Any()).Return(nil) asgSvc.EXPECT().GetASGByName(gomock.Any()).Return(&expinfrav1.AutoScalingGroup{ Name: "name", @@ -341,7 +341,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { "ReplaceUnhealthy", })).Return(nil).AnyTimes().Times(1) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) + _, err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(err).To(Succeed()) }) }) @@ -362,7 +362,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { defer teardown(t, g) setSuspendedProcesses(t, g) - reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil) + reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, nil) reconSvc.EXPECT().ReconcileTags(gomock.Any(), gomock.Any()).Return(nil) asgSvc.EXPECT().GetASGByName(gomock.Any()).Return(&expinfrav1.AutoScalingGroup{ Name: "name", @@ -373,7 +373,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { asgSvc.EXPECT().SuspendProcesses("name", []string{"Terminate"}).Return(nil).AnyTimes().Times(1) asgSvc.EXPECT().ResumeProcesses("name", []string{"process3"}).Return(nil).AnyTimes().Times(1) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) + _, err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(err).To(Succeed()) }) }) @@ -387,7 +387,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { Name: "an-asg", DesiredCapacity: pointer.Int32(1), } - reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil) + reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, nil) asgSvc.EXPECT().GetASGByName(gomock.Any()).Return(&asg, nil) asgSvc.EXPECT().SubnetIDs(gomock.Any()).Return([]string{}, nil) asgSvc.EXPECT().UpdateASG(gomock.Any()).Return(nil) @@ -400,7 +400,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { g.Expect(testEnv.Create(ctx, ms.MachinePool)).To(Succeed()) - _ = reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) + _, _ = reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(*ms.MachinePool.Spec.Replicas).To(Equal(int32(1))) }) t.Run("No need to update Asg because asgNeedsUpdates is false and no subnets change", func(t *testing.T) { @@ -425,13 +425,13 @@ func TestAWSMachinePoolReconciler(t *testing.T) { }, }, Subnets: []string{"subnet1", "subnet2"}} - reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil) + reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, nil) reconSvc.EXPECT().ReconcileTags(gomock.Any(), gomock.Any()).Return(nil) asgSvc.EXPECT().GetASGByName(gomock.Any()).Return(&asg, nil).AnyTimes() asgSvc.EXPECT().SubnetIDs(gomock.Any()).Return([]string{"subnet2", "subnet1"}, nil).Times(1) asgSvc.EXPECT().UpdateASG(gomock.Any()).Return(nil).Times(0) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) + _, err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(err).To(Succeed()) }) t.Run("update Asg due to subnet changes", func(t *testing.T) { @@ -443,13 +443,13 @@ func TestAWSMachinePoolReconciler(t *testing.T) { MinSize: int32(0), MaxSize: int32(100), Subnets: []string{"subnet1", "subnet2"}} - reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil) + reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, nil) reconSvc.EXPECT().ReconcileTags(gomock.Any(), gomock.Any()).Return(nil) asgSvc.EXPECT().GetASGByName(gomock.Any()).Return(&asg, nil).AnyTimes() asgSvc.EXPECT().SubnetIDs(gomock.Any()).Return([]string{"subnet1"}, nil).Times(1) asgSvc.EXPECT().UpdateASG(gomock.Any()).Return(nil).Times(1) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) + _, err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(err).To(Succeed()) }) t.Run("update Asg due to asgNeedsUpdates returns true", func(t *testing.T) { @@ -461,13 +461,13 @@ func TestAWSMachinePoolReconciler(t *testing.T) { MinSize: int32(0), MaxSize: int32(2), Subnets: []string{}} - reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil) + reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, nil) reconSvc.EXPECT().ReconcileTags(gomock.Any(), gomock.Any()).Return(nil) asgSvc.EXPECT().GetASGByName(gomock.Any()).Return(&asg, nil).AnyTimes() asgSvc.EXPECT().SubnetIDs(gomock.Any()).Return([]string{}, nil).Times(1) asgSvc.EXPECT().UpdateASG(gomock.Any()).Return(nil).Times(1) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) + _, err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(err).To(Succeed()) }) @@ -492,7 +492,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { }, nil }) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) + _, err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(err).To(Succeed()) }) @@ -533,7 +533,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { // No changes, so there must not be an ASG update! asgSvc.EXPECT().UpdateASG(gomock.Any()).Times(0) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) + _, err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(err).To(Succeed()) }) @@ -580,7 +580,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { // No changes, so there must not be an ASG update! asgSvc.EXPECT().UpdateASG(gomock.Any()).Times(0) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) + _, err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(err).To(Succeed()) }) @@ -630,7 +630,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { // No changes, so there must not be an ASG update! asgSvc.EXPECT().UpdateASG(gomock.Any()).Times(0) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) + _, err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(err).To(Succeed()) }) @@ -646,7 +646,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { }, nil }) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) + _, err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(err).To(Succeed()) g.Expect(ms.AWSMachinePool.Status.LaunchTemplateID).ToNot(BeEmpty()) @@ -705,7 +705,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { // No changes, so there must not be an ASG update! asgSvc.EXPECT().UpdateASG(gomock.Any()).Times(0) - err = reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) + _, err = reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(err).To(Succeed()) }) }) diff --git a/exp/controllers/awsmanagedmachinepool_controller.go b/exp/controllers/awsmanagedmachinepool_controller.go index 5ead563781..e3264d22cf 100644 --- a/exp/controllers/awsmanagedmachinepool_controller.go +++ b/exp/controllers/awsmanagedmachinepool_controller.go @@ -218,8 +218,8 @@ func (r *AWSManagedMachinePoolReconciler) reconcileNormal( cancelInstanceRefresh := func() error { return nil } - runPostLaunchTemplateUpdateOperation := func() (*ctrl.Result, error) { - return nil, nil + runPostLaunchTemplateUpdateOperation := func() error { + return nil } var objectStoreSvc services.ObjectStoreInterface = nil // no S3 bucket support for `AWSManagedControlPlane` yet res, err := reconSvc.ReconcileLaunchTemplate(machinePoolScope, machinePoolScope, s3Scope, ec2svc, objectStoreSvc, canUpdateLaunchTemplate, cancelInstanceRefresh, runPostLaunchTemplateUpdateOperation) diff --git a/pkg/cloud/services/ec2/launchtemplate.go b/pkg/cloud/services/ec2/launchtemplate.go index 9f1bcb5450..5528a48fcb 100644 --- a/pkg/cloud/services/ec2/launchtemplate.go +++ b/pkg/cloud/services/ec2/launchtemplate.go @@ -24,6 +24,7 @@ import ( "sort" "strconv" "strings" + "time" "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/ec2" @@ -70,7 +71,7 @@ func (s *Service) ReconcileLaunchTemplate( objectStoreSvc services.ObjectStoreInterface, canUpdateLaunchTemplate func() (bool, error), cancelInstanceRefresh func() error, - runPostLaunchTemplateUpdateOperation func() (*ctrl.Result, error), + runPostLaunchTemplateUpdateOperation func() error, ) (*ctrl.Result, error) { fmt.Printf("ANDI reconcile launch template\n") bootstrapData, bootstrapDataFormat, bootstrapDataSecretKey, err := scope.GetRawBootstrapData() @@ -238,6 +239,14 @@ func (s *Service) ReconcileLaunchTemplate( if err != nil { return nil, err } + + // Until the previous instance refresh goes into `Cancelled` state + // asynchronously, allowing another refresh to be started, + // defer the reconciliation. Otherwise, we get an + // `ErrCodeInstanceRefreshInProgressFault` error if we tried to + // start an instance refresh immediately. + scope.Info("Cancelled previous instance refresh, delaying reconciliation until the next one can be started") + return &ctrl.Result{RequeueAfter: 30 * time.Second}, nil } } @@ -268,14 +277,11 @@ func (s *Service) ReconcileLaunchTemplate( } if needsUpdate || tagsChanged || amiChanged || userDataSecretKeyChanged { - res, err := runPostLaunchTemplateUpdateOperation() + err := runPostLaunchTemplateUpdateOperation() if err != nil { conditions.MarkFalse(scope.GetSetter(), expinfrav1.PostLaunchTemplateUpdateOperationCondition, expinfrav1.PostLaunchTemplateUpdateOperationFailedReason, clusterv1.ConditionSeverityError, err.Error()) return nil, err } - if res != nil { - return res, nil - } conditions.MarkTrue(scope.GetSetter(), expinfrav1.PostLaunchTemplateUpdateOperationCondition) } diff --git a/pkg/cloud/services/interfaces.go b/pkg/cloud/services/interfaces.go index ed971e8d02..dbcf90beaa 100644 --- a/pkg/cloud/services/interfaces.go +++ b/pkg/cloud/services/interfaces.go @@ -22,6 +22,7 @@ import ( infrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/api/v1beta2" expinfrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/exp/api/v1beta2" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/scope" + ctrl "sigs.k8s.io/controller-runtime" ) const ( @@ -85,7 +86,7 @@ type EC2Interface interface { // separate from EC2Interface so that we can mock AWS requests separately. For example, by not mocking the // ReconcileLaunchTemplate function, but mocking EC2Interface, we can test which EC2 API operations would have been called. type MachinePoolReconcileInterface interface { - ReconcileLaunchTemplate(ignitionScope scope.IgnitionScope, scope scope.LaunchTemplateScope, s3Scope scope.S3Scope, ec2svc EC2Interface, objectStoreSvc ObjectStoreInterface, canUpdateLaunchTemplate func() (bool, error), cancelInstanceRefresh func() error, runPostLaunchTemplateUpdateOperation func() (*ctrl.Result, error)) (*ctrl.Result, error) + ReconcileLaunchTemplate(ignitionScope scope.IgnitionScope, scope scope.LaunchTemplateScope, s3Scope scope.S3Scope, ec2svc EC2Interface, objectStoreSvc ObjectStoreInterface, canUpdateLaunchTemplate func() (bool, error), cancelInstanceRefresh func() error, runPostLaunchTemplateUpdateOperation func() error) (*ctrl.Result, error) ReconcileTags(scope scope.LaunchTemplateScope, resourceServicesToUpdate []scope.ResourceServiceToUpdate) error } diff --git a/pkg/cloud/services/mock_services/reconcile_interface_mock.go b/pkg/cloud/services/mock_services/reconcile_interface_mock.go index bfe92d02a1..87b96e0cb8 100644 --- a/pkg/cloud/services/mock_services/reconcile_interface_mock.go +++ b/pkg/cloud/services/mock_services/reconcile_interface_mock.go @@ -26,6 +26,7 @@ import ( gomock "github.com/golang/mock/gomock" scope "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/scope" services "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services" + reconcile "sigs.k8s.io/controller-runtime/pkg/reconcile" ) // MockMachinePoolReconcileInterface is a mock of MachinePoolReconcileInterface interface. @@ -52,11 +53,12 @@ func (m *MockMachinePoolReconcileInterface) EXPECT() *MockMachinePoolReconcileIn } // ReconcileLaunchTemplate mocks base method. -func (m *MockMachinePoolReconcileInterface) ReconcileLaunchTemplate(arg0 scope.IgnitionScope, arg1 scope.LaunchTemplateScope, arg2 scope.S3Scope, arg3 services.EC2Interface, arg4 services.ObjectStoreInterface, arg5 func() (bool, error), arg6, arg7 func() error) error { +func (m *MockMachinePoolReconcileInterface) ReconcileLaunchTemplate(arg0 scope.IgnitionScope, arg1 scope.LaunchTemplateScope, arg2 scope.S3Scope, arg3 services.EC2Interface, arg4 services.ObjectStoreInterface, arg5 func() (bool, error), arg6, arg7 func() error) (*reconcile.Result, error) { m.ctrl.T.Helper() ret := m.ctrl.Call(m, "ReconcileLaunchTemplate", arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7) - ret0, _ := ret[0].(error) - return ret0 + ret0, _ := ret[0].(*reconcile.Result) + ret1, _ := ret[1].(error) + return ret0, ret1 } // ReconcileLaunchTemplate indicates an expected call of ReconcileLaunchTemplate.