From a649112dce8b220d442008e4606111a23456393d Mon Sep 17 00:00:00 2001
From: Jose Armesto <github@armesto.net>
Date: Tue, 8 Oct 2024 10:13:32 +0200
Subject: [PATCH] Support setting maxHealthyPercentage to configure ASG
 instance refresh

---
 ...ture.cluster.x-k8s.io_awsmachinepools.yaml | 16 +++++++
 exp/api/v1beta1/conversion.go                 |  3 +-
 exp/api/v1beta1/zz_generated.conversion.go    |  1 +
 exp/api/v1beta2/awsmachinepool_types.go       | 13 ++++++
 exp/api/v1beta2/awsmachinepool_webhook.go     | 23 ++++++++++
 .../v1beta2/awsmachinepool_webhook_test.go    | 42 +++++++++++++++++++
 exp/api/v1beta2/zz_generated.deepcopy.go      |  5 +++
 .../services/autoscaling/autoscalinggroup.go  |  6 ++-
 .../autoscaling/autoscalinggroup_test.go      |  3 ++
 9 files changed, 110 insertions(+), 2 deletions(-)

diff --git a/config/crd/bases/infrastructure.cluster.x-k8s.io_awsmachinepools.yaml b/config/crd/bases/infrastructure.cluster.x-k8s.io_awsmachinepools.yaml
index e28522622c..c01b3d8020 100644
--- a/config/crd/bases/infrastructure.cluster.x-k8s.io_awsmachinepools.yaml
+++ b/config/crd/bases/infrastructure.cluster.x-k8s.io_awsmachinepools.yaml
@@ -969,6 +969,22 @@ spec:
                       The default is to use the value for the health check grace period defined for the group.
                     format: int64
                     type: integer
+                  maxHealthyPercentage:
+                    description: |-
+                      The amount of capacity as a percentage in ASG that can be in service and healthy, or pending,
+                      to support your workload when replacing instances.
+                      The value is expressed as a percentage of the desired capacity of the ASG. Value range is 100 to 200.
+                      If you specify MaxHealthyPercentage , you must also specify MinHealthyPercentage , and the difference between
+                      them cannot be greater than 100.
+
+
+                      A larger range increases the number of instances that can be replaced at the same time.
+
+
+                      If you do not specify this property, the default is 100 percent, or the percentage set in the instance
+                      maintenance policy for the Auto Scaling group, if defined.
+                    format: int64
+                    type: integer
                   minHealthyPercentage:
                     description: |-
                       The amount of capacity as a percentage in ASG that must remain healthy
diff --git a/exp/api/v1beta1/conversion.go b/exp/api/v1beta1/conversion.go
index 50a62f6bb4..7c39f1fcbd 100644
--- a/exp/api/v1beta1/conversion.go
+++ b/exp/api/v1beta1/conversion.go
@@ -42,8 +42,9 @@ func (src *AWSMachinePool) ConvertTo(dstRaw conversion.Hub) error {
 	if restored.Spec.SuspendProcesses != nil {
 		dst.Spec.SuspendProcesses = restored.Spec.SuspendProcesses
 	}
-	if dst.Spec.RefreshPreferences != nil && restored.Spec.RefreshPreferences != nil {
+	if restored.Spec.RefreshPreferences != nil {
 		dst.Spec.RefreshPreferences.Disable = restored.Spec.RefreshPreferences.Disable
+		dst.Spec.RefreshPreferences.MaxHealthyPercentage = restored.Spec.RefreshPreferences.MaxHealthyPercentage
 	}
 	if restored.Spec.AWSLaunchTemplate.InstanceMetadataOptions != nil {
 		dst.Spec.AWSLaunchTemplate.InstanceMetadataOptions = restored.Spec.AWSLaunchTemplate.InstanceMetadataOptions
diff --git a/exp/api/v1beta1/zz_generated.conversion.go b/exp/api/v1beta1/zz_generated.conversion.go
index 4f50a8c20f..a1a2d80e54 100644
--- a/exp/api/v1beta1/zz_generated.conversion.go
+++ b/exp/api/v1beta1/zz_generated.conversion.go
@@ -1066,6 +1066,7 @@ func autoConvert_v1beta2_RefreshPreferences_To_v1beta1_RefreshPreferences(in *v1
 	out.Strategy = (*string)(unsafe.Pointer(in.Strategy))
 	out.InstanceWarmup = (*int64)(unsafe.Pointer(in.InstanceWarmup))
 	out.MinHealthyPercentage = (*int64)(unsafe.Pointer(in.MinHealthyPercentage))
+	// WARNING: in.MaxHealthyPercentage requires manual conversion: does not exist in peer-type
 	return nil
 }
 
diff --git a/exp/api/v1beta2/awsmachinepool_types.go b/exp/api/v1beta2/awsmachinepool_types.go
index a9c26a3e60..c1fa8760c7 100644
--- a/exp/api/v1beta2/awsmachinepool_types.go
+++ b/exp/api/v1beta2/awsmachinepool_types.go
@@ -172,6 +172,19 @@ type RefreshPreferences struct {
 	// during an instance refresh. The default is 90.
 	// +optional
 	MinHealthyPercentage *int64 `json:"minHealthyPercentage,omitempty"`
+
+	// The amount of capacity as a percentage in ASG that can be in service and healthy, or pending,
+	// to support your workload when replacing instances.
+	// The value is expressed as a percentage of the desired capacity of the ASG. Value range is 100 to 200.
+	// If you specify MaxHealthyPercentage , you must also specify MinHealthyPercentage , and the difference between
+	// them cannot be greater than 100.
+	//
+	// A larger range increases the number of instances that can be replaced at the same time.
+	//
+	// If you do not specify this property, the default is 100 percent, or the percentage set in the instance
+	// maintenance policy for the Auto Scaling group, if defined.
+	// +optional
+	MaxHealthyPercentage *int64 `json:"maxHealthyPercentage,omitempty"`
 }
 
 // AWSMachinePoolStatus defines the observed state of AWSMachinePool.
diff --git a/exp/api/v1beta2/awsmachinepool_webhook.go b/exp/api/v1beta2/awsmachinepool_webhook.go
index 541243c53f..a4f6a44d41 100644
--- a/exp/api/v1beta2/awsmachinepool_webhook.go
+++ b/exp/api/v1beta2/awsmachinepool_webhook.go
@@ -133,6 +133,7 @@ func (r *AWSMachinePool) validateAdditionalSecurityGroups() field.ErrorList {
 	}
 	return allErrs
 }
+
 func (r *AWSMachinePool) validateSpotInstances() field.ErrorList {
 	var allErrs field.ErrorList
 	if r.Spec.AWSLaunchTemplate.SpotMarketOptions != nil && r.Spec.MixedInstancesPolicy != nil {
@@ -141,6 +142,26 @@ func (r *AWSMachinePool) validateSpotInstances() field.ErrorList {
 	return allErrs
 }
 
+func (r *AWSMachinePool) validateRefreshPreferences() field.ErrorList {
+	var allErrs field.ErrorList
+
+	if r.Spec.RefreshPreferences == nil {
+		return allErrs
+	}
+
+	if r.Spec.RefreshPreferences.MaxHealthyPercentage != nil && r.Spec.RefreshPreferences.MinHealthyPercentage == nil {
+		allErrs = append(allErrs, field.Forbidden(field.NewPath("spec.refreshPreferences.maxHealthyPercentage"), "If you specify spec.refreshPreferences.maxHealthyPercentage, you must also specify spec.refreshPreferences.minHealthyPercentage"))
+	}
+
+	if r.Spec.RefreshPreferences.MaxHealthyPercentage != nil && r.Spec.RefreshPreferences.MinHealthyPercentage != nil {
+		if *r.Spec.RefreshPreferences.MaxHealthyPercentage-*r.Spec.RefreshPreferences.MinHealthyPercentage > 100 {
+			allErrs = append(allErrs, field.Forbidden(field.NewPath("spec.refreshPreferences.maxHealthyPercentage"), "the difference between spec.refreshPreferences.maxHealthyPercentage and spec.refreshPreferences.minHealthyPercentage cannot be greater than 100"))
+		}
+	}
+
+	return allErrs
+}
+
 // ValidateCreate will do any extra validation when creating a AWSMachinePool.
 func (r *AWSMachinePool) ValidateCreate() (admission.Warnings, error) {
 	log.Info("AWSMachinePool validate create", "machine-pool", klog.KObj(r))
@@ -154,6 +175,7 @@ func (r *AWSMachinePool) ValidateCreate() (admission.Warnings, error) {
 	allErrs = append(allErrs, r.validateSubnets()...)
 	allErrs = append(allErrs, r.validateAdditionalSecurityGroups()...)
 	allErrs = append(allErrs, r.validateSpotInstances()...)
+	allErrs = append(allErrs, r.validateRefreshPreferences()...)
 
 	if len(allErrs) == 0 {
 		return nil, nil
@@ -175,6 +197,7 @@ func (r *AWSMachinePool) ValidateUpdate(_ runtime.Object) (admission.Warnings, e
 	allErrs = append(allErrs, r.validateSubnets()...)
 	allErrs = append(allErrs, r.validateAdditionalSecurityGroups()...)
 	allErrs = append(allErrs, r.validateSpotInstances()...)
+	allErrs = append(allErrs, r.validateRefreshPreferences()...)
 
 	if len(allErrs) == 0 {
 		return nil, nil
diff --git a/exp/api/v1beta2/awsmachinepool_webhook_test.go b/exp/api/v1beta2/awsmachinepool_webhook_test.go
index 3f7f30a101..0f14ad1c0a 100644
--- a/exp/api/v1beta2/awsmachinepool_webhook_test.go
+++ b/exp/api/v1beta2/awsmachinepool_webhook_test.go
@@ -153,6 +153,27 @@ func TestAWSMachinePoolValidateCreate(t *testing.T) {
 			},
 			wantErr: true,
 		},
+		{
+			name: "Should fail if MaxHealthyPercentage is set, but MinHealthyPercentage is not set",
+			pool: &AWSMachinePool{
+				Spec: AWSMachinePoolSpec{
+					RefreshPreferences: &RefreshPreferences{MaxHealthyPercentage: aws.Int64(100)},
+				},
+			},
+			wantErr: true,
+		},
+		{
+			name: "Should fail if the difference between MaxHealthyPercentage and MinHealthyPercentage is greater than 100",
+			pool: &AWSMachinePool{
+				Spec: AWSMachinePoolSpec{
+					RefreshPreferences: &RefreshPreferences{
+						MaxHealthyPercentage: aws.Int64(150),
+						MinHealthyPercentage: aws.Int64(25),
+					},
+				},
+			},
+			wantErr: true,
+		},
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
@@ -287,6 +308,27 @@ func TestAWSMachinePoolValidateUpdate(t *testing.T) {
 			},
 			wantErr: true,
 		},
+		{
+			name: "Should fail if MaxHealthyPercentage is set, but MinHealthyPercentage is not set",
+			new: &AWSMachinePool{
+				Spec: AWSMachinePoolSpec{
+					RefreshPreferences: &RefreshPreferences{MaxHealthyPercentage: aws.Int64(100)},
+				},
+			},
+			wantErr: true,
+		},
+		{
+			name: "Should fail if the difference between MaxHealthyPercentage and MinHealthyPercentage is greater than 100",
+			new: &AWSMachinePool{
+				Spec: AWSMachinePoolSpec{
+					RefreshPreferences: &RefreshPreferences{
+						MaxHealthyPercentage: aws.Int64(150),
+						MinHealthyPercentage: aws.Int64(25),
+					},
+				},
+			},
+			wantErr: true,
+		},
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
diff --git a/exp/api/v1beta2/zz_generated.deepcopy.go b/exp/api/v1beta2/zz_generated.deepcopy.go
index 69ff149f48..f34e8f9d9b 100644
--- a/exp/api/v1beta2/zz_generated.deepcopy.go
+++ b/exp/api/v1beta2/zz_generated.deepcopy.go
@@ -1062,6 +1062,11 @@ func (in *RefreshPreferences) DeepCopyInto(out *RefreshPreferences) {
 		*out = new(int64)
 		**out = **in
 	}
+	if in.MaxHealthyPercentage != nil {
+		in, out := &in.MaxHealthyPercentage, &out.MaxHealthyPercentage
+		*out = new(int64)
+		**out = **in
+	}
 }
 
 // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RefreshPreferences.
diff --git a/pkg/cloud/services/autoscaling/autoscalinggroup.go b/pkg/cloud/services/autoscaling/autoscalinggroup.go
index d473010d12..f8e8b18690 100644
--- a/pkg/cloud/services/autoscaling/autoscalinggroup.go
+++ b/pkg/cloud/services/autoscaling/autoscalinggroup.go
@@ -349,7 +349,7 @@ func (s *Service) CanStartASGInstanceRefresh(scope *scope.MachinePoolScope) (boo
 // StartASGInstanceRefresh will start an ASG instance with refresh.
 func (s *Service) StartASGInstanceRefresh(scope *scope.MachinePoolScope) error {
 	strategy := ptr.To[string](autoscaling.RefreshStrategyRolling)
-	var minHealthyPercentage, instanceWarmup *int64
+	var minHealthyPercentage, maxHealthyPercentage, instanceWarmup *int64
 	if scope.AWSMachinePool.Spec.RefreshPreferences != nil {
 		if scope.AWSMachinePool.Spec.RefreshPreferences.Strategy != nil {
 			strategy = scope.AWSMachinePool.Spec.RefreshPreferences.Strategy
@@ -360,6 +360,9 @@ func (s *Service) StartASGInstanceRefresh(scope *scope.MachinePoolScope) error {
 		if scope.AWSMachinePool.Spec.RefreshPreferences.MinHealthyPercentage != nil {
 			minHealthyPercentage = scope.AWSMachinePool.Spec.RefreshPreferences.MinHealthyPercentage
 		}
+		if scope.AWSMachinePool.Spec.RefreshPreferences.MaxHealthyPercentage != nil {
+			maxHealthyPercentage = scope.AWSMachinePool.Spec.RefreshPreferences.MaxHealthyPercentage
+		}
 	}
 
 	input := &autoscaling.StartInstanceRefreshInput{
@@ -368,6 +371,7 @@ func (s *Service) StartASGInstanceRefresh(scope *scope.MachinePoolScope) error {
 		Preferences: &autoscaling.RefreshPreferences{
 			InstanceWarmup:       instanceWarmup,
 			MinHealthyPercentage: minHealthyPercentage,
+			MaxHealthyPercentage: maxHealthyPercentage,
 		},
 	}
 
diff --git a/pkg/cloud/services/autoscaling/autoscalinggroup_test.go b/pkg/cloud/services/autoscaling/autoscalinggroup_test.go
index e116c80126..9434696166 100644
--- a/pkg/cloud/services/autoscaling/autoscalinggroup_test.go
+++ b/pkg/cloud/services/autoscaling/autoscalinggroup_test.go
@@ -1203,6 +1203,7 @@ func TestServiceStartASGInstanceRefresh(t *testing.T) {
 					Preferences: &autoscaling.RefreshPreferences{
 						InstanceWarmup:       aws.Int64(100),
 						MinHealthyPercentage: aws.Int64(80),
+						MaxHealthyPercentage: aws.Int64(100),
 					},
 				})).
 					Return(nil, awserrors.NewNotFound("not found"))
@@ -1218,6 +1219,7 @@ func TestServiceStartASGInstanceRefresh(t *testing.T) {
 					Preferences: &autoscaling.RefreshPreferences{
 						InstanceWarmup:       aws.Int64(100),
 						MinHealthyPercentage: aws.Int64(80),
+						MaxHealthyPercentage: aws.Int64(100),
 					},
 				})).
 					Return(&autoscaling.StartInstanceRefreshOutput{}, nil)
@@ -1312,6 +1314,7 @@ func getMachinePoolScope(client client.Client, clusterScope *scope.ClusterScope)
 				Strategy:             aws.String("Rolling"),
 				InstanceWarmup:       aws.Int64(100),
 				MinHealthyPercentage: aws.Int64(80),
+				MaxHealthyPercentage: aws.Int64(100),
 			},
 			MixedInstancesPolicy: &expinfrav1.MixedInstancesPolicy{
 				InstancesDistribution: &expinfrav1.InstancesDistribution{