From 8af9c1085bb50b131d415531563a30851570cf6f Mon Sep 17 00:00:00 2001 From: Andreas Sommer Date: Mon, 8 Jul 2024 12:23:12 +0200 Subject: [PATCH] Try deleting machine pool user data file from S3 when pruning an old launch template version --- .../awsmachinepool_controller_test.go | 6 +-- pkg/cloud/services/ec2/launchtemplate.go | 40 ++++++++++++++----- pkg/cloud/services/interfaces.go | 4 +- .../mock_services/ec2_interface_mock.go | 8 ++-- .../objectstore_machine_interface_mock.go | 14 +++++++ pkg/cloud/services/s3/s3.go | 40 +++++++++++++++++++ 6 files changed, 96 insertions(+), 16 deletions(-) diff --git a/exp/controllers/awsmachinepool_controller_test.go b/exp/controllers/awsmachinepool_controller_test.go index 55b15dd9ab..52bcd9628b 100644 --- a/exp/controllers/awsmachinepool_controller_test.go +++ b/exp/controllers/awsmachinepool_controller_test.go @@ -556,7 +556,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { ec2Svc.EXPECT().DiscoverLaunchTemplateAMI(gomock.Any()).Return(pointer.String("ami-different"), nil) ec2Svc.EXPECT().LaunchTemplateNeedsUpdate(gomock.Any(), gomock.Any(), gomock.Any()).Return(false, nil) asgSvc.EXPECT().CanStartASGInstanceRefresh(gomock.Any()).Return(true, nil, nil) - ec2Svc.EXPECT().PruneLaunchTemplateVersions(gomock.Any()).Return(nil) + ec2Svc.EXPECT().PruneLaunchTemplateVersions(gomock.Any()).Return(nil, nil) ec2Svc.EXPECT().CreateLaunchTemplateVersion(gomock.Any(), gomock.Any(), gomock.Eq(pointer.String("ami-different")), gomock.Eq(apimachinerytypes.NamespacedName{Namespace: "default", Name: "bootstrap-data"}), gomock.Any()).Return(nil) ec2Svc.EXPECT().GetLaunchTemplateLatestVersion(gomock.Any()).Return("2", nil) // AMI change should trigger rolling out new nodes @@ -604,7 +604,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { ec2Svc.EXPECT().DiscoverLaunchTemplateAMI(gomock.Any()).Return(pointer.String("ami-existing"), nil) ec2Svc.EXPECT().LaunchTemplateNeedsUpdate(gomock.Any(), gomock.Any(), gomock.Any()).Return(false, nil) asgSvc.EXPECT().CanStartASGInstanceRefresh(gomock.Any()).Return(true, nil, nil) - ec2Svc.EXPECT().PruneLaunchTemplateVersions(gomock.Any()).Return(nil) + ec2Svc.EXPECT().PruneLaunchTemplateVersions(gomock.Any()).Return(nil, nil) ec2Svc.EXPECT().CreateLaunchTemplateVersion(gomock.Any(), gomock.Any(), gomock.Eq(pointer.String("ami-existing")), gomock.Eq(apimachinerytypes.NamespacedName{Namespace: "default", Name: "bootstrap-data"}), gomock.Any()).Return(nil) ec2Svc.EXPECT().GetLaunchTemplateLatestVersion(gomock.Any()).Return("2", nil) // Changing the bootstrap data secret name should trigger rolling out new nodes, no matter what the @@ -679,7 +679,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { ec2Svc.EXPECT().DiscoverLaunchTemplateAMI(gomock.Any()).Return(pointer.String("ami-existing"), nil) ec2Svc.EXPECT().LaunchTemplateNeedsUpdate(gomock.Any(), gomock.Any(), gomock.Any()).Return(false, nil) asgSvc.EXPECT().CanStartASGInstanceRefresh(gomock.Any()).Return(true, nil, nil) - ec2Svc.EXPECT().PruneLaunchTemplateVersions(gomock.Any()).Return(nil) + ec2Svc.EXPECT().PruneLaunchTemplateVersions(gomock.Any()).Return(nil, nil) ec2Svc.EXPECT().CreateLaunchTemplateVersion(gomock.Any(), gomock.Any(), gomock.Eq(pointer.String("ami-existing")), gomock.Eq(apimachinerytypes.NamespacedName{Namespace: "default", Name: "bootstrap-data-new"}), gomock.Any()).Return(nil) ec2Svc.EXPECT().GetLaunchTemplateLatestVersion(gomock.Any()).Return("2", nil) // Changing the bootstrap data secret name should trigger rolling out new nodes, no matter what the diff --git a/pkg/cloud/services/ec2/launchtemplate.go b/pkg/cloud/services/ec2/launchtemplate.go index 3b1a2d6006..628874c56d 100644 --- a/pkg/cloud/services/ec2/launchtemplate.go +++ b/pkg/cloud/services/ec2/launchtemplate.go @@ -258,11 +258,31 @@ func (s *Service) ReconcileLaunchTemplate( // userdata, OR we've discovered a new AMI ID. if needsUpdate || tagsChanged || amiChanged || userDataHashChanged || userDataSecretKeyChanged || launchTemplateNeedsUserDataSecretKeyTag { scope.Info("creating new version for launch template", "existing", launchTemplate, "incoming", scope.GetLaunchTemplate(), "needsUpdate", needsUpdate, "tagsChanged", tagsChanged, "amiChanged", amiChanged, "userDataHashChanged", userDataHashChanged, "userDataSecretKeyChanged", userDataSecretKeyChanged) + // There is a limit to the number of Launch Template Versions. // We ensure that the number of versions does not grow without bound by following a simple rule: Before we create a new version, we delete one old version, if there is at least one old version that is not in use. - if err := ec2svc.PruneLaunchTemplateVersions(scope.GetLaunchTemplateIDStatus()); err != nil { + deletedLaunchTemplateVersion, err := ec2svc.PruneLaunchTemplateVersions(scope.GetLaunchTemplateIDStatus()) + if err != nil { return nil, err } + + // S3 objects should be deleted as soon as possible if they're not used + // anymore. If this fails, it would still be cleaned by the bucket lifecycle + // policy later. + if deletedLaunchTemplateVersion != nil && deletedLaunchTemplateVersion.LaunchTemplateData.UserData != nil && len(*deletedLaunchTemplateVersion.LaunchTemplateData.UserData) > 0 && s3Scope.Bucket() != nil && bootstrapDataFormat == "ignition" && ignitionScope.Ignition() != nil { + scope.Info("Deleting S3 object for deleted launch template version", "version", *deletedLaunchTemplateVersion.VersionNumber) + + decodedUserDataOfDeletedLaunchTemplateVersion, err := base64.StdEncoding.DecodeString(*deletedLaunchTemplateVersion.LaunchTemplateData.UserData) + if err == nil { + err = objectStoreSvc.DeleteForMachinePool(scope, decodedUserDataOfDeletedLaunchTemplateVersion) + } + + // If any error happened above, log it and continue + if err != nil { + scope.Error(err, "Failed to delete S3 object for deleted launch template version, continuing because the bucket lifecycle policy will clean it later", "version", *deletedLaunchTemplateVersion.VersionNumber) + } + } + if err := ec2svc.CreateLaunchTemplateVersion(scope.GetLaunchTemplateIDStatus(), scope, imageID, *bootstrapDataSecretKey, bootstrapDataForLaunchTemplate); err != nil { return nil, err } @@ -703,7 +723,9 @@ func (s *Service) DeleteLaunchTemplate(id string) error { // It does not delete the "latest" version, because that version may still be in use. // It does not delete the "default" version, because that version cannot be deleted. // It does not assume that versions are sequential. Versions may be deleted out of band. -func (s *Service) PruneLaunchTemplateVersions(id string) error { +// If there was an unused version, it is returned together with any error that +// happened during deletion. +func (s *Service) PruneLaunchTemplateVersions(id string) (*ec2.LaunchTemplateVersion, error) { // When there is one version available, it is the default and the latest. // When there are two versions available, one the is the default, the other is the latest. // Therefore we only prune when there are at least 3 versions available. @@ -719,7 +741,7 @@ func (s *Service) PruneLaunchTemplateVersions(id string) error { out, err := s.EC2Client.DescribeLaunchTemplateVersionsWithContext(context.TODO(), input) if err != nil { s.scope.Info("", "aerr", err.Error()) - return err + return nil, err } // len(out.LaunchTemplateVersions) | items @@ -728,10 +750,10 @@ func (s *Service) PruneLaunchTemplateVersions(id string) error { // 2 | [default, latest] // 3 | [default, versionToPrune, latest] if len(out.LaunchTemplateVersions) < minCountToAllowPrune { - return nil + return nil, nil } - versionToPrune := out.LaunchTemplateVersions[1].VersionNumber - return s.deleteLaunchTemplateVersion(id, versionToPrune) + versionToPrune := out.LaunchTemplateVersions[1] + return versionToPrune, s.deleteLaunchTemplateVersion(id, versionToPrune.VersionNumber) } func (s *Service) GetLaunchTemplateLatestVersion(id string) (string, error) { @@ -754,11 +776,11 @@ func (s *Service) GetLaunchTemplateLatestVersion(id string) (string, error) { } func (s *Service) deleteLaunchTemplateVersion(id string, version *int64) error { - s.scope.Debug("Deleting launch template version", "id", id) - if version == nil { return errors.New("version is a nil pointer") } + s.scope.Debug("Deleting launch template version", "id", id, "version", *version) + versions := []string{strconv.FormatInt(*version, 10)} input := &ec2.DeleteLaunchTemplateVersionsInput{ @@ -771,7 +793,7 @@ func (s *Service) deleteLaunchTemplateVersion(id string, version *int64) error { return err } - s.scope.Debug("Deleted launch template", "id", id, "version", *version) + s.scope.Debug("Deleted launch template version", "id", id, "version", *version) return nil } diff --git a/pkg/cloud/services/interfaces.go b/pkg/cloud/services/interfaces.go index a23bfe5290..529a95c2dd 100644 --- a/pkg/cloud/services/interfaces.go +++ b/pkg/cloud/services/interfaces.go @@ -17,6 +17,7 @@ limitations under the License. package services import ( + "github.com/aws/aws-sdk-go/service/ec2" apimachinerytypes "k8s.io/apimachinery/pkg/types" ctrl "sigs.k8s.io/controller-runtime" @@ -75,7 +76,7 @@ type EC2Interface interface { GetLaunchTemplateLatestVersion(id string) (string, error) CreateLaunchTemplate(scope scope.LaunchTemplateScope, imageID *string, userDataSecretKey apimachinerytypes.NamespacedName, userData []byte) (string, error) CreateLaunchTemplateVersion(id string, scope scope.LaunchTemplateScope, imageID *string, userDataSecretKey apimachinerytypes.NamespacedName, userData []byte) error - PruneLaunchTemplateVersions(id string) error + PruneLaunchTemplateVersions(id string) (*ec2.LaunchTemplateVersion, error) DeleteLaunchTemplate(id string) error LaunchTemplateNeedsUpdate(scope scope.LaunchTemplateScope, incoming *expinfrav1.AWSLaunchTemplate, existing *expinfrav1.AWSLaunchTemplate) (bool, error) DeleteBastion() error @@ -132,4 +133,5 @@ type ObjectStoreInterface interface { Delete(m *scope.MachineScope) error Create(m *scope.MachineScope, data []byte) (objectURL string, err error) CreateForMachinePool(scope scope.LaunchTemplateScope, data []byte) (objectURL string, err error) + DeleteForMachinePool(scope scope.LaunchTemplateScope, data []byte) error } diff --git a/pkg/cloud/services/mock_services/ec2_interface_mock.go b/pkg/cloud/services/mock_services/ec2_interface_mock.go index 922d5f3360..f2872fc1d1 100644 --- a/pkg/cloud/services/mock_services/ec2_interface_mock.go +++ b/pkg/cloud/services/mock_services/ec2_interface_mock.go @@ -23,6 +23,7 @@ package mock_services import ( reflect "reflect" + ec2 "github.com/aws/aws-sdk-go/service/ec2" gomock "github.com/golang/mock/gomock" types "k8s.io/apimachinery/pkg/types" v1beta2 "sigs.k8s.io/cluster-api-provider-aws/v2/api/v1beta2" @@ -306,11 +307,12 @@ func (mr *MockEC2InterfaceMockRecorder) ModifyInstanceMetadataOptions(arg0, arg1 } // PruneLaunchTemplateVersions mocks base method. -func (m *MockEC2Interface) PruneLaunchTemplateVersions(arg0 string) error { +func (m *MockEC2Interface) PruneLaunchTemplateVersions(arg0 string) (*ec2.LaunchTemplateVersion, error) { m.ctrl.T.Helper() ret := m.ctrl.Call(m, "PruneLaunchTemplateVersions", arg0) - ret0, _ := ret[0].(error) - return ret0 + ret0, _ := ret[0].(*ec2.LaunchTemplateVersion) + ret1, _ := ret[1].(error) + return ret0, ret1 } // PruneLaunchTemplateVersions indicates an expected call of PruneLaunchTemplateVersions. diff --git a/pkg/cloud/services/mock_services/objectstore_machine_interface_mock.go b/pkg/cloud/services/mock_services/objectstore_machine_interface_mock.go index 6fd4b67104..64c1c91b71 100644 --- a/pkg/cloud/services/mock_services/objectstore_machine_interface_mock.go +++ b/pkg/cloud/services/mock_services/objectstore_machine_interface_mock.go @@ -108,6 +108,20 @@ func (mr *MockObjectStoreInterfaceMockRecorder) DeleteBucket() *gomock.Call { return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DeleteBucket", reflect.TypeOf((*MockObjectStoreInterface)(nil).DeleteBucket)) } +// DeleteForMachinePool mocks base method. +func (m *MockObjectStoreInterface) DeleteForMachinePool(arg0 scope.LaunchTemplateScope, arg1 []byte) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "DeleteForMachinePool", arg0, arg1) + ret0, _ := ret[0].(error) + return ret0 +} + +// DeleteForMachinePool indicates an expected call of DeleteForMachinePool. +func (mr *MockObjectStoreInterfaceMockRecorder) DeleteForMachinePool(arg0, arg1 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DeleteForMachinePool", reflect.TypeOf((*MockObjectStoreInterface)(nil).DeleteForMachinePool), arg0, arg1) +} + // ReconcileBucket mocks base method. func (m *MockObjectStoreInterface) ReconcileBucket() error { m.ctrl.T.Helper() diff --git a/pkg/cloud/services/s3/s3.go b/pkg/cloud/services/s3/s3.go index 086503a8a6..6d81063d60 100644 --- a/pkg/cloud/services/s3/s3.go +++ b/pkg/cloud/services/s3/s3.go @@ -292,6 +292,46 @@ func (s *Service) Delete(m *scope.MachineScope) error { return nil } +func (s *Service) DeleteForMachinePool(scope scope.LaunchTemplateScope, data []byte) error { + if !s.bucketManagementEnabled() { + return errors.New("requested object deletion but bucket management is not enabled") + } + + if scope.LaunchTemplateName() == "" { + return errors.New("launch template name can't be empty") + } + + if len(data) == 0 { + return errors.New("got empty data") + } + + bucket := s.bucketName() + key := s.bootstrapDataKeyForMachinePool(scope, data) + + s.scope.Info("Deleting object for machine pool", "bucket_name", bucket, "key", key) + + _, err := s.S3Client.DeleteObject(&s3.DeleteObjectInput{ + Bucket: aws.String(bucket), + Key: aws.String(key), + }) + if err == nil { + return nil + } + + aerr, ok := err.(awserr.Error) + if !ok { + return errors.Wrap(err, "deleting S3 object for machine pool") + } + + switch aerr.Code() { + case s3.ErrCodeNoSuchBucket: + default: + return errors.Wrap(aerr, "deleting S3 object for machine pool") + } + + return nil +} + func (s *Service) createBucketIfNotExist(bucketName string) error { input := &s3.CreateBucketInput{ Bucket: aws.String(bucketName),