diff --git a/.circleci/config.yml b/.circleci/config.yml new file mode 100644 index 0000000000..2dac1826c3 --- /dev/null +++ b/.circleci/config.yml @@ -0,0 +1,106 @@ +version: 2.1 +orbs: + go: circleci/go@1.11.0 + +jobs: + test: + resource_class: large + executor: + name: go/default + tag: "1.22.3" + steps: + - checkout + - go/load-cache + - go/mod-download + - run: + command: make setup-envtest + - go/save-cache + - run: + command: make test + + build: + machine: + image: "ubuntu-2204:2024.05.1" + environment: + ALL_ARCH: "amd64 arm64" + REGISTRY_AZURE: gsoci.azurecr.io/giantswarm + REGISTRY_QUAY: quay.io/giantswarm + REGISTRY_CHINA: giantswarm-registry.cn-shanghai.cr.aliyuncs.com/giantswarm + steps: + - checkout + + - run: + name: Ensure 'make generate' matches commit + command: | + make generate + git diff --exit-code || { echo "There are differences in generated files. Please run and commit 'make generate'. Else we can run into CRDs mismatching the code."; exit 1; } + + - run: + name: Build the CAPA docker images + command: | + for registry in $REGISTRY_AZURE $REGISTRY_QUAY $REGISTRY_CHINA; do + make docker-build-all ALL_ARCH="$ALL_ARCH" TAG=$CIRCLE_SHA1 REGISTRY=$registry + + if [ -n "$CIRCLE_TAG" ]; then + echo "Building tag $CIRCLE_TAG" + make docker-build-all ALL_ARCH="$ALL_ARCH" TAG="$CIRCLE_TAG" REGISTRY=$registry + fi + done + + - run: + name: Push to Azure + command: | + docker login --username $ACR_GSOCI_USERNAME --password $ACR_GSOCI_PASSWORD "${REGISTRY_AZURE%/*}" + + make docker-push-all ALL_ARCH="$ALL_ARCH" TAG=$CIRCLE_SHA1 REGISTRY=$REGISTRY_AZURE + + if [ -n "$CIRCLE_TAG" ]; then + echo "Pushing tag $CIRCLE_TAG" + make docker-push-all ALL_ARCH="$ALL_ARCH" TAG="$CIRCLE_TAG" REGISTRY=$REGISTRY_AZURE + fi + + - run: + name: Push to quay + command: | + docker login --username $QUAY_USERNAME --password $QUAY_PASSWORD quay.io + + make docker-push-all ALL_ARCH="$ALL_ARCH" TAG=$CIRCLE_SHA1 REGISTRY=$REGISTRY_QUAY + + if [ -n "$CIRCLE_TAG" ]; then + echo "Pushing tag $CIRCLE_TAG" + make docker-push-all ALL_ARCH="$ALL_ARCH" TAG="$CIRCLE_TAG" REGISTRY=$REGISTRY_QUAY + fi + + - run: + name: Push to aliyun + command: | + for n in $(seq 1 5); do + ( + set -eu + docker login --username $ALIYUN_USERNAME --password $ALIYUN_PASSWORD giantswarm-registry.cn-shanghai.cr.aliyuncs.com + + make docker-push-all ALL_ARCH="$ALL_ARCH" TAG=$CIRCLE_SHA1 REGISTRY=$REGISTRY_CHINA + + if [ -n "${CIRCLE_TAG:-}" ]; then + echo "Pushing tag $CIRCLE_TAG" + make docker-push-all ALL_ARCH="$ALL_ARCH" TAG="$CIRCLE_TAG" REGISTRY=$REGISTRY_CHINA + fi + ) || { echo "Failed attempt ${n}"; sleep 30; continue; } + + echo "Succeeded in attempt ${n}" + exit 0 + done + + exit 1 + +workflows: + version: 2 + build_and_update: + jobs: + - build: + context: + - architect + filters: + tags: + only: /^v.*/ + - test diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index 41d312832f..ac541ad5cb 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -1,38 +1,60 @@ +# As opposed to https://github.com/kubernetes-sigs/cluster-api (CAPI), the CAPA upstream project does not offer +# a GitHub action to automatically create releases from tags (as of 2023-08-21). Therefore, this is a Giant Swarm +# fork-specific addition. We require a GitHub release containing the YAML manifests which we use in +# cluster-api-provider-aws-app. Since doing this manually is very error-prone (see +# `docs/book/src/development/releasing.md`), we run the needed commands here. + name: release on: push: tags: - - 'v*' + - 'v*' permissions: - contents: write # required to write to github release. + contents: write # allow creating a release jobs: - release: + build: name: Create draft release runs-on: ubuntu-latest + env: + GH_ORG_NAME: giantswarm steps: + - name: Set env + run: | + if echo "${GITHUB_REF}" | grep -qF "vX.Y"; then + >&2 echo "ERROR: Oops, you copy-pasted verbatim from the README.md - please ensure to replace 'vX.Y.Z' with an actual tag" + exit 1 + fi + + echo "RELEASE_TAG=${GITHUB_REF:10}" >> $GITHUB_ENV # strip off `refs/tags/` prefix + - name: checkout code uses: actions/checkout@v4 with: fetch-depth: 0 + - name: Set up Go uses: actions/setup-go@v5 with: go-version: '1.22' - - name: Set version info - run: | - echo "VERSION=${GITHUB_REF_NAME}" >> $GITHUB_ENV - echo "PREVIOUS_VERSION=$(git describe --abbrev=0 2> /dev/null)" >> $GITHUB_ENV - echo "RELEASE_BRANCH=release-$(echo ${GITHUB_REF_NAME} | grep -Eo '[0-9]\.[0-9]+')" >> $GITHUB_ENV - echo "RELEASE_TAG=${GITHUB_REF_NAME}" >> $GITHUB_ENV - - name: Run release - run: | - echo "Version is: $VERSION" - echo "Previous version is: $PREVIOUS_VERSION" - echo "Release branch is: $RELEASE_BRANCH" - echo "Release tag is: $RELEASE_TAG" - make release + cache-dependency-path: | + ./go.sum + ./hack/tools/go.sum + + - name: Generate release artifacts env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_TOKEN: "unused" # since we create the release without using CAPA's Makefile target + run: | + make REGISTRY="registry.k8s.io/cluster-api-aws" RELEASE_TAG="${RELEASE_TAG}" release-manifests + + # Instead of `make VERSION="${RELEASE_TAG}" create-gh-release upload-gh-artifacts`, which requires GitHub CLI + # authentication, use an action which does the same. + - name: Release + uses: softprops/action-gh-release@de2c0eb89ae2a093876385947365aca7b0e5f844 # tag=v1 + with: + draft: true + files: out/* + body: "This fork does not provide release changelogs." + # `name` not needed since this takes the tag by default (which we also use above as ${RELEASE_TAG}) diff --git a/README.md b/README.md index 6f1b87baa6..0c26793893 100644 --- a/README.md +++ b/README.md @@ -1,228 +1,112 @@ -# Kubernetes Cluster API Provider AWS +# Cluster API Provider AWS -

-Powered by AWS Cloud Computing -

-

- - - - - - - - - - - - -

+This is Giant Swarm's fork. See the upstream [cluster-api-provider-aws README](https://github.com/kubernetes-sigs/cluster-api-provider-aws/blob/main/README.md) for official documentation. ------- +## How to work with this repo -Kubernetes-native declarative infrastructure for AWS. +Currently, we try to follow the upstream `release-X.Y` branch to always get the latest stable release and fixes, but not untested commits from `main`. Our only differences against upstream should be in this `README.md`, `.circleci/` and `.github/workflows/release.yaml`. Other changes should be opened as PR for the upstream project first. -## What is the Cluster API Provider AWS +We release cluster-api-provider-aws versions with [cluster-api-provider-aws-app](https://github.com/giantswarm/cluster-api-provider-aws-app/). To provide the YAML manifests, we use GitHub releases as the upstream project. The scripts in `cluster-api-provider-aws-app` convert them into the final manifests. -The [Cluster API][cluster_api] brings -declarative, Kubernetes-style APIs to cluster creation, configuration and -management. +### Repo setup -The API itself is shared across multiple cloud providers allowing for true AWS -hybrid deployments of Kubernetes. It is built atop the lessons learned from -previous cluster managers such as [kops][kops] and -[kubicorn][kubicorn]. +Since we follow upstream, add their Git repo as remote from which we merge commits: -## Documentation - -Please see our [book](https://cluster-api-aws.sigs.k8s.io) for in-depth documentation. - -## Launching a Kubernetes cluster on AWS - -Check out the [Cluster API Quick Start](https://cluster-api.sigs.k8s.io/user/quick-start.html) for launching a -cluster on AWS. - -## Features +```sh +git clone git@github.com:giantswarm/cluster-api-provider-aws.git +cd cluster-api-provider-aws +git remote add upstream https://github.com/kubernetes-sigs/cluster-api-provider-aws.git +``` -- Native Kubernetes manifests and API -- Manages the bootstrapping of VPCs, gateways, security groups and instances. -- Choice of Linux distribution using [pre-baked AMIs][published_amis]. -- Deploys Kubernetes control planes into private subnets with a separate - bastion server. -- Doesn't use SSH for bootstrapping nodes. -- Installs only the minimal components to bootstrap a control plane and workers. -- Supports control planes on EC2 instances. -- [EKS support][eks_support] +### Test and release ------- +If you have a non-urgent fix, create an upstream PR and wait until it gets released. We call this release `vX.Y.Z` in the below instructions, so please fill in the desired tag. -## Compatibility with Cluster API and Kubernetes Versions +Please follow the development workflow: -This provider's versions are compatible with the following versions of Cluster API -and support all Kubernetes versions that is supported by its compatible Cluster API version: +- Ensure a stable release branch exists in our fork repo. For example with a desired upstream release v2.2.1, the branch is `release-2.2`. If it does not exist on our side yet, copy the branch from upstream and add our changes such as `README.md` and `.circleci/` on top. +- Create a working branch for your changes +- We want to use stable upstream release tags unless a hotfix is absolutely required ([decision](https://intranet.giantswarm.io/docs/product/pdr/010_fork_management/)). Please decide what type of change you're making: -| | Cluster API v1alpha4 (v0.4) | Cluster API v1beta1 (v1.x) | -| --------------------------- | :-------------------------: | :-------------------------: | -| CAPA v1alpha4 `(v0.7)` | ✓ | ☓ | -| CAPA v1beta1 `(v1.x)` | ☓ | ✓ | -| CAPA v1beta2 `(v2.x, main)`| ☓ | ✓ | + - Either: you want to merge and test the latest upstream tag -(See [Kubernetes support matrix][cluster-api-supported-v] of Cluster API versions). + ```sh + # Get latest changes on our release branch + git checkout release-X.Y + git pull ------- + git fetch upstream -## Kubernetes versions with published AMIs + git checkout -b my-working-branch release-X.Y -See [amis] for the list of most recently published AMIs. + # Create a merge commit using upstream's desired release tag (the one we want + # to upgrade to). + # This creates a commit message such as "Merge tag 'v2.2.1' into release-2.2". + git merge --no-ff vX.Y.Z ------- + # Since we want the combined content of our repo and the upstream Git tag, + # we need to create our own tag on the merge commit + git tag "vX.Y.Z-gs-$(git rev-parse --short HEAD)" -## clusterawsadm + # Push your working branch. This triggers image build in CircleCI. + git push -`clusterawsadm` CLI tool provides bootstrapping, AMI, EKS, and controller related helpers. + # Push your Giant Swarm tag (assuming `origin` is the Giant Swarm fork). + # This triggers the GitHub release action - please continue reading below! + git push origin "vX.Y.Z-gs-$(git rev-parse --short HEAD)" + ``` -`clusterawsadm` binaries are released with each release, can be found under [assets](https://github.com/kubernetes-sigs/cluster-api-provider-aws/releases/latest) section. + - Or: you want to implement something else, such as working on some issue that we have which is not fixed in upstream yet. Note that for testing changes to upstream, you probably better base your work on the `upstream/main` branch and try your change together with the latest commits from upstream. This also avoids merge conflicts. Maintainers can then help you cherry-pick into their release branches. The latest release branch is usually a bit behind `main`. -`clusterawsadm` could also be installed via Homebrew on macOS and linux OS. -Install the latest release using homebrew: + ```sh + # Get latest changes on our release branch + git checkout release-X.Y + git pull -```shell -brew install clusterawsadm -``` + git checkout -b my-working-branch release-X.Y # or based on `main` instead of `release-X.Y`, see hint above -Test to ensure the version you installed is up-to-date: + # Make some changes and commit as usual + git commit -```shell -clusterawsadm version -``` + git tag "vX.Y.Z-gs-$(git rev-parse --short HEAD)" ------- + # Push your working branch. This triggers image build in CircleCI + git push -## Getting involved and contributing + # Push your Giant Swarm tag (assuming `origin` is the Giant Swarm fork). + # This triggers the GitHub release action - please continue reading below! + git push origin "vX.Y.Z-gs-$(git rev-parse --short HEAD)" + ``` -Are you interested in contributing to cluster-api-provider-aws? We, the -maintainers and community, would love your suggestions, contributions, and help! -Also, the maintainers can be contacted at any time to learn more about how to get -involved. +- Check that the [CircleCI pipeline](https://app.circleci.com/pipelines/github/giantswarm/cluster-api-provider-aws) succeeds for the desired Git tag in order to produce images. If the tag build fails, fix it. +- Check that the [GitHub release action](https://github.com/giantswarm/cluster-api-provider-aws/actions) for the `vX.Y.Z-gs-...` tag succeeds +- Edit [that draft GitHub release](https://github.com/giantswarm/cluster-api-provider-aws/releases) and turn it from draft to released. This makes the release's manifest files available on the internet, as used in [cluster-api-provider-aws-app](https://github.com/giantswarm/cluster-api-provider-aws-app). +- Test the changes in the app -In the interest of getting more new people involved we tag issues with -[`good first issue`][good_first_issue]. -These are typically issues that have smaller scope but are good ways to start -to get acquainted with the codebase. + - Replace `.tag` in [cluster-api-provider-aws-app's `values.yaml`](https://github.com/giantswarm/cluster-api-provider-aws-app/blob/master/helm/cluster-api-provider-aws/values.yaml) with the new tag `vX.Y.Z-gs-...`. + - Run `cd cluster-api-provider-aws-app && make generate` to update manifests + - Commit and push your working branch for `cluster-api-provider-aws-app` to trigger CircleCI pipeline + - Install and test the app thoroughly on a management cluster. Continue with the next step only once you're confident. +- Open PR for `cluster-api-provider-aws` fork (your working branch) -We also encourage ALL active community participants to act as if they are -maintainers, even if you don't have "official" write permissions. This is a -community effort, we are here to serve the Kubernetes community. If you have an -active interest and you want to get involved, you have real power! Don't assume -that the only people who can get things done around here are the "maintainers". + - If you merged an upstream release tag, we should target our `release-X.Y` branch with the PR. + - On the other hand, if you implemented something else which is not in upstream yet, we should target `upstream/main` so that it first lands in the upstream project, officially approved, tested and released. Afterwards, you would repeat this whole procedure and merge the release that includes your fix. For a quick in-house hotfix, you can alternatively do a quicker PR targeted against our `release-X.Y` branch. +- Also open PR for `cluster-api-provider-aws-app` change +- Once merged, manually bump the version in the respective collection to deploy it for one provider (e.g. [capa-app-collection](https://github.com/giantswarm/capa-app-collection/)) -We also would love to add more "official" maintainers, so show us what you can -do! +### Keep fork customizations up to date -This repository uses the Kubernetes bots. See a full list of the commands [here][prow]. +Only `README.md`, `.circleci/` and `.github/workflows/release.yaml` should differ between upstream and our fork, so the diff of everything else should be empty, or at worst, contain hotfixes that are not in upstream yet: -### Build the images locally +```sh +git fetch upstream +git diff `# the upstream tag we merged recently` vX.Y.Z..origin/release-X.Y `# our release branch` -- ':!.circleci/' "!.github/workflows/release.yaml' ':!README.md' +``` -If you want to just build the CAPA containers locally, run +And we should also keep our `main` and `release-X.Y` branches in sync, so this diff should be empty: -```shell - REGISTRY=docker.io/my-reg make docker-build +```sh +git diff main..release-X.Y -- .circleci/ .github/workflows/release.yaml README.md ``` -### Tilt-based development environment - -See [development][development] section for details. - -### Implementer office hours - -Maintainers hold office hours every two weeks, with sessions open to all -developers working on this project. - -Office hours are hosted on a zoom video chat every other Monday -at 09:00 (Pacific) / 12:00 (Eastern) / 17:00 (Europe/London), -and are published on the [Kubernetes community meetings calendar][gcal]. - -### Other ways to communicate with the contributors - -Please check in with us in the [#cluster-api-aws][slack] channel on Slack. - -## Github issues - -### Bugs - -If you think you have found a bug please follow the instructions below. - -- Please spend a small amount of time giving due diligence to the issue tracker. Your issue might be a duplicate. -- Get the logs from the cluster controllers. Please paste this into your issue. -- Open a [new issue][new_issue]. -- Remember that users might be searching for your issue in the future, so please give it a meaningful title to help others. -- Feel free to reach out to the cluster-api community on the [kubernetes slack][slack]. - -### Tracking new features - -We also use the issue tracker to track features. If you have an idea for a feature, or think you can help kops become even more awesome follow the steps below. - -- Open a [new issue][new_issue]. -- Remember that users might be searching for your issue in the future, so please - give it a meaningful title to help others. -- Clearly define the use case, using concrete examples. EG: I type `this` and - cluster-api-provider-aws does `that`. -- Some of our larger features will require some design. If you would like to - include a technical design for your feature please include it in the issue. -- After the new feature is well understood, and the design agreed upon, we can - start coding the feature. We would love for you to code it. So please open - up a **WIP** *(work in progress)* pull request, and happy coding. - ->“Amazon Web Services, AWS, and the “Powered by AWS” logo materials are -trademarks of Amazon.com, Inc. or its affiliates in the United States -and/or other countries." - -## Our Contributors - -Thank you to all contributors and a special thanks to our current maintainers & reviewers: - -| Maintainers | Reviewers | -|------------------------------------------------------------------| -------------------------------------------------------------------- | -| [@richardcase](https://github.com/richardcase) (from 2020-12-04) | [@cnmcavoy](https://github.com/cnmcavoy) (from 2023-10-16) | -| [@Ankitasw](https://github.com/Ankitasw) (from 2022-10-19) | [@AverageMarcus](https://github.com/AverageMarcus) (from 2022-10-19) | -| [@dlipovetsky](https://github.com/dlipovetsky) (from 2021-10-31) | [@luthermonson](https://github.com/luthermonson ) (from 2023-03-08) | -| [@nrb](https://github.com/nrb) (from 2024-05-24) | [@faiq](https://github.com/faiq) (from 2023-10-16) | -| [@AndiDog](https://github.com/AndiDog) (from 2023-12-13) | [@fiunchinho](https://github.com/fiunchinho) (from 2023-11-6) | -| | [@damdo](https://github.com/damdo) (from 2023-03-01) | - -and the previous/emeritus maintainers & reviewers: - -| Emeritus Maintainers | Emeritus Reviewers | -|------------------------------------------------------|--------------------------------------------------------| -| [@chuckha](https://github.com/chuckha) | [@ashish-amarnath](https://github.com/ashish-amarnath) | -| [@detiber](https://github.com/detiber) | [@davidewatson](https://github.com/davidewatson) | -| [@ncdc](https://github.com/ncdc) | [@enxebre](https://github.com/enxebre) | -| [@randomvariable](https://github.com/randomvariable) | [@ingvagabund](https://github.com/ingvagabund) | -| [@rudoi](https://github.com/rudoi) | [@michaelbeaumont](https://github.com/michaelbeaumont) | -| [@sedefsavas](https://github.com/sedefsavas) | [@sethp-nr](https://github.com/sethp-nr) | -| [@Skarlso](https://github.com/Skarlso) | [@shivi28](https://github.com/shivi28) | -| [@vincepri](https://github.com/vincepri) | [@dthorsen](https://github.com/dthorsen) | -| | [@pydctw](https://github.com/pydctw) | - -All the CAPA contributors: - -

- - - -

- - -[slack]: https://kubernetes.slack.com/messages/CD6U2V71N -[good_first_issue]: https://github.com/kubernetes-sigs/cluster-api-provider-aws/issues?q=is%3Aissue+is%3Aopen+sort%3Aupdated-desc+label%3A%22good+first+issue%22 -[gcal]: https://calendar.google.com/calendar/embed?src=cgnt364vd8s86hr2phapfjc6uk%40group.calendar.google.com -[prow]: https://go.k8s.io/bot-commands -[new_issue]: https://github.com/kubernetes-sigs/cluster-api-provider-aws/issues/new -[cluster_api]: https://github.com/kubernetes-sigs/cluster-api -[kops]: https://github.com/kubernetes/kops -[kubicorn]: http://kubicorn.io/ -[amis]: https://cluster-api-aws.sigs.k8s.io/topics/images/amis.html -[published_amis]: https://cluster-api-aws.sigs.k8s.io/topics/images/built-amis.html -[eks_support]: https://cluster-api-aws.sigs.k8s.io/topics/eks/index.html -[cluster-api-supported-v]: https://cluster-api.sigs.k8s.io/reference/versions.html -[development]: https://cluster-api-aws.sigs.k8s.io/development/development.html +If this shows any output, please align the `main` branch with the release branches. diff --git a/api/v1beta2/tags.go b/api/v1beta2/tags.go index e6e0ea7e73..45bc371a49 100644 --- a/api/v1beta2/tags.go +++ b/api/v1beta2/tags.go @@ -195,6 +195,12 @@ const ( // of the bootstrap secret that was used to create the user data for the latest launch // template version. LaunchTemplateBootstrapDataSecret = NameAWSProviderPrefix + "bootstrap-data-secret" + + // LaunchTemplateBootstrapDataHash is the tag we use to store the hash of the raw bootstrap data. + // If bootstrap data is stored in S3, this hash relates to that data, not to the EC2 instance + // user data which only references the S3 object. We store this tag on launch template versions + // so that S3 bootstrap data objects can be deleted when they get outdated. + LaunchTemplateBootstrapDataHash = NameAWSProviderPrefix + "bootstrap-data-hash" ) // ClusterTagKey generates the key for resources associated with a cluster. diff --git a/cmd/clusterawsadm/cloudformation/bootstrap/cluster_api_controller.go b/cmd/clusterawsadm/cloudformation/bootstrap/cluster_api_controller.go index 049de10431..825a0fca5b 100644 --- a/cmd/clusterawsadm/cloudformation/bootstrap/cluster_api_controller.go +++ b/cmd/clusterawsadm/cloudformation/bootstrap/cluster_api_controller.go @@ -193,6 +193,7 @@ func (t Template) ControllersPolicy() *iamv1.PolicyDocument { "arn:*:autoscaling:*:*:autoScalingGroup:*:autoScalingGroupName/*", }, Action: iamv1.Actions{ + "autoscaling:CancelInstanceRefresh", "autoscaling:CreateAutoScalingGroup", "autoscaling:UpdateAutoScalingGroup", "autoscaling:CreateOrUpdateTags", @@ -292,10 +293,12 @@ func (t Template) ControllersPolicy() *iamv1.PolicyDocument { "s3:CreateBucket", "s3:DeleteBucket", "s3:GetObject", - "s3:PutObject", "s3:DeleteObject", + "s3:ListBucket", "s3:PutBucketPolicy", "s3:PutBucketTagging", + "s3:PutLifecycleConfiguration", + "s3:PutObject", }, }) } diff --git a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/customsuffix.yaml b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/customsuffix.yaml index 7909fe12d5..4024618ba4 100644 --- a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/customsuffix.yaml +++ b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/customsuffix.yaml @@ -249,6 +249,7 @@ Resources: Resource: - '*' - Action: + - autoscaling:CancelInstanceRefresh - autoscaling:CreateAutoScalingGroup - autoscaling:UpdateAutoScalingGroup - autoscaling:CreateOrUpdateTags diff --git a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/default.yaml b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/default.yaml index a9290741ba..aeb1585696 100644 --- a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/default.yaml +++ b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/default.yaml @@ -249,6 +249,7 @@ Resources: Resource: - '*' - Action: + - autoscaling:CancelInstanceRefresh - autoscaling:CreateAutoScalingGroup - autoscaling:UpdateAutoScalingGroup - autoscaling:CreateOrUpdateTags diff --git a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_all_secret_backends.yaml b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_all_secret_backends.yaml index fa7b5a4d95..fde66e0f73 100644 --- a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_all_secret_backends.yaml +++ b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_all_secret_backends.yaml @@ -255,6 +255,7 @@ Resources: Resource: - '*' - Action: + - autoscaling:CancelInstanceRefresh - autoscaling:CreateAutoScalingGroup - autoscaling:UpdateAutoScalingGroup - autoscaling:CreateOrUpdateTags diff --git a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_allow_assume_role.yaml b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_allow_assume_role.yaml index 2390d86097..4f53826a67 100644 --- a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_allow_assume_role.yaml +++ b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_allow_assume_role.yaml @@ -249,6 +249,7 @@ Resources: Resource: - '*' - Action: + - autoscaling:CancelInstanceRefresh - autoscaling:CreateAutoScalingGroup - autoscaling:UpdateAutoScalingGroup - autoscaling:CreateOrUpdateTags diff --git a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_bootstrap_user.yaml b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_bootstrap_user.yaml index 930b879c2e..ee871514ed 100644 --- a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_bootstrap_user.yaml +++ b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_bootstrap_user.yaml @@ -255,6 +255,7 @@ Resources: Resource: - '*' - Action: + - autoscaling:CancelInstanceRefresh - autoscaling:CreateAutoScalingGroup - autoscaling:UpdateAutoScalingGroup - autoscaling:CreateOrUpdateTags diff --git a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_custom_bootstrap_user.yaml b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_custom_bootstrap_user.yaml index 50b9bb3182..af55b82920 100644 --- a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_custom_bootstrap_user.yaml +++ b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_custom_bootstrap_user.yaml @@ -255,6 +255,7 @@ Resources: Resource: - '*' - Action: + - autoscaling:CancelInstanceRefresh - autoscaling:CreateAutoScalingGroup - autoscaling:UpdateAutoScalingGroup - autoscaling:CreateOrUpdateTags diff --git a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_different_instance_profiles.yaml b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_different_instance_profiles.yaml index 478967b404..1511d42401 100644 --- a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_different_instance_profiles.yaml +++ b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_different_instance_profiles.yaml @@ -249,6 +249,7 @@ Resources: Resource: - '*' - Action: + - autoscaling:CancelInstanceRefresh - autoscaling:CreateAutoScalingGroup - autoscaling:UpdateAutoScalingGroup - autoscaling:CreateOrUpdateTags diff --git a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_eks_console.yaml b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_eks_console.yaml index ae2e279062..03b1fcf57c 100644 --- a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_eks_console.yaml +++ b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_eks_console.yaml @@ -249,6 +249,7 @@ Resources: Resource: - '*' - Action: + - autoscaling:CancelInstanceRefresh - autoscaling:CreateAutoScalingGroup - autoscaling:UpdateAutoScalingGroup - autoscaling:CreateOrUpdateTags diff --git a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_eks_default_roles.yaml b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_eks_default_roles.yaml index 3ca015276a..7d74b272f9 100644 --- a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_eks_default_roles.yaml +++ b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_eks_default_roles.yaml @@ -249,6 +249,7 @@ Resources: Resource: - '*' - Action: + - autoscaling:CancelInstanceRefresh - autoscaling:CreateAutoScalingGroup - autoscaling:UpdateAutoScalingGroup - autoscaling:CreateOrUpdateTags diff --git a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_eks_disable.yaml b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_eks_disable.yaml index 57c08e20cc..dc2f256017 100644 --- a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_eks_disable.yaml +++ b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_eks_disable.yaml @@ -249,6 +249,7 @@ Resources: Resource: - '*' - Action: + - autoscaling:CancelInstanceRefresh - autoscaling:CreateAutoScalingGroup - autoscaling:UpdateAutoScalingGroup - autoscaling:CreateOrUpdateTags diff --git a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_eks_kms_prefix.yaml b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_eks_kms_prefix.yaml index 0bacb55e5c..9c9186345e 100644 --- a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_eks_kms_prefix.yaml +++ b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_eks_kms_prefix.yaml @@ -249,6 +249,7 @@ Resources: Resource: - '*' - Action: + - autoscaling:CancelInstanceRefresh - autoscaling:CreateAutoScalingGroup - autoscaling:UpdateAutoScalingGroup - autoscaling:CreateOrUpdateTags diff --git a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_extra_statements.yaml b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_extra_statements.yaml index b864e1c1b3..0490cc9e1e 100644 --- a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_extra_statements.yaml +++ b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_extra_statements.yaml @@ -255,6 +255,7 @@ Resources: Resource: - '*' - Action: + - autoscaling:CancelInstanceRefresh - autoscaling:CreateAutoScalingGroup - autoscaling:UpdateAutoScalingGroup - autoscaling:CreateOrUpdateTags diff --git a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_s3_bucket.yaml b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_s3_bucket.yaml index b376d7cab8..b040508389 100644 --- a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_s3_bucket.yaml +++ b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_s3_bucket.yaml @@ -249,6 +249,7 @@ Resources: Resource: - '*' - Action: + - autoscaling:CancelInstanceRefresh - autoscaling:CreateAutoScalingGroup - autoscaling:UpdateAutoScalingGroup - autoscaling:CreateOrUpdateTags @@ -298,10 +299,12 @@ Resources: - s3:CreateBucket - s3:DeleteBucket - s3:GetObject - - s3:PutObject - s3:DeleteObject + - s3:ListBucket - s3:PutBucketPolicy - s3:PutBucketTagging + - s3:PutLifecycleConfiguration + - s3:PutObject Effect: Allow Resource: - arn:*:s3:::cluster-api-provider-aws-* diff --git a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_ssm_secret_backend.yaml b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_ssm_secret_backend.yaml index edc07671d6..8c6ee01d48 100644 --- a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_ssm_secret_backend.yaml +++ b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_ssm_secret_backend.yaml @@ -249,6 +249,7 @@ Resources: Resource: - '*' - Action: + - autoscaling:CancelInstanceRefresh - autoscaling:CreateAutoScalingGroup - autoscaling:UpdateAutoScalingGroup - autoscaling:CreateOrUpdateTags diff --git a/config/crd/bases/infrastructure.cluster.x-k8s.io_awsmachinepools.yaml b/config/crd/bases/infrastructure.cluster.x-k8s.io_awsmachinepools.yaml index e70f544535..778030c456 100644 --- a/config/crd/bases/infrastructure.cluster.x-k8s.io_awsmachinepools.yaml +++ b/config/crd/bases/infrastructure.cluster.x-k8s.io_awsmachinepools.yaml @@ -883,6 +883,106 @@ spec: after it enters the InService state. If no value is supplied by user a default value of 300 seconds is set type: string + ignition: + description: Ignition defined options related to the bootstrapping + systems where Ignition is used. + properties: + proxy: + description: |- + Proxy defines proxy settings for Ignition. + Only valid for Ignition versions 3.1 and above. + properties: + httpProxy: + description: |- + HTTPProxy is the HTTP proxy to use for Ignition. + A single URL that specifies the proxy server to use for HTTP and HTTPS requests, + unless overridden by the HTTPSProxy or NoProxy options. + type: string + httpsProxy: + description: |- + HTTPSProxy is the HTTPS proxy to use for Ignition. + A single URL that specifies the proxy server to use for HTTPS requests, + unless overridden by the NoProxy option. + type: string + noProxy: + description: |- + NoProxy is the list of domains to not proxy for Ignition. + Specifies a list of strings to hosts that should be excluded from proxying. + + + Each value is represented by: + - An IP address prefix (1.2.3.4) + - An IP address prefix in CIDR notation (1.2.3.4/8) + - A domain name + - A domain name matches that name and all subdomains + - A domain name with a leading . matches subdomains only + - A special DNS label (*), indicates that no proxying should be done + + + An IP address prefix and domain name can also include a literal port number (1.2.3.4:80). + items: + description: IgnitionNoProxy defines the list of domains + to not proxy for Ignition. + maxLength: 2048 + type: string + maxItems: 64 + type: array + type: object + storageType: + default: ClusterObjectStore + description: |- + StorageType defines how to store the boostrap user data for Ignition. + This can be used to instruct Ignition from where to fetch the user data to bootstrap an instance. + + + When omitted, the storage option will default to ClusterObjectStore. + + + When set to "ClusterObjectStore", if the capability is available and a Cluster ObjectStore configuration + is correctly provided in the Cluster object (under .spec.s3Bucket), + an object store will be used to store bootstrap user data. + + + When set to "UnencryptedUserData", EC2 Instance User Data will be used to store the machine bootstrap user data, unencrypted. + This option is considered less secure than others as user data may contain sensitive informations (keys, certificates, etc.) + and users with ec2:DescribeInstances permission or users running pods + that can access the ec2 metadata service have access to this sensitive information. + So this is only to be used at ones own risk, and only when other more secure options are not viable. + enum: + - ClusterObjectStore + - UnencryptedUserData + type: string + tls: + description: |- + TLS defines TLS settings for Ignition. + Only valid for Ignition versions 3.1 and above. + properties: + certificateAuthorities: + description: |- + CASources defines the list of certificate authorities to use for Ignition. + The value is the certificate bundle (in PEM format). The bundle can contain multiple concatenated certificates. + Supported schemes are http, https, tftp, s3, arn, gs, and `data` (RFC 2397) URL scheme. + items: + description: IgnitionCASource defines the source of the + certificate authority to use for Ignition. + maxLength: 65536 + type: string + maxItems: 64 + type: array + type: object + version: + default: "2.3" + description: Version defines which version of Ignition will be + used to generate bootstrap data. + enum: + - "2.3" + - "3.0" + - "3.1" + - "3.2" + - "3.3" + - "3.4" + type: string + type: object maxSize: default: 1 description: MaxSize defines the maximum size of the group. diff --git a/exp/api/v1beta1/conversion.go b/exp/api/v1beta1/conversion.go index 7c39f1fcbd..fa16ace4ab 100644 --- a/exp/api/v1beta1/conversion.go +++ b/exp/api/v1beta1/conversion.go @@ -52,6 +52,9 @@ func (src *AWSMachinePool) ConvertTo(dstRaw conversion.Hub) error { if restored.Spec.AvailabilityZoneSubnetType != nil { dst.Spec.AvailabilityZoneSubnetType = restored.Spec.AvailabilityZoneSubnetType } + if restored.Spec.Ignition != nil { + dst.Spec.Ignition = restored.Spec.Ignition + } if restored.Spec.AWSLaunchTemplate.PrivateDNSName != nil { dst.Spec.AWSLaunchTemplate.PrivateDNSName = restored.Spec.AWSLaunchTemplate.PrivateDNSName diff --git a/exp/api/v1beta1/zz_generated.conversion.go b/exp/api/v1beta1/zz_generated.conversion.go index 585cbd1504..c09131cc71 100644 --- a/exp/api/v1beta1/zz_generated.conversion.go +++ b/exp/api/v1beta1/zz_generated.conversion.go @@ -565,6 +565,7 @@ func autoConvert_v1beta2_AWSMachinePoolSpec_To_v1beta1_AWSMachinePoolSpec(in *v1 } out.CapacityRebalance = in.CapacityRebalance // WARNING: in.SuspendProcesses requires manual conversion: does not exist in peer-type + // WARNING: in.Ignition requires manual conversion: does not exist in peer-type return nil } diff --git a/exp/api/v1beta2/awsmachinepool_types.go b/exp/api/v1beta2/awsmachinepool_types.go index 526876bcfd..62e81e7c7a 100644 --- a/exp/api/v1beta2/awsmachinepool_types.go +++ b/exp/api/v1beta2/awsmachinepool_types.go @@ -101,6 +101,10 @@ type AWSMachinePoolSpec struct { // SuspendProcesses defines a list of processes to suspend for the given ASG. This is constantly reconciled. // If a process is removed from this list it will automatically be resumed. SuspendProcesses *SuspendProcessesTypes `json:"suspendProcesses,omitempty"` + + // Ignition defined options related to the bootstrapping systems where Ignition is used. + // +optional + Ignition *infrav1.Ignition `json:"ignition,omitempty"` } // SuspendProcessesTypes contains user friendly auto-completable values for suspended process names. diff --git a/exp/api/v1beta2/awsmachinepool_webhook.go b/exp/api/v1beta2/awsmachinepool_webhook.go index a4f6a44d41..5f74cef64e 100644 --- a/exp/api/v1beta2/awsmachinepool_webhook.go +++ b/exp/api/v1beta2/awsmachinepool_webhook.go @@ -27,7 +27,8 @@ import ( "sigs.k8s.io/controller-runtime/pkg/webhook" "sigs.k8s.io/controller-runtime/pkg/webhook/admission" - "sigs.k8s.io/cluster-api-provider-aws/v2/api/v1beta2" + infrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/api/v1beta2" + "sigs.k8s.io/cluster-api-provider-aws/v2/feature" ) var log = ctrl.Log.WithName("awsmachinepool-resource") @@ -62,12 +63,12 @@ func (r *AWSMachinePool) validateRootVolume() field.ErrorList { return allErrs } - if v1beta2.VolumeTypesProvisioned.Has(string(r.Spec.AWSLaunchTemplate.RootVolume.Type)) && r.Spec.AWSLaunchTemplate.RootVolume.IOPS == 0 { + if infrav1.VolumeTypesProvisioned.Has(string(r.Spec.AWSLaunchTemplate.RootVolume.Type)) && r.Spec.AWSLaunchTemplate.RootVolume.IOPS == 0 { allErrs = append(allErrs, field.Required(field.NewPath("spec.awsLaunchTemplate.rootVolume.iops"), "iops required if type is 'io1' or 'io2'")) } if r.Spec.AWSLaunchTemplate.RootVolume.Throughput != nil { - if r.Spec.AWSLaunchTemplate.RootVolume.Type != v1beta2.VolumeTypeGP3 { + if r.Spec.AWSLaunchTemplate.RootVolume.Type != infrav1.VolumeTypeGP3 { allErrs = append(allErrs, field.Required(field.NewPath("spec.awsLaunchTemplate.rootVolume.throughput"), "throughput is valid only for type 'gp3'")) } if *r.Spec.AWSLaunchTemplate.RootVolume.Throughput < 0 { @@ -86,12 +87,12 @@ func (r *AWSMachinePool) validateNonRootVolumes() field.ErrorList { var allErrs field.ErrorList for _, volume := range r.Spec.AWSLaunchTemplate.NonRootVolumes { - if v1beta2.VolumeTypesProvisioned.Has(string(volume.Type)) && volume.IOPS == 0 { + if infrav1.VolumeTypesProvisioned.Has(string(volume.Type)) && volume.IOPS == 0 { allErrs = append(allErrs, field.Required(field.NewPath("spec.template.spec.nonRootVolumes.iops"), "iops required if type is 'io1' or 'io2'")) } if volume.Throughput != nil { - if volume.Type != v1beta2.VolumeTypeGP3 { + if volume.Type != infrav1.VolumeTypeGP3 { allErrs = append(allErrs, field.Required(field.NewPath("spec.template.spec.nonRootVolumes.throughput"), "throughput is valid only for type 'gp3'")) } if *volume.Throughput < 0 { @@ -162,6 +163,22 @@ func (r *AWSMachinePool) validateRefreshPreferences() field.ErrorList { return allErrs } +func (r *AWSMachinePool) ignitionEnabled() bool { + return r.Spec.Ignition != nil +} + +func (r *AWSMachinePool) validateIgnition() field.ErrorList { + var allErrs field.ErrorList + + // Feature gate is not enabled but ignition is enabled then send a forbidden error. + if !feature.Gates.Enabled(feature.BootstrapFormatIgnition) && r.ignitionEnabled() { + allErrs = append(allErrs, field.Forbidden(field.NewPath("spec", "ignition"), + "can be set only if the BootstrapFormatIgnition feature gate is enabled")) + } + + return allErrs +} + // ValidateCreate will do any extra validation when creating a AWSMachinePool. func (r *AWSMachinePool) ValidateCreate() (admission.Warnings, error) { log.Info("AWSMachinePool validate create", "machine-pool", klog.KObj(r)) @@ -176,6 +193,7 @@ func (r *AWSMachinePool) ValidateCreate() (admission.Warnings, error) { allErrs = append(allErrs, r.validateAdditionalSecurityGroups()...) allErrs = append(allErrs, r.validateSpotInstances()...) allErrs = append(allErrs, r.validateRefreshPreferences()...) + allErrs = append(allErrs, r.validateIgnition()...) if len(allErrs) == 0 { return nil, nil @@ -226,4 +244,8 @@ func (r *AWSMachinePool) Default() { log.Info("DefaultInstanceWarmup is zero, setting 300 seconds as default") r.Spec.DefaultInstanceWarmup.Duration = 300 * time.Second } + + if r.ignitionEnabled() && r.Spec.Ignition.Version == "" { + r.Spec.Ignition.Version = infrav1.DefaultIgnitionVersion + } } diff --git a/exp/api/v1beta2/zz_generated.deepcopy.go b/exp/api/v1beta2/zz_generated.deepcopy.go index f34e8f9d9b..6c0f077766 100644 --- a/exp/api/v1beta2/zz_generated.deepcopy.go +++ b/exp/api/v1beta2/zz_generated.deepcopy.go @@ -277,6 +277,11 @@ func (in *AWSMachinePoolSpec) DeepCopyInto(out *AWSMachinePoolSpec) { *out = new(SuspendProcessesTypes) (*in).DeepCopyInto(*out) } + if in.Ignition != nil { + in, out := &in.Ignition, &out.Ignition + *out = new(apiv1beta2.Ignition) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AWSMachinePoolSpec. diff --git a/exp/controllers/awsmachinepool_controller.go b/exp/controllers/awsmachinepool_controller.go index 741cdcdb10..66a20dee71 100644 --- a/exp/controllers/awsmachinepool_controller.go +++ b/exp/controllers/awsmachinepool_controller.go @@ -46,6 +46,7 @@ import ( "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services" asg "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services/autoscaling" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services/ec2" + "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services/s3" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/logger" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" expclusterv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" @@ -63,6 +64,7 @@ type AWSMachinePoolReconciler struct { asgServiceFactory func(cloud.ClusterScoper) services.ASGInterface ec2ServiceFactory func(scope.EC2Scope) services.EC2Interface reconcileServiceFactory func(scope.EC2Scope) services.MachinePoolReconcileInterface + objectStoreServiceFactory func(scope.S3Scope) services.ObjectStoreInterface TagUnmanagedNetworkResources bool } @@ -89,6 +91,19 @@ func (r *AWSMachinePoolReconciler) getReconcileService(scope scope.EC2Scope) ser return ec2.NewService(scope) } +func (r *AWSMachinePoolReconciler) getObjectStoreService(scope scope.S3Scope) services.ObjectStoreInterface { + if scope.Bucket() == nil { + // S3 bucket usage not enabled, so object store service not needed + return nil + } + + if r.objectStoreServiceFactory != nil { + return r.objectStoreServiceFactory(scope) + } + + return s3.NewService(scope) +} + // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=awsmachinepools,verbs=get;list;watch;update;patch;delete // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=awsmachinepools/status,verbs=get;update;patch // +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinepools;machinepools/status,verbs=get;list;watch;patch @@ -130,7 +145,7 @@ func (r *AWSMachinePoolReconciler) Reconcile(ctx context.Context, req ctrl.Reque log = log.WithValues("cluster", klog.KObj(cluster)) - infraCluster, err := r.getInfraCluster(ctx, log, cluster, awsMachinePool) + infraCluster, s3Scope, err := r.getInfraCluster(ctx, log, cluster, awsMachinePool) if err != nil { return ctrl.Result{}, fmt.Errorf("getting infra provider cluster or control plane object: %w", err) } @@ -178,13 +193,13 @@ func (r *AWSMachinePoolReconciler) Reconcile(ctx context.Context, req ctrl.Reque return ctrl.Result{}, r.reconcileDelete(machinePoolScope, infraScope, infraScope) } - return ctrl.Result{}, r.reconcileNormal(ctx, machinePoolScope, infraScope, infraScope) + return r.reconcileNormal(ctx, machinePoolScope, infraScope, infraScope, s3Scope) case *scope.ClusterScope: if !awsMachinePool.ObjectMeta.DeletionTimestamp.IsZero() { return ctrl.Result{}, r.reconcileDelete(machinePoolScope, infraScope, infraScope) } - return ctrl.Result{}, r.reconcileNormal(ctx, machinePoolScope, infraScope, infraScope) + return r.reconcileNormal(ctx, machinePoolScope, infraScope, infraScope, s3Scope) default: return ctrl.Result{}, errors.New("infraCluster has unknown type") } @@ -202,7 +217,7 @@ func (r *AWSMachinePoolReconciler) SetupWithManager(ctx context.Context, mgr ctr Complete(r) } -func (r *AWSMachinePoolReconciler) reconcileNormal(ctx context.Context, machinePoolScope *scope.MachinePoolScope, clusterScope cloud.ClusterScoper, ec2Scope scope.EC2Scope) error { +func (r *AWSMachinePoolReconciler) reconcileNormal(ctx context.Context, machinePoolScope *scope.MachinePoolScope, clusterScope cloud.ClusterScoper, ec2Scope scope.EC2Scope, s3Scope scope.S3Scope) (ctrl.Result, error) { clusterScope.Info("Reconciling AWSMachinePool") // If the AWSMachine is in an error state, return early. @@ -211,52 +226,57 @@ func (r *AWSMachinePoolReconciler) reconcileNormal(ctx context.Context, machineP // TODO: If we are in a failed state, delete the secret regardless of instance state - return nil + return ctrl.Result{}, nil } // If the AWSMachinepool doesn't have our finalizer, add it if controllerutil.AddFinalizer(machinePoolScope.AWSMachinePool, expinfrav1.MachinePoolFinalizer) { // Register finalizer immediately to avoid orphaning AWS resources if err := machinePoolScope.PatchObject(); err != nil { - return err + return ctrl.Result{}, err } } if !machinePoolScope.Cluster.Status.InfrastructureReady { machinePoolScope.Info("Cluster infrastructure is not ready yet") conditions.MarkFalse(machinePoolScope.AWSMachinePool, expinfrav1.ASGReadyCondition, infrav1.WaitingForClusterInfrastructureReason, clusterv1.ConditionSeverityInfo, "") - return nil + return ctrl.Result{}, nil } // Make sure bootstrap data is available and populated if machinePoolScope.MachinePool.Spec.Template.Spec.Bootstrap.DataSecretName == nil { machinePoolScope.Info("Bootstrap data secret reference is not yet available") conditions.MarkFalse(machinePoolScope.AWSMachinePool, expinfrav1.ASGReadyCondition, infrav1.WaitingForBootstrapDataReason, clusterv1.ConditionSeverityInfo, "") - return nil + return ctrl.Result{}, nil } ec2Svc := r.getEC2Service(ec2Scope) asgsvc := r.getASGService(clusterScope) reconSvc := r.getReconcileService(ec2Scope) + objectStoreSvc := r.getObjectStoreService(s3Scope) // Find existing ASG asg, err := r.findASG(machinePoolScope, asgsvc) if err != nil { conditions.MarkUnknown(machinePoolScope.AWSMachinePool, expinfrav1.ASGReadyCondition, expinfrav1.ASGNotFoundReason, err.Error()) - return err + return ctrl.Result{}, err } - canUpdateLaunchTemplate := func() (bool, error) { + canStartInstanceRefresh := func() (bool, *string, error) { // If there is a change: before changing the template, check if there exist an ongoing instance refresh, // because only 1 instance refresh can be "InProgress". If template is updated when refresh cannot be started, // that change will not trigger a refresh. Do not start an instance refresh if only userdata changed. if asg == nil { // If the ASG hasn't been created yet, there is no need to check if we can start the instance refresh. // But we want to update the LaunchTemplate because an error in the LaunchTemplate may be blocking the ASG creation. - return true, nil + return true, nil, nil } return asgsvc.CanStartASGInstanceRefresh(machinePoolScope) } + cancelInstanceRefresh := func() error { + machinePoolScope.Info("cancelling instance refresh") + return asgsvc.CancelASGInstanceRefresh(machinePoolScope) + } runPostLaunchTemplateUpdateOperation := func() error { // skip instance refresh if ASG is not created yet if asg == nil { @@ -280,10 +300,14 @@ func (r *AWSMachinePoolReconciler) reconcileNormal(ctx context.Context, machineP machinePoolScope.Info("starting instance refresh", "number of instances", machinePoolScope.MachinePool.Spec.Replicas) return asgsvc.StartASGInstanceRefresh(machinePoolScope) } - if err := reconSvc.ReconcileLaunchTemplate(machinePoolScope, ec2Svc, canUpdateLaunchTemplate, runPostLaunchTemplateUpdateOperation); err != nil { + res, err := reconSvc.ReconcileLaunchTemplate(machinePoolScope, machinePoolScope, s3Scope, ec2Svc, objectStoreSvc, canStartInstanceRefresh, cancelInstanceRefresh, runPostLaunchTemplateUpdateOperation) + if err != nil { r.Recorder.Eventf(machinePoolScope.AWSMachinePool, corev1.EventTypeWarning, "FailedLaunchTemplateReconcile", "Failed to reconcile launch template: %v", err) machinePoolScope.Error(err, "failed to reconcile launch template") - return err + return ctrl.Result{}, err + } + if res != nil { + return *res, nil } // set the LaunchTemplateReady condition @@ -293,9 +317,9 @@ func (r *AWSMachinePoolReconciler) reconcileNormal(ctx context.Context, machineP // Create new ASG if err := r.createPool(machinePoolScope, clusterScope); err != nil { conditions.MarkFalse(machinePoolScope.AWSMachinePool, expinfrav1.ASGReadyCondition, expinfrav1.ASGProvisionFailedReason, clusterv1.ConditionSeverityError, err.Error()) - return err + return ctrl.Result{}, err } - return nil + return ctrl.Result{}, nil } if annotations.ReplicasManagedByExternalAutoscaler(machinePoolScope.MachinePool) { @@ -306,14 +330,14 @@ func (r *AWSMachinePoolReconciler) reconcileNormal(ctx context.Context, machineP "external", asg.DesiredCapacity) machinePoolScope.MachinePool.Spec.Replicas = asg.DesiredCapacity if err := machinePoolScope.PatchCAPIMachinePoolObject(ctx); err != nil { - return err + return ctrl.Result{}, err } } } if err := r.updatePool(machinePoolScope, clusterScope, asg); err != nil { machinePoolScope.Error(err, "error updating AWSMachinePool") - return err + return ctrl.Result{}, err } launchTemplateID := machinePoolScope.GetLaunchTemplateIDStatus() @@ -330,7 +354,7 @@ func (r *AWSMachinePoolReconciler) reconcileNormal(ctx context.Context, machineP } err = reconSvc.ReconcileTags(machinePoolScope, resourceServiceToUpdate) if err != nil { - return errors.Wrap(err, "error updating tags") + return ctrl.Result{}, errors.Wrap(err, "error updating tags") } // Make sure Spec.ProviderID is always set. @@ -353,7 +377,7 @@ func (r *AWSMachinePoolReconciler) reconcileNormal(ctx context.Context, machineP machinePoolScope.Error(err, "failed updating instances", "instances", asg.Instances) } - return nil + return ctrl.Result{}, nil } func (r *AWSMachinePoolReconciler) reconcileDelete(machinePoolScope *scope.MachinePoolScope, clusterScope cloud.ClusterScoper, ec2Scope scope.EC2Scope) error { @@ -389,7 +413,7 @@ func (r *AWSMachinePoolReconciler) reconcileDelete(machinePoolScope *scope.Machi } launchTemplateID := machinePoolScope.AWSMachinePool.Status.LaunchTemplateID - launchTemplate, _, _, err := ec2Svc.GetLaunchTemplate(machinePoolScope.LaunchTemplateName()) + launchTemplate, _, _, _, err := ec2Svc.GetLaunchTemplate(machinePoolScope.LaunchTemplateName()) if err != nil { return err } @@ -607,7 +631,7 @@ func machinePoolToInfrastructureMapFunc(gvk schema.GroupVersionKind) handler.Map } } -func (r *AWSMachinePoolReconciler) getInfraCluster(ctx context.Context, log *logger.Logger, cluster *clusterv1.Cluster, awsMachinePool *expinfrav1.AWSMachinePool) (scope.EC2Scope, error) { +func (r *AWSMachinePoolReconciler) getInfraCluster(ctx context.Context, log *logger.Logger, cluster *clusterv1.Cluster, awsMachinePool *expinfrav1.AWSMachinePool) (scope.EC2Scope, scope.S3Scope, error) { var clusterScope *scope.ClusterScope var managedControlPlaneScope *scope.ManagedControlPlaneScope var err error @@ -621,7 +645,7 @@ func (r *AWSMachinePoolReconciler) getInfraCluster(ctx context.Context, log *log if err := r.Get(ctx, controlPlaneName, controlPlane); err != nil { // AWSManagedControlPlane is not ready - return nil, nil //nolint:nilerr + return nil, nil, nil //nolint:nilerr } managedControlPlaneScope, err = scope.NewManagedControlPlaneScope(scope.ManagedControlPlaneScopeParams{ @@ -633,10 +657,10 @@ func (r *AWSMachinePoolReconciler) getInfraCluster(ctx context.Context, log *log TagUnmanagedNetworkResources: r.TagUnmanagedNetworkResources, }) if err != nil { - return nil, err + return nil, nil, err } - return managedControlPlaneScope, nil + return managedControlPlaneScope, managedControlPlaneScope, nil } awsCluster := &infrav1.AWSCluster{} @@ -648,7 +672,7 @@ func (r *AWSMachinePoolReconciler) getInfraCluster(ctx context.Context, log *log if err := r.Client.Get(ctx, infraClusterName, awsCluster); err != nil { // AWSCluster is not ready - return nil, nil //nolint:nilerr + return nil, nil, nil //nolint:nilerr } // Create the cluster scope @@ -661,8 +685,8 @@ func (r *AWSMachinePoolReconciler) getInfraCluster(ctx context.Context, log *log TagUnmanagedNetworkResources: r.TagUnmanagedNetworkResources, }) if err != nil { - return nil, err + return nil, nil, err } - return clusterScope, nil + return clusterScope, clusterScope, nil } diff --git a/exp/controllers/awsmachinepool_controller_test.go b/exp/controllers/awsmachinepool_controller_test.go index 4902dbb7e7..2e86eb79c8 100644 --- a/exp/controllers/awsmachinepool_controller_test.go +++ b/exp/controllers/awsmachinepool_controller_test.go @@ -19,11 +19,15 @@ package controllers import ( "bytes" "context" + "encoding/base64" "flag" "fmt" "testing" "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/service/autoscaling" + "github.com/aws/aws-sdk-go/service/ec2" + "github.com/aws/aws-sdk-go/service/s3" "github.com/go-logr/logr" "github.com/golang/mock/gomock" . "github.com/onsi/gomega" @@ -43,6 +47,9 @@ import ( "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/scope" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services/mock_services" + s3svc "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services/s3" + "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services/s3/mock_s3iface" + "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services/s3/mock_stsiface" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services/userdata" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/logger" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" @@ -61,6 +68,8 @@ func TestAWSMachinePoolReconciler(t *testing.T) { ec2Svc *mock_services.MockEC2Interface asgSvc *mock_services.MockASGInterface reconSvc *mock_services.MockMachinePoolReconcileInterface + s3Mock *mock_s3iface.MockS3API + stsMock *mock_stsiface.MockSTSAPI recorder *record.FakeRecorder awsMachinePool *expinfrav1.AWSMachinePool secret *corev1.Secret @@ -158,6 +167,8 @@ func TestAWSMachinePoolReconciler(t *testing.T) { ec2Svc = mock_services.NewMockEC2Interface(mockCtrl) asgSvc = mock_services.NewMockASGInterface(mockCtrl) reconSvc = mock_services.NewMockMachinePoolReconcileInterface(mockCtrl) + s3Mock = mock_s3iface.NewMockS3API(mockCtrl) + stsMock = mock_stsiface.NewMockSTSAPI(mockCtrl) // If the test hangs for 9 minutes, increase the value here to the number of events during a reconciliation loop recorder = record.NewFakeRecorder(2) @@ -172,6 +183,12 @@ func TestAWSMachinePoolReconciler(t *testing.T) { reconcileServiceFactory: func(scope.EC2Scope) services.MachinePoolReconcileInterface { return reconSvc }, + objectStoreServiceFactory: func(scope scope.S3Scope) services.ObjectStoreInterface { + svc := s3svc.NewService(scope) + svc.S3Client = s3Mock + svc.STSClient = stsMock + return svc + }, Recorder: recorder, } } @@ -195,7 +212,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { getASG := func(t *testing.T, g *WithT) { t.Helper() - ec2Svc.EXPECT().GetLaunchTemplate(gomock.Any()).Return(nil, "", nil, expectedErr).AnyTimes() + ec2Svc.EXPECT().GetLaunchTemplate(gomock.Any()).Return(nil, "", nil, nil, expectedErr).AnyTimes() asgSvc.EXPECT().GetASGByName(gomock.Any()).Return(nil, expectedErr).AnyTimes() } t.Run("should exit immediately on an error state", func(t *testing.T) { @@ -211,7 +228,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { buf := new(bytes.Buffer) klog.SetOutput(buf) - _ = reconciler.reconcileNormal(context.Background(), ms, cs, cs) + _, _ = reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(buf).To(ContainSubstring("Error state detected, skipping reconciliation")) }) t.Run("should add our finalizer to the machinepool", func(t *testing.T) { @@ -220,7 +237,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { defer teardown(t, g) getASG(t, g) - _ = reconciler.reconcileNormal(context.Background(), ms, cs, cs) + _, _ = reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(ms.AWSMachinePool.Finalizers).To(ContainElement(expinfrav1.MachinePoolFinalizer)) }) @@ -235,7 +252,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { buf := new(bytes.Buffer) klog.SetOutput(buf) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs) + _, err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(err).To(BeNil()) g.Expect(buf.String()).To(ContainSubstring("Cluster infrastructure is not ready yet")) expectConditions(g, ms.AWSMachinePool, []conditionAssertion{{expinfrav1.ASGReadyCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityInfo, infrav1.WaitingForClusterInfrastructureReason}}) @@ -250,7 +267,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { buf := new(bytes.Buffer) klog.SetOutput(buf) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs) + _, err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(err).To(BeNil()) g.Expect(buf.String()).To(ContainSubstring("Bootstrap data secret reference is not yet available")) @@ -266,7 +283,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { getASG := func(t *testing.T, g *WithT) { t.Helper() - ec2Svc.EXPECT().GetLaunchTemplate(gomock.Any()).Return(nil, "", nil, nil).AnyTimes() + ec2Svc.EXPECT().GetLaunchTemplate(gomock.Any()).Return(nil, "", nil, nil, nil).AnyTimes() asgSvc.EXPECT().GetASGByName(gomock.Any()).Return(nil, nil).AnyTimes() } t.Run("should look up by provider ID when one exists", func(t *testing.T) { @@ -277,8 +294,8 @@ func TestAWSMachinePoolReconciler(t *testing.T) { getASG(t, g) expectedErr := errors.New("no connection available ") - reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(expectedErr) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs) + reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, expectedErr) + _, err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(errors.Cause(err)).To(MatchError(expectedErr)) }) }) @@ -298,14 +315,14 @@ func TestAWSMachinePoolReconciler(t *testing.T) { defer teardown(t, g) setSuspendedProcesses(t, g) - reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil) + reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, nil) asgSvc.EXPECT().GetASGByName(gomock.Any()).Return(nil, nil) asgSvc.EXPECT().CreateASG(gomock.Any()).Return(&expinfrav1.AutoScalingGroup{ Name: "name", }, nil) asgSvc.EXPECT().SuspendProcesses("name", []string{"Launch", "Terminate"}).Return(nil).AnyTimes().Times(0) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs) + _, err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(err).To(Succeed()) }) }) @@ -322,7 +339,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { defer teardown(t, g) setSuspendedProcesses(t, g) ms.AWSMachinePool.Spec.SuspendProcesses.All = true - reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil) + reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, nil) reconSvc.EXPECT().ReconcileTags(gomock.Any(), gomock.Any()).Return(nil) asgSvc.EXPECT().GetASGByName(gomock.Any()).Return(&expinfrav1.AutoScalingGroup{ Name: "name", @@ -341,7 +358,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { "ReplaceUnhealthy", })).Return(nil).AnyTimes().Times(1) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs) + _, err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(err).To(Succeed()) }) }) @@ -362,7 +379,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { defer teardown(t, g) setSuspendedProcesses(t, g) - reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil) + reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, nil) reconSvc.EXPECT().ReconcileTags(gomock.Any(), gomock.Any()).Return(nil) asgSvc.EXPECT().GetASGByName(gomock.Any()).Return(&expinfrav1.AutoScalingGroup{ Name: "name", @@ -373,7 +390,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { asgSvc.EXPECT().SuspendProcesses("name", []string{"Terminate"}).Return(nil).AnyTimes().Times(1) asgSvc.EXPECT().ResumeProcesses("name", []string{"process3"}).Return(nil).AnyTimes().Times(1) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs) + _, err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(err).To(Succeed()) }) }) @@ -387,7 +404,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { Name: "an-asg", DesiredCapacity: ptr.To[int32](1), } - reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil) + reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, nil) asgSvc.EXPECT().GetASGByName(gomock.Any()).Return(&asg, nil) asgSvc.EXPECT().SubnetIDs(gomock.Any()).Return([]string{}, nil) asgSvc.EXPECT().UpdateASG(gomock.Any()).Return(nil) @@ -400,7 +417,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { g.Expect(testEnv.Create(ctx, ms.MachinePool)).To(Succeed()) - _ = reconciler.reconcileNormal(context.Background(), ms, cs, cs) + _, _ = reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(*ms.MachinePool.Spec.Replicas).To(Equal(int32(1))) }) t.Run("No need to update Asg because asgNeedsUpdates is false and no subnets change", func(t *testing.T) { @@ -425,13 +442,13 @@ func TestAWSMachinePoolReconciler(t *testing.T) { }, }, Subnets: []string{"subnet1", "subnet2"}} - reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil) + reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, nil) reconSvc.EXPECT().ReconcileTags(gomock.Any(), gomock.Any()).Return(nil) asgSvc.EXPECT().GetASGByName(gomock.Any()).Return(&asg, nil).AnyTimes() asgSvc.EXPECT().SubnetIDs(gomock.Any()).Return([]string{"subnet2", "subnet1"}, nil).Times(1) asgSvc.EXPECT().UpdateASG(gomock.Any()).Return(nil).Times(0) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs) + _, err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(err).To(Succeed()) }) t.Run("update Asg due to subnet changes", func(t *testing.T) { @@ -443,13 +460,13 @@ func TestAWSMachinePoolReconciler(t *testing.T) { MinSize: int32(0), MaxSize: int32(100), Subnets: []string{"subnet1", "subnet2"}} - reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil) + reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, nil) reconSvc.EXPECT().ReconcileTags(gomock.Any(), gomock.Any()).Return(nil) asgSvc.EXPECT().GetASGByName(gomock.Any()).Return(&asg, nil).AnyTimes() asgSvc.EXPECT().SubnetIDs(gomock.Any()).Return([]string{"subnet1"}, nil).Times(1) asgSvc.EXPECT().UpdateASG(gomock.Any()).Return(nil).Times(1) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs) + _, err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(err).To(Succeed()) }) t.Run("update Asg due to asgNeedsUpdates returns true", func(t *testing.T) { @@ -461,13 +478,13 @@ func TestAWSMachinePoolReconciler(t *testing.T) { MinSize: int32(0), MaxSize: int32(2), Subnets: []string{}} - reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil) + reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, nil) reconSvc.EXPECT().ReconcileTags(gomock.Any(), gomock.Any()).Return(nil) asgSvc.EXPECT().GetASGByName(gomock.Any()).Return(&asg, nil).AnyTimes() asgSvc.EXPECT().SubnetIDs(gomock.Any()).Return([]string{}, nil).Times(1) asgSvc.EXPECT().UpdateASG(gomock.Any()).Return(nil).Times(1) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs) + _, err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(err).To(Succeed()) }) @@ -481,9 +498,9 @@ func TestAWSMachinePoolReconciler(t *testing.T) { reconSvc = nil // not used defer teardown(t, g) - ec2Svc.EXPECT().GetLaunchTemplate(gomock.Eq("test")).Return(nil, "", nil, nil) + ec2Svc.EXPECT().GetLaunchTemplate(gomock.Eq("test")).Return(nil, "", nil, nil, nil) ec2Svc.EXPECT().DiscoverLaunchTemplateAMI(gomock.Any()).Return(ptr.To[string]("ami-abcdef123"), nil) - ec2Svc.EXPECT().CreateLaunchTemplate(gomock.Any(), gomock.Eq(ptr.To[string]("ami-abcdef123")), gomock.Eq(userDataSecretKey), gomock.Eq([]byte("shell-script"))).Return("lt-ghijkl456", nil) + ec2Svc.EXPECT().CreateLaunchTemplate(gomock.Any(), gomock.Eq(ptr.To[string]("ami-abcdef123")), gomock.Eq(userDataSecretKey), gomock.Eq([]byte("shell-script")), gomock.Eq(userdata.ComputeHash([]byte("shell-script")))).Return("lt-ghijkl456", nil) asgSvc.EXPECT().GetASGByName(gomock.Any()).Return(nil, nil) asgSvc.EXPECT().CreateASG(gomock.Any()).DoAndReturn(func(scope *scope.MachinePoolScope) (*expinfrav1.AutoScalingGroup, error) { g.Expect(scope.Name()).To(Equal("test")) @@ -492,7 +509,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { }, nil }) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs) + _, err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(err).To(Succeed()) }) @@ -517,6 +534,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { // No change to user data userdata.ComputeHash([]byte("shell-script")), &userDataSecretKey, + nil, nil) ec2Svc.EXPECT().DiscoverLaunchTemplateAMI(gomock.Any()).Return(ptr.To[string]("ami-existing"), nil) // no change ec2Svc.EXPECT().LaunchTemplateNeedsUpdate(gomock.Any(), gomock.Any(), gomock.Any()).Return(false, nil) @@ -539,7 +557,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { // No changes, so there must not be an ASG update! asgSvc.EXPECT().UpdateASG(gomock.Any()).Times(0) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs) + _, err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(err).To(Succeed()) }) @@ -564,12 +582,13 @@ func TestAWSMachinePoolReconciler(t *testing.T) { // No change to user data userdata.ComputeHash([]byte("shell-script")), &userDataSecretKey, + nil, nil) ec2Svc.EXPECT().DiscoverLaunchTemplateAMI(gomock.Any()).Return(ptr.To[string]("ami-different"), nil) ec2Svc.EXPECT().LaunchTemplateNeedsUpdate(gomock.Any(), gomock.Any(), gomock.Any()).Return(false, nil) - asgSvc.EXPECT().CanStartASGInstanceRefresh(gomock.Any()).Return(true, nil) - ec2Svc.EXPECT().PruneLaunchTemplateVersions(gomock.Any()).Return(nil) - ec2Svc.EXPECT().CreateLaunchTemplateVersion(gomock.Any(), gomock.Any(), gomock.Eq(ptr.To[string]("ami-different")), gomock.Eq(apimachinerytypes.NamespacedName{Namespace: "default", Name: "bootstrap-data"}), gomock.Any()).Return(nil) + asgSvc.EXPECT().CanStartASGInstanceRefresh(gomock.Any()).Return(true, nil, nil) + ec2Svc.EXPECT().PruneLaunchTemplateVersions(gomock.Any()).Return(nil, nil) + ec2Svc.EXPECT().CreateLaunchTemplateVersion(gomock.Any(), gomock.Any(), gomock.Eq(ptr.To[string]("ami-different")), gomock.Eq(apimachinerytypes.NamespacedName{Namespace: "default", Name: "bootstrap-data"}), gomock.Any(), gomock.Any()).Return(nil) ec2Svc.EXPECT().GetLaunchTemplateLatestVersion(gomock.Any()).Return("2", nil) // AMI change should trigger rolling out new nodes asgSvc.EXPECT().StartASGInstanceRefresh(gomock.Any()) @@ -592,7 +611,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { // No changes, so there must not be an ASG update! asgSvc.EXPECT().UpdateASG(gomock.Any()).Times(0) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs) + _, err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(err).To(Succeed()) }) @@ -618,12 +637,13 @@ func TestAWSMachinePoolReconciler(t *testing.T) { userdata.ComputeHash([]byte("shell-script")), // But the name of the secret changes from `previous-secret-name` to `bootstrap-data` &apimachinerytypes.NamespacedName{Namespace: "default", Name: "previous-secret-name"}, + nil, nil) ec2Svc.EXPECT().DiscoverLaunchTemplateAMI(gomock.Any()).Return(ptr.To[string]("ami-existing"), nil) ec2Svc.EXPECT().LaunchTemplateNeedsUpdate(gomock.Any(), gomock.Any(), gomock.Any()).Return(false, nil) - asgSvc.EXPECT().CanStartASGInstanceRefresh(gomock.Any()).Return(true, nil) - ec2Svc.EXPECT().PruneLaunchTemplateVersions(gomock.Any()).Return(nil) - ec2Svc.EXPECT().CreateLaunchTemplateVersion(gomock.Any(), gomock.Any(), gomock.Eq(ptr.To[string]("ami-existing")), gomock.Eq(apimachinerytypes.NamespacedName{Namespace: "default", Name: "bootstrap-data"}), gomock.Any()).Return(nil) + asgSvc.EXPECT().CanStartASGInstanceRefresh(gomock.Any()).Return(true, nil, nil) + ec2Svc.EXPECT().PruneLaunchTemplateVersions(gomock.Any()).Return(nil, nil) + ec2Svc.EXPECT().CreateLaunchTemplateVersion(gomock.Any(), gomock.Any(), gomock.Eq(ptr.To[string]("ami-existing")), gomock.Eq(apimachinerytypes.NamespacedName{Namespace: "default", Name: "bootstrap-data"}), gomock.Any(), gomock.Any()).Return(nil) ec2Svc.EXPECT().GetLaunchTemplateLatestVersion(gomock.Any()).Return("2", nil) // Changing the bootstrap data secret name should trigger rolling out new nodes, no matter what the // content (user data) is. This way, users can enforce a rollout by changing the bootstrap config @@ -648,7 +668,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { // No changes, so there must not be an ASG update! asgSvc.EXPECT().UpdateASG(gomock.Any()).Times(0) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs) + _, err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(err).To(Succeed()) }) @@ -659,9 +679,9 @@ func TestAWSMachinePoolReconciler(t *testing.T) { reconSvc = nil // not used defer teardown(t, g) - ec2Svc.EXPECT().GetLaunchTemplate(gomock.Eq("test")).Return(nil, "", nil, nil) + ec2Svc.EXPECT().GetLaunchTemplate(gomock.Eq("test")).Return(nil, "", nil, nil, nil) ec2Svc.EXPECT().DiscoverLaunchTemplateAMI(gomock.Any()).Return(ptr.To[string]("ami-abcdef123"), nil) - ec2Svc.EXPECT().CreateLaunchTemplate(gomock.Any(), gomock.Eq(ptr.To[string]("ami-abcdef123")), gomock.Eq(userDataSecretKey), gomock.Eq([]byte("shell-script"))).Return("lt-ghijkl456", nil) + ec2Svc.EXPECT().CreateLaunchTemplate(gomock.Any(), gomock.Eq(ptr.To[string]("ami-abcdef123")), gomock.Eq(userDataSecretKey), gomock.Eq([]byte("shell-script")), gomock.Eq(userdata.ComputeHash([]byte("shell-script")))).Return("lt-ghijkl456", nil) asgSvc.EXPECT().GetASGByName(gomock.Any()).Return(nil, nil) asgSvc.EXPECT().CreateASG(gomock.Any()).DoAndReturn(func(scope *scope.MachinePoolScope) (*expinfrav1.AutoScalingGroup, error) { g.Expect(scope.Name()).To(Equal("test")) @@ -670,7 +690,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { }, nil }) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs) + _, err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(err).To(Succeed()) g.Expect(ms.AWSMachinePool.Status.LaunchTemplateID).ToNot(BeEmpty()) @@ -702,12 +722,13 @@ func TestAWSMachinePoolReconciler(t *testing.T) { // No change to user data content userdata.ComputeHash([]byte("shell-script")), &apimachinerytypes.NamespacedName{Namespace: "default", Name: "bootstrap-data"}, + nil, nil) ec2Svc.EXPECT().DiscoverLaunchTemplateAMI(gomock.Any()).Return(ptr.To[string]("ami-existing"), nil) ec2Svc.EXPECT().LaunchTemplateNeedsUpdate(gomock.Any(), gomock.Any(), gomock.Any()).Return(false, nil) - asgSvc.EXPECT().CanStartASGInstanceRefresh(gomock.Any()).Return(true, nil) - ec2Svc.EXPECT().PruneLaunchTemplateVersions(gomock.Any()).Return(nil) - ec2Svc.EXPECT().CreateLaunchTemplateVersion(gomock.Any(), gomock.Any(), gomock.Eq(ptr.To[string]("ami-existing")), gomock.Eq(apimachinerytypes.NamespacedName{Namespace: "default", Name: "bootstrap-data-new"}), gomock.Any()).Return(nil) + asgSvc.EXPECT().CanStartASGInstanceRefresh(gomock.Any()).Return(true, nil, nil) + ec2Svc.EXPECT().PruneLaunchTemplateVersions(gomock.Any()).Return(nil, nil) + ec2Svc.EXPECT().CreateLaunchTemplateVersion(gomock.Any(), gomock.Any(), gomock.Eq(ptr.To[string]("ami-existing")), gomock.Eq(apimachinerytypes.NamespacedName{Namespace: "default", Name: "bootstrap-data-new"}), gomock.Any(), gomock.Any()).Return(nil) ec2Svc.EXPECT().GetLaunchTemplateLatestVersion(gomock.Any()).Return("2", nil) // Changing the bootstrap data secret name should trigger rolling out new nodes, no matter what the // content (user data) is. This way, users can enforce a rollout by changing the bootstrap config @@ -732,7 +753,156 @@ func TestAWSMachinePoolReconciler(t *testing.T) { // No changes, so there must not be an ASG update! asgSvc.EXPECT().UpdateASG(gomock.Any()).Times(0) - err = reconciler.reconcileNormal(context.Background(), ms, cs, cs) + _, err = reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) + g.Expect(err).To(Succeed()) + }) + + t.Run("launch template and ASG exist, bootstrap data secret name changed, Ignition bootstrap data stored in S3", func(t *testing.T) { + g := NewWithT(t) + setup(t, g) + reconciler.reconcileServiceFactory = nil // use real implementation, but keep EC2 calls mocked (`ec2ServiceFactory`) + reconSvc = nil // not used + defer teardown(t, g) + + secret.Data["format"] = []byte("ignition") + g.Expect(testEnv.Update(ctx, secret)).To(Succeed()) + + // Latest ID and version already stored, no need to retrieve it + ms.AWSMachinePool.Status.LaunchTemplateID = launchTemplateIDExisting + ms.AWSMachinePool.Status.LaunchTemplateVersion = ptr.To[string]("1") + + // Enable Ignition S3 storage + cs.AWSCluster.Spec.S3Bucket = &infrav1.S3Bucket{} + ms.AWSMachinePool.Spec.Ignition = &infrav1.Ignition{} + ms.AWSMachinePool.Default() // simulate webhook that sets default ignition version + + asgSvc.EXPECT().GetASGByName(gomock.Any()).DoAndReturn(func(scope *scope.MachinePoolScope) (*expinfrav1.AutoScalingGroup, error) { + g.Expect(scope.Name()).To(Equal("test")) + + // No difference to `AWSMachinePool.spec` + return &expinfrav1.AutoScalingGroup{ + Name: scope.Name(), + Subnets: []string{ + "subnet-1", + }, + MinSize: awsMachinePool.Spec.MinSize, + MaxSize: awsMachinePool.Spec.MaxSize, + MixedInstancesPolicy: awsMachinePool.Spec.MixedInstancesPolicy.DeepCopy(), + }, nil + }) + + ec2Svc.EXPECT().GetLaunchTemplate(gomock.Eq("test")).Return( + &expinfrav1.AWSLaunchTemplate{ + Name: "test", + AMI: infrav1.AMIReference{ + ID: ptr.To[string]("ami-existing"), + }, + }, + // No change to user data + userdata.ComputeHash([]byte("shell-script")), + // But the name of the secret changes from `previous-secret-name` to `bootstrap-data` + &apimachinerytypes.NamespacedName{Namespace: "default", Name: "previous-secret-name"}, + nil, + nil) + ec2Svc.EXPECT().DiscoverLaunchTemplateAMI(gomock.Any()).Return(ptr.To[string]("ami-existing"), nil) + ec2Svc.EXPECT().LaunchTemplateNeedsUpdate(gomock.Any(), gomock.Any(), gomock.Any()).Return(false, nil) + + s3Mock.EXPECT().PutObject(gomock.Any()).DoAndReturn(func(input *s3.PutObjectInput) (*s3.PutObjectOutput, error) { + g.Expect(*input.Key).To(Equal(fmt.Sprintf("machine-pool/test/%s", userdata.ComputeHash([]byte("shell-script"))))) + return &s3.PutObjectOutput{}, nil + }) + + // Simulate a pending instance refresh here, to see if `CancelInstanceRefresh` gets called + instanceRefreshStatus := autoscaling.InstanceRefreshStatusPending + asgSvc.EXPECT().CanStartASGInstanceRefresh(gomock.Any()).Return(false, &instanceRefreshStatus, nil) + asgSvc.EXPECT().CancelASGInstanceRefresh(gomock.Any()).Return(nil) + + // First reconciliation should notice the existing instance refresh and cancel it. + // Since the cancellation is asynchronous, the controller should requeue. + res, err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) + g.Expect(res.RequeueAfter).To(BeNumerically(">", 0)) + g.Expect(err).To(Succeed()) + + // Now simulate that no pending instance refresh exists. Cancellation should not be called anymore. + asgSvc.EXPECT().CanStartASGInstanceRefresh(gomock.Any()).Return(true, nil, nil) + + asgSvc.EXPECT().GetASGByName(gomock.Any()).DoAndReturn(func(scope *scope.MachinePoolScope) (*expinfrav1.AutoScalingGroup, error) { + g.Expect(scope.Name()).To(Equal("test")) + + // No difference to `AWSMachinePool.spec` + return &expinfrav1.AutoScalingGroup{ + Name: scope.Name(), + Subnets: []string{ + "subnet-1", + }, + MinSize: awsMachinePool.Spec.MinSize, + MaxSize: awsMachinePool.Spec.MaxSize, + MixedInstancesPolicy: awsMachinePool.Spec.MixedInstancesPolicy.DeepCopy(), + }, nil + }) + + ec2Svc.EXPECT().GetLaunchTemplate(gomock.Eq("test")).Return( + &expinfrav1.AWSLaunchTemplate{ + Name: "test", + AMI: infrav1.AMIReference{ + ID: ptr.To[string]("ami-existing"), + }, + }, + // No change to user data + userdata.ComputeHash([]byte("shell-script")), + // But the name of the secret changes from `previous-secret-name` to `bootstrap-data` + &apimachinerytypes.NamespacedName{Namespace: "default", Name: "previous-secret-name"}, + nil, + nil) + ec2Svc.EXPECT().DiscoverLaunchTemplateAMI(gomock.Any()).Return(ptr.To[string]("ami-existing"), nil) + ec2Svc.EXPECT().LaunchTemplateNeedsUpdate(gomock.Any(), gomock.Any(), gomock.Any()).Return(false, nil) + + s3Mock.EXPECT().PutObject(gomock.Any()).DoAndReturn(func(input *s3.PutObjectInput) (*s3.PutObjectOutput, error) { + g.Expect(*input.Key).To(Equal(fmt.Sprintf("machine-pool/test/%s", userdata.ComputeHash([]byte("shell-script"))))) + return &s3.PutObjectOutput{}, nil + }) + + // No cancellation expected in this second reconciliation (see above) + asgSvc.EXPECT().CancelASGInstanceRefresh(gomock.Any()).Times(0) + + var simulatedDeletedVersionNumber int64 = 777 + bootstrapDataHash := "some-simulated-hash" + ec2Svc.EXPECT().PruneLaunchTemplateVersions(gomock.Any()).Return(&ec2.LaunchTemplateVersion{ + VersionNumber: &simulatedDeletedVersionNumber, + LaunchTemplateData: &ec2.ResponseLaunchTemplateData{ + TagSpecifications: []*ec2.LaunchTemplateTagSpecification{ + { + ResourceType: aws.String(ec2.ResourceTypeInstance), + Tags: []*ec2.Tag{ + // Only this tag is relevant for the test. If this is stored in the + // launch template version, and the version gets deleted, the S3 object + // with the bootstrap data should be deleted as well. + { + Key: aws.String("sigs.k8s.io/cluster-api-provider-aws/bootstrap-data-hash"), + Value: aws.String(bootstrapDataHash), + }, + }, + }, + }, + UserData: aws.String(base64.StdEncoding.EncodeToString([]byte("old-user-data"))), + }, + }, nil) + s3Mock.EXPECT().DeleteObject(gomock.Any()).DoAndReturn(func(input *s3.DeleteObjectInput) (*s3.DeleteObjectOutput, error) { + g.Expect(*input.Key).To(Equal(fmt.Sprintf("machine-pool/test/%s", bootstrapDataHash))) + return &s3.DeleteObjectOutput{}, nil + }) + ec2Svc.EXPECT().CreateLaunchTemplateVersion(gomock.Any(), gomock.Any(), gomock.Eq(ptr.To[string]("ami-existing")), gomock.Eq(apimachinerytypes.NamespacedName{Namespace: "default", Name: "bootstrap-data"}), gomock.Any(), gomock.Any()).Return(nil) + ec2Svc.EXPECT().GetLaunchTemplateLatestVersion(gomock.Any()).Return("2", nil) + // Changing the bootstrap data secret name should trigger rolling out new nodes, no matter what the + // content (user data) is. This way, users can enforce a rollout by changing the bootstrap config + // reference (`MachinePool.spec.template.spec.bootstrap`). + asgSvc.EXPECT().StartASGInstanceRefresh(gomock.Any()) + + asgSvc.EXPECT().SubnetIDs(gomock.Any()).Return([]string{"subnet-1"}, nil) // no change + // No changes, so there must not be an ASG update! + asgSvc.EXPECT().UpdateASG(gomock.Any()).Times(0) + + _, err = reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(err).To(Succeed()) }) }) @@ -766,7 +936,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { finalizer(t, g) asgSvc.EXPECT().GetASGByName(gomock.Any()).Return(nil, nil) - ec2Svc.EXPECT().GetLaunchTemplate(gomock.Any()).Return(nil, "", nil, nil).AnyTimes() + ec2Svc.EXPECT().GetLaunchTemplate(gomock.Any()).Return(nil, "", nil, nil, nil).AnyTimes() buf := new(bytes.Buffer) klog.SetOutput(buf) @@ -788,7 +958,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { Status: expinfrav1.ASGStatusDeleteInProgress, } asgSvc.EXPECT().GetASGByName(gomock.Any()).Return(&inProgressASG, nil) - ec2Svc.EXPECT().GetLaunchTemplate(gomock.Any()).Return(nil, "", nil, nil).AnyTimes() + ec2Svc.EXPECT().GetLaunchTemplate(gomock.Any()).Return(nil, "", nil, nil, nil).AnyTimes() buf := new(bytes.Buffer) klog.SetOutput(buf) diff --git a/exp/controllers/awsmanagedmachinepool_controller.go b/exp/controllers/awsmanagedmachinepool_controller.go index 8c0d75c2ec..606dc5893a 100644 --- a/exp/controllers/awsmanagedmachinepool_controller.go +++ b/exp/controllers/awsmanagedmachinepool_controller.go @@ -156,6 +156,7 @@ func (r *AWSManagedMachinePoolReconciler) Reconcile(ctx context.Context, req ctr } machinePoolScope, err := scope.NewManagedMachinePoolScope(scope.ManagedMachinePoolScopeParams{ + Logger: log, Client: r.Client, ControllerName: "awsmanagedmachinepool", Cluster: cluster, @@ -189,19 +190,20 @@ func (r *AWSManagedMachinePoolReconciler) Reconcile(ctx context.Context, req ctr return ctrl.Result{}, r.reconcileDelete(ctx, machinePoolScope, managedControlPlaneScope) } - return ctrl.Result{}, r.reconcileNormal(ctx, machinePoolScope, managedControlPlaneScope) + return r.reconcileNormal(ctx, machinePoolScope, managedControlPlaneScope, managedControlPlaneScope) } func (r *AWSManagedMachinePoolReconciler) reconcileNormal( ctx context.Context, machinePoolScope *scope.ManagedMachinePoolScope, ec2Scope scope.EC2Scope, -) error { + s3Scope scope.S3Scope, +) (ctrl.Result, error) { machinePoolScope.Info("Reconciling AWSManagedMachinePool") if controllerutil.AddFinalizer(machinePoolScope.ManagedMachinePool, expinfrav1.ManagedMachinePoolFinalizer) { if err := machinePoolScope.PatchObject(); err != nil { - return err + return ctrl.Result{}, err } } @@ -210,17 +212,25 @@ func (r *AWSManagedMachinePoolReconciler) reconcileNormal( reconSvc := r.getReconcileService(ec2Scope) if machinePoolScope.ManagedMachinePool.Spec.AWSLaunchTemplate != nil { - canUpdateLaunchTemplate := func() (bool, error) { - return true, nil + canStartInstanceRefresh := func() (bool, *string, error) { + return true, nil, nil + } + cancelInstanceRefresh := func() error { + return nil } runPostLaunchTemplateUpdateOperation := func() error { return nil } - if err := reconSvc.ReconcileLaunchTemplate(machinePoolScope, ec2svc, canUpdateLaunchTemplate, runPostLaunchTemplateUpdateOperation); err != nil { + var objectStoreSvc services.ObjectStoreInterface = nil // no S3 bucket support for `AWSManagedControlPlane` yet + res, err := reconSvc.ReconcileLaunchTemplate(machinePoolScope, machinePoolScope, s3Scope, ec2svc, objectStoreSvc, canStartInstanceRefresh, cancelInstanceRefresh, runPostLaunchTemplateUpdateOperation) + if err != nil { r.Recorder.Eventf(machinePoolScope.ManagedMachinePool, corev1.EventTypeWarning, "FailedLaunchTemplateReconcile", "Failed to reconcile launch template: %v", err) machinePoolScope.Error(err, "failed to reconcile launch template") conditions.MarkFalse(machinePoolScope.ManagedMachinePool, expinfrav1.LaunchTemplateReadyCondition, expinfrav1.LaunchTemplateReconcileFailedReason, clusterv1.ConditionSeverityError, "") - return err + return ctrl.Result{}, err + } + if res != nil { + return *res, nil } launchTemplateID := machinePoolScope.GetLaunchTemplateIDStatus() @@ -229,7 +239,7 @@ func (r *AWSManagedMachinePoolReconciler) reconcileNormal( ResourceService: ec2svc, }} if err := reconSvc.ReconcileTags(machinePoolScope, resourceServiceToUpdate); err != nil { - return errors.Wrap(err, "error updating tags") + return ctrl.Result{}, errors.Wrap(err, "error updating tags") } // set the LaunchTemplateReady condition @@ -237,10 +247,10 @@ func (r *AWSManagedMachinePoolReconciler) reconcileNormal( } if err := ekssvc.ReconcilePool(ctx); err != nil { - return errors.Wrapf(err, "failed to reconcile machine pool for AWSManagedMachinePool %s/%s", machinePoolScope.ManagedMachinePool.Namespace, machinePoolScope.ManagedMachinePool.Name) + return ctrl.Result{}, errors.Wrapf(err, "failed to reconcile machine pool for AWSManagedMachinePool %s/%s", machinePoolScope.ManagedMachinePool.Namespace, machinePoolScope.ManagedMachinePool.Name) } - return nil + return ctrl.Result{}, nil } func (r *AWSManagedMachinePoolReconciler) reconcileDelete( @@ -259,7 +269,7 @@ func (r *AWSManagedMachinePoolReconciler) reconcileDelete( if machinePoolScope.ManagedMachinePool.Spec.AWSLaunchTemplate != nil { launchTemplateID := machinePoolScope.ManagedMachinePool.Status.LaunchTemplateID - launchTemplate, _, _, err := ec2Svc.GetLaunchTemplate(machinePoolScope.LaunchTemplateName()) + launchTemplate, _, _, _, err := ec2Svc.GetLaunchTemplate(machinePoolScope.LaunchTemplateName()) if err != nil { return err } diff --git a/pkg/cloud/scope/ignition.go b/pkg/cloud/scope/ignition.go new file mode 100644 index 0000000000..d996612181 --- /dev/null +++ b/pkg/cloud/scope/ignition.go @@ -0,0 +1,26 @@ +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package scope + +import ( + infrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/api/v1beta2" +) + +// IgnitionScope gets the optional Ignition configuration. +type IgnitionScope interface { + Ignition() *infrav1.Ignition +} diff --git a/pkg/cloud/scope/launchtemplate.go b/pkg/cloud/scope/launchtemplate.go index fb2df8b59f..34e84e7ff7 100644 --- a/pkg/cloud/scope/launchtemplate.go +++ b/pkg/cloud/scope/launchtemplate.go @@ -37,7 +37,7 @@ type LaunchTemplateScope interface { SetLaunchTemplateIDStatus(id string) GetLaunchTemplateLatestVersionStatus() string SetLaunchTemplateLatestVersionStatus(version string) - GetRawBootstrapData() ([]byte, *types.NamespacedName, error) + GetRawBootstrapData() ([]byte, string, *types.NamespacedName, error) IsEKSManaged() bool AdditionalTags() infrav1.Tags diff --git a/pkg/cloud/scope/machinepool.go b/pkg/cloud/scope/machinepool.go index 00e8abeadc..d141aef06f 100644 --- a/pkg/cloud/scope/machinepool.go +++ b/pkg/cloud/scope/machinepool.go @@ -121,6 +121,11 @@ func NewMachinePoolScope(params MachinePoolScopeParams) (*MachinePoolScope, erro }, nil } +// Ignition gets the ignition config. +func (m *MachinePoolScope) Ignition() *infrav1.Ignition { + return m.AWSMachinePool.Spec.Ignition +} + // Name returns the AWSMachinePool name. func (m *MachinePoolScope) Name() string { return m.AWSMachinePool.Name @@ -133,13 +138,7 @@ func (m *MachinePoolScope) Namespace() string { // GetRawBootstrapData returns the bootstrap data from the secret in the Machine's bootstrap.dataSecretName, // including the secret's namespaced name. -func (m *MachinePoolScope) GetRawBootstrapData() ([]byte, *types.NamespacedName, error) { - data, _, bootstrapDataSecretKey, err := m.getBootstrapData() - - return data, bootstrapDataSecretKey, err -} - -func (m *MachinePoolScope) getBootstrapData() ([]byte, string, *types.NamespacedName, error) { +func (m *MachinePoolScope) GetRawBootstrapData() ([]byte, string, *types.NamespacedName, error) { if m.MachinePool.Spec.Template.Spec.Bootstrap.DataSecretName == nil { return nil, "", nil, errors.New("error retrieving bootstrap data: linked Machine's bootstrap.dataSecretName is nil") } diff --git a/pkg/cloud/scope/managednodegroup.go b/pkg/cloud/scope/managednodegroup.go index e9421d7282..5d35fad215 100644 --- a/pkg/cloud/scope/managednodegroup.go +++ b/pkg/cloud/scope/managednodegroup.go @@ -305,6 +305,11 @@ func (s *ManagedMachinePoolScope) ControllerName() string { return s.controllerName } +// Ignition gets the ignition config. +func (s *ManagedMachinePoolScope) Ignition() *infrav1.Ignition { + return nil +} + // KubernetesClusterName is the name of the EKS cluster name. func (s *ManagedMachinePoolScope) KubernetesClusterName() string { return s.ControlPlane.Spec.EKSClusterName @@ -326,24 +331,24 @@ func (s *ManagedMachinePoolScope) Namespace() string { } // GetRawBootstrapData returns the raw bootstrap data from the linked Machine's bootstrap.dataSecretName. -func (s *ManagedMachinePoolScope) GetRawBootstrapData() ([]byte, *types.NamespacedName, error) { +func (s *ManagedMachinePoolScope) GetRawBootstrapData() ([]byte, string, *types.NamespacedName, error) { if s.MachinePool.Spec.Template.Spec.Bootstrap.DataSecretName == nil { - return nil, nil, errors.New("error retrieving bootstrap data: linked Machine's bootstrap.dataSecretName is nil") + return nil, "", nil, errors.New("error retrieving bootstrap data: linked Machine's bootstrap.dataSecretName is nil") } secret := &corev1.Secret{} key := types.NamespacedName{Namespace: s.Namespace(), Name: *s.MachinePool.Spec.Template.Spec.Bootstrap.DataSecretName} if err := s.Client.Get(context.TODO(), key, secret); err != nil { - return nil, nil, errors.Wrapf(err, "failed to retrieve bootstrap data secret for AWSManagedMachinePool %s/%s", s.Namespace(), s.Name()) + return nil, "", nil, errors.Wrapf(err, "failed to retrieve bootstrap data secret for AWSManagedMachinePool %s/%s", s.Namespace(), s.Name()) } value, ok := secret.Data["value"] if !ok { - return nil, nil, errors.New("error retrieving bootstrap data: secret value key is missing") + return nil, "", nil, errors.New("error retrieving bootstrap data: secret value key is missing") } - return value, &key, nil + return value, string(secret.Data["format"]), &key, nil } // GetObjectMeta returns the ObjectMeta for the AWSManagedMachinePool. diff --git a/pkg/cloud/services/autoscaling/autoscalinggroup.go b/pkg/cloud/services/autoscaling/autoscalinggroup.go index f8e8b18690..c3cf215075 100644 --- a/pkg/cloud/services/autoscaling/autoscalinggroup.go +++ b/pkg/cloud/services/autoscaling/autoscalinggroup.go @@ -24,6 +24,7 @@ import ( "strings" "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/awserr" "github.com/aws/aws-sdk-go/service/autoscaling" "github.com/aws/aws-sdk-go/service/ec2" "github.com/pkg/errors" @@ -323,30 +324,53 @@ func (s *Service) UpdateASG(machinePoolScope *scope.MachinePoolScope) error { return nil } -// CanStartASGInstanceRefresh will start an ASG instance with refresh. -func (s *Service) CanStartASGInstanceRefresh(scope *scope.MachinePoolScope) (bool, error) { +// CanStartASGInstanceRefresh checks if a new ASG instance refresh can currently be started, and returns the status if there is an existing, unfinished refresh. +func (s *Service) CanStartASGInstanceRefresh(scope *scope.MachinePoolScope) (bool, *string, error) { describeInput := &autoscaling.DescribeInstanceRefreshesInput{AutoScalingGroupName: aws.String(scope.Name())} refreshes, err := s.ASGClient.DescribeInstanceRefreshesWithContext(context.TODO(), describeInput) if err != nil { - return false, err + return false, nil, err } hasUnfinishedRefresh := false + var unfinishedRefreshStatus *string if err == nil && len(refreshes.InstanceRefreshes) != 0 { for i := range refreshes.InstanceRefreshes { if *refreshes.InstanceRefreshes[i].Status == autoscaling.InstanceRefreshStatusInProgress || *refreshes.InstanceRefreshes[i].Status == autoscaling.InstanceRefreshStatusPending || *refreshes.InstanceRefreshes[i].Status == autoscaling.InstanceRefreshStatusCancelling { hasUnfinishedRefresh = true + unfinishedRefreshStatus = refreshes.InstanceRefreshes[i].Status } } } if hasUnfinishedRefresh { - return false, nil + return false, unfinishedRefreshStatus, nil } - return true, nil + return true, nil, nil } -// StartASGInstanceRefresh will start an ASG instance with refresh. +// CancelASGInstanceRefresh cancels an ASG instance refresh. +func (s *Service) CancelASGInstanceRefresh(scope *scope.MachinePoolScope) error { + input := &autoscaling.CancelInstanceRefreshInput{ + AutoScalingGroupName: aws.String(scope.Name()), + } + + if _, err := s.ASGClient.CancelInstanceRefreshWithContext(context.TODO(), input); err != nil { + var aerr awserr.Error + if errors.As(err, &aerr) && aerr.Code() == autoscaling.ErrCodeActiveInstanceRefreshNotFoundFault { + // Refresh isn't "in progress". It may have turned to cancelled status + // by now. So this is not an error for us because we may have called + // CancelInstanceRefresh multiple times and should be idempotent here. + return nil + } + + return errors.Wrapf(err, "failed to cancel ASG instance refresh %q", scope.Name()) + } + + return nil +} + +// StartASGInstanceRefresh will start an ASG instance refresh. func (s *Service) StartASGInstanceRefresh(scope *scope.MachinePoolScope) error { strategy := ptr.To[string](autoscaling.RefreshStrategyRolling) var minHealthyPercentage, maxHealthyPercentage, instanceWarmup *int64 diff --git a/pkg/cloud/services/autoscaling/autoscalinggroup_test.go b/pkg/cloud/services/autoscaling/autoscalinggroup_test.go index 9434696166..5a83aa8088 100644 --- a/pkg/cloud/services/autoscaling/autoscalinggroup_test.go +++ b/pkg/cloud/services/autoscaling/autoscalinggroup_test.go @@ -1111,10 +1111,11 @@ func TestServiceCanStartASGInstanceRefresh(t *testing.T) { defer mockCtrl.Finish() tests := []struct { - name string - wantErr bool - canStart bool - expect func(m *mock_autoscalingiface.MockAutoScalingAPIMockRecorder) + name string + wantErr bool + wantUnfinishedRefreshStatus *string + canStart bool + expect func(m *mock_autoscalingiface.MockAutoScalingAPIMockRecorder) }{ { name: "should return error if describe instance refresh failed", @@ -1139,9 +1140,10 @@ func TestServiceCanStartASGInstanceRefresh(t *testing.T) { }, }, { - name: "should return false if some instances have unfinished refresh", - wantErr: false, - canStart: false, + name: "should return false if some instances have unfinished refresh", + wantErr: false, + wantUnfinishedRefreshStatus: aws.String(autoscaling.InstanceRefreshStatusInProgress), + canStart: false, expect: func(m *mock_autoscalingiface.MockAutoScalingAPIMockRecorder) { m.DescribeInstanceRefreshesWithContext(context.TODO(), gomock.Eq(&autoscaling.DescribeInstanceRefreshesInput{ AutoScalingGroupName: aws.String("machinePoolName"), @@ -1173,11 +1175,13 @@ func TestServiceCanStartASGInstanceRefresh(t *testing.T) { g.Expect(err).ToNot(HaveOccurred()) mps.AWSMachinePool.Name = "machinePoolName" - out, err := s.CanStartASGInstanceRefresh(mps) + out, unfinishedRefreshStatus, err := s.CanStartASGInstanceRefresh(mps) checkErr(tt.wantErr, err, g) if tt.canStart { g.Expect(out).To(BeTrue()) return + } else { + g.Expect(unfinishedRefreshStatus).To(Equal(tt.wantUnfinishedRefreshStatus)) } g.Expect(out).To(BeFalse()) }) diff --git a/pkg/cloud/services/ec2/launchtemplate.go b/pkg/cloud/services/ec2/launchtemplate.go index 5da57f2521..464b90f538 100644 --- a/pkg/cloud/services/ec2/launchtemplate.go +++ b/pkg/cloud/services/ec2/launchtemplate.go @@ -23,9 +23,14 @@ import ( "sort" "strconv" "strings" + "time" "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/service/autoscaling" "github.com/aws/aws-sdk-go/service/ec2" + "github.com/blang/semver" + ignTypes "github.com/coreos/ignition/config/v2_3/types" + ignV3Types "github.com/coreos/ignition/v2/config/v3_4/types" "github.com/google/go-cmp/cmp" "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" @@ -34,6 +39,7 @@ import ( infrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/api/v1beta2" expinfrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/exp/api/v1beta2" + "sigs.k8s.io/cluster-api-provider-aws/v2/feature" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/awserrors" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/scope" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services" @@ -41,6 +47,7 @@ import ( "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/record" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" "sigs.k8s.io/cluster-api/util/conditions" + ctrl "sigs.k8s.io/controller-runtime" ) const ( @@ -56,43 +63,124 @@ const ( // ReconcileLaunchTemplate reconciles a launch template and triggers instance refresh conditionally, depending on // changes. // -//nolint:gocyclo +//nolint:gocyclo,maintidx func (s *Service) ReconcileLaunchTemplate( + ignitionScope scope.IgnitionScope, scope scope.LaunchTemplateScope, + s3Scope scope.S3Scope, ec2svc services.EC2Interface, - canUpdateLaunchTemplate func() (bool, error), + objectStoreSvc services.ObjectStoreInterface, + canStartInstanceRefresh func() (bool, *string, error), + cancelInstanceRefresh func() error, runPostLaunchTemplateUpdateOperation func() error, -) error { - bootstrapData, bootstrapDataSecretKey, err := scope.GetRawBootstrapData() +) (*ctrl.Result, error) { + bootstrapData, bootstrapDataFormat, bootstrapDataSecretKey, err := scope.GetRawBootstrapData() if err != nil { record.Eventf(scope.GetMachinePool(), corev1.EventTypeWarning, "FailedGetBootstrapData", err.Error()) - return err + return nil, err } - bootstrapDataHash := userdata.ComputeHash(bootstrapData) - scope.Info("checking for existing launch template") - launchTemplate, launchTemplateUserDataHash, launchTemplateUserDataSecretKey, err := ec2svc.GetLaunchTemplate(scope.LaunchTemplateName()) + launchTemplate, launchTemplateUserDataHash, launchTemplateUserDataSecretKey, _, err := ec2svc.GetLaunchTemplate(scope.LaunchTemplateName()) if err != nil { conditions.MarkUnknown(scope.GetSetter(), expinfrav1.LaunchTemplateReadyCondition, expinfrav1.LaunchTemplateNotFoundReason, err.Error()) - return err + return nil, err } imageID, err := ec2svc.DiscoverLaunchTemplateAMI(scope) if err != nil { conditions.MarkFalse(scope.GetSetter(), expinfrav1.LaunchTemplateReadyCondition, expinfrav1.LaunchTemplateCreateFailedReason, clusterv1.ConditionSeverityError, err.Error()) - return err + return nil, err } + var userDataForLaunchTemplate []byte + if s3Scope.Bucket() != nil && bootstrapDataFormat == "ignition" && ignitionScope.Ignition() != nil { + scope.Info("Using S3 bucket storage for Ignition format") + + // S3 bucket storage enabled and Ignition format is used. Ignition supports reading large user data from S3, + // not restricted by the EC2 user data size limit. The actual user data goes into the S3 object while the + // user data on the launch template points to the S3 bucket (or presigned URL). + // Previously, user data was always written into the launch template, so we check + // `AWSMachinePool.Spec.Ignition != nil` to toggle the S3 feature on for `AWSMachinePool` objects. + objectURL, err := objectStoreSvc.CreateForMachinePool(scope, bootstrapData) + + if err != nil { + conditions.MarkFalse(scope.GetSetter(), expinfrav1.LaunchTemplateReadyCondition, expinfrav1.LaunchTemplateReconcileFailedReason, clusterv1.ConditionSeverityError, err.Error()) + return nil, err + } + + ignVersion := ignitionScope.Ignition().Version + semver, err := semver.ParseTolerant(ignVersion) + if err != nil { + err = errors.Wrapf(err, "failed to parse ignition version %q", ignVersion) + conditions.MarkFalse(scope.GetSetter(), expinfrav1.LaunchTemplateReadyCondition, expinfrav1.LaunchTemplateReconcileFailedReason, clusterv1.ConditionSeverityError, err.Error()) + return nil, err + } + + // EC2 user data points to S3 + switch semver.Major { + case 2: + ignData := &ignTypes.Config{ + Ignition: ignTypes.Ignition{ + Version: semver.String(), + Config: ignTypes.IgnitionConfig{ + Append: []ignTypes.ConfigReference{ + { + Source: objectURL, + }, + }, + }, + }, + } + + userDataForLaunchTemplate, err = json.Marshal(ignData) + if err != nil { + err = errors.Wrap(err, "failed to convert ignition config to JSON") + conditions.MarkFalse(scope.GetSetter(), expinfrav1.LaunchTemplateReadyCondition, expinfrav1.LaunchTemplateReconcileFailedReason, clusterv1.ConditionSeverityError, err.Error()) + return nil, err + } + case 3: + ignData := &ignV3Types.Config{ + Ignition: ignV3Types.Ignition{ + Version: semver.String(), + Config: ignV3Types.IgnitionConfig{ + Merge: []ignV3Types.Resource{ + { + Source: aws.String(objectURL), + }, + }, + }, + }, + } + + userDataForLaunchTemplate, err = json.Marshal(ignData) + if err != nil { + err = errors.Wrap(err, "failed to convert ignition config to JSON") + conditions.MarkFalse(scope.GetSetter(), expinfrav1.LaunchTemplateReadyCondition, expinfrav1.LaunchTemplateReconcileFailedReason, clusterv1.ConditionSeverityError, err.Error()) + return nil, err + } + default: + err = errors.Errorf("unsupported ignition version %q", ignVersion) + conditions.MarkFalse(scope.GetSetter(), expinfrav1.LaunchTemplateReadyCondition, expinfrav1.LaunchTemplateReconcileFailedReason, clusterv1.ConditionSeverityError, err.Error()) + return nil, err + } + } else { + // S3 bucket not used, so the bootstrap data is stored directly in the launch template + // (EC2 user data) + userDataForLaunchTemplate = bootstrapData + } + + bootstrapDataForLaunchTemplateHash := userdata.ComputeHash(userDataForLaunchTemplate) + if launchTemplate == nil { scope.Info("no existing launch template found, creating") - launchTemplateID, err := ec2svc.CreateLaunchTemplate(scope, imageID, *bootstrapDataSecretKey, bootstrapData) + launchTemplateID, err := ec2svc.CreateLaunchTemplate(scope, imageID, *bootstrapDataSecretKey, userDataForLaunchTemplate, userdata.ComputeHash(bootstrapData)) if err != nil { conditions.MarkFalse(scope.GetSetter(), expinfrav1.LaunchTemplateReadyCondition, expinfrav1.LaunchTemplateCreateFailedReason, clusterv1.ConditionSeverityError, err.Error()) - return err + return nil, err } scope.SetLaunchTemplateIDStatus(launchTemplateID) - return scope.PatchObject() + return nil, scope.PatchObject() } // LaunchTemplateID is set during LaunchTemplate creation, but for a scenario such as `clusterctl move`, status fields become blank. @@ -101,27 +189,27 @@ func (s *Service) ReconcileLaunchTemplate( launchTemplateID, err := ec2svc.GetLaunchTemplateID(scope.LaunchTemplateName()) if err != nil { conditions.MarkUnknown(scope.GetSetter(), expinfrav1.LaunchTemplateReadyCondition, expinfrav1.LaunchTemplateNotFoundReason, err.Error()) - return err + return nil, err } scope.SetLaunchTemplateIDStatus(launchTemplateID) - return scope.PatchObject() + return nil, scope.PatchObject() } if scope.GetLaunchTemplateLatestVersionStatus() == "" { launchTemplateVersion, err := ec2svc.GetLaunchTemplateLatestVersion(scope.GetLaunchTemplateIDStatus()) if err != nil { conditions.MarkUnknown(scope.GetSetter(), expinfrav1.LaunchTemplateReadyCondition, expinfrav1.LaunchTemplateNotFoundReason, err.Error()) - return err + return nil, err } scope.SetLaunchTemplateLatestVersionStatus(launchTemplateVersion) - if err := scope.PatchObject(); err != nil { - return err + if err = scope.PatchObject(); err != nil { + return nil, err } } annotation, err := MachinePoolAnnotationJSON(scope, TagsLastAppliedAnnotation) if err != nil { - return err + return nil, err } // Check if the instance tags were changed. If they were, create a new LaunchTemplate. @@ -129,7 +217,7 @@ func (s *Service) ReconcileLaunchTemplate( needsUpdate, err := ec2svc.LaunchTemplateNeedsUpdate(scope, scope.GetLaunchTemplate(), launchTemplate) if err != nil { - return err + return nil, err } amiChanged := *imageID != *launchTemplate.AMI.ID @@ -143,50 +231,90 @@ func (s *Service) ReconcileLaunchTemplate( launchTemplateNeedsUserDataSecretKeyTag := launchTemplateUserDataSecretKey == nil if needsUpdate || tagsChanged || amiChanged || userDataSecretKeyChanged { - canUpdate, err := canUpdateLaunchTemplate() + // More than just the bootstrap token changed + + canStartRefresh, unfinishedRefreshStatus, err := canStartInstanceRefresh() if err != nil { - return err + return nil, err } - if !canUpdate { - conditions.MarkFalse(scope.GetSetter(), expinfrav1.PreLaunchTemplateUpdateCheckCondition, expinfrav1.PreLaunchTemplateUpdateCheckFailedReason, clusterv1.ConditionSeverityWarning, "") - return errors.New("Cannot update the launch template, prerequisite not met") + if !canStartRefresh { + if unfinishedRefreshStatus != nil && *unfinishedRefreshStatus != autoscaling.InstanceRefreshStatusCancelling { + // Until the previous instance refresh goes into `Cancelled` state + // asynchronously, allowing another refresh to be started, + // defer the reconciliation. Otherwise, we get an + // `ErrCodeInstanceRefreshInProgressFault` error if we tried to + // start an instance refresh immediately. + scope.Info("Cancelling previous instance refresh and delaying reconciliation until the next one can be started", "unfinishedRefreshStatus", unfinishedRefreshStatus) + + err := cancelInstanceRefresh() + if err != nil { + return nil, err + } + } else { + scope.Info("Existing instance refresh is not finished, delaying reconciliation until the next one can be started", "unfinishedRefreshStatus", unfinishedRefreshStatus) + } + return &ctrl.Result{RequeueAfter: 30 * time.Second}, nil } } - userDataHashChanged := launchTemplateUserDataHash != bootstrapDataHash + userDataHashChanged := launchTemplateUserDataHash != bootstrapDataForLaunchTemplateHash // Create a new launch template version if there's a difference in configuration, tags, // userdata, OR we've discovered a new AMI ID. if needsUpdate || tagsChanged || amiChanged || userDataHashChanged || userDataSecretKeyChanged || launchTemplateNeedsUserDataSecretKeyTag { scope.Info("creating new version for launch template", "existing", launchTemplate, "incoming", scope.GetLaunchTemplate(), "needsUpdate", needsUpdate, "tagsChanged", tagsChanged, "amiChanged", amiChanged, "userDataHashChanged", userDataHashChanged, "userDataSecretKeyChanged", userDataSecretKeyChanged) + // There is a limit to the number of Launch Template Versions. // We ensure that the number of versions does not grow without bound by following a simple rule: Before we create a new version, we delete one old version, if there is at least one old version that is not in use. - if err := ec2svc.PruneLaunchTemplateVersions(scope.GetLaunchTemplateIDStatus()); err != nil { - return err + deletedLaunchTemplateVersion, err := ec2svc.PruneLaunchTemplateVersions(scope.GetLaunchTemplateIDStatus()) + if err != nil { + return nil, err + } + + // S3 objects should be deleted as soon as possible if they're not used + // anymore. If this fails, it would still be cleaned by the bucket lifecycle + // policy later. + if feature.Gates.Enabled(feature.MachinePool) && deletedLaunchTemplateVersion != nil { + _, _, _, deletedLaunchTemplateVersionBootstrapDataHash, err := s.SDKToLaunchTemplate(deletedLaunchTemplateVersion) + if err != nil { + return nil, err + } + if deletedLaunchTemplateVersionBootstrapDataHash != nil && s3Scope.Bucket() != nil && bootstrapDataFormat == "ignition" && ignitionScope.Ignition() != nil { + scope.Info("Deleting S3 object for deleted launch template version", "version", *deletedLaunchTemplateVersion.VersionNumber) + + err = objectStoreSvc.DeleteForMachinePool(scope, *deletedLaunchTemplateVersionBootstrapDataHash) + + // If any error happened above, log it and continue + if err != nil { + scope.Error(err, "Failed to delete S3 object for deleted launch template version, continuing because the bucket lifecycle policy will clean it later", "version", *deletedLaunchTemplateVersion.VersionNumber) + } + } } - if err := ec2svc.CreateLaunchTemplateVersion(scope.GetLaunchTemplateIDStatus(), scope, imageID, *bootstrapDataSecretKey, bootstrapData); err != nil { - return err + + if err := ec2svc.CreateLaunchTemplateVersion(scope.GetLaunchTemplateIDStatus(), scope, imageID, *bootstrapDataSecretKey, userDataForLaunchTemplate, userdata.ComputeHash(bootstrapData)); err != nil { + return nil, err } version, err := ec2svc.GetLaunchTemplateLatestVersion(scope.GetLaunchTemplateIDStatus()) if err != nil { - return err + return nil, err } scope.SetLaunchTemplateLatestVersionStatus(version) if err := scope.PatchObject(); err != nil { - return err + return nil, err } } if needsUpdate || tagsChanged || amiChanged || userDataSecretKeyChanged { - if err := runPostLaunchTemplateUpdateOperation(); err != nil { + err := runPostLaunchTemplateUpdateOperation() + if err != nil { conditions.MarkFalse(scope.GetSetter(), expinfrav1.PostLaunchTemplateUpdateOperationCondition, expinfrav1.PostLaunchTemplateUpdateOperationFailedReason, clusterv1.ConditionSeverityError, err.Error()) - return err + return nil, err } conditions.MarkTrue(scope.GetSetter(), expinfrav1.PostLaunchTemplateUpdateOperationCondition) } - return nil + return nil, nil } // ReconcileTags reconciles the tags for the AWSMachinePool instances. @@ -345,9 +473,9 @@ func tagsChanged(annotation map[string]interface{}, src map[string]string) (bool // GetLaunchTemplate returns the existing LaunchTemplate or nothing if it doesn't exist. // For now by name until we need the input to be something different. -func (s *Service) GetLaunchTemplate(launchTemplateName string) (*expinfrav1.AWSLaunchTemplate, string, *apimachinerytypes.NamespacedName, error) { +func (s *Service) GetLaunchTemplate(launchTemplateName string) (*expinfrav1.AWSLaunchTemplate, string, *apimachinerytypes.NamespacedName, *string, error) { if launchTemplateName == "" { - return nil, "", nil, nil + return nil, "", nil, nil, nil } s.scope.Debug("Looking for existing LaunchTemplates") @@ -360,13 +488,13 @@ func (s *Service) GetLaunchTemplate(launchTemplateName string) (*expinfrav1.AWSL out, err := s.EC2Client.DescribeLaunchTemplateVersionsWithContext(context.TODO(), input) switch { case awserrors.IsNotFound(err): - return nil, "", nil, nil + return nil, "", nil, nil, nil case err != nil: - return nil, "", nil, err + return nil, "", nil, nil, err } if out == nil || out.LaunchTemplateVersions == nil || len(out.LaunchTemplateVersions) == 0 { - return nil, "", nil, nil + return nil, "", nil, nil, nil } return s.SDKToLaunchTemplate(out.LaunchTemplateVersions[0]) @@ -400,10 +528,10 @@ func (s *Service) GetLaunchTemplateID(launchTemplateName string) (string, error) } // CreateLaunchTemplate generates a launch template to be used with the autoscaling group. -func (s *Service) CreateLaunchTemplate(scope scope.LaunchTemplateScope, imageID *string, userDataSecretKey apimachinerytypes.NamespacedName, userData []byte) (string, error) { +func (s *Service) CreateLaunchTemplate(scope scope.LaunchTemplateScope, imageID *string, userDataSecretKey apimachinerytypes.NamespacedName, userData []byte, bootstrapDataHash string) (string, error) { s.scope.Info("Create a new launch template") - launchTemplateData, err := s.createLaunchTemplateData(scope, imageID, userDataSecretKey, userData) + launchTemplateData, err := s.createLaunchTemplateData(scope, imageID, userDataSecretKey, userData, bootstrapDataHash) if err != nil { return "", errors.Wrapf(err, "unable to form launch template data") } @@ -444,10 +572,14 @@ func (s *Service) CreateLaunchTemplate(scope scope.LaunchTemplateScope, imageID } // CreateLaunchTemplateVersion will create a launch template. -func (s *Service) CreateLaunchTemplateVersion(id string, scope scope.LaunchTemplateScope, imageID *string, userDataSecretKey apimachinerytypes.NamespacedName, userData []byte) error { +// While userDataForLaunchTemplate is the data for the EC2 launch +// template, bootstrapDataHash relates to the final bootstrap data +// (not necessarily stored in EC2 user data, but could be in an S3 +// object). +func (s *Service) CreateLaunchTemplateVersion(id string, scope scope.LaunchTemplateScope, imageID *string, userDataSecretKey apimachinerytypes.NamespacedName, userDataForLaunchTemplate []byte, bootstrapDataHash string) error { s.scope.Debug("creating new launch template version", "machine-pool", scope.LaunchTemplateName()) - launchTemplateData, err := s.createLaunchTemplateData(scope, imageID, userDataSecretKey, userData) + launchTemplateData, err := s.createLaunchTemplateData(scope, imageID, userDataSecretKey, userDataForLaunchTemplate, bootstrapDataHash) if err != nil { return errors.Wrapf(err, "unable to form launch template data") } @@ -465,7 +597,7 @@ func (s *Service) CreateLaunchTemplateVersion(id string, scope scope.LaunchTempl return nil } -func (s *Service) createLaunchTemplateData(scope scope.LaunchTemplateScope, imageID *string, userDataSecretKey apimachinerytypes.NamespacedName, userData []byte) (*ec2.RequestLaunchTemplateData, error) { +func (s *Service) createLaunchTemplateData(scope scope.LaunchTemplateScope, imageID *string, userDataSecretKey apimachinerytypes.NamespacedName, userDataForLaunchTemplate []byte, bootstrapDataHash string) (*ec2.RequestLaunchTemplateData, error) { lt := scope.GetLaunchTemplate() // An explicit empty string for SSHKeyName means do not specify a key in the ASG launch @@ -477,7 +609,7 @@ func (s *Service) createLaunchTemplateData(scope scope.LaunchTemplateScope, imag data := &ec2.RequestLaunchTemplateData{ InstanceType: aws.String(lt.InstanceType), KeyName: sshKeyNamePtr, - UserData: ptr.To[string](base64.StdEncoding.EncodeToString(userData)), + UserData: ptr.To[string](base64.StdEncoding.EncodeToString(userDataForLaunchTemplate)), } if lt.InstanceMetadataOptions != nil { @@ -548,7 +680,7 @@ func (s *Service) createLaunchTemplateData(scope scope.LaunchTemplateScope, imag data.BlockDeviceMappings = blockDeviceMappings } - data.TagSpecifications = s.buildLaunchTemplateTagSpecificationRequest(scope, userDataSecretKey) + data.TagSpecifications = s.buildLaunchTemplateTagSpecificationRequest(scope, userDataSecretKey, bootstrapDataHash) return data, nil } @@ -603,7 +735,8 @@ func (s *Service) DeleteLaunchTemplate(id string) error { // It does not delete the "latest" version, because that version may still be in use. // It does not delete the "default" version, because that version cannot be deleted. // It does not assume that versions are sequential. Versions may be deleted out of band. -func (s *Service) PruneLaunchTemplateVersions(id string) error { +// If there was an unused version which was successfully deleted, it is returned. +func (s *Service) PruneLaunchTemplateVersions(id string) (*ec2.LaunchTemplateVersion, error) { // When there is one version available, it is the default and the latest. // When there are two versions available, one the is the default, the other is the latest. // Therefore we only prune when there are at least 3 versions available. @@ -619,7 +752,7 @@ func (s *Service) PruneLaunchTemplateVersions(id string) error { out, err := s.EC2Client.DescribeLaunchTemplateVersionsWithContext(context.TODO(), input) if err != nil { s.scope.Info("", "aerr", err.Error()) - return err + return nil, err } // len(out.LaunchTemplateVersions) | items @@ -628,10 +761,14 @@ func (s *Service) PruneLaunchTemplateVersions(id string) error { // 2 | [default, latest] // 3 | [default, versionToPrune, latest] if len(out.LaunchTemplateVersions) < minCountToAllowPrune { - return nil + return nil, nil } - versionToPrune := out.LaunchTemplateVersions[1].VersionNumber - return s.deleteLaunchTemplateVersion(id, versionToPrune) + versionToPrune := out.LaunchTemplateVersions[1] + err = s.deleteLaunchTemplateVersion(id, versionToPrune.VersionNumber) + if err != nil { + return nil, err + } + return versionToPrune, nil } // GetLaunchTemplateLatestVersion returns the latest version of a launch template. @@ -655,11 +792,12 @@ func (s *Service) GetLaunchTemplateLatestVersion(id string) (string, error) { } func (s *Service) deleteLaunchTemplateVersion(id string, version *int64) error { - s.scope.Debug("Deleting launch template version", "id", id) - if version == nil { return errors.New("version is a nil pointer") } + + s.scope.Debug("Deleting launch template version", "id", id, "version", *version) + versions := []string{strconv.FormatInt(*version, 10)} input := &ec2.DeleteLaunchTemplateVersionsInput{ @@ -672,12 +810,12 @@ func (s *Service) deleteLaunchTemplateVersion(id string, version *int64) error { return err } - s.scope.Debug("Deleted launch template", "id", id, "version", *version) + s.scope.Debug("Deleted launch template version", "id", id, "version", *version) return nil } // SDKToLaunchTemplate converts an AWS EC2 SDK instance to the CAPA instance type. -func (s *Service) SDKToLaunchTemplate(d *ec2.LaunchTemplateVersion) (*expinfrav1.AWSLaunchTemplate, string, *apimachinerytypes.NamespacedName, error) { +func (s *Service) SDKToLaunchTemplate(d *ec2.LaunchTemplateVersion) (*expinfrav1.AWSLaunchTemplate, string, *apimachinerytypes.NamespacedName, *string, error) { v := d.LaunchTemplateData i := &expinfrav1.AWSLaunchTemplate{ Name: aws.StringValue(d.LaunchTemplateName), @@ -732,30 +870,35 @@ func (s *Service) SDKToLaunchTemplate(d *ec2.LaunchTemplateVersion) (*expinfrav1 } if v.UserData == nil { - return i, userdata.ComputeHash(nil), nil, nil + return i, userdata.ComputeHash(nil), nil, nil, nil } decodedUserData, err := base64.StdEncoding.DecodeString(*v.UserData) if err != nil { - return nil, "", nil, errors.Wrap(err, "unable to decode UserData") + return nil, "", nil, nil, errors.Wrap(err, "unable to decode UserData") } decodedUserDataHash := userdata.ComputeHash(decodedUserData) + var launchTemplateUserDataSecretKey *apimachinerytypes.NamespacedName + var bootstrapDataHash *string for _, tagSpecification := range v.TagSpecifications { if tagSpecification.ResourceType != nil && *tagSpecification.ResourceType == ec2.ResourceTypeInstance { for _, tag := range tagSpecification.Tags { if tag.Key != nil && *tag.Key == infrav1.LaunchTemplateBootstrapDataSecret && tag.Value != nil && strings.Contains(*tag.Value, "/") { parts := strings.SplitN(*tag.Value, "/", 2) - launchTemplateUserDataSecretKey := &apimachinerytypes.NamespacedName{ + launchTemplateUserDataSecretKey = &apimachinerytypes.NamespacedName{ Namespace: parts[0], Name: parts[1], } - return i, decodedUserDataHash, launchTemplateUserDataSecretKey, nil + } + + if tag.Key != nil && *tag.Key == infrav1.LaunchTemplateBootstrapDataHash && tag.Value != nil && *tag.Value != "" { + bootstrapDataHash = tag.Value } } } } - return i, decodedUserDataHash, nil, nil + return i, decodedUserDataHash, launchTemplateUserDataSecretKey, bootstrapDataHash, nil } // LaunchTemplateNeedsUpdate checks if a new launch template version is needed. @@ -891,7 +1034,7 @@ func (s *Service) GetAdditionalSecurityGroupsIDs(securityGroups []infrav1.AWSRes return additionalSecurityGroupsIDs, nil } -func (s *Service) buildLaunchTemplateTagSpecificationRequest(scope scope.LaunchTemplateScope, userDataSecretKey apimachinerytypes.NamespacedName) []*ec2.LaunchTemplateTagSpecificationRequest { +func (s *Service) buildLaunchTemplateTagSpecificationRequest(scope scope.LaunchTemplateScope, userDataSecretKey apimachinerytypes.NamespacedName, bootstrapDataHash string) []*ec2.LaunchTemplateTagSpecificationRequest { tagSpecifications := make([]*ec2.LaunchTemplateTagSpecificationRequest, 0) additionalTags := scope.AdditionalTags() // Set the cloud provider tag @@ -909,6 +1052,7 @@ func (s *Service) buildLaunchTemplateTagSpecificationRequest(scope scope.LaunchT { instanceTags := tags.DeepCopy() instanceTags[infrav1.LaunchTemplateBootstrapDataSecret] = userDataSecretKey.String() + instanceTags[infrav1.LaunchTemplateBootstrapDataHash] = bootstrapDataHash spec := &ec2.LaunchTemplateTagSpecificationRequest{ResourceType: aws.String(ec2.ResourceTypeInstance)} for key, value := range instanceTags { diff --git a/pkg/cloud/services/ec2/launchtemplate_test.go b/pkg/cloud/services/ec2/launchtemplate_test.go index 4553ad4546..aca1b85a12 100644 --- a/pkg/cloud/services/ec2/launchtemplate_test.go +++ b/pkg/cloud/services/ec2/launchtemplate_test.go @@ -80,12 +80,19 @@ users: var testUserDataHash = userdata.ComputeHash([]byte(testUserData)) -func defaultEC2AndUserDataSecretKeyTags(name string, clusterName string, userDataSecretKey types.NamespacedName) []*ec2.Tag { +var testBootstrapData = []byte("different from testUserData since bootstrap data may be in S3 while EC2 user data points to that S3 object") +var testBootstrapDataHash = userdata.ComputeHash(testBootstrapData) + +func defaultEC2AndDataTags(name string, clusterName string, userDataSecretKey types.NamespacedName, bootstrapDataHash string) []*ec2.Tag { tags := defaultEC2Tags(name, clusterName) tags = append(tags, &ec2.Tag{ Key: aws.String(infrav1.LaunchTemplateBootstrapDataSecret), Value: aws.String(userDataSecretKey.String()), }) + tags = append(tags, &ec2.Tag{ + Key: aws.String(infrav1.LaunchTemplateBootstrapDataHash), + Value: aws.String(bootstrapDataHash), + }) sortTags(tags) return tags } @@ -295,7 +302,7 @@ func TestGetLaunchTemplate(t *testing.T) { tc.expect(mockEC2Client.EXPECT()) } - launchTemplate, userData, _, err := s.GetLaunchTemplate(tc.launchTemplateName) + launchTemplate, userData, _, _, err := s.GetLaunchTemplate(tc.launchTemplateName) tc.check(g, launchTemplate, userData, err) }) } @@ -303,12 +310,13 @@ func TestGetLaunchTemplate(t *testing.T) { func TestServiceSDKToLaunchTemplate(t *testing.T) { tests := []struct { - name string - input *ec2.LaunchTemplateVersion - wantLT *expinfrav1.AWSLaunchTemplate - wantHash string - wantDataSecretKey *types.NamespacedName - wantErr bool + name string + input *ec2.LaunchTemplateVersion + wantLT *expinfrav1.AWSLaunchTemplate + wantUserDataHash string + wantDataSecretKey *types.NamespacedName + wantBootstrapDataHash *string + wantErr bool }{ { name: "lots of input", @@ -350,8 +358,9 @@ func TestServiceSDKToLaunchTemplate(t *testing.T) { SSHKeyName: aws.String("foo-keyname"), VersionNumber: aws.Int64(1), }, - wantHash: testUserDataHash, - wantDataSecretKey: nil, // respective tag is not given + wantUserDataHash: testUserDataHash, + wantDataSecretKey: nil, // respective tag is not given + wantBootstrapDataHash: nil, // respective tag is not given }, { name: "tag of bootstrap secret", @@ -388,6 +397,10 @@ func TestServiceSDKToLaunchTemplate(t *testing.T) { Key: aws.String("sigs.k8s.io/cluster-api-provider-aws/bootstrap-data-secret"), Value: aws.String("bootstrap-secret-ns/bootstrap-secret"), }, + { + Key: aws.String("sigs.k8s.io/cluster-api-provider-aws/bootstrap-data-hash"), + Value: aws.String(testBootstrapDataHash), + }, }, }, }, @@ -404,26 +417,29 @@ func TestServiceSDKToLaunchTemplate(t *testing.T) { SSHKeyName: aws.String("foo-keyname"), VersionNumber: aws.Int64(1), }, - wantHash: testUserDataHash, - wantDataSecretKey: &types.NamespacedName{Namespace: "bootstrap-secret-ns", Name: "bootstrap-secret"}, + wantUserDataHash: testUserDataHash, + wantDataSecretKey: &types.NamespacedName{Namespace: "bootstrap-secret-ns", Name: "bootstrap-secret"}, + wantBootstrapDataHash: &testBootstrapDataHash, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) s := &Service{} - gotLT, gotHash, gotDataSecretKey, err := s.SDKToLaunchTemplate(tt.input) + gotLT, gotUserDataHash, gotDataSecretKey, gotBootstrapDataHash, err := s.SDKToLaunchTemplate(tt.input) if (err != nil) != tt.wantErr { t.Fatalf("error mismatch: got %v, wantErr %v", err, tt.wantErr) } if !cmp.Equal(gotLT, tt.wantLT) { t.Fatalf("launchTemplate mismatch: got %v, want %v", gotLT, tt.wantLT) } - if !cmp.Equal(gotHash, tt.wantHash) { - t.Fatalf("userDataHash mismatch: got %v, want %v", gotHash, tt.wantHash) + if !cmp.Equal(gotUserDataHash, tt.wantUserDataHash) { + t.Fatalf("userDataHash mismatch: got %v, want %v", gotUserDataHash, tt.wantUserDataHash) } if !cmp.Equal(gotDataSecretKey, tt.wantDataSecretKey) { t.Fatalf("userDataSecretKey mismatch: got %v, want %v", gotDataSecretKey, tt.wantDataSecretKey) } + g.Expect(gotBootstrapDataHash).To(Equal(tt.wantBootstrapDataHash)) }) } } @@ -845,7 +861,7 @@ func TestCreateLaunchTemplate(t *testing.T) { TagSpecifications: []*ec2.LaunchTemplateTagSpecificationRequest{ { ResourceType: aws.String(ec2.ResourceTypeInstance), - Tags: defaultEC2AndUserDataSecretKeyTags("aws-mp-name", "cluster-name", userDataSecretKey), + Tags: defaultEC2AndDataTags("aws-mp-name", "cluster-name", userDataSecretKey, testBootstrapDataHash), }, { ResourceType: aws.String(ec2.ResourceTypeVolume), @@ -905,7 +921,7 @@ func TestCreateLaunchTemplate(t *testing.T) { TagSpecifications: []*ec2.LaunchTemplateTagSpecificationRequest{ { ResourceType: aws.String(ec2.ResourceTypeInstance), - Tags: defaultEC2AndUserDataSecretKeyTags("aws-mp-name", "cluster-name", userDataSecretKey), + Tags: defaultEC2AndDataTags("aws-mp-name", "cluster-name", userDataSecretKey, testBootstrapDataHash), }, { ResourceType: aws.String(ec2.ResourceTypeVolume), @@ -967,7 +983,7 @@ func TestCreateLaunchTemplate(t *testing.T) { TagSpecifications: []*ec2.LaunchTemplateTagSpecificationRequest{ { ResourceType: aws.String(ec2.ResourceTypeInstance), - Tags: defaultEC2AndUserDataSecretKeyTags("aws-mp-name", "cluster-name", userDataSecretKey), + Tags: defaultEC2AndDataTags("aws-mp-name", "cluster-name", userDataSecretKey, testBootstrapDataHash), }, { ResourceType: aws.String(ec2.ResourceTypeVolume), @@ -1022,7 +1038,7 @@ func TestCreateLaunchTemplate(t *testing.T) { tc.expect(g, mockEC2Client.EXPECT()) } - launchTemplate, err := s.CreateLaunchTemplate(ms, aws.String("imageID"), userDataSecretKey, userData) + launchTemplate, err := s.CreateLaunchTemplate(ms, aws.String("imageID"), userDataSecretKey, userData, testBootstrapDataHash) tc.check(g, launchTemplate, err) }) } @@ -1050,7 +1066,7 @@ func TestLaunchTemplateDataCreation(t *testing.T) { Namespace: "bootstrap-secret-ns", Name: "bootstrap-secret", } - launchTemplate, err := s.CreateLaunchTemplate(ms, aws.String("imageID"), userDataSecretKey, nil) + launchTemplate, err := s.CreateLaunchTemplate(ms, aws.String("imageID"), userDataSecretKey, nil, "") g.Expect(err).To(HaveOccurred()) g.Expect(launchTemplate).Should(BeEmpty()) }) @@ -1104,7 +1120,7 @@ func TestCreateLaunchTemplateVersion(t *testing.T) { TagSpecifications: []*ec2.LaunchTemplateTagSpecificationRequest{ { ResourceType: aws.String(ec2.ResourceTypeInstance), - Tags: defaultEC2AndUserDataSecretKeyTags("aws-mp-name", "cluster-name", userDataSecretKey), + Tags: defaultEC2AndDataTags("aws-mp-name", "cluster-name", userDataSecretKey, testBootstrapDataHash), }, { ResourceType: aws.String(ec2.ResourceTypeVolume), @@ -1155,7 +1171,7 @@ func TestCreateLaunchTemplateVersion(t *testing.T) { TagSpecifications: []*ec2.LaunchTemplateTagSpecificationRequest{ { ResourceType: aws.String(ec2.ResourceTypeInstance), - Tags: defaultEC2AndUserDataSecretKeyTags("aws-mp-name", "cluster-name", userDataSecretKey), + Tags: defaultEC2AndDataTags("aws-mp-name", "cluster-name", userDataSecretKey, testBootstrapDataHash), }, { ResourceType: aws.String(ec2.ResourceTypeVolume), @@ -1202,10 +1218,10 @@ func TestCreateLaunchTemplateVersion(t *testing.T) { tc.expect(mockEC2Client.EXPECT()) } if tc.wantErr { - g.Expect(s.CreateLaunchTemplateVersion("launch-template-id", ms, aws.String("imageID"), userDataSecretKey, userData)).To(HaveOccurred()) + g.Expect(s.CreateLaunchTemplateVersion("launch-template-id", ms, aws.String("imageID"), userDataSecretKey, userData, testBootstrapDataHash)).To(HaveOccurred()) return } - g.Expect(s.CreateLaunchTemplateVersion("launch-template-id", ms, aws.String("imageID"), userDataSecretKey, userData)).NotTo(HaveOccurred()) + g.Expect(s.CreateLaunchTemplateVersion("launch-template-id", ms, aws.String("imageID"), userDataSecretKey, userData, testBootstrapDataHash)).NotTo(HaveOccurred()) }) } } @@ -1218,6 +1234,7 @@ func TestBuildLaunchTemplateTagSpecificationRequest(t *testing.T) { Namespace: "bootstrap-secret-ns", Name: "bootstrap-secret", } + bootstrapDataHash := userdata.ComputeHash([]byte("shell-script")) testCases := []struct { name string check func(g *WithT, m []*ec2.LaunchTemplateTagSpecificationRequest) @@ -1228,7 +1245,7 @@ func TestBuildLaunchTemplateTagSpecificationRequest(t *testing.T) { expected := []*ec2.LaunchTemplateTagSpecificationRequest{ { ResourceType: aws.String(ec2.ResourceTypeInstance), - Tags: defaultEC2AndUserDataSecretKeyTags("aws-mp-name", "cluster-name", userDataSecretKey), + Tags: defaultEC2AndDataTags("aws-mp-name", "cluster-name", userDataSecretKey, bootstrapDataHash), }, { ResourceType: aws.String(ec2.ResourceTypeVolume), @@ -1258,7 +1275,7 @@ func TestBuildLaunchTemplateTagSpecificationRequest(t *testing.T) { g.Expect(err).NotTo(HaveOccurred()) s := NewService(cs) - tc.check(g, s.buildLaunchTemplateTagSpecificationRequest(ms, userDataSecretKey)) + tc.check(g, s.buildLaunchTemplateTagSpecificationRequest(ms, userDataSecretKey, bootstrapDataHash)) }) } } diff --git a/pkg/cloud/services/interfaces.go b/pkg/cloud/services/interfaces.go index 5e0f3dc2e6..6c4f8434e9 100644 --- a/pkg/cloud/services/interfaces.go +++ b/pkg/cloud/services/interfaces.go @@ -20,7 +20,9 @@ package services import ( "context" + "github.com/aws/aws-sdk-go/service/ec2" apimachinerytypes "k8s.io/apimachinery/pkg/types" + ctrl "sigs.k8s.io/controller-runtime" infrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/api/v1beta2" expinfrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/exp/api/v1beta2" @@ -43,8 +45,9 @@ type ASGInterface interface { GetASGByName(scope *scope.MachinePoolScope) (*expinfrav1.AutoScalingGroup, error) CreateASG(scope *scope.MachinePoolScope) (*expinfrav1.AutoScalingGroup, error) UpdateASG(scope *scope.MachinePoolScope) error + CancelASGInstanceRefresh(scope *scope.MachinePoolScope) error StartASGInstanceRefresh(scope *scope.MachinePoolScope) error - CanStartASGInstanceRefresh(scope *scope.MachinePoolScope) (bool, error) + CanStartASGInstanceRefresh(scope *scope.MachinePoolScope) (bool, *string, error) UpdateResourceTags(resourceID *string, create, remove map[string]string) error DeleteASGAndWait(id string) error SuspendProcesses(name string, processes []string) error @@ -71,12 +74,12 @@ type EC2Interface interface { DetachSecurityGroupsFromNetworkInterface(groups []string, interfaceID string) error DiscoverLaunchTemplateAMI(scope scope.LaunchTemplateScope) (*string, error) - GetLaunchTemplate(id string) (lt *expinfrav1.AWSLaunchTemplate, userDataHash string, userDataSecretKey *apimachinerytypes.NamespacedName, err error) + GetLaunchTemplate(id string) (lt *expinfrav1.AWSLaunchTemplate, userDataHash string, userDataSecretKey *apimachinerytypes.NamespacedName, bootstrapDataHash *string, err error) GetLaunchTemplateID(id string) (string, error) GetLaunchTemplateLatestVersion(id string) (string, error) - CreateLaunchTemplate(scope scope.LaunchTemplateScope, imageID *string, userDataSecretKey apimachinerytypes.NamespacedName, userData []byte) (string, error) - CreateLaunchTemplateVersion(id string, scope scope.LaunchTemplateScope, imageID *string, userDataSecretKey apimachinerytypes.NamespacedName, userData []byte) error - PruneLaunchTemplateVersions(id string) error + CreateLaunchTemplate(scope scope.LaunchTemplateScope, imageID *string, userDataSecretKey apimachinerytypes.NamespacedName, userData []byte, bootstrapDataHash string) (string, error) + CreateLaunchTemplateVersion(id string, scope scope.LaunchTemplateScope, imageID *string, userDataSecretKey apimachinerytypes.NamespacedName, userData []byte, bootstrapDataHash string) error + PruneLaunchTemplateVersions(id string) (*ec2.LaunchTemplateVersion, error) DeleteLaunchTemplate(id string) error LaunchTemplateNeedsUpdate(scope scope.LaunchTemplateScope, incoming *expinfrav1.AWSLaunchTemplate, existing *expinfrav1.AWSLaunchTemplate) (bool, error) DeleteBastion() error @@ -92,7 +95,7 @@ type EC2Interface interface { // separate from EC2Interface so that we can mock AWS requests separately. For example, by not mocking the // ReconcileLaunchTemplate function, but mocking EC2Interface, we can test which EC2 API operations would have been called. type MachinePoolReconcileInterface interface { - ReconcileLaunchTemplate(scope scope.LaunchTemplateScope, ec2svc EC2Interface, canUpdateLaunchTemplate func() (bool, error), runPostLaunchTemplateUpdateOperation func() error) error + ReconcileLaunchTemplate(ignitionScope scope.IgnitionScope, scope scope.LaunchTemplateScope, s3Scope scope.S3Scope, ec2svc EC2Interface, objectStoreSvc ObjectStoreInterface, canUpdateLaunchTemplate func() (bool, *string, error), cancelInstanceRefresh func() error, runPostLaunchTemplateUpdateOperation func() error) (*ctrl.Result, error) ReconcileTags(scope scope.LaunchTemplateScope, resourceServicesToUpdate []scope.ResourceServiceToUpdate) error } @@ -137,6 +140,8 @@ type ObjectStoreInterface interface { ReconcileBucket() error Delete(m *scope.MachineScope) error Create(m *scope.MachineScope, data []byte) (objectURL string, err error) + CreateForMachinePool(scope scope.LaunchTemplateScope, data []byte) (objectURL string, err error) + DeleteForMachinePool(scope scope.LaunchTemplateScope, bootstrapDataHash string) error } // AWSNodeInterface installs the CNI for EKS clusters. diff --git a/pkg/cloud/services/mock_services/autoscaling_interface_mock.go b/pkg/cloud/services/mock_services/autoscaling_interface_mock.go index b860077f4f..2448d3dcb7 100644 --- a/pkg/cloud/services/mock_services/autoscaling_interface_mock.go +++ b/pkg/cloud/services/mock_services/autoscaling_interface_mock.go @@ -67,12 +67,13 @@ func (mr *MockASGInterfaceMockRecorder) ASGIfExists(arg0 interface{}) *gomock.Ca } // CanStartASGInstanceRefresh mocks base method. -func (m *MockASGInterface) CanStartASGInstanceRefresh(arg0 *scope.MachinePoolScope) (bool, error) { +func (m *MockASGInterface) CanStartASGInstanceRefresh(arg0 *scope.MachinePoolScope) (bool, *string, error) { m.ctrl.T.Helper() ret := m.ctrl.Call(m, "CanStartASGInstanceRefresh", arg0) ret0, _ := ret[0].(bool) - ret1, _ := ret[1].(error) - return ret0, ret1 + ret1, _ := ret[1].(*string) + ret2, _ := ret[2].(error) + return ret0, ret1, ret2 } // CanStartASGInstanceRefresh indicates an expected call of CanStartASGInstanceRefresh. @@ -81,6 +82,20 @@ func (mr *MockASGInterfaceMockRecorder) CanStartASGInstanceRefresh(arg0 interfac return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CanStartASGInstanceRefresh", reflect.TypeOf((*MockASGInterface)(nil).CanStartASGInstanceRefresh), arg0) } +// CancelASGInstanceRefresh mocks base method. +func (m *MockASGInterface) CancelASGInstanceRefresh(arg0 *scope.MachinePoolScope) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "CancelASGInstanceRefresh", arg0) + ret0, _ := ret[0].(error) + return ret0 +} + +// CancelASGInstanceRefresh indicates an expected call of CancelASGInstanceRefresh. +func (mr *MockASGInterfaceMockRecorder) CancelASGInstanceRefresh(arg0 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CancelASGInstanceRefresh", reflect.TypeOf((*MockASGInterface)(nil).CancelASGInstanceRefresh), arg0) +} + // CreateASG mocks base method. func (m *MockASGInterface) CreateASG(arg0 *scope.MachinePoolScope) (*v1beta2.AutoScalingGroup, error) { m.ctrl.T.Helper() diff --git a/pkg/cloud/services/mock_services/ec2_interface_mock.go b/pkg/cloud/services/mock_services/ec2_interface_mock.go index d46dc001e4..0de1443e2c 100644 --- a/pkg/cloud/services/mock_services/ec2_interface_mock.go +++ b/pkg/cloud/services/mock_services/ec2_interface_mock.go @@ -23,6 +23,7 @@ package mock_services import ( reflect "reflect" + ec2 "github.com/aws/aws-sdk-go/service/ec2" gomock "github.com/golang/mock/gomock" types "k8s.io/apimachinery/pkg/types" v1beta2 "sigs.k8s.io/cluster-api-provider-aws/v2/api/v1beta2" @@ -69,32 +70,32 @@ func (mr *MockEC2InterfaceMockRecorder) CreateInstance(arg0, arg1, arg2 interfac } // CreateLaunchTemplate mocks base method. -func (m *MockEC2Interface) CreateLaunchTemplate(arg0 scope.LaunchTemplateScope, arg1 *string, arg2 types.NamespacedName, arg3 []byte) (string, error) { +func (m *MockEC2Interface) CreateLaunchTemplate(arg0 scope.LaunchTemplateScope, arg1 *string, arg2 types.NamespacedName, arg3 []byte, arg4 string) (string, error) { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "CreateLaunchTemplate", arg0, arg1, arg2, arg3) + ret := m.ctrl.Call(m, "CreateLaunchTemplate", arg0, arg1, arg2, arg3, arg4) ret0, _ := ret[0].(string) ret1, _ := ret[1].(error) return ret0, ret1 } // CreateLaunchTemplate indicates an expected call of CreateLaunchTemplate. -func (mr *MockEC2InterfaceMockRecorder) CreateLaunchTemplate(arg0, arg1, arg2, arg3 interface{}) *gomock.Call { +func (mr *MockEC2InterfaceMockRecorder) CreateLaunchTemplate(arg0, arg1, arg2, arg3, arg4 interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CreateLaunchTemplate", reflect.TypeOf((*MockEC2Interface)(nil).CreateLaunchTemplate), arg0, arg1, arg2, arg3) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CreateLaunchTemplate", reflect.TypeOf((*MockEC2Interface)(nil).CreateLaunchTemplate), arg0, arg1, arg2, arg3, arg4) } // CreateLaunchTemplateVersion mocks base method. -func (m *MockEC2Interface) CreateLaunchTemplateVersion(arg0 string, arg1 scope.LaunchTemplateScope, arg2 *string, arg3 types.NamespacedName, arg4 []byte) error { +func (m *MockEC2Interface) CreateLaunchTemplateVersion(arg0 string, arg1 scope.LaunchTemplateScope, arg2 *string, arg3 types.NamespacedName, arg4 []byte, arg5 string) error { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "CreateLaunchTemplateVersion", arg0, arg1, arg2, arg3, arg4) + ret := m.ctrl.Call(m, "CreateLaunchTemplateVersion", arg0, arg1, arg2, arg3, arg4, arg5) ret0, _ := ret[0].(error) return ret0 } // CreateLaunchTemplateVersion indicates an expected call of CreateLaunchTemplateVersion. -func (mr *MockEC2InterfaceMockRecorder) CreateLaunchTemplateVersion(arg0, arg1, arg2, arg3, arg4 interface{}) *gomock.Call { +func (mr *MockEC2InterfaceMockRecorder) CreateLaunchTemplateVersion(arg0, arg1, arg2, arg3, arg4, arg5 interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CreateLaunchTemplateVersion", reflect.TypeOf((*MockEC2Interface)(nil).CreateLaunchTemplateVersion), arg0, arg1, arg2, arg3, arg4) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CreateLaunchTemplateVersion", reflect.TypeOf((*MockEC2Interface)(nil).CreateLaunchTemplateVersion), arg0, arg1, arg2, arg3, arg4, arg5) } // DeleteBastion mocks base method. @@ -200,14 +201,15 @@ func (mr *MockEC2InterfaceMockRecorder) GetInstanceSecurityGroups(arg0 interface } // GetLaunchTemplate mocks base method. -func (m *MockEC2Interface) GetLaunchTemplate(arg0 string) (*v1beta20.AWSLaunchTemplate, string, *types.NamespacedName, error) { +func (m *MockEC2Interface) GetLaunchTemplate(arg0 string) (*v1beta20.AWSLaunchTemplate, string, *types.NamespacedName, *string, error) { m.ctrl.T.Helper() ret := m.ctrl.Call(m, "GetLaunchTemplate", arg0) ret0, _ := ret[0].(*v1beta20.AWSLaunchTemplate) ret1, _ := ret[1].(string) ret2, _ := ret[2].(*types.NamespacedName) - ret3, _ := ret[3].(error) - return ret0, ret1, ret2, ret3 + ret3, _ := ret[3].(*string) + ret4, _ := ret[4].(error) + return ret0, ret1, ret2, ret3, ret4 } // GetLaunchTemplate indicates an expected call of GetLaunchTemplate. @@ -306,11 +308,12 @@ func (mr *MockEC2InterfaceMockRecorder) ModifyInstanceMetadataOptions(arg0, arg1 } // PruneLaunchTemplateVersions mocks base method. -func (m *MockEC2Interface) PruneLaunchTemplateVersions(arg0 string) error { +func (m *MockEC2Interface) PruneLaunchTemplateVersions(arg0 string) (*ec2.LaunchTemplateVersion, error) { m.ctrl.T.Helper() ret := m.ctrl.Call(m, "PruneLaunchTemplateVersions", arg0) - ret0, _ := ret[0].(error) - return ret0 + ret0, _ := ret[0].(*ec2.LaunchTemplateVersion) + ret1, _ := ret[1].(error) + return ret0, ret1 } // PruneLaunchTemplateVersions indicates an expected call of PruneLaunchTemplateVersions. diff --git a/pkg/cloud/services/mock_services/objectstore_machine_interface_mock.go b/pkg/cloud/services/mock_services/objectstore_machine_interface_mock.go index 559f356f3a..f38bf008dd 100644 --- a/pkg/cloud/services/mock_services/objectstore_machine_interface_mock.go +++ b/pkg/cloud/services/mock_services/objectstore_machine_interface_mock.go @@ -65,6 +65,21 @@ func (mr *MockObjectStoreInterfaceMockRecorder) Create(arg0, arg1 interface{}) * return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Create", reflect.TypeOf((*MockObjectStoreInterface)(nil).Create), arg0, arg1) } +// CreateForMachinePool mocks base method. +func (m *MockObjectStoreInterface) CreateForMachinePool(arg0 scope.LaunchTemplateScope, arg1 []byte) (string, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "CreateForMachinePool", arg0, arg1) + ret0, _ := ret[0].(string) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// CreateForMachinePool indicates an expected call of CreateForMachinePool. +func (mr *MockObjectStoreInterfaceMockRecorder) CreateForMachinePool(arg0, arg1 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CreateForMachinePool", reflect.TypeOf((*MockObjectStoreInterface)(nil).CreateForMachinePool), arg0, arg1) +} + // Delete mocks base method. func (m *MockObjectStoreInterface) Delete(arg0 *scope.MachineScope) error { m.ctrl.T.Helper() @@ -93,6 +108,20 @@ func (mr *MockObjectStoreInterfaceMockRecorder) DeleteBucket() *gomock.Call { return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DeleteBucket", reflect.TypeOf((*MockObjectStoreInterface)(nil).DeleteBucket)) } +// DeleteForMachinePool mocks base method. +func (m *MockObjectStoreInterface) DeleteForMachinePool(arg0 scope.LaunchTemplateScope, arg1 string) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "DeleteForMachinePool", arg0, arg1) + ret0, _ := ret[0].(error) + return ret0 +} + +// DeleteForMachinePool indicates an expected call of DeleteForMachinePool. +func (mr *MockObjectStoreInterfaceMockRecorder) DeleteForMachinePool(arg0, arg1 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DeleteForMachinePool", reflect.TypeOf((*MockObjectStoreInterface)(nil).DeleteForMachinePool), arg0, arg1) +} + // ReconcileBucket mocks base method. func (m *MockObjectStoreInterface) ReconcileBucket() error { m.ctrl.T.Helper() diff --git a/pkg/cloud/services/mock_services/reconcile_interface_mock.go b/pkg/cloud/services/mock_services/reconcile_interface_mock.go index 3771e81e3a..cc2c41e082 100644 --- a/pkg/cloud/services/mock_services/reconcile_interface_mock.go +++ b/pkg/cloud/services/mock_services/reconcile_interface_mock.go @@ -26,6 +26,7 @@ import ( gomock "github.com/golang/mock/gomock" scope "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/scope" services "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services" + reconcile "sigs.k8s.io/controller-runtime/pkg/reconcile" ) // MockMachinePoolReconcileInterface is a mock of MachinePoolReconcileInterface interface. @@ -52,17 +53,18 @@ func (m *MockMachinePoolReconcileInterface) EXPECT() *MockMachinePoolReconcileIn } // ReconcileLaunchTemplate mocks base method. -func (m *MockMachinePoolReconcileInterface) ReconcileLaunchTemplate(arg0 scope.LaunchTemplateScope, arg1 services.EC2Interface, arg2 func() (bool, error), arg3 func() error) error { +func (m *MockMachinePoolReconcileInterface) ReconcileLaunchTemplate(arg0 scope.IgnitionScope, arg1 scope.LaunchTemplateScope, arg2 scope.S3Scope, arg3 services.EC2Interface, arg4 services.ObjectStoreInterface, arg5 func() (bool, *string, error), arg6, arg7 func() error) (*reconcile.Result, error) { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "ReconcileLaunchTemplate", arg0, arg1, arg2, arg3) - ret0, _ := ret[0].(error) - return ret0 + ret := m.ctrl.Call(m, "ReconcileLaunchTemplate", arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7) + ret0, _ := ret[0].(*reconcile.Result) + ret1, _ := ret[1].(error) + return ret0, ret1 } // ReconcileLaunchTemplate indicates an expected call of ReconcileLaunchTemplate. -func (mr *MockMachinePoolReconcileInterfaceMockRecorder) ReconcileLaunchTemplate(arg0, arg1, arg2, arg3 interface{}) *gomock.Call { +func (mr *MockMachinePoolReconcileInterfaceMockRecorder) ReconcileLaunchTemplate(arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7 interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ReconcileLaunchTemplate", reflect.TypeOf((*MockMachinePoolReconcileInterface)(nil).ReconcileLaunchTemplate), arg0, arg1, arg2, arg3) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ReconcileLaunchTemplate", reflect.TypeOf((*MockMachinePoolReconcileInterface)(nil).ReconcileLaunchTemplate), arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7) } // ReconcileTags mocks base method. diff --git a/pkg/cloud/services/s3/s3.go b/pkg/cloud/services/s3/s3.go index 6eb8582585..cc645df829 100644 --- a/pkg/cloud/services/s3/s3.go +++ b/pkg/cloud/services/s3/s3.go @@ -35,8 +35,10 @@ import ( "k8s.io/utils/ptr" infrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/api/v1beta2" + "sigs.k8s.io/cluster-api-provider-aws/v2/feature" iam "sigs.k8s.io/cluster-api-provider-aws/v2/iam/api/v1beta1" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/scope" + "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services/userdata" "sigs.k8s.io/cluster-api-provider-aws/v2/util/system" ) @@ -84,6 +86,10 @@ func (s *Service) ReconcileBucket() error { return errors.Wrap(err, "ensuring bucket policy") } + if err := s.ensureBucketLifecycleConfiguration(bucketName); err != nil { + return errors.Wrap(err, "ensuring bucket lifecycle configuration") + } + return nil } @@ -99,6 +105,50 @@ func (s *Service) DeleteBucket() error { log.Info("Deleting S3 Bucket") + if feature.Gates.Enabled(feature.MachinePool) { + // Delete machine pool user data files that did not get deleted + // yet by the lifecycle policy + for { + log.Info("Listing S3 objects of machine pools") + + // TODO Switch to aws-sdk-go-v2 which has NewListObjectsV2Paginator (as part of https://github.com/kubernetes-sigs/cluster-api-provider-aws/issues/2225) + out, err := s.S3Client.ListObjectsV2(&s3.ListObjectsV2Input{ + Bucket: aws.String(bucketName), + Prefix: aws.String("machine-pool/"), + }) + if err != nil { + aerr, ok := err.(awserr.Error) + if !ok { + return errors.Wrap(err, "listing S3 bucket") + } + + switch aerr.Code() { + case s3.ErrCodeNoSuchBucket: + log.Info("Bucket already removed") + return nil + default: + return errors.Wrap(aerr, "listing S3 bucket") + } + } + + // Stop on last page of results + if len(out.Contents) == 0 { + break + } + + log.Info("Deleting S3 objects of machine pools", "count", len(out.Contents)) + for _, obj := range out.Contents { + _, err := s.S3Client.DeleteObject(&s3.DeleteObjectInput{ + Bucket: aws.String(bucketName), + Key: obj.Key, + }) + if err != nil { + return err + } + } + } + } + _, err := s.S3Client.DeleteBucket(&s3.DeleteBucketInput{ Bucket: aws.String(bucketName), }) @@ -169,6 +219,51 @@ func (s *Service) Create(m *scope.MachineScope, data []byte) (string, error) { return objectURL.String(), nil } +func (s *Service) CreateForMachinePool(scope scope.LaunchTemplateScope, data []byte) (string, error) { + if !s.bucketManagementEnabled() { + return "", errors.New("requested object creation but bucket management is not enabled") + } + + if scope.LaunchTemplateName() == "" { + return "", errors.New("launch template name can't be empty") + } + + if len(data) == 0 { + return "", errors.New("got empty data") + } + + bucket := s.bucketName() + key := s.bootstrapDataKeyForMachinePool(scope, userdata.ComputeHash(data)) + + s.scope.Info("Creating object for machine pool", "bucket_name", bucket, "key", key) + + if _, err := s.S3Client.PutObject(&s3.PutObjectInput{ + Body: aws.ReadSeekCloser(bytes.NewReader(data)), + Bucket: aws.String(bucket), + Key: aws.String(key), + ServerSideEncryption: aws.String("aws:kms"), + }); err != nil { + return "", errors.Wrap(err, "putting object for machine pool") + } + + if exp := s.scope.Bucket().PresignedURLDuration; exp != nil { + s.scope.Info("Generating presigned URL", "bucket_name", bucket, "key", key) + req, _ := s.S3Client.GetObjectRequest(&s3.GetObjectInput{ + Bucket: aws.String(bucket), + Key: aws.String(key), + }) + return req.Presign(exp.Duration) + } + + objectURL := &url.URL{ + Scheme: "s3", + Host: bucket, + Path: key, + } + + return objectURL.String(), nil +} + // Delete deletes the object from the S3 bucket. func (s *Service) Delete(m *scope.MachineScope) error { if !s.bucketManagementEnabled() { @@ -241,6 +336,42 @@ func (s *Service) deleteObject(bucket, key string) error { return nil } +func (s *Service) DeleteForMachinePool(scope scope.LaunchTemplateScope, bootstrapDataHash string) error { + if !s.bucketManagementEnabled() { + return errors.New("requested object deletion but bucket management is not enabled") + } + + if scope.LaunchTemplateName() == "" { + return errors.New("launch template name can't be empty") + } + + bucket := s.bucketName() + key := s.bootstrapDataKeyForMachinePool(scope, bootstrapDataHash) + + s.scope.Info("Deleting object for machine pool", "bucket_name", bucket, "key", key) + + _, err := s.S3Client.DeleteObject(&s3.DeleteObjectInput{ + Bucket: aws.String(bucket), + Key: aws.String(key), + }) + if err == nil { + return nil + } + + aerr, ok := err.(awserr.Error) + if !ok { + return errors.Wrap(err, "deleting S3 object for machine pool") + } + + switch aerr.Code() { + case s3.ErrCodeNoSuchBucket: + default: + return errors.Wrap(aerr, "deleting S3 object for machine pool") + } + + return nil +} + func (s *Service) createBucketIfNotExist(bucketName string) error { input := &s3.CreateBucketInput{Bucket: aws.String(bucketName)} @@ -294,6 +425,41 @@ func (s *Service) ensureBucketPolicy(bucketName string) error { return nil } +func (s *Service) ensureBucketLifecycleConfiguration(bucketName string) error { + if !feature.Gates.Enabled(feature.MachinePool) { + return nil + } + + input := &s3.PutBucketLifecycleConfigurationInput{ + Bucket: aws.String(bucketName), + LifecycleConfiguration: &s3.BucketLifecycleConfiguration{ + Rules: []*s3.LifecycleRule{ + { + ID: aws.String("machine-pool"), + Expiration: &s3.LifecycleExpiration{ + // The bootstrap token for new nodes to join the cluster is normally rotated regularly, + // such as in CAPI's `KubeadmConfig` reconciler. Therefore, the launch template user data + // stored in the S3 bucket only needs to live longer than the token TTL. + Days: aws.Int64(1), + }, + Filter: &s3.LifecycleRuleFilter{ + Prefix: aws.String("machine-pool/"), + }, + Status: aws.String(s3.ExpirationStatusEnabled), + }, + }, + }, + } + + if _, err := s.S3Client.PutBucketLifecycleConfiguration(input); err != nil { + return errors.Wrap(err, "creating S3 bucket lifecycle configuration") + } + + s.scope.Trace("Updated bucket lifecycle configuration", "bucket_name", bucketName) + + return nil +} + func (s *Service) tagBucket(bucketName string) error { taggingInput := &s3.PutBucketTaggingInput{ Bucket: aws.String(bucketName), @@ -380,6 +546,15 @@ func (s *Service) bucketPolicy(bucketName string) (string, error) { Action: []string{"s3:GetObject"}, Resource: []string{fmt.Sprintf("arn:%s:s3:::%s/node/*", partition, bucketName)}, }) + statements = append(statements, iam.StatementEntry{ + Sid: iamInstanceProfile, + Effect: iam.EffectAllow, + Principal: map[iam.PrincipalType]iam.PrincipalID{ + iam.PrincipalAWS: []string{fmt.Sprintf("arn:%s:iam::%s:role/%s", partition, *accountID.Account, iamInstanceProfile)}, + }, + Action: []string{"s3:GetObject"}, + Resource: []string{fmt.Sprintf("arn:%s:s3:::%s/machine-pool/*", partition, bucketName)}, + }) } } @@ -408,3 +583,7 @@ func (s *Service) bootstrapDataKey(m *scope.MachineScope) string { // Use machine name as object key. return path.Join(m.Role(), m.Name()) } + +func (s *Service) bootstrapDataKeyForMachinePool(scope scope.LaunchTemplateScope, dataHash string) string { + return path.Join("machine-pool", scope.LaunchTemplateName(), dataHash) +} diff --git a/pkg/cloud/services/s3/s3_test.go b/pkg/cloud/services/s3/s3_test.go index 3db7abfca7..4a1a598ba1 100644 --- a/pkg/cloud/services/s3/s3_test.go +++ b/pkg/cloud/services/s3/s3_test.go @@ -32,9 +32,11 @@ import ( "github.com/golang/mock/gomock" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + utilfeature "k8s.io/component-base/featuregate/testing" "sigs.k8s.io/controller-runtime/pkg/client/fake" infrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/api/v1beta2" + "sigs.k8s.io/cluster-api-provider-aws/v2/feature" iamv1 "sigs.k8s.io/cluster-api-provider-aws/v2/iam/api/v1beta1" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/scope" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services/s3" @@ -49,8 +51,6 @@ const ( ) func TestReconcileBucket(t *testing.T) { - t.Parallel() - t.Run("does_nothing_when_bucket_management_is_disabled", func(t *testing.T) { t.Parallel() @@ -62,7 +62,7 @@ func TestReconcileBucket(t *testing.T) { }) t.Run("creates_bucket_with_configured_name", func(t *testing.T) { - t.Parallel() + defer utilfeature.SetFeatureGateDuringTest(t, feature.Gates, feature.MachinePool, true)() expectedBucketName := "baz" @@ -104,6 +104,7 @@ func TestReconcileBucket(t *testing.T) { s3Mock.EXPECT().PutBucketTagging(gomock.Eq(taggingInput)).Return(nil, nil).Times(1) s3Mock.EXPECT().PutBucketPolicy(gomock.Any()).Return(nil, nil).Times(1) + s3Mock.EXPECT().PutBucketLifecycleConfiguration(gomock.Any()).Return(nil, nil).Times(1) if err := svc.ReconcileBucket(); err != nil { t.Fatalf("Unexpected error: %v", err) @@ -111,7 +112,7 @@ func TestReconcileBucket(t *testing.T) { }) t.Run("hashes_default_bucket_name_if_name_exceeds_maximum_length", func(t *testing.T) { - t.Parallel() + defer utilfeature.SetFeatureGateDuringTest(t, feature.Gates, feature.MachinePool, true)() mockCtrl := gomock.NewController(t) s3Mock := mock_s3iface.NewMockS3API(mockCtrl) @@ -159,6 +160,7 @@ func TestReconcileBucket(t *testing.T) { s3Mock.EXPECT().PutBucketTagging(gomock.Any()).Return(nil, nil).Times(1) s3Mock.EXPECT().PutBucketPolicy(gomock.Any()).Return(nil, nil).Times(1) + s3Mock.EXPECT().PutBucketLifecycleConfiguration(gomock.Any()).Return(nil, nil).Times(1) if err := svc.ReconcileBucket(); err != nil { t.Fatalf("Unexpected error: %v", err) @@ -166,7 +168,7 @@ func TestReconcileBucket(t *testing.T) { }) t.Run("creates_bucket_with_policy_allowing_controlplane_and_worker_nodes_to_read_their_secrets", func(t *testing.T) { - t.Parallel() + defer utilfeature.SetFeatureGateDuringTest(t, feature.Gates, feature.MachinePool, true)() bucketName := "bar" @@ -213,6 +215,7 @@ func TestReconcileBucket(t *testing.T) { t.Errorf("Expected deny when not using SecureTransport; got: %v", policy) } }).Return(nil, nil).Times(1) + s3Mock.EXPECT().PutBucketLifecycleConfiguration(gomock.Any()).Return(nil, nil).Times(1) if err := svc.ReconcileBucket(); err != nil { t.Fatalf("Unexpected error: %v", err) @@ -220,13 +223,14 @@ func TestReconcileBucket(t *testing.T) { }) t.Run("is_idempotent", func(t *testing.T) { - t.Parallel() + defer utilfeature.SetFeatureGateDuringTest(t, feature.Gates, feature.MachinePool, true)() svc, s3Mock := testService(t, &testServiceInput{Bucket: &infrav1.S3Bucket{}}) s3Mock.EXPECT().CreateBucket(gomock.Any()).Return(nil, nil).Times(2) s3Mock.EXPECT().PutBucketTagging(gomock.Any()).Return(nil, nil).Times(2) s3Mock.EXPECT().PutBucketPolicy(gomock.Any()).Return(nil, nil).Times(2) + s3Mock.EXPECT().PutBucketLifecycleConfiguration(gomock.Any()).Return(nil, nil).Times(2) if err := svc.ReconcileBucket(); err != nil { t.Fatalf("Unexpected error: %v", err) @@ -238,7 +242,7 @@ func TestReconcileBucket(t *testing.T) { }) t.Run("ignores_when_bucket_already_exists_but_its_owned_by_the_same_account", func(t *testing.T) { - t.Parallel() + defer utilfeature.SetFeatureGateDuringTest(t, feature.Gates, feature.MachinePool, true)() svc, s3Mock := testService(t, &testServiceInput{Bucket: &infrav1.S3Bucket{}}) @@ -247,6 +251,7 @@ func TestReconcileBucket(t *testing.T) { s3Mock.EXPECT().CreateBucket(gomock.Any()).Return(nil, err).Times(1) s3Mock.EXPECT().PutBucketTagging(gomock.Any()).Return(nil, nil).Times(1) s3Mock.EXPECT().PutBucketPolicy(gomock.Any()).Return(nil, nil).Times(1) + s3Mock.EXPECT().PutBucketLifecycleConfiguration(gomock.Any()).Return(nil, nil).Times(1) if err := svc.ReconcileBucket(); err != nil { t.Fatalf("Unexpected error, got: %v", err) @@ -326,6 +331,7 @@ func TestReconcileBucket(t *testing.T) { s3Mock.EXPECT().CreateBucket(gomock.Eq(input)).Return(nil, nil).Times(1) s3Mock.EXPECT().PutBucketTagging(gomock.Any()).Return(nil, nil).Times(1) + s3Mock.EXPECT().PutBucketLifecycleConfiguration(gomock.Any()).Return(nil, nil).Times(1) s3Mock.EXPECT().PutBucketPolicy(gomock.Any()).Return(nil, nil).Times(1) if err := svc.ReconcileBucket(); err != nil { @@ -341,7 +347,7 @@ func TestDeleteBucket(t *testing.T) { const bucketName = "foo" t.Run("does_nothing_when_bucket_management_is_disabled", func(t *testing.T) { - t.Parallel() + defer utilfeature.SetFeatureGateDuringTest(t, feature.Gates, feature.MachinePool, true)() svc, _ := testService(t, nil) @@ -351,7 +357,7 @@ func TestDeleteBucket(t *testing.T) { }) t.Run("deletes_bucket_with_configured_name", func(t *testing.T) { - t.Parallel() + defer utilfeature.SetFeatureGateDuringTest(t, feature.Gates, feature.MachinePool, true)() svc, s3Mock := testService(t, &testServiceInput{ Bucket: &infrav1.S3Bucket{ @@ -363,6 +369,7 @@ func TestDeleteBucket(t *testing.T) { Bucket: aws.String(bucketName), } + s3Mock.EXPECT().ListObjectsV2(gomock.Any()).Return(&s3svc.ListObjectsV2Output{}, nil).Times(1) s3Mock.EXPECT().DeleteBucket(input).Return(nil, nil).Times(1) if err := svc.DeleteBucket(); err != nil { @@ -371,12 +378,12 @@ func TestDeleteBucket(t *testing.T) { }) t.Run("returns_error_when_bucket_removal_returns", func(t *testing.T) { - t.Parallel() t.Run("unexpected_error", func(t *testing.T) { - t.Parallel() + defer utilfeature.SetFeatureGateDuringTest(t, feature.Gates, feature.MachinePool, true)() svc, s3Mock := testService(t, &testServiceInput{Bucket: &infrav1.S3Bucket{}}) + s3Mock.EXPECT().ListObjectsV2(gomock.Any()).Return(&s3svc.ListObjectsV2Output{}, nil).Times(1) s3Mock.EXPECT().DeleteBucket(gomock.Any()).Return(nil, errors.New("err")).Times(1) if err := svc.DeleteBucket(); err == nil { @@ -385,10 +392,11 @@ func TestDeleteBucket(t *testing.T) { }) t.Run("unexpected_AWS_error", func(t *testing.T) { - t.Parallel() + defer utilfeature.SetFeatureGateDuringTest(t, feature.Gates, feature.MachinePool, true)() svc, s3Mock := testService(t, &testServiceInput{Bucket: &infrav1.S3Bucket{}}) + s3Mock.EXPECT().ListObjectsV2(gomock.Any()).Return(&s3svc.ListObjectsV2Output{}, nil).Times(1) s3Mock.EXPECT().DeleteBucket(gomock.Any()).Return(nil, awserr.New("foo", "", nil)).Times(1) if err := svc.DeleteBucket(); err == nil { @@ -398,10 +406,11 @@ func TestDeleteBucket(t *testing.T) { }) t.Run("ignores_when_bucket_has_already_been_removed", func(t *testing.T) { - t.Parallel() + defer utilfeature.SetFeatureGateDuringTest(t, feature.Gates, feature.MachinePool, true)() svc, s3Mock := testService(t, &testServiceInput{Bucket: &infrav1.S3Bucket{}}) + s3Mock.EXPECT().ListObjectsV2(gomock.Any()).Return(&s3svc.ListObjectsV2Output{}, nil).Times(1) s3Mock.EXPECT().DeleteBucket(gomock.Any()).Return(nil, awserr.New(s3svc.ErrCodeNoSuchBucket, "", nil)).Times(1) if err := svc.DeleteBucket(); err != nil { @@ -410,10 +419,11 @@ func TestDeleteBucket(t *testing.T) { }) t.Run("skips_bucket_removal_when_bucket_is_not_empty", func(t *testing.T) { - t.Parallel() + defer utilfeature.SetFeatureGateDuringTest(t, feature.Gates, feature.MachinePool, true)() svc, s3Mock := testService(t, &testServiceInput{Bucket: &infrav1.S3Bucket{}}) + s3Mock.EXPECT().ListObjectsV2(gomock.Any()).Return(&s3svc.ListObjectsV2Output{}, nil).Times(1) s3Mock.EXPECT().DeleteBucket(gomock.Any()).Return(nil, awserr.New("BucketNotEmpty", "", nil)).Times(1) if err := svc.DeleteBucket(); err != nil { diff --git a/pkg/cloud/tags/tags.go b/pkg/cloud/tags/tags.go index 42c8bfd843..47654eb4a4 100644 --- a/pkg/cloud/tags/tags.go +++ b/pkg/cloud/tags/tags.go @@ -21,6 +21,7 @@ import ( "context" "fmt" "sort" + "strings" "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/service/ec2" @@ -32,6 +33,10 @@ import ( infrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/api/v1beta2" ) +const ( + AwsInternalTagPrefix = "aws:" // AwsInternalTagPrefix is the prefix for AWS internal tags, which are reserved for internal AWS use. +) + var ( // ErrBuildParamsRequired defines an error for when no build params are supplied. ErrBuildParamsRequired = errors.New("no build params supplied") @@ -99,7 +104,10 @@ func WithEC2(ec2client ec2iface.EC2API) BuilderOption { // For testing, we need sorted keys sortedKeys := make([]string, 0, len(tags)) for k := range tags { - sortedKeys = append(sortedKeys, k) + // We want to filter out the tag keys that start with `aws:` as they are reserved for internal AWS use. + if !strings.HasPrefix(k, AwsInternalTagPrefix) { + sortedKeys = append(sortedKeys, k) + } } sort.Strings(sortedKeys) @@ -127,10 +135,12 @@ func WithEKS(eksclient eksiface.EKSAPI) BuilderOption { return func(b *Builder) { b.applyFunc = func(params *infrav1.BuildParams) error { tags := infrav1.Build(*params) - eksTags := make(map[string]*string, len(tags)) for k, v := range tags { - eksTags[k] = aws.String(v) + // We want to filter out the tag keys that start with `aws:` as they are reserved for internal AWS use. + if !strings.HasPrefix(k, AwsInternalTagPrefix) { + eksTags[k] = aws.String(v) + } } tagResourcesInput := &eks.TagResourceInput{ diff --git a/pkg/cloud/tags/tags_test.go b/pkg/cloud/tags/tags_test.go index 536db71c61..7ea0006d86 100644 --- a/pkg/cloud/tags/tags_test.go +++ b/pkg/cloud/tags/tags_test.go @@ -34,14 +34,7 @@ import ( ) var ( - bp = infrav1.BuildParams{ - Lifecycle: infrav1.ResourceLifecycleOwned, - ClusterName: "testcluster", - Name: aws.String("test"), - Role: aws.String("testrole"), - Additional: map[string]string{"k1": "v1"}, - } - tags = []*ec2.Tag{ + expectedTags = []*ec2.Tag{ { Key: aws.String("Name"), Value: aws.String("test"), @@ -140,30 +133,64 @@ func TestTagsEnsureWithEC2(t *testing.T) { expect func(m *mocks.MockEC2APIMockRecorder) }{ { - name: "Should return error when create tag fails", - builder: Builder{params: &bp}, + name: "Should return error when create tag fails", + builder: Builder{params: &infrav1.BuildParams{ + Lifecycle: infrav1.ResourceLifecycleOwned, + ClusterName: "testcluster", + Name: aws.String("test"), + Role: aws.String("testrole"), + Additional: map[string]string{"k1": "v1"}, + }}, expect: func(m *mocks.MockEC2APIMockRecorder) { m.CreateTagsWithContext(context.TODO(), gomock.Eq(&ec2.CreateTagsInput{ Resources: aws.StringSlice([]string{""}), - Tags: tags, + Tags: expectedTags, })).Return(nil, errors.New("failed to create tag")) }, }, { - name: "Should return error when optional configuration for builder is nil", - builder: Builder{params: &bp, applyFunc: nil}, + name: "Should return error when optional configuration for builder is nil", + builder: Builder{params: &infrav1.BuildParams{ + Lifecycle: infrav1.ResourceLifecycleOwned, + ClusterName: "testcluster", + Name: aws.String("test"), + Role: aws.String("testrole"), + Additional: map[string]string{"k1": "v1"}, + }, applyFunc: nil}, }, { name: "Should return error when build params is nil", builder: Builder{params: nil}, }, { - name: "Should ensure tags successfully", - builder: Builder{params: &bp}, + name: "Should ensure tags successfully", + builder: Builder{params: &infrav1.BuildParams{ + Lifecycle: infrav1.ResourceLifecycleOwned, + ClusterName: "testcluster", + Name: aws.String("test"), + Role: aws.String("testrole"), + Additional: map[string]string{"k1": "v1"}, + }}, + expect: func(m *mocks.MockEC2APIMockRecorder) { + m.CreateTagsWithContext(context.TODO(), gomock.Eq(&ec2.CreateTagsInput{ + Resources: aws.StringSlice([]string{""}), + Tags: expectedTags, + })).Return(nil, nil) + }, + }, + { + name: "Should filter internal aws tags", + builder: Builder{params: &infrav1.BuildParams{ + Lifecycle: infrav1.ResourceLifecycleOwned, + ClusterName: "testcluster", + Name: aws.String("test"), + Role: aws.String("testrole"), + Additional: map[string]string{"k1": "v1", "aws:cloudformation:stack-name": "cloudformation-stack-name"}, + }}, expect: func(m *mocks.MockEC2APIMockRecorder) { m.CreateTagsWithContext(context.TODO(), gomock.Eq(&ec2.CreateTagsInput{ Resources: aws.StringSlice([]string{""}), - Tags: tags, + Tags: expectedTags, })).Return(nil, nil) }, }, @@ -198,8 +225,14 @@ func TestTagsEnsureWithEKS(t *testing.T) { expect func(m *mock_eksiface.MockEKSAPIMockRecorder) }{ { - name: "Should return error when tag resources fails", - builder: Builder{params: &bp}, + name: "Should return error when tag resources fails", + builder: Builder{params: &infrav1.BuildParams{ + Lifecycle: infrav1.ResourceLifecycleOwned, + ClusterName: "testcluster", + Name: aws.String("test"), + Role: aws.String("testrole"), + Additional: map[string]string{"k1": "v1"}, + }}, expect: func(m *mock_eksiface.MockEKSAPIMockRecorder) { m.TagResource(gomock.Eq(&eks.TagResourceInput{ ResourceArn: aws.String(""), @@ -208,8 +241,14 @@ func TestTagsEnsureWithEKS(t *testing.T) { }, }, { - name: "Should ensure tags successfully", - builder: Builder{params: &bp}, + name: "Should ensure tags successfully", + builder: Builder{params: &infrav1.BuildParams{ + Lifecycle: infrav1.ResourceLifecycleOwned, + ClusterName: "testcluster", + Name: aws.String("test"), + Role: aws.String("testrole"), + Additional: map[string]string{"k1": "v1"}, + }}, expect: func(m *mock_eksiface.MockEKSAPIMockRecorder) { m.TagResource(gomock.Eq(&eks.TagResourceInput{ ResourceArn: aws.String(""), @@ -243,10 +282,16 @@ func TestTagsEnsureWithEKS(t *testing.T) { func TestTagsBuildParamsToTagSpecification(t *testing.T) { g := NewWithT(t) - tagSpec := BuildParamsToTagSpecification("test-resource", bp) + tagSpec := BuildParamsToTagSpecification("test-resource", infrav1.BuildParams{ + Lifecycle: infrav1.ResourceLifecycleOwned, + ClusterName: "testcluster", + Name: aws.String("test"), + Role: aws.String("testrole"), + Additional: map[string]string{"k1": "v1"}, + }) expectedTagSpec := &ec2.TagSpecification{ ResourceType: aws.String("test-resource"), - Tags: tags, + Tags: expectedTags, } g.Expect(expectedTagSpec).To(Equal(tagSpec)) }