From 08280ec433b63119d652b39c06d9a4b78d5a9055 Mon Sep 17 00:00:00 2001 From: Andreas Sommer Date: Mon, 27 Nov 2023 12:25:00 +0100 Subject: [PATCH 1/7] Add Giant Swarm fork modifications --- .circleci/config.yml | 83 +++++++++++ .github/workflows/release.yml | 76 ++++++++++ README.md | 270 ++++++++++------------------------ 3 files changed, 235 insertions(+), 194 deletions(-) create mode 100644 .circleci/config.yml create mode 100644 .github/workflows/release.yml diff --git a/.circleci/config.yml b/.circleci/config.yml new file mode 100644 index 0000000000..9e324a132f --- /dev/null +++ b/.circleci/config.yml @@ -0,0 +1,83 @@ +version: 2.1 + + +jobs: + build: + machine: + image: "ubuntu-2204:2022.10.2" + environment: + ALL_ARCH: "amd64 arm64" + REGISTRY_AZURE: gsoci.azurecr.io/giantswarm + REGISTRY_QUAY: quay.io/giantswarm + REGISTRY_CHINA: giantswarm-registry.cn-shanghai.cr.aliyuncs.com/giantswarm + steps: + - checkout + + - run: + name: Build the CAPA docker images + command: | + for registry in $REGISTRY_AZURE $REGISTRY_QUAY $REGISTRY_CHINA; do + make docker-build-all ALL_ARCH="$ALL_ARCH" TAG=$CIRCLE_SHA1 REGISTRY=$registry + + if [ -n "$CIRCLE_TAG" ]; then + echo "Building tag $CIRCLE_TAG" + make docker-build-all ALL_ARCH="$ALL_ARCH" TAG="$CIRCLE_TAG" REGISTRY=$registry + fi + done + + - run: + name: Push to Azure + command: | + docker login --username $ACR_GSOCI_USERNAME --password $ACR_GSOCI_PASSWORD "${REGISTRY_AZURE%/*}" + + make docker-push-all ALL_ARCH="$ALL_ARCH" TAG=$CIRCLE_SHA1 REGISTRY=$REGISTRY_AZURE + + if [ -n "$CIRCLE_TAG" ]; then + echo "Pushing tag $CIRCLE_TAG" + make docker-push-all ALL_ARCH="$ALL_ARCH" TAG="$CIRCLE_TAG" REGISTRY=$REGISTRY_AZURE + fi + + - run: + name: Push to quay + command: | + docker login --username $QUAY_USERNAME --password $QUAY_PASSWORD quay.io + + make docker-push-all ALL_ARCH="$ALL_ARCH" TAG=$CIRCLE_SHA1 REGISTRY=$REGISTRY_QUAY + + if [ -n "$CIRCLE_TAG" ]; then + echo "Pushing tag $CIRCLE_TAG" + make docker-push-all ALL_ARCH="$ALL_ARCH" TAG="$CIRCLE_TAG" REGISTRY=$REGISTRY_QUAY + fi + + - run: + name: Push to aliyun + command: | + for n in $(seq 1 5); do + ( + set -eu + docker login --username $ALIYUN_USERNAME --password $ALIYUN_PASSWORD giantswarm-registry.cn-shanghai.cr.aliyuncs.com + + make docker-push-all ALL_ARCH="$ALL_ARCH" TAG=$CIRCLE_SHA1 REGISTRY=$REGISTRY_CHINA + + if [ -n "${CIRCLE_TAG:-}" ]; then + echo "Pushing tag $CIRCLE_TAG" + make docker-push-all ALL_ARCH="$ALL_ARCH" TAG="$CIRCLE_TAG" REGISTRY=$REGISTRY_CHINA + fi + ) || { echo "Failed attempt ${n}"; sleep 30; continue; } + + echo "Succeeded in attempt ${n}" + exit 0 + done + + exit 1 + +workflows: + version: 2 + build_and_update: + jobs: + - build: + context: + - architect + filters: + tags: + only: /^v.*/ diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml new file mode 100644 index 0000000000..5034a43be2 --- /dev/null +++ b/.github/workflows/release.yml @@ -0,0 +1,76 @@ +# As opposed to https://github.com/kubernetes-sigs/cluster-api (CAPI), the CAPA upstream project does not offer +# a GitHub action to automatically create releases from tags (as of 2023-08-21). Therefore, this is a Giant Swarm +# fork-specific addition. We require a GitHub release containing the YAML manifests which we use in +# cluster-api-provider-aws-app. Since doing this manually is very error-prone (see +# `docs/book/src/development/releasing.md`), we run the needed commands here. 
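+#
+# Note: this workflow only assembles and uploads the YAML release artifacts. The container images for the
+# same tag are built and pushed separately by the CircleCI pipeline in `.circleci/config.yml`.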
+ +name: release + +on: + push: + tags: + - 'v*' + +permissions: + contents: write # allow creating a release + +jobs: + build: + name: Create draft release + runs-on: ubuntu-latest + env: + GH_ORG_NAME: giantswarm + steps: + - name: Set env + run: | + if echo "${GITHUB_REF}" | grep -qF "vX.Y"; then + >&2 echo "ERROR: Oops, you copy-pasted verbatim from the README.md - please ensure to replace 'vX.Y.Z' with an actual tag" + exit 1 + fi + + echo "RELEASE_TAG=${GITHUB_REF:10}" >> $GITHUB_ENV # strip off `refs/tags/` prefix + + - name: Check out code + uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3.5.0 + with: + fetch-depth: 0 + + # - name: Calculate Go version + # run: echo "go_version=$(make go-version)" >> $GITHUB_ENV + + # - name: Set up Go + # uses: actions/setup-go@6edd4406fa81c3da01a34fa6f6343087c207a568 # tag=v3.5.0 + # with: + # go-version: ${{ env.go_version }} + + - name: Generate release artifacts + env: + GITHUB_TOKEN: "unused" # since we create the release without using CAPA's Makefile target + run: | + # Dealing with changelogs isn't that easy since Giant Swarm releases can jump from `v2.2.0` to `v2.3.1` + # as we skip intermediate releases. Therefore, finding the required value for `PREVIOUS_VERSION` would be + # a manual task. Better not deal with the changelog right now since it's unlikely that someone will look + # at those in our fork (as compared to upstream's releases). + printf '#!/bin/sh\necho "Changelogs are not filled in this fork"\n' > hack/releasechangelog.sh # old path of this tool + mkdir -p hack/tools/bin + printf '#!/bin/sh\necho "Changelogs are not filled in this fork" > out/CHANGELOG.md\n' > hack/tools/bin/release-notes + chmod +x hack/tools/bin/release-notes + + # We don't need the binaries and other stuff in the release, either. Really only the YAML manifests. + sed -i -E -e '/\$\(MAKE\) (release-binaries|release-templates|release-policies)/d' Makefile + sed -i -E -e '/cp metadata.yaml/d' Makefile + + # To allow the above changes since normally the Makefile wouldn't allow a dirty Git repo + sed -i -e '/Your local git repository contains uncommitted changes/d' Makefile + + (set -x; make PREVIOUS_VERSION="${RELEASE_TAG}" RELEASE_TAG="${RELEASE_TAG}" release) + + # Instead of `make VERSION="${RELEASE_TAG}" create-gh-release upload-gh-artifacts`, which requires GitHub CLI + # authentication, use an action which does the same. + - name: Release + uses: softprops/action-gh-release@de2c0eb89ae2a093876385947365aca7b0e5f844 # tag=v1 + with: + draft: true + files: out/* + body: "This fork does not provide release changelogs." + # `name` not needed since this takes the tag by default (which we also use above as ${RELEASE_TAG}) diff --git a/README.md b/README.md index b62fde62c3..dddceb0f88 100644 --- a/README.md +++ b/README.md @@ -1,230 +1,112 @@ -# Kubernetes Cluster API Provider AWS +# Cluster API Provider AWS -

-Powered by AWS Cloud Computing
+This is Giant Swarm's fork. See the upstream [cluster-api-provider-aws README](https://github.com/kubernetes-sigs/cluster-api-provider-aws/blob/main/README.md) for official documentation. ------- +## How to work with this repo -Kubernetes-native declarative infrastructure for AWS. +Currently, we try to follow the upstream `release-X.Y` branch to always get the latest stable release and fixes, but not untested commits from `main`. Our only differences against upstream should be in this `README.md`, `.circleci/` and `.github/workflows/release.yml`. Other changes should be opened as PR for the upstream project first. -## What is the Cluster API Provider AWS +We release cluster-api-provider-aws versions with [cluster-api-provider-aws-app](https://github.com/giantswarm/cluster-api-provider-aws-app/). To provide the YAML manifests, we use GitHub releases as the upstream project. The scripts in `cluster-api-provider-aws-app` convert them into the final manifests. -The [Cluster API][cluster_api] brings -declarative, Kubernetes-style APIs to cluster creation, configuration and -management. +### Repo setup -The API itself is shared across multiple cloud providers allowing for true AWS -hybrid deployments of Kubernetes. It is built atop the lessons learned from -previous cluster managers such as [kops][kops] and -[kubicorn][kubicorn]. +Since we follow upstream, add their Git repo as remote from which we merge commits: -## Documentation - -Please see our [book](https://cluster-api-aws.sigs.k8s.io) for in-depth documentation. - -## Launching a Kubernetes cluster on AWS - -Check out the [Cluster API Quick Start](https://cluster-api.sigs.k8s.io/user/quick-start.html) for launching a -cluster on AWS. - -## Features +```sh +git clone git@github.com:giantswarm/cluster-api-provider-aws.git +cd cluster-api-provider-aws +git remote add upstream https://github.com/kubernetes-sigs/cluster-api-provider-aws.git +``` -- Native Kubernetes manifests and API -- Manages the bootstrapping of VPCs, gateways, security groups and instances. -- Choice of Linux distribution among Amazon Linux 2, CentOS 7, Ubuntu(18.04, 20.04) and Flatcar - using [pre-baked AMIs][published_amis]. -- Deploys Kubernetes control planes into private subnets with a separate - bastion server. -- Doesn't use SSH for bootstrapping nodes. -- Installs only the minimal components to bootstrap a control plane and workers. -- Supports control planes on EC2 instances. -- [EKS support][eks_support] +### Test and release ------- +If you have a non-urgent fix, create an upstream PR and wait until it gets released. We call this release `vX.Y.Z` in the below instructions, so please fill in the desired tag. -## Compatibility with Cluster API and Kubernetes Versions +Please follow the development workflow: -This provider's versions are compatible with the following versions of Cluster API -and support all Kubernetes versions that is supported by its compatible Cluster API version: +- Ensure a stable release branch exists in our fork repo. For example with a desired upstream release v2.2.1, the branch is `release-2.2`. If it does not exist on our side yet, copy the branch from upstream and add our changes such as `README.md` and `.circleci/` on top. +- Create a working branch for your changes +- We want to use stable upstream release tags unless a hotfix is absolutely required ([decision](https://intranet.giantswarm.io/docs/product/pdr/010_fork_management/)). 
Please decide what type of change you're making: -| | Cluster API v1alpha4 (v0.4) | Cluster API v1beta1 (v1.x) | -| --------------------------- | :-------------------------: | :-------------------------: | -| CAPA v1alpha4 `(v0.7)` | ✓ | ☓ | -| CAPA v1beta1 `(v1.x)` | ☓ | ✓ | -| CAPA v1beta2 `(v2.x, main)`| ☓ | ✓ | + - Either: you want to merge and test the latest upstream tag -(See [Kubernetes support matrix][cluster-api-supported-v] of Cluster API versions). + ```sh + # Get latest changes on our release branch + git checkout release-X.Y + git pull ------- + git fetch upstream -## Kubernetes versions with published AMIs + git checkout -b my-working-branch release-X.Y -See [amis] for the list of most recently published AMIs. + # Create a merge commit using upstream's desired release tag (the one we want + # to upgrade to). + # This creates a commit message such as "Merge tag 'v2.2.1' into release-2.2". + git merge --no-ff vX.Y.Z ------- + # Since we want the combined content of our repo and the upstream Git tag, + # we need to create our own tag on the merge commit + git tag "vX.Y.Z-gs-$(git rev-parse --short HEAD)" -## clusterawsadm + # Push your working branch. This triggers image build in CircleCI. + git push -`clusterawsadm` CLI tool provides bootstrapping, AMI, EKS, and controller related helpers. + # Push your Giant Swarm tag (assuming `origin` is the Giant Swarm fork). + # This triggers the GitHub release action - please continue reading below! + git push origin "vX.Y.Z-gs-$(git rev-parse --short HEAD)" + ``` -`clusterawsadm` binaries are released with each release, can be found under [assets](https://github.com/kubernetes-sigs/cluster-api-provider-aws/releases/latest) section. + - Or: you want to implement something else, such as working on some issue that we have which is not fixed in upstream yet. Note that for testing changes to upstream, you probably better base your work on the `upstream/main` branch and try your change together with the latest commits from upstream. This also avoids merge conflicts. Maintainers can then help you cherry-pick into their release branches. The latest release branch is usually a bit behind `main`. -`clusterawsadm` could also be installed via Homebrew on macOS and linux OS. -Install the latest release using homebrew: + ```sh + # Get latest changes on our release branch + git checkout release-X.Y + git pull -```shell -brew install clusterawsadm -``` + git checkout -b my-working-branch release-X.Y # or based on `main` instead of `release-X.Y`, see hint above -Test to ensure the version you installed is up-to-date: + # Make some changes and commit as usual + git commit -```shell -clusterawsadm version -``` + git tag "vX.Y.Z-gs-$(git rev-parse --short HEAD)" ------- + # Push your working branch. This triggers image build in CircleCI + git push -## Getting involved and contributing + # Push your Giant Swarm tag (assuming `origin` is the Giant Swarm fork). + # This triggers the GitHub release action - please continue reading below! + git push origin "vX.Y.Z-gs-$(git rev-parse --short HEAD)" + ``` -Are you interested in contributing to cluster-api-provider-aws? We, the -maintainers and community, would love your suggestions, contributions, and help! -Also, the maintainers can be contacted at any time to learn more about how to get -involved. +- Check that the [CircleCI pipeline](https://app.circleci.com/pipelines/github/giantswarm/cluster-api-provider-aws) succeeds for the desired Git tag in order to produce images. If the tag build fails, fix it. 
+- Check that the [GitHub release action](https://github.com/giantswarm/cluster-api-provider-aws/actions) for the `vX.Y.Z-gs-...` tag succeeds
+- Edit [that draft GitHub release](https://github.com/giantswarm/cluster-api-provider-aws/releases) and turn it from draft to released. This makes the release's manifest files available on the internet, as used in [cluster-api-provider-aws-app](https://github.com/giantswarm/cluster-api-provider-aws-app).
+- Test the changes in the app

-In the interest of getting more new people involved we tag issues with
-[`good first issue`][good_first_issue].
-These are typically issues that have smaller scope but are good ways to start
-to get acquainted with the codebase.
+  - Replace `.tag` in [cluster-api-provider-aws-app's `values.yaml`](https://github.com/giantswarm/cluster-api-provider-aws-app/blob/master/helm/cluster-api-provider-aws/values.yaml) with the new tag `vX.Y.Z-gs-...`.
+  - Run `cd cluster-api-provider-aws-app && make generate` to update manifests
+  - Commit and push your working branch for `cluster-api-provider-aws-app` to trigger the CircleCI pipeline
+  - Install and test the app thoroughly on a management cluster. Continue with the next step only once you're confident.
+- Open PR for `cluster-api-provider-aws` fork (your working branch)

-We also encourage ALL active community participants to act as if they are
-maintainers, even if you don't have "official" write permissions. This is a
-community effort, we are here to serve the Kubernetes community. If you have an
-active interest and you want to get involved, you have real power! Don't assume
-that the only people who can get things done around here are the "maintainers".
+  - If you merged an upstream release tag, we should target our `release-X.Y` branch with the PR.
+  - On the other hand, if you implemented something else which is not in upstream yet, we should target `upstream/main` so that it first lands in the upstream project, officially approved, tested and released. Afterwards, you would repeat this whole procedure and merge the release that includes your fix. For a quick in-house hotfix, you can alternatively do a quicker PR targeted against our `release-X.Y` branch.
+- Also open PR for `cluster-api-provider-aws-app` change

-We also would love to add more "official" maintainers, so show us what you can
-do!
+- Once merged, manually bump the version in the respective collection to deploy it for one provider (e.g. [capa-app-collection](https://github.com/giantswarm/capa-app-collection/))

-This repository uses the Kubernetes bots. See a full list of the commands [here][prow].
+### Keep fork customizations up to date

-### Build the images locally
+Only `README.md`, `.circleci/` and `.github/workflows/release.yml` should differ between upstream and our fork, so the diff of everything else should be empty, or at worst, contain hotfixes that are not in upstream yet:

-If you want to just build the CAPA containers locally, run
+```sh
+git fetch upstream
+git diff `# the upstream tag we merged recently` vX.Y.Z..origin/release-X.Y `# our release branch` -- ':!.circleci/' ':!.github/workflows/release.yml' ':!README.md'
+```

-```shell
- REGISTRY=docker.io/my-reg make docker-build
+And we should also keep our `main` and `release-X.Y` branches in sync, so this diff should be empty:
+
+```sh
+git diff main..release-X.Y -- .circleci/ .github/workflows/release.yml README.md
 ```

-### Tilt-based development environment
-
-See [development][development] section for details.
- -### Implementer office hours - -Maintainers hold office hours every two weeks, with sessions open to all -developers working on this project. - -Office hours are hosted on a zoom video chat every other Monday -at 09:00 (Pacific) / 12:00 (Eastern) / 17:00 (Europe/London), -and are published on the [Kubernetes community meetings calendar][gcal]. - -### Other ways to communicate with the contributors - -Please check in with us in the [#cluster-api-aws][slack] channel on Slack. - -## Github issues - -### Bugs - -If you think you have found a bug please follow the instructions below. - -- Please spend a small amount of time giving due diligence to the issue tracker. Your issue might be a duplicate. -- Get the logs from the cluster controllers. Please paste this into your issue. -- Open a [new issue][new_issue]. -- Remember that users might be searching for your issue in the future, so please give it a meaningful title to help others. -- Feel free to reach out to the cluster-api community on the [kubernetes slack][slack]. - -### Tracking new features - -We also use the issue tracker to track features. If you have an idea for a feature, or think you can help kops become even more awesome follow the steps below. - -- Open a [new issue][new_issue]. -- Remember that users might be searching for your issue in the future, so please - give it a meaningful title to help others. -- Clearly define the use case, using concrete examples. EG: I type `this` and - cluster-api-provider-aws does `that`. -- Some of our larger features will require some design. If you would like to - include a technical design for your feature please include it in the issue. -- After the new feature is well understood, and the design agreed upon, we can - start coding the feature. We would love for you to code it. So please open - up a **WIP** *(work in progress)* pull request, and happy coding. - ->“Amazon Web Services, AWS, and the “Powered by AWS” logo materials are -trademarks of Amazon.com, Inc. or its affiliates in the United States -and/or other countries." 
- -## Our Contributors - -Thank you to all contributors and a special thanks to our current maintainers & reviewers: - -| Maintainers | Reviewers | -|------------------------------------------------------------------| -------------------------------------------------------------------- | -| [@richardcase](https://github.com/richardcase) (from 2020-12-04) | [@cnmcavoy](https://github.com/cnmcavoy) (from 2023-10-16) | -| [@Ankitasw](https://github.com/Ankitasw) (from 2022-10-19) | [@AverageMarcus](https://github.com/AverageMarcus) (from 2022-10-19) | -| [@dlipovetsky](https://github.com/dlipovetsky) (from 2021-10-31) | [@luthermonson](https://github.com/luthermonson ) (from 2023-03-08) | -| [@vincepri](https://github.com/vincepri) (og & from 2023-10-16) | [@nrb](https://github.com/nrb) (from 2023-10-16) | -| | [@faiq](https://github.com/faiq) (from 2023-10-16) | -| | [@fiunchinho](https://github.com/fiunchinho) (from 2023-11-6) | -| | [@AndiDog](https://github.com/AndiDog) (from 2023-12-13) | - -and the previous/emeritus maintainers & reviewers: - -| Emeritus Maintainers | Emeritus Reviewers | -|------------------------------------------------------|--------------------------------------------------------| -| [@chuckha](https://github.com/chuckha) | [@ashish-amarnath](https://github.com/ashish-amarnath) | -| [@detiber](https://github.com/detiber) | [@davidewatson](https://github.com/davidewatson) | -| [@ncdc](https://github.com/ncdc) | [@enxebre](https://github.com/enxebre) | -| [@randomvariable](https://github.com/randomvariable) | [@ingvagabund](https://github.com/ingvagabund) | -| [@rudoi](https://github.com/rudoi) | [@michaelbeaumont](https://github.com/michaelbeaumont) | -| [@sedefsavas](https://github.com/sedefsavas) | [@sethp-nr](https://github.com/sethp-nr) | -| [@Skarlso](https://github.com/Skarlso) | [@shivi28](https://github.com/shivi28) | -| | [@dthorsen](https://github.com/dthorsen) | -| | [@pydctw](https://github.com/pydctw) | - -All the CAPA contributors: - -

- - -[slack]: https://kubernetes.slack.com/messages/CD6U2V71N -[good_first_issue]: https://github.com/kubernetes-sigs/cluster-api-provider-aws/issues?q=is%3Aissue+is%3Aopen+sort%3Aupdated-desc+label%3A%22good+first+issue%22 -[gcal]: https://calendar.google.com/calendar/embed?src=cgnt364vd8s86hr2phapfjc6uk%40group.calendar.google.com -[prow]: https://go.k8s.io/bot-commands -[new_issue]: https://github.com/kubernetes-sigs/cluster-api-provider-aws/issues/new -[cluster_api]: https://github.com/kubernetes-sigs/cluster-api -[kops]: https://github.com/kubernetes/kops -[kubicorn]: http://kubicorn.io/ -[amis]: https://cluster-api-aws.sigs.k8s.io/topics/images/amis.html -[published_amis]: https://cluster-api-aws.sigs.k8s.io/topics/images/built-amis.html -[eks_support]: https://cluster-api-aws.sigs.k8s.io/topics/eks/index.html -[cluster-api-supported-v]: https://cluster-api.sigs.k8s.io/reference/versions.html -[development]: https://cluster-api-aws.sigs.k8s.io/development/development.html +If this shows any output, please align the `main` branch with the release branches. From bbbb36a7a7bd050bb232bfa60a10b4202ac99850 Mon Sep 17 00:00:00 2001 From: calvix Date: Wed, 17 Jan 2024 13:41:23 +0100 Subject: [PATCH 2/7] aws-cni-deleted-helm-managed-resources --- pkg/cloud/services/awsnode/cni.go | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/pkg/cloud/services/awsnode/cni.go b/pkg/cloud/services/awsnode/cni.go index 25211e6062..82e6ff56b2 100644 --- a/pkg/cloud/services/awsnode/cni.go +++ b/pkg/cloud/services/awsnode/cni.go @@ -31,7 +31,6 @@ import ( "k8s.io/apimachinery/pkg/types" "k8s.io/klog/v2" "sigs.k8s.io/controller-runtime/pkg/client" - "sigs.k8s.io/kustomize/api/konfig" infrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/api/v1beta2" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/awserrors" @@ -273,20 +272,16 @@ func (s *Service) deleteResource(ctx context.Context, remoteClient client.Client } s.scope.Debug(fmt.Sprintf("resource %s was not found, no action", key)) } else { - // resource found, delete if no label or not managed by helm - if val, ok := obj.GetLabels()[konfig.ManagedbyLabelKey]; !ok || val != "Helm" { - if err := remoteClient.Delete(ctx, obj, &client.DeleteOptions{}); err != nil { - if !apierrors.IsNotFound(err) { - return fmt.Errorf("deleting %s: %w", key, err) - } - s.scope.Debug(fmt.Sprintf( - "resource %s was not found, not deleted", key)) - } else { - s.scope.Debug(fmt.Sprintf("resource %s was deleted", key)) + if err := remoteClient.Delete(ctx, obj, &client.DeleteOptions{}); err != nil { + if !apierrors.IsNotFound(err) { + return fmt.Errorf("deleting %s: %w", key, err) } + s.scope.Debug(fmt.Sprintf( + "resource %s was not found, not deleted", key)) } else { - s.scope.Debug(fmt.Sprintf("resource %s is managed by helm, not deleted", key)) + s.scope.Debug(fmt.Sprintf("resource %s was deleted", key)) } + } return nil From edd14fb6bd39434a249ed8696e0abeb9b85f6c35 Mon Sep 17 00:00:00 2001 From: Mario Nitchev Date: Tue, 13 Feb 2024 17:05:36 +0200 Subject: [PATCH 3/7] Filter CNI subnets when creating EKS NodeGroup --- api/v1beta2/network_types.go | 12 ++++++++++++ pkg/cloud/scope/shared.go | 1 + 2 files changed, 13 insertions(+) diff --git a/api/v1beta2/network_types.go b/api/v1beta2/network_types.go index cd3042b717..cc81082ad3 100644 --- a/api/v1beta2/network_types.go +++ b/api/v1beta2/network_types.go @@ -715,6 +715,18 @@ func (s Subnets) FilterPrivate() (res Subnets) { return } +// FilterPrimary returns a slice containing all subnets that do 
not have the +// sigs.k8s.io/cluster-api-provider-aws/association: secondary tag. These +// subnets are intended for the CNI. +func (s Subnets) FilterPrimary() (res Subnets) { + for _, x := range s { + if x.Tags[NameAWSSubnetAssociation] != SecondarySubnetTagValue { + res = append(res, x) + } + } + return +} + // FilterPublic returns a slice containing all subnets marked as public. func (s Subnets) FilterPublic() (res Subnets) { for _, x := range s { diff --git a/pkg/cloud/scope/shared.go b/pkg/cloud/scope/shared.go index 865ebfaf52..50e0529fb0 100644 --- a/pkg/cloud/scope/shared.go +++ b/pkg/cloud/scope/shared.go @@ -122,6 +122,7 @@ func (p *defaultSubnetPlacementStrategy) getSubnetsForAZs(azs []string, controlP subnets = subnets.FilterPublic() case expinfrav1.AZSubnetTypePrivate: subnets = subnets.FilterPrivate() + subnets = subnets.FilterPrimary() } } if len(subnets) == 0 { From ab0528f4b80ed36fd7b508c4137ee09d41fefa8c Mon Sep 17 00:00:00 2001 From: Mario Nitchev Date: Thu, 25 Apr 2024 09:44:11 +0300 Subject: [PATCH 4/7] Add non root volumes to AWSMachineTemplate --- ...ture.cluster.x-k8s.io_awsmachinepools.yaml | 45 +++++++++++++++++++ ...uster.x-k8s.io_awsmanagedmachinepools.yaml | 45 +++++++++++++++++++ exp/api/v1beta1/conversion.go | 3 ++ exp/api/v1beta1/zz_generated.conversion.go | 1 + exp/api/v1beta2/types.go | 4 ++ exp/api/v1beta2/zz_generated.deepcopy.go | 7 +++ pkg/cloud/scope/launchtemplate.go | 5 ++- pkg/cloud/services/ec2/launchtemplate.go | 24 ++++++++-- 8 files changed, 128 insertions(+), 6 deletions(-) diff --git a/config/crd/bases/infrastructure.cluster.x-k8s.io_awsmachinepools.yaml b/config/crd/bases/infrastructure.cluster.x-k8s.io_awsmachinepools.yaml index 7b6acd1ccc..1892d984f2 100644 --- a/config/crd/bases/infrastructure.cluster.x-k8s.io_awsmachinepools.yaml +++ b/config/crd/bases/infrastructure.cluster.x-k8s.io_awsmachinepools.yaml @@ -758,6 +758,51 @@ spec: - resource-name type: string type: object + nonRootVolumes: + description: Configuration options for the non root storage volumes. + items: + description: Volume encapsulates the configuration options for + the storage device. + properties: + deviceName: + description: Device name + type: string + encrypted: + description: Encrypted is whether the volume should be encrypted + or not. + type: boolean + encryptionKey: + description: EncryptionKey is the KMS key to use to encrypt + the volume. Can be either a KMS key ID or ARN. If Encrypted + is set and this is omitted, the default AWS key will be + used. The key must already exist and be accessible by + the controller. + type: string + iops: + description: IOPS is the number of IOPS requested for the + disk. Not applicable to all types. + format: int64 + type: integer + size: + description: Size specifies size (in Gi) of the storage + device. Must be greater than the image snapshot size or + 8 (whichever is greater). + format: int64 + minimum: 8 + type: integer + throughput: + description: Throughput to provision in MiB/s supported + for the volume type. Not applicable to all types. + format: int64 + type: integer + type: + description: Type is the type of the volume (e.g. gp2, io1, + etc...). 
+ type: string + required: + - size + type: object + type: array rootVolume: description: RootVolume encapsulates the configuration options for the root volume diff --git a/config/crd/bases/infrastructure.cluster.x-k8s.io_awsmanagedmachinepools.yaml b/config/crd/bases/infrastructure.cluster.x-k8s.io_awsmanagedmachinepools.yaml index 3b4e76e87c..01f722f849 100644 --- a/config/crd/bases/infrastructure.cluster.x-k8s.io_awsmanagedmachinepools.yaml +++ b/config/crd/bases/infrastructure.cluster.x-k8s.io_awsmanagedmachinepools.yaml @@ -754,6 +754,51 @@ spec: - resource-name type: string type: object + nonRootVolumes: + description: Configuration options for the non root storage volumes. + items: + description: Volume encapsulates the configuration options for + the storage device. + properties: + deviceName: + description: Device name + type: string + encrypted: + description: Encrypted is whether the volume should be encrypted + or not. + type: boolean + encryptionKey: + description: EncryptionKey is the KMS key to use to encrypt + the volume. Can be either a KMS key ID or ARN. If Encrypted + is set and this is omitted, the default AWS key will be + used. The key must already exist and be accessible by + the controller. + type: string + iops: + description: IOPS is the number of IOPS requested for the + disk. Not applicable to all types. + format: int64 + type: integer + size: + description: Size specifies size (in Gi) of the storage + device. Must be greater than the image snapshot size or + 8 (whichever is greater). + format: int64 + minimum: 8 + type: integer + throughput: + description: Throughput to provision in MiB/s supported + for the volume type. Not applicable to all types. + format: int64 + type: integer + type: + description: Type is the type of the volume (e.g. gp2, io1, + etc...). 
+ type: string + required: + - size + type: object + type: array rootVolume: description: RootVolume encapsulates the configuration options for the root volume diff --git a/exp/api/v1beta1/conversion.go b/exp/api/v1beta1/conversion.go index 16cf651fdf..18508949d5 100644 --- a/exp/api/v1beta1/conversion.go +++ b/exp/api/v1beta1/conversion.go @@ -56,6 +56,7 @@ func (src *AWSMachinePool) ConvertTo(dstRaw conversion.Hub) error { } dst.Spec.DefaultInstanceWarmup = restored.Spec.DefaultInstanceWarmup + dst.Spec.AWSLaunchTemplate.NonRootVolumes = restored.Spec.AWSLaunchTemplate.NonRootVolumes return nil } @@ -105,6 +106,8 @@ func (src *AWSManagedMachinePool) ConvertTo(dstRaw conversion.Hub) error { if restored.Spec.AWSLaunchTemplate.PrivateDNSName != nil { dst.Spec.AWSLaunchTemplate.PrivateDNSName = restored.Spec.AWSLaunchTemplate.PrivateDNSName } + + dst.Spec.AWSLaunchTemplate.NonRootVolumes = restored.Spec.AWSLaunchTemplate.NonRootVolumes } if restored.Spec.AvailabilityZoneSubnetType != nil { dst.Spec.AvailabilityZoneSubnetType = restored.Spec.AvailabilityZoneSubnetType diff --git a/exp/api/v1beta1/zz_generated.conversion.go b/exp/api/v1beta1/zz_generated.conversion.go index 869a3c13d4..4f50a8c20f 100644 --- a/exp/api/v1beta1/zz_generated.conversion.go +++ b/exp/api/v1beta1/zz_generated.conversion.go @@ -402,6 +402,7 @@ func autoConvert_v1beta2_AWSLaunchTemplate_To_v1beta1_AWSLaunchTemplate(in *v1be out.ImageLookupBaseOS = in.ImageLookupBaseOS out.InstanceType = in.InstanceType out.RootVolume = (*apiv1beta2.Volume)(unsafe.Pointer(in.RootVolume)) + // WARNING: in.NonRootVolumes requires manual conversion: does not exist in peer-type out.SSHKeyName = (*string)(unsafe.Pointer(in.SSHKeyName)) out.VersionNumber = (*int64)(unsafe.Pointer(in.VersionNumber)) out.AdditionalSecurityGroups = *(*[]apiv1beta2.AWSResourceReference)(unsafe.Pointer(&in.AdditionalSecurityGroups)) diff --git a/exp/api/v1beta2/types.go b/exp/api/v1beta2/types.go index ef589c2951..0bc4009a2e 100644 --- a/exp/api/v1beta2/types.go +++ b/exp/api/v1beta2/types.go @@ -96,6 +96,10 @@ type AWSLaunchTemplate struct { // +optional RootVolume *infrav1.Volume `json:"rootVolume,omitempty"` + // Configuration options for the non root storage volumes. + // +optional + NonRootVolumes []infrav1.Volume `json:"nonRootVolumes,omitempty"` + // SSHKeyName is the name of the ssh key to attach to the instance. 
Valid values are empty string // (do not use SSH keys), a valid SSH key name, or omitted (use the default SSH key name) // +optional diff --git a/exp/api/v1beta2/zz_generated.deepcopy.go b/exp/api/v1beta2/zz_generated.deepcopy.go index a916ebc059..7048d53196 100644 --- a/exp/api/v1beta2/zz_generated.deepcopy.go +++ b/exp/api/v1beta2/zz_generated.deepcopy.go @@ -96,6 +96,13 @@ func (in *AWSLaunchTemplate) DeepCopyInto(out *AWSLaunchTemplate) { *out = new(apiv1beta2.Volume) (*in).DeepCopyInto(*out) } + if in.NonRootVolumes != nil { + in, out := &in.NonRootVolumes, &out.NonRootVolumes + *out = make([]apiv1beta2.Volume, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } if in.SSHKeyName != nil { in, out := &in.SSHKeyName, &out.SSHKeyName *out = new(string) diff --git a/pkg/cloud/scope/launchtemplate.go b/pkg/cloud/scope/launchtemplate.go index fb2df8b59f..af7651b963 100644 --- a/pkg/cloud/scope/launchtemplate.go +++ b/pkg/cloud/scope/launchtemplate.go @@ -21,11 +21,12 @@ import ( "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" + expclusterv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" + "sigs.k8s.io/cluster-api/util/conditions" + infrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/api/v1beta2" expinfrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/exp/api/v1beta2" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/logger" - expclusterv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" - "sigs.k8s.io/cluster-api/util/conditions" ) // LaunchTemplateScope defines a scope defined around a launch template. diff --git a/pkg/cloud/services/ec2/launchtemplate.go b/pkg/cloud/services/ec2/launchtemplate.go index bb04605475..d5caeb26f9 100644 --- a/pkg/cloud/services/ec2/launchtemplate.go +++ b/pkg/cloud/services/ec2/launchtemplate.go @@ -32,6 +32,9 @@ import ( apimachinerytypes "k8s.io/apimachinery/pkg/types" "k8s.io/utils/ptr" + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + "sigs.k8s.io/cluster-api/util/conditions" + infrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/api/v1beta2" expinfrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/exp/api/v1beta2" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/awserrors" @@ -39,8 +42,6 @@ import ( "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services/userdata" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/record" - clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" - "sigs.k8s.io/cluster-api/util/conditions" ) const ( @@ -520,6 +521,8 @@ func (s *Service) createLaunchTemplateData(scope scope.LaunchTemplateScope, imag data.InstanceMarketOptions = getLaunchTemplateInstanceMarketOptionsRequest(scope.GetLaunchTemplate().SpotMarketOptions) data.PrivateDnsNameOptions = getLaunchTemplatePrivateDNSNameOptionsRequest(scope.GetLaunchTemplate().PrivateDNSName) + blockDeviceMappings := []*ec2.LaunchTemplateBlockDeviceMappingRequest{} + // Set up root volume if lt.RootVolume != nil { rootDeviceName, err := s.checkRootVolume(lt.RootVolume, *data.ImageId) @@ -530,9 +533,22 @@ func (s *Service) createLaunchTemplateData(scope scope.LaunchTemplateScope, imag lt.RootVolume.DeviceName = aws.StringValue(rootDeviceName) req := volumeToLaunchTemplateBlockDeviceMappingRequest(lt.RootVolume) - data.BlockDeviceMappings = []*ec2.LaunchTemplateBlockDeviceMappingRequest{ - req, + blockDeviceMappings = append(blockDeviceMappings, req) + } + + for vi := range lt.NonRootVolumes { + nonRootVolume := lt.NonRootVolumes[vi] + + if nonRootVolume.DeviceName == "" { + return nil, errors.Errorf("non root 
volume should have device name specified") } + + blockDeviceMapping := volumeToLaunchTemplateBlockDeviceMappingRequest(&nonRootVolume) + blockDeviceMappings = append(blockDeviceMappings, blockDeviceMapping) + } + + if len(blockDeviceMappings) > 0 { + data.BlockDeviceMappings = blockDeviceMappings } data.TagSpecifications = s.buildLaunchTemplateTagSpecificationRequest(scope, userDataSecretKey) From ae00674b1ed47e0e939f44d4712507839df97106 Mon Sep 17 00:00:00 2001 From: Andreas Sommer Date: Tue, 30 Apr 2024 11:16:39 +0200 Subject: [PATCH 5/7] Support adding custom secondary VPC CIDR blocks in `AWSCluster` (backport) (#590) --- api/v1beta1/awscluster_conversion.go | 1 + api/v1beta1/zz_generated.conversion.go | 1 + api/v1beta2/awscluster_webhook.go | 8 + api/v1beta2/network_types.go | 20 +- api/v1beta2/zz_generated.deepcopy.go | 20 ++ ...ster.x-k8s.io_awsmanagedcontrolplanes.yaml | 40 ++++ ...tructure.cluster.x-k8s.io_awsclusters.yaml | 20 ++ ....cluster.x-k8s.io_awsclustertemplates.yaml | 21 ++ .../v1beta2/awsmanagedcontrolplane_webhook.go | 40 +++- .../awsmanagedcontrolplane_webhook_test.go | 43 +++- pkg/cloud/scope/cluster.go | 11 + pkg/cloud/scope/managedcontrolplane.go | 22 ++ pkg/cloud/scope/network.go | 8 +- pkg/cloud/scope/shared.go | 7 +- pkg/cloud/scope/shared_test.go | 8 + .../services/autoscaling/autoscalinggroup.go | 6 + pkg/cloud/services/ec2/instances.go | 15 +- pkg/cloud/services/elb/loadbalancer.go | 8 +- pkg/cloud/services/network/natgateways.go | 6 +- pkg/cloud/services/network/network.go | 6 +- pkg/cloud/services/network/secondarycidr.go | 70 +++--- .../services/network/secondarycidr_test.go | 219 +++++++++++++++--- 22 files changed, 502 insertions(+), 98 deletions(-) diff --git a/api/v1beta1/awscluster_conversion.go b/api/v1beta1/awscluster_conversion.go index 382a4cd4d3..c3f196e1f3 100644 --- a/api/v1beta1/awscluster_conversion.go +++ b/api/v1beta1/awscluster_conversion.go @@ -104,6 +104,7 @@ func (src *AWSCluster) ConvertTo(dstRaw conversion.Hub) error { dst.Spec.NetworkSpec.VPC.EmptyRoutesDefaultVPCSecurityGroup = restored.Spec.NetworkSpec.VPC.EmptyRoutesDefaultVPCSecurityGroup dst.Spec.NetworkSpec.VPC.PrivateDNSHostnameTypeOnLaunch = restored.Spec.NetworkSpec.VPC.PrivateDNSHostnameTypeOnLaunch dst.Spec.NetworkSpec.VPC.CarrierGatewayID = restored.Spec.NetworkSpec.VPC.CarrierGatewayID + dst.Spec.NetworkSpec.VPC.SecondaryCidrBlocks = restored.Spec.NetworkSpec.VPC.SecondaryCidrBlocks // Restore SubnetSpec.ResourceID, SubnetSpec.ParentZoneName, and SubnetSpec.ZoneType fields, if any. 
for _, subnet := range restored.Spec.NetworkSpec.Subnets { diff --git a/api/v1beta1/zz_generated.conversion.go b/api/v1beta1/zz_generated.conversion.go index 10842bb9ae..ba1fcc23a2 100644 --- a/api/v1beta1/zz_generated.conversion.go +++ b/api/v1beta1/zz_generated.conversion.go @@ -2296,6 +2296,7 @@ func Convert_v1beta1_VPCSpec_To_v1beta2_VPCSpec(in *VPCSpec, out *v1beta2.VPCSpe func autoConvert_v1beta2_VPCSpec_To_v1beta1_VPCSpec(in *v1beta2.VPCSpec, out *VPCSpec, s conversion.Scope) error { out.ID = in.ID out.CidrBlock = in.CidrBlock + // WARNING: in.SecondaryCidrBlocks requires manual conversion: does not exist in peer-type // WARNING: in.IPAMPool requires manual conversion: does not exist in peer-type if in.IPv6 != nil { in, out := &in.IPv6, &out.IPv6 diff --git a/api/v1beta2/awscluster_webhook.go b/api/v1beta2/awscluster_webhook.go index ae9c80f5b4..e67434fc7a 100644 --- a/api/v1beta2/awscluster_webhook.go +++ b/api/v1beta2/awscluster_webhook.go @@ -269,6 +269,14 @@ func (r *AWSCluster) validateNetwork() field.ErrorList { } } + secondaryCidrBlocks := r.Spec.NetworkSpec.VPC.SecondaryCidrBlocks + secondaryCidrBlocksField := field.NewPath("spec", "network", "vpc", "secondaryCidrBlocks") + for _, cidrBlock := range secondaryCidrBlocks { + if r.Spec.NetworkSpec.VPC.CidrBlock != "" && r.Spec.NetworkSpec.VPC.CidrBlock == cidrBlock.IPv4CidrBlock { + allErrs = append(allErrs, field.Invalid(secondaryCidrBlocksField, secondaryCidrBlocks, fmt.Sprintf("AWSCluster.spec.network.vpc.secondaryCidrBlocks must not contain the primary AWSCluster.spec.network.vpc.cidrBlock %v", r.Spec.NetworkSpec.VPC.CidrBlock))) + } + } + return allErrs } diff --git a/api/v1beta2/network_types.go b/api/v1beta2/network_types.go index cc81082ad3..0ce298c990 100644 --- a/api/v1beta2/network_types.go +++ b/api/v1beta2/network_types.go @@ -388,6 +388,13 @@ type IPAMPool struct { NetmaskLength int64 `json:"netmaskLength,omitempty"` } +// VpcCidrBlock defines the CIDR block and settings to associate with the managed VPC. Currently, only IPv4 is supported. +type VpcCidrBlock struct { + // IPv4CidrBlock is the IPv4 CIDR block to associate with the managed VPC. + // +kubebuilder:validation:MinLength=1 + IPv4CidrBlock string `json:"ipv4CidrBlock"` +} + // VPCSpec configures an AWS VPC. type VPCSpec struct { // ID is the vpc-id of the VPC this provider should use to create resources. @@ -398,6 +405,12 @@ type VPCSpec struct { // Mutually exclusive with IPAMPool. CidrBlock string `json:"cidrBlock,omitempty"` + // SecondaryCidrBlocks are additional CIDR blocks to be associated when the provider creates a managed VPC. + // Defaults to none. Mutually exclusive with IPAMPool. This makes sense to use if, for example, you want to use + // a separate IP range for pods (e.g. Cilium ENI mode). + // +optional + SecondaryCidrBlocks []VpcCidrBlock `json:"secondaryCidrBlocks,omitempty"` + // IPAMPool defines the IPAMv4 pool to be used for VPC. // Mutually exclusive with CidrBlock. IPAMPool *IPAMPool `json:"ipamPool,omitempty"` @@ -715,10 +728,9 @@ func (s Subnets) FilterPrivate() (res Subnets) { return } -// FilterPrimary returns a slice containing all subnets that do not have the -// sigs.k8s.io/cluster-api-provider-aws/association: secondary tag. These -// subnets are intended for the CNI. -func (s Subnets) FilterPrimary() (res Subnets) { +// FilterNonCni returns the subnets that are NOT intended for usage with the CNI pod network +// (i.e. do NOT have the `sigs.k8s.io/cluster-api-provider-aws/association=secondary` tag). 
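+// Callers that place nodes (e.g. the subnet placement logic in pkg/cloud/scope/shared.go) chain this with
+// FilterPrivate()/FilterPublic() so that instances never land in the CNI-only (secondary-tagged) subnets.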
+func (s Subnets) FilterNonCni() (res Subnets) { for _, x := range s { if x.Tags[NameAWSSubnetAssociation] != SecondarySubnetTagValue { res = append(res, x) diff --git a/api/v1beta2/zz_generated.deepcopy.go b/api/v1beta2/zz_generated.deepcopy.go index 81b8a8d314..a477418bae 100644 --- a/api/v1beta2/zz_generated.deepcopy.go +++ b/api/v1beta2/zz_generated.deepcopy.go @@ -2115,6 +2115,11 @@ func (in *TargetGroupSpec) DeepCopy() *TargetGroupSpec { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *VPCSpec) DeepCopyInto(out *VPCSpec) { *out = *in + if in.SecondaryCidrBlocks != nil { + in, out := &in.SecondaryCidrBlocks, &out.SecondaryCidrBlocks + *out = make([]VpcCidrBlock, len(*in)) + copy(*out, *in) + } if in.IPAMPool != nil { in, out := &in.IPAMPool, &out.IPAMPool *out = new(IPAMPool) @@ -2193,3 +2198,18 @@ func (in *Volume) DeepCopy() *Volume { in.DeepCopyInto(out) return out } + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *VpcCidrBlock) DeepCopyInto(out *VpcCidrBlock) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new VpcCidrBlock. +func (in *VpcCidrBlock) DeepCopy() *VpcCidrBlock { + if in == nil { + return nil + } + out := new(VpcCidrBlock) + in.DeepCopyInto(out) + return out +} diff --git a/config/crd/bases/controlplane.cluster.x-k8s.io_awsmanagedcontrolplanes.yaml b/config/crd/bases/controlplane.cluster.x-k8s.io_awsmanagedcontrolplanes.yaml index c9ffb5ecd8..7f4260fee5 100644 --- a/config/crd/bases/controlplane.cluster.x-k8s.io_awsmanagedcontrolplanes.yaml +++ b/config/crd/bases/controlplane.cluster.x-k8s.io_awsmanagedcontrolplanes.yaml @@ -722,6 +722,26 @@ spec: - ip-name - resource-name type: string + secondaryCidrBlocks: + description: SecondaryCidrBlocks are additional CIDR blocks + to be associated when the provider creates a managed VPC. + Defaults to none. Mutually exclusive with IPAMPool. This + makes sense to use if, for example, you want to use a separate + IP range for pods (e.g. Cilium ENI mode). + items: + description: VpcCidrBlock defines the CIDR block and settings + to associate with the managed VPC. Currently, only IPv4 + is supported. + properties: + ipv4CidrBlock: + description: IPv4CidrBlock is the IPv4 CIDR block to + associate with the managed VPC. + minLength: 1 + type: string + required: + - ipv4CidrBlock + type: object + type: array tags: additionalProperties: type: string @@ -2672,6 +2692,26 @@ spec: - ip-name - resource-name type: string + secondaryCidrBlocks: + description: SecondaryCidrBlocks are additional CIDR blocks + to be associated when the provider creates a managed VPC. + Defaults to none. Mutually exclusive with IPAMPool. This + makes sense to use if, for example, you want to use a separate + IP range for pods (e.g. Cilium ENI mode). + items: + description: VpcCidrBlock defines the CIDR block and settings + to associate with the managed VPC. Currently, only IPv4 + is supported. + properties: + ipv4CidrBlock: + description: IPv4CidrBlock is the IPv4 CIDR block to + associate with the managed VPC. 
+ minLength: 1 + type: string + required: + - ipv4CidrBlock + type: object + type: array tags: additionalProperties: type: string diff --git a/config/crd/bases/infrastructure.cluster.x-k8s.io_awsclusters.yaml b/config/crd/bases/infrastructure.cluster.x-k8s.io_awsclusters.yaml index f2d4b882b5..a63d28c2f1 100644 --- a/config/crd/bases/infrastructure.cluster.x-k8s.io_awsclusters.yaml +++ b/config/crd/bases/infrastructure.cluster.x-k8s.io_awsclusters.yaml @@ -1658,6 +1658,26 @@ spec: - ip-name - resource-name type: string + secondaryCidrBlocks: + description: SecondaryCidrBlocks are additional CIDR blocks + to be associated when the provider creates a managed VPC. + Defaults to none. Mutually exclusive with IPAMPool. This + makes sense to use if, for example, you want to use a separate + IP range for pods (e.g. Cilium ENI mode). + items: + description: VpcCidrBlock defines the CIDR block and settings + to associate with the managed VPC. Currently, only IPv4 + is supported. + properties: + ipv4CidrBlock: + description: IPv4CidrBlock is the IPv4 CIDR block to + associate with the managed VPC. + minLength: 1 + type: string + required: + - ipv4CidrBlock + type: object + type: array tags: additionalProperties: type: string diff --git a/config/crd/bases/infrastructure.cluster.x-k8s.io_awsclustertemplates.yaml b/config/crd/bases/infrastructure.cluster.x-k8s.io_awsclustertemplates.yaml index ccc966dbb2..f5ce74c7a7 100644 --- a/config/crd/bases/infrastructure.cluster.x-k8s.io_awsclustertemplates.yaml +++ b/config/crd/bases/infrastructure.cluster.x-k8s.io_awsclustertemplates.yaml @@ -1257,6 +1257,27 @@ spec: - ip-name - resource-name type: string + secondaryCidrBlocks: + description: SecondaryCidrBlocks are additional CIDR + blocks to be associated when the provider creates + a managed VPC. Defaults to none. Mutually exclusive + with IPAMPool. This makes sense to use if, for example, + you want to use a separate IP range for pods (e.g. + Cilium ENI mode). + items: + description: VpcCidrBlock defines the CIDR block + and settings to associate with the managed VPC. + Currently, only IPv4 is supported. + properties: + ipv4CidrBlock: + description: IPv4CidrBlock is the IPv4 CIDR + block to associate with the managed VPC. + minLength: 1 + type: string + required: + - ipv4CidrBlock + type: object + type: array tags: additionalProperties: type: string diff --git a/controlplane/eks/api/v1beta2/awsmanagedcontrolplane_webhook.go b/controlplane/eks/api/v1beta2/awsmanagedcontrolplane_webhook.go index 4b44508b65..c46a2ec4ad 100644 --- a/controlplane/eks/api/v1beta2/awsmanagedcontrolplane_webhook.go +++ b/controlplane/eks/api/v1beta2/awsmanagedcontrolplane_webhook.go @@ -164,7 +164,7 @@ func (r *AWSManagedControlPlane) ValidateUpdate(old runtime.Object) (admission.W if oldAWSManagedControlplane.Spec.NetworkSpec.VPC.IsIPv6Enabled() != r.Spec.NetworkSpec.VPC.IsIPv6Enabled() { allErrs = append(allErrs, - field.Invalid(field.NewPath("spec", "networkSpec", "vpc", "enableIPv6"), r.Spec.NetworkSpec.VPC.IsIPv6Enabled(), "changing IP family is not allowed after it has been set")) + field.Invalid(field.NewPath("spec", "network", "vpc", "enableIPv6"), r.Spec.NetworkSpec.VPC.IsIPv6Enabled(), "changing IP family is not allowed after it has been set")) } if len(allErrs) == 0 { @@ -406,23 +406,53 @@ func (r *AWSManagedControlPlane) validatePrivateDNSHostnameTypeOnLaunch() field. 
func (r *AWSManagedControlPlane) validateNetwork() field.ErrorList { var allErrs field.ErrorList + // If only `AWSManagedControlPlane.spec.secondaryCidrBlock` is set, no additional checks are done to remain + // backward-compatible. The `VPCSpec.SecondaryCidrBlocks` field was added later - if that list is not empty, we + // require `AWSManagedControlPlane.spec.secondaryCidrBlock` to be listed in there as well. This may allow merging + // the fields later on. + podSecondaryCidrBlock := r.Spec.SecondaryCidrBlock + secondaryCidrBlocks := r.Spec.NetworkSpec.VPC.SecondaryCidrBlocks + secondaryCidrBlocksField := field.NewPath("spec", "network", "vpc", "secondaryCidrBlocks") + if podSecondaryCidrBlock != nil && len(secondaryCidrBlocks) > 0 { + found := false + for _, cidrBlock := range secondaryCidrBlocks { + if cidrBlock.IPv4CidrBlock == *podSecondaryCidrBlock { + found = true + break + } + } + if !found { + allErrs = append(allErrs, field.Invalid(secondaryCidrBlocksField, secondaryCidrBlocks, fmt.Sprintf("AWSManagedControlPlane.spec.secondaryCidrBlock %v must be listed in AWSManagedControlPlane.spec.network.vpc.secondaryCidrBlocks (required if both fields are filled)", *podSecondaryCidrBlock))) + } + } + + if podSecondaryCidrBlock != nil && r.Spec.NetworkSpec.VPC.CidrBlock != "" && r.Spec.NetworkSpec.VPC.CidrBlock == *podSecondaryCidrBlock { + secondaryCidrBlockField := field.NewPath("spec", "vpc", "secondaryCidrBlock") + allErrs = append(allErrs, field.Invalid(secondaryCidrBlockField, secondaryCidrBlocks, fmt.Sprintf("AWSManagedControlPlane.spec.secondaryCidrBlock %v must not be equal to the primary AWSManagedControlPlane.spec.network.vpc.cidrBlock", *podSecondaryCidrBlock))) + } + for _, cidrBlock := range secondaryCidrBlocks { + if r.Spec.NetworkSpec.VPC.CidrBlock != "" && r.Spec.NetworkSpec.VPC.CidrBlock == cidrBlock.IPv4CidrBlock { + allErrs = append(allErrs, field.Invalid(secondaryCidrBlocksField, secondaryCidrBlocks, fmt.Sprintf("AWSManagedControlPlane.spec.network.vpc.secondaryCidrBlocks must not contain the primary AWSManagedControlPlane.spec.network.vpc.cidrBlock %v", r.Spec.NetworkSpec.VPC.CidrBlock))) + } + } + if r.Spec.NetworkSpec.VPC.IsIPv6Enabled() && r.Spec.NetworkSpec.VPC.IPv6.CidrBlock != "" && r.Spec.NetworkSpec.VPC.IPv6.PoolID == "" { - poolField := field.NewPath("spec", "networkSpec", "vpc", "ipv6", "poolId") + poolField := field.NewPath("spec", "network", "vpc", "ipv6", "poolId") allErrs = append(allErrs, field.Invalid(poolField, r.Spec.NetworkSpec.VPC.IPv6.PoolID, "poolId cannot be empty if cidrBlock is set")) } if r.Spec.NetworkSpec.VPC.IsIPv6Enabled() && r.Spec.NetworkSpec.VPC.IPv6.PoolID != "" && r.Spec.NetworkSpec.VPC.IPv6.IPAMPool != nil { - poolField := field.NewPath("spec", "networkSpec", "vpc", "ipv6", "poolId") + poolField := field.NewPath("spec", "network", "vpc", "ipv6", "poolId") allErrs = append(allErrs, field.Invalid(poolField, r.Spec.NetworkSpec.VPC.IPv6.PoolID, "poolId and ipamPool cannot be used together")) } if r.Spec.NetworkSpec.VPC.IsIPv6Enabled() && r.Spec.NetworkSpec.VPC.IPv6.CidrBlock != "" && r.Spec.NetworkSpec.VPC.IPv6.IPAMPool != nil { - cidrBlockField := field.NewPath("spec", "networkSpec", "vpc", "ipv6", "cidrBlock") + cidrBlockField := field.NewPath("spec", "network", "vpc", "ipv6", "cidrBlock") allErrs = append(allErrs, field.Invalid(cidrBlockField, r.Spec.NetworkSpec.VPC.IPv6.CidrBlock, "cidrBlock and ipamPool cannot be used together")) } if r.Spec.NetworkSpec.VPC.IsIPv6Enabled() && r.Spec.NetworkSpec.VPC.IPv6.IPAMPool != nil && 
r.Spec.NetworkSpec.VPC.IPv6.IPAMPool.ID == "" && r.Spec.NetworkSpec.VPC.IPv6.IPAMPool.Name == "" { - ipamPoolField := field.NewPath("spec", "networkSpec", "vpc", "ipv6", "ipamPool") + ipamPoolField := field.NewPath("spec", "network", "vpc", "ipv6", "ipamPool") allErrs = append(allErrs, field.Invalid(ipamPoolField, r.Spec.NetworkSpec.VPC.IPv6.IPAMPool, "ipamPool must have either id or name")) } diff --git a/controlplane/eks/api/v1beta2/awsmanagedcontrolplane_webhook_test.go b/controlplane/eks/api/v1beta2/awsmanagedcontrolplane_webhook_test.go index bc3cd5d086..b6aab4d3a8 100644 --- a/controlplane/eks/api/v1beta2/awsmanagedcontrolplane_webhook_test.go +++ b/controlplane/eks/api/v1beta2/awsmanagedcontrolplane_webhook_test.go @@ -167,15 +167,17 @@ func TestDefaultingWebhook(t *testing.T) { func TestWebhookCreate(t *testing.T) { tests := []struct { //nolint:maligned - name string - eksClusterName string - expectError bool - eksVersion string - hasAddons bool - vpcCNI VpcCni - additionalTags infrav1.Tags - secondaryCidr *string - kubeProxy KubeProxy + name string + eksClusterName string + expectError bool + expectErrorToContain string // if non-empty, the error message must contain this substring + eksVersion string + hasAddons bool + vpcCNI VpcCni + additionalTags infrav1.Tags + secondaryCidr *string + secondaryCidrBlocks []infrav1.VpcCidrBlock + kubeProxy KubeProxy }{ { name: "ekscluster specified", @@ -253,6 +255,15 @@ func TestWebhookCreate(t *testing.T) { vpcCNI: VpcCni{Disable: true}, secondaryCidr: aws.String("100.64.0.0/10"), }, + { + name: "secondary CIDR block not listed in NetworkSpec.VPC.SecondaryCidrBlocks", + eksClusterName: "default_cluster1", + eksVersion: "v1.19", + expectError: true, + expectErrorToContain: "100.64.0.0/16 must be listed in AWSManagedControlPlane.spec.network.vpc.secondaryCidrBlocks", + secondaryCidr: aws.String("100.64.0.0/16"), + secondaryCidrBlocks: []infrav1.VpcCidrBlock{{IPv4CidrBlock: "123.456.0.0/16"}}, + }, { name: "invalid tags not allowed", eksClusterName: "default_cluster1", @@ -327,6 +338,11 @@ func TestWebhookCreate(t *testing.T) { KubeProxy: tc.kubeProxy, AdditionalTags: tc.additionalTags, VpcCni: tc.vpcCNI, + NetworkSpec: infrav1.NetworkSpec{ + VPC: infrav1.VPCSpec{ + SecondaryCidrBlocks: tc.secondaryCidrBlocks, + }, + }, }, } if tc.eksVersion != "" { @@ -353,7 +369,16 @@ func TestWebhookCreate(t *testing.T) { if tc.expectError { g.Expect(err).ToNot(BeNil()) + + if tc.expectErrorToContain != "" && err != nil { + g.Expect(err.Error()).To(ContainSubstring(tc.expectErrorToContain)) + } } else { + if tc.expectErrorToContain != "" { + t.Error("Logic error: expectError=false means that expectErrorToContain must be empty") + t.FailNow() + } + g.Expect(err).To(BeNil()) } }) diff --git a/pkg/cloud/scope/cluster.go b/pkg/cloud/scope/cluster.go index aa988f8825..9efaa16d7a 100644 --- a/pkg/cloud/scope/cluster.go +++ b/pkg/cloud/scope/cluster.go @@ -153,6 +153,17 @@ func (s *ClusterScope) SecondaryCidrBlock() *string { return nil } +// SecondaryCidrBlocks returns the additional CIDR blocks to be associated with the managed VPC. +func (s *ClusterScope) SecondaryCidrBlocks() []infrav1.VpcCidrBlock { + return s.AWSCluster.Spec.NetworkSpec.VPC.SecondaryCidrBlocks +} + +// AllSecondaryCidrBlocks returns all secondary CIDR blocks (combining `SecondaryCidrBlock` and `SecondaryCidrBlocks`). 
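+// For non-EKS clusters there is no legacy `SecondaryCidrBlock` field to fold in, so this is simply
+// `SecondaryCidrBlocks`; the EKS variant in managedcontrolplane.go additionally merges that field.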
+func (s *ClusterScope) AllSecondaryCidrBlocks() []infrav1.VpcCidrBlock { + // Non-EKS clusters don't have anything in `SecondaryCidrBlock()` + return s.SecondaryCidrBlocks() +} + // Name returns the CAPI cluster name. func (s *ClusterScope) Name() string { return s.Cluster.Name diff --git a/pkg/cloud/scope/managedcontrolplane.go b/pkg/cloud/scope/managedcontrolplane.go index 018cbc7781..3ad342e6b6 100644 --- a/pkg/cloud/scope/managedcontrolplane.go +++ b/pkg/cloud/scope/managedcontrolplane.go @@ -208,6 +208,28 @@ func (s *ManagedControlPlaneScope) SecondaryCidrBlock() *string { return s.ControlPlane.Spec.SecondaryCidrBlock } +// SecondaryCidrBlocks returns the additional CIDR blocks to be associated with the managed VPC. +func (s *ManagedControlPlaneScope) SecondaryCidrBlocks() []infrav1.VpcCidrBlock { + return s.ControlPlane.Spec.NetworkSpec.VPC.SecondaryCidrBlocks +} + +// AllSecondaryCidrBlocks returns all secondary CIDR blocks (combining `SecondaryCidrBlock` and `SecondaryCidrBlocks`). +func (s *ManagedControlPlaneScope) AllSecondaryCidrBlocks() []infrav1.VpcCidrBlock { + secondaryCidrBlocks := s.ControlPlane.Spec.NetworkSpec.VPC.SecondaryCidrBlocks + + // If only `AWSManagedControlPlane.spec.secondaryCidrBlock` is set, no additional checks are done to remain + // backward-compatible. The `VPCSpec.SecondaryCidrBlocks` field was added later - if that list is not empty, we + // require `AWSManagedControlPlane.spec.secondaryCidrBlock` to be listed in there as well (validation done in + // webhook). + if s.ControlPlane.Spec.SecondaryCidrBlock != nil && len(secondaryCidrBlocks) == 0 { + secondaryCidrBlocks = []infrav1.VpcCidrBlock{{ + IPv4CidrBlock: *s.ControlPlane.Spec.SecondaryCidrBlock, + }} + } + + return secondaryCidrBlocks +} + // SecurityGroupOverrides returns the security groups that are overrides in the ControlPlane spec. func (s *ManagedControlPlaneScope) SecurityGroupOverrides() map[infrav1.SecurityGroupRole]string { return s.ControlPlane.Spec.NetworkSpec.SecurityGroupOverrides diff --git a/pkg/cloud/scope/network.go b/pkg/cloud/scope/network.go index 32b02ca0d2..aeb0e34231 100644 --- a/pkg/cloud/scope/network.go +++ b/pkg/cloud/scope/network.go @@ -37,8 +37,14 @@ type NetworkScope interface { CNIIngressRules() infrav1.CNIIngressRules // SecurityGroups returns the cluster security groups as a map, it creates the map if empty. SecurityGroups() map[infrav1.SecurityGroupRole]infrav1.SecurityGroup - // SecondaryCidrBlock returns the optional secondary CIDR block to use for pod IPs + // SecondaryCidrBlock returns the optional secondary CIDR block to use for pod IPs. This may later be renamed since + // it should not be confused with SecondaryCidrBlocks. SecondaryCidrBlock() *string + // SecondaryCidrBlocks returns the additional CIDR blocks to be associated with the managed VPC. + SecondaryCidrBlocks() []infrav1.VpcCidrBlock + // AllSecondaryCidrBlocks returns a unique list of all secondary CIDR blocks (combining `SecondaryCidrBlock` and + // `SecondaryCidrBlocks`). + AllSecondaryCidrBlocks() []infrav1.VpcCidrBlock // Bastion returns the bastion details for the cluster. 
Bastion() *infrav1.Bastion diff --git a/pkg/cloud/scope/shared.go b/pkg/cloud/scope/shared.go index 50e0529fb0..2521f100de 100644 --- a/pkg/cloud/scope/shared.go +++ b/pkg/cloud/scope/shared.go @@ -100,7 +100,7 @@ func (p *defaultSubnetPlacementStrategy) Place(input *placementInput) ([]string, return subnetIDs, nil } - controlPlaneSubnetIDs := input.ControlplaneSubnets.FilterPrivate().IDs() + controlPlaneSubnetIDs := input.ControlplaneSubnets.FilterPrivate().FilterNonCni().IDs() if len(controlPlaneSubnetIDs) > 0 { p.logger.Debug("using all the private subnets from the control plane") return controlPlaneSubnetIDs, nil @@ -119,10 +119,9 @@ func (p *defaultSubnetPlacementStrategy) getSubnetsForAZs(azs []string, controlP case expinfrav1.AZSubnetTypeAll: // no-op case expinfrav1.AZSubnetTypePublic: - subnets = subnets.FilterPublic() + subnets = subnets.FilterPublic().FilterNonCni() case expinfrav1.AZSubnetTypePrivate: - subnets = subnets.FilterPrivate() - subnets = subnets.FilterPrimary() + subnets = subnets.FilterPrivate().FilterNonCni() } } if len(subnets) == 0 { diff --git a/pkg/cloud/scope/shared_test.go b/pkg/cloud/scope/shared_test.go index 34d124abf3..8afc051c6c 100644 --- a/pkg/cloud/scope/shared_test.go +++ b/pkg/cloud/scope/shared_test.go @@ -182,6 +182,14 @@ func TestSubnetPlacement(t *testing.T) { AvailabilityZone: "eu-west-1c", IsPublic: false, }, + infrav1.SubnetSpec{ + ID: "subnet-az6", + AvailabilityZone: "eu-west-1c", + IsPublic: false, + Tags: infrav1.Tags{ + infrav1.NameAWSSubnetAssociation: infrav1.SecondarySubnetTagValue, + }, + }, }, logger: logger.NewLogger(klog.Background()), expectedSubnetIDs: []string{"subnet-az3"}, diff --git a/pkg/cloud/services/autoscaling/autoscalinggroup.go b/pkg/cloud/services/autoscaling/autoscalinggroup.go index 9ddd4c086d..d473010d12 100644 --- a/pkg/cloud/services/autoscaling/autoscalinggroup.go +++ b/pkg/cloud/services/autoscaling/autoscalinggroup.go @@ -542,6 +542,12 @@ func (s *Service) SubnetIDs(scope *scope.MachinePoolScope) ([]string, error) { } for _, subnet := range out.Subnets { + tags := converters.TagsToMap(subnet.Tags) + if tags[infrav1.NameAWSSubnetAssociation] == infrav1.SecondarySubnetTagValue { + // Subnet belongs to a secondary CIDR block which won't be used to create instances + continue + } + subnetIDs = append(subnetIDs, *subnet.SubnetId) } diff --git a/pkg/cloud/services/ec2/instances.go b/pkg/cloud/services/ec2/instances.go index a3efbea726..77db7d2aca 100644 --- a/pkg/cloud/services/ec2/instances.go +++ b/pkg/cloud/services/ec2/instances.go @@ -364,6 +364,13 @@ func (s *Service) findSubnet(scope *scope.MachineScope) (string, error) { continue } } + + tags := converters.TagsToMap(subnet.Tags) + if tags[infrav1.NameAWSSubnetAssociation] == infrav1.SecondarySubnetTagValue { + errMessage += fmt.Sprintf(" subnet %q belongs to a secondary CIDR block which won't be used to create instances.", *subnet.SubnetId) + continue + } + filtered = append(filtered, subnet) } // prefer a subnet in the cluster VPC if multiple match @@ -380,7 +387,7 @@ func (s *Service) findSubnet(scope *scope.MachineScope) (string, error) { return *filtered[0].SubnetId, nil case failureDomain != nil: if scope.AWSMachine.Spec.PublicIP != nil && *scope.AWSMachine.Spec.PublicIP { - subnets := s.scope.Subnets().FilterPublic().FilterByZone(*failureDomain) + subnets := s.scope.Subnets().FilterPublic().FilterNonCni().FilterByZone(*failureDomain) if len(subnets) == 0 { errMessage := fmt.Sprintf("failed to run machine %q with public IP, no public subnets available 
in availability zone %q", scope.Name(), *failureDomain) @@ -390,7 +397,7 @@ func (s *Service) findSubnet(scope *scope.MachineScope) (string, error) { return subnets[0].GetResourceID(), nil } - subnets := s.scope.Subnets().FilterPrivate().FilterByZone(*failureDomain) + subnets := s.scope.Subnets().FilterPrivate().FilterNonCni().FilterByZone(*failureDomain) if len(subnets) == 0 { errMessage := fmt.Sprintf("failed to run machine %q, no subnets available in availability zone %q", scope.Name(), *failureDomain) @@ -399,7 +406,7 @@ func (s *Service) findSubnet(scope *scope.MachineScope) (string, error) { } return subnets[0].GetResourceID(), nil case scope.AWSMachine.Spec.PublicIP != nil && *scope.AWSMachine.Spec.PublicIP: - subnets := s.scope.Subnets().FilterPublic() + subnets := s.scope.Subnets().FilterPublic().FilterNonCni() if len(subnets) == 0 { errMessage := fmt.Sprintf("failed to run machine %q with public IP, no public subnets available", scope.Name()) record.Eventf(scope.AWSMachine, "FailedCreate", errMessage) @@ -411,7 +418,7 @@ func (s *Service) findSubnet(scope *scope.MachineScope) (string, error) { // with control plane machines. default: - sns := s.scope.Subnets().FilterPrivate() + sns := s.scope.Subnets().FilterPrivate().FilterNonCni() if len(sns) == 0 { errMessage := fmt.Sprintf("failed to run machine %q, no subnets available", scope.Name()) record.Eventf(s.scope.InfraCluster(), "FailedCreateInstance", errMessage) diff --git a/pkg/cloud/services/elb/loadbalancer.go b/pkg/cloud/services/elb/loadbalancer.go index 234123b4d5..2106b6705e 100644 --- a/pkg/cloud/services/elb/loadbalancer.go +++ b/pkg/cloud/services/elb/loadbalancer.go @@ -333,10 +333,10 @@ func (s *Service) getAPIServerLBSpec(elbName string, lbSpec *infrav1.AWSLoadBala } } else { // The load balancer APIs require us to only attach one subnet for each AZ. - subnets := s.scope.Subnets().FilterPrivate() + subnets := s.scope.Subnets().FilterPrivate().FilterNonCni() if scheme == infrav1.ELBSchemeInternetFacing { - subnets = s.scope.Subnets().FilterPublic() + subnets = s.scope.Subnets().FilterPublic().FilterNonCni() } subnetLoop: @@ -1151,10 +1151,10 @@ func (s *Service) getAPIServerClassicELBSpec(elbName string) (*infrav1.LoadBalan } } else { // The load balancer APIs require us to only attach one subnet for each AZ. - subnets := s.scope.Subnets().FilterPrivate() + subnets := s.scope.Subnets().FilterPrivate().FilterNonCni() if scheme == infrav1.ELBSchemeInternetFacing { - subnets = s.scope.Subnets().FilterPublic() + subnets = s.scope.Subnets().FilterPublic().FilterNonCni() } subnetLoop: diff --git a/pkg/cloud/services/network/natgateways.go b/pkg/cloud/services/network/natgateways.go index 4c549a39e5..f600aefefd 100644 --- a/pkg/cloud/services/network/natgateways.go +++ b/pkg/cloud/services/network/natgateways.go @@ -46,7 +46,7 @@ func (s *Service) reconcileNatGateways() error { s.scope.Debug("Reconciling NAT gateways") - if len(s.scope.Subnets().FilterPrivate()) == 0 { + if len(s.scope.Subnets().FilterPrivate().FilterNonCni()) == 0 { s.scope.Debug("No private subnets available, skipping NAT gateways") conditions.MarkFalse( s.scope.InfraCluster(), @@ -55,7 +55,7 @@ func (s *Service) reconcileNatGateways() error { clusterv1.ConditionSeverityWarning, "No private subnets available, skipping NAT gateways") return nil - } else if len(s.scope.Subnets().FilterPublic()) == 0 { + } else if len(s.scope.Subnets().FilterPublic().FilterNonCni()) == 0 { s.scope.Debug("No public subnets available. 
Cannot create NAT gateways for private subnets, this might be a configuration error.") conditions.MarkFalse( s.scope.InfraCluster(), @@ -74,7 +74,7 @@ func (s *Service) reconcileNatGateways() error { natGatewaysIPs := []string{} subnetIDs := []string{} - for _, sn := range s.scope.Subnets().FilterPublic() { + for _, sn := range s.scope.Subnets().FilterPublic().FilterNonCni() { if sn.GetResourceID() == "" { continue } diff --git a/pkg/cloud/services/network/network.go b/pkg/cloud/services/network/network.go index e97024fad7..70d85fd682 100644 --- a/pkg/cloud/services/network/network.go +++ b/pkg/cloud/services/network/network.go @@ -37,8 +37,8 @@ func (s *Service) ReconcileNetwork() (err error) { } conditions.MarkTrue(s.scope.InfraCluster(), infrav1.VpcReadyCondition) - // Secondary CIDR - if err := s.associateSecondaryCidr(); err != nil { + // Secondary CIDRs + if err := s.associateSecondaryCidrs(); err != nil { conditions.MarkFalse(s.scope.InfraCluster(), infrav1.SecondaryCidrsReadyCondition, infrav1.SecondaryCidrReconciliationFailedReason, infrautilconditions.ErrorConditionAfterInit(s.scope.ClusterObj()), err.Error()) return err } @@ -199,7 +199,7 @@ func (s *Service) DeleteNetwork() (err error) { // Secondary CIDR. conditions.MarkFalse(s.scope.InfraCluster(), infrav1.SecondaryCidrsReadyCondition, clusterv1.DeletingReason, clusterv1.ConditionSeverityInfo, "") - if err := s.disassociateSecondaryCidr(); err != nil { + if err := s.disassociateSecondaryCidrs(); err != nil { conditions.MarkFalse(s.scope.InfraCluster(), infrav1.SecondaryCidrsReadyCondition, "DisassociateFailed", clusterv1.ConditionSeverityWarning, err.Error()) return err } diff --git a/pkg/cloud/services/network/secondarycidr.go b/pkg/cloud/services/network/secondarycidr.go index 54fb7c5816..829383bf1a 100644 --- a/pkg/cloud/services/network/secondarycidr.go +++ b/pkg/cloud/services/network/secondarycidr.go @@ -20,7 +20,6 @@ import ( "context" "github.com/aws/aws-sdk-go/service/ec2" - "github.com/google/go-cmp/cmp" "github.com/pkg/errors" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/record" @@ -30,8 +29,9 @@ func isVPCPresent(vpcs *ec2.DescribeVpcsOutput) bool { return vpcs != nil && len(vpcs.Vpcs) > 0 } -func (s *Service) associateSecondaryCidr() error { - if s.scope.SecondaryCidrBlock() == nil { +func (s *Service) associateSecondaryCidrs() error { + secondaryCidrBlocks := s.scope.AllSecondaryCidrBlocks() + if len(secondaryCidrBlocks) == 0 { return nil } @@ -46,30 +46,43 @@ func (s *Service) associateSecondaryCidr() error { return errors.Errorf("failed to associateSecondaryCidr as there are no VPCs present") } + // We currently only *add* associations. Here, we do not reconcile exactly against the provided list + // (i.e. disassociate what isn't listed in the spec). 
existingAssociations := vpcs.Vpcs[0].CidrBlockAssociationSet - for _, existing := range existingAssociations { - if *existing.CidrBlock == *s.scope.SecondaryCidrBlock() { - return nil + for _, desiredCidrBlock := range secondaryCidrBlocks { + desiredCidrBlock := desiredCidrBlock + + found := false + for _, existing := range existingAssociations { + if *existing.CidrBlock == desiredCidrBlock.IPv4CidrBlock { + found = true + break + } + } + if found { + continue } - } - out, err := s.EC2Client.AssociateVpcCidrBlockWithContext(context.TODO(), &ec2.AssociateVpcCidrBlockInput{ - VpcId: &s.scope.VPC().ID, - CidrBlock: s.scope.SecondaryCidrBlock(), - }) - if err != nil { - record.Warnf(s.scope.InfraCluster(), "FailedAssociateSecondaryCidr", "Failed associating secondary CIDR with VPC %v", err) - return err - } + out, err := s.EC2Client.AssociateVpcCidrBlockWithContext(context.TODO(), &ec2.AssociateVpcCidrBlockInput{ + VpcId: &s.scope.VPC().ID, + CidrBlock: &desiredCidrBlock.IPv4CidrBlock, + }) + if err != nil { + record.Warnf(s.scope.InfraCluster(), "FailedAssociateSecondaryCidr", "Failed associating secondary CIDR %q with VPC %v", desiredCidrBlock.IPv4CidrBlock, err) + return err + } - // once IPv6 is supported, we need to modify out.CidrBlockAssociation.AssociationId to out.Ipv6CidrBlockAssociation.AssociationId - record.Eventf(s.scope.InfraCluster(), "SuccessfulAssociateSecondaryCidr", "Associated secondary CIDR with VPC %q", *out.CidrBlockAssociation.AssociationId) + // Once IPv6 is supported, we need to consider both `out.CidrBlockAssociation.AssociationId` and + // `out.Ipv6CidrBlockAssociation.AssociationId` + record.Eventf(s.scope.InfraCluster(), "SuccessfulAssociateSecondaryCidr", "Associated secondary CIDR %q with VPC %q", desiredCidrBlock.IPv4CidrBlock, *out.CidrBlockAssociation.AssociationId) + } return nil } -func (s *Service) disassociateSecondaryCidr() error { - if s.scope.SecondaryCidrBlock() == nil { +func (s *Service) disassociateSecondaryCidrs() error { + secondaryCidrBlocks := s.scope.AllSecondaryCidrBlocks() + if len(secondaryCidrBlocks) == 0 { return nil } @@ -81,17 +94,20 @@ func (s *Service) disassociateSecondaryCidr() error { } if !isVPCPresent(vpcs) { - return errors.Errorf("failed to associateSecondaryCidr as there are no VPCs present") + return errors.Errorf("failed to disassociateSecondaryCidr as there are no VPCs present") } existingAssociations := vpcs.Vpcs[0].CidrBlockAssociationSet - for _, existing := range existingAssociations { - if cmp.Equal(existing.CidrBlock, s.scope.SecondaryCidrBlock()) { - if _, err := s.EC2Client.DisassociateVpcCidrBlockWithContext(context.TODO(), &ec2.DisassociateVpcCidrBlockInput{ - AssociationId: existing.AssociationId, - }); err != nil { - record.Warnf(s.scope.InfraCluster(), "FailedDisassociateSecondaryCidr", "Failed disassociating secondary CIDR with VPC %v", err) - return err + for _, cidrBlockToDelete := range secondaryCidrBlocks { + for _, existing := range existingAssociations { + if *existing.CidrBlock == cidrBlockToDelete.IPv4CidrBlock { + if _, err := s.EC2Client.DisassociateVpcCidrBlockWithContext(context.TODO(), &ec2.DisassociateVpcCidrBlockInput{ + AssociationId: existing.AssociationId, + }); err != nil { + record.Warnf(s.scope.InfraCluster(), "FailedDisassociateSecondaryCidr", "Failed disassociating secondary CIDR %q from VPC %v", cidrBlockToDelete.IPv4CidrBlock, err) + return err + } + break } } } diff --git a/pkg/cloud/services/network/secondarycidr_test.go b/pkg/cloud/services/network/secondarycidr_test.go index 
5be6cf441e..639a8f07f1 100644 --- a/pkg/cloud/services/network/secondarycidr_test.go +++ b/pkg/cloud/services/network/secondarycidr_test.go @@ -65,25 +65,33 @@ func TestServiceAssociateSecondaryCidr(t *testing.T) { defer mockCtrl.Finish() tests := []struct { - name string - haveSecondaryCIDR bool - expect func(m *mocks.MockEC2APIMockRecorder) - wantErr bool + name string + fillAWSManagedControlPlaneSecondaryCIDR bool + networkSecondaryCIDRBlocks []infrav1.VpcCidrBlock + expect func(m *mocks.MockEC2APIMockRecorder) + wantErr bool }{ { - name: "Should not associate secondary CIDR if no secondary cidr block info present in control plane", + name: "Should not associate secondary CIDR if no secondary cidr block info present in control plane", + fillAWSManagedControlPlaneSecondaryCIDR: false, + expect: func(m *mocks.MockEC2APIMockRecorder) { + // No calls expected + m.DescribeVpcsWithContext(context.TODO(), gomock.Any()).Times(0) + m.AssociateVpcCidrBlockWithContext(context.TODO(), gomock.Any()).Times(0) + }, + wantErr: false, }, { - name: "Should return error if unable to describe VPC", - haveSecondaryCIDR: true, + name: "Should return error if unable to describe VPC", + fillAWSManagedControlPlaneSecondaryCIDR: true, expect: func(m *mocks.MockEC2APIMockRecorder) { m.DescribeVpcsWithContext(context.TODO(), gomock.AssignableToTypeOf(&ec2.DescribeVpcsInput{})).Return(nil, awserrors.NewFailedDependency("dependency-failure")) }, wantErr: true, }, { - name: "Should not associate secondary cidr block if already exist in VPC", - haveSecondaryCIDR: true, + name: "Should not associate secondary cidr block if already exist in VPC", + fillAWSManagedControlPlaneSecondaryCIDR: true, expect: func(m *mocks.MockEC2APIMockRecorder) { m.DescribeVpcsWithContext(context.TODO(), gomock.AssignableToTypeOf(&ec2.DescribeVpcsInput{})).Return(&ec2.DescribeVpcsOutput{ Vpcs: []*ec2.Vpc{ @@ -96,29 +104,102 @@ func TestServiceAssociateSecondaryCidr(t *testing.T) { }, }, { - name: "Should return error if no VPC found", - haveSecondaryCIDR: true, + name: "Should return error if no VPC found", + fillAWSManagedControlPlaneSecondaryCIDR: true, expect: func(m *mocks.MockEC2APIMockRecorder) { - m.DescribeVpcsWithContext(context.TODO(), gomock.AssignableToTypeOf(&ec2.DescribeVpcsInput{})).Return(nil, nil) + m.DescribeVpcsWithContext(context.TODO(), gomock.AssignableToTypeOf(&ec2.DescribeVpcsInput{})).Return(&ec2.DescribeVpcsOutput{ + Vpcs: []*ec2.Vpc{}, + }, nil) }, wantErr: true, }, { - name: "Should return error if failed during associating secondary cidr block", - haveSecondaryCIDR: true, + name: "Should return error if failed during associating secondary cidr block", + fillAWSManagedControlPlaneSecondaryCIDR: true, expect: func(m *mocks.MockEC2APIMockRecorder) { m.DescribeVpcsWithContext(context.TODO(), gomock.AssignableToTypeOf(&ec2.DescribeVpcsInput{})).Return(&ec2.DescribeVpcsOutput{ Vpcs: []*ec2.Vpc{ { - CidrBlockAssociationSet: []*ec2.VpcCidrBlockAssociation{ - {CidrBlock: aws.String("secondary-cidr-new")}, - }, + CidrBlockAssociationSet: []*ec2.VpcCidrBlockAssociation{}, }, }}, nil) m.AssociateVpcCidrBlockWithContext(context.TODO(), gomock.AssignableToTypeOf(&ec2.AssociateVpcCidrBlockInput{})).Return(nil, awserrors.NewFailedDependency("dependency-failure")) }, wantErr: true, }, + { + name: "Should successfully associate secondary CIDR block if none is associated yet", + fillAWSManagedControlPlaneSecondaryCIDR: true, + expect: func(m *mocks.MockEC2APIMockRecorder) { + m.DescribeVpcsWithContext(context.TODO(), 
gomock.AssignableToTypeOf(&ec2.DescribeVpcsInput{})).Return(&ec2.DescribeVpcsOutput{ + Vpcs: []*ec2.Vpc{ + { + CidrBlockAssociationSet: []*ec2.VpcCidrBlockAssociation{}, + }, + }}, nil) + m.AssociateVpcCidrBlockWithContext(context.TODO(), gomock.AssignableToTypeOf(&ec2.AssociateVpcCidrBlockInput{})).Return(&ec2.AssociateVpcCidrBlockOutput{ + CidrBlockAssociation: &ec2.VpcCidrBlockAssociation{ + AssociationId: aws.String("association-id-success"), + }, + }, nil) + }, + wantErr: false, + }, + { + name: "Should successfully associate missing secondary CIDR blocks", + fillAWSManagedControlPlaneSecondaryCIDR: false, + networkSecondaryCIDRBlocks: []infrav1.VpcCidrBlock{ + { + IPv4CidrBlock: "10.0.1.0/24", + }, + { + IPv4CidrBlock: "10.0.2.0/24", + }, + { + IPv4CidrBlock: "10.0.3.0/24", + }, + { + IPv4CidrBlock: "10.0.4.0/24", + }, + }, + expect: func(m *mocks.MockEC2APIMockRecorder) { + // Two are simulated to exist... + m.DescribeVpcsWithContext(context.TODO(), gomock.AssignableToTypeOf(&ec2.DescribeVpcsInput{})).Return(&ec2.DescribeVpcsOutput{ + Vpcs: []*ec2.Vpc{ + { + CidrBlockAssociationSet: []*ec2.VpcCidrBlockAssociation{ + { + AssociationId: aws.String("association-id-existing-1"), + CidrBlock: aws.String("10.0.1.0/24"), + }, + { + AssociationId: aws.String("association-id-existing-3"), + CidrBlock: aws.String("10.0.3.0/24"), + }, + }, + }, + }}, nil) + + // ...the other two should be created + m.AssociateVpcCidrBlockWithContext(context.TODO(), gomock.Eq(&ec2.AssociateVpcCidrBlockInput{ + CidrBlock: aws.String("10.0.2.0/24"), + VpcId: aws.String("vpc-id"), + })).Return(&ec2.AssociateVpcCidrBlockOutput{ + CidrBlockAssociation: &ec2.VpcCidrBlockAssociation{ + AssociationId: aws.String("association-id-success-2"), + }, + }, nil) + m.AssociateVpcCidrBlockWithContext(context.TODO(), gomock.Eq(&ec2.AssociateVpcCidrBlockInput{ + CidrBlock: aws.String("10.0.4.0/24"), + VpcId: aws.String("vpc-id"), + })).Return(&ec2.AssociateVpcCidrBlockOutput{ + CidrBlockAssociation: &ec2.VpcCidrBlockAssociation{ + AssociationId: aws.String("association-id-success-4"), + }, + }, nil) + }, + wantErr: false, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -133,9 +214,10 @@ func TestServiceAssociateSecondaryCidr(t *testing.T) { mcpScope, err := setupNewManagedControlPlaneScope(cl) g.Expect(err).NotTo(HaveOccurred()) - if !tt.haveSecondaryCIDR { + if !tt.fillAWSManagedControlPlaneSecondaryCIDR { mcpScope.ControlPlane.Spec.SecondaryCidrBlock = nil } + mcpScope.ControlPlane.Spec.NetworkSpec.VPC.SecondaryCidrBlocks = tt.networkSecondaryCIDRBlocks s := NewService(mcpScope) s.EC2Client = ec2Mock @@ -144,7 +226,7 @@ func TestServiceAssociateSecondaryCidr(t *testing.T) { tt.expect(ec2Mock.EXPECT()) } - err = s.associateSecondaryCidr() + err = s.associateSecondaryCidrs() if tt.wantErr { g.Expect(err).To(HaveOccurred()) return @@ -159,33 +241,41 @@ func TestServiceDiassociateSecondaryCidr(t *testing.T) { defer mockCtrl.Finish() tests := []struct { - name string - haveSecondaryCIDR bool - expect func(m *mocks.MockEC2APIMockRecorder) - wantErr bool + name string + fillAWSManagedControlPlaneSecondaryCIDR bool + networkSecondaryCIDRBlocks []infrav1.VpcCidrBlock + expect func(m *mocks.MockEC2APIMockRecorder) + wantErr bool }{ { - name: "Should not disassociate secondary CIDR if no secondary cidr block info present in control plane", + name: "Should not disassociate secondary CIDR if no secondary cidr block info present in control plane", + fillAWSManagedControlPlaneSecondaryCIDR: false, + expect: func(m 
*mocks.MockEC2APIMockRecorder) { + // No calls expected + m.DescribeVpcsWithContext(context.TODO(), gomock.Any()).Times(0) + m.DisassociateVpcCidrBlockWithContext(context.TODO(), gomock.Any()).Times(0) + }, + wantErr: false, }, { - name: "Should return error if unable to describe VPC", - haveSecondaryCIDR: true, + name: "Should return error if unable to describe VPC", + fillAWSManagedControlPlaneSecondaryCIDR: true, expect: func(m *mocks.MockEC2APIMockRecorder) { m.DescribeVpcsWithContext(context.TODO(), gomock.AssignableToTypeOf(&ec2.DescribeVpcsInput{})).Return(nil, awserrors.NewFailedDependency("dependency-failure")) }, wantErr: true, }, { - name: "Should return error if no VPC found", - haveSecondaryCIDR: true, + name: "Should return error if no VPC found", + fillAWSManagedControlPlaneSecondaryCIDR: true, expect: func(m *mocks.MockEC2APIMockRecorder) { m.DescribeVpcsWithContext(context.TODO(), gomock.AssignableToTypeOf(&ec2.DescribeVpcsInput{})).Return(nil, nil) }, wantErr: true, }, { - name: "Should diassociate secondary cidr block if already exist in VPC", - haveSecondaryCIDR: true, + name: "Should diassociate secondary cidr block if already exist in VPC", + fillAWSManagedControlPlaneSecondaryCIDR: true, expect: func(m *mocks.MockEC2APIMockRecorder) { m.DescribeVpcsWithContext(context.TODO(), gomock.AssignableToTypeOf(&ec2.DescribeVpcsInput{})).Return(&ec2.DescribeVpcsOutput{ Vpcs: []*ec2.Vpc{ @@ -199,8 +289,8 @@ func TestServiceDiassociateSecondaryCidr(t *testing.T) { }, }, { - name: "Should return error if failed to diassociate secondary cidr block", - haveSecondaryCIDR: true, + name: "Should return error if failed to diassociate secondary cidr block", + fillAWSManagedControlPlaneSecondaryCIDR: true, expect: func(m *mocks.MockEC2APIMockRecorder) { m.DescribeVpcsWithContext(context.TODO(), gomock.AssignableToTypeOf(&ec2.DescribeVpcsInput{})).Return(&ec2.DescribeVpcsOutput{ Vpcs: []*ec2.Vpc{ @@ -214,6 +304,66 @@ func TestServiceDiassociateSecondaryCidr(t *testing.T) { }, wantErr: true, }, + { + name: "Should successfully return from disassociating secondary CIDR blocks if none is currently associated", + fillAWSManagedControlPlaneSecondaryCIDR: true, + expect: func(m *mocks.MockEC2APIMockRecorder) { + m.DescribeVpcsWithContext(context.TODO(), gomock.AssignableToTypeOf(&ec2.DescribeVpcsInput{})).Return(&ec2.DescribeVpcsOutput{ + Vpcs: []*ec2.Vpc{ + { + CidrBlockAssociationSet: []*ec2.VpcCidrBlockAssociation{}, + }, + }}, nil) + + // No calls expected + m.DisassociateVpcCidrBlockWithContext(context.TODO(), gomock.Any()).Times(0) + }, + wantErr: false, + }, + { + name: "Should successfully disassociate existing secondary CIDR blocks", + fillAWSManagedControlPlaneSecondaryCIDR: false, + networkSecondaryCIDRBlocks: []infrav1.VpcCidrBlock{ + { + IPv4CidrBlock: "10.0.1.0/24", + }, + { + IPv4CidrBlock: "10.0.2.0/24", + }, + { + IPv4CidrBlock: "10.0.3.0/24", + }, + { + IPv4CidrBlock: "10.0.4.0/24", + }, + }, + expect: func(m *mocks.MockEC2APIMockRecorder) { + // Two are simulated to exist... 
+ m.DescribeVpcsWithContext(context.TODO(), gomock.AssignableToTypeOf(&ec2.DescribeVpcsInput{})).Return(&ec2.DescribeVpcsOutput{ + Vpcs: []*ec2.Vpc{ + { + CidrBlockAssociationSet: []*ec2.VpcCidrBlockAssociation{ + { + AssociationId: aws.String("association-id-existing-1"), + CidrBlock: aws.String("10.0.1.0/24"), + }, + { + AssociationId: aws.String("association-id-existing-3"), + CidrBlock: aws.String("10.0.3.0/24"), + }, + }, + }, + }}, nil) + + m.DisassociateVpcCidrBlockWithContext(context.TODO(), gomock.Eq(&ec2.DisassociateVpcCidrBlockInput{ + AssociationId: aws.String("association-id-existing-1"), // 10.0.1.0/24 (see above) + })).Return(&ec2.DisassociateVpcCidrBlockOutput{}, nil) + m.DisassociateVpcCidrBlockWithContext(context.TODO(), gomock.Eq(&ec2.DisassociateVpcCidrBlockInput{ + AssociationId: aws.String("association-id-existing-3"), // 10.0.3.0/24 (see above) + })).Return(&ec2.DisassociateVpcCidrBlockOutput{}, nil) + }, + wantErr: false, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { @@ -228,9 +378,10 @@ func TestServiceDiassociateSecondaryCidr(t *testing.T) { mcpScope, err := setupNewManagedControlPlaneScope(cl) g.Expect(err).NotTo(HaveOccurred()) - if !tt.haveSecondaryCIDR { + if !tt.fillAWSManagedControlPlaneSecondaryCIDR { mcpScope.ControlPlane.Spec.SecondaryCidrBlock = nil } + mcpScope.ControlPlane.Spec.NetworkSpec.VPC.SecondaryCidrBlocks = tt.networkSecondaryCIDRBlocks s := NewService(mcpScope) s.EC2Client = ec2Mock @@ -239,7 +390,7 @@ func TestServiceDiassociateSecondaryCidr(t *testing.T) { tt.expect(ec2Mock.EXPECT()) } - err = s.disassociateSecondaryCidr() + err = s.disassociateSecondaryCidrs() if tt.wantErr { g.Expect(err).To(HaveOccurred()) return From 905c44ab2afde5e2d4e983066ab9856859a0de47 Mon Sep 17 00:00:00 2001 From: Andreas Sommer Date: Thu, 6 Jun 2024 11:27:56 +0200 Subject: [PATCH 6/7] S3 user data support for `AWSMachinePool` (#592) --- .circleci/config.yml | 21 +- api/v1beta2/tags.go | 6 + .../bootstrap/cluster_api_controller.go | 5 +- .../bootstrap/fixtures/customsuffix.yaml | 1 + .../bootstrap/fixtures/default.yaml | 1 + .../fixtures/with_all_secret_backends.yaml | 1 + .../fixtures/with_allow_assume_role.yaml | 1 + .../fixtures/with_bootstrap_user.yaml | 1 + .../fixtures/with_custom_bootstrap_user.yaml | 1 + .../with_different_instance_profiles.yaml | 1 + .../bootstrap/fixtures/with_eks_console.yaml | 1 + .../fixtures/with_eks_default_roles.yaml | 1 + .../bootstrap/fixtures/with_eks_disable.yaml | 1 + .../fixtures/with_eks_kms_prefix.yaml | 1 + .../fixtures/with_extra_statements.yaml | 1 + .../bootstrap/fixtures/with_s3_bucket.yaml | 5 +- .../fixtures/with_ssm_secret_backend.yaml | 1 + ...ture.cluster.x-k8s.io_awsmachinepools.yaml | 17 + exp/api/v1beta1/conversion.go | 3 + exp/api/v1beta1/zz_generated.conversion.go | 1 + exp/api/v1beta2/awsmachinepool_types.go | 4 + exp/api/v1beta2/awsmachinepool_webhook.go | 28 +- exp/api/v1beta2/zz_generated.deepcopy.go | 5 + exp/controllers/awsmachinepool_controller.go | 78 +++-- .../awsmachinepool_controller_test.go | 297 +++++++++++++++--- .../awsmanagedmachinepool_controller.go | 32 +- go.mod | 2 +- pkg/cloud/scope/ignition.go | 26 ++ pkg/cloud/scope/launchtemplate.go | 7 +- pkg/cloud/scope/machinepool.go | 13 +- pkg/cloud/scope/managednodegroup.go | 15 +- .../services/autoscaling/autoscalinggroup.go | 36 ++- .../autoscaling/autoscalinggroup_test.go | 20 +- pkg/cloud/services/ec2/launchtemplate.go | 273 ++++++++++++---- pkg/cloud/services/ec2/launchtemplate_test.go | 69 ++-- 
pkg/cloud/services/interfaces.go | 17 +- .../autoscaling_interface_mock.go | 21 +- .../mock_services/ec2_interface_mock.go | 31 +- .../objectstore_machine_interface_mock.go | 29 ++ .../mock_services/reconcile_interface_mock.go | 14 +- pkg/cloud/services/s3/s3.go | 179 +++++++++++ pkg/cloud/services/s3/s3_test.go | 38 ++- 42 files changed, 1046 insertions(+), 259 deletions(-) create mode 100644 pkg/cloud/scope/ignition.go diff --git a/.circleci/config.yml b/.circleci/config.yml index 9e324a132f..6c6458c093 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -1,10 +1,26 @@ version: 2.1 - +orbs: + go: circleci/go@1.11.0 jobs: + test: + resource_class: large + executor: + name: go/default + tag: "1.22.3" + steps: + - checkout + - go/load-cache + - go/mod-download + - run: + command: make setup-envtest + - go/save-cache + - run: + command: make test + build: machine: - image: "ubuntu-2204:2022.10.2" + image: "ubuntu-2204:2024.05.1" environment: ALL_ARCH: "amd64 arm64" REGISTRY_AZURE: gsoci.azurecr.io/giantswarm @@ -81,3 +97,4 @@ workflows: filters: tags: only: /^v.*/ + - test diff --git a/api/v1beta2/tags.go b/api/v1beta2/tags.go index e6e0ea7e73..45bc371a49 100644 --- a/api/v1beta2/tags.go +++ b/api/v1beta2/tags.go @@ -195,6 +195,12 @@ const ( // of the bootstrap secret that was used to create the user data for the latest launch // template version. LaunchTemplateBootstrapDataSecret = NameAWSProviderPrefix + "bootstrap-data-secret" + + // LaunchTemplateBootstrapDataHash is the tag we use to store the hash of the raw bootstrap data. + // If bootstrap data is stored in S3, this hash relates to that data, not to the EC2 instance + // user data which only references the S3 object. We store this tag on launch template versions + // so that S3 bootstrap data objects can be deleted when they get outdated. + LaunchTemplateBootstrapDataHash = NameAWSProviderPrefix + "bootstrap-data-hash" ) // ClusterTagKey generates the key for resources associated with a cluster. 
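To make the purpose of the new `LaunchTemplateBootstrapDataHash` tag concrete, here is a minimal sketch (not part of this patch) of how a controller could fingerprint raw bootstrap data and compare it against the hash tagged on a launch template version. The helper names and the choice of SHA-256 are assumptions for illustration only; the fork's actual logic lands in pkg/cloud/services/ec2/launchtemplate.go further below in this patch.

package main

import (
	"crypto/sha256"
	"encoding/hex"
	"fmt"
)

// userDataHash returns a stable fingerprint of the raw bootstrap data. When
// user data is stored in S3, the hash covers that object's content, not the
// small EC2 user data stub that merely references the S3 object.
func userDataHash(bootstrapData []byte) string {
	sum := sha256.Sum256(bootstrapData)
	return hex.EncodeToString(sum[:])
}

// versionIsCurrent compares the hash stored in the bootstrap-data-hash tag
// of a launch template version with the hash of the desired data; on a
// mismatch, the S3 object behind the outdated version can be deleted.
func versionIsCurrent(taggedHash string, desired []byte) bool {
	return taggedHash == userDataHash(desired)
}

func main() {
	data := []byte("#!/bin/bash\necho bootstrap")
	fmt.Println(versionIsCurrent(userDataHash(data), data)) // prints: true
}

The key design point is that the tag tracks the *content* of the bootstrap data, so a new launch template version is only needed when the data itself changes, not when the S3 object key changes.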
diff --git a/cmd/clusterawsadm/cloudformation/bootstrap/cluster_api_controller.go b/cmd/clusterawsadm/cloudformation/bootstrap/cluster_api_controller.go index 905403cedd..aab49ec059 100644 --- a/cmd/clusterawsadm/cloudformation/bootstrap/cluster_api_controller.go +++ b/cmd/clusterawsadm/cloudformation/bootstrap/cluster_api_controller.go @@ -190,6 +190,7 @@ func (t Template) ControllersPolicy() *iamv1.PolicyDocument { "arn:*:autoscaling:*:*:autoScalingGroup:*:autoScalingGroupName/*", }, Action: iamv1.Actions{ + "autoscaling:CancelInstanceRefresh", "autoscaling:CreateAutoScalingGroup", "autoscaling:UpdateAutoScalingGroup", "autoscaling:CreateOrUpdateTags", @@ -289,10 +290,12 @@ func (t Template) ControllersPolicy() *iamv1.PolicyDocument { "s3:CreateBucket", "s3:DeleteBucket", "s3:GetObject", - "s3:PutObject", "s3:DeleteObject", + "s3:ListBucket", "s3:PutBucketPolicy", "s3:PutBucketTagging", + "s3:PutLifecycleConfiguration", + "s3:PutObject", }, }) } diff --git a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/customsuffix.yaml b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/customsuffix.yaml index 3afd943654..1d93090c0e 100644 --- a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/customsuffix.yaml +++ b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/customsuffix.yaml @@ -245,6 +245,7 @@ Resources: Resource: - '*' - Action: + - autoscaling:CancelInstanceRefresh - autoscaling:CreateAutoScalingGroup - autoscaling:UpdateAutoScalingGroup - autoscaling:CreateOrUpdateTags diff --git a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/default.yaml b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/default.yaml index 4c25142282..05597566e7 100644 --- a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/default.yaml +++ b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/default.yaml @@ -245,6 +245,7 @@ Resources: Resource: - '*' - Action: + - autoscaling:CancelInstanceRefresh - autoscaling:CreateAutoScalingGroup - autoscaling:UpdateAutoScalingGroup - autoscaling:CreateOrUpdateTags diff --git a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_all_secret_backends.yaml b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_all_secret_backends.yaml index b342bfeb92..7f6341661a 100644 --- a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_all_secret_backends.yaml +++ b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_all_secret_backends.yaml @@ -251,6 +251,7 @@ Resources: Resource: - '*' - Action: + - autoscaling:CancelInstanceRefresh - autoscaling:CreateAutoScalingGroup - autoscaling:UpdateAutoScalingGroup - autoscaling:CreateOrUpdateTags diff --git a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_allow_assume_role.yaml b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_allow_assume_role.yaml index 31468775d6..8fafc495cb 100644 --- a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_allow_assume_role.yaml +++ b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_allow_assume_role.yaml @@ -245,6 +245,7 @@ Resources: Resource: - '*' - Action: + - autoscaling:CancelInstanceRefresh - autoscaling:CreateAutoScalingGroup - autoscaling:UpdateAutoScalingGroup - autoscaling:CreateOrUpdateTags diff --git a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_bootstrap_user.yaml b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_bootstrap_user.yaml index 5f6e9ffa21..61ccf9a62d 100644 --- a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_bootstrap_user.yaml +++ 
b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_bootstrap_user.yaml @@ -251,6 +251,7 @@ Resources: Resource: - '*' - Action: + - autoscaling:CancelInstanceRefresh - autoscaling:CreateAutoScalingGroup - autoscaling:UpdateAutoScalingGroup - autoscaling:CreateOrUpdateTags diff --git a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_custom_bootstrap_user.yaml b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_custom_bootstrap_user.yaml index 7e4564e7b4..9a31e63c8b 100644 --- a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_custom_bootstrap_user.yaml +++ b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_custom_bootstrap_user.yaml @@ -251,6 +251,7 @@ Resources: Resource: - '*' - Action: + - autoscaling:CancelInstanceRefresh - autoscaling:CreateAutoScalingGroup - autoscaling:UpdateAutoScalingGroup - autoscaling:CreateOrUpdateTags diff --git a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_different_instance_profiles.yaml b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_different_instance_profiles.yaml index 5a0c91d1bb..f6bfc34a9c 100644 --- a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_different_instance_profiles.yaml +++ b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_different_instance_profiles.yaml @@ -245,6 +245,7 @@ Resources: Resource: - '*' - Action: + - autoscaling:CancelInstanceRefresh - autoscaling:CreateAutoScalingGroup - autoscaling:UpdateAutoScalingGroup - autoscaling:CreateOrUpdateTags diff --git a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_eks_console.yaml b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_eks_console.yaml index 1010746967..80a439ba31 100644 --- a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_eks_console.yaml +++ b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_eks_console.yaml @@ -245,6 +245,7 @@ Resources: Resource: - '*' - Action: + - autoscaling:CancelInstanceRefresh - autoscaling:CreateAutoScalingGroup - autoscaling:UpdateAutoScalingGroup - autoscaling:CreateOrUpdateTags diff --git a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_eks_default_roles.yaml b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_eks_default_roles.yaml index 7880019781..be47ec8f75 100644 --- a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_eks_default_roles.yaml +++ b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_eks_default_roles.yaml @@ -245,6 +245,7 @@ Resources: Resource: - '*' - Action: + - autoscaling:CancelInstanceRefresh - autoscaling:CreateAutoScalingGroup - autoscaling:UpdateAutoScalingGroup - autoscaling:CreateOrUpdateTags diff --git a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_eks_disable.yaml b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_eks_disable.yaml index be85872a9e..c224931904 100644 --- a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_eks_disable.yaml +++ b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_eks_disable.yaml @@ -245,6 +245,7 @@ Resources: Resource: - '*' - Action: + - autoscaling:CancelInstanceRefresh - autoscaling:CreateAutoScalingGroup - autoscaling:UpdateAutoScalingGroup - autoscaling:CreateOrUpdateTags diff --git a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_eks_kms_prefix.yaml b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_eks_kms_prefix.yaml index 037f81cc82..6892174d7b 100644 --- a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_eks_kms_prefix.yaml +++ 
b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_eks_kms_prefix.yaml @@ -245,6 +245,7 @@ Resources: Resource: - '*' - Action: + - autoscaling:CancelInstanceRefresh - autoscaling:CreateAutoScalingGroup - autoscaling:UpdateAutoScalingGroup - autoscaling:CreateOrUpdateTags diff --git a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_extra_statements.yaml b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_extra_statements.yaml index c1f9a0ca90..7cb9d086c5 100644 --- a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_extra_statements.yaml +++ b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_extra_statements.yaml @@ -251,6 +251,7 @@ Resources: Resource: - '*' - Action: + - autoscaling:CancelInstanceRefresh - autoscaling:CreateAutoScalingGroup - autoscaling:UpdateAutoScalingGroup - autoscaling:CreateOrUpdateTags diff --git a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_s3_bucket.yaml b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_s3_bucket.yaml index 659616c606..a755b08ede 100644 --- a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_s3_bucket.yaml +++ b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_s3_bucket.yaml @@ -245,6 +245,7 @@ Resources: Resource: - '*' - Action: + - autoscaling:CancelInstanceRefresh - autoscaling:CreateAutoScalingGroup - autoscaling:UpdateAutoScalingGroup - autoscaling:CreateOrUpdateTags @@ -294,10 +295,12 @@ Resources: - s3:CreateBucket - s3:DeleteBucket - s3:GetObject - - s3:PutObject - s3:DeleteObject + - s3:ListBucket - s3:PutBucketPolicy - s3:PutBucketTagging + - s3:PutLifecycleConfiguration + - s3:PutObject Effect: Allow Resource: - arn:*:s3:::cluster-api-provider-aws-* diff --git a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_ssm_secret_backend.yaml b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_ssm_secret_backend.yaml index 327487795c..0edc12fab9 100644 --- a/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_ssm_secret_backend.yaml +++ b/cmd/clusterawsadm/cloudformation/bootstrap/fixtures/with_ssm_secret_backend.yaml @@ -245,6 +245,7 @@ Resources: Resource: - '*' - Action: + - autoscaling:CancelInstanceRefresh - autoscaling:CreateAutoScalingGroup - autoscaling:UpdateAutoScalingGroup - autoscaling:CreateOrUpdateTags diff --git a/config/crd/bases/infrastructure.cluster.x-k8s.io_awsmachinepools.yaml b/config/crd/bases/infrastructure.cluster.x-k8s.io_awsmachinepools.yaml index 1892d984f2..ae1d6e3b83 100644 --- a/config/crd/bases/infrastructure.cluster.x-k8s.io_awsmachinepools.yaml +++ b/config/crd/bases/infrastructure.cluster.x-k8s.io_awsmachinepools.yaml @@ -884,6 +884,23 @@ spec: after it enters the InService state. If no value is supplied by user a default value of 300 seconds is set type: string + ignition: + description: Ignition defined options related to the bootstrapping + systems where Ignition is used. + properties: + version: + default: "2.3" + description: Version defines which version of Ignition will be + used to generate bootstrap data. + enum: + - "2.3" + - "3.0" + - "3.1" + - "3.2" + - "3.3" + - "3.4" + type: string + type: object maxSize: default: 1 description: MaxSize defines the maximum size of the group. 
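The `s3:PutLifecycleConfiguration` permission added to the IAM fixtures above suggests the controller manages an expiry rule for bootstrap data objects in the cluster's S3 bucket. As a hedged sketch of what such a rule could look like with the AWS SDK — the bucket name, rule ID, prefix, and retention period below are made up for illustration and are not taken from this patch:

package main

import (
	"log"

	"github.com/aws/aws-sdk-go/aws"
	"github.com/aws/aws-sdk-go/aws/session"
	"github.com/aws/aws-sdk-go/service/s3"
)

func main() {
	sess := session.Must(session.NewSession())
	client := s3.New(sess)

	// Expire leftover machine pool user data objects after one day so that
	// objects orphaned by outdated launch template versions do not pile up.
	_, err := client.PutBucketLifecycleConfiguration(&s3.PutBucketLifecycleConfigurationInput{
		Bucket: aws.String("cluster-api-provider-aws-example"), // hypothetical bucket name
		LifecycleConfiguration: &s3.BucketLifecycleConfiguration{
			Rules: []*s3.LifecycleRule{{
				ID:         aws.String("expire-machine-pool-user-data"), // illustrative rule ID
				Status:     aws.String("Enabled"),
				Filter:     &s3.LifecycleRuleFilter{Prefix: aws.String("machine-pool/")},
				Expiration: &s3.LifecycleExpiration{Days: aws.Int64(1)},
			}},
		},
	})
	if err != nil {
		log.Fatal(err)
	}
}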
diff --git a/exp/api/v1beta1/conversion.go b/exp/api/v1beta1/conversion.go index 18508949d5..603131a2e4 100644 --- a/exp/api/v1beta1/conversion.go +++ b/exp/api/v1beta1/conversion.go @@ -50,6 +50,9 @@ func (src *AWSMachinePool) ConvertTo(dstRaw conversion.Hub) error { if restored.Spec.AvailabilityZoneSubnetType != nil { dst.Spec.AvailabilityZoneSubnetType = restored.Spec.AvailabilityZoneSubnetType } + if restored.Spec.Ignition != nil { + dst.Spec.Ignition = restored.Spec.Ignition + } if restored.Spec.AWSLaunchTemplate.PrivateDNSName != nil { dst.Spec.AWSLaunchTemplate.PrivateDNSName = restored.Spec.AWSLaunchTemplate.PrivateDNSName diff --git a/exp/api/v1beta1/zz_generated.conversion.go b/exp/api/v1beta1/zz_generated.conversion.go index 4f50a8c20f..3c88285772 100644 --- a/exp/api/v1beta1/zz_generated.conversion.go +++ b/exp/api/v1beta1/zz_generated.conversion.go @@ -560,6 +560,7 @@ func autoConvert_v1beta2_AWSMachinePoolSpec_To_v1beta1_AWSMachinePoolSpec(in *v1 } out.CapacityRebalance = in.CapacityRebalance // WARNING: in.SuspendProcesses requires manual conversion: does not exist in peer-type + // WARNING: in.Ignition requires manual conversion: does not exist in peer-type return nil } diff --git a/exp/api/v1beta2/awsmachinepool_types.go b/exp/api/v1beta2/awsmachinepool_types.go index a9c26a3e60..d29ce0676b 100644 --- a/exp/api/v1beta2/awsmachinepool_types.go +++ b/exp/api/v1beta2/awsmachinepool_types.go @@ -101,6 +101,10 @@ type AWSMachinePoolSpec struct { // SuspendProcesses defines a list of processes to suspend for the given ASG. This is constantly reconciled. // If a process is removed from this list it will automatically be resumed. SuspendProcesses *SuspendProcessesTypes `json:"suspendProcesses,omitempty"` + + // Ignition defined options related to the bootstrapping systems where Ignition is used. + // +optional + Ignition *infrav1.Ignition `json:"ignition,omitempty"` } // SuspendProcessesTypes contains user friendly auto-completable values for suspended process names. 
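For illustration, the new field can be populated from Go code as follows. This is a usage sketch only: it assumes the `Ignition` type from api/v1beta2 exposes exactly the `Version` field described in the CRD schema earlier, and the version values mirror that enum.

package main

import (
	"fmt"

	infrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/api/v1beta2"
	expinfrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/exp/api/v1beta2"
)

func main() {
	pool := expinfrav1.AWSMachinePool{
		Spec: expinfrav1.AWSMachinePoolSpec{
			Ignition: &infrav1.Ignition{
				// One of "2.3", "3.0", ..., "3.4" per the CRD enum; when left
				// empty, the defaulting webhook (see below) fills in
				// infrav1.DefaultIgnitionVersion.
				Version: "3.4",
			},
		},
	}
	fmt.Println(pool.Spec.Ignition.Version)
}

Note that `Spec.Ignition` stays nil unless the user opts in, which is what the webhook's `ignitionEnabled()` check in the next file relies on.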
diff --git a/exp/api/v1beta2/awsmachinepool_webhook.go b/exp/api/v1beta2/awsmachinepool_webhook.go index ab434ffb4b..f392e8670b 100644 --- a/exp/api/v1beta2/awsmachinepool_webhook.go +++ b/exp/api/v1beta2/awsmachinepool_webhook.go @@ -27,7 +27,8 @@ import ( "sigs.k8s.io/controller-runtime/pkg/webhook" "sigs.k8s.io/controller-runtime/pkg/webhook/admission" - "sigs.k8s.io/cluster-api-provider-aws/v2/api/v1beta2" + infrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/api/v1beta2" + "sigs.k8s.io/cluster-api-provider-aws/v2/feature" ) var log = ctrl.Log.WithName("awsmachinepool-resource") @@ -62,12 +63,12 @@ func (r *AWSMachinePool) validateRootVolume() field.ErrorList { return allErrs } - if v1beta2.VolumeTypesProvisioned.Has(string(r.Spec.AWSLaunchTemplate.RootVolume.Type)) && r.Spec.AWSLaunchTemplate.RootVolume.IOPS == 0 { + if infrav1.VolumeTypesProvisioned.Has(string(r.Spec.AWSLaunchTemplate.RootVolume.Type)) && r.Spec.AWSLaunchTemplate.RootVolume.IOPS == 0 { allErrs = append(allErrs, field.Required(field.NewPath("spec.awsLaunchTemplate.rootVolume.iops"), "iops required if type is 'io1' or 'io2'")) } if r.Spec.AWSLaunchTemplate.RootVolume.Throughput != nil { - if r.Spec.AWSLaunchTemplate.RootVolume.Type != v1beta2.VolumeTypeGP3 { + if r.Spec.AWSLaunchTemplate.RootVolume.Type != infrav1.VolumeTypeGP3 { allErrs = append(allErrs, field.Required(field.NewPath("spec.awsLaunchTemplate.rootVolume.throughput"), "throughput is valid only for type 'gp3'")) } if *r.Spec.AWSLaunchTemplate.RootVolume.Throughput < 0 { @@ -116,6 +117,22 @@ func (r *AWSMachinePool) validateSpotInstances() field.ErrorList { return allErrs } +func (r *AWSMachinePool) ignitionEnabled() bool { + return r.Spec.Ignition != nil +} + +func (r *AWSMachinePool) validateIgnition() field.ErrorList { + var allErrs field.ErrorList + + // Feature gate is not enabled but ignition is enabled then send a forbidden error. + if !feature.Gates.Enabled(feature.BootstrapFormatIgnition) && r.ignitionEnabled() { + allErrs = append(allErrs, field.Forbidden(field.NewPath("spec", "ignition"), + "can be set only if the BootstrapFormatIgnition feature gate is enabled")) + } + + return allErrs +} + // ValidateCreate will do any extra validation when creating a AWSMachinePool. func (r *AWSMachinePool) ValidateCreate() (admission.Warnings, error) { log.Info("AWSMachinePool validate create", "machine-pool", klog.KObj(r)) @@ -128,6 +145,7 @@ func (r *AWSMachinePool) ValidateCreate() (admission.Warnings, error) { allErrs = append(allErrs, r.validateSubnets()...) allErrs = append(allErrs, r.validateAdditionalSecurityGroups()...) allErrs = append(allErrs, r.validateSpotInstances()...) + allErrs = append(allErrs, r.validateIgnition()...) 
if len(allErrs) == 0 { return nil, nil @@ -177,4 +195,8 @@ func (r *AWSMachinePool) Default() { log.Info("DefaultInstanceWarmup is zero, setting 300 seconds as default") r.Spec.DefaultInstanceWarmup.Duration = 300 * time.Second } + + if r.ignitionEnabled() && r.Spec.Ignition.Version == "" { + r.Spec.Ignition.Version = infrav1.DefaultIgnitionVersion + } } diff --git a/exp/api/v1beta2/zz_generated.deepcopy.go b/exp/api/v1beta2/zz_generated.deepcopy.go index 7048d53196..e4a9c0988c 100644 --- a/exp/api/v1beta2/zz_generated.deepcopy.go +++ b/exp/api/v1beta2/zz_generated.deepcopy.go @@ -276,6 +276,11 @@ func (in *AWSMachinePoolSpec) DeepCopyInto(out *AWSMachinePoolSpec) { *out = new(SuspendProcessesTypes) (*in).DeepCopyInto(*out) } + if in.Ignition != nil { + in, out := &in.Ignition, &out.Ignition + *out = new(apiv1beta2.Ignition) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new AWSMachinePoolSpec. diff --git a/exp/controllers/awsmachinepool_controller.go b/exp/controllers/awsmachinepool_controller.go index 741cdcdb10..66a20dee71 100644 --- a/exp/controllers/awsmachinepool_controller.go +++ b/exp/controllers/awsmachinepool_controller.go @@ -46,6 +46,7 @@ import ( "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services" asg "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services/autoscaling" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services/ec2" + "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services/s3" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/logger" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" expclusterv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" @@ -63,6 +64,7 @@ type AWSMachinePoolReconciler struct { asgServiceFactory func(cloud.ClusterScoper) services.ASGInterface ec2ServiceFactory func(scope.EC2Scope) services.EC2Interface reconcileServiceFactory func(scope.EC2Scope) services.MachinePoolReconcileInterface + objectStoreServiceFactory func(scope.S3Scope) services.ObjectStoreInterface TagUnmanagedNetworkResources bool } @@ -89,6 +91,19 @@ func (r *AWSMachinePoolReconciler) getReconcileService(scope scope.EC2Scope) ser return ec2.NewService(scope) } +func (r *AWSMachinePoolReconciler) getObjectStoreService(scope scope.S3Scope) services.ObjectStoreInterface { + if scope.Bucket() == nil { + // S3 bucket usage not enabled, so object store service not needed + return nil + } + + if r.objectStoreServiceFactory != nil { + return r.objectStoreServiceFactory(scope) + } + + return s3.NewService(scope) +} + // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=awsmachinepools,verbs=get;list;watch;update;patch;delete // +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=awsmachinepools/status,verbs=get;update;patch // +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinepools;machinepools/status,verbs=get;list;watch;patch @@ -130,7 +145,7 @@ func (r *AWSMachinePoolReconciler) Reconcile(ctx context.Context, req ctrl.Reque log = log.WithValues("cluster", klog.KObj(cluster)) - infraCluster, err := r.getInfraCluster(ctx, log, cluster, awsMachinePool) + infraCluster, s3Scope, err := r.getInfraCluster(ctx, log, cluster, awsMachinePool) if err != nil { return ctrl.Result{}, fmt.Errorf("getting infra provider cluster or control plane object: %w", err) } @@ -178,13 +193,13 @@ func (r *AWSMachinePoolReconciler) Reconcile(ctx context.Context, req ctrl.Reque return ctrl.Result{}, r.reconcileDelete(machinePoolScope, infraScope, infraScope) } - return ctrl.Result{}, r.reconcileNormal(ctx, 
machinePoolScope, infraScope, infraScope) + return r.reconcileNormal(ctx, machinePoolScope, infraScope, infraScope, s3Scope) case *scope.ClusterScope: if !awsMachinePool.ObjectMeta.DeletionTimestamp.IsZero() { return ctrl.Result{}, r.reconcileDelete(machinePoolScope, infraScope, infraScope) } - return ctrl.Result{}, r.reconcileNormal(ctx, machinePoolScope, infraScope, infraScope) + return r.reconcileNormal(ctx, machinePoolScope, infraScope, infraScope, s3Scope) default: return ctrl.Result{}, errors.New("infraCluster has unknown type") } @@ -202,7 +217,7 @@ func (r *AWSMachinePoolReconciler) SetupWithManager(ctx context.Context, mgr ctr Complete(r) } -func (r *AWSMachinePoolReconciler) reconcileNormal(ctx context.Context, machinePoolScope *scope.MachinePoolScope, clusterScope cloud.ClusterScoper, ec2Scope scope.EC2Scope) error { +func (r *AWSMachinePoolReconciler) reconcileNormal(ctx context.Context, machinePoolScope *scope.MachinePoolScope, clusterScope cloud.ClusterScoper, ec2Scope scope.EC2Scope, s3Scope scope.S3Scope) (ctrl.Result, error) { clusterScope.Info("Reconciling AWSMachinePool") // If the AWSMachine is in an error state, return early. @@ -211,52 +226,57 @@ func (r *AWSMachinePoolReconciler) reconcileNormal(ctx context.Context, machineP // TODO: If we are in a failed state, delete the secret regardless of instance state - return nil + return ctrl.Result{}, nil } // If the AWSMachinepool doesn't have our finalizer, add it if controllerutil.AddFinalizer(machinePoolScope.AWSMachinePool, expinfrav1.MachinePoolFinalizer) { // Register finalizer immediately to avoid orphaning AWS resources if err := machinePoolScope.PatchObject(); err != nil { - return err + return ctrl.Result{}, err } } if !machinePoolScope.Cluster.Status.InfrastructureReady { machinePoolScope.Info("Cluster infrastructure is not ready yet") conditions.MarkFalse(machinePoolScope.AWSMachinePool, expinfrav1.ASGReadyCondition, infrav1.WaitingForClusterInfrastructureReason, clusterv1.ConditionSeverityInfo, "") - return nil + return ctrl.Result{}, nil } // Make sure bootstrap data is available and populated if machinePoolScope.MachinePool.Spec.Template.Spec.Bootstrap.DataSecretName == nil { machinePoolScope.Info("Bootstrap data secret reference is not yet available") conditions.MarkFalse(machinePoolScope.AWSMachinePool, expinfrav1.ASGReadyCondition, infrav1.WaitingForBootstrapDataReason, clusterv1.ConditionSeverityInfo, "") - return nil + return ctrl.Result{}, nil } ec2Svc := r.getEC2Service(ec2Scope) asgsvc := r.getASGService(clusterScope) reconSvc := r.getReconcileService(ec2Scope) + objectStoreSvc := r.getObjectStoreService(s3Scope) // Find existing ASG asg, err := r.findASG(machinePoolScope, asgsvc) if err != nil { conditions.MarkUnknown(machinePoolScope.AWSMachinePool, expinfrav1.ASGReadyCondition, expinfrav1.ASGNotFoundReason, err.Error()) - return err + return ctrl.Result{}, err } - canUpdateLaunchTemplate := func() (bool, error) { + canStartInstanceRefresh := func() (bool, *string, error) { // If there is a change: before changing the template, check if there exist an ongoing instance refresh, // because only 1 instance refresh can be "InProgress". If template is updated when refresh cannot be started, // that change will not trigger a refresh. Do not start an instance refresh if only userdata changed. if asg == nil { // If the ASG hasn't been created yet, there is no need to check if we can start the instance refresh. 
// But we want to update the LaunchTemplate because an error in the LaunchTemplate may be blocking the ASG creation. - return true, nil + return true, nil, nil } return asgsvc.CanStartASGInstanceRefresh(machinePoolScope) } + cancelInstanceRefresh := func() error { + machinePoolScope.Info("cancelling instance refresh") + return asgsvc.CancelASGInstanceRefresh(machinePoolScope) + } runPostLaunchTemplateUpdateOperation := func() error { // skip instance refresh if ASG is not created yet if asg == nil { @@ -280,10 +300,14 @@ func (r *AWSMachinePoolReconciler) reconcileNormal(ctx context.Context, machineP machinePoolScope.Info("starting instance refresh", "number of instances", machinePoolScope.MachinePool.Spec.Replicas) return asgsvc.StartASGInstanceRefresh(machinePoolScope) } - if err := reconSvc.ReconcileLaunchTemplate(machinePoolScope, ec2Svc, canUpdateLaunchTemplate, runPostLaunchTemplateUpdateOperation); err != nil { + res, err := reconSvc.ReconcileLaunchTemplate(machinePoolScope, machinePoolScope, s3Scope, ec2Svc, objectStoreSvc, canStartInstanceRefresh, cancelInstanceRefresh, runPostLaunchTemplateUpdateOperation) + if err != nil { r.Recorder.Eventf(machinePoolScope.AWSMachinePool, corev1.EventTypeWarning, "FailedLaunchTemplateReconcile", "Failed to reconcile launch template: %v", err) machinePoolScope.Error(err, "failed to reconcile launch template") - return err + return ctrl.Result{}, err + } + if res != nil { + return *res, nil } // set the LaunchTemplateReady condition @@ -293,9 +317,9 @@ func (r *AWSMachinePoolReconciler) reconcileNormal(ctx context.Context, machineP // Create new ASG if err := r.createPool(machinePoolScope, clusterScope); err != nil { conditions.MarkFalse(machinePoolScope.AWSMachinePool, expinfrav1.ASGReadyCondition, expinfrav1.ASGProvisionFailedReason, clusterv1.ConditionSeverityError, err.Error()) - return err + return ctrl.Result{}, err } - return nil + return ctrl.Result{}, nil } if annotations.ReplicasManagedByExternalAutoscaler(machinePoolScope.MachinePool) { @@ -306,14 +330,14 @@ func (r *AWSMachinePoolReconciler) reconcileNormal(ctx context.Context, machineP "external", asg.DesiredCapacity) machinePoolScope.MachinePool.Spec.Replicas = asg.DesiredCapacity if err := machinePoolScope.PatchCAPIMachinePoolObject(ctx); err != nil { - return err + return ctrl.Result{}, err } } } if err := r.updatePool(machinePoolScope, clusterScope, asg); err != nil { machinePoolScope.Error(err, "error updating AWSMachinePool") - return err + return ctrl.Result{}, err } launchTemplateID := machinePoolScope.GetLaunchTemplateIDStatus() @@ -330,7 +354,7 @@ func (r *AWSMachinePoolReconciler) reconcileNormal(ctx context.Context, machineP } err = reconSvc.ReconcileTags(machinePoolScope, resourceServiceToUpdate) if err != nil { - return errors.Wrap(err, "error updating tags") + return ctrl.Result{}, errors.Wrap(err, "error updating tags") } // Make sure Spec.ProviderID is always set. 
@@ -353,7 +377,7 @@ func (r *AWSMachinePoolReconciler) reconcileNormal(ctx context.Context, machineP machinePoolScope.Error(err, "failed updating instances", "instances", asg.Instances) } - return nil + return ctrl.Result{}, nil } func (r *AWSMachinePoolReconciler) reconcileDelete(machinePoolScope *scope.MachinePoolScope, clusterScope cloud.ClusterScoper, ec2Scope scope.EC2Scope) error { @@ -389,7 +413,7 @@ func (r *AWSMachinePoolReconciler) reconcileDelete(machinePoolScope *scope.Machi } launchTemplateID := machinePoolScope.AWSMachinePool.Status.LaunchTemplateID - launchTemplate, _, _, err := ec2Svc.GetLaunchTemplate(machinePoolScope.LaunchTemplateName()) + launchTemplate, _, _, _, err := ec2Svc.GetLaunchTemplate(machinePoolScope.LaunchTemplateName()) if err != nil { return err } @@ -607,7 +631,7 @@ func machinePoolToInfrastructureMapFunc(gvk schema.GroupVersionKind) handler.Map } } -func (r *AWSMachinePoolReconciler) getInfraCluster(ctx context.Context, log *logger.Logger, cluster *clusterv1.Cluster, awsMachinePool *expinfrav1.AWSMachinePool) (scope.EC2Scope, error) { +func (r *AWSMachinePoolReconciler) getInfraCluster(ctx context.Context, log *logger.Logger, cluster *clusterv1.Cluster, awsMachinePool *expinfrav1.AWSMachinePool) (scope.EC2Scope, scope.S3Scope, error) { var clusterScope *scope.ClusterScope var managedControlPlaneScope *scope.ManagedControlPlaneScope var err error @@ -621,7 +645,7 @@ func (r *AWSMachinePoolReconciler) getInfraCluster(ctx context.Context, log *log if err := r.Get(ctx, controlPlaneName, controlPlane); err != nil { // AWSManagedControlPlane is not ready - return nil, nil //nolint:nilerr + return nil, nil, nil //nolint:nilerr } managedControlPlaneScope, err = scope.NewManagedControlPlaneScope(scope.ManagedControlPlaneScopeParams{ @@ -633,10 +657,10 @@ func (r *AWSMachinePoolReconciler) getInfraCluster(ctx context.Context, log *log TagUnmanagedNetworkResources: r.TagUnmanagedNetworkResources, }) if err != nil { - return nil, err + return nil, nil, err } - return managedControlPlaneScope, nil + return managedControlPlaneScope, managedControlPlaneScope, nil } awsCluster := &infrav1.AWSCluster{} @@ -648,7 +672,7 @@ func (r *AWSMachinePoolReconciler) getInfraCluster(ctx context.Context, log *log if err := r.Client.Get(ctx, infraClusterName, awsCluster); err != nil { // AWSCluster is not ready - return nil, nil //nolint:nilerr + return nil, nil, nil //nolint:nilerr } // Create the cluster scope @@ -661,8 +685,8 @@ func (r *AWSMachinePoolReconciler) getInfraCluster(ctx context.Context, log *log TagUnmanagedNetworkResources: r.TagUnmanagedNetworkResources, }) if err != nil { - return nil, err + return nil, nil, err } - return clusterScope, nil + return clusterScope, clusterScope, nil } diff --git a/exp/controllers/awsmachinepool_controller_test.go b/exp/controllers/awsmachinepool_controller_test.go index 4448fb94f3..2e86eb79c8 100644 --- a/exp/controllers/awsmachinepool_controller_test.go +++ b/exp/controllers/awsmachinepool_controller_test.go @@ -19,11 +19,15 @@ package controllers import ( "bytes" "context" + "encoding/base64" "flag" "fmt" "testing" "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/service/autoscaling" + "github.com/aws/aws-sdk-go/service/ec2" + "github.com/aws/aws-sdk-go/service/s3" "github.com/go-logr/logr" "github.com/golang/mock/gomock" . 
"github.com/onsi/gomega" @@ -43,6 +47,9 @@ import ( "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/scope" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services/mock_services" + s3svc "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services/s3" + "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services/s3/mock_s3iface" + "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services/s3/mock_stsiface" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services/userdata" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/logger" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" @@ -61,6 +68,8 @@ func TestAWSMachinePoolReconciler(t *testing.T) { ec2Svc *mock_services.MockEC2Interface asgSvc *mock_services.MockASGInterface reconSvc *mock_services.MockMachinePoolReconcileInterface + s3Mock *mock_s3iface.MockS3API + stsMock *mock_stsiface.MockSTSAPI recorder *record.FakeRecorder awsMachinePool *expinfrav1.AWSMachinePool secret *corev1.Secret @@ -158,6 +167,8 @@ func TestAWSMachinePoolReconciler(t *testing.T) { ec2Svc = mock_services.NewMockEC2Interface(mockCtrl) asgSvc = mock_services.NewMockASGInterface(mockCtrl) reconSvc = mock_services.NewMockMachinePoolReconcileInterface(mockCtrl) + s3Mock = mock_s3iface.NewMockS3API(mockCtrl) + stsMock = mock_stsiface.NewMockSTSAPI(mockCtrl) // If the test hangs for 9 minutes, increase the value here to the number of events during a reconciliation loop recorder = record.NewFakeRecorder(2) @@ -172,6 +183,12 @@ func TestAWSMachinePoolReconciler(t *testing.T) { reconcileServiceFactory: func(scope.EC2Scope) services.MachinePoolReconcileInterface { return reconSvc }, + objectStoreServiceFactory: func(scope scope.S3Scope) services.ObjectStoreInterface { + svc := s3svc.NewService(scope) + svc.S3Client = s3Mock + svc.STSClient = stsMock + return svc + }, Recorder: recorder, } } @@ -195,7 +212,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { getASG := func(t *testing.T, g *WithT) { t.Helper() - ec2Svc.EXPECT().GetLaunchTemplate(gomock.Any()).Return(nil, "", nil, expectedErr).AnyTimes() + ec2Svc.EXPECT().GetLaunchTemplate(gomock.Any()).Return(nil, "", nil, nil, expectedErr).AnyTimes() asgSvc.EXPECT().GetASGByName(gomock.Any()).Return(nil, expectedErr).AnyTimes() } t.Run("should exit immediately on an error state", func(t *testing.T) { @@ -211,7 +228,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { buf := new(bytes.Buffer) klog.SetOutput(buf) - _ = reconciler.reconcileNormal(context.Background(), ms, cs, cs) + _, _ = reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(buf).To(ContainSubstring("Error state detected, skipping reconciliation")) }) t.Run("should add our finalizer to the machinepool", func(t *testing.T) { @@ -220,7 +237,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { defer teardown(t, g) getASG(t, g) - _ = reconciler.reconcileNormal(context.Background(), ms, cs, cs) + _, _ = reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(ms.AWSMachinePool.Finalizers).To(ContainElement(expinfrav1.MachinePoolFinalizer)) }) @@ -235,7 +252,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { buf := new(bytes.Buffer) klog.SetOutput(buf) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs) + _, err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(err).To(BeNil()) g.Expect(buf.String()).To(ContainSubstring("Cluster infrastructure is not ready yet")) expectConditions(g, ms.AWSMachinePool, 
[]conditionAssertion{{expinfrav1.ASGReadyCondition, corev1.ConditionFalse, clusterv1.ConditionSeverityInfo, infrav1.WaitingForClusterInfrastructureReason}}) @@ -250,7 +267,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { buf := new(bytes.Buffer) klog.SetOutput(buf) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs) + _, err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(err).To(BeNil()) g.Expect(buf.String()).To(ContainSubstring("Bootstrap data secret reference is not yet available")) @@ -266,7 +283,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { getASG := func(t *testing.T, g *WithT) { t.Helper() - ec2Svc.EXPECT().GetLaunchTemplate(gomock.Any()).Return(nil, "", nil, nil).AnyTimes() + ec2Svc.EXPECT().GetLaunchTemplate(gomock.Any()).Return(nil, "", nil, nil, nil).AnyTimes() asgSvc.EXPECT().GetASGByName(gomock.Any()).Return(nil, nil).AnyTimes() } t.Run("should look up by provider ID when one exists", func(t *testing.T) { @@ -277,8 +294,8 @@ func TestAWSMachinePoolReconciler(t *testing.T) { getASG(t, g) expectedErr := errors.New("no connection available ") - reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(expectedErr) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs) + reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, expectedErr) + _, err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(errors.Cause(err)).To(MatchError(expectedErr)) }) }) @@ -298,14 +315,14 @@ func TestAWSMachinePoolReconciler(t *testing.T) { defer teardown(t, g) setSuspendedProcesses(t, g) - reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil) + reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, nil) asgSvc.EXPECT().GetASGByName(gomock.Any()).Return(nil, nil) asgSvc.EXPECT().CreateASG(gomock.Any()).Return(&expinfrav1.AutoScalingGroup{ Name: "name", }, nil) asgSvc.EXPECT().SuspendProcesses("name", []string{"Launch", "Terminate"}).Return(nil).AnyTimes().Times(0) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs) + _, err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(err).To(Succeed()) }) }) @@ -322,7 +339,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { defer teardown(t, g) setSuspendedProcesses(t, g) ms.AWSMachinePool.Spec.SuspendProcesses.All = true - reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil) + reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, nil) reconSvc.EXPECT().ReconcileTags(gomock.Any(), gomock.Any()).Return(nil) asgSvc.EXPECT().GetASGByName(gomock.Any()).Return(&expinfrav1.AutoScalingGroup{ Name: "name", @@ -341,7 +358,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { "ReplaceUnhealthy", })).Return(nil).AnyTimes().Times(1) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs) + _, err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(err).To(Succeed()) }) }) @@ -362,7 +379,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { defer teardown(t, g) setSuspendedProcesses(t, g) - 
reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil) + reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, nil) reconSvc.EXPECT().ReconcileTags(gomock.Any(), gomock.Any()).Return(nil) asgSvc.EXPECT().GetASGByName(gomock.Any()).Return(&expinfrav1.AutoScalingGroup{ Name: "name", @@ -373,7 +390,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { asgSvc.EXPECT().SuspendProcesses("name", []string{"Terminate"}).Return(nil).AnyTimes().Times(1) asgSvc.EXPECT().ResumeProcesses("name", []string{"process3"}).Return(nil).AnyTimes().Times(1) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs) + _, err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(err).To(Succeed()) }) }) @@ -387,7 +404,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { Name: "an-asg", DesiredCapacity: ptr.To[int32](1), } - reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil) + reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, nil) asgSvc.EXPECT().GetASGByName(gomock.Any()).Return(&asg, nil) asgSvc.EXPECT().SubnetIDs(gomock.Any()).Return([]string{}, nil) asgSvc.EXPECT().UpdateASG(gomock.Any()).Return(nil) @@ -400,7 +417,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { g.Expect(testEnv.Create(ctx, ms.MachinePool)).To(Succeed()) - _ = reconciler.reconcileNormal(context.Background(), ms, cs, cs) + _, _ = reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(*ms.MachinePool.Spec.Replicas).To(Equal(int32(1))) }) t.Run("No need to update Asg because asgNeedsUpdates is false and no subnets change", func(t *testing.T) { @@ -425,13 +442,13 @@ func TestAWSMachinePoolReconciler(t *testing.T) { }, }, Subnets: []string{"subnet1", "subnet2"}} - reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil) + reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, nil) reconSvc.EXPECT().ReconcileTags(gomock.Any(), gomock.Any()).Return(nil) asgSvc.EXPECT().GetASGByName(gomock.Any()).Return(&asg, nil).AnyTimes() asgSvc.EXPECT().SubnetIDs(gomock.Any()).Return([]string{"subnet2", "subnet1"}, nil).Times(1) asgSvc.EXPECT().UpdateASG(gomock.Any()).Return(nil).Times(0) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs) + _, err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(err).To(Succeed()) }) t.Run("update Asg due to subnet changes", func(t *testing.T) { @@ -443,13 +460,13 @@ func TestAWSMachinePoolReconciler(t *testing.T) { MinSize: int32(0), MaxSize: int32(100), Subnets: []string{"subnet1", "subnet2"}} - reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil) + reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, nil) reconSvc.EXPECT().ReconcileTags(gomock.Any(), gomock.Any()).Return(nil) asgSvc.EXPECT().GetASGByName(gomock.Any()).Return(&asg, nil).AnyTimes() asgSvc.EXPECT().SubnetIDs(gomock.Any()).Return([]string{"subnet1"}, nil).Times(1) 
asgSvc.EXPECT().UpdateASG(gomock.Any()).Return(nil).Times(1) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs) + _, err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(err).To(Succeed()) }) t.Run("update Asg due to asgNeedsUpdates returns true", func(t *testing.T) { @@ -461,29 +478,29 @@ func TestAWSMachinePoolReconciler(t *testing.T) { MinSize: int32(0), MaxSize: int32(2), Subnets: []string{}} - reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil) + reconSvc.EXPECT().ReconcileLaunchTemplate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil, nil) reconSvc.EXPECT().ReconcileTags(gomock.Any(), gomock.Any()).Return(nil) asgSvc.EXPECT().GetASGByName(gomock.Any()).Return(&asg, nil).AnyTimes() asgSvc.EXPECT().SubnetIDs(gomock.Any()).Return([]string{}, nil).Times(1) asgSvc.EXPECT().UpdateASG(gomock.Any()).Return(nil).Times(1) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs) + _, err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(err).To(Succeed()) }) t.Run("ReconcileLaunchTemplate not mocked", func(t *testing.T) { - g := NewWithT(t) - setup(t, g) - reconciler.reconcileServiceFactory = nil // use real implementation, but keep EC2 calls mocked (`ec2ServiceFactory`) - reconSvc = nil // not used - defer teardown(t, g) - launchTemplateIDExisting := "lt-existing" t.Run("nothing exists, so launch template and ASG must be created", func(t *testing.T) { - ec2Svc.EXPECT().GetLaunchTemplate(gomock.Eq("test")).Return(nil, "", nil, nil) + g := NewWithT(t) + setup(t, g) + reconciler.reconcileServiceFactory = nil // use real implementation, but keep EC2 calls mocked (`ec2ServiceFactory`) + reconSvc = nil // not used + defer teardown(t, g) + + ec2Svc.EXPECT().GetLaunchTemplate(gomock.Eq("test")).Return(nil, "", nil, nil, nil) ec2Svc.EXPECT().DiscoverLaunchTemplateAMI(gomock.Any()).Return(ptr.To[string]("ami-abcdef123"), nil) - ec2Svc.EXPECT().CreateLaunchTemplate(gomock.Any(), gomock.Eq(ptr.To[string]("ami-abcdef123")), gomock.Eq(userDataSecretKey), gomock.Eq([]byte("shell-script"))).Return("lt-ghijkl456", nil) + ec2Svc.EXPECT().CreateLaunchTemplate(gomock.Any(), gomock.Eq(ptr.To[string]("ami-abcdef123")), gomock.Eq(userDataSecretKey), gomock.Eq([]byte("shell-script")), gomock.Eq(userdata.ComputeHash([]byte("shell-script")))).Return("lt-ghijkl456", nil) asgSvc.EXPECT().GetASGByName(gomock.Any()).Return(nil, nil) asgSvc.EXPECT().CreateASG(gomock.Any()).DoAndReturn(func(scope *scope.MachinePoolScope) (*expinfrav1.AutoScalingGroup, error) { g.Expect(scope.Name()).To(Equal("test")) @@ -492,11 +509,17 @@ func TestAWSMachinePoolReconciler(t *testing.T) { }, nil }) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs) + _, err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(err).To(Succeed()) }) t.Run("launch template and ASG exist and need no update", func(t *testing.T) { + g := NewWithT(t) + setup(t, g) + reconciler.reconcileServiceFactory = nil // use real implementation, but keep EC2 calls mocked (`ec2ServiceFactory`) + reconSvc = nil // not used + defer teardown(t, g) + // Latest ID and version already stored, no need to retrieve it ms.AWSMachinePool.Status.LaunchTemplateID = launchTemplateIDExisting ms.AWSMachinePool.Status.LaunchTemplateVersion = ptr.To[string]("1") @@ -511,6 +534,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { // No 
change to user data userdata.ComputeHash([]byte("shell-script")), &userDataSecretKey, + nil, nil) ec2Svc.EXPECT().DiscoverLaunchTemplateAMI(gomock.Any()).Return(ptr.To[string]("ami-existing"), nil) // no change ec2Svc.EXPECT().LaunchTemplateNeedsUpdate(gomock.Any(), gomock.Any(), gomock.Any()).Return(false, nil) @@ -533,11 +557,17 @@ func TestAWSMachinePoolReconciler(t *testing.T) { // No changes, so there must not be an ASG update! asgSvc.EXPECT().UpdateASG(gomock.Any()).Times(0) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs) + _, err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(err).To(Succeed()) }) t.Run("launch template and ASG exist and only AMI ID changed", func(t *testing.T) { + g := NewWithT(t) + setup(t, g) + reconciler.reconcileServiceFactory = nil // use real implementation, but keep EC2 calls mocked (`ec2ServiceFactory`) + reconSvc = nil // not used + defer teardown(t, g) + // Latest ID and version already stored, no need to retrieve it ms.AWSMachinePool.Status.LaunchTemplateID = launchTemplateIDExisting ms.AWSMachinePool.Status.LaunchTemplateVersion = ptr.To[string]("1") @@ -552,12 +582,13 @@ func TestAWSMachinePoolReconciler(t *testing.T) { // No change to user data userdata.ComputeHash([]byte("shell-script")), &userDataSecretKey, + nil, nil) ec2Svc.EXPECT().DiscoverLaunchTemplateAMI(gomock.Any()).Return(ptr.To[string]("ami-different"), nil) ec2Svc.EXPECT().LaunchTemplateNeedsUpdate(gomock.Any(), gomock.Any(), gomock.Any()).Return(false, nil) - asgSvc.EXPECT().CanStartASGInstanceRefresh(gomock.Any()).Return(true, nil) - ec2Svc.EXPECT().PruneLaunchTemplateVersions(gomock.Any()).Return(nil) - ec2Svc.EXPECT().CreateLaunchTemplateVersion(gomock.Any(), gomock.Any(), gomock.Eq(ptr.To[string]("ami-different")), gomock.Eq(apimachinerytypes.NamespacedName{Namespace: "default", Name: "bootstrap-data"}), gomock.Any()).Return(nil) + asgSvc.EXPECT().CanStartASGInstanceRefresh(gomock.Any()).Return(true, nil, nil) + ec2Svc.EXPECT().PruneLaunchTemplateVersions(gomock.Any()).Return(nil, nil) + ec2Svc.EXPECT().CreateLaunchTemplateVersion(gomock.Any(), gomock.Any(), gomock.Eq(ptr.To[string]("ami-different")), gomock.Eq(apimachinerytypes.NamespacedName{Namespace: "default", Name: "bootstrap-data"}), gomock.Any(), gomock.Any()).Return(nil) ec2Svc.EXPECT().GetLaunchTemplateLatestVersion(gomock.Any()).Return("2", nil) // AMI change should trigger rolling out new nodes asgSvc.EXPECT().StartASGInstanceRefresh(gomock.Any()) @@ -580,11 +611,17 @@ func TestAWSMachinePoolReconciler(t *testing.T) { // No changes, so there must not be an ASG update! 
asgSvc.EXPECT().UpdateASG(gomock.Any()).Times(0) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs) + _, err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(err).To(Succeed()) }) t.Run("launch template and ASG exist and only bootstrap data secret name changed", func(t *testing.T) { + g := NewWithT(t) + setup(t, g) + reconciler.reconcileServiceFactory = nil // use real implementation, but keep EC2 calls mocked (`ec2ServiceFactory`) + reconSvc = nil // not used + defer teardown(t, g) + // Latest ID and version already stored, no need to retrieve it ms.AWSMachinePool.Status.LaunchTemplateID = launchTemplateIDExisting ms.AWSMachinePool.Status.LaunchTemplateVersion = ptr.To[string]("1") @@ -600,12 +637,13 @@ func TestAWSMachinePoolReconciler(t *testing.T) { userdata.ComputeHash([]byte("shell-script")), // But the name of the secret changes from `previous-secret-name` to `bootstrap-data` &apimachinerytypes.NamespacedName{Namespace: "default", Name: "previous-secret-name"}, + nil, nil) ec2Svc.EXPECT().DiscoverLaunchTemplateAMI(gomock.Any()).Return(ptr.To[string]("ami-existing"), nil) ec2Svc.EXPECT().LaunchTemplateNeedsUpdate(gomock.Any(), gomock.Any(), gomock.Any()).Return(false, nil) - asgSvc.EXPECT().CanStartASGInstanceRefresh(gomock.Any()).Return(true, nil) - ec2Svc.EXPECT().PruneLaunchTemplateVersions(gomock.Any()).Return(nil) - ec2Svc.EXPECT().CreateLaunchTemplateVersion(gomock.Any(), gomock.Any(), gomock.Eq(ptr.To[string]("ami-existing")), gomock.Eq(apimachinerytypes.NamespacedName{Namespace: "default", Name: "bootstrap-data"}), gomock.Any()).Return(nil) + asgSvc.EXPECT().CanStartASGInstanceRefresh(gomock.Any()).Return(true, nil, nil) + ec2Svc.EXPECT().PruneLaunchTemplateVersions(gomock.Any()).Return(nil, nil) + ec2Svc.EXPECT().CreateLaunchTemplateVersion(gomock.Any(), gomock.Any(), gomock.Eq(ptr.To[string]("ami-existing")), gomock.Eq(apimachinerytypes.NamespacedName{Namespace: "default", Name: "bootstrap-data"}), gomock.Any(), gomock.Any()).Return(nil) ec2Svc.EXPECT().GetLaunchTemplateLatestVersion(gomock.Any()).Return("2", nil) // Changing the bootstrap data secret name should trigger rolling out new nodes, no matter what the // content (user data) is. This way, users can enforce a rollout by changing the bootstrap config @@ -630,14 +668,20 @@ func TestAWSMachinePoolReconciler(t *testing.T) { // No changes, so there must not be an ASG update! 
asgSvc.EXPECT().UpdateASG(gomock.Any()).Times(0) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs) + _, err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(err).To(Succeed()) }) t.Run("launch template and ASG created from zero, then bootstrap config reference changes", func(t *testing.T) { - ec2Svc.EXPECT().GetLaunchTemplate(gomock.Eq("test")).Return(nil, "", nil, nil) + g := NewWithT(t) + setup(t, g) + reconciler.reconcileServiceFactory = nil // use real implementation, but keep EC2 calls mocked (`ec2ServiceFactory`) + reconSvc = nil // not used + defer teardown(t, g) + + ec2Svc.EXPECT().GetLaunchTemplate(gomock.Eq("test")).Return(nil, "", nil, nil, nil) ec2Svc.EXPECT().DiscoverLaunchTemplateAMI(gomock.Any()).Return(ptr.To[string]("ami-abcdef123"), nil) - ec2Svc.EXPECT().CreateLaunchTemplate(gomock.Any(), gomock.Eq(ptr.To[string]("ami-abcdef123")), gomock.Eq(userDataSecretKey), gomock.Eq([]byte("shell-script"))).Return("lt-ghijkl456", nil) + ec2Svc.EXPECT().CreateLaunchTemplate(gomock.Any(), gomock.Eq(ptr.To[string]("ami-abcdef123")), gomock.Eq(userDataSecretKey), gomock.Eq([]byte("shell-script")), gomock.Eq(userdata.ComputeHash([]byte("shell-script")))).Return("lt-ghijkl456", nil) asgSvc.EXPECT().GetASGByName(gomock.Any()).Return(nil, nil) asgSvc.EXPECT().CreateASG(gomock.Any()).DoAndReturn(func(scope *scope.MachinePoolScope) (*expinfrav1.AutoScalingGroup, error) { g.Expect(scope.Name()).To(Equal("test")) @@ -646,11 +690,10 @@ func TestAWSMachinePoolReconciler(t *testing.T) { }, nil }) - err := reconciler.reconcileNormal(context.Background(), ms, cs, cs) + _, err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(err).To(Succeed()) g.Expect(ms.AWSMachinePool.Status.LaunchTemplateID).ToNot(BeEmpty()) - g.Expect(ptr.Deref[string](ms.AWSMachinePool.Status.LaunchTemplateVersion, "")).ToNot(BeEmpty()) // Data secret name changes newBootstrapSecret := &corev1.Secret{ @@ -665,6 +708,10 @@ func TestAWSMachinePoolReconciler(t *testing.T) { g.Expect(testEnv.Create(ctx, newBootstrapSecret)).To(Succeed()) ms.MachinePool.Spec.Template.Spec.Bootstrap.DataSecretName = ptr.To[string](newBootstrapSecret.Name) + // Since `AWSMachinePool.status.launchTemplateVersion` isn't set yet, + // the controller will ask for the current version and then set the status. 
+ ec2Svc.EXPECT().GetLaunchTemplateLatestVersion(gomock.Any()).Return("1", nil) + ec2Svc.EXPECT().GetLaunchTemplate(gomock.Eq("test")).Return( &expinfrav1.AWSLaunchTemplate{ Name: "test", @@ -675,12 +722,13 @@ func TestAWSMachinePoolReconciler(t *testing.T) { // No change to user data content userdata.ComputeHash([]byte("shell-script")), &apimachinerytypes.NamespacedName{Namespace: "default", Name: "bootstrap-data"}, + nil, nil) ec2Svc.EXPECT().DiscoverLaunchTemplateAMI(gomock.Any()).Return(ptr.To[string]("ami-existing"), nil) ec2Svc.EXPECT().LaunchTemplateNeedsUpdate(gomock.Any(), gomock.Any(), gomock.Any()).Return(false, nil) - asgSvc.EXPECT().CanStartASGInstanceRefresh(gomock.Any()).Return(true, nil) - ec2Svc.EXPECT().PruneLaunchTemplateVersions(gomock.Any()).Return(nil) - ec2Svc.EXPECT().CreateLaunchTemplateVersion(gomock.Any(), gomock.Any(), gomock.Eq(ptr.To[string]("ami-existing")), gomock.Eq(apimachinerytypes.NamespacedName{Namespace: "default", Name: "bootstrap-data-new"}), gomock.Any()).Return(nil) + asgSvc.EXPECT().CanStartASGInstanceRefresh(gomock.Any()).Return(true, nil, nil) + ec2Svc.EXPECT().PruneLaunchTemplateVersions(gomock.Any()).Return(nil, nil) + ec2Svc.EXPECT().CreateLaunchTemplateVersion(gomock.Any(), gomock.Any(), gomock.Eq(ptr.To[string]("ami-existing")), gomock.Eq(apimachinerytypes.NamespacedName{Namespace: "default", Name: "bootstrap-data-new"}), gomock.Any(), gomock.Any()).Return(nil) ec2Svc.EXPECT().GetLaunchTemplateLatestVersion(gomock.Any()).Return("2", nil) // Changing the bootstrap data secret name should trigger rolling out new nodes, no matter what the // content (user data) is. This way, users can enforce a rollout by changing the bootstrap config @@ -705,7 +753,156 @@ func TestAWSMachinePoolReconciler(t *testing.T) { // No changes, so there must not be an ASG update! 
asgSvc.EXPECT().UpdateASG(gomock.Any()).Times(0) - err = reconciler.reconcileNormal(context.Background(), ms, cs, cs) + _, err = reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) + g.Expect(err).To(Succeed()) + }) + + t.Run("launch template and ASG exist, bootstrap data secret name changed, Ignition bootstrap data stored in S3", func(t *testing.T) { + g := NewWithT(t) + setup(t, g) + reconciler.reconcileServiceFactory = nil // use real implementation, but keep EC2 calls mocked (`ec2ServiceFactory`) + reconSvc = nil // not used + defer teardown(t, g) + + secret.Data["format"] = []byte("ignition") + g.Expect(testEnv.Update(ctx, secret)).To(Succeed()) + + // Latest ID and version already stored, no need to retrieve it + ms.AWSMachinePool.Status.LaunchTemplateID = launchTemplateIDExisting + ms.AWSMachinePool.Status.LaunchTemplateVersion = ptr.To[string]("1") + + // Enable Ignition S3 storage + cs.AWSCluster.Spec.S3Bucket = &infrav1.S3Bucket{} + ms.AWSMachinePool.Spec.Ignition = &infrav1.Ignition{} + ms.AWSMachinePool.Default() // simulate webhook that sets default ignition version + + asgSvc.EXPECT().GetASGByName(gomock.Any()).DoAndReturn(func(scope *scope.MachinePoolScope) (*expinfrav1.AutoScalingGroup, error) { + g.Expect(scope.Name()).To(Equal("test")) + + // No difference to `AWSMachinePool.spec` + return &expinfrav1.AutoScalingGroup{ + Name: scope.Name(), + Subnets: []string{ + "subnet-1", + }, + MinSize: awsMachinePool.Spec.MinSize, + MaxSize: awsMachinePool.Spec.MaxSize, + MixedInstancesPolicy: awsMachinePool.Spec.MixedInstancesPolicy.DeepCopy(), + }, nil + }) + + ec2Svc.EXPECT().GetLaunchTemplate(gomock.Eq("test")).Return( + &expinfrav1.AWSLaunchTemplate{ + Name: "test", + AMI: infrav1.AMIReference{ + ID: ptr.To[string]("ami-existing"), + }, + }, + // No change to user data + userdata.ComputeHash([]byte("shell-script")), + // But the name of the secret changes from `previous-secret-name` to `bootstrap-data` + &apimachinerytypes.NamespacedName{Namespace: "default", Name: "previous-secret-name"}, + nil, + nil) + ec2Svc.EXPECT().DiscoverLaunchTemplateAMI(gomock.Any()).Return(ptr.To[string]("ami-existing"), nil) + ec2Svc.EXPECT().LaunchTemplateNeedsUpdate(gomock.Any(), gomock.Any(), gomock.Any()).Return(false, nil) + + s3Mock.EXPECT().PutObject(gomock.Any()).DoAndReturn(func(input *s3.PutObjectInput) (*s3.PutObjectOutput, error) { + g.Expect(*input.Key).To(Equal(fmt.Sprintf("machine-pool/test/%s", userdata.ComputeHash([]byte("shell-script"))))) + return &s3.PutObjectOutput{}, nil + }) + + // Simulate a pending instance refresh here, to see if `CancelInstanceRefresh` gets called + instanceRefreshStatus := autoscaling.InstanceRefreshStatusPending + asgSvc.EXPECT().CanStartASGInstanceRefresh(gomock.Any()).Return(false, &instanceRefreshStatus, nil) + asgSvc.EXPECT().CancelASGInstanceRefresh(gomock.Any()).Return(nil) + + // First reconciliation should notice the existing instance refresh and cancel it. + // Since the cancellation is asynchronous, the controller should requeue. + res, err := reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) + g.Expect(res.RequeueAfter).To(BeNumerically(">", 0)) + g.Expect(err).To(Succeed()) + + // Now simulate that no pending instance refresh exists. Cancellation should not be called anymore. 
+ asgSvc.EXPECT().CanStartASGInstanceRefresh(gomock.Any()).Return(true, nil, nil) + + asgSvc.EXPECT().GetASGByName(gomock.Any()).DoAndReturn(func(scope *scope.MachinePoolScope) (*expinfrav1.AutoScalingGroup, error) { + g.Expect(scope.Name()).To(Equal("test")) + + // No difference to `AWSMachinePool.spec` + return &expinfrav1.AutoScalingGroup{ + Name: scope.Name(), + Subnets: []string{ + "subnet-1", + }, + MinSize: awsMachinePool.Spec.MinSize, + MaxSize: awsMachinePool.Spec.MaxSize, + MixedInstancesPolicy: awsMachinePool.Spec.MixedInstancesPolicy.DeepCopy(), + }, nil + }) + + ec2Svc.EXPECT().GetLaunchTemplate(gomock.Eq("test")).Return( + &expinfrav1.AWSLaunchTemplate{ + Name: "test", + AMI: infrav1.AMIReference{ + ID: ptr.To[string]("ami-existing"), + }, + }, + // No change to user data + userdata.ComputeHash([]byte("shell-script")), + // But the name of the secret changes from `previous-secret-name` to `bootstrap-data` + &apimachinerytypes.NamespacedName{Namespace: "default", Name: "previous-secret-name"}, + nil, + nil) + ec2Svc.EXPECT().DiscoverLaunchTemplateAMI(gomock.Any()).Return(ptr.To[string]("ami-existing"), nil) + ec2Svc.EXPECT().LaunchTemplateNeedsUpdate(gomock.Any(), gomock.Any(), gomock.Any()).Return(false, nil) + + s3Mock.EXPECT().PutObject(gomock.Any()).DoAndReturn(func(input *s3.PutObjectInput) (*s3.PutObjectOutput, error) { + g.Expect(*input.Key).To(Equal(fmt.Sprintf("machine-pool/test/%s", userdata.ComputeHash([]byte("shell-script"))))) + return &s3.PutObjectOutput{}, nil + }) + + // No cancellation expected in this second reconciliation (see above) + asgSvc.EXPECT().CancelASGInstanceRefresh(gomock.Any()).Times(0) + + var simulatedDeletedVersionNumber int64 = 777 + bootstrapDataHash := "some-simulated-hash" + ec2Svc.EXPECT().PruneLaunchTemplateVersions(gomock.Any()).Return(&ec2.LaunchTemplateVersion{ + VersionNumber: &simulatedDeletedVersionNumber, + LaunchTemplateData: &ec2.ResponseLaunchTemplateData{ + TagSpecifications: []*ec2.LaunchTemplateTagSpecification{ + { + ResourceType: aws.String(ec2.ResourceTypeInstance), + Tags: []*ec2.Tag{ + // Only this tag is relevant for the test. If this is stored in the + // launch template version, and the version gets deleted, the S3 object + // with the bootstrap data should be deleted as well. + { + Key: aws.String("sigs.k8s.io/cluster-api-provider-aws/bootstrap-data-hash"), + Value: aws.String(bootstrapDataHash), + }, + }, + }, + }, + UserData: aws.String(base64.StdEncoding.EncodeToString([]byte("old-user-data"))), + }, + }, nil) + s3Mock.EXPECT().DeleteObject(gomock.Any()).DoAndReturn(func(input *s3.DeleteObjectInput) (*s3.DeleteObjectOutput, error) { + g.Expect(*input.Key).To(Equal(fmt.Sprintf("machine-pool/test/%s", bootstrapDataHash))) + return &s3.DeleteObjectOutput{}, nil + }) + ec2Svc.EXPECT().CreateLaunchTemplateVersion(gomock.Any(), gomock.Any(), gomock.Eq(ptr.To[string]("ami-existing")), gomock.Eq(apimachinerytypes.NamespacedName{Namespace: "default", Name: "bootstrap-data"}), gomock.Any(), gomock.Any()).Return(nil) + ec2Svc.EXPECT().GetLaunchTemplateLatestVersion(gomock.Any()).Return("2", nil) + // Changing the bootstrap data secret name should trigger rolling out new nodes, no matter what the + // content (user data) is. This way, users can enforce a rollout by changing the bootstrap config + // reference (`MachinePool.spec.template.spec.bootstrap`). 
+ asgSvc.EXPECT().StartASGInstanceRefresh(gomock.Any()) + + asgSvc.EXPECT().SubnetIDs(gomock.Any()).Return([]string{"subnet-1"}, nil) // no change + // No changes, so there must not be an ASG update! + asgSvc.EXPECT().UpdateASG(gomock.Any()).Times(0) + + _, err = reconciler.reconcileNormal(context.Background(), ms, cs, cs, cs) g.Expect(err).To(Succeed()) }) }) @@ -739,7 +936,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { finalizer(t, g) asgSvc.EXPECT().GetASGByName(gomock.Any()).Return(nil, nil) - ec2Svc.EXPECT().GetLaunchTemplate(gomock.Any()).Return(nil, "", nil, nil).AnyTimes() + ec2Svc.EXPECT().GetLaunchTemplate(gomock.Any()).Return(nil, "", nil, nil, nil).AnyTimes() buf := new(bytes.Buffer) klog.SetOutput(buf) @@ -761,7 +958,7 @@ func TestAWSMachinePoolReconciler(t *testing.T) { Status: expinfrav1.ASGStatusDeleteInProgress, } asgSvc.EXPECT().GetASGByName(gomock.Any()).Return(&inProgressASG, nil) - ec2Svc.EXPECT().GetLaunchTemplate(gomock.Any()).Return(nil, "", nil, nil).AnyTimes() + ec2Svc.EXPECT().GetLaunchTemplate(gomock.Any()).Return(nil, "", nil, nil, nil).AnyTimes() buf := new(bytes.Buffer) klog.SetOutput(buf) diff --git a/exp/controllers/awsmanagedmachinepool_controller.go b/exp/controllers/awsmanagedmachinepool_controller.go index 8c0d75c2ec..606dc5893a 100644 --- a/exp/controllers/awsmanagedmachinepool_controller.go +++ b/exp/controllers/awsmanagedmachinepool_controller.go @@ -156,6 +156,7 @@ func (r *AWSManagedMachinePoolReconciler) Reconcile(ctx context.Context, req ctr } machinePoolScope, err := scope.NewManagedMachinePoolScope(scope.ManagedMachinePoolScopeParams{ + Logger: log, Client: r.Client, ControllerName: "awsmanagedmachinepool", Cluster: cluster, @@ -189,19 +190,20 @@ func (r *AWSManagedMachinePoolReconciler) Reconcile(ctx context.Context, req ctr return ctrl.Result{}, r.reconcileDelete(ctx, machinePoolScope, managedControlPlaneScope) } - return ctrl.Result{}, r.reconcileNormal(ctx, machinePoolScope, managedControlPlaneScope) + return r.reconcileNormal(ctx, machinePoolScope, managedControlPlaneScope, managedControlPlaneScope) } func (r *AWSManagedMachinePoolReconciler) reconcileNormal( ctx context.Context, machinePoolScope *scope.ManagedMachinePoolScope, ec2Scope scope.EC2Scope, -) error { + s3Scope scope.S3Scope, +) (ctrl.Result, error) { machinePoolScope.Info("Reconciling AWSManagedMachinePool") if controllerutil.AddFinalizer(machinePoolScope.ManagedMachinePool, expinfrav1.ManagedMachinePoolFinalizer) { if err := machinePoolScope.PatchObject(); err != nil { - return err + return ctrl.Result{}, err } } @@ -210,17 +212,25 @@ func (r *AWSManagedMachinePoolReconciler) reconcileNormal( reconSvc := r.getReconcileService(ec2Scope) if machinePoolScope.ManagedMachinePool.Spec.AWSLaunchTemplate != nil { - canUpdateLaunchTemplate := func() (bool, error) { - return true, nil + canStartInstanceRefresh := func() (bool, *string, error) { + return true, nil, nil + } + cancelInstanceRefresh := func() error { + return nil } runPostLaunchTemplateUpdateOperation := func() error { return nil } - if err := reconSvc.ReconcileLaunchTemplate(machinePoolScope, ec2svc, canUpdateLaunchTemplate, runPostLaunchTemplateUpdateOperation); err != nil { + var objectStoreSvc services.ObjectStoreInterface = nil // no S3 bucket support for `AWSManagedControlPlane` yet + res, err := reconSvc.ReconcileLaunchTemplate(machinePoolScope, machinePoolScope, s3Scope, ec2svc, objectStoreSvc, canStartInstanceRefresh, cancelInstanceRefresh, runPostLaunchTemplateUpdateOperation) + if err != nil { 
r.Recorder.Eventf(machinePoolScope.ManagedMachinePool, corev1.EventTypeWarning, "FailedLaunchTemplateReconcile", "Failed to reconcile launch template: %v", err) machinePoolScope.Error(err, "failed to reconcile launch template") conditions.MarkFalse(machinePoolScope.ManagedMachinePool, expinfrav1.LaunchTemplateReadyCondition, expinfrav1.LaunchTemplateReconcileFailedReason, clusterv1.ConditionSeverityError, "") - return err + return ctrl.Result{}, err + } + if res != nil { + return *res, nil } launchTemplateID := machinePoolScope.GetLaunchTemplateIDStatus() @@ -229,7 +239,7 @@ func (r *AWSManagedMachinePoolReconciler) reconcileNormal( ResourceService: ec2svc, }} if err := reconSvc.ReconcileTags(machinePoolScope, resourceServiceToUpdate); err != nil { - return errors.Wrap(err, "error updating tags") + return ctrl.Result{}, errors.Wrap(err, "error updating tags") } // set the LaunchTemplateReady condition @@ -237,10 +247,10 @@ func (r *AWSManagedMachinePoolReconciler) reconcileNormal( } if err := ekssvc.ReconcilePool(ctx); err != nil { - return errors.Wrapf(err, "failed to reconcile machine pool for AWSManagedMachinePool %s/%s", machinePoolScope.ManagedMachinePool.Namespace, machinePoolScope.ManagedMachinePool.Name) + return ctrl.Result{}, errors.Wrapf(err, "failed to reconcile machine pool for AWSManagedMachinePool %s/%s", machinePoolScope.ManagedMachinePool.Namespace, machinePoolScope.ManagedMachinePool.Name) } - return nil + return ctrl.Result{}, nil } func (r *AWSManagedMachinePoolReconciler) reconcileDelete( @@ -259,7 +269,7 @@ func (r *AWSManagedMachinePoolReconciler) reconcileDelete( if machinePoolScope.ManagedMachinePool.Spec.AWSLaunchTemplate != nil { launchTemplateID := machinePoolScope.ManagedMachinePool.Status.LaunchTemplateID - launchTemplate, _, _, err := ec2Svc.GetLaunchTemplate(machinePoolScope.LaunchTemplateName()) + launchTemplate, _, _, _, err := ec2Svc.GetLaunchTemplate(machinePoolScope.LaunchTemplateName()) if err != nil { return err } diff --git a/go.mod b/go.mod index 00a19b4783..a428d481dd 100644 --- a/go.mod +++ b/go.mod @@ -57,7 +57,6 @@ require ( sigs.k8s.io/cluster-api v1.7.1 sigs.k8s.io/cluster-api/test v1.7.1 sigs.k8s.io/controller-runtime v0.17.3 - sigs.k8s.io/kustomize/api v0.13.5-0.20230601165947-6ce0bf390ce3 sigs.k8s.io/yaml v1.4.0 ) @@ -226,6 +225,7 @@ require ( sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.29.0 // indirect sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect sigs.k8s.io/kind v0.22.0 // indirect + sigs.k8s.io/kustomize/api v0.13.5-0.20230601165947-6ce0bf390ce3 // indirect sigs.k8s.io/kustomize/kustomize/v5 v5.0.4-0.20230601165947-6ce0bf390ce3 // indirect sigs.k8s.io/kustomize/kyaml v0.14.3-0.20230601165947-6ce0bf390ce3 // indirect sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect diff --git a/pkg/cloud/scope/ignition.go b/pkg/cloud/scope/ignition.go new file mode 100644 index 0000000000..d996612181 --- /dev/null +++ b/pkg/cloud/scope/ignition.go @@ -0,0 +1,26 @@ +/* +Copyright 2024 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. +*/ + +package scope + +import ( + infrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/api/v1beta2" +) + +// IgnitionScope gets the optional Ignition configuration. +type IgnitionScope interface { + Ignition() *infrav1.Ignition +} diff --git a/pkg/cloud/scope/launchtemplate.go b/pkg/cloud/scope/launchtemplate.go index af7651b963..34e84e7ff7 100644 --- a/pkg/cloud/scope/launchtemplate.go +++ b/pkg/cloud/scope/launchtemplate.go @@ -21,12 +21,11 @@ import ( "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" - expclusterv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" - "sigs.k8s.io/cluster-api/util/conditions" - infrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/api/v1beta2" expinfrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/exp/api/v1beta2" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/logger" + expclusterv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" + "sigs.k8s.io/cluster-api/util/conditions" ) // LaunchTemplateScope defines a scope defined around a launch template. @@ -38,7 +37,7 @@ type LaunchTemplateScope interface { SetLaunchTemplateIDStatus(id string) GetLaunchTemplateLatestVersionStatus() string SetLaunchTemplateLatestVersionStatus(version string) - GetRawBootstrapData() ([]byte, *types.NamespacedName, error) + GetRawBootstrapData() ([]byte, string, *types.NamespacedName, error) IsEKSManaged() bool AdditionalTags() infrav1.Tags diff --git a/pkg/cloud/scope/machinepool.go b/pkg/cloud/scope/machinepool.go index 00e8abeadc..d141aef06f 100644 --- a/pkg/cloud/scope/machinepool.go +++ b/pkg/cloud/scope/machinepool.go @@ -121,6 +121,11 @@ func NewMachinePoolScope(params MachinePoolScopeParams) (*MachinePoolScope, erro }, nil } +// Ignition gets the ignition config. +func (m *MachinePoolScope) Ignition() *infrav1.Ignition { + return m.AWSMachinePool.Spec.Ignition +} + // Name returns the AWSMachinePool name. func (m *MachinePoolScope) Name() string { return m.AWSMachinePool.Name @@ -133,13 +138,7 @@ func (m *MachinePoolScope) Namespace() string { // GetRawBootstrapData returns the bootstrap data from the secret in the Machine's bootstrap.dataSecretName, // including the secret's namespaced name. -func (m *MachinePoolScope) GetRawBootstrapData() ([]byte, *types.NamespacedName, error) { - data, _, bootstrapDataSecretKey, err := m.getBootstrapData() - - return data, bootstrapDataSecretKey, err -} - -func (m *MachinePoolScope) getBootstrapData() ([]byte, string, *types.NamespacedName, error) { +func (m *MachinePoolScope) GetRawBootstrapData() ([]byte, string, *types.NamespacedName, error) { if m.MachinePool.Spec.Template.Spec.Bootstrap.DataSecretName == nil { return nil, "", nil, errors.New("error retrieving bootstrap data: linked Machine's bootstrap.dataSecretName is nil") } diff --git a/pkg/cloud/scope/managednodegroup.go b/pkg/cloud/scope/managednodegroup.go index e9421d7282..5d35fad215 100644 --- a/pkg/cloud/scope/managednodegroup.go +++ b/pkg/cloud/scope/managednodegroup.go @@ -305,6 +305,11 @@ func (s *ManagedMachinePoolScope) ControllerName() string { return s.controllerName } +// Ignition gets the ignition config. +func (s *ManagedMachinePoolScope) Ignition() *infrav1.Ignition { + return nil +} + // KubernetesClusterName is the name of the EKS cluster name. 
func (s *ManagedMachinePoolScope) KubernetesClusterName() string { return s.ControlPlane.Spec.EKSClusterName @@ -326,24 +331,24 @@ func (s *ManagedMachinePoolScope) Namespace() string { } // GetRawBootstrapData returns the raw bootstrap data from the linked Machine's bootstrap.dataSecretName. -func (s *ManagedMachinePoolScope) GetRawBootstrapData() ([]byte, *types.NamespacedName, error) { +func (s *ManagedMachinePoolScope) GetRawBootstrapData() ([]byte, string, *types.NamespacedName, error) { if s.MachinePool.Spec.Template.Spec.Bootstrap.DataSecretName == nil { - return nil, nil, errors.New("error retrieving bootstrap data: linked Machine's bootstrap.dataSecretName is nil") + return nil, "", nil, errors.New("error retrieving bootstrap data: linked Machine's bootstrap.dataSecretName is nil") } secret := &corev1.Secret{} key := types.NamespacedName{Namespace: s.Namespace(), Name: *s.MachinePool.Spec.Template.Spec.Bootstrap.DataSecretName} if err := s.Client.Get(context.TODO(), key, secret); err != nil { - return nil, nil, errors.Wrapf(err, "failed to retrieve bootstrap data secret for AWSManagedMachinePool %s/%s", s.Namespace(), s.Name()) + return nil, "", nil, errors.Wrapf(err, "failed to retrieve bootstrap data secret for AWSManagedMachinePool %s/%s", s.Namespace(), s.Name()) } value, ok := secret.Data["value"] if !ok { - return nil, nil, errors.New("error retrieving bootstrap data: secret value key is missing") + return nil, "", nil, errors.New("error retrieving bootstrap data: secret value key is missing") } - return value, &key, nil + return value, string(secret.Data["format"]), &key, nil } // GetObjectMeta returns the ObjectMeta for the AWSManagedMachinePool. diff --git a/pkg/cloud/services/autoscaling/autoscalinggroup.go b/pkg/cloud/services/autoscaling/autoscalinggroup.go index d473010d12..fc0e523bc6 100644 --- a/pkg/cloud/services/autoscaling/autoscalinggroup.go +++ b/pkg/cloud/services/autoscaling/autoscalinggroup.go @@ -24,6 +24,7 @@ import ( "strings" "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/awserr" "github.com/aws/aws-sdk-go/service/autoscaling" "github.com/aws/aws-sdk-go/service/ec2" "github.com/pkg/errors" @@ -323,30 +324,53 @@ func (s *Service) UpdateASG(machinePoolScope *scope.MachinePoolScope) error { return nil } -// CanStartASGInstanceRefresh will start an ASG instance with refresh. -func (s *Service) CanStartASGInstanceRefresh(scope *scope.MachinePoolScope) (bool, error) { +// CanStartASGInstanceRefresh checks if a new ASG instance refresh can currently be started, and returns the status if there is an existing, unfinished refresh. 
+func (s *Service) CanStartASGInstanceRefresh(scope *scope.MachinePoolScope) (bool, *string, error) { describeInput := &autoscaling.DescribeInstanceRefreshesInput{AutoScalingGroupName: aws.String(scope.Name())} refreshes, err := s.ASGClient.DescribeInstanceRefreshesWithContext(context.TODO(), describeInput) if err != nil { - return false, err + return false, nil, err } hasUnfinishedRefresh := false + var unfinishedRefreshStatus *string if err == nil && len(refreshes.InstanceRefreshes) != 0 { for i := range refreshes.InstanceRefreshes { if *refreshes.InstanceRefreshes[i].Status == autoscaling.InstanceRefreshStatusInProgress || *refreshes.InstanceRefreshes[i].Status == autoscaling.InstanceRefreshStatusPending || *refreshes.InstanceRefreshes[i].Status == autoscaling.InstanceRefreshStatusCancelling { hasUnfinishedRefresh = true + unfinishedRefreshStatus = refreshes.InstanceRefreshes[i].Status } } } if hasUnfinishedRefresh { - return false, nil + return false, unfinishedRefreshStatus, nil } - return true, nil + return true, nil, nil } -// StartASGInstanceRefresh will start an ASG instance with refresh. +// CancelASGInstanceRefresh cancels an ASG instance refresh. +func (s *Service) CancelASGInstanceRefresh(scope *scope.MachinePoolScope) error { + input := &autoscaling.CancelInstanceRefreshInput{ + AutoScalingGroupName: aws.String(scope.Name()), + } + + if _, err := s.ASGClient.CancelInstanceRefreshWithContext(context.TODO(), input); err != nil { + var aerr awserr.Error + if errors.As(err, &aerr) && aerr.Code() == autoscaling.ErrCodeActiveInstanceRefreshNotFoundFault { + // Refresh isn't "in progress". It may have turned to cancelled status + // by now. So this is not an error for us because we may have called + // CancelInstanceRefresh multiple times and should be idempotent here. + return nil + } + + return errors.Wrapf(err, "failed to cancel ASG instance refresh %q", scope.Name()) + } + + return nil +} + +// StartASGInstanceRefresh will start an ASG instance refresh. 
func (s *Service) StartASGInstanceRefresh(scope *scope.MachinePoolScope) error { strategy := ptr.To[string](autoscaling.RefreshStrategyRolling) var minHealthyPercentage, instanceWarmup *int64 diff --git a/pkg/cloud/services/autoscaling/autoscalinggroup_test.go b/pkg/cloud/services/autoscaling/autoscalinggroup_test.go index e116c80126..d64ed15c29 100644 --- a/pkg/cloud/services/autoscaling/autoscalinggroup_test.go +++ b/pkg/cloud/services/autoscaling/autoscalinggroup_test.go @@ -1111,10 +1111,11 @@ func TestServiceCanStartASGInstanceRefresh(t *testing.T) { defer mockCtrl.Finish() tests := []struct { - name string - wantErr bool - canStart bool - expect func(m *mock_autoscalingiface.MockAutoScalingAPIMockRecorder) + name string + wantErr bool + wantUnfinishedRefreshStatus *string + canStart bool + expect func(m *mock_autoscalingiface.MockAutoScalingAPIMockRecorder) }{ { name: "should return error if describe instance refresh failed", @@ -1139,9 +1140,10 @@ func TestServiceCanStartASGInstanceRefresh(t *testing.T) { }, }, { - name: "should return false if some instances have unfinished refresh", - wantErr: false, - canStart: false, + name: "should return false if some instances have unfinished refresh", + wantErr: false, + wantUnfinishedRefreshStatus: aws.String(autoscaling.InstanceRefreshStatusInProgress), + canStart: false, expect: func(m *mock_autoscalingiface.MockAutoScalingAPIMockRecorder) { m.DescribeInstanceRefreshesWithContext(context.TODO(), gomock.Eq(&autoscaling.DescribeInstanceRefreshesInput{ AutoScalingGroupName: aws.String("machinePoolName"), @@ -1173,11 +1175,13 @@ func TestServiceCanStartASGInstanceRefresh(t *testing.T) { g.Expect(err).ToNot(HaveOccurred()) mps.AWSMachinePool.Name = "machinePoolName" - out, err := s.CanStartASGInstanceRefresh(mps) + out, unfinishedRefreshStatus, err := s.CanStartASGInstanceRefresh(mps) checkErr(tt.wantErr, err, g) if tt.canStart { g.Expect(out).To(BeTrue()) return + } else { + g.Expect(unfinishedRefreshStatus).To(Equal(tt.wantUnfinishedRefreshStatus)) } g.Expect(out).To(BeFalse()) }) diff --git a/pkg/cloud/services/ec2/launchtemplate.go b/pkg/cloud/services/ec2/launchtemplate.go index d5caeb26f9..f147d6d414 100644 --- a/pkg/cloud/services/ec2/launchtemplate.go +++ b/pkg/cloud/services/ec2/launchtemplate.go @@ -23,25 +23,31 @@ import ( "sort" "strconv" "strings" + "time" "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/service/autoscaling" "github.com/aws/aws-sdk-go/service/ec2" + "github.com/blang/semver" + ignTypes "github.com/coreos/ignition/config/v2_3/types" + ignV3Types "github.com/coreos/ignition/v2/config/v3_4/types" "github.com/google/go-cmp/cmp" "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" apimachinerytypes "k8s.io/apimachinery/pkg/types" "k8s.io/utils/ptr" - - clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" - "sigs.k8s.io/cluster-api/util/conditions" + ctrl "sigs.k8s.io/controller-runtime" infrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/api/v1beta2" expinfrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/exp/api/v1beta2" + "sigs.k8s.io/cluster-api-provider-aws/v2/feature" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/awserrors" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/scope" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services/userdata" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/record" + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + "sigs.k8s.io/cluster-api/util/conditions" ) const ( @@ -57,43 +63,124 @@ const ( // 
ReconcileLaunchTemplate reconciles a launch template and triggers instance refresh conditionally, depending on // changes. // -//nolint:gocyclo +//nolint:gocyclo,maintidx func (s *Service) ReconcileLaunchTemplate( + ignitionScope scope.IgnitionScope, scope scope.LaunchTemplateScope, + s3Scope scope.S3Scope, ec2svc services.EC2Interface, - canUpdateLaunchTemplate func() (bool, error), + objectStoreSvc services.ObjectStoreInterface, + canStartInstanceRefresh func() (bool, *string, error), + cancelInstanceRefresh func() error, runPostLaunchTemplateUpdateOperation func() error, -) error { - bootstrapData, bootstrapDataSecretKey, err := scope.GetRawBootstrapData() +) (*ctrl.Result, error) { + bootstrapData, bootstrapDataFormat, bootstrapDataSecretKey, err := scope.GetRawBootstrapData() if err != nil { record.Eventf(scope.GetMachinePool(), corev1.EventTypeWarning, "FailedGetBootstrapData", err.Error()) - return err + return nil, err } - bootstrapDataHash := userdata.ComputeHash(bootstrapData) - scope.Info("checking for existing launch template") - launchTemplate, launchTemplateUserDataHash, launchTemplateUserDataSecretKey, err := ec2svc.GetLaunchTemplate(scope.LaunchTemplateName()) + launchTemplate, launchTemplateUserDataHash, launchTemplateUserDataSecretKey, _, err := ec2svc.GetLaunchTemplate(scope.LaunchTemplateName()) if err != nil { conditions.MarkUnknown(scope.GetSetter(), expinfrav1.LaunchTemplateReadyCondition, expinfrav1.LaunchTemplateNotFoundReason, err.Error()) - return err + return nil, err } imageID, err := ec2svc.DiscoverLaunchTemplateAMI(scope) if err != nil { conditions.MarkFalse(scope.GetSetter(), expinfrav1.LaunchTemplateReadyCondition, expinfrav1.LaunchTemplateCreateFailedReason, clusterv1.ConditionSeverityError, err.Error()) - return err + return nil, err } + var userDataForLaunchTemplate []byte + if s3Scope.Bucket() != nil && bootstrapDataFormat == "ignition" && ignitionScope.Ignition() != nil { + scope.Info("Using S3 bucket storage for Ignition format") + + // S3 bucket storage enabled and Ignition format is used. Ignition supports reading large user data from S3, + // not restricted by the EC2 user data size limit. The actual user data goes into the S3 object while the + // user data on the launch template points to the S3 bucket (or presigned URL). + // Previously, user data was always written into the launch template, so we check + // `AWSMachinePool.Spec.Ignition != nil` to toggle the S3 feature on for `AWSMachinePool` objects. 
+ objectURL, err := objectStoreSvc.CreateForMachinePool(scope, bootstrapData) + + if err != nil { + conditions.MarkFalse(scope.GetSetter(), expinfrav1.LaunchTemplateReadyCondition, expinfrav1.LaunchTemplateReconcileFailedReason, clusterv1.ConditionSeverityError, err.Error()) + return nil, err + } + + ignVersion := ignitionScope.Ignition().Version + semver, err := semver.ParseTolerant(ignVersion) + if err != nil { + err = errors.Wrapf(err, "failed to parse ignition version %q", ignVersion) + conditions.MarkFalse(scope.GetSetter(), expinfrav1.LaunchTemplateReadyCondition, expinfrav1.LaunchTemplateReconcileFailedReason, clusterv1.ConditionSeverityError, err.Error()) + return nil, err + } + + // EC2 user data points to S3 + switch semver.Major { + case 2: + ignData := &ignTypes.Config{ + Ignition: ignTypes.Ignition{ + Version: semver.String(), + Config: ignTypes.IgnitionConfig{ + Append: []ignTypes.ConfigReference{ + { + Source: objectURL, + }, + }, + }, + }, + } + + userDataForLaunchTemplate, err = json.Marshal(ignData) + if err != nil { + err = errors.Wrap(err, "failed to convert ignition config to JSON") + conditions.MarkFalse(scope.GetSetter(), expinfrav1.LaunchTemplateReadyCondition, expinfrav1.LaunchTemplateReconcileFailedReason, clusterv1.ConditionSeverityError, err.Error()) + return nil, err + } + case 3: + ignData := &ignV3Types.Config{ + Ignition: ignV3Types.Ignition{ + Version: semver.String(), + Config: ignV3Types.IgnitionConfig{ + Merge: []ignV3Types.Resource{ + { + Source: aws.String(objectURL), + }, + }, + }, + }, + } + + userDataForLaunchTemplate, err = json.Marshal(ignData) + if err != nil { + err = errors.Wrap(err, "failed to convert ignition config to JSON") + conditions.MarkFalse(scope.GetSetter(), expinfrav1.LaunchTemplateReadyCondition, expinfrav1.LaunchTemplateReconcileFailedReason, clusterv1.ConditionSeverityError, err.Error()) + return nil, err + } + default: + err = errors.Errorf("unsupported ignition version %q", ignVersion) + conditions.MarkFalse(scope.GetSetter(), expinfrav1.LaunchTemplateReadyCondition, expinfrav1.LaunchTemplateReconcileFailedReason, clusterv1.ConditionSeverityError, err.Error()) + return nil, err + } + } else { + // S3 bucket not used, so the bootstrap data is stored directly in the launch template + // (EC2 user data) + userDataForLaunchTemplate = bootstrapData + } + + bootstrapDataForLaunchTemplateHash := userdata.ComputeHash(userDataForLaunchTemplate) + if launchTemplate == nil { scope.Info("no existing launch template found, creating") - launchTemplateID, err := ec2svc.CreateLaunchTemplate(scope, imageID, *bootstrapDataSecretKey, bootstrapData) + launchTemplateID, err := ec2svc.CreateLaunchTemplate(scope, imageID, *bootstrapDataSecretKey, userDataForLaunchTemplate, userdata.ComputeHash(bootstrapData)) if err != nil { conditions.MarkFalse(scope.GetSetter(), expinfrav1.LaunchTemplateReadyCondition, expinfrav1.LaunchTemplateCreateFailedReason, clusterv1.ConditionSeverityError, err.Error()) - return err + return nil, err } scope.SetLaunchTemplateIDStatus(launchTemplateID) - return scope.PatchObject() + return nil, scope.PatchObject() } // LaunchTemplateID is set during LaunchTemplate creation, but for a scenario such as `clusterctl move`, status fields become blank. 
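The switch above only ever writes a tiny pointer config into the EC2 user data; the real (potentially large) bootstrap payload lives in the S3 object. For the Ignition v3 branch, the generated user data boils down to the following standalone sketch, using the same ignition v3_4 Go types as the patch; the object URL is a placeholder:

    package main

    import (
        "encoding/json"
        "fmt"

        ignV3Types "github.com/coreos/ignition/v2/config/v3_4/types"
    )

    func main() {
        objectURL := "s3://example-bucket/machine-pool/test/<hash>" // placeholder

        // Pointer config: Ignition on the instance fetches and merges the
        // real config from S3 at boot, bypassing the EC2 user data size limit.
        cfg := ignV3Types.Config{
            Ignition: ignV3Types.Ignition{
                Version: "3.4.0",
                Config: ignV3Types.IgnitionConfig{
                    Merge: []ignV3Types.Resource{
                        {Source: &objectURL},
                    },
                },
            },
        }

        out, err := json.Marshal(cfg)
        if err != nil {
            panic(err)
        }
        // Prints a small JSON pointer config referencing the S3 object.
        fmt.Println(string(out))
    }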
@@ -102,25 +189,27 @@ func (s *Service) ReconcileLaunchTemplate( launchTemplateID, err := ec2svc.GetLaunchTemplateID(scope.LaunchTemplateName()) if err != nil { conditions.MarkUnknown(scope.GetSetter(), expinfrav1.LaunchTemplateReadyCondition, expinfrav1.LaunchTemplateNotFoundReason, err.Error()) - return err + return nil, err } scope.SetLaunchTemplateIDStatus(launchTemplateID) - return scope.PatchObject() + return nil, scope.PatchObject() } if scope.GetLaunchTemplateLatestVersionStatus() == "" { launchTemplateVersion, err := ec2svc.GetLaunchTemplateLatestVersion(scope.GetLaunchTemplateIDStatus()) if err != nil { conditions.MarkUnknown(scope.GetSetter(), expinfrav1.LaunchTemplateReadyCondition, expinfrav1.LaunchTemplateNotFoundReason, err.Error()) - return err + return nil, err } scope.SetLaunchTemplateLatestVersionStatus(launchTemplateVersion) - return scope.PatchObject() + if err = scope.PatchObject(); err != nil { + return nil, err + } } annotation, err := MachinePoolAnnotationJSON(scope, TagsLastAppliedAnnotation) if err != nil { - return err + return nil, err } // Check if the instance tags were changed. If they were, create a new LaunchTemplate. @@ -128,7 +217,7 @@ func (s *Service) ReconcileLaunchTemplate( needsUpdate, err := ec2svc.LaunchTemplateNeedsUpdate(scope, scope.GetLaunchTemplate(), launchTemplate) if err != nil { - return err + return nil, err } amiChanged := *imageID != *launchTemplate.AMI.ID @@ -142,50 +231,90 @@ func (s *Service) ReconcileLaunchTemplate( launchTemplateNeedsUserDataSecretKeyTag := launchTemplateUserDataSecretKey == nil if needsUpdate || tagsChanged || amiChanged || userDataSecretKeyChanged { - canUpdate, err := canUpdateLaunchTemplate() + // More than just the bootstrap token changed + + canStartRefresh, unfinishedRefreshStatus, err := canStartInstanceRefresh() if err != nil { - return err + return nil, err } - if !canUpdate { - conditions.MarkFalse(scope.GetSetter(), expinfrav1.PreLaunchTemplateUpdateCheckCondition, expinfrav1.PreLaunchTemplateUpdateCheckFailedReason, clusterv1.ConditionSeverityWarning, "") - return errors.New("Cannot update the launch template, prerequisite not met") + if !canStartRefresh { + if unfinishedRefreshStatus != nil && *unfinishedRefreshStatus != autoscaling.InstanceRefreshStatusCancelling { + // Until the previous instance refresh goes into `Cancelled` state + // asynchronously, allowing another refresh to be started, + // defer the reconciliation. Otherwise, we get an + // `ErrCodeInstanceRefreshInProgressFault` error if we tried to + // start an instance refresh immediately. + scope.Info("Cancelling previous instance refresh and delaying reconciliation until the next one can be started", "unfinishedRefreshStatus", unfinishedRefreshStatus) + + err := cancelInstanceRefresh() + if err != nil { + return nil, err + } + } else { + scope.Info("Existing instance refresh is not finished, delaying reconciliation until the next one can be started", "unfinishedRefreshStatus", unfinishedRefreshStatus) + } + return &ctrl.Result{RequeueAfter: 30 * time.Second}, nil } } - userDataHashChanged := launchTemplateUserDataHash != bootstrapDataHash + userDataHashChanged := launchTemplateUserDataHash != bootstrapDataForLaunchTemplateHash // Create a new launch template version if there's a difference in configuration, tags, // userdata, OR we've discovered a new AMI ID. 
if needsUpdate || tagsChanged || amiChanged || userDataHashChanged || userDataSecretKeyChanged || launchTemplateNeedsUserDataSecretKeyTag { scope.Info("creating new version for launch template", "existing", launchTemplate, "incoming", scope.GetLaunchTemplate(), "needsUpdate", needsUpdate, "tagsChanged", tagsChanged, "amiChanged", amiChanged, "userDataHashChanged", userDataHashChanged, "userDataSecretKeyChanged", userDataSecretKeyChanged) + // There is a limit to the number of Launch Template Versions. // We ensure that the number of versions does not grow without bound by following a simple rule: Before we create a new version, we delete one old version, if there is at least one old version that is not in use. - if err := ec2svc.PruneLaunchTemplateVersions(scope.GetLaunchTemplateIDStatus()); err != nil { - return err + deletedLaunchTemplateVersion, err := ec2svc.PruneLaunchTemplateVersions(scope.GetLaunchTemplateIDStatus()) + if err != nil { + return nil, err } - if err := ec2svc.CreateLaunchTemplateVersion(scope.GetLaunchTemplateIDStatus(), scope, imageID, *bootstrapDataSecretKey, bootstrapData); err != nil { - return err + + // S3 objects should be deleted as soon as possible if they're not used + // anymore. If this fails, it would still be cleaned by the bucket lifecycle + // policy later. + if feature.Gates.Enabled(feature.MachinePool) && deletedLaunchTemplateVersion != nil { + _, _, _, deletedLaunchTemplateVersionBootstrapDataHash, err := s.SDKToLaunchTemplate(deletedLaunchTemplateVersion) + if err != nil { + return nil, err + } + if deletedLaunchTemplateVersionBootstrapDataHash != nil && s3Scope.Bucket() != nil && bootstrapDataFormat == "ignition" && ignitionScope.Ignition() != nil { + scope.Info("Deleting S3 object for deleted launch template version", "version", *deletedLaunchTemplateVersion.VersionNumber) + + err = objectStoreSvc.DeleteForMachinePool(scope, *deletedLaunchTemplateVersionBootstrapDataHash) + + // If any error happened above, log it and continue + if err != nil { + scope.Error(err, "Failed to delete S3 object for deleted launch template version, continuing because the bucket lifecycle policy will clean it later", "version", *deletedLaunchTemplateVersion.VersionNumber) + } + } + } + + if err := ec2svc.CreateLaunchTemplateVersion(scope.GetLaunchTemplateIDStatus(), scope, imageID, *bootstrapDataSecretKey, userDataForLaunchTemplate, userdata.ComputeHash(bootstrapData)); err != nil { + return nil, err } version, err := ec2svc.GetLaunchTemplateLatestVersion(scope.GetLaunchTemplateIDStatus()) if err != nil { - return err + return nil, err } scope.SetLaunchTemplateLatestVersionStatus(version) if err := scope.PatchObject(); err != nil { - return err + return nil, err } } if needsUpdate || tagsChanged || amiChanged || userDataSecretKeyChanged { - if err := runPostLaunchTemplateUpdateOperation(); err != nil { + err := runPostLaunchTemplateUpdateOperation() + if err != nil { conditions.MarkFalse(scope.GetSetter(), expinfrav1.PostLaunchTemplateUpdateOperationCondition, expinfrav1.PostLaunchTemplateUpdateOperationFailedReason, clusterv1.ConditionSeverityError, err.Error()) - return err + return nil, err } conditions.MarkTrue(scope.GetSetter(), expinfrav1.PostLaunchTemplateUpdateOperationCondition) } - return nil + return nil, nil } // ReconcileTags reconciles the tags for the AWSMachinePool instances. 
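The canStartInstanceRefresh and cancelInstanceRefresh callbacks used in the hunk above are supplied by the caller and, per the interface changes further down, are backed by the ASG service's CanStartASGInstanceRefresh and CancelASGInstanceRefresh. A rough sketch of how such a gate can be built on aws-sdk-go's autoscaling API follows; the function shapes and names are illustrative, not the patch's actual helpers:

package asgrefresh

import (
	"github.com/aws/aws-sdk-go/aws"
	"github.com/aws/aws-sdk-go/service/autoscaling"
	"github.com/aws/aws-sdk-go/service/autoscaling/autoscalingiface"
)

// canStartInstanceRefresh reports whether a new instance refresh may be
// started; when blocked, it also returns the status of the unfinished
// refresh so the caller can decide whether to cancel it.
func canStartInstanceRefresh(client autoscalingiface.AutoScalingAPI, asgName string) (bool, *string, error) {
	out, err := client.DescribeInstanceRefreshes(&autoscaling.DescribeInstanceRefreshesInput{
		AutoScalingGroupName: aws.String(asgName),
	})
	if err != nil {
		return false, nil, err
	}
	for _, refresh := range out.InstanceRefreshes {
		switch aws.StringValue(refresh.Status) {
		case autoscaling.InstanceRefreshStatusPending,
			autoscaling.InstanceRefreshStatusInProgress,
			autoscaling.InstanceRefreshStatusCancelling:
			return false, refresh.Status, nil // an unfinished refresh blocks a new one
		}
	}
	return true, nil, nil
}

// cancelInstanceRefresh asks AWS to cancel the in-flight refresh; the status
// moves through Cancelling to Cancelled asynchronously.
func cancelInstanceRefresh(client autoscalingiface.AutoScalingAPI, asgName string) error {
	_, err := client.CancelInstanceRefresh(&autoscaling.CancelInstanceRefreshInput{
		AutoScalingGroupName: aws.String(asgName),
	})
	return err
}

Returning ctrl.Result{RequeueAfter: 30 * time.Second} after the cancel gives the refresh time to reach Cancelled before the next reconciliation tries to start a new one, which avoids the ErrCodeInstanceRefreshInProgressFault mentioned in the comment.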
@@ -344,9 +473,9 @@ func tagsChanged(annotation map[string]interface{}, src map[string]string) (bool // GetLaunchTemplate returns the existing LaunchTemplate or nothing if it doesn't exist. // For now by name until we need the input to be something different. -func (s *Service) GetLaunchTemplate(launchTemplateName string) (*expinfrav1.AWSLaunchTemplate, string, *apimachinerytypes.NamespacedName, error) { +func (s *Service) GetLaunchTemplate(launchTemplateName string) (*expinfrav1.AWSLaunchTemplate, string, *apimachinerytypes.NamespacedName, *string, error) { if launchTemplateName == "" { - return nil, "", nil, nil + return nil, "", nil, nil, nil } s.scope.Debug("Looking for existing LaunchTemplates") @@ -359,13 +488,13 @@ func (s *Service) GetLaunchTemplate(launchTemplateName string) (*expinfrav1.AWSL out, err := s.EC2Client.DescribeLaunchTemplateVersionsWithContext(context.TODO(), input) switch { case awserrors.IsNotFound(err): - return nil, "", nil, nil + return nil, "", nil, nil, nil case err != nil: - return nil, "", nil, err + return nil, "", nil, nil, err } if out == nil || out.LaunchTemplateVersions == nil || len(out.LaunchTemplateVersions) == 0 { - return nil, "", nil, nil + return nil, "", nil, nil, nil } return s.SDKToLaunchTemplate(out.LaunchTemplateVersions[0]) @@ -399,10 +528,10 @@ func (s *Service) GetLaunchTemplateID(launchTemplateName string) (string, error) } // CreateLaunchTemplate generates a launch template to be used with the autoscaling group. -func (s *Service) CreateLaunchTemplate(scope scope.LaunchTemplateScope, imageID *string, userDataSecretKey apimachinerytypes.NamespacedName, userData []byte) (string, error) { +func (s *Service) CreateLaunchTemplate(scope scope.LaunchTemplateScope, imageID *string, userDataSecretKey apimachinerytypes.NamespacedName, userData []byte, bootstrapDataHash string) (string, error) { s.scope.Info("Create a new launch template") - launchTemplateData, err := s.createLaunchTemplateData(scope, imageID, userDataSecretKey, userData) + launchTemplateData, err := s.createLaunchTemplateData(scope, imageID, userDataSecretKey, userData, bootstrapDataHash) if err != nil { return "", errors.Wrapf(err, "unable to form launch template data") } @@ -443,10 +572,14 @@ func (s *Service) CreateLaunchTemplate(scope scope.LaunchTemplateScope, imageID } // CreateLaunchTemplateVersion will create a launch template. -func (s *Service) CreateLaunchTemplateVersion(id string, scope scope.LaunchTemplateScope, imageID *string, userDataSecretKey apimachinerytypes.NamespacedName, userData []byte) error { +// While userDataForLaunchTemplate is the data for the EC2 launch +// template, bootstrapDataHash relates to the final bootstrap data +// (not necessarily stored in EC2 user data, but could be in an S3 +// object). 
+func (s *Service) CreateLaunchTemplateVersion(id string, scope scope.LaunchTemplateScope, imageID *string, userDataSecretKey apimachinerytypes.NamespacedName, userDataForLaunchTemplate []byte, bootstrapDataHash string) error { s.scope.Debug("creating new launch template version", "machine-pool", scope.LaunchTemplateName()) - launchTemplateData, err := s.createLaunchTemplateData(scope, imageID, userDataSecretKey, userData) + launchTemplateData, err := s.createLaunchTemplateData(scope, imageID, userDataSecretKey, userDataForLaunchTemplate, bootstrapDataHash) if err != nil { return errors.Wrapf(err, "unable to form launch template data") } @@ -464,7 +597,7 @@ func (s *Service) CreateLaunchTemplateVersion(id string, scope scope.LaunchTempl return nil } -func (s *Service) createLaunchTemplateData(scope scope.LaunchTemplateScope, imageID *string, userDataSecretKey apimachinerytypes.NamespacedName, userData []byte) (*ec2.RequestLaunchTemplateData, error) { +func (s *Service) createLaunchTemplateData(scope scope.LaunchTemplateScope, imageID *string, userDataSecretKey apimachinerytypes.NamespacedName, userDataForLaunchTemplate []byte, bootstrapDataHash string) (*ec2.RequestLaunchTemplateData, error) { lt := scope.GetLaunchTemplate() // An explicit empty string for SSHKeyName means do not specify a key in the ASG launch @@ -476,7 +609,7 @@ func (s *Service) createLaunchTemplateData(scope scope.LaunchTemplateScope, imag data := &ec2.RequestLaunchTemplateData{ InstanceType: aws.String(lt.InstanceType), KeyName: sshKeyNamePtr, - UserData: ptr.To[string](base64.StdEncoding.EncodeToString(userData)), + UserData: ptr.To[string](base64.StdEncoding.EncodeToString(userDataForLaunchTemplate)), } if lt.InstanceMetadataOptions != nil { @@ -551,7 +684,7 @@ func (s *Service) createLaunchTemplateData(scope scope.LaunchTemplateScope, imag data.BlockDeviceMappings = blockDeviceMappings } - data.TagSpecifications = s.buildLaunchTemplateTagSpecificationRequest(scope, userDataSecretKey) + data.TagSpecifications = s.buildLaunchTemplateTagSpecificationRequest(scope, userDataSecretKey, bootstrapDataHash) return data, nil } @@ -606,7 +739,8 @@ func (s *Service) DeleteLaunchTemplate(id string) error { // It does not delete the "latest" version, because that version may still be in use. // It does not delete the "default" version, because that version cannot be deleted. // It does not assume that versions are sequential. Versions may be deleted out of band. -func (s *Service) PruneLaunchTemplateVersions(id string) error { +// If there was an unused version which was successfully deleted, it is returned. +func (s *Service) PruneLaunchTemplateVersions(id string) (*ec2.LaunchTemplateVersion, error) { // When there is one version available, it is the default and the latest. // When there are two versions available, one the is the default, the other is the latest. // Therefore we only prune when there are at least 3 versions available. 
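The pruning rule spelled out in the comments above reduces to a few lines: with versions listed oldest-first, the first entry is treated as the default and the last as the latest, so the only safe deletion candidate is the second entry once at least three exist. A condensed sketch of that selection (not the patch's exact code path):

package launchtemplates

import "github.com/aws/aws-sdk-go/service/ec2"

// pickPruneCandidate returns the one old version that may be deleted, or nil
// when nothing can safely be pruned. It assumes the slice is ordered as the
// surrounding code expects: [default, versionToPrune, ..., latest].
func pickPruneCandidate(versions []*ec2.LaunchTemplateVersion) *ec2.LaunchTemplateVersion {
	const minCountToAllowPrune = 3
	if len(versions) < minCountToAllowPrune {
		return nil // only [default] or [default, latest] remain
	}
	return versions[1]
}

Returning the deleted version (rather than just an error) is what lets the caller read its bootstrap-data-hash tag and remove the matching machine-pool/<launch template name>/<hash> object from S3.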
@@ -622,7 +756,7 @@ func (s *Service) PruneLaunchTemplateVersions(id string) error { out, err := s.EC2Client.DescribeLaunchTemplateVersionsWithContext(context.TODO(), input) if err != nil { s.scope.Info("", "aerr", err.Error()) - return err + return nil, err } // len(out.LaunchTemplateVersions) | items @@ -631,10 +765,14 @@ func (s *Service) PruneLaunchTemplateVersions(id string) error { // 2 | [default, latest] // 3 | [default, versionToPrune, latest] if len(out.LaunchTemplateVersions) < minCountToAllowPrune { - return nil + return nil, nil + } + versionToPrune := out.LaunchTemplateVersions[1] + err = s.deleteLaunchTemplateVersion(id, versionToPrune.VersionNumber) + if err != nil { + return nil, err } - versionToPrune := out.LaunchTemplateVersions[1].VersionNumber - return s.deleteLaunchTemplateVersion(id, versionToPrune) + return versionToPrune, nil } // GetLaunchTemplateLatestVersion returns the latest version of a launch template. @@ -658,11 +796,12 @@ func (s *Service) GetLaunchTemplateLatestVersion(id string) (string, error) { } func (s *Service) deleteLaunchTemplateVersion(id string, version *int64) error { - s.scope.Debug("Deleting launch template version", "id", id) - if version == nil { return errors.New("version is a nil pointer") } + + s.scope.Debug("Deleting launch template version", "id", id, "version", *version) + versions := []string{strconv.FormatInt(*version, 10)} input := &ec2.DeleteLaunchTemplateVersionsInput{ @@ -675,12 +814,12 @@ func (s *Service) deleteLaunchTemplateVersion(id string, version *int64) error { return err } - s.scope.Debug("Deleted launch template", "id", id, "version", *version) + s.scope.Debug("Deleted launch template version", "id", id, "version", *version) return nil } // SDKToLaunchTemplate converts an AWS EC2 SDK instance to the CAPA instance type. 
-func (s *Service) SDKToLaunchTemplate(d *ec2.LaunchTemplateVersion) (*expinfrav1.AWSLaunchTemplate, string, *apimachinerytypes.NamespacedName, error) { +func (s *Service) SDKToLaunchTemplate(d *ec2.LaunchTemplateVersion) (*expinfrav1.AWSLaunchTemplate, string, *apimachinerytypes.NamespacedName, *string, error) { v := d.LaunchTemplateData i := &expinfrav1.AWSLaunchTemplate{ Name: aws.StringValue(d.LaunchTemplateName), @@ -735,30 +874,35 @@ func (s *Service) SDKToLaunchTemplate(d *ec2.LaunchTemplateVersion) (*expinfrav1 } if v.UserData == nil { - return i, userdata.ComputeHash(nil), nil, nil + return i, userdata.ComputeHash(nil), nil, nil, nil } decodedUserData, err := base64.StdEncoding.DecodeString(*v.UserData) if err != nil { - return nil, "", nil, errors.Wrap(err, "unable to decode UserData") + return nil, "", nil, nil, errors.Wrap(err, "unable to decode UserData") } decodedUserDataHash := userdata.ComputeHash(decodedUserData) + var launchTemplateUserDataSecretKey *apimachinerytypes.NamespacedName + var bootstrapDataHash *string for _, tagSpecification := range v.TagSpecifications { if tagSpecification.ResourceType != nil && *tagSpecification.ResourceType == ec2.ResourceTypeInstance { for _, tag := range tagSpecification.Tags { if tag.Key != nil && *tag.Key == infrav1.LaunchTemplateBootstrapDataSecret && tag.Value != nil && strings.Contains(*tag.Value, "/") { parts := strings.SplitN(*tag.Value, "/", 2) - launchTemplateUserDataSecretKey := &apimachinerytypes.NamespacedName{ + launchTemplateUserDataSecretKey = &apimachinerytypes.NamespacedName{ Namespace: parts[0], Name: parts[1], } - return i, decodedUserDataHash, launchTemplateUserDataSecretKey, nil + } + + if tag.Key != nil && *tag.Key == infrav1.LaunchTemplateBootstrapDataHash && tag.Value != nil && *tag.Value != "" { + bootstrapDataHash = tag.Value } } } } - return i, decodedUserDataHash, nil, nil + return i, decodedUserDataHash, launchTemplateUserDataSecretKey, bootstrapDataHash, nil } // LaunchTemplateNeedsUpdate checks if a new launch template version is needed. 
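SDKToLaunchTemplate now recovers two pieces of state from the instance tags instead of one: the bootstrap secret reference (encoded as namespace/name) and the bootstrap data hash. A stripped-down sketch of just that tag decoding, using the literal tag keys exercised by the tests below in place of the infrav1 constants:

package tagdecode

import (
	"strings"

	"github.com/aws/aws-sdk-go/aws"
	"github.com/aws/aws-sdk-go/service/ec2"
	apimachinerytypes "k8s.io/apimachinery/pkg/types"
)

const (
	// Literal values of infrav1.LaunchTemplateBootstrapDataSecret and
	// infrav1.LaunchTemplateBootstrapDataHash, as seen in the tests.
	bootstrapDataSecretTag = "sigs.k8s.io/cluster-api-provider-aws/bootstrap-data-secret"
	bootstrapDataHashTag   = "sigs.k8s.io/cluster-api-provider-aws/bootstrap-data-hash"
)

// decodeBootstrapTags extracts the bootstrap secret key and bootstrap data
// hash from instance tags; either result may be nil when the respective tag
// is absent (a sketch of the loop in SDKToLaunchTemplate, not the function itself).
func decodeBootstrapTags(tags []*ec2.Tag) (secretKey *apimachinerytypes.NamespacedName, dataHash *string) {
	for _, tag := range tags {
		key, value := aws.StringValue(tag.Key), aws.StringValue(tag.Value)
		switch {
		case key == bootstrapDataSecretTag && strings.Contains(value, "/"):
			parts := strings.SplitN(value, "/", 2)
			secretKey = &apimachinerytypes.NamespacedName{Namespace: parts[0], Name: parts[1]}
		case key == bootstrapDataHashTag && value != "":
			dataHash = tag.Value
		}
	}
	return secretKey, dataHash
}

Unlike the old code, the loop no longer returns on the first match, since both tags have to be collected before the function can return.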
@@ -894,7 +1038,7 @@ func (s *Service) GetAdditionalSecurityGroupsIDs(securityGroups []infrav1.AWSRes return additionalSecurityGroupsIDs, nil } -func (s *Service) buildLaunchTemplateTagSpecificationRequest(scope scope.LaunchTemplateScope, userDataSecretKey apimachinerytypes.NamespacedName) []*ec2.LaunchTemplateTagSpecificationRequest { +func (s *Service) buildLaunchTemplateTagSpecificationRequest(scope scope.LaunchTemplateScope, userDataSecretKey apimachinerytypes.NamespacedName, bootstrapDataHash string) []*ec2.LaunchTemplateTagSpecificationRequest { tagSpecifications := make([]*ec2.LaunchTemplateTagSpecificationRequest, 0) additionalTags := scope.AdditionalTags() // Set the cloud provider tag @@ -912,6 +1056,7 @@ func (s *Service) buildLaunchTemplateTagSpecificationRequest(scope scope.LaunchT { instanceTags := tags.DeepCopy() instanceTags[infrav1.LaunchTemplateBootstrapDataSecret] = userDataSecretKey.String() + instanceTags[infrav1.LaunchTemplateBootstrapDataHash] = bootstrapDataHash spec := &ec2.LaunchTemplateTagSpecificationRequest{ResourceType: aws.String(ec2.ResourceTypeInstance)} for key, value := range instanceTags { diff --git a/pkg/cloud/services/ec2/launchtemplate_test.go b/pkg/cloud/services/ec2/launchtemplate_test.go index 4553ad4546..aca1b85a12 100644 --- a/pkg/cloud/services/ec2/launchtemplate_test.go +++ b/pkg/cloud/services/ec2/launchtemplate_test.go @@ -80,12 +80,19 @@ users: var testUserDataHash = userdata.ComputeHash([]byte(testUserData)) -func defaultEC2AndUserDataSecretKeyTags(name string, clusterName string, userDataSecretKey types.NamespacedName) []*ec2.Tag { +var testBootstrapData = []byte("different from testUserData since bootstrap data may be in S3 while EC2 user data points to that S3 object") +var testBootstrapDataHash = userdata.ComputeHash(testBootstrapData) + +func defaultEC2AndDataTags(name string, clusterName string, userDataSecretKey types.NamespacedName, bootstrapDataHash string) []*ec2.Tag { tags := defaultEC2Tags(name, clusterName) tags = append(tags, &ec2.Tag{ Key: aws.String(infrav1.LaunchTemplateBootstrapDataSecret), Value: aws.String(userDataSecretKey.String()), }) + tags = append(tags, &ec2.Tag{ + Key: aws.String(infrav1.LaunchTemplateBootstrapDataHash), + Value: aws.String(bootstrapDataHash), + }) sortTags(tags) return tags } @@ -295,7 +302,7 @@ func TestGetLaunchTemplate(t *testing.T) { tc.expect(mockEC2Client.EXPECT()) } - launchTemplate, userData, _, err := s.GetLaunchTemplate(tc.launchTemplateName) + launchTemplate, userData, _, _, err := s.GetLaunchTemplate(tc.launchTemplateName) tc.check(g, launchTemplate, userData, err) }) } @@ -303,12 +310,13 @@ func TestGetLaunchTemplate(t *testing.T) { func TestServiceSDKToLaunchTemplate(t *testing.T) { tests := []struct { - name string - input *ec2.LaunchTemplateVersion - wantLT *expinfrav1.AWSLaunchTemplate - wantHash string - wantDataSecretKey *types.NamespacedName - wantErr bool + name string + input *ec2.LaunchTemplateVersion + wantLT *expinfrav1.AWSLaunchTemplate + wantUserDataHash string + wantDataSecretKey *types.NamespacedName + wantBootstrapDataHash *string + wantErr bool }{ { name: "lots of input", @@ -350,8 +358,9 @@ func TestServiceSDKToLaunchTemplate(t *testing.T) { SSHKeyName: aws.String("foo-keyname"), VersionNumber: aws.Int64(1), }, - wantHash: testUserDataHash, - wantDataSecretKey: nil, // respective tag is not given + wantUserDataHash: testUserDataHash, + wantDataSecretKey: nil, // respective tag is not given + wantBootstrapDataHash: nil, // respective tag is not given }, { name: 
"tag of bootstrap secret", @@ -388,6 +397,10 @@ func TestServiceSDKToLaunchTemplate(t *testing.T) { Key: aws.String("sigs.k8s.io/cluster-api-provider-aws/bootstrap-data-secret"), Value: aws.String("bootstrap-secret-ns/bootstrap-secret"), }, + { + Key: aws.String("sigs.k8s.io/cluster-api-provider-aws/bootstrap-data-hash"), + Value: aws.String(testBootstrapDataHash), + }, }, }, }, @@ -404,26 +417,29 @@ func TestServiceSDKToLaunchTemplate(t *testing.T) { SSHKeyName: aws.String("foo-keyname"), VersionNumber: aws.Int64(1), }, - wantHash: testUserDataHash, - wantDataSecretKey: &types.NamespacedName{Namespace: "bootstrap-secret-ns", Name: "bootstrap-secret"}, + wantUserDataHash: testUserDataHash, + wantDataSecretKey: &types.NamespacedName{Namespace: "bootstrap-secret-ns", Name: "bootstrap-secret"}, + wantBootstrapDataHash: &testBootstrapDataHash, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) s := &Service{} - gotLT, gotHash, gotDataSecretKey, err := s.SDKToLaunchTemplate(tt.input) + gotLT, gotUserDataHash, gotDataSecretKey, gotBootstrapDataHash, err := s.SDKToLaunchTemplate(tt.input) if (err != nil) != tt.wantErr { t.Fatalf("error mismatch: got %v, wantErr %v", err, tt.wantErr) } if !cmp.Equal(gotLT, tt.wantLT) { t.Fatalf("launchTemplate mismatch: got %v, want %v", gotLT, tt.wantLT) } - if !cmp.Equal(gotHash, tt.wantHash) { - t.Fatalf("userDataHash mismatch: got %v, want %v", gotHash, tt.wantHash) + if !cmp.Equal(gotUserDataHash, tt.wantUserDataHash) { + t.Fatalf("userDataHash mismatch: got %v, want %v", gotUserDataHash, tt.wantUserDataHash) } if !cmp.Equal(gotDataSecretKey, tt.wantDataSecretKey) { t.Fatalf("userDataSecretKey mismatch: got %v, want %v", gotDataSecretKey, tt.wantDataSecretKey) } + g.Expect(gotBootstrapDataHash).To(Equal(tt.wantBootstrapDataHash)) }) } } @@ -845,7 +861,7 @@ func TestCreateLaunchTemplate(t *testing.T) { TagSpecifications: []*ec2.LaunchTemplateTagSpecificationRequest{ { ResourceType: aws.String(ec2.ResourceTypeInstance), - Tags: defaultEC2AndUserDataSecretKeyTags("aws-mp-name", "cluster-name", userDataSecretKey), + Tags: defaultEC2AndDataTags("aws-mp-name", "cluster-name", userDataSecretKey, testBootstrapDataHash), }, { ResourceType: aws.String(ec2.ResourceTypeVolume), @@ -905,7 +921,7 @@ func TestCreateLaunchTemplate(t *testing.T) { TagSpecifications: []*ec2.LaunchTemplateTagSpecificationRequest{ { ResourceType: aws.String(ec2.ResourceTypeInstance), - Tags: defaultEC2AndUserDataSecretKeyTags("aws-mp-name", "cluster-name", userDataSecretKey), + Tags: defaultEC2AndDataTags("aws-mp-name", "cluster-name", userDataSecretKey, testBootstrapDataHash), }, { ResourceType: aws.String(ec2.ResourceTypeVolume), @@ -967,7 +983,7 @@ func TestCreateLaunchTemplate(t *testing.T) { TagSpecifications: []*ec2.LaunchTemplateTagSpecificationRequest{ { ResourceType: aws.String(ec2.ResourceTypeInstance), - Tags: defaultEC2AndUserDataSecretKeyTags("aws-mp-name", "cluster-name", userDataSecretKey), + Tags: defaultEC2AndDataTags("aws-mp-name", "cluster-name", userDataSecretKey, testBootstrapDataHash), }, { ResourceType: aws.String(ec2.ResourceTypeVolume), @@ -1022,7 +1038,7 @@ func TestCreateLaunchTemplate(t *testing.T) { tc.expect(g, mockEC2Client.EXPECT()) } - launchTemplate, err := s.CreateLaunchTemplate(ms, aws.String("imageID"), userDataSecretKey, userData) + launchTemplate, err := s.CreateLaunchTemplate(ms, aws.String("imageID"), userDataSecretKey, userData, testBootstrapDataHash) tc.check(g, launchTemplate, err) }) } @@ -1050,7 +1066,7 @@ func 
TestLaunchTemplateDataCreation(t *testing.T) { Namespace: "bootstrap-secret-ns", Name: "bootstrap-secret", } - launchTemplate, err := s.CreateLaunchTemplate(ms, aws.String("imageID"), userDataSecretKey, nil) + launchTemplate, err := s.CreateLaunchTemplate(ms, aws.String("imageID"), userDataSecretKey, nil, "") g.Expect(err).To(HaveOccurred()) g.Expect(launchTemplate).Should(BeEmpty()) }) @@ -1104,7 +1120,7 @@ func TestCreateLaunchTemplateVersion(t *testing.T) { TagSpecifications: []*ec2.LaunchTemplateTagSpecificationRequest{ { ResourceType: aws.String(ec2.ResourceTypeInstance), - Tags: defaultEC2AndUserDataSecretKeyTags("aws-mp-name", "cluster-name", userDataSecretKey), + Tags: defaultEC2AndDataTags("aws-mp-name", "cluster-name", userDataSecretKey, testBootstrapDataHash), }, { ResourceType: aws.String(ec2.ResourceTypeVolume), @@ -1155,7 +1171,7 @@ func TestCreateLaunchTemplateVersion(t *testing.T) { TagSpecifications: []*ec2.LaunchTemplateTagSpecificationRequest{ { ResourceType: aws.String(ec2.ResourceTypeInstance), - Tags: defaultEC2AndUserDataSecretKeyTags("aws-mp-name", "cluster-name", userDataSecretKey), + Tags: defaultEC2AndDataTags("aws-mp-name", "cluster-name", userDataSecretKey, testBootstrapDataHash), }, { ResourceType: aws.String(ec2.ResourceTypeVolume), @@ -1202,10 +1218,10 @@ func TestCreateLaunchTemplateVersion(t *testing.T) { tc.expect(mockEC2Client.EXPECT()) } if tc.wantErr { - g.Expect(s.CreateLaunchTemplateVersion("launch-template-id", ms, aws.String("imageID"), userDataSecretKey, userData)).To(HaveOccurred()) + g.Expect(s.CreateLaunchTemplateVersion("launch-template-id", ms, aws.String("imageID"), userDataSecretKey, userData, testBootstrapDataHash)).To(HaveOccurred()) return } - g.Expect(s.CreateLaunchTemplateVersion("launch-template-id", ms, aws.String("imageID"), userDataSecretKey, userData)).NotTo(HaveOccurred()) + g.Expect(s.CreateLaunchTemplateVersion("launch-template-id", ms, aws.String("imageID"), userDataSecretKey, userData, testBootstrapDataHash)).NotTo(HaveOccurred()) }) } } @@ -1218,6 +1234,7 @@ func TestBuildLaunchTemplateTagSpecificationRequest(t *testing.T) { Namespace: "bootstrap-secret-ns", Name: "bootstrap-secret", } + bootstrapDataHash := userdata.ComputeHash([]byte("shell-script")) testCases := []struct { name string check func(g *WithT, m []*ec2.LaunchTemplateTagSpecificationRequest) @@ -1228,7 +1245,7 @@ func TestBuildLaunchTemplateTagSpecificationRequest(t *testing.T) { expected := []*ec2.LaunchTemplateTagSpecificationRequest{ { ResourceType: aws.String(ec2.ResourceTypeInstance), - Tags: defaultEC2AndUserDataSecretKeyTags("aws-mp-name", "cluster-name", userDataSecretKey), + Tags: defaultEC2AndDataTags("aws-mp-name", "cluster-name", userDataSecretKey, bootstrapDataHash), }, { ResourceType: aws.String(ec2.ResourceTypeVolume), @@ -1258,7 +1275,7 @@ func TestBuildLaunchTemplateTagSpecificationRequest(t *testing.T) { g.Expect(err).NotTo(HaveOccurred()) s := NewService(cs) - tc.check(g, s.buildLaunchTemplateTagSpecificationRequest(ms, userDataSecretKey)) + tc.check(g, s.buildLaunchTemplateTagSpecificationRequest(ms, userDataSecretKey, bootstrapDataHash)) }) } } diff --git a/pkg/cloud/services/interfaces.go b/pkg/cloud/services/interfaces.go index 46a2c7aecf..7b2a12756a 100644 --- a/pkg/cloud/services/interfaces.go +++ b/pkg/cloud/services/interfaces.go @@ -20,7 +20,9 @@ package services import ( "context" + "github.com/aws/aws-sdk-go/service/ec2" apimachinerytypes "k8s.io/apimachinery/pkg/types" + ctrl "sigs.k8s.io/controller-runtime" infrav1 
"sigs.k8s.io/cluster-api-provider-aws/v2/api/v1beta2" expinfrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/exp/api/v1beta2" @@ -43,8 +45,9 @@ type ASGInterface interface { GetASGByName(scope *scope.MachinePoolScope) (*expinfrav1.AutoScalingGroup, error) CreateASG(scope *scope.MachinePoolScope) (*expinfrav1.AutoScalingGroup, error) UpdateASG(scope *scope.MachinePoolScope) error + CancelASGInstanceRefresh(scope *scope.MachinePoolScope) error StartASGInstanceRefresh(scope *scope.MachinePoolScope) error - CanStartASGInstanceRefresh(scope *scope.MachinePoolScope) (bool, error) + CanStartASGInstanceRefresh(scope *scope.MachinePoolScope) (bool, *string, error) UpdateResourceTags(resourceID *string, create, remove map[string]string) error DeleteASGAndWait(id string) error SuspendProcesses(name string, processes []string) error @@ -71,12 +74,12 @@ type EC2Interface interface { DetachSecurityGroupsFromNetworkInterface(groups []string, interfaceID string) error DiscoverLaunchTemplateAMI(scope scope.LaunchTemplateScope) (*string, error) - GetLaunchTemplate(id string) (lt *expinfrav1.AWSLaunchTemplate, userDataHash string, userDataSecretKey *apimachinerytypes.NamespacedName, err error) + GetLaunchTemplate(id string) (lt *expinfrav1.AWSLaunchTemplate, userDataHash string, userDataSecretKey *apimachinerytypes.NamespacedName, bootstrapDataHash *string, err error) GetLaunchTemplateID(id string) (string, error) GetLaunchTemplateLatestVersion(id string) (string, error) - CreateLaunchTemplate(scope scope.LaunchTemplateScope, imageID *string, userDataSecretKey apimachinerytypes.NamespacedName, userData []byte) (string, error) - CreateLaunchTemplateVersion(id string, scope scope.LaunchTemplateScope, imageID *string, userDataSecretKey apimachinerytypes.NamespacedName, userData []byte) error - PruneLaunchTemplateVersions(id string) error + CreateLaunchTemplate(scope scope.LaunchTemplateScope, imageID *string, userDataSecretKey apimachinerytypes.NamespacedName, userData []byte, bootstrapDataHash string) (string, error) + CreateLaunchTemplateVersion(id string, scope scope.LaunchTemplateScope, imageID *string, userDataSecretKey apimachinerytypes.NamespacedName, userData []byte, bootstrapDataHash string) error + PruneLaunchTemplateVersions(id string) (*ec2.LaunchTemplateVersion, error) DeleteLaunchTemplate(id string) error LaunchTemplateNeedsUpdate(scope scope.LaunchTemplateScope, incoming *expinfrav1.AWSLaunchTemplate, existing *expinfrav1.AWSLaunchTemplate) (bool, error) DeleteBastion() error @@ -87,7 +90,7 @@ type EC2Interface interface { // separate from EC2Interface so that we can mock AWS requests separately. For example, by not mocking the // ReconcileLaunchTemplate function, but mocking EC2Interface, we can test which EC2 API operations would have been called. 
type MachinePoolReconcileInterface interface { - ReconcileLaunchTemplate(scope scope.LaunchTemplateScope, ec2svc EC2Interface, canUpdateLaunchTemplate func() (bool, error), runPostLaunchTemplateUpdateOperation func() error) error + ReconcileLaunchTemplate(ignitionScope scope.IgnitionScope, scope scope.LaunchTemplateScope, s3Scope scope.S3Scope, ec2svc EC2Interface, objectStoreSvc ObjectStoreInterface, canUpdateLaunchTemplate func() (bool, *string, error), cancelInstanceRefresh func() error, runPostLaunchTemplateUpdateOperation func() error) (*ctrl.Result, error) ReconcileTags(scope scope.LaunchTemplateScope, resourceServicesToUpdate []scope.ResourceServiceToUpdate) error } @@ -132,6 +135,8 @@ type ObjectStoreInterface interface { ReconcileBucket() error Delete(m *scope.MachineScope) error Create(m *scope.MachineScope, data []byte) (objectURL string, err error) + CreateForMachinePool(scope scope.LaunchTemplateScope, data []byte) (objectURL string, err error) + DeleteForMachinePool(scope scope.LaunchTemplateScope, bootstrapDataHash string) error } // AWSNodeInterface installs the CNI for EKS clusters. diff --git a/pkg/cloud/services/mock_services/autoscaling_interface_mock.go b/pkg/cloud/services/mock_services/autoscaling_interface_mock.go index b860077f4f..2448d3dcb7 100644 --- a/pkg/cloud/services/mock_services/autoscaling_interface_mock.go +++ b/pkg/cloud/services/mock_services/autoscaling_interface_mock.go @@ -67,12 +67,13 @@ func (mr *MockASGInterfaceMockRecorder) ASGIfExists(arg0 interface{}) *gomock.Ca } // CanStartASGInstanceRefresh mocks base method. -func (m *MockASGInterface) CanStartASGInstanceRefresh(arg0 *scope.MachinePoolScope) (bool, error) { +func (m *MockASGInterface) CanStartASGInstanceRefresh(arg0 *scope.MachinePoolScope) (bool, *string, error) { m.ctrl.T.Helper() ret := m.ctrl.Call(m, "CanStartASGInstanceRefresh", arg0) ret0, _ := ret[0].(bool) - ret1, _ := ret[1].(error) - return ret0, ret1 + ret1, _ := ret[1].(*string) + ret2, _ := ret[2].(error) + return ret0, ret1, ret2 } // CanStartASGInstanceRefresh indicates an expected call of CanStartASGInstanceRefresh. @@ -81,6 +82,20 @@ func (mr *MockASGInterfaceMockRecorder) CanStartASGInstanceRefresh(arg0 interfac return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CanStartASGInstanceRefresh", reflect.TypeOf((*MockASGInterface)(nil).CanStartASGInstanceRefresh), arg0) } +// CancelASGInstanceRefresh mocks base method. +func (m *MockASGInterface) CancelASGInstanceRefresh(arg0 *scope.MachinePoolScope) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "CancelASGInstanceRefresh", arg0) + ret0, _ := ret[0].(error) + return ret0 +} + +// CancelASGInstanceRefresh indicates an expected call of CancelASGInstanceRefresh. +func (mr *MockASGInterfaceMockRecorder) CancelASGInstanceRefresh(arg0 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CancelASGInstanceRefresh", reflect.TypeOf((*MockASGInterface)(nil).CancelASGInstanceRefresh), arg0) +} + // CreateASG mocks base method. 
func (m *MockASGInterface) CreateASG(arg0 *scope.MachinePoolScope) (*v1beta2.AutoScalingGroup, error) { m.ctrl.T.Helper() diff --git a/pkg/cloud/services/mock_services/ec2_interface_mock.go b/pkg/cloud/services/mock_services/ec2_interface_mock.go index 922d5f3360..29763d531f 100644 --- a/pkg/cloud/services/mock_services/ec2_interface_mock.go +++ b/pkg/cloud/services/mock_services/ec2_interface_mock.go @@ -23,6 +23,7 @@ package mock_services import ( reflect "reflect" + ec2 "github.com/aws/aws-sdk-go/service/ec2" gomock "github.com/golang/mock/gomock" types "k8s.io/apimachinery/pkg/types" v1beta2 "sigs.k8s.io/cluster-api-provider-aws/v2/api/v1beta2" @@ -69,32 +70,32 @@ func (mr *MockEC2InterfaceMockRecorder) CreateInstance(arg0, arg1, arg2 interfac } // CreateLaunchTemplate mocks base method. -func (m *MockEC2Interface) CreateLaunchTemplate(arg0 scope.LaunchTemplateScope, arg1 *string, arg2 types.NamespacedName, arg3 []byte) (string, error) { +func (m *MockEC2Interface) CreateLaunchTemplate(arg0 scope.LaunchTemplateScope, arg1 *string, arg2 types.NamespacedName, arg3 []byte, arg4 string) (string, error) { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "CreateLaunchTemplate", arg0, arg1, arg2, arg3) + ret := m.ctrl.Call(m, "CreateLaunchTemplate", arg0, arg1, arg2, arg3, arg4) ret0, _ := ret[0].(string) ret1, _ := ret[1].(error) return ret0, ret1 } // CreateLaunchTemplate indicates an expected call of CreateLaunchTemplate. -func (mr *MockEC2InterfaceMockRecorder) CreateLaunchTemplate(arg0, arg1, arg2, arg3 interface{}) *gomock.Call { +func (mr *MockEC2InterfaceMockRecorder) CreateLaunchTemplate(arg0, arg1, arg2, arg3, arg4 interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CreateLaunchTemplate", reflect.TypeOf((*MockEC2Interface)(nil).CreateLaunchTemplate), arg0, arg1, arg2, arg3) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CreateLaunchTemplate", reflect.TypeOf((*MockEC2Interface)(nil).CreateLaunchTemplate), arg0, arg1, arg2, arg3, arg4) } // CreateLaunchTemplateVersion mocks base method. -func (m *MockEC2Interface) CreateLaunchTemplateVersion(arg0 string, arg1 scope.LaunchTemplateScope, arg2 *string, arg3 types.NamespacedName, arg4 []byte) error { +func (m *MockEC2Interface) CreateLaunchTemplateVersion(arg0 string, arg1 scope.LaunchTemplateScope, arg2 *string, arg3 types.NamespacedName, arg4 []byte, arg5 string) error { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "CreateLaunchTemplateVersion", arg0, arg1, arg2, arg3, arg4) + ret := m.ctrl.Call(m, "CreateLaunchTemplateVersion", arg0, arg1, arg2, arg3, arg4, arg5) ret0, _ := ret[0].(error) return ret0 } // CreateLaunchTemplateVersion indicates an expected call of CreateLaunchTemplateVersion. -func (mr *MockEC2InterfaceMockRecorder) CreateLaunchTemplateVersion(arg0, arg1, arg2, arg3, arg4 interface{}) *gomock.Call { +func (mr *MockEC2InterfaceMockRecorder) CreateLaunchTemplateVersion(arg0, arg1, arg2, arg3, arg4, arg5 interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CreateLaunchTemplateVersion", reflect.TypeOf((*MockEC2Interface)(nil).CreateLaunchTemplateVersion), arg0, arg1, arg2, arg3, arg4) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CreateLaunchTemplateVersion", reflect.TypeOf((*MockEC2Interface)(nil).CreateLaunchTemplateVersion), arg0, arg1, arg2, arg3, arg4, arg5) } // DeleteBastion mocks base method. 
@@ -200,14 +201,15 @@ func (mr *MockEC2InterfaceMockRecorder) GetInstanceSecurityGroups(arg0 interface } // GetLaunchTemplate mocks base method. -func (m *MockEC2Interface) GetLaunchTemplate(arg0 string) (*v1beta20.AWSLaunchTemplate, string, *types.NamespacedName, error) { +func (m *MockEC2Interface) GetLaunchTemplate(arg0 string) (*v1beta20.AWSLaunchTemplate, string, *types.NamespacedName, *string, error) { m.ctrl.T.Helper() ret := m.ctrl.Call(m, "GetLaunchTemplate", arg0) ret0, _ := ret[0].(*v1beta20.AWSLaunchTemplate) ret1, _ := ret[1].(string) ret2, _ := ret[2].(*types.NamespacedName) - ret3, _ := ret[3].(error) - return ret0, ret1, ret2, ret3 + ret3, _ := ret[3].(*string) + ret4, _ := ret[4].(error) + return ret0, ret1, ret2, ret3, ret4 } // GetLaunchTemplate indicates an expected call of GetLaunchTemplate. @@ -306,11 +308,12 @@ func (mr *MockEC2InterfaceMockRecorder) ModifyInstanceMetadataOptions(arg0, arg1 } // PruneLaunchTemplateVersions mocks base method. -func (m *MockEC2Interface) PruneLaunchTemplateVersions(arg0 string) error { +func (m *MockEC2Interface) PruneLaunchTemplateVersions(arg0 string) (*ec2.LaunchTemplateVersion, error) { m.ctrl.T.Helper() ret := m.ctrl.Call(m, "PruneLaunchTemplateVersions", arg0) - ret0, _ := ret[0].(error) - return ret0 + ret0, _ := ret[0].(*ec2.LaunchTemplateVersion) + ret1, _ := ret[1].(error) + return ret0, ret1 } // PruneLaunchTemplateVersions indicates an expected call of PruneLaunchTemplateVersions. diff --git a/pkg/cloud/services/mock_services/objectstore_machine_interface_mock.go b/pkg/cloud/services/mock_services/objectstore_machine_interface_mock.go index 559f356f3a..f38bf008dd 100644 --- a/pkg/cloud/services/mock_services/objectstore_machine_interface_mock.go +++ b/pkg/cloud/services/mock_services/objectstore_machine_interface_mock.go @@ -65,6 +65,21 @@ func (mr *MockObjectStoreInterfaceMockRecorder) Create(arg0, arg1 interface{}) * return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "Create", reflect.TypeOf((*MockObjectStoreInterface)(nil).Create), arg0, arg1) } +// CreateForMachinePool mocks base method. +func (m *MockObjectStoreInterface) CreateForMachinePool(arg0 scope.LaunchTemplateScope, arg1 []byte) (string, error) { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "CreateForMachinePool", arg0, arg1) + ret0, _ := ret[0].(string) + ret1, _ := ret[1].(error) + return ret0, ret1 +} + +// CreateForMachinePool indicates an expected call of CreateForMachinePool. +func (mr *MockObjectStoreInterfaceMockRecorder) CreateForMachinePool(arg0, arg1 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "CreateForMachinePool", reflect.TypeOf((*MockObjectStoreInterface)(nil).CreateForMachinePool), arg0, arg1) +} + // Delete mocks base method. func (m *MockObjectStoreInterface) Delete(arg0 *scope.MachineScope) error { m.ctrl.T.Helper() @@ -93,6 +108,20 @@ func (mr *MockObjectStoreInterfaceMockRecorder) DeleteBucket() *gomock.Call { return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DeleteBucket", reflect.TypeOf((*MockObjectStoreInterface)(nil).DeleteBucket)) } +// DeleteForMachinePool mocks base method. +func (m *MockObjectStoreInterface) DeleteForMachinePool(arg0 scope.LaunchTemplateScope, arg1 string) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "DeleteForMachinePool", arg0, arg1) + ret0, _ := ret[0].(error) + return ret0 +} + +// DeleteForMachinePool indicates an expected call of DeleteForMachinePool. 
+func (mr *MockObjectStoreInterfaceMockRecorder) DeleteForMachinePool(arg0, arg1 interface{}) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "DeleteForMachinePool", reflect.TypeOf((*MockObjectStoreInterface)(nil).DeleteForMachinePool), arg0, arg1) +} + // ReconcileBucket mocks base method. func (m *MockObjectStoreInterface) ReconcileBucket() error { m.ctrl.T.Helper() diff --git a/pkg/cloud/services/mock_services/reconcile_interface_mock.go b/pkg/cloud/services/mock_services/reconcile_interface_mock.go index 3771e81e3a..cc2c41e082 100644 --- a/pkg/cloud/services/mock_services/reconcile_interface_mock.go +++ b/pkg/cloud/services/mock_services/reconcile_interface_mock.go @@ -26,6 +26,7 @@ import ( gomock "github.com/golang/mock/gomock" scope "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/scope" services "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services" + reconcile "sigs.k8s.io/controller-runtime/pkg/reconcile" ) // MockMachinePoolReconcileInterface is a mock of MachinePoolReconcileInterface interface. @@ -52,17 +53,18 @@ func (m *MockMachinePoolReconcileInterface) EXPECT() *MockMachinePoolReconcileIn } // ReconcileLaunchTemplate mocks base method. -func (m *MockMachinePoolReconcileInterface) ReconcileLaunchTemplate(arg0 scope.LaunchTemplateScope, arg1 services.EC2Interface, arg2 func() (bool, error), arg3 func() error) error { +func (m *MockMachinePoolReconcileInterface) ReconcileLaunchTemplate(arg0 scope.IgnitionScope, arg1 scope.LaunchTemplateScope, arg2 scope.S3Scope, arg3 services.EC2Interface, arg4 services.ObjectStoreInterface, arg5 func() (bool, *string, error), arg6, arg7 func() error) (*reconcile.Result, error) { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "ReconcileLaunchTemplate", arg0, arg1, arg2, arg3) - ret0, _ := ret[0].(error) - return ret0 + ret := m.ctrl.Call(m, "ReconcileLaunchTemplate", arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7) + ret0, _ := ret[0].(*reconcile.Result) + ret1, _ := ret[1].(error) + return ret0, ret1 } // ReconcileLaunchTemplate indicates an expected call of ReconcileLaunchTemplate. -func (mr *MockMachinePoolReconcileInterfaceMockRecorder) ReconcileLaunchTemplate(arg0, arg1, arg2, arg3 interface{}) *gomock.Call { +func (mr *MockMachinePoolReconcileInterfaceMockRecorder) ReconcileLaunchTemplate(arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7 interface{}) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ReconcileLaunchTemplate", reflect.TypeOf((*MockMachinePoolReconcileInterface)(nil).ReconcileLaunchTemplate), arg0, arg1, arg2, arg3) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ReconcileLaunchTemplate", reflect.TypeOf((*MockMachinePoolReconcileInterface)(nil).ReconcileLaunchTemplate), arg0, arg1, arg2, arg3, arg4, arg5, arg6, arg7) } // ReconcileTags mocks base method. 
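Before the s3.go changes below, one usage note on the generated mocks above: a machine pool test can program the two new object-store expectations as in this sketch. The test name, package, and the example bucket/hash values are illustrative; the mock types and methods are the generated ones shown above, with gomock's conventional NewMock... constructor assumed:

package services_test

import (
	"testing"

	"github.com/golang/mock/gomock"

	"sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services/mock_services"
)

func TestMachinePoolBootstrapObjects(t *testing.T) {
	mockCtrl := gomock.NewController(t)
	defer mockCtrl.Finish()

	objectStore := mock_services.NewMockObjectStoreInterface(mockCtrl)

	// Expect the bootstrap payload to be uploaded once, returning the URL
	// that the launch template user data will point at.
	objectStore.EXPECT().
		CreateForMachinePool(gomock.Any(), gomock.Any()).
		Return("s3://capa-bucket/machine-pool/my-pool/example-hash", nil)

	// Expect cleanup of the object belonging to a pruned launch template
	// version, addressed by its bootstrap data hash.
	objectStore.EXPECT().
		DeleteForMachinePool(gomock.Any(), "example-hash").
		Return(nil)

	// ... drive ReconcileLaunchTemplate with this mock wired in ...
}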
diff --git a/pkg/cloud/services/s3/s3.go b/pkg/cloud/services/s3/s3.go index 6eb8582585..cc645df829 100644 --- a/pkg/cloud/services/s3/s3.go +++ b/pkg/cloud/services/s3/s3.go @@ -35,8 +35,10 @@ import ( "k8s.io/utils/ptr" infrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/api/v1beta2" + "sigs.k8s.io/cluster-api-provider-aws/v2/feature" iam "sigs.k8s.io/cluster-api-provider-aws/v2/iam/api/v1beta1" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/scope" + "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services/userdata" "sigs.k8s.io/cluster-api-provider-aws/v2/util/system" ) @@ -84,6 +86,10 @@ func (s *Service) ReconcileBucket() error { return errors.Wrap(err, "ensuring bucket policy") } + if err := s.ensureBucketLifecycleConfiguration(bucketName); err != nil { + return errors.Wrap(err, "ensuring bucket lifecycle configuration") + } + return nil } @@ -99,6 +105,50 @@ func (s *Service) DeleteBucket() error { log.Info("Deleting S3 Bucket") + if feature.Gates.Enabled(feature.MachinePool) { + // Delete machine pool user data files that did not get deleted + // yet by the lifecycle policy + for { + log.Info("Listing S3 objects of machine pools") + + // TODO Switch to aws-sdk-go-v2 which has NewListObjectsV2Paginator (as part of https://github.com/kubernetes-sigs/cluster-api-provider-aws/issues/2225) + out, err := s.S3Client.ListObjectsV2(&s3.ListObjectsV2Input{ + Bucket: aws.String(bucketName), + Prefix: aws.String("machine-pool/"), + }) + if err != nil { + aerr, ok := err.(awserr.Error) + if !ok { + return errors.Wrap(err, "listing S3 bucket") + } + + switch aerr.Code() { + case s3.ErrCodeNoSuchBucket: + log.Info("Bucket already removed") + return nil + default: + return errors.Wrap(aerr, "listing S3 bucket") + } + } + + // Stop on last page of results + if len(out.Contents) == 0 { + break + } + + log.Info("Deleting S3 objects of machine pools", "count", len(out.Contents)) + for _, obj := range out.Contents { + _, err := s.S3Client.DeleteObject(&s3.DeleteObjectInput{ + Bucket: aws.String(bucketName), + Key: obj.Key, + }) + if err != nil { + return err + } + } + } + } + _, err := s.S3Client.DeleteBucket(&s3.DeleteBucketInput{ Bucket: aws.String(bucketName), }) @@ -169,6 +219,51 @@ func (s *Service) Create(m *scope.MachineScope, data []byte) (string, error) { return objectURL.String(), nil } +func (s *Service) CreateForMachinePool(scope scope.LaunchTemplateScope, data []byte) (string, error) { + if !s.bucketManagementEnabled() { + return "", errors.New("requested object creation but bucket management is not enabled") + } + + if scope.LaunchTemplateName() == "" { + return "", errors.New("launch template name can't be empty") + } + + if len(data) == 0 { + return "", errors.New("got empty data") + } + + bucket := s.bucketName() + key := s.bootstrapDataKeyForMachinePool(scope, userdata.ComputeHash(data)) + + s.scope.Info("Creating object for machine pool", "bucket_name", bucket, "key", key) + + if _, err := s.S3Client.PutObject(&s3.PutObjectInput{ + Body: aws.ReadSeekCloser(bytes.NewReader(data)), + Bucket: aws.String(bucket), + Key: aws.String(key), + ServerSideEncryption: aws.String("aws:kms"), + }); err != nil { + return "", errors.Wrap(err, "putting object for machine pool") + } + + if exp := s.scope.Bucket().PresignedURLDuration; exp != nil { + s.scope.Info("Generating presigned URL", "bucket_name", bucket, "key", key) + req, _ := s.S3Client.GetObjectRequest(&s3.GetObjectInput{ + Bucket: aws.String(bucket), + Key: aws.String(key), + }) + return req.Presign(exp.Duration) + } + + objectURL := 
&url.URL{ + Scheme: "s3", + Host: bucket, + Path: key, + } + + return objectURL.String(), nil +} + // Delete deletes the object from the S3 bucket. func (s *Service) Delete(m *scope.MachineScope) error { if !s.bucketManagementEnabled() { @@ -241,6 +336,42 @@ func (s *Service) deleteObject(bucket, key string) error { return nil } +func (s *Service) DeleteForMachinePool(scope scope.LaunchTemplateScope, bootstrapDataHash string) error { + if !s.bucketManagementEnabled() { + return errors.New("requested object deletion but bucket management is not enabled") + } + + if scope.LaunchTemplateName() == "" { + return errors.New("launch template name can't be empty") + } + + bucket := s.bucketName() + key := s.bootstrapDataKeyForMachinePool(scope, bootstrapDataHash) + + s.scope.Info("Deleting object for machine pool", "bucket_name", bucket, "key", key) + + _, err := s.S3Client.DeleteObject(&s3.DeleteObjectInput{ + Bucket: aws.String(bucket), + Key: aws.String(key), + }) + if err == nil { + return nil + } + + aerr, ok := err.(awserr.Error) + if !ok { + return errors.Wrap(err, "deleting S3 object for machine pool") + } + + switch aerr.Code() { + case s3.ErrCodeNoSuchBucket: + default: + return errors.Wrap(aerr, "deleting S3 object for machine pool") + } + + return nil +} + func (s *Service) createBucketIfNotExist(bucketName string) error { input := &s3.CreateBucketInput{Bucket: aws.String(bucketName)} @@ -294,6 +425,41 @@ func (s *Service) ensureBucketPolicy(bucketName string) error { return nil } +func (s *Service) ensureBucketLifecycleConfiguration(bucketName string) error { + if !feature.Gates.Enabled(feature.MachinePool) { + return nil + } + + input := &s3.PutBucketLifecycleConfigurationInput{ + Bucket: aws.String(bucketName), + LifecycleConfiguration: &s3.BucketLifecycleConfiguration{ + Rules: []*s3.LifecycleRule{ + { + ID: aws.String("machine-pool"), + Expiration: &s3.LifecycleExpiration{ + // The bootstrap token for new nodes to join the cluster is normally rotated regularly, + // such as in CAPI's `KubeadmConfig` reconciler. Therefore, the launch template user data + // stored in the S3 bucket only needs to live longer than the token TTL. + Days: aws.Int64(1), + }, + Filter: &s3.LifecycleRuleFilter{ + Prefix: aws.String("machine-pool/"), + }, + Status: aws.String(s3.ExpirationStatusEnabled), + }, + }, + }, + } + + if _, err := s.S3Client.PutBucketLifecycleConfiguration(input); err != nil { + return errors.Wrap(err, "creating S3 bucket lifecycle configuration") + } + + s.scope.Trace("Updated bucket lifecycle configuration", "bucket_name", bucketName) + + return nil +} + func (s *Service) tagBucket(bucketName string) error { taggingInput := &s3.PutBucketTaggingInput{ Bucket: aws.String(bucketName), @@ -380,6 +546,15 @@ func (s *Service) bucketPolicy(bucketName string) (string, error) { Action: []string{"s3:GetObject"}, Resource: []string{fmt.Sprintf("arn:%s:s3:::%s/node/*", partition, bucketName)}, }) + statements = append(statements, iam.StatementEntry{ + Sid: iamInstanceProfile, + Effect: iam.EffectAllow, + Principal: map[iam.PrincipalType]iam.PrincipalID{ + iam.PrincipalAWS: []string{fmt.Sprintf("arn:%s:iam::%s:role/%s", partition, *accountID.Account, iamInstanceProfile)}, + }, + Action: []string{"s3:GetObject"}, + Resource: []string{fmt.Sprintf("arn:%s:s3:::%s/machine-pool/*", partition, bucketName)}, + }) } } @@ -408,3 +583,7 @@ func (s *Service) bootstrapDataKey(m *scope.MachineScope) string { // Use machine name as object key. 
return path.Join(m.Role(), m.Name()) } + +func (s *Service) bootstrapDataKeyForMachinePool(scope scope.LaunchTemplateScope, dataHash string) string { + return path.Join("machine-pool", scope.LaunchTemplateName(), dataHash) +} diff --git a/pkg/cloud/services/s3/s3_test.go b/pkg/cloud/services/s3/s3_test.go index 3db7abfca7..4a1a598ba1 100644 --- a/pkg/cloud/services/s3/s3_test.go +++ b/pkg/cloud/services/s3/s3_test.go @@ -32,9 +32,11 @@ import ( "github.com/golang/mock/gomock" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + utilfeature "k8s.io/component-base/featuregate/testing" "sigs.k8s.io/controller-runtime/pkg/client/fake" infrav1 "sigs.k8s.io/cluster-api-provider-aws/v2/api/v1beta2" + "sigs.k8s.io/cluster-api-provider-aws/v2/feature" iamv1 "sigs.k8s.io/cluster-api-provider-aws/v2/iam/api/v1beta1" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/scope" "sigs.k8s.io/cluster-api-provider-aws/v2/pkg/cloud/services/s3" @@ -49,8 +51,6 @@ const ( ) func TestReconcileBucket(t *testing.T) { - t.Parallel() - t.Run("does_nothing_when_bucket_management_is_disabled", func(t *testing.T) { t.Parallel() @@ -62,7 +62,7 @@ func TestReconcileBucket(t *testing.T) { }) t.Run("creates_bucket_with_configured_name", func(t *testing.T) { - t.Parallel() + defer utilfeature.SetFeatureGateDuringTest(t, feature.Gates, feature.MachinePool, true)() expectedBucketName := "baz" @@ -104,6 +104,7 @@ func TestReconcileBucket(t *testing.T) { s3Mock.EXPECT().PutBucketTagging(gomock.Eq(taggingInput)).Return(nil, nil).Times(1) s3Mock.EXPECT().PutBucketPolicy(gomock.Any()).Return(nil, nil).Times(1) + s3Mock.EXPECT().PutBucketLifecycleConfiguration(gomock.Any()).Return(nil, nil).Times(1) if err := svc.ReconcileBucket(); err != nil { t.Fatalf("Unexpected error: %v", err) @@ -111,7 +112,7 @@ func TestReconcileBucket(t *testing.T) { }) t.Run("hashes_default_bucket_name_if_name_exceeds_maximum_length", func(t *testing.T) { - t.Parallel() + defer utilfeature.SetFeatureGateDuringTest(t, feature.Gates, feature.MachinePool, true)() mockCtrl := gomock.NewController(t) s3Mock := mock_s3iface.NewMockS3API(mockCtrl) @@ -159,6 +160,7 @@ func TestReconcileBucket(t *testing.T) { s3Mock.EXPECT().PutBucketTagging(gomock.Any()).Return(nil, nil).Times(1) s3Mock.EXPECT().PutBucketPolicy(gomock.Any()).Return(nil, nil).Times(1) + s3Mock.EXPECT().PutBucketLifecycleConfiguration(gomock.Any()).Return(nil, nil).Times(1) if err := svc.ReconcileBucket(); err != nil { t.Fatalf("Unexpected error: %v", err) @@ -166,7 +168,7 @@ func TestReconcileBucket(t *testing.T) { }) t.Run("creates_bucket_with_policy_allowing_controlplane_and_worker_nodes_to_read_their_secrets", func(t *testing.T) { - t.Parallel() + defer utilfeature.SetFeatureGateDuringTest(t, feature.Gates, feature.MachinePool, true)() bucketName := "bar" @@ -213,6 +215,7 @@ func TestReconcileBucket(t *testing.T) { t.Errorf("Expected deny when not using SecureTransport; got: %v", policy) } }).Return(nil, nil).Times(1) + s3Mock.EXPECT().PutBucketLifecycleConfiguration(gomock.Any()).Return(nil, nil).Times(1) if err := svc.ReconcileBucket(); err != nil { t.Fatalf("Unexpected error: %v", err) @@ -220,13 +223,14 @@ func TestReconcileBucket(t *testing.T) { }) t.Run("is_idempotent", func(t *testing.T) { - t.Parallel() + defer utilfeature.SetFeatureGateDuringTest(t, feature.Gates, feature.MachinePool, true)() svc, s3Mock := testService(t, &testServiceInput{Bucket: &infrav1.S3Bucket{}}) s3Mock.EXPECT().CreateBucket(gomock.Any()).Return(nil, nil).Times(2) 
s3Mock.EXPECT().PutBucketTagging(gomock.Any()).Return(nil, nil).Times(2) s3Mock.EXPECT().PutBucketPolicy(gomock.Any()).Return(nil, nil).Times(2) + s3Mock.EXPECT().PutBucketLifecycleConfiguration(gomock.Any()).Return(nil, nil).Times(2) if err := svc.ReconcileBucket(); err != nil { t.Fatalf("Unexpected error: %v", err) @@ -238,7 +242,7 @@ func TestReconcileBucket(t *testing.T) { }) t.Run("ignores_when_bucket_already_exists_but_its_owned_by_the_same_account", func(t *testing.T) { - t.Parallel() + defer utilfeature.SetFeatureGateDuringTest(t, feature.Gates, feature.MachinePool, true)() svc, s3Mock := testService(t, &testServiceInput{Bucket: &infrav1.S3Bucket{}}) @@ -247,6 +251,7 @@ func TestReconcileBucket(t *testing.T) { s3Mock.EXPECT().CreateBucket(gomock.Any()).Return(nil, err).Times(1) s3Mock.EXPECT().PutBucketTagging(gomock.Any()).Return(nil, nil).Times(1) s3Mock.EXPECT().PutBucketPolicy(gomock.Any()).Return(nil, nil).Times(1) + s3Mock.EXPECT().PutBucketLifecycleConfiguration(gomock.Any()).Return(nil, nil).Times(1) if err := svc.ReconcileBucket(); err != nil { t.Fatalf("Unexpected error, got: %v", err) @@ -326,6 +331,7 @@ func TestReconcileBucket(t *testing.T) { s3Mock.EXPECT().CreateBucket(gomock.Eq(input)).Return(nil, nil).Times(1) s3Mock.EXPECT().PutBucketTagging(gomock.Any()).Return(nil, nil).Times(1) + s3Mock.EXPECT().PutBucketLifecycleConfiguration(gomock.Any()).Return(nil, nil).Times(1) s3Mock.EXPECT().PutBucketPolicy(gomock.Any()).Return(nil, nil).Times(1) if err := svc.ReconcileBucket(); err != nil { @@ -341,7 +347,7 @@ func TestDeleteBucket(t *testing.T) { const bucketName = "foo" t.Run("does_nothing_when_bucket_management_is_disabled", func(t *testing.T) { - t.Parallel() + defer utilfeature.SetFeatureGateDuringTest(t, feature.Gates, feature.MachinePool, true)() svc, _ := testService(t, nil) @@ -351,7 +357,7 @@ func TestDeleteBucket(t *testing.T) { }) t.Run("deletes_bucket_with_configured_name", func(t *testing.T) { - t.Parallel() + defer utilfeature.SetFeatureGateDuringTest(t, feature.Gates, feature.MachinePool, true)() svc, s3Mock := testService(t, &testServiceInput{ Bucket: &infrav1.S3Bucket{ @@ -363,6 +369,7 @@ func TestDeleteBucket(t *testing.T) { Bucket: aws.String(bucketName), } + s3Mock.EXPECT().ListObjectsV2(gomock.Any()).Return(&s3svc.ListObjectsV2Output{}, nil).Times(1) s3Mock.EXPECT().DeleteBucket(input).Return(nil, nil).Times(1) if err := svc.DeleteBucket(); err != nil { @@ -371,12 +378,12 @@ func TestDeleteBucket(t *testing.T) { }) t.Run("returns_error_when_bucket_removal_returns", func(t *testing.T) { - t.Parallel() t.Run("unexpected_error", func(t *testing.T) { - t.Parallel() + defer utilfeature.SetFeatureGateDuringTest(t, feature.Gates, feature.MachinePool, true)() svc, s3Mock := testService(t, &testServiceInput{Bucket: &infrav1.S3Bucket{}}) + s3Mock.EXPECT().ListObjectsV2(gomock.Any()).Return(&s3svc.ListObjectsV2Output{}, nil).Times(1) s3Mock.EXPECT().DeleteBucket(gomock.Any()).Return(nil, errors.New("err")).Times(1) if err := svc.DeleteBucket(); err == nil { @@ -385,10 +392,11 @@ func TestDeleteBucket(t *testing.T) { }) t.Run("unexpected_AWS_error", func(t *testing.T) { - t.Parallel() + defer utilfeature.SetFeatureGateDuringTest(t, feature.Gates, feature.MachinePool, true)() svc, s3Mock := testService(t, &testServiceInput{Bucket: &infrav1.S3Bucket{}}) + s3Mock.EXPECT().ListObjectsV2(gomock.Any()).Return(&s3svc.ListObjectsV2Output{}, nil).Times(1) s3Mock.EXPECT().DeleteBucket(gomock.Any()).Return(nil, awserr.New("foo", "", nil)).Times(1) if err := 
svc.DeleteBucket(); err == nil { @@ -398,10 +406,11 @@ func TestDeleteBucket(t *testing.T) { }) t.Run("ignores_when_bucket_has_already_been_removed", func(t *testing.T) { - t.Parallel() + defer utilfeature.SetFeatureGateDuringTest(t, feature.Gates, feature.MachinePool, true)() svc, s3Mock := testService(t, &testServiceInput{Bucket: &infrav1.S3Bucket{}}) + s3Mock.EXPECT().ListObjectsV2(gomock.Any()).Return(&s3svc.ListObjectsV2Output{}, nil).Times(1) s3Mock.EXPECT().DeleteBucket(gomock.Any()).Return(nil, awserr.New(s3svc.ErrCodeNoSuchBucket, "", nil)).Times(1) if err := svc.DeleteBucket(); err != nil { @@ -410,10 +419,11 @@ func TestDeleteBucket(t *testing.T) { }) t.Run("skips_bucket_removal_when_bucket_is_not_empty", func(t *testing.T) { - t.Parallel() + defer utilfeature.SetFeatureGateDuringTest(t, feature.Gates, feature.MachinePool, true)() svc, s3Mock := testService(t, &testServiceInput{Bucket: &infrav1.S3Bucket{}}) + s3Mock.EXPECT().ListObjectsV2(gomock.Any()).Return(&s3svc.ListObjectsV2Output{}, nil).Times(1) s3Mock.EXPECT().DeleteBucket(gomock.Any()).Return(nil, awserr.New("BucketNotEmpty", "", nil)).Times(1) if err := svc.DeleteBucket(); err != nil { From 47542ff15f82d14b6094d37ce7adf02022be7473 Mon Sep 17 00:00:00 2001 From: Jose Armesto Date: Tue, 3 Sep 2024 17:14:49 +0200 Subject: [PATCH 7/7] Add GS workflow action --- .github/workflows/release.yaml | 84 ++++++++++++++++++++++++---------- .github/workflows/release.yml | 76 ------------------------------ 2 files changed, 61 insertions(+), 99 deletions(-) delete mode 100644 .github/workflows/release.yml diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index ee349bd18b..5034a43be2 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -1,38 +1,76 @@ +# As opposed to https://github.com/kubernetes-sigs/cluster-api (CAPI), the CAPA upstream project does not offer +# a GitHub action to automatically create releases from tags (as of 2023-08-21). Therefore, this is a Giant Swarm +# fork-specific addition. We require a GitHub release containing the YAML manifests which we use in +# cluster-api-provider-aws-app. Since doing this manually is very error-prone (see +# `docs/book/src/development/releasing.md`), we run the needed commands here. + name: release on: push: tags: - - 'v*' + - 'v*' permissions: - contents: write # required to write to github release. 
+ contents: write # allow creating a release jobs: - release: + build: name: Create draft release runs-on: ubuntu-latest + env: + GH_ORG_NAME: giantswarm steps: - - name: checkout code - uses: actions/checkout@v4 + - name: Set env + run: | + if echo "${GITHUB_REF}" | grep -qF "vX.Y"; then + >&2 echo "ERROR: Oops, you copy-pasted verbatim from the README.md - please ensure to replace 'vX.Y.Z' with an actual tag" + exit 1 + fi + + echo "RELEASE_TAG=${GITHUB_REF:10}" >> $GITHUB_ENV # strip off `refs/tags/` prefix + + - name: Check out code + uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3.5.0 with: fetch-depth: 0 - - name: Set up Go - uses: actions/setup-go@v5 - with: - go-version: '1.21' - - name: Set version info - run: | - echo "VERSION=${GITHUB_REF_NAME}" >> $GITHUB_ENV - echo "PREVIOUS_VERSION=$(git describe --abbrev=0 2> /dev/null)" >> $GITHUB_ENV - echo "RELEASE_BRANCH=release-$(echo ${GITHUB_REF_NAME} | grep -Eo '[0-9]\.[0-9]+')" >> $GITHUB_ENV - echo "RELEASE_TAG=${GITHUB_REF_NAME}" >> $GITHUB_ENV - - name: Run release - run: | - echo "Version is: $VERSION" - echo "Previous version is: $PREVIOUS_VERSION" - echo "Release branch is: $RELEASE_BRANCH" - echo "Release tag is: $RELEASE_TAG" - make release + + # - name: Calculate Go version + # run: echo "go_version=$(make go-version)" >> $GITHUB_ENV + + # - name: Set up Go + # uses: actions/setup-go@6edd4406fa81c3da01a34fa6f6343087c207a568 # tag=v3.5.0 + # with: + # go-version: ${{ env.go_version }} + + - name: Generate release artifacts env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + GITHUB_TOKEN: "unused" # since we create the release without using CAPA's Makefile target + run: | + # Dealing with changelogs isn't that easy since Giant Swarm releases can jump from `v2.2.0` to `v2.3.1` + # as we skip intermediate releases. Therefore, finding the required value for `PREVIOUS_VERSION` would be + # a manual task. Better not deal with the changelog right now since it's unlikely that someone will look + # at those in our fork (as compared to upstream's releases). + printf '#!/bin/sh\necho "Changelogs are not filled in this fork"\n' > hack/releasechangelog.sh # old path of this tool + mkdir -p hack/tools/bin + printf '#!/bin/sh\necho "Changelogs are not filled in this fork" > out/CHANGELOG.md\n' > hack/tools/bin/release-notes + chmod +x hack/tools/bin/release-notes + + # We don't need the binaries and other stuff in the release, either. Really only the YAML manifests. + sed -i -E -e '/\$\(MAKE\) (release-binaries|release-templates|release-policies)/d' Makefile + sed -i -E -e '/cp metadata.yaml/d' Makefile + + # To allow the above changes since normally the Makefile wouldn't allow a dirty Git repo + sed -i -e '/Your local git repository contains uncommitted changes/d' Makefile + + (set -x; make PREVIOUS_VERSION="${RELEASE_TAG}" RELEASE_TAG="${RELEASE_TAG}" release) + + # Instead of `make VERSION="${RELEASE_TAG}" create-gh-release upload-gh-artifacts`, which requires GitHub CLI + # authentication, use an action which does the same. + - name: Release + uses: softprops/action-gh-release@de2c0eb89ae2a093876385947365aca7b0e5f844 # tag=v1 + with: + draft: true + files: out/* + body: "This fork does not provide release changelogs." 
+ # `name` not needed since this takes the tag by default (which we also use above as ${RELEASE_TAG}) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml deleted file mode 100644 index 5034a43be2..0000000000 --- a/.github/workflows/release.yml +++ /dev/null @@ -1,76 +0,0 @@ -# As opposed to https://github.com/kubernetes-sigs/cluster-api (CAPI), the CAPA upstream project does not offer -# a GitHub action to automatically create releases from tags (as of 2023-08-21). Therefore, this is a Giant Swarm -# fork-specific addition. We require a GitHub release containing the YAML manifests which we use in -# cluster-api-provider-aws-app. Since doing this manually is very error-prone (see -# `docs/book/src/development/releasing.md`), we run the needed commands here. - -name: release - -on: - push: - tags: - - 'v*' - -permissions: - contents: write # allow creating a release - -jobs: - build: - name: Create draft release - runs-on: ubuntu-latest - env: - GH_ORG_NAME: giantswarm - steps: - - name: Set env - run: | - if echo "${GITHUB_REF}" | grep -qF "vX.Y"; then - >&2 echo "ERROR: Oops, you copy-pasted verbatim from the README.md - please ensure to replace 'vX.Y.Z' with an actual tag" - exit 1 - fi - - echo "RELEASE_TAG=${GITHUB_REF:10}" >> $GITHUB_ENV # strip off `refs/tags/` prefix - - - name: Check out code - uses: actions/checkout@8f4b7f84864484a7bf31766abe9204da3cbe65b3 # tag=v3.5.0 - with: - fetch-depth: 0 - - # - name: Calculate Go version - # run: echo "go_version=$(make go-version)" >> $GITHUB_ENV - - # - name: Set up Go - # uses: actions/setup-go@6edd4406fa81c3da01a34fa6f6343087c207a568 # tag=v3.5.0 - # with: - # go-version: ${{ env.go_version }} - - - name: Generate release artifacts - env: - GITHUB_TOKEN: "unused" # since we create the release without using CAPA's Makefile target - run: | - # Dealing with changelogs isn't that easy since Giant Swarm releases can jump from `v2.2.0` to `v2.3.1` - # as we skip intermediate releases. Therefore, finding the required value for `PREVIOUS_VERSION` would be - # a manual task. Better not deal with the changelog right now since it's unlikely that someone will look - # at those in our fork (as compared to upstream's releases). - printf '#!/bin/sh\necho "Changelogs are not filled in this fork"\n' > hack/releasechangelog.sh # old path of this tool - mkdir -p hack/tools/bin - printf '#!/bin/sh\necho "Changelogs are not filled in this fork" > out/CHANGELOG.md\n' > hack/tools/bin/release-notes - chmod +x hack/tools/bin/release-notes - - # We don't need the binaries and other stuff in the release, either. Really only the YAML manifests. - sed -i -E -e '/\$\(MAKE\) (release-binaries|release-templates|release-policies)/d' Makefile - sed -i -E -e '/cp metadata.yaml/d' Makefile - - # To allow the above changes since normally the Makefile wouldn't allow a dirty Git repo - sed -i -e '/Your local git repository contains uncommitted changes/d' Makefile - - (set -x; make PREVIOUS_VERSION="${RELEASE_TAG}" RELEASE_TAG="${RELEASE_TAG}" release) - - # Instead of `make VERSION="${RELEASE_TAG}" create-gh-release upload-gh-artifacts`, which requires GitHub CLI - # authentication, use an action which does the same. - - name: Release - uses: softprops/action-gh-release@de2c0eb89ae2a093876385947365aca7b0e5f844 # tag=v1 - with: - draft: true - files: out/* - body: "This fork does not provide release changelogs." - # `name` not needed since this takes the tag by default (which we also use above as ${RELEASE_TAG})