diff --git a/framework/API.md b/framework/API.md index 5ebfbcffc..51bcb606f 100644 --- a/framework/API.md +++ b/framework/API.md @@ -19703,11 +19703,11 @@ The list of supported Karpenter versions as defined [here](https://github.com/aw | **Name** | **Description** | | --- | --- | -| V0_37_0 | *No description.* | +| V1_0_1 | *No description.* | --- -##### `V0_37_0` +##### `V1_0_1` --- diff --git a/framework/src/processing/lib/karpenter-releases.ts b/framework/src/processing/lib/karpenter-releases.ts index fcd8ed4d2..d16fef04f 100644 --- a/framework/src/processing/lib/karpenter-releases.ts +++ b/framework/src/processing/lib/karpenter-releases.ts @@ -6,7 +6,7 @@ - * At this time only v0.37.0 is supported. + * At this time only v1.0.1 is supported. */ export enum KarpenterVersion { - V0_37_0 = '0.37.0', + V1_0_1 = '1.0.1', } -export const DEFAULT_KARPENTER_VERSION: KarpenterVersion = KarpenterVersion.V0_37_0; +export const DEFAULT_KARPENTER_VERSION: KarpenterVersion = KarpenterVersion.V1_0_1; diff --git a/framework/src/processing/lib/spark-runtime/emr-containers/eks-karpenter-helpers.ts b/framework/src/processing/lib/spark-runtime/emr-containers/eks-karpenter-helpers.ts index f5ecabcf1..f0496d2b1 100644 --- a/framework/src/processing/lib/spark-runtime/emr-containers/eks-karpenter-helpers.ts +++ b/framework/src/processing/lib/spark-runtime/emr-containers/eks-karpenter-helpers.ts @@ -183,43 +183,45 @@ export function karpenterSetup(cluster: ICluster, actions: ['ec2:RunInstances', 'ec2:CreateFleet'], }); - const allowScopedEC2InstanceActionsWithTags: PolicyStatement = new PolicyStatement({ + const allowScopedEC2LaunchTemplateAccessActions = new PolicyStatement({ + sid: 'AllowScopedEC2LaunchTemplateAccessActions', effect: Effect.ALLOW, - resources: [ - `arn:aws:ec2:${Stack.of(scope).region}:*:fleet/*`, - `arn:aws:ec2:${Stack.of(scope).region}:*:instance/*`, - `arn:aws:ec2:${Stack.of(scope).region}:*:volume/*`, - `arn:aws:ec2:${Stack.of(scope).region}:*:network-interface/*`, - 
`arn:aws:ec2:${Stack.of(scope).region}:*:launch-template/*`, - `arn:aws:ec2:${Stack.of(scope).region}:*:spot-instances-request`, + resources: [`arn:${Stack.of(scope).partition}:ec2:${Stack.of(scope).region}:*:launch-template/*`], + actions: [ + 'ec2:RunInstances', + 'ec2:CreateFleet', ], - actions: ['ec2:RunInstances', 'ec2:CreateFleet', 'ec2:CreateLaunchTemplate'], conditions: { StringEquals: { - [`aws:RequestTag/kubernetes.io/cluster/${clusterName}`]: 'owned', + [`aws:ResourceTag/kubernetes.io/cluster/${clusterName}`]: 'owned', }, StringLike: { - 'aws:RequestTag/karpenter.sh/nodepool': '*', + 'aws:ResourceTag/karpenter.sh/nodepool': '*', }, }, }); - const allowScopedResourceCreationTagging: PolicyStatement = new PolicyStatement({ + const allowScopedResourceCreationTagging = new PolicyStatement({ sid: 'AllowScopedResourceCreationTagging', effect: Effect.ALLOW, resources: [ - `arn:aws:ec2:${Stack.of(scope).region}:*:fleet/*`, - `arn:aws:ec2:${Stack.of(scope).region}:*:instance/*`, - `arn:aws:ec2:${Stack.of(scope).region}:*:volume/*`, - `arn:aws:ec2:${Stack.of(scope).region}:*:network-interface/*`, - `arn:aws:ec2:${Stack.of(scope).region}:*:launch-template/*`, - `arn:aws:ec2:${Stack.of(scope).region}:*:spot-instances-request`, + `arn:${Stack.of(scope).partition}:ec2:${Stack.of(scope).region}:*:fleet/*`, + `arn:${Stack.of(scope).partition}:ec2:${Stack.of(scope).region}:*:instance/*`, + `arn:${Stack.of(scope).partition}:ec2:${Stack.of(scope).region}:*:volume/*`, + `arn:${Stack.of(scope).partition}:ec2:${Stack.of(scope).region}:*:network-interface/*`, + `arn:${Stack.of(scope).partition}:ec2:${Stack.of(scope).region}:*:launch-template/*`, + `arn:${Stack.of(scope).partition}:ec2:${Stack.of(scope).region}:*:spot-instances-request/*`, ], actions: ['ec2:CreateTags'], conditions: { StringEquals: { [`aws:RequestTag/kubernetes.io/cluster/${clusterName}`]: 'owned', - 'ec2:CreateAction': ['RunInstances', 'CreateFleet', 'CreateLaunchTemplate'], + 
'aws:RequestTag/eks:eks-cluster-name': clusterName, + 'ec2:CreateAction': [ + 'RunInstances', + 'CreateFleet', + 'CreateLaunchTemplate', + ], }, StringLike: { 'aws:RequestTag/karpenter.sh/nodepool': '*', @@ -227,12 +229,11 @@ export function karpenterSetup(cluster: ICluster, }, }); - const allowScopedResourceTagging: PolicyStatement = new PolicyStatement({ - sid: 'allowScopedResourceTagging', + + const allowScopedResourceTagging = new PolicyStatement({ + sid: 'AllowScopedResourceTagging', effect: Effect.ALLOW, - resources: [ - `arn:aws:ec2:${Stack.of(scope).region}:*:instance/*`, - ], + resources: [`arn:${Stack.of(scope).partition}:ec2:${Stack.of(scope).region}:*:instance/*`], actions: ['ec2:CreateTags'], conditions: { 'StringEquals': { @@ -241,8 +242,12 @@ export function karpenterSetup(cluster: ICluster, 'StringLike': { 'aws:ResourceTag/karpenter.sh/nodepool': '*', }, + 'StringEqualsIfExists': { + 'aws:RequestTag/eks:eks-cluster-name': clusterName, + }, 'ForAllValues:StringEquals': { 'aws:TagKeys': [ + 'eks:eks-cluster-name', 'karpenter.sh/nodeclaim', 'Name', ], @@ -250,14 +255,18 @@ export function karpenterSetup(cluster: ICluster, }, }); - const allowScopedDeletion: PolicyStatement = new PolicyStatement({ + + const allowScopedDeletion = new PolicyStatement({ sid: 'AllowScopedDeletion', effect: Effect.ALLOW, resources: [ - `arn:aws:ec2:${Stack.of(scope).region}:*:instance/*`, - `arn:aws:ec2:${Stack.of(scope).region}:*:launch-template/*`, + `arn:${Stack.of(scope).partition}:ec2:${Stack.of(scope).region}:*:instance/*`, + `arn:${Stack.of(scope).partition}:ec2:${Stack.of(scope).region}:*:launch-template/*`, + ], + actions: [ + 'ec2:TerminateInstances', + 'ec2:DeleteLaunchTemplate', ], - actions: ['ec2:TerminateInstances', 'ec2:DeleteLaunchTemplate'], conditions: { StringEquals: { [`aws:ResourceTag/kubernetes.io/cluster/${clusterName}`]: 'owned', @@ -268,6 +277,29 @@ export function karpenterSetup(cluster: ICluster, }, }); + + const 
allowScopedEC2InstanceActionsWithTags: PolicyStatement = new PolicyStatement({ + effect: Effect.ALLOW, + resources: [ + `arn:${Stack.of(scope).partition}:ec2:${Stack.of(scope).region}:*:fleet/*`, + `arn:${Stack.of(scope).partition}:ec2:${Stack.of(scope).region}:*:instance/*`, + `arn:${Stack.of(scope).partition}:ec2:${Stack.of(scope).region}:*:volume/*`, + `arn:${Stack.of(scope).partition}:ec2:${Stack.of(scope).region}:*:network-interface/*`, + `arn:${Stack.of(scope).partition}:ec2:${Stack.of(scope).region}:*:launch-template/*`, + `arn:${Stack.of(scope).partition}:ec2:${Stack.of(scope).region}:*:spot-instances-request/*`, + ], + actions: ['ec2:RunInstances', 'ec2:CreateFleet', 'ec2:CreateLaunchTemplate'], + conditions: { + StringEquals: { + [`aws:RequestTag/kubernetes.io/cluster/${clusterName}`]: 'owned', + 'aws:RequestTag/eks:eks-cluster-name': clusterName, + }, + StringLike: { + 'aws:RequestTag/karpenter.sh/nodepool': '*', + }, + }, + }); + const allowPassingInstanceRole: PolicyStatement = new PolicyStatement({ effect: Effect.ALLOW, actions: ['iam:PassRole'], @@ -293,22 +325,16 @@ export function karpenterSetup(cluster: ICluster, actions: ['eks:DescribeCluster'], }); - const allowInstanceProfileReadActions: PolicyStatement = new PolicyStatement({ - sid: 'AllowInstanceProfileReadActions', - effect: Effect.ALLOW, - resources: ['*'], - actions: ['iam:GetInstanceProfile'], - }); - - const allowScopedInstanceProfileCreationActions: PolicyStatement = new PolicyStatement({ + const allowScopedInstanceProfileCreationActions = new PolicyStatement({ sid: 'AllowScopedInstanceProfileCreationActions', effect: Effect.ALLOW, - resources: ['*'], + resources: [`arn:${Stack.of(scope).partition}:iam::${Stack.of(scope).account}:instance-profile/*`], actions: ['iam:CreateInstanceProfile'], conditions: { StringEquals: { [`aws:RequestTag/kubernetes.io/cluster/${clusterName}`]: 'owned', - 'aws:RequestTag/topology.kubernetes.io/region': `${Stack.of(scope).region}`, + 'aws:RequestTag/eks:eks-cluster-name': clusterName, + 'aws:RequestTag/topology.kubernetes.io/region': Stack.of(scope).region, }, StringLike: { 
'aws:RequestTag/karpenter.k8s.aws/ec2nodeclass': '*', @@ -316,17 +342,18 @@ export function karpenterSetup(cluster: ICluster, }, }); - const allowScopedInstanceProfileTagActions: PolicyStatement = new PolicyStatement({ + const allowScopedInstanceProfileTagActions = new PolicyStatement({ sid: 'AllowScopedInstanceProfileTagActions', effect: Effect.ALLOW, - resources: ['*'], + resources: [`arn:${Stack.of(scope).partition}:iam::${Stack.of(scope).account}:instance-profile/*`], actions: ['iam:TagInstanceProfile'], conditions: { StringEquals: { [`aws:ResourceTag/kubernetes.io/cluster/${clusterName}`]: 'owned', - 'aws:ResourceTag/topology.kubernetes.io/region': `${Stack.of(scope).region}`, + 'aws:ResourceTag/topology.kubernetes.io/region': Stack.of(scope).region, [`aws:RequestTag/kubernetes.io/cluster/${clusterName}`]: 'owned', - 'aws:RequestTag/topology.kubernetes.io/region': `${Stack.of(scope).region}`, + 'aws:RequestTag/eks:eks-cluster-name': clusterName, + 'aws:RequestTag/topology.kubernetes.io/region': Stack.of(scope).region, }, StringLike: { 'aws:ResourceTag/karpenter.k8s.aws/ec2nodeclass': '*', @@ -335,15 +362,19 @@ export function karpenterSetup(cluster: ICluster, }, }); - const allowScopedInstanceProfileActions: PolicyStatement = new PolicyStatement({ + const allowScopedInstanceProfileActions = new PolicyStatement({ sid: 'AllowScopedInstanceProfileActions', effect: Effect.ALLOW, - resources: ['*'], - actions: ['iam:AddRoleToInstanceProfile', 'iam:RemoveRoleFromInstanceProfile', 'iam:DeleteInstanceProfile'], + resources: [`arn:${Stack.of(scope).partition}:iam::${Stack.of(scope).account}:instance-profile/*`], + actions: [ + 'iam:AddRoleToInstanceProfile', + 'iam:RemoveRoleFromInstanceProfile', + 'iam:DeleteInstanceProfile', + ], conditions: { StringEquals: { [`aws:ResourceTag/kubernetes.io/cluster/${clusterName}`]: 'owned', - 'aws:ResourceTag/topology.kubernetes.io/region': `${Stack.of(scope).region}`, + 'aws:ResourceTag/topology.kubernetes.io/region': 
Stack.of(scope).region, }, StringLike: { 'aws:ResourceTag/karpenter.k8s.aws/ec2nodeclass': '*', @@ -351,6 +382,13 @@ export function karpenterSetup(cluster: ICluster, }, }); + const allowInstanceProfileReadActions = new PolicyStatement({ + sid: 'AllowInstanceProfileReadActions', + effect: Effect.ALLOW, + resources: [`arn:${Stack.of(scope).partition}:iam::${Stack.of(scope).account}:instance-profile/*`], + actions: ['iam:GetInstanceProfile'], + }); + const karpenterNS = cluster.addManifest('karpenterNS', { apiVersion: 'v1', @@ -380,6 +418,7 @@ export function karpenterSetup(cluster: ICluster, karpenterAccount.addToPrincipalPolicy(allowAPIServerEndpointDiscovery); karpenterAccount.addToPrincipalPolicy(allowInstanceProfileReadActions); karpenterAccount.addToPrincipalPolicy(allowRegionalReadActions); + karpenterAccount.addToPrincipalPolicy(allowScopedEC2LaunchTemplateAccessActions); //Deploy Karpenter Chart const karpenterChart = cluster.addHelmChart('KarpenterHelmChart', { diff --git a/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/karpenter-provisioner-config/1.0.1/Dockerfile-nvme-raid0-mount b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/karpenter-provisioner-config/1.0.1/Dockerfile-nvme-raid0-mount new file mode 100644 index 000000000..8c20f2f1f --- /dev/null +++ b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/karpenter-provisioner-config/1.0.1/Dockerfile-nvme-raid0-mount @@ -0,0 +1,8 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +FROM public.ecr.aws/amazonlinux/amazonlinux:2023 +RUN dnf -y install e2fsprogs bash mdadm util-linux +COPY setup-runtime-storage ./ +RUN chmod +x ./setup-runtime-storage +ENTRYPOINT ["sh", "setup-runtime-storage"] \ No newline at end of file diff --git a/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/karpenter-provisioner-config/1.0.1/critical-provisioner.yml b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/karpenter-provisioner-config/1.0.1/critical-provisioner.yml new file mode 100644 index 000000000..a4cb651a2 --- /dev/null +++ b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/karpenter-provisioner-config/1.0.1/critical-provisioner.yml @@ -0,0 +1,102 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: karpenter.sh/v1 +kind: NodePool +metadata: + name: critical-{{az}} +spec: + # References cloud provider-specific custom resource, see your cloud provider specific documentation + template: + metadata: + # Labels are arbitrary key-values that are applied to all nodes + labels: + role: critical + node-lifecycle: on-demand + + spec: + nodeClassRef: + group: karpenter.k8s.aws + kind: EC2NodeClass + name: critical-nodes-{{az}} + + expireAfter: 720h + + taints: + - key: role + value: critical + effect: NoSchedule + + # Requirements that constrain the parameters of provisioned nodes. + # These requirements are combined with pod.spec.affinity.nodeAffinity rules. 
+ # Operators { In, NotIn } are supported to enable including or excluding values + requirements: + # Include general purpose instance families + - key: "karpenter.k8s.aws/instance-family" + operator: In + values: ["m6gd"] + - key: "kubernetes.io/arch" + operator: In + values: ["arm64"] + # Exclude smaller instance sizes + - key: "karpenter.k8s.aws/instance-size" + operator: NotIn + values: [nano, micro, small, medium, large] + - key: "karpenter.sh/capacity-type" # If not included, the webhook for the AWS cloud provider will default to on-demand + operator: In + values: ["on-demand"] + - key: "topology.kubernetes.io/zone" + operator: In + values: ["{{az}}"] + - key: "karpenter.k8s.aws/instance-hypervisor" + operator: In + values: ["nitro"] + + # Resource limits constrain the total size of the cluster. + # Limits prevent Karpenter from creating new instances once the limit is exceeded. + + limits: + cpu: "3200" + memory: 12800Gi + + disruption: + + consolidationPolicy: WhenEmptyOrUnderutilized + + # If omitted, the feature is disabled, nodes will never scale down due to low utilization + consolidateAfter: 300s + + # Priority given to the provisioner when the scheduler considers which provisioner + # to select. Higher weights indicate higher priority when comparing provisioners. + # Specifying no weight is equivalent to specifying a weight of 0. 
+ weight: 10 + +--- +apiVersion: karpenter.k8s.aws/v1 +kind: EC2NodeClass +metadata: + name: critical-nodes-{{az}} +spec: + subnetSelectorTerms: + - id: {{subnet-id}} + securityGroupSelectorTerms: + - tags: + kubernetes.io/cluster/{{cluster-name}}: owned + tags: + KarpenerProvisionerName: "critical" + + role: {{ROLENAME}} + + metadataOptions: + httpEndpoint: enabled + httpProtocolIPv6: disabled + httpPutResponseHopLimit: 2 + httpTokens: required + amiFamily: Bottlerocket + amiSelectorTerms: + - alias: bottlerocket@latest + userData: | + [settings.bootstrap-containers.bootstrap-nvme-raid0] + source = "{{REPLACE-WITH-IMAGE-ECR}}" + mode = "once" + essential = true \ No newline at end of file diff --git a/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/karpenter-provisioner-config/1.0.1/notebook-driver-provisioner.yml b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/karpenter-provisioner-config/1.0.1/notebook-driver-provisioner.yml new file mode 100644 index 000000000..1317c26e9 --- /dev/null +++ b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/karpenter-provisioner-config/1.0.1/notebook-driver-provisioner.yml @@ -0,0 +1,98 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: karpenter.sh/v1 +kind: NodePool +metadata: + name: notebook-driver-{{az}} +spec: + # References cloud provider-specific custom resource, see your cloud provider specific documentation + template: + metadata: + # Labels are arbitrary key-values that are applied to all nodes + labels: + role: notebook + node-lifecycle: on-demand + spark-role: driver + + spec: + nodeClassRef: + group: karpenter.k8s.aws + kind: EC2NodeClass + name: notebook-driver-nodes-{{az}} + + expireAfter: 720h + + taints: + - key: role + value: notebook + effect: NoSchedule + + # Requirements that constrain the parameters of provisioned nodes. 
+ # These requirements are combined with pod.spec.affinity.nodeAffinity rules. + # Operators { In, NotIn } are supported to enable including or excluding values + requirements: + # Include general purpose instance families + - key: "karpenter.k8s.aws/instance-family" + operator: In + values: ["t3", "t3a"] + - key: "kubernetes.io/arch" + operator: In + values: ["amd64"] + # Exclude smaller instance sizes + - key: "karpenter.k8s.aws/instance-size" + operator: NotIn + values: [nano, micro, small] + - key: "karpenter.sh/capacity-type" # If not included, the webhook for the AWS cloud provider will default to on-demand + operator: In + values: ["on-demand"] + - key: "topology.kubernetes.io/zone" + operator: In + values: ["{{az}}"] + - key: "karpenter.k8s.aws/instance-hypervisor" + operator: In + values: ["nitro"] + + # Resource limits constrain the total size of the cluster. + # Limits prevent Karpenter from creating new instances once the limit is exceeded. + limits: + cpu: "40" + memory: 160Gi + + disruption: + + consolidationPolicy: WhenEmptyOrUnderutilized + + # If omitted, the feature is disabled, nodes will never scale down due to low utilization + consolidateAfter: 300s + + # Priority given to the provisioner when the scheduler considers which provisioner + # to select. Higher weights indicate higher priority when comparing provisioners. + # Specifying no weight is equivalent to specifying a weight of 0. 
+ weight: 20 + +--- +apiVersion: karpenter.k8s.aws/v1 +kind: EC2NodeClass +metadata: + name: notebook-driver-nodes-{{az}} +spec: + amiFamily: Bottlerocket + amiSelectorTerms: + - alias: bottlerocket@latest + subnetSelectorTerms: + - id: {{subnet-id}} + securityGroupSelectorTerms: + - tags: + kubernetes.io/cluster/{{cluster-name}}: owned + metadataOptions: + httpEndpoint: enabled + httpProtocolIPv6: disabled + httpPutResponseHopLimit: 2 + httpTokens: required + + tags: + KarpenerProvisionerName: "notebook-driver" + + role: {{ROLENAME}} + diff --git a/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/karpenter-provisioner-config/1.0.1/notebook-executor-provisioner.yml b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/karpenter-provisioner-config/1.0.1/notebook-executor-provisioner.yml new file mode 100644 index 000000000..b404e60c1 --- /dev/null +++ b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/karpenter-provisioner-config/1.0.1/notebook-executor-provisioner.yml @@ -0,0 +1,103 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: karpenter.sh/v1 +kind: NodePool +metadata: + name: notebook-executor-{{az}} +spec: + # References cloud provider-specific custom resource, see your cloud provider specific documentation + template: + metadata: + # Labels are arbitrary key-values that are applied to all nodes + labels: + role: notebook + node-lifecycle: spot + spark-role: executor + + spec: + nodeClassRef: + group: karpenter.k8s.aws + kind: EC2NodeClass + name: notebook-executor-nodes-{{az}} + + expireAfter: 720h + + taints: + - key: role + value: notebook + effect: NoSchedule + - key: node-lifecycle + value: spot + effect: NoSchedule + + # Requirements that constrain the parameters of provisioned nodes. + # These requirements are combined with pod.spec.affinity.nodeAffinity rules. 
+ # Operators { In, NotIn } are supported to enable including or excluding values + requirements: + # Include general purpose instance families + - key: "karpenter.k8s.aws/instance-family" + operator: In + values: ["t3","t3a"] + - key: "kubernetes.io/arch" + operator: In + values: ["amd64"] + # Exclude smaller instance sizes + - key: "karpenter.k8s.aws/instance-size" + operator: NotIn + values: [nano, micro, small, medium, large] + - key: "karpenter.sh/capacity-type" # If not included, the webhook for the AWS cloud provider will default to on-demand + operator: In + values: ["spot"] + - key: "topology.kubernetes.io/zone" + operator: In + values: ["{{az}}"] + - key: "karpenter.k8s.aws/instance-hypervisor" + operator: In + values: ["nitro"] + + # Resource limits constrain the total size of the cluster. + # Limits prevent Karpenter from creating new instances once the limit is exceeded. + limits: + cpu: "800" + memory: 3200Gi + + + disruption: + + consolidationPolicy: WhenEmptyOrUnderutilized + + # If omitted, the feature is disabled, nodes will never scale down due to low utilization + consolidateAfter: 300s + + # Priority given to the provisioner when the scheduler considers which provisioner + # to select. Higher weights indicate higher priority when comparing provisioners. + # Specifying no weight is equivalent to specifying a weight of 0. 
+ weight: 20 + +--- +apiVersion: karpenter.k8s.aws/v1 +kind: EC2NodeClass +metadata: + name: notebook-executor-nodes-{{az}} +spec: + amiFamily: Bottlerocket + amiSelectorTerms: + - alias: bottlerocket@latest + subnetSelectorTerms: + - id: {{subnet-id}} + securityGroupSelectorTerms: + - tags: + kubernetes.io/cluster/{{cluster-name}}: owned + + metadataOptions: + httpEndpoint: enabled + httpProtocolIPv6: disabled + httpPutResponseHopLimit: 2 + httpTokens: required + + tags: + KarpenerProvisionerName: "notebook-executor" + + role: {{ROLENAME}} + diff --git a/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/karpenter-provisioner-config/1.0.1/setup-runtime-storage b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/karpenter-provisioner-config/1.0.1/setup-runtime-storage new file mode 100644 index 000000000..a9cc0111f --- /dev/null +++ b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/karpenter-provisioner-config/1.0.1/setup-runtime-storage @@ -0,0 +1,98 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 + +#!/usr/bin/env bash +set -ex + +ROOT_PATH="/.bottlerocket/rootfs" + +# Symlinks to ephemeral disks are created here by udev +declare -a EPHEMERAL_DISKS +EPHEMERAL_DISKS=("${ROOT_PATH}"/dev/disk/ephemeral/*) + +# Exit early if there aren't ephemeral disks +if [ "${#EPHEMERAL_DISKS[@]}" -eq 0 ]; then + echo "no ephemeral disks found" + exit 1 +fi + +MD_NAME="scratch" +MD_DEVICE="/dev/md/${MD_NAME}" +MD_CONFIG="/.bottlerocket/bootstrap-containers/current/mdadm.conf" + +# Create or assemble the array. +if [ ! 
-s "${MD_CONFIG}" ] ; then + mdadm --create --force --verbose \ + "${MD_DEVICE}" \ + --level=0 \ + --name="${MD_NAME}" \ + --raid-devices="${#EPHEMERAL_DISKS[@]}" \ + "${EPHEMERAL_DISKS[@]}" + mdadm --detail --scan > "${MD_CONFIG}" +else + mdadm --assemble --config="${MD_CONFIG}" "${MD_DEVICE}" +fi + +# Format the array if not already formatted. +if ! blkid --match-token TYPE=ext4 "${MD_DEVICE}" ; then + mkfs.ext4 "${MD_DEVICE}" +fi + +MOUNT_POINT="${ROOT_PATH}/mnt/${MD_NAME}" + +# Mount the array in the host's /mnt. +mkdir -p "${MOUNT_POINT}" +mount "${MD_DEVICE}" "${MOUNT_POINT}" + +# Keep track of whether we can unmount the array later. This depends on the +# version of Bottlerocket. +should_umount="no" + +# Bind state directories to the array, if they exist. +for state_dir in containerd docker kubelet ; do + # The correct next step depends on the version of Bottlerocket, which can be + # inferred by inspecting the mounts available to the bootstrap container. + if findmnt "${ROOT_PATH}/var/lib/${state_dir}" ; then + # For Bottlerocket >= 1.9.0, the state directory can be bind-mounted over + # the host directory and the mount will propagate back to the host. + mkdir -p "${MOUNT_POINT}/${state_dir}" + mount --rbind "${MOUNT_POINT}/${state_dir}" "${ROOT_PATH}/var/lib/${state_dir}" + mount --make-rshared "${ROOT_PATH}/var/lib/${state_dir}" + should_umount="yes" + elif [ ! -L "${ROOT_PATH}/var/lib/${state_dir}" ] ; then + # For Bottlerocket < 1.9.0, the host directory needs to be replaced with a + # symlink to the state directory on the array. This works but can lead to + # unexpected behavior or incompatibilities, for example with CSI drivers. + if [ -d "${ROOT_PATH}/var/lib/${state_dir}" ] ; then + # The host directory exists but is not a symlink, and might need to be + # relocated to the storage array. This depends on whether the host has + # been downgraded from a newer version of Bottlerocket, or whether it's + # the first boot of an older version. 
+ if [ -d "${MOUNT_POINT}/${state_dir}" ] ; then + # If downgrading from a version of Bottlerocket that supported bind + # mounts, the directory will exist but should be empty, except for + # subdirectories that may have been created by tmpfiles.d before an + # upgrade to that version. Keep a copy of the directory just in case. + rm -rf "${ROOT_PATH}/var/lib/${state_dir}.bak" + mv "${ROOT_PATH}/var/lib/${state_dir}"{,.bak} + else + # Otherwise, treat it as the first boot of an older version, and move + # the directory to the array. + mv "${ROOT_PATH}/var/lib/${state_dir}" "${MOUNT_POINT}/${state_dir}" + fi + else + # The host directory does not exist, so the target directory likely needs + # to be created. + mkdir -p "${MOUNT_POINT}/${state_dir}" + fi + # Any host directory has been dealt with and the symlink can be created. + ln -snfT "/mnt/${MD_NAME}/${state_dir}" "${ROOT_PATH}/var/lib/${state_dir}" + fi +done + +# When using bind mounts, the parent directory where the array is mounted can +# be unmounted. This avoids a second, redundant mount entry under `/mnt` for +# every new mount in one of the state directories. +if [ "${should_umount}" == "yes" ] ; then + umount "${MOUNT_POINT}" +fi \ No newline at end of file diff --git a/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/karpenter-provisioner-config/1.0.1/shared-driver-provisioner.yml b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/karpenter-provisioner-config/1.0.1/shared-driver-provisioner.yml new file mode 100644 index 000000000..a51c7f551 --- /dev/null +++ b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/karpenter-provisioner-config/1.0.1/shared-driver-provisioner.yml @@ -0,0 +1,94 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +apiVersion: karpenter.sh/v1 +kind: NodePool +metadata: + name: shared-driver-{{az}} +spec: + # References cloud provider-specific custom resource, see your cloud provider specific documentation + template: + metadata: + # Labels are arbitrary key-values that are applied to all nodes + labels: + role: shared + node-lifecycle: on-demand + spark-role: driver + + spec: + nodeClassRef: + group: karpenter.k8s.aws + kind: EC2NodeClass + name: shared-driver-nodes-{{az}} + + expireAfter: 720h + + # Requirements that constrain the parameters of provisioned nodes. + # These requirements are combined with pod.spec.affinity.nodeAffinity rules. + # Operators { In, NotIn } are supported to enable including or excluding values + requirements: + # Include general purpose instance families + - key: "karpenter.k8s.aws/instance-family" + operator: In + values: ["m6g"] + - key: "kubernetes.io/arch" + operator: In + values: ["arm64"] + # Exclude smaller instance sizes + - key: "karpenter.k8s.aws/instance-size" + operator: NotIn + values: [nano, micro, small, medium] + - key: "karpenter.sh/capacity-type" # If not included, the webhook for the AWS cloud provider will default to on-demand + operator: In + values: ["on-demand"] + - key: "topology.kubernetes.io/zone" + operator: In + values: ["{{az}}"] + - key: "karpenter.k8s.aws/instance-hypervisor" + operator: In + values: ["nitro"] + + # Resource limits constrain the total size of the cluster. + # Limits prevent Karpenter from creating new instances once the limit is exceeded. + limits: + cpu: "40" + memory: 160Gi + + disruption: + + consolidationPolicy: WhenEmptyOrUnderutilized + + # If omitted, the feature is disabled, nodes will never scale down due to low utilization + consolidateAfter: 300s + + # Priority given to the provisioner when the scheduler considers which provisioner + # to select. Higher weights indicate higher priority when comparing provisioners. 
+ # Specifying no weight is equivalent to specifying a weight of 0. + weight: 10 + +--- +apiVersion: karpenter.k8s.aws/v1 +kind: EC2NodeClass +metadata: + name: shared-driver-nodes-{{az}} +spec: + amiFamily: Bottlerocket + amiSelectorTerms: + - alias: bottlerocket@latest + subnetSelectorTerms: + - id: {{subnet-id}} + securityGroupSelectorTerms: + - tags: + kubernetes.io/cluster/{{cluster-name}}: owned + + metadataOptions: + httpEndpoint: enabled + httpProtocolIPv6: disabled + httpPutResponseHopLimit: 2 + httpTokens: required + + tags: + KarpenerProvisionerName: "shared-driver" + + role: {{ROLENAME}} + diff --git a/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/karpenter-provisioner-config/1.0.1/shared-executor-provisioner.yml b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/karpenter-provisioner-config/1.0.1/shared-executor-provisioner.yml new file mode 100644 index 000000000..42918c420 --- /dev/null +++ b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/karpenter-provisioner-config/1.0.1/shared-executor-provisioner.yml @@ -0,0 +1,98 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: karpenter.sh/v1 +kind: NodePool +metadata: + name: shared-executor-{{az}} +spec: + # References cloud provider-specific custom resource, see your cloud provider specific documentation + template: + metadata: + # Labels are arbitrary key-values that are applied to all nodes + labels: + role: shared + node-lifecycle: spot + spark-role: executor + + spec: + nodeClassRef: + group: karpenter.k8s.aws + kind: EC2NodeClass + name: shared-executor-nodes-{{az}} + + expireAfter: 720h + + taints: + - key: node-lifecycle + value: spot + effect: NoSchedule + + # Requirements that constrain the parameters of provisioned nodes. + # These requirements are combined with pod.spec.affinity.nodeAffinity rules. 
+ # Operators { In, NotIn } are supported to enable including or excluding values + requirements: + # Include general purpose instance families + - key: "karpenter.k8s.aws/instance-family" + operator: In + values: ["m6g", "m6gd"] + - key: "kubernetes.io/arch" + operator: In + values: ["arm64"] + # Exclude smaller instance sizes + - key: "karpenter.k8s.aws/instance-size" + operator: NotIn + values: [nano, micro, small, medium, large] + - key: "karpenter.sh/capacity-type" # If not included, the webhook for the AWS cloud provider will default to on-demand + operator: In + values: ["spot"] + - key: "topology.kubernetes.io/zone" + operator: In + values: ["{{az}}"] + - key: "karpenter.k8s.aws/instance-hypervisor" + operator: In + values: ["nitro"] + + # Resource limits constrain the total size of the cluster. + # Limits prevent Karpenter from creating new instances once the limit is exceeded. + limits: + cpu: "3200" + memory: 12800Gi + + disruption: + + consolidationPolicy: WhenEmptyOrUnderutilized + + # If omitted, the feature is disabled, nodes will never scale down due to low utilization + consolidateAfter: 300s + + # Priority given to the provisioner when the scheduler considers which provisioner + # to select. Higher weights indicate higher priority when comparing provisioners. + # Specifying no weight is equivalent to specifying a weight of 0. 
+ weight: 10 + +--- +apiVersion: karpenter.k8s.aws/v1 +kind: EC2NodeClass +metadata: + name: shared-executor-nodes-{{az}} +spec: + amiFamily: Bottlerocket + amiSelectorTerms: + - alias: bottlerocket@latest + subnetSelectorTerms: + - id: {{subnet-id}} + securityGroupSelectorTerms: + - tags: + kubernetes.io/cluster/{{cluster-name}}: owned + metadataOptions: + httpEndpoint: enabled + httpProtocolIPv6: disabled + httpPutResponseHopLimit: 2 + httpTokens: required + + tags: + KarpenerProvisionerName: "shared-executor" + + role: {{ROLENAME}} + diff --git a/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/karpenter-provisioner-config/1.0.1/tooling-provisioner.yml b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/karpenter-provisioner-config/1.0.1/tooling-provisioner.yml new file mode 100644 index 000000000..5d967ae01 --- /dev/null +++ b/framework/src/processing/lib/spark-runtime/emr-containers/resources/k8s/karpenter-provisioner-config/1.0.1/tooling-provisioner.yml @@ -0,0 +1,87 @@ +# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +# SPDX-License-Identifier: Apache-2.0 + +apiVersion: karpenter.sh/v1 +kind: NodePool +metadata: + name: tooling-nodes +spec: + # References cloud provider-specific custom resource, see your cloud provider specific documentation + template: + metadata: + # Labels are arbitrary key-values that are applied to all nodes + labels: + role: tooling + + spec: + nodeClassRef: + group: karpenter.k8s.aws + kind: EC2NodeClass + name: tooling-nodes + + expireAfter: 720h + # Requirements that constrain the parameters of provisioned nodes. + # These requirements are combined with pod.spec.affinity.nodeAffinity rules. 
+ # Operators { In, NotIn } are supported to enable including or excluding values + requirements: + # Include general purpose instance families + - key: "karpenter.k8s.aws/instance-family" + operator: In + values: ["t3", "t3a"] + - key: "kubernetes.io/arch" + operator: In + values: ["amd64"] + # Exclude smaller instance sizes + - key: "karpenter.k8s.aws/instance-size" + operator: In + values: [medium, large, xlarge] + - key: "karpenter.sh/capacity-type" # If not included, the webhook for the AWS cloud provider will default to on-demand + operator: In + values: ["on-demand"] + - key: "karpenter.k8s.aws/instance-hypervisor" + operator: In + values: ["nitro"] + + # Resource limits constrain the total size of the cluster. + # Limits prevent Karpenter from creating new instances once the limit is exceeded. + limits: + cpu: "100" + memory: 100Gi + + disruption: + + consolidationPolicy: WhenEmptyOrUnderutilized + + # If omitted, the feature is disabled, nodes will never scale down due to low utilization + consolidateAfter: 300s + + # Priority given to the provisioner when the scheduler considers which provisioner + # to select. Higher weights indicate higher priority when comparing provisioners. + # Specifying no weight is equivalent to specifying a weight of 0. + weight: 50 + +--- +apiVersion: karpenter.k8s.aws/v1 +kind: EC2NodeClass +metadata: + name: tooling-nodes +spec: + amiFamily: Bottlerocket + amiSelectorTerms: + - alias: bottlerocket@latest + subnetSelectorTerms: + - id: {{subnet-1}} + - id: {{subnet-2}} + metadataOptions: + httpEndpoint: enabled + httpProtocolIPv6: disabled + httpPutResponseHopLimit: 2 + httpTokens: required + securityGroupSelectorTerms: + - tags: + kubernetes.io/cluster/{{cluster-name}}: owned + tags: + KarpenerProvisionerName: "tooling" + + role: {{ROLENAME}} +