Skip to content

Commit

Permalink
feat(processing): bump to karpenter v1 in SparkEmrContainersRuntime (#…
Browse files Browse the repository at this point in the history
…744)

* bump to karpenter v1
  • Loading branch information
lmouhib authored Sep 23, 2024
1 parent ffe7c24 commit 1bb7372
Show file tree
Hide file tree
Showing 11 changed files with 777 additions and 50 deletions.
4 changes: 2 additions & 2 deletions framework/API.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions framework/src/processing/lib/karpenter-releases.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
* At this time only v1.0.1 is supported.
*/
export enum KarpenterVersion {
V0_37_0 = '0.37.0',
V1_0_1 = '1.0.1',
}

export const DEFAULT_KARPENTER_VERSION: KarpenterVersion = KarpenterVersion.V0_37_0;
export const DEFAULT_KARPENTER_VERSION: KarpenterVersion = KarpenterVersion.V1_0_1;
Original file line number Diff line number Diff line change
Expand Up @@ -183,56 +183,57 @@ export function karpenterSetup(cluster: ICluster,
actions: ['ec2:RunInstances', 'ec2:CreateFleet'],
});

const allowScopedEC2InstanceActionsWithTags: PolicyStatement = new PolicyStatement({
const allowScopedEC2LaunchTemplateAccessActions = new PolicyStatement({
sid: 'AllowScopedEC2LaunchTemplateAccessActions',
effect: Effect.ALLOW,
resources: [
`arn:aws:ec2:${Stack.of(scope).region}:*:fleet/*`,
`arn:aws:ec2:${Stack.of(scope).region}:*:instance/*`,
`arn:aws:ec2:${Stack.of(scope).region}:*:volume/*`,
`arn:aws:ec2:${Stack.of(scope).region}:*:network-interface/*`,
`arn:aws:ec2:${Stack.of(scope).region}:*:launch-template/*`,
`arn:aws:ec2:${Stack.of(scope).region}:*:spot-instances-request`,
resources: [`arn:${Stack.of(scope).partition}:ec2:${Stack.of(scope).region}:*:launch-template/*`],
actions: [
'ec2:RunInstances',
'ec2:CreateFleet',
],
actions: ['ec2:RunInstances', 'ec2:CreateFleet', 'ec2:CreateLaunchTemplate'],
conditions: {
StringEquals: {
[`aws:RequestTag/kubernetes.io/cluster/${clusterName}`]: 'owned',
[`aws:ResourceTag/kubernetes.io/cluster/${clusterName}`]: 'owned',
},
StringLike: {
'aws:RequestTag/karpenter.sh/nodepool': '*',
'aws:ResourceTag/karpenter.sh/nodepool': '*',
},
},
});

const allowScopedResourceCreationTagging: PolicyStatement = new PolicyStatement({
const allowScopedResourceCreationTagging = new PolicyStatement({
sid: 'AllowScopedResourceCreationTagging',
effect: Effect.ALLOW,
resources: [
`arn:aws:ec2:${Stack.of(scope).region}:*:fleet/*`,
`arn:aws:ec2:${Stack.of(scope).region}:*:instance/*`,
`arn:aws:ec2:${Stack.of(scope).region}:*:volume/*`,
`arn:aws:ec2:${Stack.of(scope).region}:*:network-interface/*`,
`arn:aws:ec2:${Stack.of(scope).region}:*:launch-template/*`,
`arn:aws:ec2:${Stack.of(scope).region}:*:spot-instances-request`,
`arn:${Stack.of(scope).partition}:ec2:${Stack.of(scope).region}:*:fleet/*`,
`arn:${Stack.of(scope).partition}:ec2:${Stack.of(scope).region}:*:instance/*`,
`arn:${Stack.of(scope).partition}:ec2:${Stack.of(scope).region}:*:volume/*`,
`arn:${Stack.of(scope).partition}:ec2:${Stack.of(scope).region}:*:network-interface/*`,
`arn:${Stack.of(scope).partition}:ec2:${Stack.of(scope).region}:*:launch-template/*`,
`arn:${Stack.of(scope).partition}:ec2:${Stack.of(scope).region}:*:spot-instances-request/*`,
],
actions: ['ec2:CreateTags'],
conditions: {
StringEquals: {
[`aws:RequestTag/kubernetes.io/cluster/${clusterName}`]: 'owned',
'ec2:CreateAction': ['RunInstances', 'CreateFleet', 'CreateLaunchTemplate'],
'aws:RequestTag/eks:eks-cluster-name': clusterName,
'ec2:CreateAction': [
'RunInstances',
'CreateFleet',
'CreateLaunchTemplate',
],
},
StringLike: {
'aws:RequestTag/karpenter.sh/nodepool': '*',
},
},
});

const allowScopedResourceTagging: PolicyStatement = new PolicyStatement({
sid: 'allowScopedResourceTagging',

const allowScopedResourceTagging = new PolicyStatement({
sid: 'AllowScopedResourceTagging',
effect: Effect.ALLOW,
resources: [
`arn:aws:ec2:${Stack.of(scope).region}:*:instance/*`,
],
resources: [`arn:${Stack.of(scope).partition}:ec2:${Stack.of(scope).region}:*:instance/*`],
actions: ['ec2:CreateTags'],
conditions: {
'StringEquals': {
Expand All @@ -241,23 +242,31 @@ export function karpenterSetup(cluster: ICluster,
'StringLike': {
'aws:ResourceTag/karpenter.sh/nodepool': '*',
},
'StringEqualsIfExists': {
'aws:RequestTag/eks:eks-cluster-name': clusterName,
},
'ForAllValues:StringEquals': {
'aws:TagKeys': [
'eks:eks-cluster-name',
'karpenter.sh/nodeclaim',
'Name',
],
},
},
});

const allowScopedDeletion: PolicyStatement = new PolicyStatement({

const allowScopedDeletion = new PolicyStatement({
sid: 'AllowScopedDeletion',
effect: Effect.ALLOW,
resources: [
`arn:aws:ec2:${Stack.of(scope).region}:*:instance/*`,
`arn:aws:ec2:${Stack.of(scope).region}:*:launch-template/*`,
`arn:${Stack.of(scope).partition}:ec2:${Stack.of(scope).region}:*:instance/*`,
`arn:${Stack.of(scope).partition}:ec2:${Stack.of(scope).region}:*:launch-template/*`,
],
actions: [
'ec2:TerminateInstances',
'ec2:DeleteLaunchTemplate',
],
actions: ['ec2:TerminateInstances', 'ec2:DeleteLaunchTemplate'],
conditions: {
StringEquals: {
[`aws:ResourceTag/kubernetes.io/cluster/${clusterName}`]: 'owned',
Expand All @@ -268,6 +277,29 @@ export function karpenterSetup(cluster: ICluster,
},
});


const allowScopedEC2InstanceActionsWithTags: PolicyStatement = new PolicyStatement({
effect: Effect.ALLOW,
resources: [
`arn:aws:ec2:${Stack.of(scope).region}:*:fleet/*`,
`arn:aws:ec2:${Stack.of(scope).region}:*:instance/*`,
`arn:aws:ec2:${Stack.of(scope).region}:*:volume/*`,
`arn:aws:ec2:${Stack.of(scope).region}:*:network-interface/*`,
`arn:aws:ec2:${Stack.of(scope).region}:*:launch-template/*`,
`arn:aws:ec2:${Stack.of(scope).region}:*:spot-instances-request`,
],
actions: ['ec2:RunInstances', 'ec2:CreateFleet', 'ec2:CreateLaunchTemplate'],
conditions: {
StringEquals: {
[`aws:RequestTag/kubernetes.io/cluster/${clusterName}`]: 'owned',
'aws:RequestTag/eks:eks-cluster-name': clusterName,
},
StringLike: {
'aws:RequestTag/karpenter.sh/nodepool': '*',
},
},
});

const allowPassingInstanceRole: PolicyStatement = new PolicyStatement({
effect: Effect.ALLOW,
actions: ['iam:PassRole'],
Expand All @@ -293,40 +325,35 @@ export function karpenterSetup(cluster: ICluster,
actions: ['eks:DescribeCluster'],
});

const allowInstanceProfileReadActions: PolicyStatement = new PolicyStatement({
sid: 'AllowInstanceProfileReadActions',
effect: Effect.ALLOW,
resources: ['*'],
actions: ['iam:GetInstanceProfile'],
});

const allowScopedInstanceProfileCreationActions: PolicyStatement = new PolicyStatement({
const allowScopedInstanceProfileCreationActions = new PolicyStatement({
sid: 'AllowScopedInstanceProfileCreationActions',
effect: Effect.ALLOW,
resources: ['*'],
resources: [`arn:${Stack.of(scope).partition}:iam::${Stack.of(scope).account}:instance-profile/*`],
actions: ['iam:CreateInstanceProfile'],
conditions: {
StringEquals: {
[`aws:RequestTag/kubernetes.io/cluster/${clusterName}`]: 'owned',
'aws:RequestTag/topology.kubernetes.io/region': `${Stack.of(scope).region}`,
'aws:RequestTag/eks:eks-cluster-name': clusterName,
'aws:RequestTag/topology.kubernetes.io/region': Stack.of(scope).region,
},
StringLike: {
'aws:RequestTag/karpenter.k8s.aws/ec2nodeclass': '*',
},
},
});

const allowScopedInstanceProfileTagActions: PolicyStatement = new PolicyStatement({
const allowScopedInstanceProfileTagActions = new PolicyStatement({
sid: 'AllowScopedInstanceProfileTagActions',
effect: Effect.ALLOW,
resources: ['*'],
resources: [`arn:${Stack.of(scope).partition}:iam::${Stack.of(scope).account}:instance-profile/*`],
actions: ['iam:TagInstanceProfile'],
conditions: {
StringEquals: {
[`aws:ResourceTag/kubernetes.io/cluster/${clusterName}`]: 'owned',
'aws:ResourceTag/topology.kubernetes.io/region': `${Stack.of(scope).region}`,
'aws:ResourceTag/topology.kubernetes.io/region': Stack.of(scope).region,
[`aws:RequestTag/kubernetes.io/cluster/${clusterName}`]: 'owned',
'aws:RequestTag/topology.kubernetes.io/region': `${Stack.of(scope).region}`,
'aws:RequestTag/eks:eks-cluster-name': clusterName,
'aws:RequestTag/topology.kubernetes.io/region': Stack.of(scope).region,
},
StringLike: {
'aws:ResourceTag/karpenter.k8s.aws/ec2nodeclass': '*',
Expand All @@ -335,22 +362,33 @@ export function karpenterSetup(cluster: ICluster,
},
});

const allowScopedInstanceProfileActions: PolicyStatement = new PolicyStatement({
const allowScopedInstanceProfileActions = new PolicyStatement({
sid: 'AllowScopedInstanceProfileActions',
effect: Effect.ALLOW,
resources: ['*'],
actions: ['iam:AddRoleToInstanceProfile', 'iam:RemoveRoleFromInstanceProfile', 'iam:DeleteInstanceProfile'],
resources: [`arn:${Stack.of(scope).partition}:iam::${Stack.of(scope).account}:instance-profile/*`],
actions: [
'iam:AddRoleToInstanceProfile',
'iam:RemoveRoleFromInstanceProfile',
'iam:DeleteInstanceProfile',
],
conditions: {
StringEquals: {
[`aws:ResourceTag/kubernetes.io/cluster/${clusterName}`]: 'owned',
'aws:ResourceTag/topology.kubernetes.io/region': `${Stack.of(scope).region}`,
'aws:ResourceTag/topology.kubernetes.io/region': Stack.of(scope).region,
},
StringLike: {
'aws:ResourceTag/karpenter.k8s.aws/ec2nodeclass': '*',
},
},
});

const allowInstanceProfileReadActions = new PolicyStatement({
sid: 'AllowInstanceProfileReadActions',
effect: Effect.ALLOW,
resources: [`arn:${Stack.of(scope).partition}:iam::${Stack.of(scope).account}:instance-profile/*`],
actions: ['iam:GetInstanceProfile'],
});


const karpenterNS = cluster.addManifest('karpenterNS', {
apiVersion: 'v1',
Expand Down Expand Up @@ -380,6 +418,7 @@ export function karpenterSetup(cluster: ICluster,
karpenterAccount.addToPrincipalPolicy(allowAPIServerEndpointDiscovery);
karpenterAccount.addToPrincipalPolicy(allowInstanceProfileReadActions);
karpenterAccount.addToPrincipalPolicy(allowRegionalReadActions);
karpenterAccount.addToPrincipalPolicy(allowScopedEC2LaunchTemplateAccessActions);

//Deploy Karpenter Chart
const karpenterChart = cluster.addHelmChart('KarpenterHelmChart', {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

# Container image whose entrypoint runs the `setup-runtime-storage` script.
# NOTE(review): the script itself is not part of this file; presumably it
# prepares instance storage on the node (the installed tools are filesystem,
# software-RAID and disk utilities) - confirm against the script.
FROM public.ecr.aws/amazonlinux/amazonlinux:2023

# Install the tools the script needs and drop the dnf cache in the same layer
# so the package metadata and downloads do not end up in the final image.
RUN dnf -y install e2fsprogs bash mdadm util-linux \
    && dnf clean all \
    && rm -rf /var/cache/dnf

# Copy the script into the image's working directory and make it executable.
COPY setup-runtime-storage ./
RUN chmod +x ./setup-runtime-storage

# The script is invoked through `sh`, relative to the working directory.
ENTRYPOINT ["sh", "setup-runtime-storage"]
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

apiVersion: karpenter.sh/v1
kind: NodePool
metadata:
name: critical-{{az}}
spec:
# References cloud provider-specific custom resource, see your cloud provider specific documentation
template:
metadata:
# Labels are arbitrary key-values that are applied to all nodes
labels:
role: critical
node-lifecycle: on-demand

spec:
nodeClassRef:
group: karpenter.k8s.aws
kind: EC2NodeClass
name: critical-nodes-{{az}}

expireAfter: 720h

taints:
- key: role
value: critical
effect: NoSchedule

# Requirements that constrain the parameters of provisioned nodes.
# These requirements are combined with pod.spec.affinity.nodeAffinity rules.
# Operators { In, NotIn } are supported to enable including or excluding values
requirements:
# Include general purpose instance families
- key: "karpenter.k8s.aws/instance-family"
operator: In
values: ["m6gd"]
- key: "kubernetes.io/arch"
operator: In
values: ["arm64"]
# Exclude smaller instance sizes
- key: "karpenter.k8s.aws/instance-size"
operator: NotIn
values: [nano, micro, small, medium, large]
- key: "karpenter.sh/capacity-type" # If not included, the webhook for the AWS cloud provider will default to on-demand
operator: In
values: ["on-demand"]
- key: "topology.kubernetes.io/zone"
operator: In
values: ["{{az}}"]
- key: "karpenter.k8s.aws/instance-hypervisor"
operator: In
values: ["nitro"]

# Resource limits constrain the total capacity that this NodePool may provision.
# Limits prevent Karpenter from creating new instances for this NodePool once the limit is exceeded.

limits:
cpu: "3200"
memory: 12800Gi

disruption:

consolidationPolicy: WhenEmptyOrUnderutilized

# Time Karpenter waits after a pod is added to or removed from a node before considering that node for consolidation; set to Never to disable consolidation
consolidateAfter: 300s

# Priority given to the NodePool when the scheduler considers which NodePool
# to select. Higher weights indicate higher priority when comparing NodePools.
# Specifying no weight is equivalent to specifying a weight of 0.
weight: 10

---
apiVersion: karpenter.k8s.aws/v1
kind: EC2NodeClass
metadata:
name: critical-nodes-{{az}}
spec:
subnetSelectorTerms:
- id: {{subnet-id}}
securityGroupSelectorTerms:
- tags:
kubernetes.io/cluster/{{cluster-name}}: owned
tags:
KarpenerProvisionerName: "critical"

role: {{ROLENAME}}

metadataOptions:
httpEndpoint: enabled
httpProtocolIPv6: disabled
httpPutResponseHopLimit: 2
httpTokens: required
amiFamily: Bottlerocket
amiSelectorTerms:
- alias: bottlerocket@latest
userData: |
[settings.bootstrap-containers.bootstrap-nvme-raid0]
source = "{{REPLACE-WITH-IMAGE-ECR}}"
mode = "once"
essential = true
Loading

0 comments on commit 1bb7372

Please sign in to comment.