Skip to content

Commit dee9e31

Browse files
erikvveen authored and PhilipSchmid committed
Talos k8s with AWS CCM support
1 parent bf68f17 commit dee9e31

File tree

7 files changed

+241
-17
lines changed

7 files changed

+241
-17
lines changed

00-locals.tf

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,19 @@ locals {
2727
cluster = {
2828
id = var.cluster_id,
2929
clusterName = var.cluster_name,
30+
externalCloudProvider = {
31+
enabled = var.enable_external_cloud_provider
32+
# compact() removes null and "" entries, so the list is empty (instead of
# [null] or [""]) when the provider is disabled or when the manifest is
# deliberately left empty for a manual (e.g., Helm) deployment.
manifests = compact([
33+
var.enable_external_cloud_provider ? var.external_cloud_provider_manifest : null,
34+
])
35+
},
3036
apiServer = {
3137
certSANs = [
3238
module.elb_k8s_elb.elb_dns_name
33-
]
39+
],
40+
extraArgs = {
41+
enable-admission-plugins = var.admission_plugins
42+
}
3443
},
3544
controllerManager = {
3645
extraArgs = {
@@ -55,16 +64,14 @@ locals {
5564
allowSchedulingOnControlPlanes = var.allow_workload_on_cp_nodes
5665
},
5766
machine = {
58-
kubelet = {
59-
registerWithFQDN = true
60-
},
6167
certSANs = [
6268
module.elb_k8s_elb.elb_dns_name
6369
],
6470
kubelet = {
6571
extraArgs = {
6672
rotate-server-certificates = true
67-
}
73+
},
74+
registerWithFQDN = true
6875
}
6976
}
7077
}
@@ -94,4 +101,4 @@ locals {
94101
"kubernetes.io/cluster/${var.cluster_name}" = "owned"
95102
}
96103

97-
}
104+
}

00-variables.tf

Lines changed: 52 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,28 @@ variable "cluster_id" {
99
type = number
1010
}
1111

12+
# Optional IAM instance profile for the control plane instances; defaults to
# null and is passed to the control plane EC2 module as iam_instance_profile.
variable "iam_instance_profile_control_plane" {
13+
default = null
14+
description = "IAM instance profile to attach to the control plane instances to give AWS CCM the sufficient rights to execute."
15+
type = string
16+
}
17+
18+
# Optional IAM instance profile for the worker instances; defaults to null and
# is passed to the worker group EC2 module as iam_instance_profile.
variable "iam_instance_profile_worker" {
19+
default = null
20+
description = "IAM instance profile to attach to the worker instances to give AWS CCM the sufficient rights to execute."
21+
type = string
22+
}
23+
24+
# Instance metadata service settings; the same map is passed as
# metadata_options to both the control plane and the worker EC2 modules.
variable "metadata_options" {
25+
description = "Instance metadata service options to configure on the instances (http_endpoint, http_tokens, http_put_response_hop_limit)."
26+
type = map(string)
27+
default = {
28+
http_endpoint = "enabled"
29+
http_tokens = "optional"
30+
# Written as a string to match the declared map(string) type; Terraform
# would otherwise convert the number implicitly to the same value.
http_put_response_hop_limit = "1"
31+
}
32+
}
33+
1234
variable "cluster_architecture" {
1335
default = "amd64"
1436
description = "Cluster architecture. Choose 'arm64' or 'amd64'. If you choose 'arm64', ensure to also override the control_plane.instance_type and worker_groups.instance_type with an ARM64-based instance type like 'm7g.large'."
@@ -55,7 +77,7 @@ variable "disable_kube_proxy" {
5577

5678
variable "allow_workload_on_cp_nodes" {
5779
default = false
58-
description = "Allow workloads on CP nodes or not. Allowing it means Talos Linux default taints are removed from CP nodes. More details here: https://www.talos.dev/v1.5/talos-guides/howto/workers-on-controlplane/"
80+
description = "Allow workloads on CP nodes or not. Allowing it means Talos Linux default taints are removed from CP nodes which is typically required for single-node clusters. More details here: https://www.talos.dev/v1.5/talos-guides/howto/workers-on-controlplane/"
5981
type = bool
6082
}
6183

@@ -142,4 +164,32 @@ variable "config_patch_files" {
142164
default = []
143165
description = "Path to talos config path files that applies to all nodes"
144166
type = list(string)
145-
}
167+
}
168+
169+
# A single comma-separated string (not a Terraform list): the value is handed
# as-is to the API server's 'enable-admission-plugins' extra argument.
variable "admission_plugins" {
170+
description = "Comma-separated list of admission plugins to enable (passed as the API server's 'enable-admission-plugins' argument)."
171+
type = string
172+
default = "MutatingAdmissionWebhook,ValidatingAdmissionWebhook,ServiceAccount"
173+
}
174+
175+
# Switch for external cloud provider support: it sets
# cluster.externalCloudProvider.enabled in the generated config and, together
# with var.deploy_external_cloud_provider_iam_policies, gates creation of the
# CCM IAM policies and instance profiles.
variable "enable_external_cloud_provider" {
176+
default = false
177+
description = "Whether to enable or disable externalCloudProvider support. See https://kubernetes.io/docs/tasks/administer-cluster/running-cloud-controller/."
178+
type = bool
179+
}
180+
181+
# Opt-in creation of the IAM policies used by the external cloud provider.
# Only meaningful when var.enable_external_cloud_provider is also true.
variable "deploy_external_cloud_provider_iam_policies" {
182+
default = false
183+
description = "Whether to auto-deploy the externalCloudProvider-required IAM policies. See https://cloud-provider-aws.sigs.k8s.io/prerequisites/."
184+
type = bool
185+
validation {
186+
# Implication "deploy => enabled", written as "!deploy || enabled".
# NOTE(review): this refers to another variable inside a validation block,
# which is presumably only accepted by newer Terraform releases — confirm
# against the module's required_version.
condition = !var.deploy_external_cloud_provider_iam_policies || var.enable_external_cloud_provider
187+
error_message = "externalCloudProvider support needs to be enabled when trying to deploy the externalCloudProvider-required IAM policies."
188+
}
189+
}
190+
191+
# Manifest reference (a URL by default) that is placed into
# cluster.externalCloudProvider.manifests when
# var.enable_external_cloud_provider is true.
variable "external_cloud_provider_manifest" {
192+
default = "https://raw.githubusercontent.com/isovalent/terraform-aws-talos/main/aws-cloud-controller.yaml"
193+
description = "externalCloudProvider manifest to be applied if var.enable_external_cloud_provider is enabled. If you want to deploy it manually (e.g., via Helm chart), enable var.enable_external_cloud_provider but set this value to an empty string (\"\"). See https://kubernetes.io/docs/tasks/administer-cluster/running-cloud-controller/."
194+
type = string
195+
}

03-talos.tf

Lines changed: 134 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,117 @@
1+
# https://cloud-provider-aws.sigs.k8s.io/prerequisites/
2+
# IAM policy for the control plane nodes. It is created only when both
# var.enable_external_cloud_provider and
# var.deploy_external_cloud_provider_iam_policies are true, and is referenced
# by the control plane instance module as aws_iam_policy.control_plane_ccm_policy[0].
resource "aws_iam_policy" "control_plane_ccm_policy" {
3+
count = var.enable_external_cloud_provider && var.deploy_external_cloud_provider_iam_policies ? 1 : 0
4+
5+
name = "${var.cluster_name}-control-plane-ccm-policy"
6+
path = "/"
7+
description = "IAM policy for the control plane nodes to allow CCM to manage AWS resources"
8+
9+
policy = jsonencode(
10+
{
11+
Version = "2012-10-17",
12+
Statement = [
13+
{
14+
Effect = "Allow",
15+
Action = [
16+
"autoscaling:DescribeAutoScalingGroups",
17+
"autoscaling:DescribeLaunchConfigurations",
18+
"autoscaling:DescribeTags",
19+
"ec2:DescribeInstances",
20+
"ec2:DescribeRegions",
21+
"ec2:DescribeRouteTables",
22+
"ec2:DescribeSecurityGroups",
23+
"ec2:DescribeSubnets",
24+
"ec2:DescribeVolumes",
25+
"ec2:DescribeAvailabilityZones",
26+
"ec2:CreateSecurityGroup",
27+
"ec2:CreateTags",
28+
"ec2:CreateVolume",
29+
"ec2:ModifyInstanceAttribute",
30+
"ec2:ModifyVolume",
31+
"ec2:AttachVolume",
32+
"ec2:AuthorizeSecurityGroupIngress",
33+
"ec2:CreateRoute",
34+
"ec2:DeleteRoute",
35+
"ec2:DeleteSecurityGroup",
36+
"ec2:DeleteVolume",
37+
"ec2:DetachVolume",
38+
"ec2:RevokeSecurityGroupIngress",
39+
"ec2:DescribeVpcs",
40+
"ec2:DescribeInstanceTopology",
41+
"elasticloadbalancing:AddTags",
42+
"elasticloadbalancing:AttachLoadBalancerToSubnets",
43+
"elasticloadbalancing:ApplySecurityGroupsToLoadBalancer",
44+
"elasticloadbalancing:CreateLoadBalancer",
45+
"elasticloadbalancing:CreateLoadBalancerPolicy",
46+
"elasticloadbalancing:CreateLoadBalancerListeners",
47+
"elasticloadbalancing:ConfigureHealthCheck",
48+
"elasticloadbalancing:DeleteLoadBalancer",
49+
"elasticloadbalancing:DeleteLoadBalancerListeners",
50+
"elasticloadbalancing:DescribeLoadBalancers",
51+
"elasticloadbalancing:DescribeLoadBalancerAttributes",
52+
"elasticloadbalancing:DetachLoadBalancerFromSubnets",
53+
"elasticloadbalancing:DeregisterInstancesFromLoadBalancer",
54+
"elasticloadbalancing:ModifyLoadBalancerAttributes",
55+
"elasticloadbalancing:RegisterInstancesWithLoadBalancer",
56+
"elasticloadbalancing:SetLoadBalancerPoliciesForBackendServer",
57+
# NOTE(review): "elasticloadbalancing:AddTags" already appears earlier in
# this list; the repeated entry is kept so the rendered policy is unchanged.
"elasticloadbalancing:AddTags",
58+
"elasticloadbalancing:CreateListener",
59+
"elasticloadbalancing:CreateTargetGroup",
60+
"elasticloadbalancing:DeleteListener",
61+
"elasticloadbalancing:DeleteTargetGroup",
62+
"elasticloadbalancing:DescribeListeners",
63+
"elasticloadbalancing:DescribeLoadBalancerPolicies",
64+
"elasticloadbalancing:DescribeTargetGroups",
65+
"elasticloadbalancing:DescribeTargetHealth",
66+
"elasticloadbalancing:ModifyListener",
67+
"elasticloadbalancing:ModifyTargetGroup",
68+
"elasticloadbalancing:RegisterTargets",
69+
"elasticloadbalancing:DeregisterTargets",
70+
"elasticloadbalancing:SetLoadBalancerPoliciesOfListener",
71+
"iam:CreateServiceLinkedRole",
72+
"kms:DescribeKey"
73+
],
74+
Resource = [
75+
"*"
76+
]
77+
}
78+
]
79+
}
80+
)
81+
}
82+
83+
# https://cloud-provider-aws.sigs.k8s.io/prerequisites/
84+
# IAM policy for the worker nodes (EC2 describe calls plus read-only ECR
# access). It is created only when both var.enable_external_cloud_provider and
# var.deploy_external_cloud_provider_iam_policies are true, and is referenced
# by the worker group instance module as aws_iam_policy.worker_ccm_policy[0].
resource "aws_iam_policy" "worker_ccm_policy" {
85+
count = var.enable_external_cloud_provider && var.deploy_external_cloud_provider_iam_policies ? 1 : 0
86+
87+
name = "${var.cluster_name}-worker-ccm-policy"
88+
path = "/"
89+
description = "IAM policy for the worker nodes to allow CCM to manage AWS resources"
90+
91+
policy = jsonencode(
92+
{
93+
Version = "2012-10-17",
94+
Statement = [
95+
{
96+
Effect = "Allow",
97+
Action = [
98+
"ec2:DescribeInstances",
99+
"ec2:DescribeRegions",
100+
"ecr:GetAuthorizationToken",
101+
"ecr:BatchCheckLayerAvailability",
102+
"ecr:GetDownloadUrlForLayer",
103+
"ecr:GetRepositoryPolicy",
104+
"ecr:DescribeRepositories",
105+
"ecr:ListImages",
106+
"ecr:BatchGetImage"
107+
],
108+
Resource = "*"
109+
}
110+
]
111+
}
112+
)
113+
}
114+
1115
module "talos_control_plane_nodes" {
2116
source = "terraform-aws-modules/ec2-instance/aws"
3117
version = "~> 5.5"
@@ -10,6 +124,13 @@ module "talos_control_plane_nodes" {
10124
subnet_id = element(data.aws_subnets.public.ids, count.index)
11125
associate_public_ip_address = true
12126
tags = merge(var.tags, local.cluster_required_tags)
127+
metadata_options = var.metadata_options
128+
create_iam_instance_profile = var.enable_external_cloud_provider && var.deploy_external_cloud_provider_iam_policies ? true : false
129+
iam_instance_profile = var.iam_instance_profile_control_plane
130+
iam_role_use_name_prefix = false
131+
iam_role_policies = var.enable_external_cloud_provider && var.deploy_external_cloud_provider_iam_policies ? {
132+
"${var.cluster_name}-control-plane-ccm-policy" : aws_iam_policy.control_plane_ccm_policy[0].arn,
133+
} : {}
13134

14135
vpc_security_group_ids = [module.cluster_sg.security_group_id]
15136

@@ -32,6 +153,13 @@ module "talos_worker_group" {
32153
subnet_id = element(data.aws_subnets.public.ids, tonumber(trimprefix(each.key, "${each.value.name}.")))
33154
associate_public_ip_address = true
34155
tags = merge(each.value.tags, var.tags, local.cluster_required_tags)
156+
metadata_options = var.metadata_options
157+
create_iam_instance_profile = var.enable_external_cloud_provider && var.deploy_external_cloud_provider_iam_policies ? true : false
158+
iam_instance_profile = var.iam_instance_profile_worker
159+
iam_role_use_name_prefix = false
160+
iam_role_policies = var.enable_external_cloud_provider && var.deploy_external_cloud_provider_iam_policies ? {
161+
"${var.cluster_name}-worker-ccm-policy" : aws_iam_policy.worker_ccm_policy[0].arn,
162+
} : {}
35163

36164
vpc_security_group_ids = [module.cluster_sg.security_group_id]
37165

@@ -45,6 +173,8 @@ module "talos_worker_group" {
45173
resource "talos_machine_secrets" "this" {}
46174

47175
data "talos_machine_configuration" "controlplane" {
176+
for_each = { for index in range(var.controlplane_count) : index => index }
177+
48178
cluster_name = var.cluster_name
49179
cluster_endpoint = "https://${module.elb_k8s_elb.elb_dns_name}"
50180
machine_type = "controlplane"
@@ -77,12 +207,11 @@ data "talos_machine_configuration" "worker_group" {
77207
}
78208

79209
resource "talos_machine_configuration_apply" "controlplane" {
80-
count = var.controlplane_count
81-
210+
for_each = { for index, instance in module.talos_control_plane_nodes : index => instance }
82211
client_configuration = talos_machine_secrets.this.client_configuration
83-
machine_configuration_input = data.talos_machine_configuration.controlplane.machine_configuration
84-
endpoint = module.talos_control_plane_nodes[count.index].public_ip
85-
node = module.talos_control_plane_nodes[count.index].private_ip
212+
machine_configuration_input = data.talos_machine_configuration.controlplane[each.key].machine_configuration
213+
endpoint = module.talos_control_plane_nodes[each.key].public_ip
214+
node = module.talos_control_plane_nodes[each.key].private_ip
86215
}
87216

88217
resource "talos_machine_configuration_apply" "worker_group" {

README.md

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ A Terraform module to manage a Talos-based Kubernetes on AWS (EC2 instances). Is
1414
- [Talos' KubePrism](https://www.talos.dev/v1.5/kubernetes-guides/configuration/kubeprism/) to get an internal endpoint for the KAPI (used for [Cilium Kube-Proxy replacement](https://docs.cilium.io/en/stable/network/kubernetes/kubeproxy-free/))
1515
- [kubernetes-sigs/metrics-server](https://github.com/kubernetes-sigs/metrics-server/)
1616
- [alex1989hu/kubelet-serving-cert-approver](https://github.com/alex1989hu/kubelet-serving-cert-approver) inspired by [Talos' Deploying Metrics Server](https://www.talos.dev/v1.5/kubernetes-guides/configuration/deploy-metrics-server/) guide.
17+
- [AWS Cloud Provider](https://github.com/kubernetes/cloud-provider-aws/tree/master)
1718
- Cilium features:
1819
- [Kube-Proxy replacement](https://docs.cilium.io/en/stable/network/kubernetes/kubeproxy-free/)
1920
- [IPAM modes](https://docs.cilium.io/en/stable/network/concepts/ipam/): `kubernetes`, `cluster-pool`
@@ -72,6 +73,8 @@ module "talos" {
7273

7374
| Name | Type |
7475
|------|------|
76+
| [aws_iam_policy.control_plane_ccm_policy](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource |
77+
| [aws_iam_policy.worker_ccm_policy](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/resources/iam_policy) | resource |
7578
| [local_file.kubeconfig](https://registry.terraform.io/providers/hashicorp/local/latest/docs/resources/file) | resource |
7679
| [local_file.talosconfig](https://registry.terraform.io/providers/hashicorp/local/latest/docs/resources/file) | resource |
7780
| [null_resource.wait_for_public_subnets](https://registry.terraform.io/providers/hashicorp/null/latest/docs/resources/resource) | resource |
@@ -93,17 +96,24 @@ module "talos" {
9396

9497
| Name | Description | Type | Default | Required |
9598
|------|-------------|------|---------|:--------:|
99+
| <a name="input_admission_plugins"></a> [admission\_plugins](#input\_admission\_plugins) | List of admission plugins to enable | `string` | `"MutatingAdmissionWebhook,ValidatingAdmissionWebhook,ServiceAccount"` | no |
96100
| <a name="input_allocate_node_cidrs"></a> [allocate\_node\_cidrs](#input\_allocate\_node\_cidrs) | Whether to assign PodCIDRs to Node resources or not. Only needed in case Cilium runs in 'kubernetes' IPAM mode. | `bool` | `true` | no |
97-
| <a name="input_allow_workload_on_cp_nodes"></a> [allow\_workload\_on\_cp\_nodes](#input\_allow\_workload\_on\_cp\_nodes) | Allow workloads on CP nodes or not. Allowing it means Talos Linux default taints are removed from CP nodes. More details here: https://www.talos.dev/v1.5/talos-guides/howto/workers-on-controlplane/ | `bool` | `false` | no |
101+
| <a name="input_allow_workload_on_cp_nodes"></a> [allow\_workload\_on\_cp\_nodes](#input\_allow\_workload\_on\_cp\_nodes) | Allow workloads on CP nodes or not. Allowing it means Talos Linux default taints are removed from CP nodes which is typically required for single-node clusters. More details here: https://www.talos.dev/v1.5/talos-guides/howto/workers-on-controlplane/ | `bool` | `false` | no |
98102
| <a name="input_cluster_architecture"></a> [cluster\_architecture](#input\_cluster\_architecture) | Cluster architecture. Choose 'arm64' or 'amd64'. If you choose 'arm64', ensure to also override the control\_plane.instance\_type and worker\_groups.instance\_type with an ARM64-based instance type like 'm7g.large'. | `string` | `"amd64"` | no |
99103
| <a name="input_cluster_id"></a> [cluster\_id](#input\_cluster\_id) | The ID of the cluster. | `number` | `"1"` | no |
100104
| <a name="input_cluster_name"></a> [cluster\_name](#input\_cluster\_name) | Name of cluster | `string` | n/a | yes |
101105
| <a name="input_config_patch_files"></a> [config\_patch\_files](#input\_config\_patch\_files) | Path to talos config path files that applies to all nodes | `list(string)` | `[]` | no |
102106
| <a name="input_control_plane"></a> [control\_plane](#input\_control\_plane) | Info for control plane that will be created | <pre>object({<br/> instance_type = optional(string, "m5.large")<br/> config_patch_files = optional(list(string), [])<br/> tags = optional(map(string), {})<br/> })</pre> | `{}` | no |
103107
| <a name="input_controlplane_count"></a> [controlplane\_count](#input\_controlplane\_count) | Defines how many controlplane nodes are deployed in the cluster. | `number` | `3` | no |
108+
| <a name="input_deploy_external_cloud_provider_iam_policies"></a> [deploy\_external\_cloud\_provider\_iam\_policies](#input\_deploy\_external\_cloud\_provider\_iam\_policies) | Whether to auto-deploy the externalCloudProvider-required IAM policies. See https://cloud-provider-aws.sigs.k8s.io/prerequisites/. | `bool` | `false` | no |
104109
| <a name="input_disable_kube_proxy"></a> [disable\_kube\_proxy](#input\_disable\_kube\_proxy) | Whether to deploy Kube-Proxy or not. By default, KP shouldn't be deployed. | `bool` | `true` | no |
110+
| <a name="input_enable_external_cloud_provider"></a> [enable\_external\_cloud\_provider](#input\_enable\_external\_cloud\_provider) | Whether to enable or disable externalCloudProvider support. See https://kubernetes.io/docs/tasks/administer-cluster/running-cloud-controller/. | `bool` | `false` | no |
111+
| <a name="input_external_cloud_provider_manifest"></a> [external\_cloud\_provider\_manifest](#input\_external\_cloud\_provider\_manifest) | externalCloudProvider manifest to be applied if var.enable\_external\_cloud\_provider is enabled. If you want to deploy it manually (e.g., via Helm chart), enable var.enable\_external\_cloud\_provider but set this value to an empty string (""). See https://kubernetes.io/docs/tasks/administer-cluster/running-cloud-controller/. | `string` | `"https://raw.githubusercontent.com/isovalent/terraform-aws-talos/main/aws-cloud-controller.yaml"` | no |
112+
| <a name="input_iam_instance_profile_control_plane"></a> [iam\_instance\_profile\_control\_plane](#input\_iam\_instance\_profile\_control\_plane) | IAM instance profile to attach to the control plane instances to give AWS CCM the sufficient rights to execute. | `string` | `null` | no |
113+
| <a name="input_iam_instance_profile_worker"></a> [iam\_instance\_profile\_worker](#input\_iam\_instance\_profile\_worker) | IAM instance profile to attach to the worker instances to give AWS CCM the sufficient rights to execute. | `string` | `null` | no |
105114
| <a name="input_kubernetes_api_allowed_cidr"></a> [kubernetes\_api\_allowed\_cidr](#input\_kubernetes\_api\_allowed\_cidr) | The CIDR from which to allow to access the Kubernetes API | `string` | `"0.0.0.0/0"` | no |
106115
| <a name="input_kubernetes_version"></a> [kubernetes\_version](#input\_kubernetes\_version) | Kubernetes version to use for the Talos cluster, if not set, the K8s version shipped with the selected Talos version will be used. Check https://www.talos.dev/latest/introduction/support-matrix/. For example '1.29.3'. | `string` | `""` | no |
116+
| <a name="input_metadata_options"></a> [metadata\_options](#input\_metadata\_options) | Metadata to attach to the instances. | `map(string)` | <pre>{<br/> "http_endpoint": "enabled",<br/> "http_put_response_hop_limit": 1,<br/> "http_tokens": "optional"<br/>}</pre> | no |
107117
| <a name="input_pod_cidr"></a> [pod\_cidr](#input\_pod\_cidr) | The CIDR to use for Pods. Only required in case allocate\_node\_cidrs is set to 'true'. Otherwise, simply configure it inside Cilium's Helm values. | `string` | `"100.64.0.0/14"` | no |
108118
| <a name="input_region"></a> [region](#input\_region) | The region in which to create the Talos Linux cluster. | `string` | n/a | yes |
109119
| <a name="input_service_cidr"></a> [service\_cidr](#input\_service\_cidr) | The CIDR to use for services. | `string` | `"100.68.0.0/16"` | no |

0 commit comments

Comments
 (0)