Use instance pools for infra and worker node groups #98

Merged (6 commits) on Feb 10, 2025
README.md (2 additions, 0 deletions)

@@ -51,6 +51,8 @@ The module provides variables to
* provide an API token for control.vshn.net (see next sections for details).
* choose a dedicated deployment target
This allows for using dedicated hypervisors.
* choose to provision Exoscale instance pools for the infra and worker nodes, as shown in the example after this list.
NOTE: we currently don't support provisioning Exoscale instance pools for the control plane and storage nodes.
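
A minimal sketch of how this can be enabled from a configuration that consumes this module; the module source shown is a placeholder and the other required arguments are elided, only `use_instancepools` is the option described here:

```hcl
module "cluster" {
  # Placeholder source; in practice this points at this module.
  source = "./terraform-openshift4-exoscale"

  # ... API token, base domain, cluster id and the other required
  # variables described above ...

  # Provision Exoscale instance pools for the infra and worker node groups.
  # Control plane and storage nodes keep individual compute instances.
  use_instancepools = true
}
```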

The cluster's domain is constructed from the provided base domain, cluster id and cluster name.
If a cluster name is provided the cluster domain is set to `<cluster name>.<base domain>`.
control_plane.tf (5 additions, 0 deletions)

@@ -32,6 +32,11 @@ module "master" {
deploy_target_id = var.deploy_target_id

bootstrap_bucket = var.bootstrap_bucket

# Don't use instance pool for control plane nodes, since scaling them is
# much easier without an instance pool by just stopping/scaling/starting the
# same VM.
use_instancepool = false
}

resource "exoscale_domain_record" "etcd" {
infra.tf (2 additions, 0 deletions)

@@ -33,4 +33,6 @@ module "infra" {
deploy_target_id = var.deploy_target_id

bootstrap_bucket = var.bootstrap_bucket

use_instancepool = var.use_instancepools
}
modules/node-group/main.tf (32 additions, 4 deletions)

@@ -17,7 +17,11 @@ locals {
// having to work around merge() being a shallow merge in the compute
// instance resource.
user_data = [
for hostname in random_id.node_id[*].hex :
// NOTE(sg): we only need to patch each node's user-data to have a custom
// /etc/hosts for non-instancepool setups. For instancepool setups, we
// only need a single user-data and we don't actually use the value of
// `hostname`.
for hostname in(var.use_instancepool ? ["pool_member"] : random_id.node_id[*].hex) :
{
"ignition" : {
"version" : "3.1.0",
@@ -42,7 +46,7 @@ locals {
"storage" : {
// concatenate the private network config (if requested) with the
// `/etc/hostname` override.
"files" : concat(
"files" : var.use_instancepool ? [] : concat(
var.use_privnet ? local.privnet_config_files : [],
// override /etc/hostname with short hostname, this works around the
// fact that we can't set a separate `name` and `display_name` for
@@ -158,7 +162,7 @@
}

resource "random_id" "node_id" {
count = var.node_count
count = var.use_instancepool ? 0 : var.node_count
prefix = "${var.role}-"
byte_length = 2
}
@@ -170,7 +174,7 @@ resource "exoscale_anti_affinity_group" "anti_affinity_group" {
}

resource "exoscale_compute_instance" "nodes" {
count = var.node_count
count = var.use_instancepool ? 0 : var.node_count
name = "${random_id.node_id[count.index].hex}.${var.cluster_domain}"
ssh_key = var.ssh_key_pair
zone = var.region
@@ -207,3 +211,27 @@ resource "exoscale_compute_instance" "nodes" {
]
}
}

// When instance pools are enabled, one pool is created per anti-affinity
// group instead of individual compute instances.
resource "exoscale_instance_pool" "nodes" {
count = var.use_instancepool ? local.anti_affinity_group_count : 0
name = "${var.cluster_id}_${var.role}-${count.index}"
size = var.node_count
zone = var.region
key_pair = var.ssh_key_pair
template_id = var.template_id

instance_prefix = var.role
instance_type = var.instance_type

disk_size = local.disk_size
user_data = jsonencode(local.user_data[0])

deploy_target_id = var.deploy_target_id

security_group_ids = var.security_group_ids

anti_affinity_group_ids = concat(
[exoscale_anti_affinity_group.anti_affinity_group[count.index].id],
var.additional_affinity_group_ids
)
}
modules/node-group/output.tf (5 additions, 1 deletion)

@@ -1,3 +1,7 @@
locals {
// Public IP addresses of all instance pool members, flattened across pools.
instance_pool_ips = var.use_instancepool ? flatten(exoscale_instance_pool.nodes[*].instances[*].public_ip_address) : []
// Addresses of the individually managed instances (private network IP when `use_privnet` is set).
instance_ips = var.use_privnet ? exoscale_compute_instance.nodes[*].network_interface[0].ip_address : exoscale_compute_instance.nodes[*].public_ip_address
}
output "ip_address" {
value = var.use_privnet ? exoscale_compute_instance.nodes[*].network_interface[0].ip_address : exoscale_compute_instance.nodes[*].public_ip_address
value = var.use_instancepool ? local.instance_pool_ips : local.instance_ips
}
modules/node-group/variables.tf (6 additions, 0 deletions)

@@ -126,3 +126,9 @@ variable "affinity_group_capacity" {
default = 0
description = "Capacity of the affinity group, e.g. when using dedicated hypervisors, default: 0 (unlimited)"
}

variable "use_instancepool" {
type = bool
description = "Use instancepool for this node group"
default = false
}
security_groups.tf (19 additions, 0 deletions)

@@ -160,3 +160,22 @@ resource "exoscale_security_group_rule" "storage" {

user_security_group_id = exoscale_security_group.all_machines.id
}

resource "exoscale_security_group" "worker" {
name = "${var.cluster_id}_worker"
description = "${var.cluster_id} worker nodes"
}

resource "exoscale_security_group_rule" "worker_nodeports" {
for_each = toset(["TCP", "UDP"])

security_group_id = exoscale_security_group.worker.id

type = "INGRESS"
protocol = each.value
description = "Access to worker node ports from anywhere"
start_port = 30000
end_port = 32767

cidr = "0.0.0.0/0"
}
storage.tf (4 additions, 0 deletions)

@@ -37,4 +37,8 @@ module "storage" {
deploy_target_id = var.deploy_target_id

bootstrap_bucket = var.bootstrap_bucket

# Don't use instancepool for storage nodes so that we can keep the existing
# day 2 operations how-tos (scaling disk etc.).
use_instancepool = false
}
worker.tf (8 additions, 2 deletions)

@@ -26,7 +26,8 @@ module "worker" {

security_group_ids = concat(
var.additional_security_group_ids,
[exoscale_security_group.all_machines.id]
[exoscale_security_group.all_machines.id],
var.use_instancepools ? [exoscale_security_group.worker.id] : []
)

affinity_group_capacity = var.affinity_group_capacity
@@ -35,6 +36,8 @@
deploy_target_id = var.deploy_target_id

bootstrap_bucket = var.bootstrap_bucket

use_instancepool = var.use_instancepools
}

// Additional worker groups.
@@ -71,7 +74,8 @@ module "additional_worker" {

security_group_ids = concat(
var.additional_security_group_ids,
[exoscale_security_group.all_machines.id]
[exoscale_security_group.all_machines.id],
var.use_instancepools ? [exoscale_security_group.worker.id] : []
)

affinity_group_capacity = var.affinity_group_capacity
@@ -83,4 +87,6 @@
deploy_target_id = var.deploy_target_id

bootstrap_bucket = var.bootstrap_bucket

use_instancepool = true
}