From 2fc3debd0264aa3eb03d70e1602fa41e7c360b09 Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Tue, 9 Jan 2024 09:01:41 +0100 Subject: [PATCH 1/8] terraform, azure: increase misc config resolution to help during upgrades --- terraform/azure/main.tf | 33 ++++++++++++++---------- terraform/azure/projects/utoronto.tfvars | 21 ++++++++++++--- terraform/azure/variables.tf | 29 +++++++++++---------- 3 files changed, 53 insertions(+), 30 deletions(-) diff --git a/terraform/azure/main.tf b/terraform/azure/main.tf index ce605763ac..8f8f94d0cf 100644 --- a/terraform/azure/main.tf +++ b/terraform/azure/main.tf @@ -93,22 +93,31 @@ resource "azurerm_kubernetes_cluster" "jupyterhub" { } } - # Core node-pool + # default_node_pool must be set, and it must be a node pool of system type + # that can't scale to zero. Due to that we are forced to use it, and have + # decided to use it as our core node pool. + # + # Most changes to this node pool forces a replace operation on the entire + # cluster. This can be avoided with v3.47.0+ of this provider by declaring + # temporary_name_for_rotation = "core-b". + # + # ref: https://github.com/hashicorp/terraform-provider-azurerm/pull/20628 + # ref: https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/kubernetes_cluster#temporary_name_for_rotation. 
+ # default_node_pool { - # Unfortunately, changing anything about VM type / size recreates *whole cluster - name = "core" - vm_size = var.core_node_vm_size - os_disk_size_gb = 40 + name = var.core_node_pool.name + vm_size = var.core_node_pool.vm_size + os_disk_size_gb = var.core_node_pool.os_disk_size_gb enable_auto_scaling = true - min_count = 1 - max_count = 10 + min_count = var.core_node_pool.min + max_count = var.core_node_pool.max vnet_subnet_id = azurerm_subnet.node_subnet.id - node_labels = { + node_labels = merge({ "hub.jupyter.org/node-purpose" = "core", "k8s.dask.org/node-purpose" = "core" - } + }, var.core_node_pool.labels) - orchestrator_version = var.kubernetes_version + orchestrator_version = coalesce(var.core_node_pool.kubernetes_version, var.kubernetes_version) } auto_scaler_profile { @@ -140,7 +149,7 @@ resource "azurerm_kubernetes_cluster" "jupyterhub" { resource "azurerm_kubernetes_cluster_node_pool" "user_pool" { for_each = var.notebook_nodes - name = "nb${each.key}" + name = coalesce(each.value.name, each.key) kubernetes_cluster_id = azurerm_kubernetes_cluster.jupyterhub.id enable_auto_scaling = true os_disk_size_gb = 200 @@ -152,7 +161,6 @@ resource "azurerm_kubernetes_cluster_node_pool" "user_pool" { node_labels = merge({ "hub.jupyter.org/node-purpose" = "user", "k8s.dask.org/node-purpose" = "scheduler" - "hub.jupyter.org/node-size" = each.value.vm_size }, each.value.labels) node_taints = concat([ @@ -180,7 +188,6 @@ resource "azurerm_kubernetes_cluster_node_pool" "dask_pool" { vm_size = each.value.vm_size node_labels = merge({ "k8s.dask.org/node-purpose" = "worker", - "hub.jupyter.org/node-size" = each.value.vm_size }, each.value.labels) node_taints = concat([ diff --git a/terraform/azure/projects/utoronto.tfvars b/terraform/azure/projects/utoronto.tfvars index eaf3d01c87..8c3eeaddaf 100644 --- a/terraform/azure/projects/utoronto.tfvars +++ b/terraform/azure/projects/utoronto.tfvars @@ -13,16 +13,31 @@ ssh_pub_key = "ssh-rsa 
AAAAB3NzaC1yc2EAAAADAQABAAABAQDQJ4h39UYNi1wybxAH+jCFkNK2 # kubernetes_version = "1.26.3" -# FIXME: upgrade core_node_vm_size to Standard_E4s_v5 -core_node_vm_size = "Standard_E4s_v3" +core_node_pool = { + name : "core", + vm_size : "Standard_E4s_v3", + kubernetes_version : "1.26.3", +} notebook_nodes = { "default" : { + name : "nbdefault", # NOTE: min-max below was set to 0-86 retroactively to align with # observed state without understanding on why 0-86 was picked. min : 0, max : 86, # FIXME: upgrade user nodes vm_size to Standard_E8s_v5 vm_size : "Standard_E8s_v3", - } + # FIXME: remove this label + labels : { + "hub.jupyter.org/node-size" = "Standard_E8s_v3", + }, + kubernetes_version : "1.26.3", + }, + #"usere8sv5" : { + # min : 0, + # max : 100, + # vm_size : "Standard_E8s_v5", + # kubernetes_version : "1.28.3", + #} } diff --git a/terraform/azure/variables.tf b/terraform/azure/variables.tf index b9cd943680..a7982e1c55 100644 --- a/terraform/azure/variables.tf +++ b/terraform/azure/variables.tf @@ -48,20 +48,6 @@ variable "kubernetes_version" { } -variable "core_node_vm_size" { - type = string - description = <<-EOT - VM Size to use for core nodes - - Core nodes will always be on, and count as 'base cost' - for a cluster. We should try to run with as few of them - as possible. - - WARNING: CHANGING THIS WILL DESTROY AND RECREATE THE CLUSTER! 
- EOT -} - - variable "global_container_registry_name" { type = string description = <<-EOT @@ -92,8 +78,23 @@ variable "ssh_pub_key" { EOT } +variable "core_node_pool" { + type = object({ + name : optional(string, ""), + min : optional(number, 1), + max : optional(number, 10), + vm_size : string, + labels : optional(map(string), {}), + taints : optional(list(string), []), + os_disk_size_gb : optional(number, 40), + kubernetes_version : optional(string, "") + }) + description = "Core node pool" +} + variable "notebook_nodes" { type = map(object({ + name : optional(string, ""), min : number, max : number, vm_size : string, From 7eb118d15445f38adeac13b992e0eae77e3b4034 Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Tue, 9 Jan 2024 09:02:26 +0100 Subject: [PATCH 2/8] terraform, utoronto: upgrade from 1.26.3 to 1.27.7 --- terraform/azure/projects/utoronto.tfvars | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/terraform/azure/projects/utoronto.tfvars b/terraform/azure/projects/utoronto.tfvars index 8c3eeaddaf..d06c6b8007 100644 --- a/terraform/azure/projects/utoronto.tfvars +++ b/terraform/azure/projects/utoronto.tfvars @@ -11,7 +11,7 @@ ssh_pub_key = "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDQJ4h39UYNi1wybxAH+jCFkNK2 # FIXME: upgrade to 1.27.7, and then 1.28.3, based on the latest versions # available via: az aks get-versions --location westus2 -o table # -kubernetes_version = "1.26.3" +kubernetes_version = "1.27.7" core_node_pool = { name : "core", From 04f93699993d344deeea7e20a45fe640f486901d Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Tue, 9 Jan 2024 17:41:00 +0100 Subject: [PATCH 3/8] terraform, azure and utoronto: k8s upgrade with misc variables to support it --- terraform/azure/main.tf | 23 +++++++----- terraform/azure/projects/utoronto.tfvars | 47 ++++++++++++++++++------ terraform/azure/variables.tf | 19 ++++++---- 3 files changed, 60 insertions(+), 29 deletions(-) diff --git a/terraform/azure/main.tf b/terraform/azure/main.tf index 
8f8f94d0cf..5777b68340 100644 --- a/terraform/azure/main.tf +++ b/terraform/azure/main.tf @@ -99,7 +99,7 @@ resource "azurerm_kubernetes_cluster" "jupyterhub" { # # Most changes to this node pool forces a replace operation on the entire # cluster. This can be avoided with v3.47.0+ of this provider by declaring - # temporary_name_for_rotation = "core-b". + # temporary_name_for_rotation = "coreb". # # ref: https://github.com/hashicorp/terraform-provider-azurerm/pull/20628 # ref: https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/kubernetes_cluster#temporary_name_for_rotation. @@ -108,9 +108,11 @@ resource "azurerm_kubernetes_cluster" "jupyterhub" { name = var.core_node_pool.name vm_size = var.core_node_pool.vm_size os_disk_size_gb = var.core_node_pool.os_disk_size_gb - enable_auto_scaling = true - min_count = var.core_node_pool.min - max_count = var.core_node_pool.max + enable_auto_scaling = var.core_node_pool.enable_auto_scaling + min_count = var.core_node_pool.enable_auto_scaling ? var.core_node_pool.min : null + max_count = var.core_node_pool.enable_auto_scaling ? var.core_node_pool.max : null + node_count = var.core_node_pool.node_count + kubelet_disk_type = var.core_node_pool.kubelet_disk_type vnet_subnet_id = azurerm_subnet.node_subnet.id node_labels = merge({ "hub.jupyter.org/node-purpose" = "core", @@ -147,13 +149,14 @@ resource "azurerm_kubernetes_cluster" "jupyterhub" { resource "azurerm_kubernetes_cluster_node_pool" "user_pool" { - for_each = var.notebook_nodes + for_each = var.user_node_pools name = coalesce(each.value.name, each.key) kubernetes_cluster_id = azurerm_kubernetes_cluster.jupyterhub.id enable_auto_scaling = true - os_disk_size_gb = 200 + os_disk_size_gb = each.value.os_disk_size_gb vnet_subnet_id = azurerm_subnet.node_subnet.id + kubelet_disk_type = each.value.kubelet_disk_type orchestrator_version = each.value.kubernetes_version == "" ? 
var.kubernetes_version : each.value.kubernetes_version @@ -173,14 +176,14 @@ resource "azurerm_kubernetes_cluster_node_pool" "user_pool" { } resource "azurerm_kubernetes_cluster_node_pool" "dask_pool" { - # If dask_nodes is set, we use that. If it isn't, we use notebook_nodes. - # This lets us set dask_nodes to an empty array to get no dask nodes - for_each = var.dask_nodes + # If dask_node_pools is set, we use that. If it isn't, we use user_node_pools. + # This lets us set dask_node_pools to an empty array to get no dask nodes + for_each = var.dask_node_pools name = "dask${each.key}" kubernetes_cluster_id = azurerm_kubernetes_cluster.jupyterhub.id enable_auto_scaling = true - os_disk_size_gb = 200 + os_disk_size_gb = each.value.os_disk_size_gb vnet_subnet_id = azurerm_subnet.node_subnet.id orchestrator_version = each.value.kubernetes_version == "" ? var.kubernetes_version : each.value.kubernetes_version diff --git a/terraform/azure/projects/utoronto.tfvars b/terraform/azure/projects/utoronto.tfvars index d06c6b8007..c4246f9bf6 100644 --- a/terraform/azure/projects/utoronto.tfvars +++ b/terraform/azure/projects/utoronto.tfvars @@ -8,18 +8,40 @@ location = "canadacentral" storage_size = 8192 ssh_pub_key = "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDQJ4h39UYNi1wybxAH+jCFkNK2aqRcuhDkQSMx0Hak5xkbt3KnT3cOwAgUP1Vt/SjhltSTuxpOHxiAKCRnjwRk60SxKhUNzPHih2nkfYTmBBjmLfdepDPSke/E0VWvTDIEXz/L8vW8aI0QGPXnXyqzEDO9+U1buheBlxB0diFAD3vEp2SqBOw+z7UgrGxXPdP+2b3AV+X6sOtd6uSzpV8Qvdh+QAkd4r7h9JrkFvkrUzNFAGMjlTb0Lz7qAlo4ynjEwzVN2I1i7cVDKgsGz9ZG/8yZfXXx+INr9jYtYogNZ63ajKR/dfjNPovydhuz5zQvQyxpokJNsTqt1CiWEUNj georgiana@georgiana" -# FIXME: upgrade to 1.27.7, and then 1.28.3, based on the latest versions -# available via: az aks get-versions --location westus2 -o table -# -kubernetes_version = "1.27.7" +# List available versions via: az aks get-versions --location westus2 -o table +kubernetes_version = "1.28.3" core_node_pool = { name : "core", + kubernetes_version : "1.28.3", + + # FIXME: 
transition to "Standard_E2s_v5" nodes as they are large enough and + # can more cheaply handle being forced to have 2-3 replicas for silly + # reasons like three calico-typha pods. See + # https://github.com/2i2c-org/infrastructure/issues/3592#issuecomment-1883269632. + # + # Transitioning to E2s_v5 would require reducing the requested memory + # by prometheus-server though, but that should be okay since + # prometheus has reduced its memory profile significant enough recently. + # vm_size : "Standard_E4s_v3", - kubernetes_version : "1.26.3", + + # FIXME: stop using persistent disks for the nodes, use the variable default + # "Temporary" instead + kubelet_disk_type : "OS", + + # FIXME: use a larger os_disk_size_gb than 40, like the default of 100, to + # avoid running low when few replicas are used + os_disk_size_gb : 40, + + # FIXME: its nice to use autoscaling, but we end up with three replicas due to + # https://github.com/2i2c-org/infrastructure/issues/3592#issuecomment-1883269632 + # and its a waste at least using Standard_E4s_v3 machines. 
+ enable_auto_scaling : false, + node_count : 2, } -notebook_nodes = { +user_node_pools = { "default" : { name : "nbdefault", # NOTE: min-max below was set to 0-86 retroactively to align with @@ -33,11 +55,12 @@ notebook_nodes = { "hub.jupyter.org/node-size" = "Standard_E8s_v3", }, kubernetes_version : "1.26.3", + # FIXME: stop using persistent disks for the nodes, use Temporary instead + kubelet_disk_type : "OS", }, - #"usere8sv5" : { - # min : 0, - # max : 100, - # vm_size : "Standard_E8s_v5", - # kubernetes_version : "1.28.3", - #} + "usere8sv5" : { + min : 0, + max : 100, + vm_size : "Standard_E8s_v5", + } } diff --git a/terraform/azure/variables.tf b/terraform/azure/variables.tf index a7982e1c55..dda5c1fab6 100644 --- a/terraform/azure/variables.tf +++ b/terraform/azure/variables.tf @@ -81,18 +81,21 @@ variable "ssh_pub_key" { variable "core_node_pool" { type = object({ name : optional(string, ""), + enable_auto_scaling = optional(bool, true), min : optional(number, 1), max : optional(number, 10), + node_count : optional(number), vm_size : string, labels : optional(map(string), {}), taints : optional(list(string), []), - os_disk_size_gb : optional(number, 40), - kubernetes_version : optional(string, "") + os_disk_size_gb : optional(number, 100), + kubernetes_version : optional(string, ""), + kubelet_disk_type : optional(string, "Temporary"), }) description = "Core node pool" } -variable "notebook_nodes" { +variable "user_node_pools" { type = map(object({ name : optional(string, ""), min : number, @@ -100,20 +103,22 @@ variable "notebook_nodes" { vm_size : string, labels : optional(map(string), {}), taints : optional(list(string), []), - kubernetes_version : optional(string, "") + os_disk_size_gb : optional(number, 200), + kubernetes_version : optional(string, ""), + kubelet_disk_type : optional(string, "Temporary"), })) - description = "Notebook node pools to create" + description = "User node pools to create" default = {} } -variable "dask_nodes" { +variable 
"dask_node_pools" { type = map(object({ min : number, max : number, vm_size : string, labels : optional(map(string), {}), taints : optional(list(string), []), - kubernetes_version : optional(string, "") + kubernetes_version : optional(string, ""), })) description = "Dask node pools to create" default = {} From ed82584f65c608251bf0c86723da688b072bfa23 Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Tue, 9 Jan 2024 17:42:07 +0100 Subject: [PATCH 4/8] terraform, utoronto: cleanup old user node pool --- terraform/azure/projects/utoronto.tfvars | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/terraform/azure/projects/utoronto.tfvars b/terraform/azure/projects/utoronto.tfvars index c4246f9bf6..361f726e2e 100644 --- a/terraform/azure/projects/utoronto.tfvars +++ b/terraform/azure/projects/utoronto.tfvars @@ -42,25 +42,9 @@ core_node_pool = { } user_node_pools = { - "default" : { - name : "nbdefault", - # NOTE: min-max below was set to 0-86 retroactively to align with - # observed state without understanding on why 0-86 was picked. - min : 0, - max : 86, - # FIXME: upgrade user nodes vm_size to Standard_E8s_v5 - vm_size : "Standard_E8s_v3", - # FIXME: remove this label - labels : { - "hub.jupyter.org/node-size" = "Standard_E8s_v3", - }, - kubernetes_version : "1.26.3", - # FIXME: stop using persistent disks for the nodes, use Temporary instead - kubelet_disk_type : "OS", - }, "usere8sv5" : { min : 0, max : 100, vm_size : "Standard_E8s_v5", - } + }, } From 9485b3ddff566541a9d9d9c5219ae23f5cc4befc Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Tue, 9 Jan 2024 18:06:48 +0100 Subject: [PATCH 5/8] terraform, utoronto: remove resolved comment This comment was relevant before https://github.com/2i2c-org/infrastructure/pull/3595, but not after. 
--- terraform/azure/projects/utoronto.tfvars | 4 ---- 1 file changed, 4 deletions(-) diff --git a/terraform/azure/projects/utoronto.tfvars b/terraform/azure/projects/utoronto.tfvars index 361f726e2e..fc51571045 100644 --- a/terraform/azure/projects/utoronto.tfvars +++ b/terraform/azure/projects/utoronto.tfvars @@ -20,10 +20,6 @@ core_node_pool = { # reasons like three calico-typha pods. See # https://github.com/2i2c-org/infrastructure/issues/3592#issuecomment-1883269632. # - # Transitioning to E2s_v5 would require reducing the requested memory - # by prometheus-server though, but that should be okay since - # prometheus has reduced its memory profile significant enough recently. - # vm_size : "Standard_E4s_v3", # FIXME: stop using persistent disks for the nodes, use the variable default From d02bc52b4653c5216821b0508ff77aef624fd612 Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Wed, 10 Jan 2024 09:18:39 +0100 Subject: [PATCH 6/8] terraform, azure: autoscaling core node pool and refactor to reduce duplication --- terraform/azure/main.tf | 118 +++++++++++------------ terraform/azure/projects/utoronto.tfvars | 66 +++++++------ terraform/azure/variables.tf | 45 ++------- 3 files changed, 102 insertions(+), 127 deletions(-) diff --git a/terraform/azure/main.tf b/terraform/azure/main.tf index 5777b68340..36e0b567d7 100644 --- a/terraform/azure/main.tf +++ b/terraform/azure/main.tf @@ -37,6 +37,7 @@ terraform { provider "azuread" { tenant_id = var.tenant_id } + provider "azurerm" { subscription_id = var.subscription_id features {} @@ -93,35 +94,6 @@ resource "azurerm_kubernetes_cluster" "jupyterhub" { } } - # default_node_pool must be set, and it must be a node pool of system type - # that can't scale to zero. Due to that we are forced to use it, and have - # decided to use it as our core node pool. - # - # Most changes to this node pool forces a replace operation on the entire - # cluster. 
This can be avoided with v3.47.0+ of this provider by declaring - # temporary_name_for_rotation = "coreb". - # - # ref: https://github.com/hashicorp/terraform-provider-azurerm/pull/20628 - # ref: https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/kubernetes_cluster#temporary_name_for_rotation. - # - default_node_pool { - name = var.core_node_pool.name - vm_size = var.core_node_pool.vm_size - os_disk_size_gb = var.core_node_pool.os_disk_size_gb - enable_auto_scaling = var.core_node_pool.enable_auto_scaling - min_count = var.core_node_pool.enable_auto_scaling ? var.core_node_pool.min : null - max_count = var.core_node_pool.enable_auto_scaling ? var.core_node_pool.max : null - node_count = var.core_node_pool.node_count - kubelet_disk_type = var.core_node_pool.kubelet_disk_type - vnet_subnet_id = azurerm_subnet.node_subnet.id - node_labels = merge({ - "hub.jupyter.org/node-purpose" = "core", - "k8s.dask.org/node-purpose" = "core" - }, var.core_node_pool.labels) - - orchestrator_version = coalesce(var.core_node_pool.kubernetes_version, var.kubernetes_version) - } - auto_scaler_profile { skip_nodes_with_local_storage = true } @@ -131,7 +103,8 @@ resource "azurerm_kubernetes_cluster" "jupyterhub" { } network_profile { - # I don't trust Azure CNI + # Azure CNI is the default, but we don't trust it to be reliable, so we've + # opted to use kubenet instead network_plugin = "kubenet" network_policy = "calico" } @@ -144,68 +117,92 @@ resource "azurerm_kubernetes_cluster" "jupyterhub" { client_secret = azuread_service_principal_password.service_principal_password[0].value } } -} + # default_node_pool must be set, and it must be a node pool of system type + # that can't scale to zero. Due to that we are forced to use it, and have + # decided to use it as our core node pool. + # + # Most changes to this node pool forces a replace operation on the entire + # cluster. 
This can be avoided with v3.47.0+ of this provider by declaring + # temporary_name_for_rotation = "coreb". + # + # ref: https://github.com/hashicorp/terraform-provider-azurerm/pull/20628 + # ref: https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/resources/kubernetes_cluster#temporary_name_for_rotation. + # + default_node_pool { + name = var.node_pools["core"][0].name + vm_size = var.node_pools["core"][0].vm_size + os_disk_size_gb = var.node_pools["core"][0].os_disk_size_gb + kubelet_disk_type = var.node_pools["core"][0].kubelet_disk_type + enable_auto_scaling = true + min_count = var.node_pools["core"][0].min + max_count = var.node_pools["core"][0].max + + node_labels = merge({ + "hub.jupyter.org/node-purpose" = "core", + "k8s.dask.org/node-purpose" = "core" + }, var.node_pools["core"][0].labels) + node_taints = concat([], var.node_pools["core"][0].taints) + orchestrator_version = coalesce(var.node_pools["core"][0].kubernetes_version, var.kubernetes_version) -resource "azurerm_kubernetes_cluster_node_pool" "user_pool" { - for_each = var.user_node_pools + vnet_subnet_id = azurerm_subnet.node_subnet.id + } +} - name = coalesce(each.value.name, each.key) - kubernetes_cluster_id = azurerm_kubernetes_cluster.jupyterhub.id - enable_auto_scaling = true - os_disk_size_gb = each.value.os_disk_size_gb - vnet_subnet_id = azurerm_subnet.node_subnet.id - kubelet_disk_type = each.value.kubelet_disk_type - orchestrator_version = each.value.kubernetes_version == "" ? 
var.kubernetes_version : each.value.kubernetes_version +resource "azurerm_kubernetes_cluster_node_pool" "user_pool" { + for_each = { for i, v in var.node_pools["user"] : v.name => v } + + name = each.value.name + vm_size = each.value.vm_size + os_disk_size_gb = each.value.os_disk_size_gb + kubelet_disk_type = each.value.kubelet_disk_type + enable_auto_scaling = true + min_count = each.value.min + max_count = each.value.max - vm_size = each.value.vm_size node_labels = merge({ "hub.jupyter.org/node-purpose" = "user", "k8s.dask.org/node-purpose" = "scheduler" }, each.value.labels) - node_taints = concat([ "hub.jupyter.org_dedicated=user:NoSchedule" ], each.value.taints) + orchestrator_version = each.value.kubernetes_version == "" ? var.kubernetes_version : each.value.kubernetes_version - min_count = each.value.min - max_count = each.value.max + kubernetes_cluster_id = azurerm_kubernetes_cluster.jupyterhub.id + vnet_subnet_id = azurerm_subnet.node_subnet.id } -resource "azurerm_kubernetes_cluster_node_pool" "dask_pool" { - # If dask_node_pools is set, we use that. If it isn't, we use user_node_pools. - # This lets us set dask_node_pools to an empty array to get no dask nodes - for_each = var.dask_node_pools - name = "dask${each.key}" - kubernetes_cluster_id = azurerm_kubernetes_cluster.jupyterhub.id - enable_auto_scaling = true - os_disk_size_gb = each.value.os_disk_size_gb - vnet_subnet_id = azurerm_subnet.node_subnet.id +resource "azurerm_kubernetes_cluster_node_pool" "dask_pool" { + for_each = { for i, v in var.node_pools["dask"] : v.name => v } - orchestrator_version = each.value.kubernetes_version == "" ? 
var.kubernetes_version : each.value.kubernetes_version + name = each.value.name + vm_size = each.value.vm_size + os_disk_size_gb = each.value.os_disk_size_gb + kubelet_disk_type = each.value.kubelet_disk_type + enable_auto_scaling = true + min_count = each.value.min + max_count = each.value.max - vm_size = each.value.vm_size node_labels = merge({ "k8s.dask.org/node-purpose" = "worker", }, each.value.labels) - node_taints = concat([ "k8s.dask.org_dedicated=worker:NoSchedule" ], each.value.taints) + orchestrator_version = each.value.kubernetes_version == "" ? var.kubernetes_version : each.value.kubernetes_version - min_count = each.value.min - max_count = each.value.max + kubernetes_cluster_id = azurerm_kubernetes_cluster.jupyterhub.id + vnet_subnet_id = azurerm_subnet.node_subnet.id } -# AZure container registry resource "azurerm_container_registry" "container_registry" { - # meh, only alphanumberic chars. No separators. BE CONSISTENT, AZURE name = var.global_container_registry_name resource_group_name = azurerm_resource_group.jupyterhub.name location = azurerm_resource_group.jupyterhub.location @@ -213,6 +210,7 @@ resource "azurerm_container_registry" "container_registry" { admin_enabled = true } + locals { registry_creds = { "imagePullSecret" = { diff --git a/terraform/azure/projects/utoronto.tfvars b/terraform/azure/projects/utoronto.tfvars index fc51571045..e3c3bb8daa 100644 --- a/terraform/azure/projects/utoronto.tfvars +++ b/terraform/azure/projects/utoronto.tfvars @@ -11,36 +11,38 @@ ssh_pub_key = "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDQJ4h39UYNi1wybxAH+jCFkNK2 # List available versions via: az aks get-versions --location westus2 -o table kubernetes_version = "1.28.3" -core_node_pool = { - name : "core", - kubernetes_version : "1.28.3", - - # FIXME: transition to "Standard_E2s_v5" nodes as they are large enough and - # can more cheaply handle being forced to have 2-3 replicas for silly - # reasons like three calico-typha pods. 
See - # https://github.com/2i2c-org/infrastructure/issues/3592#issuecomment-1883269632. - # - vm_size : "Standard_E4s_v3", - - # FIXME: stop using persistent disks for the nodes, use the variable default - # "Temporary" instead - kubelet_disk_type : "OS", - - # FIXME: use a larger os_disk_size_gb than 40, like the default of 100, to - # avoid running low when few replicas are used - os_disk_size_gb : 40, - - # FIXME: its nice to use autoscaling, but we end up with three replicas due to - # https://github.com/2i2c-org/infrastructure/issues/3592#issuecomment-1883269632 - # and its a waste at least using Standard_E4s_v3 machines. - enable_auto_scaling : false, - node_count : 2, -} - -user_node_pools = { - "usere8sv5" : { - min : 0, - max : 100, - vm_size : "Standard_E8s_v5", - }, +node_pools = { + core : [ + { + name : "core", + + # FIXME: transition to "Standard_E2s_v5" nodes as they are large enough and + # can more cheaply handle being forced to have 2-3 replicas for silly + # reasons like three calico-typha pods. See + # https://github.com/2i2c-org/infrastructure/issues/3592#issuecomment-1883269632. 
+ # + vm_size : "Standard_E4s_v3", + + os_disk_size_gb : 40, + + # FIXME: stop using persistent disks for the nodes, use the variable default + # "Temporary" instead + kubelet_disk_type : "OS", + + min : 1, + max : 10, + }, + ], + + user : [ + { + name : "usere8sv5", + vm_size : "Standard_E8s_v5", + os_disk_size_gb : 200, + min : 0, + max : 100, + }, + ], + + dask : [] } diff --git a/terraform/azure/variables.tf b/terraform/azure/variables.tf index dda5c1fab6..afeee2db6e 100644 --- a/terraform/azure/variables.tf +++ b/terraform/azure/variables.tf @@ -78,50 +78,25 @@ variable "ssh_pub_key" { EOT } -variable "core_node_pool" { - type = object({ - name : optional(string, ""), - enable_auto_scaling = optional(bool, true), - min : optional(number, 1), - max : optional(number, 10), - node_count : optional(number), +variable "node_pools" { + type = map(list(object({ + name : string, vm_size : string, - labels : optional(map(string), {}), - taints : optional(list(string), []), os_disk_size_gb : optional(number, 100), - kubernetes_version : optional(string, ""), kubelet_disk_type : optional(string, "Temporary"), - }) - description = "Core node pool" -} - -variable "user_node_pools" { - type = map(object({ - name : optional(string, ""), min : number, max : number, - vm_size : string, labels : optional(map(string), {}), taints : optional(list(string), []), - os_disk_size_gb : optional(number, 200), kubernetes_version : optional(string, ""), - kubelet_disk_type : optional(string, "Temporary"), - })) - description = "User node pools to create" - default = {} -} + }))) + description = <<-EOT + Node pools to create to be listed under the keys 'core', 'user', and 'dask'. 
-variable "dask_node_pools" { - type = map(object({ - min : number, - max : number, - vm_size : string, - labels : optional(map(string), {}), - taints : optional(list(string), []), - kubernetes_version : optional(string, ""), - })) - description = "Dask node pools to create" - default = {} + There should be exactly one core node pool. The core node pool is given a + special treatment by being listed directly in the cluster resource's + 'default_node_pool' field. + EOT } variable "create_service_principal" { From d6d89f53721960baf2c257051e360d17ccc25b7e Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Wed, 10 Jan 2024 09:51:21 +0100 Subject: [PATCH 7/8] utoronto, terraform: refine comments slightly --- terraform/azure/projects/utoronto.tfvars | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/terraform/azure/projects/utoronto.tfvars b/terraform/azure/projects/utoronto.tfvars index e3c3bb8daa..f7888250e5 100644 --- a/terraform/azure/projects/utoronto.tfvars +++ b/terraform/azure/projects/utoronto.tfvars @@ -1,3 +1,11 @@ +# IMPORTANT: Due to a restrictive network rule from storage.tf, we can't perform +# "terraform plan" or "terraform apply" without a workaround. +# +# One known workaround is to allow your public IP temporarily as +# discussed in https://github.com/2i2c-org/infrastructure/issues/890#issuecomment-1879072422. +# This workaround is problematic as that may temporarily allow access +# to storage by other actors with the same IP. +# tenant_id = "78aac226-2f03-4b4d-9037-b46d56c55210" subscription_id = "ead3521a-d994-4a44-a68d-b16e35642d5b" resourcegroup_name = "2i2c-utoronto-cluster" @@ -16,17 +24,20 @@ node_pools = { { name : "core", - # FIXME: transition to "Standard_E2s_v5" nodes as they are large enough and - # can more cheaply handle being forced to have 2-3 replicas for silly - # reasons like three calico-typha pods. See - # https://github.com/2i2c-org/infrastructure/issues/3592#issuecomment-1883269632. 
+ # FIXME: Transition to "Standard_E2s_v5" nodes as they are large enough + # for the biggest workload (prometheus-server) and can handle high + # availability requirements better. + # + # We are currently forced to handle three calico-typha pods that + # can't schedule on the same node, see https://github.com/2i2c-org/infrastructure/issues/3592#issuecomment-1883269632. # vm_size : "Standard_E4s_v3", + # core nodes don't need much disk space os_disk_size_gb : 40, - # FIXME: stop using persistent disks for the nodes, use the variable default - # "Temporary" instead + # FIXME: Stop using persistent disks for the nodes, use the variable default + # "Temporary" instead by removing this line. kubelet_disk_type : "OS", min : 1, From d0ad3a226d6ba25bdaecfef40ec13bdf1ea3185c Mon Sep 17 00:00:00 2001 From: Erik Sundell Date: Thu, 11 Jan 2024 08:54:10 +0100 Subject: [PATCH 8/8] terraform, azure: add node_pools variable validation --- terraform/azure/projects/utoronto.tfvars | 2 +- terraform/azure/variables.tf | 41 +++++++++++++++++------- 2 files changed, 31 insertions(+), 12 deletions(-) diff --git a/terraform/azure/projects/utoronto.tfvars b/terraform/azure/projects/utoronto.tfvars index f7888250e5..6a552efa24 100644 --- a/terraform/azure/projects/utoronto.tfvars +++ b/terraform/azure/projects/utoronto.tfvars @@ -55,5 +55,5 @@ node_pools = { }, ], - dask : [] + dask : [], } diff --git a/terraform/azure/variables.tf b/terraform/azure/variables.tf index afeee2db6e..85856ea2e3 100644 --- a/terraform/azure/variables.tf +++ b/terraform/azure/variables.tf @@ -79,17 +79,21 @@ variable "ssh_pub_key" { } variable "node_pools" { - type = map(list(object({ - name : string, - vm_size : string, - os_disk_size_gb : optional(number, 100), - kubelet_disk_type : optional(string, "Temporary"), - min : number, - max : number, - labels : optional(map(string), {}), - taints : optional(list(string), []), - kubernetes_version : optional(string, ""), - }))) + type = map( + list( + object({ + 
name : string, + vm_size : string, + os_disk_size_gb : optional(number, 100), + kubelet_disk_type : optional(string, "Temporary"), + min : number, + max : number, + labels : optional(map(string), {}), + taints : optional(list(string), []), + kubernetes_version : optional(string, ""), + }) + ) + ) description = <<-EOT Node pools to create to be listed under the keys 'core', 'user', and 'dask'. @@ -97,6 +101,21 @@ variable "node_pools" { special treatment by being listed directly in the cluster resource's 'default_node_pool' field. EOT + + validation { + condition = length(var.node_pools["core"]) == 1 + error_message = "The core node pool is mapped to the cluster resource's `default_node_pool`, due to this we require exactly one core node pool to be specified." + } + + validation { + condition = length(setsubtract(keys(var.node_pools), ["core", "user", "dask"])) == 0 + error_message = "Only three kinds of node pools supported: 'core', 'user', and 'dask'." + } + + validation { + condition = length(setintersection(keys(var.node_pools), ["core", "user", "dask"])) == 3 + error_message = "All three kinds of node pools ('core', 'user', and 'dask') must be declared, even if they are empty lists of node pools." + } } variable "create_service_principal" {