Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

terraform: switch to pd-balanced, reduce size to save cost #2102

Merged
merged 4 commits into from
Jan 25, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion terraform/modules/mybinder/versions.tf
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,5 @@ terraform {
version = "~> 3.0.0"
}
}
required_version = "~> 0.13"
required_version = "~> 1.1"
}
119 changes: 115 additions & 4 deletions terraform/prod/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ provider "google" {
}

locals {
gke_version = "1.17.14-gke.400"
gke_version = "1.19.14-gke.1900"
location = "us-central1" # for regional clusters
federation_members = ["gke-old", "gesis", "turing", "ovh"]
}
Expand All @@ -28,13 +28,64 @@ module "mybinder" {
}

# define node pools here; they are too hard to encode with variables
# note: when upgrading a node pool:
# 1. copy the pool to be upgraded and change the name
# 2. make the planned changes
# 3. deploy them with terraform
# 4. drain old pools (takes a while for user pools)
# 5. once drained, remove old pool(s) here
# 6. deploy again to remove old pool

# Node pool "core-202009", labelled as the "core" pool type, attached to the
# cluster created by the mybinder module.
# NOTE(review): the 0-1 autoscaling bounds look like the "old pool being
# drained" step of the node-pool upgrade procedure — confirm before removing.
resource "google_container_node_pool" "core" {
name = "core-202009"
cluster = module.mybinder.cluster_name
location = local.location # location of *cluster*
# node_locations lets us specify a single-zone regional cluster:
node_locations = ["${local.location}-a"]

# autoscaler bounds for this pool: between 0 and 1 node
autoscaling {
min_node_count = 0
max_node_count = 1
}

# GKE version requested for the pool; later drift is ignored (see lifecycle below)
version = local.gke_version

# per-node settings: machine type, disk, labels, scopes and metadata
node_config {
machine_type = "n1-highmem-4"
disk_size_gb = 250
disk_type = "pd-ssd"

# label identifying this pool's type as "core"
labels = {
"mybinder.org/pool-type" = "core"
}
# https://www.terraform.io/docs/providers/google/r/container_cluster.html#oauth_scopes-1
oauth_scopes = [
"storage-ro",
"logging-write",
"monitoring",
]

metadata = {
disable-legacy-endpoints = "true"
}
}

# do not recreate pools that have been auto-upgraded:
# changes to `version` are ignored when Terraform plans updates

lifecycle {
ignore_changes = [
version
]
}
}

resource "google_container_node_pool" "core1" {
name = "core-202201"
cluster = module.mybinder.cluster_name
location = local.location # location of *cluster*
# node_locations lets us specify a single-zone regional cluster:
node_locations = ["${local.location}-a"]

autoscaling {
min_node_count = 1
max_node_count = 4
Expand All @@ -45,7 +96,7 @@ resource "google_container_node_pool" "core" {
node_config {
machine_type = "n1-highmem-4"
disk_size_gb = 250
disk_type = "pd-ssd"
disk_type = "pd-balanced"

labels = {
"mybinder.org/pool-type" = "core"
Expand All @@ -61,6 +112,14 @@ resource "google_container_node_pool" "core" {
disable-legacy-endpoints = "true"
}
}

# do not recreate pools that have been auto-upgraded

lifecycle {
ignore_changes = [
version
]
}
}

resource "google_container_node_pool" "user" {
Expand All @@ -72,8 +131,8 @@ resource "google_container_node_pool" "user" {
version = local.gke_version

autoscaling {
min_node_count = 2
max_node_count = 12
min_node_count = 0
max_node_count = 1
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should not trigger an immediate scale-down. The autoscaler doesn't force these bounds to be satisfied continuously; instead, the new maximum should only prevent further scale-up.

}


Expand All @@ -97,6 +156,58 @@ resource "google_container_node_pool" "user" {
disable-legacy-endpoints = "true"
}
}

# do not recreate pools that have been auto-upgraded

lifecycle {
ignore_changes = [
version
]
}
}

# Node pool "user-202201", labelled as the "users" pool type, attached to the
# cluster created by the mybinder module.
resource "google_container_node_pool" "user1" {
name = "user-202201"
cluster = module.mybinder.cluster_name
location = local.location # location of *cluster*
# node_locations lets us specify a single-zone regional cluster:
node_locations = ["${local.location}-a"]
# GKE version requested for the pool; later drift is ignored (see lifecycle below)
version = local.gke_version

# autoscaler bounds for this pool: between 2 and 12 nodes
autoscaling {
min_node_count = 2
max_node_count = 12
}


# per-node settings: machine type, disks, labels, scopes and metadata
node_config {
machine_type = "n1-highmem-8"
# 800 GB balanced persistent disk, plus one local SSD per node
disk_size_gb = 800
disk_type = "pd-balanced"
local_ssd_count = 1

# label identifying this pool's type as "users"
labels = {
"mybinder.org/pool-type" = "users"
}
# https://www.terraform.io/docs/providers/google/r/container_cluster.html#oauth_scopes-1
oauth_scopes = [
"storage-ro",
"logging-write",
"monitoring",
]

metadata = {
disable-legacy-endpoints = "true"
}
}

# do not recreate pools that have been auto-upgraded:
# changes to `version` are ignored when Terraform plans updates

lifecycle {
ignore_changes = [
version
]
}
}

# other prod-only resources, not required for both prod and staging,
Expand Down
10 changes: 8 additions & 2 deletions terraform/staging/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ provider "google" {
}

locals {
gke_version = "1.17.14-gke.400"
gke_version = "1.19.14-gke.1900"
}

module "mybinder" {
Expand Down Expand Up @@ -49,6 +49,13 @@ resource "google_container_node_pool" "pool" {
disable-legacy-endpoints = "true"
}
}
# do not recreate pools that have been auto-upgraded

lifecycle {
ignore_changes = [
version
]
}
}

# output "public_ip" {
Expand All @@ -66,4 +73,3 @@ output "matomo_password" {
value = module.mybinder.matomo_password
sensitive = true
}