Merge pull request #3344 from consideRatio/pr/dask-options
daskhub: provide worker resource options for 16CPU/128GB nodes on GKE/EKS
consideRatio authored Nov 20, 2023
2 parents b39e0c1 + 8ac8085 commit 8b3d7d2
Showing 4 changed files with 151 additions and 78 deletions.
62 changes: 0 additions & 62 deletions config/clusters/jupyter-meets-the-earth/common.values.yaml
@@ -284,65 +284,3 @@ dask-gateway:
memory:
request: 2G
limit: 500G

# Note that we are overriding options provided in 2i2c's helm chart that has
# default values for these config entries.
#
extraConfig:
# This configuration represents options that can be presented to users
# who want to create a Dask cluster using dask-gateway. For more
# details, see https://gateway.dask.org/cluster-options.html
#
# The goal is to provide a simple configuration that allows the user some
# flexibility while also fitting well on AWS nodes, which all have a 1:4
# ratio between CPU and GB of memory. By providing the username label, we
# help administrators track user pods.
option_handler: |
from dask_gateway_server.options import Options, Select, String, Mapping
def cluster_options(user):
def option_handler(options):
if ":" not in options.image:
raise ValueError("When specifying an image you must also provide a tag")
extra_labels = {}
scheduler_extra_pod_annotations = {
"prometheus.io/scrape": "true",
"prometheus.io/port": "8787",
}
chosen_worker_cpu = int(options.worker_specification.split("CPU")[0])
chosen_worker_memory = 4 * chosen_worker_cpu
# We multiply the requests by a fraction to ensure that the
# workers fit well within a node that needs some resources
# reserved for system pods.
return {
# A default image is suggested via DASK_GATEWAY__CLUSTER__OPTIONS__IMAGE env variable
"image": options.image,
"scheduler_extra_pod_labels": extra_labels,
"scheduler_extra_pod_annotations": scheduler_extra_pod_annotations,
"worker_extra_pod_labels": extra_labels,
"worker_cores": 0.85 * chosen_worker_cpu,
"worker_cores_limit": chosen_worker_cpu,
"worker_memory": "%fG" % (0.85 * chosen_worker_memory),
"worker_memory_limit": "%fG" % chosen_worker_memory,
"environment": options.environment,
}
return Options(
Select(
"worker_specification",
[
"1CPU, 4GB",
"2CPU, 8GB",
"4CPU, 16GB",
"8CPU, 32GB",
"16CPU, 64GB",
"32CPU, 128GB",
"64CPU, 256GB",
],
default="1CPU, 4GB",
label="Worker specification",
),
# The default image is set via DASK_GATEWAY__CLUSTER__OPTIONS__IMAGE env variable
String("image", label="Image"),
Mapping("environment", {}, label="Environment variables"),
handler=option_handler,
)
c.Backend.cluster_options = cluster_options
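For reference, the removed handler's sizing logic boils down to a couple of lines of arithmetic: memory is fixed at four times the chosen CPU count, and requests are scaled to 85% of the limits so workers still fit on nodes that reserve resources for system pods. A minimal standalone sketch of that arithmetic follows; the worker_resources wrapper name is hypothetical, while the field names and factors are taken from the removed block above.

# Sketch of the resource arithmetic used by the removed option handler above.
def worker_resources(worker_specification):
    chosen_worker_cpu = int(worker_specification.split("CPU")[0])
    chosen_worker_memory = 4 * chosen_worker_cpu  # fixed 1:4 CPU:GB ratio
    return {
        "worker_cores": 0.85 * chosen_worker_cpu,          # request leaves node headroom
        "worker_cores_limit": chosen_worker_cpu,            # limit matches the selection
        "worker_memory": "%fG" % (0.85 * chosen_worker_memory),
        "worker_memory_limit": "%fG" % chosen_worker_memory,
    }

print(worker_resources("4CPU, 16GB"))
# -> {'worker_cores': 3.4, 'worker_cores_limit': 4,
#     'worker_memory': '13.600000G', 'worker_memory_limit': '16.000000G'}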
19 changes: 19 additions & 0 deletions helm-charts/basehub/templates/configmap-cluster-info.yaml
@@ -0,0 +1,19 @@
kind: ConfigMap
apiVersion: v1
metadata:
name: basehub-cluster-info
labels:
helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }}
app.kubernetes.io/name: basehub
app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
data:
{{- $k8s_dist := "" }}
{{- if (.Capabilities.KubeVersion.Version | contains "gke") }}
{{- $k8s_dist = "gke" }}
{{- else if (.Capabilities.KubeVersion.Version | contains "eks") }}
{{- $k8s_dist = "eks" }}
{{- else }}
{{- $k8s_dist = "aks" }}
{{- end }}
K8S_DIST: {{ $k8s_dist }}
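The template infers the distribution from the Kubernetes server version string, falling back to aks when neither marker is found. Below is a minimal Python sketch of the same check, purely for illustration; the detect_k8s_dist name and the example version strings are assumptions, not part of the chart.

# Standalone sketch of the detection the Helm template above performs on
# .Capabilities.KubeVersion.Version.
def detect_k8s_dist(kube_version):
    if "gke" in kube_version:
        return "gke"
    elif "eks" in kube_version:
        return "eks"
    # anything that matches neither marker is assumed to be AKS
    return "aks"

print(detect_k8s_dist("v1.27.5-gke.200"))      # gke
print(detect_k8s_dist("v1.27.6-eks-a1b2c3d"))  # eks
print(detect_k8s_dist("v1.27.7"))              # aks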
6 changes: 6 additions & 0 deletions helm-charts/basehub/values.yaml
@@ -461,6 +461,12 @@ jupyterhub:
- value: "/rstudio"
title: RStudio
description: An IDE For R, created by the RStudio company
extraEnv:
BASEHUB_K8S_DIST:
valueFrom:
configMapKeyRef:
name: basehub-cluster-info
key: K8S_DIST
initContainers:
- name: templates-clone
image: alpine/git:2.40.1
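At runtime the hub pod can then branch on the injected value, just as the dask-gateway option handler further down does. A minimal sketch, assuming only that the variable has been injected as configured above; the fallback default is illustrative and not part of the chart.

import os

# BASEHUB_K8S_DIST is injected from the basehub-cluster-info ConfigMap shown above.
k8s_dist = os.environ.get("BASEHUB_K8S_DIST", "unknown")  # "gke", "eks", or "aks"
print("Running on:", k8s_dist)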
142 changes: 126 additions & 16 deletions helm-charts/daskhub/values.yaml
@@ -146,13 +146,32 @@ dask-gateway:
nodeSelector:
# Dask workers get their own pre-emptible pool
k8s.dask.org/node-purpose: worker
env:
- name: BASEHUB_K8S_DIST
valueFrom:
configMapKeyRef:
name: basehub-cluster-info
key: K8S_DIST

# TODO: figure out a replacement for userLimits.
extraConfig:
# This configuration represents options that can be presented to users
# who want to create a Dask cluster using the dask-gateway client.
#
# This configuration is meant to enable the user to request dask worker
# pods that fit well on 2i2c's clusters. Currently the only instance
# types used are n2-highmem-16 (on GKE) and r5.4xlarge (on EKS).
#
# - Documentation about exposing cluster options to users:
# https://gateway.dask.org/cluster-options.html
# - Reference for KubeClusterConfig, which is what can be configured:
# https://gateway.dask.org/api-server.html#kubeclusterconfig
#
optionHandler: |
from dask_gateway_server.options import Options, Integer, Float, String, Mapping
import os
import string
from dask_gateway_server.options import Integer, Mapping, Options, Select, String
# Escape a string to be dns-safe in the same way that KubeSpawner does it.
# Reference https://github.com/jupyterhub/kubespawner/blob/616f72c4aee26c3d2127c6af6086ec50d6cda383/kubespawner/spawner.py#L1828-L1835
# Adapted from https://github.com/minrk/escapism to avoid installing the package
@@ -177,40 +196,131 @@ dask-gateway:
chars.append(escaped_hex_char)
return u''.join(chars)
# Decide on the available instance types and the resource allocation
# choices to expose, based on cloud provider. For each daskhub hub
# managed by 2i2c, these instance types should be available.
#
cloud_provider = os.environ["BASEHUB_K8S_DIST"] # gke, eks, or aks
instance_types = {
"gke": ["n2-highmem-16"],
"eks": ["r5.4xlarge"],
# 2i2c doesn't yet manage any dask-gateway installations on AKS, so
# this hasn't been configured yet and may cause an error - but that
# is good, as we really should configure this if we set up
# dask-gateway on AKS anyhow.
# aks: [],
}
# NOTE: The figures below come from manual inspection of data that was
# collected in, and is currently only available at,
# https://github.com/2i2c-org/infrastructure/pull/3337.
#
resource_allocations = {
# n2-highmem-16 nodes in our clusters have 15.89 allocatable cores
# and 116.549Gi allocatable memory, and daemonsets are expected to
# add no more than 400m cores and 800Mi (0.781Gi) memory with some
# margin, so we get 15.49 cores and 115.768Gi available for worker
# pods to request.
#
# This is an initial conservative strategy, allowing a slight
# oversubscription of CPU but not any oversubscription of memory.
#
# To workaround https://github.com/dask/dask-gateway/issues/765, we
# round worker_cores down from [0.968, 1.936, 3.872, 7.745, 15.49]
# to [0.9, 1.9, 3.8, 7.7, 15.4].
#
"n2-highmem-16": {
"1CPU, 7.2Gi": {"worker_cores": 0.9, "worker_cores_limit": 1, "worker_memory": "7.235G", "worker_memory_limit": "7.235G"},
"2CPU, 14.5Gi": {"worker_cores": 1.9, "worker_cores_limit": 2, "worker_memory": "14.471G", "worker_memory_limit": "14.471G"},
"4CPU, 28.9Gi": {"worker_cores": 3.8, "worker_cores_limit": 4, "worker_memory": "28.942G", "worker_memory_limit": "28.942G"},
"8CPU, 57.9Gi": {"worker_cores": 7.7, "worker_cores_limit": 8, "worker_memory": "57.884G", "worker_memory_limit": "57.884G"},
"16CPU, 115.8Gi": {"worker_cores": 15.4, "worker_cores_limit": 16, "worker_memory": "115.768G", "worker_memory_limit": "115.768G"},
},
# r5.4xlarge nodes in our clusters have 15.89 allocatable cores and
# 121.504Gi allocatable memory, and daemonsets are expected to add
# no more than 400m cores and 800Mi (0.781Gi) memory with some
# margin, so we get 15.49 cores and 120.723Gi available for worker
# pods to request.
#
# This is an initial conservative strategy, allowing a slight
# oversubscription of CPU but not any oversubscription of memory.
#
# To workaround https://github.com/dask/dask-gateway/issues/765, we
# round worker_cores down from [0.968, 1.936, 3.872, 7.745, 15.49]
# to [0.9, 1.9, 3.8, 7.7, 15.4].
#
"r5.4xlarge": {
"1CPU, 7.5Gi": {"worker_cores": 0.9, "worker_cores_limit": 1, "worker_memory": "7.545G", "worker_memory_limit": "7.545G"},
"2CPU, 15.1Gi": {"worker_cores": 1.9, "worker_cores_limit": 2, "worker_memory": "15.090G", "worker_memory_limit": "15.090G"},
"4CPU, 30.2Gi": {"worker_cores": 3.8, "worker_cores_limit": 4, "worker_memory": "30.180G", "worker_memory_limit": "30.180G"},
"8CPU, 60.4Gi": {"worker_cores": 7.7, "worker_cores_limit": 8, "worker_memory": "60.361G", "worker_memory_limit": "60.361G"},
"16CPU, 120.7Gi": {"worker_cores": 15.4, "worker_cores_limit": 16, "worker_memory": "120.723G", "worker_memory_limit": "120.723G"},
},
}
# For now we support only one instance type per cluster; listing it
# as an option is a way to help convey how things work a bit better.
it = instance_types[cloud_provider][0]
ra = resource_allocations[it]
ra_keys = list(ra.keys())
def cluster_options(user):
safe_username = escape_string_label_safe(user.name)
def option_handler(options):
if ":" not in options.image:
raise ValueError("When specifying an image you must also provide a tag")
extra_labels = {
"hub.jupyter.org/username": escape_string_label_safe(user.name),
}
scheduler_extra_pod_annotations = {
"hub.jupyter.org/username": safe_username,
"hub.jupyter.org/username": user.name,
"prometheus.io/scrape": "true",
"prometheus.io/port": "8787",
}
extra_labels = {
"hub.jupyter.org/username": safe_username,
worker_extra_pod_annotations = {
"hub.jupyter.org/username": user.name,
}
picked_ra = ra[options.worker_resource_allocation]
return {
"worker_cores_limit": options.worker_cores,
"worker_cores": options.worker_cores,
"worker_memory": "%fG" % options.worker_memory,
# A default image is suggested via DASK_GATEWAY__CLUSTER__OPTIONS__IMAGE env variable
"image": options.image,
"scheduler_extra_pod_annotations": scheduler_extra_pod_annotations,
"scheduler_extra_pod_labels": extra_labels,
"scheduler_extra_pod_annotations": scheduler_extra_pod_annotations,
"worker_extra_pod_labels": extra_labels,
"worker_extra_pod_annotations": worker_extra_pod_annotations,
"worker_cores": picked_ra["worker_cores"],
"worker_cores_limit": picked_ra["worker_cores_limit"],
"worker_memory": picked_ra["worker_memory"],
"worker_memory_limit": picked_ra["worker_memory_limit"],
"environment": options.environment,
"idle_timeout": options.idle_timeout_minutes * 60,
}
return Options(
Integer("worker_cores", 2, min=1, label="Worker Cores"),
Float("worker_memory", 4, min=1, label="Worker Memory (GiB)"),
# The default image is set via DASK_GATEWAY__CLUSTER__OPTIONS__IMAGE env variable
Select(
"instance_type",
[it],
default=it,
label="Instance type running worker containers",
),
Select(
"worker_resource_allocation",
ra_keys,
default=ra_keys[0],
label="Resources per worker container",
),
# The default image is pre-specified by the dask-gateway client
# via the env var DASK_GATEWAY__CLUSTER__OPTIONS__IMAGE set on
# the jupyterhub user pods
String("image", label="Image"),
Mapping("environment", {}, label="Environment Variables"),
Mapping("environment", {}, label="Environment variables (YAML)"),
Integer("idle_timeout_minutes", 30, min=0, label="Idle cluster terminated after (minutes)"),
handler=option_handler,
)
c.Backend.cluster_options = cluster_options
idle: |
# timeout after 30 minutes of inactivity
# timeout after 30 minutes of inactivity by default; keep this in sync
# with the default value of the user-exposed option idle_timeout_minutes
# configured above
c.KubeClusterConfig.idle_timeout = 1800
prefix: "/services/dask-gateway" # Users connect to the Gateway through the JupyterHub service.
auth:
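The resource_allocations tables in the daskhub diff above follow a simple recipe: subtract the daemonset margin from the node's allocatable capacity, split the remainder into 1/16th through whole-node shares, and round the CPU request down to work around https://github.com/dask/dask-gateway/issues/765. A standalone sketch of that arithmetic for n2-highmem-16 follows; the allocatable and margin figures are quoted from the diff comments, while the three-decimal truncation and the direct Gi-to-G relabeling are assumptions about how the exact table values were produced.

import math

# Figures quoted in the diff comments for n2-highmem-16 nodes.
allocatable_cores, allocatable_memory = 15.89, 116.549   # cores, Gi
daemonset_cores, daemonset_memory = 0.4, 0.781           # margin reserved for daemonsets

available_cores = allocatable_cores - daemonset_cores    # 15.49
available_memory = allocatable_memory - daemonset_memory # 115.768

for share in (16, 8, 4, 2, 1):  # 1/16th of a node up to a whole node
    cores = available_cores / share                      # 0.968, 1.936, 3.872, 7.745, 15.49
    cores = math.floor(cores * 10) / 10                  # 0.9, 1.9, 3.8, 7.7, 15.4 (issue 765 workaround)
    memory = math.floor(available_memory / share * 1000) / 1000
    print(f"{16 // share}CPU: worker_cores={cores}, worker_memory={memory}G")

# Output matches the n2-highmem-16 table above, e.g.
# 1CPU: worker_cores=0.9, worker_memory=7.235G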