From 2de32c7038f4b313d50ab67f2942e90e4374f599 Mon Sep 17 00:00:00 2001 From: Georgiana Dolocan Date: Fri, 22 Nov 2024 13:40:03 +0200 Subject: [PATCH 1/6] Add gpu prefix --- eksctl/nasa-cryo.jsonnet | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/eksctl/nasa-cryo.jsonnet b/eksctl/nasa-cryo.jsonnet index 44fa7f5ed..d4e84030e 100644 --- a/eksctl/nasa-cryo.jsonnet +++ b/eksctl/nasa-cryo.jsonnet @@ -63,7 +63,7 @@ local notebookNodes = [ }, { instanceType: "g4dn.xlarge", - namePrefix: "staging", + namePrefix: "gpu-staging", labels+: { "2i2c/hub-name": "staging" }, tags+: { "2i2c:hub-name": "staging", @@ -78,7 +78,7 @@ local notebookNodes = [ }, { instanceType: "g4dn.xlarge", - namePrefix: "prod", + namePrefix: "gpu-prod", labels+: { "2i2c/hub-name": "prod" }, tags+: { "2i2c:hub-name": "prod", From 6d95aaa7411eb9e76df1b46ea2979074fd93f8c4 Mon Sep 17 00:00:00 2001 From: Georgiana Dolocan Date: Fri, 22 Nov 2024 13:52:22 +0200 Subject: [PATCH 2/6] Switch nb prefix for gpu --- eksctl/2i2c-aws-us.jsonnet | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/eksctl/2i2c-aws-us.jsonnet b/eksctl/2i2c-aws-us.jsonnet index ba1a4797d..ae24d11c6 100644 --- a/eksctl/2i2c-aws-us.jsonnet +++ b/eksctl/2i2c-aws-us.jsonnet @@ -84,7 +84,7 @@ local notebookNodes = [ }, { instanceType: "g4dn.xlarge", - namePrefix: "nb-showcase", + namePrefix: "gpu-showcase", minSize: 0, labels+: { "2i2c/hub-name": "showcase" }, tags+: { @@ -119,7 +119,7 @@ local notebookNodes = [ }, { instanceType: "g4dn.xlarge", - namePrefix: "nb-ncar-cisl", + namePrefix: "gpu-ncar-cisl", minSize: 0, labels+: { "2i2c/hub-name": "ncar-cisl" }, tags+: { From 86ed62c5397f215fcfd6011bc699280831d62803 Mon Sep 17 00:00:00 2001 From: Georgiana Dolocan Date: Fri, 22 Nov 2024 14:01:18 +0200 Subject: [PATCH 3/6] Update gpu docs to include prefix, per hub split and cost allocation importance --- docs/howto/features/gpu.md | 9 +++++++-- eksctl/gridsst.jsonnet | 4 ++-- 2 files changed, 9 
insertions(+), 4 deletions(-) diff --git a/docs/howto/features/gpu.md b/docs/howto/features/gpu.md index b1ef83aa7..236762830 100644 --- a/docs/howto/features/gpu.md +++ b/docs/howto/features/gpu.md @@ -112,8 +112,12 @@ AWS, and we can configure a node group there to provide us GPUs. ``` { instanceType: "g4dn.xlarge", + namePrefix: "gpu-{{hub-name}}", + minSize: 0, + labels+: { "2i2c/hub-name": "{{hub-name}}" }, tags+: { - "k8s.io/cluster-autoscaler/node-template/resources/nvidia.com/gpu": "1" + "k8s.io/cluster-autoscaler/node-template/resources/nvidia.com/gpu": "1", + "2i2c:hub-name": "{{hub-name}}", }, taints+: { "nvidia.com/gpu": "present:NoSchedule" @@ -126,7 +130,8 @@ AWS, and we can configure a node group there to provide us GPUs. `g4dn.xlarge` gives us 1 Nvidia T4 GPU and ~4 CPUs. The `tags` definition is necessary to let the autoscaler know that this nodegroup has - 1 GPU per node. The `taints` definition is required to prevent scheduling of + 1 GPU per node and also for the cost attribution system to differentiate + between hubs. The `taints` definition is required to prevent scheduling of non-GPU pods onto the GPU nodes. If you're using a different machine type with more GPUs, adjust this definition accordingly. 
diff --git a/eksctl/gridsst.jsonnet b/eksctl/gridsst.jsonnet index 9e33e3ebc..cd575a072 100644 --- a/eksctl/gridsst.jsonnet +++ b/eksctl/gridsst.jsonnet @@ -132,7 +132,7 @@ local notebookNodes = [ { instanceType: "g4dn.xlarge", minSize: 0, - namePrefix: "nb-staging", + namePrefix: "gpu-staging", labels+: { "2i2c/hub-name": "staging" }, tags+: { "2i2c:hub-name": "staging", @@ -148,7 +148,7 @@ local notebookNodes = [ { instanceType: "g4dn.xlarge", minSize: 0, - namePrefix: "nb-prod", + namePrefix: "gpu-prod", labels+: { "2i2c/hub-name": "prod" }, tags+: { "2i2c:hub-name": "prod", From e3914eb3590bed8501547ada436718e91b77c71f Mon Sep 17 00:00:00 2001 From: Georgiana Dolocan Date: Fri, 22 Nov 2024 14:18:15 +0200 Subject: [PATCH 4/6] kitware: switch nb prefix for gpu --- eksctl/kitware.jsonnet | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/eksctl/kitware.jsonnet b/eksctl/kitware.jsonnet index 312e4f40b..e8dae2227 100644 --- a/eksctl/kitware.jsonnet +++ b/eksctl/kitware.jsonnet @@ -63,7 +63,7 @@ local notebookNodes = [ }, { instanceType: "g4dn.xlarge", - namePrefix: "nb-staging", + namePrefix: "gpu-staging", labels+: { "2i2c/hub-name": "staging" }, tags+: { "2i2c:hub-name": "staging", @@ -78,7 +78,7 @@ local notebookNodes = [ }, { instanceType: "g4dn.xlarge", - namePrefix: "nb-prod", + namePrefix: "gpu-prod", labels+: { "2i2c/hub-name": "prod" }, tags+: { "2i2c:hub-name": "prod", From c5d5fdef5bdd1eab04c3c6aa4259d3e7e38ccb17 Mon Sep 17 00:00:00 2001 From: Georgiana Dolocan Date: Fri, 22 Nov 2024 14:23:11 +0200 Subject: [PATCH 5/6] smithsonian: switch nb prefix for gpu --- eksctl/smithsonian.jsonnet | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/eksctl/smithsonian.jsonnet b/eksctl/smithsonian.jsonnet index 732dcf323..67bb34e2b 100644 --- a/eksctl/smithsonian.jsonnet +++ b/eksctl/smithsonian.jsonnet @@ -63,7 +63,7 @@ local notebookNodes = [ }, { instanceType: "g4dn.xlarge", - namePrefix: "nb-staging", + namePrefix:
"gpu-staging", labels+: { "2i2c/hub-name": "staging" }, tags+: { "2i2c:hub-name": "staging", @@ -78,7 +78,7 @@ local notebookNodes = [ }, { instanceType: "g4dn.xlarge", - namePrefix: "nb-prod", + namePrefix: "gpu-prod", labels+: { "2i2c/hub-name": "prod" }, tags+: { "2i2c:hub-name": "prod", From 7865fce4ae7dbf5459daa6a285518d58b071492f Mon Sep 17 00:00:00 2001 From: Georgiana Dolocan Date: Fri, 22 Nov 2024 14:34:52 +0200 Subject: [PATCH 6/6] jmte: switch nb prefix for gpu --- eksctl/jupyter-meets-the-earth.jsonnet | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/eksctl/jupyter-meets-the-earth.jsonnet b/eksctl/jupyter-meets-the-earth.jsonnet index d5977273e..2396f2bd0 100644 --- a/eksctl/jupyter-meets-the-earth.jsonnet +++ b/eksctl/jupyter-meets-the-earth.jsonnet @@ -112,7 +112,7 @@ local notebookNodes = [ { instanceType: "g4dn.xlarge", minSize: 0, - namePrefix: "nb-staging", + namePrefix: "gpu-staging", labels+: { "2i2c/hub-name": "staging" }, tags+: { "2i2c:hub-name": "staging", @@ -125,7 +125,7 @@ local notebookNodes = [ { instanceType: "g4dn.xlarge", minSize: 0, - namePrefix: "nb-prod", + namePrefix: "gpu-prod", labels+: { "2i2c/hub-name": "prod" }, tags+: { "2i2c:hub-name": "prod", @@ -138,7 +138,7 @@ local notebookNodes = [ { instanceType: "g4dn.4xlarge", minSize: 0, - namePrefix: "nb-staging", + namePrefix: "gpu-staging", labels+: { "2i2c/hub-name": "staging" }, tags+: { "2i2c:hub-name": "staging", @@ -151,7 +151,7 @@ local notebookNodes = [ { instanceType: "g4dn.4xlarge", minSize: 0, - namePrefix: "nb-prod", + namePrefix: "gpu-prod", labels+: { "2i2c/hub-name": "prod" }, tags+: { "2i2c:hub-name": "prod", @@ -164,7 +164,7 @@ local notebookNodes = [ { instanceType: "g4dn.16xlarge", minSize: 0, - namePrefix: "nb-staging", + namePrefix: "gpu-staging", labels+: { "2i2c/hub-name": "staging" }, taints+: { "nvidia.com/gpu": "NoSchedule" @@ -177,7 +177,7 @@ local notebookNodes = [ { instanceType: "g4dn.16xlarge", minSize: 0, - 
namePrefix: "nb-prod", + namePrefix: "gpu-prod", labels+: { "2i2c/hub-name": "prod" }, taints+: { "nvidia.com/gpu": "NoSchedule"