From c72ede4b6d4e41650d359012afb2f9731fbbe149 Mon Sep 17 00:00:00 2001 From: Drew Viles Date: Mon, 8 Jan 2024 14:12:18 +0000 Subject: [PATCH] fix: Added GPU node selector This prevents a GPU-destined pod from ending up on non-GPU nodes. This has been seen in the wild even though the pod's resources request specifies a GPU. --- charts/dogkat/templates/gpu-pod.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/charts/dogkat/templates/gpu-pod.yaml b/charts/dogkat/templates/gpu-pod.yaml index 1817626..1b929da 100644 --- a/charts/dogkat/templates/gpu-pod.yaml +++ b/charts/dogkat/templates/gpu-pod.yaml @@ -14,13 +14,14 @@ metadata: {{- include "e2e-testing.gpu.labels" . | nindent 4 }} spec: restartPolicy: OnFailure + nodeSelector: + nvidia.com/gpu.present: "true" containers: - name: nvidia-vectoradd image: {{.Values.gpu.image.repo}}:{{.Values.gpu.image.tag}} terminationMessagePath: /dev/termination-log terminationMessagePolicy: File imagePullPolicy: IfNotPresent - {{- if .Values.gpu.resources }} resources: "nvidia.com/gpu": {{ .Values.gpu.numberOfGPUs }}