diff --git a/helm-chart/ray-cluster/README.md b/helm-chart/ray-cluster/README.md index 17e35465f5f..e727314bb90 100644 --- a/helm-chart/ray-cluster/README.md +++ b/helm-chart/ray-cluster/README.md @@ -84,6 +84,7 @@ helm uninstall raycluster | head.labels | object | `{}` | Labels for the head pod | | head.serviceAccountName | string | `""` | | | head.restartPolicy | string | `""` | | +| head.runtimeClassName | string | `""` | runtimeClassName is the name of the RuntimeClass to use to run the head Pod. | | head.containerEnv | list | `[]` | | | head.envFrom | list | `[]` | envFrom to pass to head pod | | head.resources.limits.cpu | string | `"1"` | | @@ -113,6 +114,7 @@ helm uninstall raycluster | worker.labels | object | `{}` | Labels for the worker pod | | worker.serviceAccountName | string | `""` | | | worker.restartPolicy | string | `""` | | +| worker.runtimeClassName | string | `""` | runtimeClassName is the name of the RuntimeClass to use to run the worker Pods. | | worker.initContainers | list | `[]` | Init containers to add to the worker pod | | worker.containerEnv | list | `[]` | | | worker.envFrom | list | `[]` | envFrom to pass to worker pod | @@ -142,6 +144,7 @@ helm uninstall raycluster | additionalWorkerGroups.smallGroup.labels | object | `{}` | Labels for the additional worker pod | | additionalWorkerGroups.smallGroup.serviceAccountName | string | `""` | | | additionalWorkerGroups.smallGroup.restartPolicy | string | `""` | | +| additionalWorkerGroups.smallGroup.runtimeClassName | string | `""` | runtimeClassName for this additional worker group. Empty string means default runtime. | | additionalWorkerGroups.smallGroup.containerEnv | list | `[]` | | | additionalWorkerGroups.smallGroup.envFrom | list | `[]` | envFrom to pass to additional worker pod | | additionalWorkerGroups.smallGroup.resources.limits.cpu | int | `1` | | diff --git a/helm-chart/ray-cluster/templates/raycluster-cluster.yaml b/helm-chart/ray-cluster/templates/raycluster-cluster.yaml index 8ef6a860c15..b018b5f4de7 100644 --- a/helm-chart/ray-cluster/templates/raycluster-cluster.yaml +++ b/helm-chart/ray-cluster/templates/raycluster-cluster.yaml @@ -155,6 +155,9 @@ spec: {{- with .Values.head.restartPolicy }} restartPolicy: {{ . }} {{- end }} + {{- with .Values.head.runtimeClassName }} + runtimeClassName: {{ . }} + {{- end }} {{- with .Values.head.serviceAccountName }} serviceAccountName: {{ . }} {{- end }} @@ -281,6 +284,9 @@ spec: {{- with .Values.worker.restartPolicy }} restartPolicy: {{ . }} {{- end }} + {{- with .Values.worker.runtimeClassName }} + runtimeClassName: {{ . }} + {{- end }} {{- with .Values.worker.serviceAccountName }} serviceAccountName: {{ . }} {{- end }} @@ -408,6 +414,9 @@ spec: {{- with $values.restartPolicy }} restartPolicy: {{ . }} {{- end }} + {{- with $values.runtimeClassName }} + runtimeClassName: {{ . }} + {{- end }} {{- with $values.serviceAccountName }} serviceAccountName: {{ . }} {{- end }} diff --git a/helm-chart/ray-cluster/tests/raycluster_test.yaml b/helm-chart/ray-cluster/tests/raycluster_test.yaml index 4f931dc6c24..e3e5faa3776 100644 --- a/helm-chart/ray-cluster/tests/raycluster_test.yaml +++ b/helm-chart/ray-cluster/tests/raycluster_test.yaml @@ -2153,3 +2153,32 @@ tests: - equal: path: spec.workerGroupSpecs[?(@.groupName=="smallGroup")].template.spec.securityContext.fsGroup value: 3000 + + - it: Should set head runtimeClassName when `head.runtimeClassName` is set + set: + head: + runtimeClassName: nvidia + asserts: + - equal: + path: spec.headGroupSpec.template.spec.runtimeClassName + value: nvidia + + - it: Should set worker runtimeClassName when `worker.runtimeClassName` is set + set: + worker: + runtimeClassName: nvidia + asserts: + - equal: + path: spec.workerGroupSpecs[?(@.groupName=="workergroup")].template.spec.runtimeClassName + value: nvidia + + - it: Should set additional worker group runtimeClassName when `additionalWorkerGroups.smallGroup.runtimeClassName` is set + set: + additionalWorkerGroups: + smallGroup: + disabled: false + runtimeClassName: nvidia + asserts: + - equal: + path: spec.workerGroupSpecs[?(@.groupName=="smallGroup")].template.spec.runtimeClassName + value: nvidia diff --git a/helm-chart/ray-cluster/values.yaml b/helm-chart/ray-cluster/values.yaml index 2d460849432..8a21448f59b 100644 --- a/helm-chart/ray-cluster/values.yaml +++ b/helm-chart/ray-cluster/values.yaml @@ -98,6 +98,9 @@ head: serviceAccountName: "" restartPolicy: "" + # -- runtimeClassName is the name of the RuntimeClass to use to run the head Pod. + runtimeClassName: "" + # containerEnv specifies environment variables for the Ray container, # Follows standard K8s container env schema. containerEnv: [] @@ -211,6 +214,9 @@ worker: serviceAccountName: "" restartPolicy: "" + # -- runtimeClassName is the name of the RuntimeClass to use to run the worker Pods. + runtimeClassName: "" + # -- Init containers to add to the worker pod initContainers: [] @@ -319,6 +325,9 @@ additionalWorkerGroups: serviceAccountName: "" restartPolicy: "" + # -- runtimeClassName for this additional worker group. Empty string means default runtime. + runtimeClassName: "" + # containerEnv specifies environment variables for the Ray container, # Follows standard K8s container env schema. containerEnv: []