Skip to content

Commit 6c9f859

Browse files
authored
[Feature] Support inject specific env vars to all Ray containers in all RayCluster CRs by configuration (#4103)
* [Feature] Inject specific env vars to all Ray containers in all RayCluster CRs Signed-off-by: win5923 <ken89@kimo.com> * Add to helm chart Signed-off-by: win5923 <ken89@kimo.com> * Apply Rueian's comments Signed-off-by: win5923 <ken89@kimo.com> * Change defaultRayEnvs to defaultContainerEnvs Signed-off-by: win5923 <ken89@kimo.com> --------- Signed-off-by: win5923 <ken89@kimo.com>
1 parent bbdff70 commit 6c9f859

File tree

12 files changed

+243
-19
lines changed

12 files changed

+243
-19
lines changed

helm-chart/kuberay-operator/README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,8 @@ spec:
166166
| logging.sizeLimit | string | `""` | EmptyDir volume size limit for kuberay-operator log file. |
167167
| batchScheduler.enabled | bool | `false` | |
168168
| batchScheduler.name | string | `""` | |
169+
| configuration.enabled | bool | `false` | Whether to enable the configuration feature. If enabled, a ConfigMap will be created and mounted to the operator. |
170+
| configuration.defaultContainerEnvs | list | `[]` | Default environment variables to inject into all Ray containers in all RayCluster CRs. This allows users to set feature flags across all Ray pods. Example: defaultContainerEnvs: - name: RAY_enable_open_telemetry value: "true" - name: RAY_metric_cardinality_level value: "recommended" |
169171
| featureGates[0].name | string | `"RayClusterStatusConditions"` | |
170172
| featureGates[0].enabled | bool | `true` | |
171173
| featureGates[1].name | string | `"RayJobDeletionPolicy"` | |
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
{{- if .Values.configuration.enabled }}
2+
apiVersion: v1
3+
kind: ConfigMap
4+
metadata:
5+
name: {{ include "kuberay-operator.deployment.name" . }}-config
6+
namespace: {{ .Release.Namespace }}
7+
labels:
8+
{{- include "kuberay-operator.labels" . | nindent 4 }}
9+
{{- with .Values.labels }}
10+
{{- toYaml . | nindent 4 }}
11+
{{- end }}
12+
data:
13+
config.yaml: |
14+
apiVersion: config.ray.io/v1alpha1
15+
kind: Configuration
16+
{{- if .Values.configuration.defaultContainerEnvs }}
17+
defaultContainerEnvs:
18+
{{- toYaml .Values.configuration.defaultContainerEnvs | nindent 4 }}
19+
{{- end }}
20+
{{- end }}

helm-chart/kuberay-operator/templates/deployment.yaml

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,9 @@ spec:
3535
{{- toYaml . | nindent 8 }}
3636
{{- end }}
3737
serviceAccountName: {{ include "kuberay-operator.serviceAccount.name" . }}
38-
{{- if and (.Values.logging.baseDir) (.Values.logging.fileName) }}
38+
{{- if or (and (.Values.logging.baseDir) (.Values.logging.fileName)) .Values.configuration.enabled }}
3939
volumes:
40+
{{- if and (.Values.logging.baseDir) (.Values.logging.fileName) }}
4041
- name: kuberay-logs
4142
{{- if .Values.logging.sizeLimit }}
4243
emptyDir:
@@ -45,6 +46,12 @@ spec:
4546
emptyDir: {}
4647
{{- end }}
4748
{{- end }}
49+
{{- if .Values.configuration.enabled }}
50+
- name: operator-config
51+
configMap:
52+
name: {{ include "kuberay-operator.deployment.name" . }}-config
53+
{{- end }}
54+
{{- end }}
4855
{{- with .Values.podSecurityContext }}
4956
securityContext:
5057
{{- toYaml . | nindent 8 }}
@@ -62,15 +69,26 @@ spec:
6269
{{- with .Values.image.pullPolicy }}
6370
imagePullPolicy: {{ . }}
6471
{{- end }}
65-
{{- if and (.Values.logging.baseDir) (.Values.logging.fileName) }}
72+
{{- if or (and (.Values.logging.baseDir) (.Values.logging.fileName)) .Values.configuration.enabled }}
6673
volumeMounts:
74+
{{- if and (.Values.logging.baseDir) (.Values.logging.fileName) }}
6775
- name: kuberay-logs
6876
mountPath: "{{ .Values.logging.baseDir }}"
6977
{{- end }}
78+
{{- if .Values.configuration.enabled }}
79+
- name: operator-config
80+
mountPath: /etc/kuberay
81+
readOnly: true
82+
{{- end }}
83+
{{- end }}
7084
command:
7185
- {{ .Values.operatorCommand }}
7286
args:
7387
{{- $argList := list -}}
88+
{{- if .Values.configuration.enabled -}}
89+
{{- $argList = append $argList "--config" -}}
90+
{{- $argList = append $argList "/etc/kuberay/config.yaml" -}}
91+
{{- end -}}
7492
{{- $argList = append $argList (include "kuberay.featureGates" . | trim) -}}
7593
{{- if .Values.batchScheduler -}}
7694
{{- if .Values.batchScheduler.enabled -}}
Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
suite: Test ConfigMap
2+
3+
templates:
4+
- configmap.yaml
5+
6+
release:
7+
name: kuberay-operator
8+
namespace: default
9+
10+
tests:
11+
- it: Should not create ConfigMap when configuration is not enabled
12+
asserts:
13+
- hasDocuments:
14+
count: 0
15+
16+
- it: Should not create ConfigMap when configuration.enabled is false
17+
set:
18+
configuration:
19+
enabled: false
20+
asserts:
21+
- hasDocuments:
22+
count: 0
23+
24+
- it: Should create ConfigMap when configuration is enabled
25+
set:
26+
configuration:
27+
enabled: true
28+
asserts:
29+
- hasDocuments:
30+
count: 1
31+
- containsDocument:
32+
apiVersion: v1
33+
kind: ConfigMap
34+
name: kuberay-operator-config
35+
namespace: default
36+
37+
- it: Should create ConfigMap with defaultContainerEnvs when set
38+
set:
39+
configuration:
40+
enabled: true
41+
defaultContainerEnvs:
42+
- name: RAY_enable_open_telemetry
43+
value: "true"
44+
asserts:
45+
- hasDocuments:
46+
count: 1
47+
- containsDocument:
48+
apiVersion: v1
49+
kind: ConfigMap
50+
name: kuberay-operator-config
51+
namespace: default
52+
53+
- it: Should include defaultContainerEnvs in Configuration
54+
set:
55+
configuration:
56+
enabled: true
57+
defaultContainerEnvs:
58+
- name: RAY_enable_open_telemetry
59+
value: "true"
60+
- name: RAY_metric_cardinality_level
61+
value: "recommended"
62+
asserts:
63+
- matchRegex:
64+
path: data["config.yaml"]
65+
pattern: "apiVersion: config.ray.io/v1alpha1"
66+
- matchRegex:
67+
path: data["config.yaml"]
68+
pattern: "kind: Configuration"
69+
- matchRegex:
70+
path: data["config.yaml"]
71+
pattern: "defaultContainerEnvs:"
72+
- matchRegex:
73+
path: data["config.yaml"]
74+
pattern: "name: RAY_enable_open_telemetry"
75+
- matchRegex:
76+
path: data["config.yaml"]
77+
pattern: 'value: "true"'
78+
- matchRegex:
79+
path: data["config.yaml"]
80+
pattern: "name: RAY_metric_cardinality_level"
81+
- matchRegex:
82+
path: data["config.yaml"]
83+
pattern: "value: recommended"
84+
85+
- it: Should create ConfigMap without defaultContainerEnvs when not set
86+
set:
87+
configuration:
88+
enabled: true
89+
asserts:
90+
- hasDocuments:
91+
count: 1
92+
- matchRegex:
93+
path: data["config.yaml"]
94+
pattern: "apiVersion: config.ray.io/v1alpha1"
95+
- matchRegex:
96+
path: data["config.yaml"]
97+
pattern: "kind: Configuration"
98+
- notMatchRegex:
99+
path: data["config.yaml"]
100+
pattern: "defaultContainerEnvs:"

helm-chart/kuberay-operator/tests/deployment_test.yaml

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -296,3 +296,46 @@ tests:
296296
- contains:
297297
path: spec.template.spec.containers[?(@.name=="kuberay-operator")].args
298298
content: "--burst=150"
299+
300+
- it: Should add config volume and volumeMount when configuration is enabled
301+
set:
302+
configuration:
303+
enabled: true
304+
defaultContainerEnvs:
305+
- name: RAY_enable_open_telemetry
306+
value: "true"
307+
- name: RAY_metric_cardinality_level
308+
value: "recommended"
309+
asserts:
310+
- contains:
311+
path: spec.template.spec.volumes
312+
content:
313+
name: operator-config
314+
configMap:
315+
name: kuberay-operator-config
316+
- contains:
317+
path: spec.template.spec.containers[?(@.name=="kuberay-operator")].volumeMounts
318+
content:
319+
name: operator-config
320+
mountPath: /etc/kuberay
321+
readOnly: true
322+
323+
- it: Should add --config arg when configuration is enabled
324+
set:
325+
configuration:
326+
enabled: true
327+
defaultContainerEnvs:
328+
- name: RAY_enable_open_telemetry
329+
value: "true"
330+
asserts:
331+
- contains:
332+
path: spec.template.spec.containers[?(@.name=="kuberay-operator")].args
333+
content: "--config"
334+
- contains:
335+
path: spec.template.spec.containers[?(@.name=="kuberay-operator")].args
336+
content: "/etc/kuberay/config.yaml"
337+
338+
- it: Should not add config volume when configuration is not enabled
339+
asserts:
340+
- notExists:
341+
path: spec.template.spec.volumes[?(@.name=="operator-config")]

helm-chart/kuberay-operator/values.yaml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,20 @@ batchScheduler:
9898
# "batchScheduler.enabled=true" at the same time as it will override this option.
9999
name: ""
100100

101+
# Configuration for the KubeRay operator.
102+
configuration:
103+
# -- Whether to enable the configuration feature. If enabled, a ConfigMap will be created and mounted to the operator.
104+
enabled: false
105+
# -- Default environment variables to inject into all Ray containers in all RayCluster CRs.
106+
# This allows users to set feature flags across all Ray pods.
107+
# Example:
108+
# defaultContainerEnvs:
109+
# - name: RAY_enable_open_telemetry
110+
# value: "true"
111+
# - name: RAY_metric_cardinality_level
112+
# value: "recommended"
113+
defaultContainerEnvs: []
114+
101115
featureGates:
102116
- name: RayClusterStatusConditions
103117
enabled: true

ray-operator/apis/config/v1alpha1/configuration_types.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,9 @@ type Configuration struct {
6565
// to inject into every Head pod.
6666
HeadSidecarContainers []corev1.Container `json:"headSidecarContainers,omitempty"`
6767

68+
// DefaultContainerEnvs specifies default environment variables to inject into all Ray containers.
69+
DefaultContainerEnvs []corev1.EnvVar `json:"defaultContainerEnvs,omitempty"`
70+
6871
// ReconcileConcurrency is the max concurrency for each reconciler.
6972
ReconcileConcurrency int `json:"reconcileConcurrency,omitempty"`
7073

ray-operator/apis/config/v1alpha1/zz_generated.deepcopy.go

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

ray-operator/controllers/ray/common/pod.go

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -409,7 +409,7 @@ func initLivenessAndReadinessProbe(rayContainer *corev1.Container, rayNodeType r
409409
}
410410

411411
// BuildPod builds a Pod from the given pod template spec.
412-
func BuildPod(ctx context.Context, podTemplateSpec corev1.PodTemplateSpec, rayNodeType rayv1.RayNodeType, rayStartParams map[string]string, headPort string, enableRayAutoscaler bool, creatorCRDType utils.CRDType, fqdnRayIP string) (aPod corev1.Pod) {
412+
func BuildPod(ctx context.Context, podTemplateSpec corev1.PodTemplateSpec, rayNodeType rayv1.RayNodeType, rayStartParams map[string]string, headPort string, enableRayAutoscaler bool, creatorCRDType utils.CRDType, fqdnRayIP string, defaultContainerEnvs []corev1.EnvVar) (aPod corev1.Pod) {
413413
log := ctrl.LoggerFrom(ctx)
414414

415415
// For Worker Pod: Traffic readiness is determined by the readiness probe.
@@ -483,7 +483,7 @@ func BuildPod(ctx context.Context, podTemplateSpec corev1.PodTemplateSpec, rayNo
483483
for index := range pod.Spec.InitContainers {
484484
setInitContainerEnvVars(&pod.Spec.InitContainers[index], fqdnRayIP)
485485
}
486-
setContainerEnvVars(&pod, rayNodeType, fqdnRayIP, headPort, rayStartCmd, creatorCRDType)
486+
setContainerEnvVars(&pod, rayNodeType, fqdnRayIP, headPort, rayStartCmd, creatorCRDType, defaultContainerEnvs)
487487

488488
// Inject probes into the Ray containers if the user has not explicitly disabled them.
489489
// The feature flag `ENABLE_PROBES_INJECTION` will be removed if this feature is stable enough.
@@ -642,13 +642,20 @@ func setInitContainerEnvVars(container *corev1.Container, fqdnRayIP string) {
642642
)
643643
}
644644

645-
func setContainerEnvVars(pod *corev1.Pod, rayNodeType rayv1.RayNodeType, fqdnRayIP string, headPort string, rayStartCmd string, creatorCRDType utils.CRDType) {
645+
func setContainerEnvVars(pod *corev1.Pod, rayNodeType rayv1.RayNodeType, fqdnRayIP string, headPort string, rayStartCmd string, creatorCRDType utils.CRDType, defaultContainerEnvs []corev1.EnvVar) {
646646
// TODO: Audit all environment variables to identify which should not be modified by users.
647647
container := &pod.Spec.Containers[utils.RayContainerIndex]
648648
if len(container.Env) == 0 {
649649
container.Env = []corev1.EnvVar{}
650650
}
651651

652+
// Inject the default environment variables from the configuration into this container, skipping any name that utils.EnvVarExists reports as already present.
653+
for _, defaultEnv := range defaultContainerEnvs {
654+
if !utils.EnvVarExists(defaultEnv.Name, container.Env) {
655+
container.Env = append(container.Env, defaultEnv)
656+
}
657+
}
658+
652659
// case 1: head => Use LOCAL_HOST
653660
// case 2: worker => Use fqdnRayIP (fully qualified domain name)
654661
ip := utils.LOCAL_HOST

0 commit comments

Comments
 (0)