From fe8365fa12e3917174c029ed273171aeabc1fab8 Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Fri, 26 Dec 2025 04:40:08 -0500 Subject: [PATCH 01/15] feat(helm): Add multi-node deployment support with configurable scheduling. --- tools/deployment/package-helm/.test-common.sh | 83 +++++++++++ tools/deployment/package-helm/Chart.yaml | 2 +- .../package-helm/templates/_helpers.tpl | 92 +++++++++++-- .../templates/api-server-deployment.yaml | 107 +++++++++++++++ .../templates/api-server-logs-pv.yaml | 10 ++ .../templates/api-server-logs-pvc.yaml | 9 ++ .../templates/api-server-service.yaml | 18 +++ .../compression-scheduler-deployment.yaml | 22 ++- .../compression-scheduler-logs-pv.yaml | 3 +- .../compression-scheduler-user-logs-pv.yaml | 3 +- .../compression-worker-deployment.yaml | 42 +++++- .../templates/compression-worker-logs-pv.yaml | 3 +- ...compression-worker-staged-archives-pv.yaml | 10 ++ ...ompression-worker-staged-archives-pvc.yaml | 9 ++ .../templates/compression-worker-tmp-pv.yaml | 3 +- .../package-helm/templates/configmap.yaml | 129 +++++++++++++++++- .../templates/database-data-pv.yaml | 3 +- .../templates/database-logs-pv.yaml | 3 +- .../garbage-collector-deployment.yaml | 38 ++++-- .../templates/garbage-collector-logs-pv.yaml | 7 +- .../templates/garbage-collector-logs-pvc.yaml | 4 +- .../templates/query-scheduler-logs-pv.yaml | 3 +- .../templates/query-worker-deployment.yaml | 46 +++++-- .../templates/query-worker-logs-pv.yaml | 3 +- .../query-worker-staged-streams-pv.yaml | 10 ++ .../query-worker-staged-streams-pvc.yaml | 9 ++ .../package-helm/templates/queue-logs-pv.yaml | 3 +- .../package-helm/templates/redis-data-pv.yaml | 3 +- .../package-helm/templates/redis-logs-pv.yaml | 3 +- .../templates/reducer-logs-pv.yaml | 3 +- .../templates/results-cache-data-pv.yaml | 3 +- .../templates/results-cache-logs-pv.yaml | 3 +- .../templates/shared-data-archives-pv.yaml | 3 +- .../templates/shared-data-streams-pv.yaml | 3 +- 
.../package-helm/templates/storage-class.yaml | 10 ++ .../templates/webui-deployment.yaml | 37 ++++- .../package-helm/test-multi-dedicated.sh | 102 ++++++++++++++ .../package-helm/test-multi-shared.sh | 81 +++++++++++ tools/deployment/package-helm/test.sh | 6 +- tools/deployment/package-helm/values.yaml | 58 ++++++++ 40 files changed, 901 insertions(+), 88 deletions(-) create mode 100755 tools/deployment/package-helm/.test-common.sh create mode 100644 tools/deployment/package-helm/templates/api-server-deployment.yaml create mode 100644 tools/deployment/package-helm/templates/api-server-logs-pv.yaml create mode 100644 tools/deployment/package-helm/templates/api-server-logs-pvc.yaml create mode 100644 tools/deployment/package-helm/templates/api-server-service.yaml create mode 100644 tools/deployment/package-helm/templates/compression-worker-staged-archives-pv.yaml create mode 100644 tools/deployment/package-helm/templates/compression-worker-staged-archives-pvc.yaml create mode 100644 tools/deployment/package-helm/templates/query-worker-staged-streams-pv.yaml create mode 100644 tools/deployment/package-helm/templates/query-worker-staged-streams-pvc.yaml create mode 100644 tools/deployment/package-helm/templates/storage-class.yaml create mode 100755 tools/deployment/package-helm/test-multi-dedicated.sh create mode 100755 tools/deployment/package-helm/test-multi-shared.sh diff --git a/tools/deployment/package-helm/.test-common.sh b/tools/deployment/package-helm/.test-common.sh new file mode 100755 index 0000000000..7a9c900ae7 --- /dev/null +++ b/tools/deployment/package-helm/.test-common.sh @@ -0,0 +1,83 @@ +#!/usr/bin/env bash + +# Common utilities for Helm chart testing +# Source this file from test scripts + +set -o errexit +set -o nounset +set -o pipefail + +CLP_HOME="${CLP_HOME:-/tmp/clp}" + +# Waits for all jobs to complete and all non-job pods to be ready. 
+# +# @param {int} timeout_seconds Overall timeout in seconds +# @param {int} poll_interval_seconds Interval between status checks +# @param {int} wait_timeout_seconds Timeout for each kubectl wait call +# @return {int} 0 on success, 1 on timeout +wait_for_pods() { + local timeout_seconds=$1 + local poll_interval_seconds=$2 + local wait_timeout_seconds=$3 + + echo "Waiting for all pods to be ready" \ + "(timeout=${timeout_seconds}s, poll=${poll_interval_seconds}s," \ + "wait=${wait_timeout_seconds}s)..." + + # Reset bash built-in SECONDS counter + SECONDS=0 + + while true; do + sleep "${poll_interval_seconds}" + kubectl get pods + + if kubectl wait job \ + --all \ + --for=condition=Complete \ + --timeout="${wait_timeout_seconds}s" 2>/dev/null \ + && kubectl wait pods \ + --all \ + --selector='!job-name' \ + --for=condition=Ready \ + --timeout="${wait_timeout_seconds}s" 2>/dev/null + then + echo "All jobs completed and services are ready." + return 0 + fi + + if [[ ${SECONDS} -ge ${timeout_seconds} ]]; then + echo "ERROR: Timed out waiting for pods to be ready" + return 1 + fi + + echo "---" + done +} + +# Creates required directories for CLP data and logs +create_clp_directories() { + echo "Creating CLP directories at ${CLP_HOME}..." + mkdir -p "$CLP_HOME/var/"{data,log}/{database,queue,redis,results_cache} \ + "$CLP_HOME/var/data/"{archives,streams,staged-archives,staged-streams} \ + "$CLP_HOME/var/log/"{compression_scheduler,compression_worker,user} \ + "$CLP_HOME/var/log/"{query_scheduler,query_worker,reducer} \ + "$CLP_HOME/var/log/garbage_collector" \ + "$CLP_HOME/var/log/api_server" \ + "$CLP_HOME/var/tmp" \ + "$CLP_HOME/samples" +} + +# Downloads sample datasets in the background +# Sets SAMPLE_DOWNLOAD_PID global variable +download_samples() { + echo "Downloading sample datasets..." + wget -O - https://zenodo.org/records/10516402/files/postgresql.tar.gz?download=1 \ + | tar xz -C "$CLP_HOME/samples" & + SAMPLE_DOWNLOAD_PID=$! 
+ + # Generate sample log file for garbage collector testing + cat <<EOF > "$CLP_HOME/samples/test-gc.jsonl" +{"timestamp": $(date +%s%3N), "level": "INFO", "message": "User login successful"} +{"timestamp": $(date +%s%3N), "level": "ERROR", "message": "Database connection failed"} +EOF +} diff --git a/tools/deployment/package-helm/Chart.yaml b/tools/deployment/package-helm/Chart.yaml index 443a3c4a16..192d61a98f 100644 --- a/tools/deployment/package-helm/Chart.yaml +++ b/tools/deployment/package-helm/Chart.yaml @@ -1,6 +1,6 @@ apiVersion: "v2" name: "clp" -version: "0.1.2-dev.11" +version: "0.1.2-dev.16" description: "A Helm chart for CLP's (Compressed Log Processor) package deployment" type: "application" appVersion: "0.7.1-dev" diff --git a/tools/deployment/package-helm/templates/_helpers.tpl b/tools/deployment/package-helm/templates/_helpers.tpl index 882c8a3b17..8454f0fa8d 100644 --- a/tools/deployment/package-helm/templates/_helpers.tpl +++ b/tools/deployment/package-helm/templates/_helpers.tpl @@ -111,20 +111,21 @@ Used for: {{- end }} {{/* -Creates a local PersistentVolume. +Creates a PersistentVolume that does not use dynamic provisioning. + +Behavior depends on the `distributed` value: +- distributed=false: Uses local volume type with node affinity targeting control-plane nodes +- distributed=true: Uses hostPath without node affinity (assumes shared storage like NFS) @param {object} root Root template context @param {string} component_category (e.g., "database", "shared-data") @param {string} name (e.g., "archives", "data", "logs") -@param {string} nodeRole Node role for affinity. Targets nodes with label - "node-role.kubernetes.io/". 
Always falls back to - "node-role.kubernetes.io/control-plane" @param {string} capacity Storage capacity @param {string[]} accessModes Access modes @param {string} hostPath Absolute path on host @return {string} YAML-formatted PersistentVolume resource */}} -{{- define "clp.createLocalPv" -}} +{{- define "clp.createStaticPv" -}} apiVersion: "v1" kind: "PersistentVolume" metadata: @@ -137,19 +138,22 @@ spec: storage: {{ .capacity }} accessModes: {{ .accessModes }} persistentVolumeReclaimPolicy: "Retain" - storageClassName: "local-storage" + storageClassName: {{ .root.Values.storage.storageClassName | quote }} + {{- if .root.Values.distributed }} + hostPath: + path: {{ .hostPath | quote }} + type: "DirectoryOrCreate" + {{- else }} local: path: {{ .hostPath | quote }} nodeAffinity: required: nodeSelectorTerms: - - matchExpressions: - - key: {{ printf "node-role.kubernetes.io/%s" .nodeRole | quote }} - operator: "Exists" - matchExpressions: - key: "node-role.kubernetes.io/control-plane" operator: "Exists" -{{- end }} + {{- end }}{{/* if .root.Values.distributed */}} +{{- end }}{{/* define "clp.createStaticPv" */}} {{/* Creates a PersistentVolumeClaim for the given component. @@ -171,7 +175,7 @@ metadata: app.kubernetes.io/component: {{ .component_category | quote }} spec: accessModes: {{ .accessModes }} - storageClassName: "local-storage" + storageClassName: {{ .root.Values.storage.storageClassName | quote }} selector: matchLabels: {{- include "clp.selectorLabels" .root | nindent 6 }} @@ -247,6 +251,31 @@ hostPath: type: "Directory" {{- end }} +{{/* +Creates a volumeMount for the AWS config directory. + +@param {object} . Root template context +@return {string} YAML-formatted volumeMount definition +*/}} +{{- define "clp.awsConfigVolumeMount" -}} +name: "aws-config" +mountPath: {{ .Values.clpConfig.aws_config_directory | quote }} +readOnly: true +{{- end }} + +{{/* +Creates a volume for the AWS config directory. + +@param {object} . 
Root template context +@return {string} YAML-formatted volume definition +*/}} +{{- define "clp.awsConfigVolume" -}} +name: "aws-config" +hostPath: + path: {{ .Values.clpConfig.aws_config_directory | quote }} + type: "Directory" +{{- end }} + {{/* Creates an initContainer that waits for a Kubernetes resource to be ready. @@ -272,3 +301,44 @@ command: [ "--timeout=300s" ] {{- end }} + +{{/* +Creates scheduling configuration (nodeSelector, affinity, tolerations, topologySpreadConstraints) +for a component. + +When distributed is false (single-node mode), a control-plane toleration is automatically added +so pods can be scheduled on tainted control-plane nodes without manual untainting. + +@param {object} root Root template context +@param {string} component Top-level values key (e.g., "compressionWorker", "queryWorker") +@return {string} YAML-formatted scheduling fields (nodeSelector, affinity, tolerations, + topologySpreadConstraints) +*/}} +{{- define "clp.createSchedulingConfigs" -}} +{{- $componentConfig := index .root.Values .component | default dict -}} +{{- $scheduling := $componentConfig.scheduling | default dict -}} +{{- $tolerations := $scheduling.tolerations | default list -}} +{{- if not .root.Values.distributed -}} +{{- $tolerations = append $tolerations (dict + "key" "node-role.kubernetes.io/control-plane" + "operator" "Exists" + "effect" "NoSchedule" +) -}} +{{- end -}} +{{- with $scheduling.nodeSelector }} +nodeSelector: + {{- toYaml . | nindent 2 }} +{{- end }} +{{- with $scheduling.affinity }} +affinity: + {{- toYaml . | nindent 2 }} +{{- end }} +{{- with $tolerations }} +tolerations: + {{- toYaml . | nindent 2 }} +{{- end }} +{{- with $scheduling.topologySpreadConstraints }} +topologySpreadConstraints: + {{- toYaml . 
| nindent 2 }} +{{- end }} +{{- end }}{{/* define "clp.createSchedulingConfigs" */}} diff --git a/tools/deployment/package-helm/templates/api-server-deployment.yaml b/tools/deployment/package-helm/templates/api-server-deployment.yaml new file mode 100644 index 0000000000..26da76486f --- /dev/null +++ b/tools/deployment/package-helm/templates/api-server-deployment.yaml @@ -0,0 +1,107 @@ +{{- if .Values.clpConfig.api_server }} +apiVersion: "apps/v1" +kind: "Deployment" +metadata: + name: {{ include "clp.fullname" . }}-api-server + labels: + {{- include "clp.labels" . | nindent 4 }} + app.kubernetes.io/component: "api-server" +spec: + replicas: 1 + selector: + matchLabels: + {{- include "clp.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: "api-server" + template: + metadata: + labels: + {{- include "clp.labels" . | nindent 8 }} + app.kubernetes.io/component: "api-server" + spec: + serviceAccountName: {{ include "clp.fullname" . }}-job-watcher + terminationGracePeriodSeconds: 10 + securityContext: + runAsUser: {{ .Values.securityContext.firstParty.uid }} + runAsGroup: {{ .Values.securityContext.firstParty.gid }} + fsGroup: {{ .Values.securityContext.firstParty.gid }} + initContainers: + - {{- include "clp.waitFor" (dict + "root" . + "type" "job" + "name" "db-table-creator" + ) | nindent 10 }} + - {{- include "clp.waitFor" (dict + "root" . + "type" "job" + "name" "results-cache-indices-creator" + ) | nindent 10 }} + containers: + - name: "api-server" + image: "{{ include "clp.image.ref" . }}" + imagePullPolicy: "{{ .Values.image.clpPackage.pullPolicy }}" + env: + - name: "CLP_DB_PASS" + valueFrom: + secretKeyRef: + name: {{ include "clp.fullname" . }}-database + key: "password" + - name: "CLP_DB_USER" + valueFrom: + secretKeyRef: + name: {{ include "clp.fullname" . 
}}-database + key: "username" + - name: "CLP_LOGS_DIR" + value: "/var/log/api_server" + - name: "RUST_LOG" + value: "INFO" + ports: + - name: "api-server" + containerPort: 3001 + volumeMounts: + - name: {{ include "clp.volumeName" (dict + "component_category" "api-server" + "name" "logs" + ) | quote }} + mountPath: "/var/log/api_server" + - name: "config" + mountPath: "/etc/clp-config.yaml" + subPath: "clp-config.yaml" + readOnly: true + {{- if eq .Values.clpConfig.stream_output.storage.type "fs" }} + - name: {{ include "clp.volumeName" (dict + "component_category" "shared-data" + "name" "streams" + ) | quote }} + mountPath: "/var/data/streams" + {{- end }} + command: [ + "/opt/clp/bin/api_server", + "--host", "0.0.0.0", + "--port", "3001", + "--config", "/etc/clp-config.yaml" + ] + readinessProbe: + {{- include "clp.readinessProbeTimings" . | nindent 12 }} + httpGet: &api-server-health-check + path: "/health" + port: 3001 + livenessProbe: + {{- include "clp.livenessProbeTimings" . | nindent 12 }} + httpGet: *api-server-health-check + volumes: + - {{- include "clp.pvcVolume" (dict + "root" . + "component_category" "api-server" + "name" "logs" + ) | nindent 10 }} + - name: "config" + configMap: + name: {{ include "clp.fullname" . }}-config + {{- if eq .Values.clpConfig.stream_output.storage.type "fs" }} + - {{- include "clp.pvcVolume" (dict + "root" . + "component_category" "shared-data" + "name" "streams" + ) | nindent 10 }} + {{- end }} +{{- end }} diff --git a/tools/deployment/package-helm/templates/api-server-logs-pv.yaml b/tools/deployment/package-helm/templates/api-server-logs-pv.yaml new file mode 100644 index 0000000000..e117f83bd1 --- /dev/null +++ b/tools/deployment/package-helm/templates/api-server-logs-pv.yaml @@ -0,0 +1,10 @@ +{{- if .Values.clpConfig.api_server }} +{{- include "clp.createStaticPv" (dict + "root" . 
+ "component_category" "api-server" + "name" "logs" + "capacity" "5Gi" + "accessModes" (list "ReadWriteOnce") + "hostPath" (printf "%s/api_server" .Values.clpConfig.logs_directory) +) }} +{{- end }} diff --git a/tools/deployment/package-helm/templates/api-server-logs-pvc.yaml b/tools/deployment/package-helm/templates/api-server-logs-pvc.yaml new file mode 100644 index 0000000000..d9429b6dad --- /dev/null +++ b/tools/deployment/package-helm/templates/api-server-logs-pvc.yaml @@ -0,0 +1,9 @@ +{{- if .Values.clpConfig.api_server }} +{{- include "clp.createPvc" (dict + "root" . + "component_category" "api-server" + "name" "logs" + "capacity" "5Gi" + "accessModes" (list "ReadWriteOnce") +) }} +{{- end }} diff --git a/tools/deployment/package-helm/templates/api-server-service.yaml b/tools/deployment/package-helm/templates/api-server-service.yaml new file mode 100644 index 0000000000..0aed0e7efa --- /dev/null +++ b/tools/deployment/package-helm/templates/api-server-service.yaml @@ -0,0 +1,18 @@ +{{- if .Values.clpConfig.api_server }} +apiVersion: "v1" +kind: "Service" +metadata: + name: {{ include "clp.fullname" . }}-api-server + labels: + {{- include "clp.labels" . | nindent 4 }} + app.kubernetes.io/component: "api-server" +spec: + type: "NodePort" + selector: + {{- include "clp.selectorLabels" . 
| nindent 4 }} + app.kubernetes.io/component: "api-server" + ports: + - port: 3001 + targetPort: "api-server" + nodePort: {{ .Values.clpConfig.api_server.port }} +{{- end }} diff --git a/tools/deployment/package-helm/templates/compression-scheduler-deployment.yaml b/tools/deployment/package-helm/templates/compression-scheduler-deployment.yaml index f58e9594b8..1f9bd0835c 100644 --- a/tools/deployment/package-helm/templates/compression-scheduler-deployment.yaml +++ b/tools/deployment/package-helm/templates/compression-scheduler-deployment.yaml @@ -66,11 +66,6 @@ spec: - name: "PYTHONPATH" value: "/opt/clp/lib/python3/site-packages" volumeMounts: - - {{- include "clp.logsInputVolumeMount" . | nindent 14 }} - - name: "config" - mountPath: "/etc/clp-config.yaml" - subPath: "clp-config.yaml" - readOnly: true - name: {{ include "clp.volumeName" (dict "component_category" "compression-scheduler" "name" "logs" @@ -81,13 +76,22 @@ spec: "name" "user-logs" ) | quote }} mountPath: "/var/log/user" + - name: "config" + mountPath: "/etc/clp-config.yaml" + subPath: "clp-config.yaml" + readOnly: true + {{- if .Values.clpConfig.aws_config_directory }} + - {{- include "clp.awsConfigVolumeMount" . | nindent 14 }} + {{- end }} + {{- if eq .Values.clpConfig.logs_input.type "fs" }} + - {{- include "clp.logsInputVolumeMount" . | nindent 14 }} + {{- end }} command: [ "python3", "-u", "-m", "job_orchestration.scheduler.compress.compression_scheduler", "--config", "/etc/clp-config.yaml" ] volumes: - - {{- include "clp.logsInputVolume" . | nindent 10 }} - {{- include "clp.pvcVolume" (dict "root" . "component_category" "compression-scheduler" @@ -101,3 +105,9 @@ spec: - name: "config" configMap: name: {{ include "clp.fullname" . }}-config + {{- if .Values.clpConfig.aws_config_directory }} + - {{- include "clp.awsConfigVolume" . | nindent 10 }} + {{- end }} + {{- if eq .Values.clpConfig.logs_input.type "fs" }} + - {{- include "clp.logsInputVolume" . 
| nindent 10 }} + {{- end }} diff --git a/tools/deployment/package-helm/templates/compression-scheduler-logs-pv.yaml b/tools/deployment/package-helm/templates/compression-scheduler-logs-pv.yaml index 565f5dc40b..0a886c7c53 100644 --- a/tools/deployment/package-helm/templates/compression-scheduler-logs-pv.yaml +++ b/tools/deployment/package-helm/templates/compression-scheduler-logs-pv.yaml @@ -1,8 +1,7 @@ -{{- include "clp.createLocalPv" (dict +{{- include "clp.createStaticPv" (dict "root" . "component_category" "compression-scheduler" "name" "logs" - "nodeRole" "control-plane" "capacity" "5Gi" "accessModes" (list "ReadWriteOnce") "hostPath" (printf "%s/compression_scheduler" .Values.clpConfig.logs_directory) diff --git a/tools/deployment/package-helm/templates/compression-scheduler-user-logs-pv.yaml b/tools/deployment/package-helm/templates/compression-scheduler-user-logs-pv.yaml index 9b667a58bf..2e30551fbd 100644 --- a/tools/deployment/package-helm/templates/compression-scheduler-user-logs-pv.yaml +++ b/tools/deployment/package-helm/templates/compression-scheduler-user-logs-pv.yaml @@ -1,8 +1,7 @@ -{{- include "clp.createLocalPv" (dict +{{- include "clp.createStaticPv" (dict "root" . "component_category" "compression-scheduler" "name" "user-logs" - "nodeRole" "control-plane" "capacity" "10Gi" "accessModes" (list "ReadWriteOnce") "hostPath" (printf "%s/user" .Values.clpConfig.logs_directory) diff --git a/tools/deployment/package-helm/templates/compression-worker-deployment.yaml b/tools/deployment/package-helm/templates/compression-worker-deployment.yaml index 70a6165154..c6c24f355c 100644 --- a/tools/deployment/package-helm/templates/compression-worker-deployment.yaml +++ b/tools/deployment/package-helm/templates/compression-worker-deployment.yaml @@ -6,7 +6,7 @@ metadata: {{- include "clp.labels" . 
| nindent 4 }} app.kubernetes.io/component: "compression-worker" spec: - replicas: 1 + replicas: {{ .Values.compressionWorker.replicas }} selector: matchLabels: {{- include "clp.selectorLabels" . | nindent 6 }} @@ -17,6 +17,10 @@ spec: {{- include "clp.labels" . | nindent 8 }} app.kubernetes.io/component: "compression-worker" spec: + {{- include "clp.createSchedulingConfigs" (dict + "root" . + "component" "compressionWorker" + ) | nindent 6 }} terminationGracePeriodSeconds: 60 securityContext: runAsUser: {{ .Values.securityContext.firstParty.uid }} @@ -45,7 +49,6 @@ spec: - name: "PYTHONPATH" value: "/opt/clp/lib/python3/site-packages" volumeMounts: - - {{- include "clp.logsInputVolumeMount" . | nindent 14 }} - name: {{ include "clp.volumeName" (dict "component_category" "compression-worker" "name" "logs" @@ -60,11 +63,25 @@ spec: mountPath: "/etc/clp-config.yaml" subPath: "clp-config.yaml" readOnly: true + {{- if eq .Values.clpConfig.archive_output.storage.type "fs" }} - name: {{ include "clp.volumeName" (dict "component_category" "shared-data" "name" "archives" ) | quote }} mountPath: "/var/data/archives" + {{- else if eq .Values.clpConfig.archive_output.storage.type "s3" }} + - name: {{ include "clp.volumeName" (dict + "component_category" "compression-worker" + "name" "staged-archives" + ) | quote }} + mountPath: "/var/data/staged-archives" + {{- end }} + {{- if .Values.clpConfig.aws_config_directory }} + - {{- include "clp.awsConfigVolumeMount" . | nindent 14 }} + {{- end }} + {{- if eq .Values.clpConfig.logs_input.type "fs" }} + - {{- include "clp.logsInputVolumeMount" . | nindent 14 }} + {{- end }} command: [ "python3", "-u", "/opt/clp/lib/python3/site-packages/bin/celery", @@ -77,7 +94,6 @@ spec: "-n", "compression-worker" ] volumes: - - {{- include "clp.logsInputVolume" . | nindent 10 }} - {{- include "clp.pvcVolume" (dict "root" . 
"component_category" "compression-worker" @@ -88,11 +104,25 @@ spec: "component_category" "compression-worker" "name" "tmp" ) | nindent 10 }} + - name: "config" + configMap: + name: {{ include "clp.fullname" . }}-config + {{- if eq .Values.clpConfig.archive_output.storage.type "fs" }} - {{- include "clp.pvcVolume" (dict "root" . "component_category" "shared-data" "name" "archives" ) | nindent 10 }} - - name: "config" - configMap: - name: {{ include "clp.fullname" . }}-config + {{- else if eq .Values.clpConfig.archive_output.storage.type "s3" }} + - {{- include "clp.pvcVolume" (dict + "root" . + "component_category" "compression-worker" + "name" "staged-archives" + ) | nindent 10 }} + {{- end }} + {{- if .Values.clpConfig.aws_config_directory }} + - {{- include "clp.awsConfigVolume" . | nindent 10 }} + {{- end }} + {{- if eq .Values.clpConfig.logs_input.type "fs" }} + - {{- include "clp.logsInputVolume" . | nindent 10 }} + {{- end }} diff --git a/tools/deployment/package-helm/templates/compression-worker-logs-pv.yaml b/tools/deployment/package-helm/templates/compression-worker-logs-pv.yaml index 4b6d55466b..55f243cf8c 100644 --- a/tools/deployment/package-helm/templates/compression-worker-logs-pv.yaml +++ b/tools/deployment/package-helm/templates/compression-worker-logs-pv.yaml @@ -1,8 +1,7 @@ -{{- include "clp.createLocalPv" (dict +{{- include "clp.createStaticPv" (dict "root" . 
"component_category" "compression-worker" "name" "logs" - "nodeRole" "control-plane" "capacity" "5Gi" "accessModes" (list "ReadWriteOnce") "hostPath" (printf "%s/compression_worker" .Values.clpConfig.logs_directory) diff --git a/tools/deployment/package-helm/templates/compression-worker-staged-archives-pv.yaml b/tools/deployment/package-helm/templates/compression-worker-staged-archives-pv.yaml new file mode 100644 index 0000000000..e44096b060 --- /dev/null +++ b/tools/deployment/package-helm/templates/compression-worker-staged-archives-pv.yaml @@ -0,0 +1,10 @@ +{{- if eq .Values.clpConfig.archive_output.storage.type "s3" }} +{{- include "clp.createStaticPv" (dict + "root" . + "component_category" "compression-worker" + "name" "staged-archives" + "capacity" "50Gi" + "accessModes" (list "ReadWriteOnce") + "hostPath" (printf "%s/var/data/staged-archives" .Values.clpConfig.data_directory) +) }} +{{- end }} diff --git a/tools/deployment/package-helm/templates/compression-worker-staged-archives-pvc.yaml b/tools/deployment/package-helm/templates/compression-worker-staged-archives-pvc.yaml new file mode 100644 index 0000000000..b40e8720f9 --- /dev/null +++ b/tools/deployment/package-helm/templates/compression-worker-staged-archives-pvc.yaml @@ -0,0 +1,9 @@ +{{- if eq .Values.clpConfig.archive_output.storage.type "s3" }} +{{- include "clp.createPvc" (dict + "root" . + "component_category" "compression-worker" + "name" "staged-archives" + "capacity" "50Gi" + "accessModes" (list "ReadWriteOnce") +) }} +{{- end }} diff --git a/tools/deployment/package-helm/templates/compression-worker-tmp-pv.yaml b/tools/deployment/package-helm/templates/compression-worker-tmp-pv.yaml index 43e46c2503..d7107e14ae 100644 --- a/tools/deployment/package-helm/templates/compression-worker-tmp-pv.yaml +++ b/tools/deployment/package-helm/templates/compression-worker-tmp-pv.yaml @@ -1,8 +1,7 @@ -{{- include "clp.createLocalPv" (dict +{{- include "clp.createStaticPv" (dict "root" . 
"component_category" "compression-worker" "name" "tmp" - "nodeRole" "control-plane" "capacity" "10Gi" "accessModes" (list "ReadWriteOnce") "hostPath" .Values.clpConfig.tmp_directory diff --git a/tools/deployment/package-helm/templates/configmap.yaml b/tools/deployment/package-helm/templates/configmap.yaml index 78cf6b6677..42caabd26a 100644 --- a/tools/deployment/package-helm/templates/configmap.yaml +++ b/tools/deployment/package-helm/templates/configmap.yaml @@ -6,11 +6,55 @@ metadata: {{- include "clp.labels" . | nindent 4 }} data: clp-config.yaml: | + {{- with .Values.clpConfig.api_server }} + api_server: + default_max_num_query_results: {{ .default_max_num_query_results | int }} + host: "localhost" + port: 3001 + query_job_polling: + initial_backoff_ms: {{ .query_job_polling.initial_backoff_ms | int }} + max_backoff_ms: {{ .query_job_polling.max_backoff_ms | int }} + {{- else }} + api_server: null + {{- end }} archive_output: compression_level: {{ .Values.clpConfig.archive_output.compression_level }} + {{- with .Values.clpConfig.archive_output.storage }} storage: + {{- if eq .type "fs" }} directory: "/var/data/archives" type: "fs" + {{- else if eq .type "s3" }} + type: "s3" + staging_directory: "/var/data/staged-archives" + {{- with .s3_config }} + s3_config: + {{- if .endpoint_url }} + endpoint_url: {{ .endpoint_url | quote }} + {{- end }} + {{- if .region_code }} + region_code: {{ .region_code | quote }} + {{- end }} + bucket: {{ .bucket | quote }} + key_prefix: {{ .key_prefix | quote }} + {{- with .aws_authentication }} + aws_authentication: + type: {{ .type | quote }} + {{- if .profile }} + profile: {{ .profile | quote }} + {{- end }} + {{- if .credentials }} + credentials: + access_key_id: {{ .credentials.access_key_id | quote }} + secret_access_key: {{ .credentials.secret_access_key | quote }} + {{- if .credentials.session_token }} + session_token: {{ .credentials.session_token | quote }} + {{- end }} + {{- end }} + {{- end }}{{/* with 
.aws_authentication */}} + {{- end }}{{/* with .s3_config */}} + {{- end }}{{/* if eq .type "fs" */}} + {{- end }}{{/* with .Values.clpConfig.archive_output.storage */}} {{- if .Values.clpConfig.archive_output.retention_period }} retention_period: {{ .Values.clpConfig.archive_output.retention_period | int }} {{- else }} @@ -46,9 +90,30 @@ data: ssl_cert: null type: {{ .Values.clpConfig.database.type | quote }} logs_directory: "/var/log" + {{- with .Values.clpConfig.logs_input }} logs_input: + {{- if eq .type "fs" }} directory: "/mnt/logs" type: "fs" + {{- else if eq .type "s3" }} + type: "s3" + {{- with .aws_authentication }} + aws_authentication: + type: {{ .type | quote }} + {{- if .profile }} + profile: {{ .profile | quote }} + {{- end }} + {{- if .credentials }} + credentials: + access_key_id: {{ .credentials.access_key_id | quote }} + secret_access_key: {{ .credentials.secret_access_key | quote }} + {{- if .credentials.session_token }} + session_token: {{ .credentials.session_token | quote }} + {{- end }} + {{- end }} + {{- end }}{{/* with .aws_authentication */}} + {{- end }}{{/* if eq .type "fs" */}} + {{- end }}{{/* with .Values.clpConfig.logs_input */}} package: query_engine: {{ .Values.clpConfig.package.query_engine | quote }} storage_engine: {{ .Values.clpConfig.package.storage_engine | quote }} @@ -84,12 +149,50 @@ data: retention_period: null {{- end }} stream_collection_name: {{ .Values.clpConfig.results_cache.stream_collection_name | quote }} + {{- with .Values.clpConfig.stream_output }} stream_output: + {{- with .storage }} storage: + {{- if eq .type "fs" }} directory: "/var/data/streams" type: "fs" - target_uncompressed_size: {{ .Values.clpConfig.stream_output.target_uncompressed_size | int }} + {{- else if eq .type "s3" }} + type: "s3" + staging_directory: "/var/data/staged-streams" + {{- with .s3_config }} + s3_config: + {{- if .endpoint_url }} + endpoint_url: {{ .endpoint_url | quote }} + {{- end }} + {{- if .region_code }} + region_code: {{ 
.region_code | quote }} + {{- end }} + bucket: {{ .bucket | quote }} + key_prefix: {{ .key_prefix | quote }} + {{- with .aws_authentication }} + aws_authentication: + type: {{ .type | quote }} + {{- if .profile }} + profile: {{ .profile | quote }} + {{- end }} + {{- if .credentials }} + credentials: + access_key_id: {{ .credentials.access_key_id | quote }} + secret_access_key: {{ .credentials.secret_access_key | quote }} + {{- if .credentials.session_token }} + session_token: {{ .credentials.session_token | quote }} + {{- end }} + {{- end }} + {{- end }}{{/* with .aws_authentication */}} + {{- end }}{{/* with .s3_config */}} + {{- end }}{{/* if eq .type "fs" */}} + {{- end }}{{/* with .storage */}} + target_uncompressed_size: {{ .target_uncompressed_size | int }} + {{- end }}{{/* with .Values.clpConfig.stream_output */}} tmp_directory: "/var/tmp" + {{- if .Values.clpConfig.aws_config_directory }} + aws_config_directory: {{ .Values.clpConfig.aws_config_directory | quote }} + {{- end }} webui: host: "localhost" port: 4000 @@ -130,7 +233,11 @@ data: "ClpStorageEngine": {{ .Values.clpConfig.package.storage_engine | quote }}, "ClpQueryEngine": {{ .Values.clpConfig.package.query_engine | quote }}, "LogsInputType": {{ .Values.clpConfig.logs_input.type | quote }}, + {{- if eq .Values.clpConfig.logs_input.type "fs" }} "LogsInputRootDir": "/mnt/logs", + {{- else }} + "LogsInputRootDir": null, + {{- end }} "MongoDbSearchResultsMetadataCollectionName": {{ .Values.clpConfig.webui.results_metadata_collection_name | quote }}, "SqlDbClpArchivesTableName": "clp_archives", @@ -156,13 +263,33 @@ data: {{ .Values.clpConfig.results_cache.stream_collection_name | quote }}, "ClientDir": "/opt/clp/var/www/webui/client", "LogViewerDir": "/opt/clp/var/www/webui/yscope-log-viewer", + {{- if eq .Values.clpConfig.logs_input.type "fs" }} "LogsInputRootDir": "/mnt/logs", + {{- else }} + "LogsInputRootDir": null, + {{- end }} + {{- if eq .Values.clpConfig.stream_output.storage.type "fs" }} 
"StreamFilesDir": "/var/data/streams", + {{- else }} + "StreamFilesDir": null, + {{- end }} "StreamTargetUncompressedSize": {{ .Values.clpConfig.stream_output.target_uncompressed_size | int }}, + {{- if eq .Values.clpConfig.stream_output.storage.type "s3" }} + {{- with .Values.clpConfig.stream_output.storage.s3_config }} + "StreamFilesS3Region": {{ .region_code | default "null" | quote }}, + "StreamFilesS3PathPrefix": {{ printf "s3://%s/%s" .bucket .key_prefix | quote }}, + {{- if eq .aws_authentication.type "profile" }} + "StreamFilesS3Profile": {{ .aws_authentication.profile | quote }}, + {{- else }} + "StreamFilesS3Profile": null, + {{- end }} + {{- end }}{{/* with .Values.clpConfig.stream_output.storage.s3_config */}} + {{- else }} "StreamFilesS3Region": null, "StreamFilesS3PathPrefix": null, "StreamFilesS3Profile": null, + {{- end }} "ArchiveOutputCompressionLevel": {{ .Values.clpConfig.archive_output.compression_level }}, "ArchiveOutputTargetArchiveSize": {{ .Values.clpConfig.archive_output.target_archive_size | int }}, diff --git a/tools/deployment/package-helm/templates/database-data-pv.yaml b/tools/deployment/package-helm/templates/database-data-pv.yaml index 3bd6ea2b9a..1456cb9f9f 100644 --- a/tools/deployment/package-helm/templates/database-data-pv.yaml +++ b/tools/deployment/package-helm/templates/database-data-pv.yaml @@ -1,8 +1,7 @@ -{{- include "clp.createLocalPv" (dict +{{- include "clp.createStaticPv" (dict "root" . 
"component_category" "database" "name" "data" - "nodeRole" "control-plane" "capacity" "20Gi" "accessModes" (list "ReadWriteOnce") "hostPath" (printf "%s/database" .Values.clpConfig.data_directory) diff --git a/tools/deployment/package-helm/templates/database-logs-pv.yaml b/tools/deployment/package-helm/templates/database-logs-pv.yaml index 794215cf3a..c9f2e63793 100644 --- a/tools/deployment/package-helm/templates/database-logs-pv.yaml +++ b/tools/deployment/package-helm/templates/database-logs-pv.yaml @@ -1,8 +1,7 @@ -{{- include "clp.createLocalPv" (dict +{{- include "clp.createStaticPv" (dict "root" . "component_category" "database" "name" "logs" - "nodeRole" "control-plane" "capacity" "5Gi" "accessModes" (list "ReadWriteOnce") "hostPath" (printf "%s/database" .Values.clpConfig.logs_directory) diff --git a/tools/deployment/package-helm/templates/garbage-collector-deployment.yaml b/tools/deployment/package-helm/templates/garbage-collector-deployment.yaml index a4f810cb73..cccacd96bb 100644 --- a/tools/deployment/package-helm/templates/garbage-collector-deployment.yaml +++ b/tools/deployment/package-helm/templates/garbage-collector-deployment.yaml @@ -1,4 +1,6 @@ -{{- if or .Values.clpConfig.archive_output.retention_period .Values.clpConfig.results_cache.retention_period }} +{{- $archiveRetention := .Values.clpConfig.archive_output.retention_period }} +{{- $cacheRetention := .Values.clpConfig.results_cache.retention_period }} +{{- if or $archiveRetention $cacheRetention }} apiVersion: "apps/v1" kind: "Deployment" metadata: @@ -59,27 +61,34 @@ spec: - name: "PYTHONPATH" value: "/opt/clp/lib/python3/site-packages" volumeMounts: + - name: {{ include "clp.volumeName" (dict + "component_category" "garbage-collector" + "name" "logs" + ) | quote }} + mountPath: "/var/log/garbage_collector" - name: "config" mountPath: "/etc/clp-config.yaml" subPath: "clp-config.yaml" readOnly: true - name: "tmp" mountPath: "/var/log" - - name: {{ include "clp.volumeName" (dict - 
"component_category" "garbage-collector" - "name" "logs" - ) | quote }} - mountPath: "/var/log/garbage_collector" + {{- if eq .Values.clpConfig.archive_output.storage.type "fs" }} - name: {{ include "clp.volumeName" (dict "component_category" "shared-data" "name" "archives" ) | quote }} mountPath: "/var/data/archives" + {{- end }} + {{- if .Values.clpConfig.aws_config_directory }} + - {{- include "clp.awsConfigVolumeMount" . | nindent 14 }} + {{- end }} + {{- if eq .Values.clpConfig.stream_output.storage.type "fs" }} - name: {{ include "clp.volumeName" (dict "component_category" "shared-data" "name" "streams" ) | quote }} mountPath: "/var/data/streams" + {{- end }} command: [ "python3", "-u", "-m", "job_orchestration.garbage_collector.garbage_collector", @@ -91,19 +100,26 @@ spec: "component_category" "garbage-collector" "name" "logs" ) | nindent 10 }} + - name: "config" + configMap: + name: {{ include "clp.fullname" . }}-config + - name: "tmp" + emptyDir: {} + {{- if eq .Values.clpConfig.archive_output.storage.type "fs" }} - {{- include "clp.pvcVolume" (dict "root" . "component_category" "shared-data" "name" "archives" ) | nindent 10 }} + {{- end }} + {{- if .Values.clpConfig.aws_config_directory }} + - {{- include "clp.awsConfigVolume" . | nindent 10 }} + {{- end }} + {{- if eq .Values.clpConfig.stream_output.storage.type "fs" }} - {{- include "clp.pvcVolume" (dict "root" . "component_category" "shared-data" "name" "streams" ) | nindent 10 }} - - name: "config" - configMap: - name: {{ include "clp.fullname" . 
}}-config - - name: "tmp" - emptyDir: {} + {{- end }} {{- end }} diff --git a/tools/deployment/package-helm/templates/garbage-collector-logs-pv.yaml b/tools/deployment/package-helm/templates/garbage-collector-logs-pv.yaml index 608e03739b..9f1954623c 100644 --- a/tools/deployment/package-helm/templates/garbage-collector-logs-pv.yaml +++ b/tools/deployment/package-helm/templates/garbage-collector-logs-pv.yaml @@ -1,9 +1,10 @@ -{{- if or .Values.clpConfig.archive_output.retention_period .Values.clpConfig.results_cache.retention_period }} -{{- include "clp.createLocalPv" (dict +{{- $archiveRetention := .Values.clpConfig.archive_output.retention_period }} +{{- $cacheRetention := .Values.clpConfig.results_cache.retention_period }} +{{- if or $archiveRetention $cacheRetention }} +{{- include "clp.createStaticPv" (dict "root" . "component_category" "garbage-collector" "name" "logs" - "nodeRole" "control-plane" "capacity" "5Gi" "accessModes" (list "ReadWriteOnce") "hostPath" (printf "%s/garbage_collector" .Values.clpConfig.logs_directory) diff --git a/tools/deployment/package-helm/templates/garbage-collector-logs-pvc.yaml b/tools/deployment/package-helm/templates/garbage-collector-logs-pvc.yaml index 9f40e096e8..bf31d6807a 100644 --- a/tools/deployment/package-helm/templates/garbage-collector-logs-pvc.yaml +++ b/tools/deployment/package-helm/templates/garbage-collector-logs-pvc.yaml @@ -1,4 +1,6 @@ -{{- if or .Values.clpConfig.archive_output.retention_period .Values.clpConfig.results_cache.retention_period }} +{{- $archiveRetention := .Values.clpConfig.archive_output.retention_period }} +{{- $cacheRetention := .Values.clpConfig.results_cache.retention_period }} +{{- if or $archiveRetention $cacheRetention }} {{- include "clp.createPvc" (dict "root" . 
"component_category" "garbage-collector" diff --git a/tools/deployment/package-helm/templates/query-scheduler-logs-pv.yaml b/tools/deployment/package-helm/templates/query-scheduler-logs-pv.yaml index de7633da55..bc66ac1ae3 100644 --- a/tools/deployment/package-helm/templates/query-scheduler-logs-pv.yaml +++ b/tools/deployment/package-helm/templates/query-scheduler-logs-pv.yaml @@ -1,8 +1,7 @@ -{{- include "clp.createLocalPv" (dict +{{- include "clp.createStaticPv" (dict "root" . "component_category" "query-scheduler" "name" "logs" - "nodeRole" "control-plane" "capacity" "5Gi" "accessModes" (list "ReadWriteOnce") "hostPath" (printf "%s/query_scheduler" .Values.clpConfig.logs_directory) diff --git a/tools/deployment/package-helm/templates/query-worker-deployment.yaml b/tools/deployment/package-helm/templates/query-worker-deployment.yaml index f95e4ddaf7..33e4e20b86 100644 --- a/tools/deployment/package-helm/templates/query-worker-deployment.yaml +++ b/tools/deployment/package-helm/templates/query-worker-deployment.yaml @@ -6,7 +6,7 @@ metadata: {{- include "clp.labels" . | nindent 4 }} app.kubernetes.io/component: "query-worker" spec: - replicas: 1 + replicas: {{ .Values.queryWorker.replicas }} selector: matchLabels: {{- include "clp.selectorLabels" . | nindent 6 }} @@ -17,6 +17,10 @@ spec: {{- include "clp.labels" . | nindent 8 }} app.kubernetes.io/component: "query-worker" spec: + {{- include "clp.createSchedulingConfigs" (dict + "root" . 
+ "component" "queryWorker" + ) | nindent 6 }} terminationGracePeriodSeconds: 60 securityContext: runAsUser: {{ .Values.securityContext.firstParty.uid }} @@ -45,25 +49,38 @@ spec: - name: "PYTHONPATH" value: "/opt/clp/lib/python3/site-packages" volumeMounts: - - name: "config" - mountPath: "/etc/clp-config.yaml" - subPath: "clp-config.yaml" - readOnly: true - name: {{ include "clp.volumeName" (dict "component_category" "query-worker" "name" "logs" ) | quote }} mountPath: "/var/log/query_worker" + - name: "config" + mountPath: "/etc/clp-config.yaml" + subPath: "clp-config.yaml" + readOnly: true + {{- if eq .Values.clpConfig.archive_output.storage.type "fs" }} - name: {{ include "clp.volumeName" (dict "component_category" "shared-data" "name" "archives" ) | quote }} mountPath: "/var/data/archives" + {{- end }} + {{- if .Values.clpConfig.aws_config_directory }} + - {{- include "clp.awsConfigVolumeMount" . | nindent 14 }} + {{- end }} + {{- if eq .Values.clpConfig.stream_output.storage.type "fs" }} - name: {{ include "clp.volumeName" (dict "component_category" "shared-data" "name" "streams" ) | quote }} mountPath: "/var/data/streams" + {{- else if eq .Values.clpConfig.stream_output.storage.type "s3" }} + - name: {{ include "clp.volumeName" (dict + "component_category" "query-worker" + "name" "staged-streams" + ) | quote }} + mountPath: "/var/data/staged-streams" + {{- end }} command: [ "python3", "-u", "/opt/clp/lib/python3/site-packages/bin/celery", @@ -81,16 +98,29 @@ spec: "component_category" "query-worker" "name" "logs" ) | nindent 10 }} + - name: "config" + configMap: + name: {{ include "clp.fullname" . }}-config + {{- if eq .Values.clpConfig.archive_output.storage.type "fs" }} - {{- include "clp.pvcVolume" (dict "root" . "component_category" "shared-data" "name" "archives" ) | nindent 10 }} + {{- end }} + {{- if .Values.clpConfig.aws_config_directory }} + - {{- include "clp.awsConfigVolume" . 
| nindent 10 }} + {{- end }} + {{- if eq .Values.clpConfig.stream_output.storage.type "fs" }} - {{- include "clp.pvcVolume" (dict "root" . "component_category" "shared-data" "name" "streams" ) | nindent 10 }} - - name: "config" - configMap: - name: {{ include "clp.fullname" . }}-config + {{- else if eq .Values.clpConfig.stream_output.storage.type "s3" }} + - {{- include "clp.pvcVolume" (dict + "root" . + "component_category" "query-worker" + "name" "staged-streams" + ) | nindent 10 }} + {{- end }} diff --git a/tools/deployment/package-helm/templates/query-worker-logs-pv.yaml b/tools/deployment/package-helm/templates/query-worker-logs-pv.yaml index 4f602a8c74..1fd12c2e84 100644 --- a/tools/deployment/package-helm/templates/query-worker-logs-pv.yaml +++ b/tools/deployment/package-helm/templates/query-worker-logs-pv.yaml @@ -1,8 +1,7 @@ -{{- include "clp.createLocalPv" (dict +{{- include "clp.createStaticPv" (dict "root" . "component_category" "query-worker" "name" "logs" - "nodeRole" "control-plane" "capacity" "5Gi" "accessModes" (list "ReadWriteOnce") "hostPath" (printf "%s/query_worker" .Values.clpConfig.logs_directory) diff --git a/tools/deployment/package-helm/templates/query-worker-staged-streams-pv.yaml b/tools/deployment/package-helm/templates/query-worker-staged-streams-pv.yaml new file mode 100644 index 0000000000..36d744a522 --- /dev/null +++ b/tools/deployment/package-helm/templates/query-worker-staged-streams-pv.yaml @@ -0,0 +1,10 @@ +{{- if eq .Values.clpConfig.stream_output.storage.type "s3" }} +{{- include "clp.createStaticPv" (dict + "root" . 
+ "component_category" "query-worker" + "name" "staged-streams" + "capacity" "50Gi" + "accessModes" (list "ReadWriteOnce") + "hostPath" (printf "%s/var/data/staged-streams" .Values.clpConfig.data_directory) +) }} +{{- end }} diff --git a/tools/deployment/package-helm/templates/query-worker-staged-streams-pvc.yaml b/tools/deployment/package-helm/templates/query-worker-staged-streams-pvc.yaml new file mode 100644 index 0000000000..0fac69eeb5 --- /dev/null +++ b/tools/deployment/package-helm/templates/query-worker-staged-streams-pvc.yaml @@ -0,0 +1,9 @@ +{{- if eq .Values.clpConfig.stream_output.storage.type "s3" }} +{{- include "clp.createPvc" (dict + "root" . + "component_category" "query-worker" + "name" "staged-streams" + "capacity" "50Gi" + "accessModes" (list "ReadWriteOnce") +) }} +{{- end }} diff --git a/tools/deployment/package-helm/templates/queue-logs-pv.yaml b/tools/deployment/package-helm/templates/queue-logs-pv.yaml index c9315630f5..37f45a5d4e 100644 --- a/tools/deployment/package-helm/templates/queue-logs-pv.yaml +++ b/tools/deployment/package-helm/templates/queue-logs-pv.yaml @@ -1,8 +1,7 @@ -{{- include "clp.createLocalPv" (dict +{{- include "clp.createStaticPv" (dict "root" . "component_category" "queue" "name" "logs" - "nodeRole" "control-plane" "capacity" "5Gi" "accessModes" (list "ReadWriteOnce") "hostPath" (printf "%s/queue" .Values.clpConfig.logs_directory) diff --git a/tools/deployment/package-helm/templates/redis-data-pv.yaml b/tools/deployment/package-helm/templates/redis-data-pv.yaml index 56efc9d19c..5e30c1c1c4 100644 --- a/tools/deployment/package-helm/templates/redis-data-pv.yaml +++ b/tools/deployment/package-helm/templates/redis-data-pv.yaml @@ -1,8 +1,7 @@ -{{- include "clp.createLocalPv" (dict +{{- include "clp.createStaticPv" (dict "root" . 
"component_category" "redis" "name" "data" - "nodeRole" "control-plane" "capacity" "20Gi" "accessModes" (list "ReadWriteOnce") "hostPath" (printf "%s/redis" .Values.clpConfig.data_directory) diff --git a/tools/deployment/package-helm/templates/redis-logs-pv.yaml b/tools/deployment/package-helm/templates/redis-logs-pv.yaml index 7f03c8cdad..811b48a4d8 100644 --- a/tools/deployment/package-helm/templates/redis-logs-pv.yaml +++ b/tools/deployment/package-helm/templates/redis-logs-pv.yaml @@ -1,8 +1,7 @@ -{{- include "clp.createLocalPv" (dict +{{- include "clp.createStaticPv" (dict "root" . "component_category" "redis" "name" "logs" - "nodeRole" "control-plane" "capacity" "5Gi" "accessModes" (list "ReadWriteOnce") "hostPath" (printf "%s/redis" .Values.clpConfig.logs_directory) diff --git a/tools/deployment/package-helm/templates/reducer-logs-pv.yaml b/tools/deployment/package-helm/templates/reducer-logs-pv.yaml index 0aab54233a..4c467877ae 100644 --- a/tools/deployment/package-helm/templates/reducer-logs-pv.yaml +++ b/tools/deployment/package-helm/templates/reducer-logs-pv.yaml @@ -1,8 +1,7 @@ -{{- include "clp.createLocalPv" (dict +{{- include "clp.createStaticPv" (dict "root" . "component_category" "reducer" "name" "logs" - "nodeRole" "control-plane" "capacity" "5Gi" "accessModes" (list "ReadWriteOnce") "hostPath" (printf "%s/reducer" .Values.clpConfig.logs_directory) diff --git a/tools/deployment/package-helm/templates/results-cache-data-pv.yaml b/tools/deployment/package-helm/templates/results-cache-data-pv.yaml index 8410734b2e..9e8ccea38e 100644 --- a/tools/deployment/package-helm/templates/results-cache-data-pv.yaml +++ b/tools/deployment/package-helm/templates/results-cache-data-pv.yaml @@ -1,8 +1,7 @@ -{{- include "clp.createLocalPv" (dict +{{- include "clp.createStaticPv" (dict "root" . 
"component_category" "results-cache" "name" "data" - "nodeRole" "control-plane" "capacity" "20Gi" "accessModes" (list "ReadWriteOnce") "hostPath" (printf "%s/results_cache" .Values.clpConfig.data_directory) diff --git a/tools/deployment/package-helm/templates/results-cache-logs-pv.yaml b/tools/deployment/package-helm/templates/results-cache-logs-pv.yaml index e76aaf90af..2064685ee0 100644 --- a/tools/deployment/package-helm/templates/results-cache-logs-pv.yaml +++ b/tools/deployment/package-helm/templates/results-cache-logs-pv.yaml @@ -1,8 +1,7 @@ -{{- include "clp.createLocalPv" (dict +{{- include "clp.createStaticPv" (dict "root" . "component_category" "results-cache" "name" "logs" - "nodeRole" "control-plane" "capacity" "5Gi" "accessModes" (list "ReadWriteOnce") "hostPath" (printf "%s/results_cache" .Values.clpConfig.logs_directory) diff --git a/tools/deployment/package-helm/templates/shared-data-archives-pv.yaml b/tools/deployment/package-helm/templates/shared-data-archives-pv.yaml index 3e17545925..4260827682 100644 --- a/tools/deployment/package-helm/templates/shared-data-archives-pv.yaml +++ b/tools/deployment/package-helm/templates/shared-data-archives-pv.yaml @@ -1,8 +1,7 @@ -{{- include "clp.createLocalPv" (dict +{{- include "clp.createStaticPv" (dict "root" . "component_category" "shared-data" "name" "archives" - "nodeRole" "control-plane" "capacity" "50Gi" "accessModes" (list "ReadWriteMany") "hostPath" .Values.clpConfig.archive_output.storage.directory diff --git a/tools/deployment/package-helm/templates/shared-data-streams-pv.yaml b/tools/deployment/package-helm/templates/shared-data-streams-pv.yaml index 8cc2cd8019..d235870390 100644 --- a/tools/deployment/package-helm/templates/shared-data-streams-pv.yaml +++ b/tools/deployment/package-helm/templates/shared-data-streams-pv.yaml @@ -1,8 +1,7 @@ -{{- include "clp.createLocalPv" (dict +{{- include "clp.createStaticPv" (dict "root" . 
"component_category" "shared-data" "name" "streams" - "nodeRole" "control-plane" "capacity" "20Gi" "accessModes" (list "ReadWriteMany") "hostPath" .Values.clpConfig.stream_output.storage.directory diff --git a/tools/deployment/package-helm/templates/storage-class.yaml b/tools/deployment/package-helm/templates/storage-class.yaml new file mode 100644 index 0000000000..acbb3c4334 --- /dev/null +++ b/tools/deployment/package-helm/templates/storage-class.yaml @@ -0,0 +1,10 @@ +{{- if eq .Values.storage.storageClassName "local-storage" }} +apiVersion: "storage.k8s.io/v1" +kind: "StorageClass" +metadata: + name: "local-storage" + labels: + {{- include "clp.labels" . | nindent 4 }} +provisioner: "kubernetes.io/no-provisioner" +volumeBindingMode: "WaitForFirstConsumer" +{{- end }} diff --git a/tools/deployment/package-helm/templates/webui-deployment.yaml b/tools/deployment/package-helm/templates/webui-deployment.yaml index ec4ae5c13a..78b9d6b0f8 100644 --- a/tools/deployment/package-helm/templates/webui-deployment.yaml +++ b/tools/deployment/package-helm/templates/webui-deployment.yaml @@ -59,11 +59,19 @@ spec: value: "4000" - name: "RATE_LIMIT" value: {{ .Values.clpConfig.webui.rate_limit | quote }} + {{- with .Values.clpConfig.stream_output.storage }} + {{- if and (eq .type "s3") (eq .s3_config.aws_authentication.type "credentials") }} + - name: "AWS_ACCESS_KEY_ID" + value: {{ .s3_config.aws_authentication.credentials.access_key_id | quote }} + - name: "AWS_SECRET_ACCESS_KEY" + value: {{ .s3_config.aws_authentication.credentials.secret_access_key | quote }} + {{- end }}{{/* if and (eq .type "s3") + (eq .s3_config.aws_authentication.type "credentials") */}} + {{- end }}{{/* with .Values.clpConfig.stream_output.storage */}} ports: - name: "webui" containerPort: 4000 volumeMounts: - - {{- include "clp.logsInputVolumeMount" . 
| nindent 14 }} - name: "client-settings" mountPath: "/opt/clp/var/www/webui/client/settings.json" subPath: "webui-client-settings.json" @@ -72,11 +80,19 @@ spec: mountPath: "/opt/clp/var/www/webui/server/dist/settings.json" subPath: "webui-server-settings.json" readOnly: true + {{- if .Values.clpConfig.aws_config_directory }} + - {{- include "clp.awsConfigVolumeMount" . | nindent 14 }} + {{- end }} + {{- if eq .Values.clpConfig.logs_input.type "fs" }} + - {{- include "clp.logsInputVolumeMount" . | nindent 14 }} + {{- end }} + {{- if eq .Values.clpConfig.stream_output.storage.type "fs" }} - name: {{ include "clp.volumeName" (dict "component_category" "shared-data" "name" "streams" ) | quote }} mountPath: "/var/data/streams" + {{- end }} command: [ "/opt/clp/bin/node-22", "/opt/clp/var/www/webui/server/dist/src/main.js" @@ -89,15 +105,22 @@ spec: {{- include "clp.livenessProbeTimings" . | nindent 12 }} tcpSocket: *webui-health-check volumes: - - {{- include "clp.logsInputVolume" . | nindent 10 }} - - {{- include "clp.pvcVolume" (dict - "root" . - "component_category" "shared-data" - "name" "streams" - ) | nindent 10 }} - name: "client-settings" configMap: name: {{ include "clp.fullname" . }}-config - name: "server-settings" configMap: name: {{ include "clp.fullname" . }}-config + {{- if .Values.clpConfig.aws_config_directory }} + - {{- include "clp.awsConfigVolume" . | nindent 10 }} + {{- end }} + {{- if eq .Values.clpConfig.logs_input.type "fs" }} + - {{- include "clp.logsInputVolume" . | nindent 10 }} + {{- end }} + {{- if eq .Values.clpConfig.stream_output.storage.type "fs" }} + - {{- include "clp.pvcVolume" (dict + "root" . 
+ "component_category" "shared-data" + "name" "streams" + ) | nindent 10 }} + {{- end }} diff --git a/tools/deployment/package-helm/test-multi-dedicated.sh b/tools/deployment/package-helm/test-multi-dedicated.sh new file mode 100755 index 0000000000..d9af9bf821 --- /dev/null +++ b/tools/deployment/package-helm/test-multi-dedicated.sh @@ -0,0 +1,102 @@ +#!/usr/bin/env bash + +# Multi-node cluster test with dedicated worker nodes for each worker type +# Demonstrates nodeSelector scheduling with separate node pools +# +# To clean up after running: +# kind delete cluster --name "${CLUSTER_NAME}" +# rm -rf /tmp/clp + +script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +source "${script_dir}/.test-common.sh" + +CLUSTER_NAME="${CLUSTER_NAME:-clp-test-dedicated}" +NUM_COMPRESSION_NODES="${NUM_COMPRESSION_NODES:-2}" +NUM_QUERY_NODES="${NUM_QUERY_NODES:-2}" +COMPRESSION_WORKER_REPLICAS="${COMPRESSION_WORKER_REPLICAS:-2}" +QUERY_WORKER_REPLICAS="${QUERY_WORKER_REPLICAS:-2}" + +echo "=== Multi-node test with dedicated worker nodes ===" +echo "Cluster: ${CLUSTER_NAME}" +echo "Compression nodes: ${NUM_COMPRESSION_NODES}" +echo "Query nodes: ${NUM_QUERY_NODES}" +echo "Compression workers: ${COMPRESSION_WORKER_REPLICAS}" +echo "Query workers: ${QUERY_WORKER_REPLICAS}" +echo "" + +echo "Deleting existing cluster if present..." +kind delete cluster --name "${CLUSTER_NAME}" 2>/dev/null || true + +rm -rf "$CLP_HOME" +create_clp_directories +download_samples + +total_workers=$((NUM_COMPRESSION_NODES + NUM_QUERY_NODES)) + +echo "Creating kind cluster..." 
+{ + cat </dev/null || true +sleep 2 +helm install test "${script_dir}" \ + --set "distributed=true" \ + --set "compressionWorker.replicas=${COMPRESSION_WORKER_REPLICAS}" \ + --set "compressionWorker.scheduling.nodeSelector.yscope\.io/nodeType=compression" \ + --set "queryWorker.replicas=${QUERY_WORKER_REPLICAS}" \ + --set "queryWorker.scheduling.nodeSelector.yscope\.io/nodeType=query" + +wait $SAMPLE_DOWNLOAD_PID +echo "Sample download and extraction complete" + +wait_for_pods 300 5 5 diff --git a/tools/deployment/package-helm/test-multi-shared.sh b/tools/deployment/package-helm/test-multi-shared.sh new file mode 100755 index 0000000000..09c52a8bf6 --- /dev/null +++ b/tools/deployment/package-helm/test-multi-shared.sh @@ -0,0 +1,81 @@ +#!/usr/bin/env bash + +# Multi-node cluster test with shared worker nodes +# Both compression and query workers share the same node pool +# +# To clean up after running: +# kind delete cluster --name "${CLUSTER_NAME}" +# rm -rf /tmp/clp + +script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +source "${script_dir}/.test-common.sh" + +CLUSTER_NAME="${CLUSTER_NAME:-clp-test-multi}" +NUM_WORKER_NODES="${NUM_WORKER_NODES:-2}" +COMPRESSION_WORKER_REPLICAS="${COMPRESSION_WORKER_REPLICAS:-2}" +QUERY_WORKER_REPLICAS="${QUERY_WORKER_REPLICAS:-2}" + +echo "=== Multi-node test with shared worker nodes ===" +echo "Cluster: ${CLUSTER_NAME}" +echo "Worker nodes: ${NUM_WORKER_NODES}" +echo "Compression workers: ${COMPRESSION_WORKER_REPLICAS}" +echo "Query workers: ${QUERY_WORKER_REPLICAS}" +echo "" + +echo "Deleting existing cluster if present..." +kind delete cluster --name "${CLUSTER_NAME}" 2>/dev/null || true + +rm -rf "$CLP_HOME" +create_clp_directories +download_samples + +echo "Creating kind cluster..." 
+{ + cat </dev/null || true +sleep 2 +helm install test "${script_dir}" \ + --set "distributed=true" \ + --set "compressionWorker.replicas=${COMPRESSION_WORKER_REPLICAS}" \ + --set "queryWorker.replicas=${QUERY_WORKER_REPLICAS}" + +wait $SAMPLE_DOWNLOAD_PID +echo "Sample download and extraction complete" + +wait_for_pods 300 5 5 diff --git a/tools/deployment/package-helm/test.sh b/tools/deployment/package-helm/test.sh index 937125c7a2..14f7ee5497 100755 --- a/tools/deployment/package-helm/test.sh +++ b/tools/deployment/package-helm/test.sh @@ -56,10 +56,11 @@ wait_for_pods() { kind delete cluster --name clp-test rm -rf "$CLP_HOME" mkdir -p "$CLP_HOME/var/"{data,log}/{database,queue,redis,results_cache} \ - "$CLP_HOME/var/data/"{archives,streams} \ + "$CLP_HOME/var/data/"{archives,streams,staged-archives,staged-streams} \ "$CLP_HOME/var/log/"{compression_scheduler,compression_worker,user} \ "$CLP_HOME/var/log/"{query_scheduler,query_worker,reducer} \ "$CLP_HOME/var/log/garbage_collector" \ + "$CLP_HOME/var/log/api_server" \ "$CLP_HOME/var/tmp" \ "$CLP_HOME/samples" @@ -94,6 +95,9 @@ cat < Date: Wed, 7 Jan 2026 15:22:25 -0500 Subject: [PATCH 02/15] bump chart version --- tools/deployment/package-helm/Chart.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/deployment/package-helm/Chart.yaml b/tools/deployment/package-helm/Chart.yaml index 192d61a98f..8385e19fae 100644 --- a/tools/deployment/package-helm/Chart.yaml +++ b/tools/deployment/package-helm/Chart.yaml @@ -1,6 +1,6 @@ apiVersion: "v2" name: "clp" -version: "0.1.2-dev.16" +version: "0.1.2-dev.17" description: "A Helm chart for CLP's (Compressed Log Processor) package deployment" type: "application" appVersion: "0.7.1-dev" From 203811ba5f7b065abd5f0bc88199760779129c47 Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Thu, 8 Jan 2026 14:13:48 -0500 Subject: [PATCH 03/15] Add configurable reducer replicas and scheduling support --- .../templates/reducer-deployment.yaml | 6 +++++- 
.../package-helm/test-multi-dedicated.sh | 5 ++++- .../deployment/package-helm/test-multi-shared.sh | 5 ++++- tools/deployment/package-helm/values.yaml | 16 ++++++++++++++++ 4 files changed, 29 insertions(+), 3 deletions(-) diff --git a/tools/deployment/package-helm/templates/reducer-deployment.yaml b/tools/deployment/package-helm/templates/reducer-deployment.yaml index 51970785f7..0c8714f59e 100644 --- a/tools/deployment/package-helm/templates/reducer-deployment.yaml +++ b/tools/deployment/package-helm/templates/reducer-deployment.yaml @@ -6,7 +6,7 @@ metadata: {{- include "clp.labels" . | nindent 4 }} app.kubernetes.io/component: "reducer" spec: - replicas: 1 + replicas: {{ .Values.reducer.replicas }} selector: matchLabels: {{- include "clp.selectorLabels" . | nindent 6 }} @@ -20,6 +20,10 @@ spec: serviceAccountName: {{ include "clp.fullname" . }}-job-watcher hostname: {{ include "clp.fullname" . }}-reducer subdomain: {{ include "clp.fullname" . }}-reducer + {{- include "clp.createSchedulingConfigs" (dict + "root" . 
+ "component" "reducer" + ) | nindent 6 }} terminationGracePeriodSeconds: 10 securityContext: runAsUser: {{ .Values.securityContext.firstParty.uid }} diff --git a/tools/deployment/package-helm/test-multi-dedicated.sh b/tools/deployment/package-helm/test-multi-dedicated.sh index d9af9bf821..f23ebbb5dc 100755 --- a/tools/deployment/package-helm/test-multi-dedicated.sh +++ b/tools/deployment/package-helm/test-multi-dedicated.sh @@ -15,6 +15,7 @@ NUM_COMPRESSION_NODES="${NUM_COMPRESSION_NODES:-2}" NUM_QUERY_NODES="${NUM_QUERY_NODES:-2}" COMPRESSION_WORKER_REPLICAS="${COMPRESSION_WORKER_REPLICAS:-2}" QUERY_WORKER_REPLICAS="${QUERY_WORKER_REPLICAS:-2}" +REDUCER_REPLICAS="${REDUCER_REPLICAS:-2}" echo "=== Multi-node test with dedicated worker nodes ===" echo "Cluster: ${CLUSTER_NAME}" @@ -22,6 +23,7 @@ echo "Compression nodes: ${NUM_COMPRESSION_NODES}" echo "Query nodes: ${NUM_QUERY_NODES}" echo "Compression workers: ${COMPRESSION_WORKER_REPLICAS}" echo "Query workers: ${QUERY_WORKER_REPLICAS}" +echo "Reducers: ${REDUCER_REPLICAS}" echo "" echo "Deleting existing cluster if present..." 
@@ -94,7 +96,8 @@ helm install test "${script_dir}" \ --set "compressionWorker.replicas=${COMPRESSION_WORKER_REPLICAS}" \ --set "compressionWorker.scheduling.nodeSelector.yscope\.io/nodeType=compression" \ --set "queryWorker.replicas=${QUERY_WORKER_REPLICAS}" \ - --set "queryWorker.scheduling.nodeSelector.yscope\.io/nodeType=query" + --set "queryWorker.scheduling.nodeSelector.yscope\.io/nodeType=query" \ + --set "reducer.replicas=${REDUCER_REPLICAS}" wait $SAMPLE_DOWNLOAD_PID echo "Sample download and extraction complete" diff --git a/tools/deployment/package-helm/test-multi-shared.sh b/tools/deployment/package-helm/test-multi-shared.sh index 09c52a8bf6..f6621e2218 100755 --- a/tools/deployment/package-helm/test-multi-shared.sh +++ b/tools/deployment/package-helm/test-multi-shared.sh @@ -14,12 +14,14 @@ CLUSTER_NAME="${CLUSTER_NAME:-clp-test-multi}" NUM_WORKER_NODES="${NUM_WORKER_NODES:-2}" COMPRESSION_WORKER_REPLICAS="${COMPRESSION_WORKER_REPLICAS:-2}" QUERY_WORKER_REPLICAS="${QUERY_WORKER_REPLICAS:-2}" +REDUCER_REPLICAS="${REDUCER_REPLICAS:-2}" echo "=== Multi-node test with shared worker nodes ===" echo "Cluster: ${CLUSTER_NAME}" echo "Worker nodes: ${NUM_WORKER_NODES}" echo "Compression workers: ${COMPRESSION_WORKER_REPLICAS}" echo "Query workers: ${QUERY_WORKER_REPLICAS}" +echo "Reducers: ${REDUCER_REPLICAS}" echo "" echo "Deleting existing cluster if present..." 
@@ -73,7 +75,8 @@ sleep 2 helm install test "${script_dir}" \ --set "distributed=true" \ --set "compressionWorker.replicas=${COMPRESSION_WORKER_REPLICAS}" \ - --set "queryWorker.replicas=${QUERY_WORKER_REPLICAS}" + --set "queryWorker.replicas=${QUERY_WORKER_REPLICAS}" \ + --set "reducer.replicas=${REDUCER_REPLICAS}" wait $SAMPLE_DOWNLOAD_PID echo "Sample download and extraction complete" diff --git a/tools/deployment/package-helm/values.yaml b/tools/deployment/package-helm/values.yaml index fc0bee0265..eff4a2a614 100644 --- a/tools/deployment/package-helm/values.yaml +++ b/tools/deployment/package-helm/values.yaml @@ -61,6 +61,22 @@ queryWorker: # topologyKey: "kubernetes.io/hostname" # whenUnsatisfiable: "DoNotSchedule" +reducer: + replicas: 1 + # Controls which nodes run reducers + # scheduling: + # nodeSelector: + # yscope.io/nodeType: compute + # tolerations: + # - key: "yscope.io/dedicated" + # operator: "Equal" + # value: "reducer" + # effect: "NoSchedule" + # topologySpreadConstraints: + # - maxSkew: 1 + # topologyKey: "kubernetes.io/hostname" + # whenUnsatisfiable: "DoNotSchedule" + storage: # Name of the StorageClass for PVs and PVCs. 
# - If "local-storage" (default), the chart creates a StorageClass with WaitForFirstConsumer From 6d2375ec44fb3f498bd90777293061f849bf2625 Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Thu, 8 Jan 2026 14:55:47 -0500 Subject: [PATCH 04/15] refactor(helm): Simplify log directory definition in test script --- tools/deployment/package-helm/.test-common.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/deployment/package-helm/.test-common.sh b/tools/deployment/package-helm/.test-common.sh index 7a9c900ae7..c3b07017cc 100755 --- a/tools/deployment/package-helm/.test-common.sh +++ b/tools/deployment/package-helm/.test-common.sh @@ -61,8 +61,7 @@ create_clp_directories() { "$CLP_HOME/var/data/"{archives,streams,staged-archives,staged-streams} \ "$CLP_HOME/var/log/"{compression_scheduler,compression_worker,user} \ "$CLP_HOME/var/log/"{query_scheduler,query_worker,reducer} \ - "$CLP_HOME/var/log/garbage_collector" \ - "$CLP_HOME/var/log/api_server" \ + "$CLP_HOME/var/log/{api_server,garbage_collector}" \ "$CLP_HOME/var/tmp" \ "$CLP_HOME/samples" } From 3b6003187e28fb1b6fdf55d32fa47daec26cf6af Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Sat, 10 Jan 2026 01:14:16 -0500 Subject: [PATCH 05/15] improve param component docs - Apply suggestions from code review Co-authored-by: Devin Gibson --- tools/deployment/package-helm/templates/_helpers.tpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/deployment/package-helm/templates/_helpers.tpl b/tools/deployment/package-helm/templates/_helpers.tpl index 8454f0fa8d..94fc79ee64 100644 --- a/tools/deployment/package-helm/templates/_helpers.tpl +++ b/tools/deployment/package-helm/templates/_helpers.tpl @@ -310,7 +310,7 @@ When distributed is false (single-node mode), a control-plane toleration is auto so pods can be scheduled on tainted control-plane nodes without manual untainting. 
@param {object} root Root template context -@param {string} component Top-level values key (e.g., "compressionWorker", "queryWorker") +@param {string} component Key name in top-level Values (e.g., "compressionWorker", "queryWorker") @return {string} YAML-formatted scheduling fields (nodeSelector, affinity, tolerations, topologySpreadConstraints) */}} From 553925a6d63853f60cc9bf071bc73f407ca3a6be Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Sat, 10 Jan 2026 02:41:37 -0500 Subject: [PATCH 06/15] refactor(helm): Rename test scripts to set-up-* Rename test scripts to set-up-* to reflect they set up test environments rather than run tests: - test.sh -> set-up-test.sh - test-multi-dedicated.sh -> set-up-multi-dedicated-test.sh - test-multi-shared.sh -> set-up-multi-shared-test.sh - .test-common.sh -> .set-up-common.sh --- .../package-helm/{.test-common.sh => .set-up-common.sh} | 0 .../{test-multi-dedicated.sh => set-up-multi-dedicated-test.sh} | 0 .../{test-multi-shared.sh => set-up-multi-shared-test.sh} | 0 tools/deployment/package-helm/{test.sh => set-up-test.sh} | 0 4 files changed, 0 insertions(+), 0 deletions(-) rename tools/deployment/package-helm/{.test-common.sh => .set-up-common.sh} (100%) rename tools/deployment/package-helm/{test-multi-dedicated.sh => set-up-multi-dedicated-test.sh} (100%) rename tools/deployment/package-helm/{test-multi-shared.sh => set-up-multi-shared-test.sh} (100%) rename tools/deployment/package-helm/{test.sh => set-up-test.sh} (100%) diff --git a/tools/deployment/package-helm/.test-common.sh b/tools/deployment/package-helm/.set-up-common.sh similarity index 100% rename from tools/deployment/package-helm/.test-common.sh rename to tools/deployment/package-helm/.set-up-common.sh diff --git a/tools/deployment/package-helm/test-multi-dedicated.sh b/tools/deployment/package-helm/set-up-multi-dedicated-test.sh similarity index 100% rename from tools/deployment/package-helm/test-multi-dedicated.sh rename to 
tools/deployment/package-helm/set-up-multi-dedicated-test.sh diff --git a/tools/deployment/package-helm/test-multi-shared.sh b/tools/deployment/package-helm/set-up-multi-shared-test.sh similarity index 100% rename from tools/deployment/package-helm/test-multi-shared.sh rename to tools/deployment/package-helm/set-up-multi-shared-test.sh diff --git a/tools/deployment/package-helm/test.sh b/tools/deployment/package-helm/set-up-test.sh similarity index 100% rename from tools/deployment/package-helm/test.sh rename to tools/deployment/package-helm/set-up-test.sh From 2b0e863208e145e00623d5ebc6774591e23d69af Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Sat, 10 Jan 2026 02:41:59 -0500 Subject: [PATCH 07/15] refactor(helm): Extract common utilities and deduplicate scripts - Extract common functions to .set-up-common.sh: - prepare_environment: cluster cleanup and directory setup - generate_kind_config: kind cluster YAML generation - wait_for_cluster_ready: sample download and pod readiness - Refactor set-up-test.sh to use common utilities instead of duplicating code - Unify default cluster name to clp-test across all scripts - Add missing port 30800 mapping to multi-node scripts - Add missing mcp_server log directory - Add shellcheck source directive for static analysis - Move CLP_HOME to individual scripts for better visibility - Remove redundant 2>/dev/null from helm uninstall with --ignore-not-found - Reorder functions in common file by call order --- .../deployment/package-helm/.set-up-common.sh | 119 ++++++++++++++---- .../set-up-multi-dedicated-test.sh | 69 ++-------- .../package-helm/set-up-multi-shared-test.sh | 69 ++-------- tools/deployment/package-helm/set-up-test.sh | 114 +++-------------- 4 files changed, 133 insertions(+), 238 deletions(-) diff --git a/tools/deployment/package-helm/.set-up-common.sh b/tools/deployment/package-helm/.set-up-common.sh index c3b07017cc..ce9f79a04b 100755 --- a/tools/deployment/package-helm/.set-up-common.sh +++ 
b/tools/deployment/package-helm/.set-up-common.sh @@ -1,13 +1,97 @@ #!/usr/bin/env bash -# Common utilities for Helm chart testing -# Source this file from test scripts +# Common utilities for Helm chart set-up scripts +# Source this file from set-up-*.sh scripts set -o errexit set -o nounset set -o pipefail -CLP_HOME="${CLP_HOME:-/tmp/clp}" +# Creates required directories for CLP data and logs +create_clp_directories() { + echo "Creating CLP directories at ${CLP_HOME}..." + mkdir -p "$CLP_HOME/var/"{data,log}/{database,queue,redis,results_cache} \ + "$CLP_HOME/var/data/"{archives,streams,staged-archives,staged-streams} \ + "$CLP_HOME/var/log/"{compression_scheduler,compression_worker,user} \ + "$CLP_HOME/var/log/"{query_scheduler,query_worker,reducer} \ + "$CLP_HOME/var/log/"{api_server,garbage_collector,mcp_server} \ + "$CLP_HOME/var/tmp" \ + "$CLP_HOME/samples" +} + +# Downloads sample datasets in the background +# Sets SAMPLE_DOWNLOAD_PID global variable +download_samples() { + echo "Downloading sample datasets..." + wget -O - https://zenodo.org/records/10516402/files/postgresql.tar.gz?download=1 \ + | tar xz -C "$CLP_HOME/samples" & + SAMPLE_DOWNLOAD_PID=$! + + # Generate sample log file for garbage collector testing + cat < "$CLP_HOME/samples/test-gc.jsonl" +{"timestamp": $(date +%s%3N), "level": "INFO", "message": "User login successful"} +{"timestamp": $(date +%s%3N), "level": "ERROR", "message": "Database connection failed"} +EOF +} + +# Cleans up existing cluster and prepares environment +# @param {string} cluster_name Name of the kind cluster +prepare_environment() { + local cluster_name=$1 + + echo "Deleting existing cluster if present..." 
+ kind delete cluster --name "${cluster_name}" 2>/dev/null || true + + rm -rf "$CLP_HOME" + create_clp_directories + download_samples +} + +# Generates kind cluster configuration YAML +# +# @param {int} num_workers Number of worker nodes (0 for single-node cluster) +generate_kind_config() { + local num_workers=${1:-0} + + cat < "$CLP_HOME/samples/test-gc.jsonl" -{"timestamp": $(date +%s%3N), "level": "INFO", "message": "User login successful"} -{"timestamp": $(date +%s%3N), "level": "ERROR", "message": "Database connection failed"} -EOF + wait_for_pods 300 5 5 } diff --git a/tools/deployment/package-helm/set-up-multi-dedicated-test.sh b/tools/deployment/package-helm/set-up-multi-dedicated-test.sh index f23ebbb5dc..2322c420b8 100755 --- a/tools/deployment/package-helm/set-up-multi-dedicated-test.sh +++ b/tools/deployment/package-helm/set-up-multi-dedicated-test.sh @@ -1,23 +1,23 @@ #!/usr/bin/env bash -# Multi-node cluster test with dedicated worker nodes for each worker type +# Multi-node cluster setup with dedicated worker nodes for each worker type # Demonstrates nodeSelector scheduling with separate node pools -# -# To clean up after running: -# kind delete cluster --name "${CLUSTER_NAME}" -# rm -rf /tmp/clp +# TODO: Migrate into integration test -script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -source "${script_dir}/.test-common.sh" +script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)" -CLUSTER_NAME="${CLUSTER_NAME:-clp-test-dedicated}" +CLP_HOME="${CLP_HOME:-/tmp/clp}" +CLUSTER_NAME="${CLUSTER_NAME:-clp-test}" NUM_COMPRESSION_NODES="${NUM_COMPRESSION_NODES:-2}" NUM_QUERY_NODES="${NUM_QUERY_NODES:-2}" COMPRESSION_WORKER_REPLICAS="${COMPRESSION_WORKER_REPLICAS:-2}" QUERY_WORKER_REPLICAS="${QUERY_WORKER_REPLICAS:-2}" REDUCER_REPLICAS="${REDUCER_REPLICAS:-2}" -echo "=== Multi-node test with dedicated worker nodes ===" +# shellcheck source=.set-up-common.sh +source "${script_dir}/.set-up-common.sh" + +echo "=== Multi-node setup with 
dedicated worker nodes ===" echo "Cluster: ${CLUSTER_NAME}" echo "Compression nodes: ${NUM_COMPRESSION_NODES}" echo "Query nodes: ${NUM_QUERY_NODES}" @@ -26,52 +26,12 @@ echo "Query workers: ${QUERY_WORKER_REPLICAS}" echo "Reducers: ${REDUCER_REPLICAS}" echo "" -echo "Deleting existing cluster if present..." -kind delete cluster --name "${CLUSTER_NAME}" 2>/dev/null || true - -rm -rf "$CLP_HOME" -create_clp_directories -download_samples +prepare_environment "${CLUSTER_NAME}" total_workers=$((NUM_COMPRESSION_NODES + NUM_QUERY_NODES)) echo "Creating kind cluster..." -{ - cat </dev/null || true +helm uninstall test --ignore-not-found sleep 2 helm install test "${script_dir}" \ --set "distributed=true" \ @@ -99,7 +59,4 @@ helm install test "${script_dir}" \ --set "queryWorker.scheduling.nodeSelector.yscope\.io/nodeType=query" \ --set "reducer.replicas=${REDUCER_REPLICAS}" -wait $SAMPLE_DOWNLOAD_PID -echo "Sample download and extraction complete" - -wait_for_pods 300 5 5 +wait_for_cluster_ready diff --git a/tools/deployment/package-helm/set-up-multi-shared-test.sh b/tools/deployment/package-helm/set-up-multi-shared-test.sh index f6621e2218..f297d5e5b8 100755 --- a/tools/deployment/package-helm/set-up-multi-shared-test.sh +++ b/tools/deployment/package-helm/set-up-multi-shared-test.sh @@ -1,22 +1,22 @@ #!/usr/bin/env bash -# Multi-node cluster test with shared worker nodes +# Multi-node cluster setup with shared worker nodes # Both compression and query workers share the same node pool -# -# To clean up after running: -# kind delete cluster --name "${CLUSTER_NAME}" -# rm -rf /tmp/clp +# TODO: Migrate into integration test -script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -source "${script_dir}/.test-common.sh" +script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)" -CLUSTER_NAME="${CLUSTER_NAME:-clp-test-multi}" +CLP_HOME="${CLP_HOME:-/tmp/clp}" +CLUSTER_NAME="${CLUSTER_NAME:-clp-test}" NUM_WORKER_NODES="${NUM_WORKER_NODES:-2}" 
COMPRESSION_WORKER_REPLICAS="${COMPRESSION_WORKER_REPLICAS:-2}" QUERY_WORKER_REPLICAS="${QUERY_WORKER_REPLICAS:-2}" REDUCER_REPLICAS="${REDUCER_REPLICAS:-2}" -echo "=== Multi-node test with shared worker nodes ===" +# shellcheck source=.set-up-common.sh +source "${script_dir}/.set-up-common.sh" + +echo "=== Multi-node setup with shared worker nodes ===" echo "Cluster: ${CLUSTER_NAME}" echo "Worker nodes: ${NUM_WORKER_NODES}" echo "Compression workers: ${COMPRESSION_WORKER_REPLICAS}" @@ -24,53 +24,13 @@ echo "Query workers: ${QUERY_WORKER_REPLICAS}" echo "Reducers: ${REDUCER_REPLICAS}" echo "" -echo "Deleting existing cluster if present..." -kind delete cluster --name "${CLUSTER_NAME}" 2>/dev/null || true - -rm -rf "$CLP_HOME" -create_clp_directories -download_samples +prepare_environment "${CLUSTER_NAME}" echo "Creating kind cluster..." -{ - cat </dev/null || true +helm uninstall test --ignore-not-found sleep 2 helm install test "${script_dir}" \ --set "distributed=true" \ @@ -78,7 +38,4 @@ helm install test "${script_dir}" \ --set "queryWorker.replicas=${QUERY_WORKER_REPLICAS}" \ --set "reducer.replicas=${REDUCER_REPLICAS}" -wait $SAMPLE_DOWNLOAD_PID -echo "Sample download and extraction complete" - -wait_for_pods 300 5 5 +wait_for_cluster_ready diff --git a/tools/deployment/package-helm/set-up-test.sh b/tools/deployment/package-helm/set-up-test.sh index 474e1f1ab3..f8290c12a2 100755 --- a/tools/deployment/package-helm/set-up-test.sh +++ b/tools/deployment/package-helm/set-up-test.sh @@ -1,112 +1,28 @@ #!/usr/bin/env bash +# Single-node cluster setup for testing # TODO: Migrate into integration test -set -o errexit -set -o nounset -set -o pipefail +script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)" -CLP_HOME="/tmp/clp" +CLP_HOME="${CLP_HOME:-/tmp/clp}" +CLUSTER_NAME="${CLUSTER_NAME:-clp-test}" -# Waits for all jobs to complete and all non-job pods to be ready. 
-# -# @param {int} timeout_seconds Overall timeout in seconds -# @param {int} poll_interval_seconds Interval between status checks -# @param {int} wait_timeout_seconds Timeout for each kubectl wait call -# @return {int} 0 on success, 1 on timeout -wait_for_pods() { - local timeout_seconds=$1 - local poll_interval_seconds=$2 - local wait_timeout_seconds=$3 +# shellcheck source=.set-up-common.sh +source "${script_dir}/.set-up-common.sh" - echo "Waiting for all pods to be ready" \ - "(timeout=${timeout_seconds}s, poll=${poll_interval_seconds}s," \ - "wait=${wait_timeout_seconds}s)..." +echo "=== Single-node setup ===" +echo "Cluster: ${CLUSTER_NAME}" +echo "" - # Reset bash built-in SECONDS counter - SECONDS=0 +prepare_environment "${CLUSTER_NAME}" - while true; do - sleep "${poll_interval_seconds}" - kubectl get pods - - if kubectl wait job \ - --all \ - --for=condition=Complete \ - --timeout="${wait_timeout_seconds}s" 2>/dev/null \ - && kubectl wait pods \ - --all \ - --selector='!job-name' \ - --for=condition=Ready \ - --timeout="${wait_timeout_seconds}s" 2>/dev/null - then - echo "All jobs completed and services are ready." - return 0 - fi - - if [[ ${SECONDS} -ge ${timeout_seconds} ]]; then - echo "ERROR: Timed out waiting for pods to be ready" - return 1 - fi - - echo "---" - done -} - -kind delete cluster --name clp-test -rm -rf "$CLP_HOME" -mkdir -p "$CLP_HOME/var/"{data,log}/{database,queue,redis,results_cache} \ - "$CLP_HOME/var/data/"{archives,streams,staged-archives,staged-streams} \ - "$CLP_HOME/var/log/"{compression_scheduler,compression_worker,user} \ - "$CLP_HOME/var/log/"{query_scheduler,query_worker,reducer} \ - "$CLP_HOME/var/log/"{api_server,garbage_collector,mcp_server} \ - "$CLP_HOME/var/tmp" \ - "$CLP_HOME/samples" - -# Download sample datasets in the background -wget -O - https://zenodo.org/records/10516402/files/postgresql.tar.gz?download=1 \ - | tar xz -C "$CLP_HOME/samples" & -SAMPLE_DOWNLOAD_PID=$! 
- -# Generate sample log file for garbage collector testing. -cat < /tmp/clp/samples/test-gc.jsonl -{"timestamp": $(date +%s%3N), "level": "INFO", "message": "User login successful"} -{"timestamp": $(date +%s%3N), "level": "ERROR", "message": "Database connection failed"} -EOF - -cat < Date: Sat, 10 Jan 2026 02:46:20 -0500 Subject: [PATCH 08/15] fix(helm): Handle sample download failure in wait_for_cluster_ready --- tools/deployment/package-helm/.set-up-common.sh | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/deployment/package-helm/.set-up-common.sh b/tools/deployment/package-helm/.set-up-common.sh index ce9f79a04b..3eb17fa05e 100755 --- a/tools/deployment/package-helm/.set-up-common.sh +++ b/tools/deployment/package-helm/.set-up-common.sh @@ -140,8 +140,12 @@ wait_for_pods() { # Waits for sample download to complete and all pods to be ready wait_for_cluster_ready() { - wait "$SAMPLE_DOWNLOAD_PID" - echo "Sample download and extraction complete" + if wait "$SAMPLE_DOWNLOAD_PID"; then + echo "Sample download and extraction complete" + else + echo "ERROR: Sample download failed" + return 1 + fi wait_for_pods 300 5 5 } From bdc0207d0815bd6624844201df912dc73851e1a9 Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Mon, 12 Jan 2026 17:11:05 -0500 Subject: [PATCH 09/15] bump chart version --- tools/deployment/package-helm/Chart.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/deployment/package-helm/Chart.yaml b/tools/deployment/package-helm/Chart.yaml index 8385e19fae..1805a262a3 100644 --- a/tools/deployment/package-helm/Chart.yaml +++ b/tools/deployment/package-helm/Chart.yaml @@ -1,6 +1,6 @@ apiVersion: "v2" name: "clp" -version: "0.1.2-dev.17" +version: "0.1.2-dev.18" description: "A Helm chart for CLP's (Compressed Log Processor) package deployment" type: "application" appVersion: "0.7.1-dev" From d642a4811155c2192dd323a9ae5875b2ba38a77b Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Mon, 12 Jan 2026 
17:21:51 -0500 Subject: [PATCH 10/15] refactor(helm): Rename `distributed` to `distributed_deployment` for clarity --- .../package-helm/templates/_helpers.tpl | 17 +++++++++-------- tools/deployment/package-helm/values.yaml | 12 ++++++------ 2 files changed, 15 insertions(+), 14 deletions(-) diff --git a/tools/deployment/package-helm/templates/_helpers.tpl b/tools/deployment/package-helm/templates/_helpers.tpl index 94fc79ee64..84b465645d 100644 --- a/tools/deployment/package-helm/templates/_helpers.tpl +++ b/tools/deployment/package-helm/templates/_helpers.tpl @@ -113,9 +113,10 @@ Used for: {{/* Creates a PersistentVolume that does not use dynamic provisioning. -Behavior depends on the `distributed` value: -- distributed=false: Uses local volume type with node affinity targeting control-plane nodes -- distributed=true: Uses hostPath without node affinity (assumes shared storage like NFS) +Behavior depends on the `distributed_deployment` value: +- distributed_deployment=false: Uses local volume type with node affinity targeting control-plane + nodes +- distributed_deployment=true: Uses hostPath without node affinity (assumes shared storage like NFS) @param {object} root Root template context @param {string} component_category (e.g., "database", "shared-data") @@ -139,7 +140,7 @@ spec: accessModes: {{ .accessModes }} persistentVolumeReclaimPolicy: "Retain" storageClassName: {{ .root.Values.storage.storageClassName | quote }} - {{- if .root.Values.distributed }} + {{- if .root.Values.distributed_deployment }} hostPath: path: {{ .hostPath | quote }} type: "DirectoryOrCreate" @@ -152,7 +153,7 @@ spec: - matchExpressions: - key: "node-role.kubernetes.io/control-plane" operator: "Exists" - {{- end }}{{/* if .root.Values.distributed */}} + {{- end }}{{/* if .root.Values.distributed_deployment */}} {{- end }}{{/* define "clp.createStaticPv" */}} {{/* @@ -306,8 +307,8 @@ command: [ Creates scheduling configuration (nodeSelector, affinity, tolerations, 
topologySpreadConstraints) for a component. -When distributed is false (single-node mode), a control-plane toleration is automatically added -so pods can be scheduled on tainted control-plane nodes without manual untainting. +When distributed_deployment is false (single-node mode), a control-plane toleration is automatically +added so pods can be scheduled on tainted control-plane nodes without manual untainting. @param {object} root Root template context @param {string} component Key name in top-level Values (e.g., "compressionWorker", "queryWorker") @@ -318,7 +319,7 @@ so pods can be scheduled on tainted control-plane nodes without manual untaintin {{- $componentConfig := index .root.Values .component | default dict -}} {{- $scheduling := $componentConfig.scheduling | default dict -}} {{- $tolerations := $scheduling.tolerations | default list -}} -{{- if not .root.Values.distributed -}} +{{- if not .root.Values.distributed_deployment -}} {{- $tolerations = append $tolerations (dict "key" "node-role.kubernetes.io/control-plane" "operator" "Exists" diff --git a/tools/deployment/package-helm/values.yaml b/tools/deployment/package-helm/values.yaml index eff4a2a614..9c60b39583 100644 --- a/tools/deployment/package-helm/values.yaml +++ b/tools/deployment/package-helm/values.yaml @@ -19,12 +19,12 @@ image: tag: "main" # Deployment mode: -# - distributed=false: Single-node deployment. PVs use local volumes bound to one node. Pods -# automatically tolerate control-plane taints. Only works with worker replicas=1. -# - distributed=true: Multi-node deployment. PVs use hostPath without node affinity, assuming -# unmanaged shared storage (e.g., NFS/CephFS mounted via /etc/fstab) at the same path on all -# nodes. -distributed: false +# - distributed_deployment=false: Single-node deployment. PVs use local volumes bound to one node. +# Pods automatically tolerate control-plane taints. Only works with worker replicas=1. +# - distributed_deployment=true: Multi-node deployment. 
PVs use hostPath without node affinity, +# assuming unmanaged shared storage (e.g., NFS/CephFS mounted via /etc/fstab) at the same path on +# all nodes. +distributed_deployment: false # Number of concurrent processes per worker pod. workerConcurrency: 8 From f64d4c01242c060d614a680d057a24868dd265a6 Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Mon, 12 Jan 2026 17:45:16 -0500 Subject: [PATCH 11/15] refactor(helm): Rename `distributed_deployment` to `distributedDeployment` for consistency with naming conventions --- .../package-helm/set-up-multi-dedicated-test.sh | 2 +- .../package-helm/set-up-multi-shared-test.sh | 2 +- .../deployment/package-helm/templates/_helpers.tpl | 14 +++++++------- tools/deployment/package-helm/values.yaml | 6 +++--- 4 files changed, 12 insertions(+), 12 deletions(-) diff --git a/tools/deployment/package-helm/set-up-multi-dedicated-test.sh b/tools/deployment/package-helm/set-up-multi-dedicated-test.sh index 2322c420b8..2e8ab3c23d 100755 --- a/tools/deployment/package-helm/set-up-multi-dedicated-test.sh +++ b/tools/deployment/package-helm/set-up-multi-dedicated-test.sh @@ -52,7 +52,7 @@ echo "Installing Helm chart..." helm uninstall test --ignore-not-found sleep 2 helm install test "${script_dir}" \ - --set "distributed=true" \ + --set "distributedDeployment=true" \ --set "compressionWorker.replicas=${COMPRESSION_WORKER_REPLICAS}" \ --set "compressionWorker.scheduling.nodeSelector.yscope\.io/nodeType=compression" \ --set "queryWorker.replicas=${QUERY_WORKER_REPLICAS}" \ diff --git a/tools/deployment/package-helm/set-up-multi-shared-test.sh b/tools/deployment/package-helm/set-up-multi-shared-test.sh index f297d5e5b8..766e836ac3 100755 --- a/tools/deployment/package-helm/set-up-multi-shared-test.sh +++ b/tools/deployment/package-helm/set-up-multi-shared-test.sh @@ -33,7 +33,7 @@ echo "Installing Helm chart..." 
helm uninstall test --ignore-not-found sleep 2 helm install test "${script_dir}" \ - --set "distributed=true" \ + --set "distributedDeployment=true" \ --set "compressionWorker.replicas=${COMPRESSION_WORKER_REPLICAS}" \ --set "queryWorker.replicas=${QUERY_WORKER_REPLICAS}" \ --set "reducer.replicas=${REDUCER_REPLICAS}" diff --git a/tools/deployment/package-helm/templates/_helpers.tpl b/tools/deployment/package-helm/templates/_helpers.tpl index 84b465645d..c148c04349 100644 --- a/tools/deployment/package-helm/templates/_helpers.tpl +++ b/tools/deployment/package-helm/templates/_helpers.tpl @@ -113,10 +113,10 @@ Used for: {{/* Creates a PersistentVolume that does not use dynamic provisioning. -Behavior depends on the `distributed_deployment` value: -- distributed_deployment=false: Uses local volume type with node affinity targeting control-plane +Behavior depends on the `distributedDeployment` value: +- distributedDeployment=false: Uses local volume type with node affinity targeting control-plane nodes -- distributed_deployment=true: Uses hostPath without node affinity (assumes shared storage like NFS) +- distributedDeployment=true: Uses hostPath without node affinity (assumes shared storage like NFS) @param {object} root Root template context @param {string} component_category (e.g., "database", "shared-data") @@ -140,7 +140,7 @@ spec: accessModes: {{ .accessModes }} persistentVolumeReclaimPolicy: "Retain" storageClassName: {{ .root.Values.storage.storageClassName | quote }} - {{- if .root.Values.distributed_deployment }} + {{- if .root.Values.distributedDeployment }} hostPath: path: {{ .hostPath | quote }} type: "DirectoryOrCreate" @@ -153,7 +153,7 @@ spec: - matchExpressions: - key: "node-role.kubernetes.io/control-plane" operator: "Exists" - {{- end }}{{/* if .root.Values.distributed_deployment */}} + {{- end }}{{/* if .root.Values.distributedDeployment */}} {{- end }}{{/* define "clp.createStaticPv" */}} {{/* @@ -307,7 +307,7 @@ command: [ Creates scheduling 
configuration (nodeSelector, affinity, tolerations, topologySpreadConstraints) for a component. -When distributed_deployment is false (single-node mode), a control-plane toleration is automatically +When distributedDeployment is false (single-node mode), a control-plane toleration is automatically added so pods can be scheduled on tainted control-plane nodes without manual untainting. @param {object} root Root template context @@ -319,7 +319,7 @@ added so pods can be scheduled on tainted control-plane nodes without manual unt {{- $componentConfig := index .root.Values .component | default dict -}} {{- $scheduling := $componentConfig.scheduling | default dict -}} {{- $tolerations := $scheduling.tolerations | default list -}} -{{- if not .root.Values.distributed_deployment -}} +{{- if not .root.Values.distributedDeployment -}} {{- $tolerations = append $tolerations (dict "key" "node-role.kubernetes.io/control-plane" "operator" "Exists" diff --git a/tools/deployment/package-helm/values.yaml b/tools/deployment/package-helm/values.yaml index 4407e3c676..8ebba945d7 100644 --- a/tools/deployment/package-helm/values.yaml +++ b/tools/deployment/package-helm/values.yaml @@ -19,12 +19,12 @@ image: tag: "main" # Deployment mode: -# - distributed_deployment=false: Single-node deployment. PVs use local volumes bound to one node. +# - distributedDeployment=false: Single-node deployment. PVs use local volumes bound to one node. # Pods automatically tolerate control-plane taints. Only works with worker replicas=1. -# - distributed_deployment=true: Multi-node deployment. PVs use hostPath without node affinity, +# - distributedDeployment=true: Multi-node deployment. PVs use hostPath without node affinity, # assuming unmanaged shared storage (e.g., NFS/CephFS mounted via /etc/fstab) at the same path on # all nodes. -distributed_deployment: false +distributedDeployment: false # Number of concurrent processes per worker pod. 
workerConcurrency: 8 From 84c951b50bb7ec3255d6092b56d0dd28e712f196 Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Mon, 12 Jan 2026 17:46:34 -0500 Subject: [PATCH 12/15] refactor(helm): Replace `createLocalPv` with `createStaticPv` for log PV templates --- .../package-helm/templates/log-ingestor-logs-pv.yaml | 3 +-- .../deployment/package-helm/templates/mcp-server-logs-pv.yaml | 3 +-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/tools/deployment/package-helm/templates/log-ingestor-logs-pv.yaml b/tools/deployment/package-helm/templates/log-ingestor-logs-pv.yaml index 6ed940bc0c..227ac47988 100644 --- a/tools/deployment/package-helm/templates/log-ingestor-logs-pv.yaml +++ b/tools/deployment/package-helm/templates/log-ingestor-logs-pv.yaml @@ -1,9 +1,8 @@ {{- if .Values.clpConfig.log_ingestor }} -{{- include "clp.createLocalPv" (dict +{{- include "clp.createStaticPv" (dict "root" . "component_category" "log-ingestor" "name" "logs" - "nodeRole" "control-plane" "capacity" "10Gi" "accessModes" (list "ReadWriteOnce") "hostPath" (printf "%s/log_ingestor" .Values.clpConfig.logs_directory) diff --git a/tools/deployment/package-helm/templates/mcp-server-logs-pv.yaml b/tools/deployment/package-helm/templates/mcp-server-logs-pv.yaml index 9c53c9c7f1..fc297f5020 100644 --- a/tools/deployment/package-helm/templates/mcp-server-logs-pv.yaml +++ b/tools/deployment/package-helm/templates/mcp-server-logs-pv.yaml @@ -1,9 +1,8 @@ {{- if .Values.clpConfig.mcp_server }} -{{- include "clp.createLocalPv" (dict +{{- include "clp.createStaticPv" (dict "root" . 
"component_category" "mcp-server" "name" "logs" - "nodeRole" "control-plane" "capacity" "5Gi" "accessModes" (list "ReadWriteOnce") "hostPath" (printf "%s/mcp_server" .Values.clpConfig.logs_directory) From 68d7ca4826346f09268e82e854ad3febcd763740 Mon Sep 17 00:00:00 2001 From: Kirk Rodrigues <2454684+kirkrodrigues@users.noreply.github.com> Date: Tue, 13 Jan 2026 20:33:22 -0500 Subject: [PATCH 13/15] Minor edits for clarity. --- tools/deployment/package-helm/values.yaml | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/tools/deployment/package-helm/values.yaml b/tools/deployment/package-helm/values.yaml index 8ebba945d7..b392232a24 100644 --- a/tools/deployment/package-helm/values.yaml +++ b/tools/deployment/package-helm/values.yaml @@ -19,11 +19,13 @@ image: tag: "main" # Deployment mode: -# - distributedDeployment=false: Single-node deployment. PVs use local volumes bound to one node. -# Pods automatically tolerate control-plane taints. Only works with worker replicas=1. -# - distributedDeployment=true: Multi-node deployment. PVs use hostPath without node affinity, -# assuming unmanaged shared storage (e.g., NFS/CephFS mounted via /etc/fstab) at the same path on -# all nodes. +# - distributedDeployment=false indicates a single-node deployment. In this case: +# - PVs use local storage bound to a single node. +# - Pods automatically tolerate control-plane taints. +# - Deployments only work with 1 replica for each worker. +# - distributedDeployment=true indicates a multi-node deployment. In this case: +# - PVs use `hostPath` without node affinity, assuming that `hostPath` is on unmanaged shared +# storage (e.g., NFS/CephFS mounted via /etc/fstab) at the same path on all nodes. distributedDeployment: false # Number of concurrent processes per worker pod. @@ -79,8 +81,9 @@ reducer: storage: # Name of the StorageClass for PVs and PVCs. 
- # - If "local-storage" (default), the chart creates a StorageClass with WaitForFirstConsumer - # - If a different name, the StorageClass must already exist in your cluster + # - "local-storage" (default) indicates the chart should create a StorageClass with + # WaitForFirstConsumer. + # - A different name indicates a StorageClass which must already exist in your cluster. storageClassName: "local-storage" clpConfig: From 4a87d799426e400fabf8306f7ecbdfad60acb9e8 Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Wed, 14 Jan 2026 15:46:11 -0500 Subject: [PATCH 14/15] improve values.yaml docstrings - Apply suggestions from code review Co-authored-by: kirkrodrigues <2454684+kirkrodrigues@users.noreply.github.com> --- tools/deployment/package-helm/values.yaml | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/tools/deployment/package-helm/values.yaml b/tools/deployment/package-helm/values.yaml index b392232a24..710563209f 100644 --- a/tools/deployment/package-helm/values.yaml +++ b/tools/deployment/package-helm/values.yaml @@ -18,15 +18,14 @@ image: pullPolicy: "Always" tag: "main" -# Deployment mode: -# - distributedDeployment=false indicates a single-node deployment. In this case: +# - If false: Single-node deployment. # - PVs use local storage bound to a single node. # - Pods automatically tolerate control-plane taints. -# - Deployments only work with 1 replica for each worker. -# - distributedDeployment=true indicates a multi-node deployment. In this case: -# - PVs use `hostPath` without node affinity, assuming that `hostPath` is on unmanaged shared -# storage (e.g., NFS/CephFS mounted via /etc/fstab) at the same path on all nodes. -distributedDeployment: false +# - Each worker deployment supports only 1 replica. +# - If true: Multi-node deployment. +# - PVs use a path on the host (`hostPath`) without node affinity. 
+# - `hostPath` must point to externally managed shared storage (e.g., NFS/CephFS) mounted at the +# same path on all nodes. # Number of concurrent processes per worker pod. workerConcurrency: 8 @@ -81,9 +80,9 @@ reducer: storage: # Name of the StorageClass for PVs and PVCs. - # - "local-storage" (default) indicates the chart should create a StorageClass with - # WaitForFirstConsumer. - # - A different name indicates a StorageClass which must already exist in your cluster. + # - If set to "local-storage" (default), the chart will create a StorageClass + # with volumeBindingMode=WaitForFirstConsumer. + # - If set to any other value, that StorageClass must already exist in the cluster. storageClassName: "local-storage" clpConfig: From 107bed962c61a0857d06cf46ddd84994e9b334ef Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Wed, 14 Jan 2026 16:22:42 -0500 Subject: [PATCH 15/15] Add missing `distributedDeployment` option to values.yaml --- tools/deployment/package-helm/values.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/deployment/package-helm/values.yaml b/tools/deployment/package-helm/values.yaml index 710563209f..2ebde3e41b 100644 --- a/tools/deployment/package-helm/values.yaml +++ b/tools/deployment/package-helm/values.yaml @@ -26,6 +26,7 @@ image: # - PVs use a path on the host (`hostPath`) without node affinity. # - `hostPath` must point to externally managed shared storage (e.g., NFS/CephFS) mounted at the # same path on all nodes. +distributedDeployment: false # Number of concurrent processes per worker pod. workerConcurrency: 8