Merged

20 commits
fe8365f
feat(helm): Add multi-node deployment support with configurable sched…
junhaoliao Dec 26, 2025
5922b4c
bump chart version
junhaoliao Jan 7, 2026
e79a9b0
Merge branch 'main' into helm-proper-multi-node
junhaoliao Jan 7, 2026
fc80fef
Merge branch 'main' into helm-proper-multi-node
junhaoliao Jan 8, 2026
203811b
Add configurable reducer replicas and scheduling support
junhaoliao Jan 8, 2026
6d2375e
refactor(helm): Simplify log directory definition in test script
junhaoliao Jan 8, 2026
a33f5f3
Merge branch 'main' into helm-proper-multi-node
junhaoliao Jan 10, 2026
3b60031
improve param component docs - Apply suggestions from code review
junhaoliao Jan 10, 2026
553925a
refactor(helm): Rename test scripts to set-up-*
junhaoliao Jan 10, 2026
2b0e863
refactor(helm): Extract common utilities and deduplicate scripts
junhaoliao Jan 10, 2026
e416e19
fix(helm): Handle sample download failure in wait_for_cluster_ready
junhaoliao Jan 10, 2026
bdc0207
bump chart version
junhaoliao Jan 12, 2026
d642a48
refactor(helm): Rename `distributed` to `distributed_deployment` for …
junhaoliao Jan 12, 2026
f5a28f9
Merge branch 'main' into helm-proper-multi-node
junhaoliao Jan 12, 2026
f64d4c0
refactor(helm): Rename `distributed_deployment` to `distributedDeploy…
junhaoliao Jan 12, 2026
84c951b
refactor(helm): Replace `createLocalPv` with `createStaticPv` for log…
junhaoliao Jan 12, 2026
68d7ca4
Minor edits for clarity.
kirkrodrigues Jan 14, 2026
4a87d79
improve values.yaml docstrings - Apply suggestions from code review
junhaoliao Jan 14, 2026
35de162
Merge branch 'main' into helm-proper-multi-node
junhaoliao Jan 14, 2026
107bed9
Add missing `distributedDeployment` option to values.yaml
junhaoliao Jan 14, 2026
154 changes: 154 additions & 0 deletions tools/deployment/package-helm/.set-up-common.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
#!/usr/bin/env bash

# Common utilities for Helm chart set-up scripts
# Source this file from set-up-*.sh scripts

set -o errexit
set -o nounset
set -o pipefail

# Creates required directories for CLP data and logs
create_clp_directories() {
    echo "Creating CLP directories at ${CLP_HOME}..."
    mkdir -p "$CLP_HOME/var/"{data,log}/{database,queue,redis,results_cache} \
        "$CLP_HOME/var/data/"{archives,streams,staged-archives,staged-streams} \
        "$CLP_HOME/var/log/"{compression_scheduler,compression_worker,user} \
        "$CLP_HOME/var/log/"{query_scheduler,query_worker,reducer} \
        "$CLP_HOME/var/log/"{api_server,garbage_collector,log_ingestor,mcp_server} \
        "$CLP_HOME/var/tmp" \
        "$CLP_HOME/samples"
}

# Downloads sample datasets in the background
# Sets SAMPLE_DOWNLOAD_PID global variable
download_samples() {
    echo "Downloading sample datasets..."
    wget -O - https://zenodo.org/records/10516402/files/postgresql.tar.gz?download=1 \
        | tar xz -C "$CLP_HOME/samples" &
    SAMPLE_DOWNLOAD_PID=$!
Comment on lines +24 to +28

🧹 Nitpick | 🔵 Trivial

Consider adding timeout options to wget.

The wget command lacks timeout configuration. If the download stalls (network issues, unresponsive server), the background process could hang indefinitely, causing wait_for_cluster_ready to block.

Consider adding --timeout and optionally --tries for robustness:

♻️ Proposed fix
 download_samples() {
     echo "Downloading sample datasets..."
-    wget -O - https://zenodo.org/records/10516402/files/postgresql.tar.gz?download=1 \
+    wget --timeout=60 --tries=3 -q -O - \
+        https://zenodo.org/records/10516402/files/postgresql.tar.gz?download=1 \
       | tar xz -C "$CLP_HOME/samples" &
     SAMPLE_DOWNLOAD_PID=$!
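Beyond flags on wget itself, the caller can also bound the background process. A minimal sketch of that pattern, using `sleep` as a stand-in for the `wget | tar` pipeline (`download_with_deadline` is a hypothetical helper, not code from this PR):

```shell
#!/usr/bin/env bash

# Hypothetical sketch: bound a background download from the caller's side.
# `sleep 10` stands in for the real `wget ... | tar xz ... &` pipeline.
download_with_deadline() {
    local deadline_seconds=$1
    sleep 10 &                      # stand-in for the download pipeline
    local pid=$!
    local elapsed=0
    while kill -0 "$pid" 2>/dev/null; do
        if (( elapsed >= deadline_seconds )); then
            kill "$pid" 2>/dev/null
            wait "$pid" 2>/dev/null
            return 1                # timed out
        fi
        sleep 1
        elapsed=$((elapsed + 1))
    done
    wait "$pid"                     # propagate the download's exit status
}

download_with_deadline 2 && echo "completed" || echo "timed out"
```

With a real download the deadline would be far larger than 2 s; the point is only that `wait_for_cluster_ready` can no longer block forever on a stalled transfer.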


    # Generate sample log file for garbage collector testing
    cat <<EOF > "$CLP_HOME/samples/test-gc.jsonl"
{"timestamp": $(date +%s%3N), "level": "INFO", "message": "User login successful"}
{"timestamp": $(date +%s%3N), "level": "ERROR", "message": "Database connection failed"}
EOF
Comment on lines +31 to +34

⚠️ Potential issue | 🟠 Major

Portability issue: date +%s%3N fails on macOS.

The %N format specifier for nanoseconds is GNU-specific and not supported by macOS's BSD date command, which will cause this script to fail on macOS systems.

🔧 Proposed fix for cross-platform compatibility

Use a fallback that works on both GNU and BSD date:

     # Generate sample log file for garbage collector testing
     cat <<EOF > "$CLP_HOME/samples/test-gc.jsonl"
-{"timestamp": $(date +%s%3N), "level": "INFO", "message": "User login successful"}
-{"timestamp": $(date +%s%3N), "level": "ERROR", "message": "Database connection failed"}
+{"timestamp": $(date +%s)000, "level": "INFO", "message": "User login successful"}
+{"timestamp": $(date +%s)000, "level": "ERROR", "message": "Database connection failed"}
 EOF

This uses second precision with three zeros appended, which works on both platforms and is acceptable for test data.

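A runnable sketch of a portable variant (assuming, per the comment, that second precision padded to milliseconds is acceptable for test data): probe whether `date` supports `%N` and fall back to zero-padding.

```shell
#!/usr/bin/env bash

# Sketch: portable millisecond timestamps. GNU date supports %N (nanoseconds);
# BSD/macOS date prints a literal "N", which the probe below detects.
epoch_millis() {
    if [[ "$(date +%N 2>/dev/null)" =~ ^[0-9]+$ ]]; then
        date +%s%3N                 # GNU date: true millisecond precision
    else
        echo "$(date +%s)000"       # BSD/macOS: seconds padded with zeros
    fi
}

epoch_millis    # prints a 13-digit epoch-milliseconds value
```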

}
Comment on lines +22 to +35

🧹 Nitpick | 🔵 Trivial

Consider declaring the global variable at script scope.

SAMPLE_DOWNLOAD_PID is set here and used in wait_for_cluster_ready(). This implicit coupling could be made clearer by declaring the variable at script scope (e.g., near the top of the file with a comment) or by documenting the dependency in the function's docstring.

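A sketch of the script-scope declaration the comment describes (the `download_started` helper is illustrative, not part of the PR); initializing the global to a safe default keeps later checks robust under `set -o nounset`:

```shell
#!/usr/bin/env bash
set -o nounset

# PID of the background sample download; set by download_samples() and
# consumed by wait_for_cluster_ready(). Empty until a download starts.
SAMPLE_DOWNLOAD_PID=""

# Illustrative helper: safe to call even before any download was launched.
download_started() {
    [[ -n "$SAMPLE_DOWNLOAD_PID" ]]
}

download_started || echo "no download in progress"
```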


# Cleans up existing cluster and prepares environment
# @param {string} cluster_name Name of the kind cluster
prepare_environment() {
    local cluster_name=$1

    echo "Deleting existing cluster if present..."
    kind delete cluster --name "${cluster_name}" 2>/dev/null || true

    rm -rf "$CLP_HOME"
    create_clp_directories
Comment on lines +44 to +46

🧹 Nitpick | 🔵 Trivial

Add safeguard against dangerous CLP_HOME values.

While nounset protects against an unset CLP_HOME, it doesn't guard against dangerous values like / or /home. Consider adding a sanity check before rm -rf:

♻️ Proposed fix
+    if [[ -z "${CLP_HOME}" || "${CLP_HOME}" == "/" ]]; then
+        echo "ERROR: CLP_HOME is empty or set to root"
+        return 1
+    fi
+
     rm -rf "$CLP_HOME"
     create_clp_directories

    download_samples
}
Comment on lines +37 to +48

⚠️ Potential issue | 🟠 Major

Validate CLP_HOME before destructive operation.

The rm -rf "$CLP_HOME" command at line 45 could delete unintended data if CLP_HOME is set to an incorrect value (e.g., /, /home, etc.). While the default /tmp/clp is safe and the calling scripts set it explicitly, adding validation would prevent accidents.

🛡️ Suggested safeguard
 # Cleans up existing cluster and prepares environment
 # @param {string} cluster_name Name of the kind cluster
 prepare_environment() {
     local cluster_name=$1
+    
+    # Validate CLP_HOME is set and looks safe
+    if [[ -z "${CLP_HOME:-}" ]] || [[ "$CLP_HOME" == "/" ]] || [[ "$CLP_HOME" == "/home" ]]; then
+        echo "ERROR: Invalid or unsafe CLP_HOME: ${CLP_HOME:-<unset>}"
+        return 1
+    fi
 
     echo "Deleting existing cluster if present..."
     kind delete cluster --name "${cluster_name}" 2>/dev/null || true
 
     rm -rf "$CLP_HOME"
     create_clp_directories
     download_samples
 }
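The two comments above can be combined into one guard. A standalone sketch (the accepted-path rules — under `/tmp`, `/var/tmp`, or containing `clp` — are the reviewer's suggestion, not project policy):

```shell
#!/usr/bin/env bash

# Sketch: refuse to `rm -rf` suspicious CLP_HOME values. Accepts paths under
# /tmp or /var/tmp, or any path containing "clp"; rejects "/", "/home", etc.
validate_clp_home() {
    local home=${1:-}
    [[ -n "$home" && "$home" != "/" ]] || return 1
    case "$home" in
        /tmp/*|/var/tmp/*) return 0 ;;
        *clp*) return 0 ;;
        *) return 1 ;;
    esac
}

validate_clp_home "/tmp/clp" && echo "ok: /tmp/clp"
validate_clp_home "/" || echo "rejected: /"
validate_clp_home "/home" || echo "rejected: /home"
```

In `prepare_environment` this would run before `rm -rf "$CLP_HOME"`, returning non-zero instead of deleting.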


# Generates kind cluster configuration YAML
#
# @param {int} num_workers Number of worker nodes (0 for single-node cluster)
generate_kind_config() {
    local num_workers=${1:-0}

    cat <<EOF
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
nodes:
- role: control-plane
  extraMounts:
  - hostPath: /home
    containerPath: /home
  - hostPath: $CLP_HOME
    containerPath: $CLP_HOME
  extraPortMappings:
  - containerPort: 30000
    hostPort: 30000
    protocol: TCP
  - containerPort: 30017
    hostPort: 30017
    protocol: TCP
  - containerPort: 30301
    hostPort: 30301
    protocol: TCP
  - containerPort: 30302
    hostPort: 30302
    protocol: TCP
  - containerPort: 30306
    hostPort: 30306
    protocol: TCP
  - containerPort: 30800
    hostPort: 30800
    protocol: TCP
EOF

    for ((i = 0; i < num_workers; i++)); do
        cat <<EOF
- role: worker
  extraMounts:
  - hostPath: /home
    containerPath: /home
  - hostPath: $CLP_HOME
    containerPath: $CLP_HOME
EOF
    done
}

# Waits for all jobs to complete and all non-job pods to be ready.
#
# @param {int} timeout_seconds Overall timeout in seconds
# @param {int} poll_interval_seconds Interval between status checks
# @param {int} wait_timeout_seconds Timeout for each kubectl wait call
# @return {int} 0 on success, 1 on timeout
wait_for_pods() {
    local timeout_seconds=$1
    local poll_interval_seconds=$2
    local wait_timeout_seconds=$3

    echo "Waiting for all pods to be ready" \
        "(timeout=${timeout_seconds}s, poll=${poll_interval_seconds}s," \
        "wait=${wait_timeout_seconds}s)..."

    # Reset bash built-in SECONDS counter
    SECONDS=0

    while true; do
        sleep "${poll_interval_seconds}"
        kubectl get pods
Comment on lines +117 to +119

🧹 Nitpick | 🔵 Trivial

Sleep before first check causes unnecessary delay.

The sleep at the start of the loop means the first status check is delayed. Consider moving the sleep to the end of the loop or using a do-while pattern:

♻️ Proposed fix
     while true; do
-        sleep "${poll_interval_seconds}"
         kubectl get pods
 
         if kubectl wait job \
@@ ...
             echo "---"
         fi
+
+        sleep "${poll_interval_seconds}"
     done


        if kubectl wait job \
                --all \
                --for=condition=Complete \
                --timeout="${wait_timeout_seconds}s" 2>/dev/null \
            && kubectl wait pods \
                --all \
                --selector='!job-name' \
                --for=condition=Ready \
                --timeout="${wait_timeout_seconds}s" 2>/dev/null
        then
Comment on lines +121 to +130

🧹 Nitpick | 🔵 Trivial

Error suppression may hide legitimate kubectl failures.

The 2>/dev/null on both kubectl wait commands hides all stderr output. While this reduces noise when pods aren't ready, it could also hide legitimate errors (API failures, invalid selectors). Consider logging errors on final timeout or using a more selective approach.

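One way to keep polling quiet without losing diagnostics is to capture stderr and print it only on failure. A sketch (`run_and_capture` is hypothetical; `bash -c` stands in for a `kubectl wait` invocation):

```shell
#!/usr/bin/env bash

# Sketch: run a command, discarding stdout but capturing stderr; surface the
# captured message only when the command fails.
run_and_capture() {
    local err
    if err=$("$@" 2>&1 >/dev/null); then
        return 0
    fi
    echo "command failed: $err" >&2
    return 1
}

# Stand-in for a failing `kubectl wait` call:
run_and_capture bash -c 'echo "timed out waiting for condition" >&2; exit 1' \
    || echo "failure was logged"
```

Note the redirection order: `2>&1 >/dev/null` routes stderr into the command substitution before stdout is discarded.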

            echo "All jobs completed and services are ready."
            return 0
        fi

        if [[ ${SECONDS} -ge ${timeout_seconds} ]]; then
            echo "ERROR: Timed out waiting for pods to be ready"
            return 1
        fi

        echo "---"
    done
Comment on lines +117 to +141

🧹 Nitpick | 🔵 Trivial

Consider moving sleep after the status check.

Placing sleep at the start of the loop (Line 118) adds an unnecessary delay before the first status check. Moving it to the end of the loop would allow faster completion detection when pods are already ready.

♻️ Suggested fix
     while true; do
-        sleep "${poll_interval_seconds}"
         kubectl get pods

         if kubectl wait job \
                 --all \
                 --for=condition=Complete \
                 --timeout="${wait_timeout_seconds}s" 2>/dev/null \
             && kubectl wait pods \
                 --all \
                 --selector='!job-name' \
                 --for=condition=Ready \
                 --timeout="${wait_timeout_seconds}s" 2>/dev/null
         then
             echo "All jobs completed and services are ready."
             return 0
         fi

         if [[ ${SECONDS} -ge ${timeout_seconds} ]]; then
             echo "ERROR: Timed out waiting for pods to be ready"
             return 1
         fi

         echo "---"
+        sleep "${poll_interval_seconds}"
     done

}
Comment on lines +105 to +142

⚠️ Potential issue | 🟠 Major

Timeout logic may exceed the specified limit.

The timeout check occurs after the kubectl wait calls (lines 34-42), which can each run for up to wait_timeout_seconds. This means the actual timeout could exceed timeout_seconds by up to wait_timeout_seconds before the function returns.

Additionally, suppressing stderr with 2>/dev/null hides all errors, including legitimate issues such as connection failures or missing resources, which complicates debugging.

♻️ Suggested improvements
  1. Check timeout before waiting:
     while true; do
+        if [[ ${SECONDS} -ge ${timeout_seconds} ]]; then
+            echo "ERROR: Timed out waiting for pods to be ready"
+            return 1
+        fi
+
         sleep "${poll_interval_seconds}"
         kubectl get pods
 
         if kubectl wait job \
                 --all \
                 --for=condition=Complete \
                 --timeout="${wait_timeout_seconds}s" 2>/dev/null \
             && kubectl wait pods \
                 --all \
                 --selector='!job-name' \
                 --for=condition=Ready \
                 --timeout="${wait_timeout_seconds}s" 2>/dev/null
         then
             echo "All jobs completed and services are ready."
             return 0
         fi
 
-        if [[ ${SECONDS} -ge ${timeout_seconds} ]]; then
-            echo "ERROR: Timed out waiting for pods to be ready"
-            return 1
-        fi
-
         echo "---"
     done
 2. Capture and log kubectl errors instead of suppressing them entirely:
-        if kubectl wait job \
-                --all \
-                --for=condition=Complete \
-                --timeout="${wait_timeout_seconds}s" 2>/dev/null \
-            && kubectl wait pods \
-                --all \
-                --selector='!job-name' \
-                --for=condition=Ready \
-                --timeout="${wait_timeout_seconds}s" 2>/dev/null
+        local job_wait_output
+        local pod_wait_output
+        job_wait_output=$(kubectl wait job \
+                --all \
+                --for=condition=Complete \
+                --timeout="${wait_timeout_seconds}s" 2>&1) \
+            && pod_wait_output=$(kubectl wait pods \
+                --all \
+                --selector='!job-name' \
+                --for=condition=Ready \
+                --timeout="${wait_timeout_seconds}s" 2>&1)

Comment on lines +99 to +142

🧹 Nitpick | 🔵 Trivial

Consider preserving error messages during final wait attempt.

The polling logic is sound, but suppressing stderr with 2>/dev/null on every kubectl wait call hides potentially useful error messages. While this reduces noise during normal polling, it makes debugging harder when pods fail to become ready.

♻️ Optional improvement for better diagnostics
     while true; do
         sleep "${poll_interval_seconds}"
         kubectl get pods
 
+        # On final attempt, show errors for debugging
+        if [[ ${SECONDS} -ge $((timeout_seconds - poll_interval_seconds)) ]]; then
+            redirect=""
+        else
+            redirect="2>/dev/null"
+        fi
+
-        if kubectl wait job \
+        if eval kubectl wait job \
                 --all \
                 --for=condition=Complete \
-                --timeout="${wait_timeout_seconds}s" 2>/dev/null \
-            && kubectl wait pods \
+                --timeout="${wait_timeout_seconds}s" $redirect \
+            && eval kubectl wait pods \
                 --all \
                 --selector='!job-name' \
                 --for=condition=Ready \
-                --timeout="${wait_timeout_seconds}s" 2>/dev/null
+                --timeout="${wait_timeout_seconds}s" $redirect
         then
             echo "All jobs completed and services are ready."
             return 0
         fi

Note: This uses eval to conditionally apply redirection, which requires careful handling.

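The bounded per-call timeout suggested above — min(remaining, `wait_timeout_seconds`) — can be sketched as a pure-bash helper (kubectl is not invoked here; the function name is illustrative):

```shell
#!/usr/bin/env bash

# Sketch: cap each `kubectl wait` at min(remaining, wait_timeout_seconds) so
# the overall deadline cannot be overshot. Returns non-zero when the budget
# is already exhausted.
bounded_wait_timeout() {
    local timeout_seconds=$1 wait_timeout_seconds=$2 elapsed=$3
    local remaining=$((timeout_seconds - elapsed))
    if (( remaining <= 0 )); then
        echo 0
        return 1
    fi
    if (( remaining < wait_timeout_seconds )); then
        echo "$remaining"
    else
        echo "$wait_timeout_seconds"
    fi
}

bounded_wait_timeout 300 5 10    # prints 5 (full per-call timeout fits)
bounded_wait_timeout 300 5 298   # prints 2 (only 2s of budget remain)
```

Inside `wait_for_pods`, `$SECONDS` would supply the `elapsed` argument, and the returned value would replace the fixed `--timeout="${wait_timeout_seconds}s"`.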


# Waits for sample download to complete and all pods to be ready
wait_for_cluster_ready() {
    if wait "$SAMPLE_DOWNLOAD_PID"; then
        echo "Sample download and extraction complete"
    else
        echo "ERROR: Sample download failed"
        return 1
    fi

    wait_for_pods 300 5 5
}
Comment on lines +144 to +154

🧹 Nitpick | 🔵 Trivial

Consider parameterizing timeout values.

The hardcoded values 300 5 5 in wait_for_pods call may not be suitable for all environments (e.g., slower CI runners or larger clusters). Consider accepting these as optional parameters with defaults.

♻️ Suggested fix
 # Waits for sample download to complete and all pods to be ready
-wait_for_cluster_ready() {
+# @param {int} timeout_seconds Overall timeout (default: 300)
+# @param {int} poll_interval_seconds Poll interval (default: 5)
+# @param {int} wait_timeout_seconds kubectl wait timeout (default: 5)
+wait_for_cluster_ready() {
+    local timeout_seconds=${1:-300}
+    local poll_interval_seconds=${2:-5}
+    local wait_timeout_seconds=${3:-5}
+
     if wait "$SAMPLE_DOWNLOAD_PID"; then
         echo "Sample download and extraction complete"
     else
         echo "ERROR: Sample download failed"
         return 1
     fi

-    wait_for_pods 300 5 5
+    wait_for_pods "$timeout_seconds" "$poll_interval_seconds" "$wait_timeout_seconds"
 }
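The defaulted-parameter pattern from the suggestion, shown standalone (the `_sketch` suffix marks this as illustrative rather than the PR's function):

```shell
#!/usr/bin/env bash

# Sketch: optional positional parameters with defaults matching the current
# hardcoded 300/5/5 values; callers may omit any trailing argument.
wait_for_cluster_ready_sketch() {
    local timeout_seconds=${1:-300}
    local poll_interval_seconds=${2:-5}
    local wait_timeout_seconds=${3:-5}
    echo "timeout=${timeout_seconds} poll=${poll_interval_seconds} wait=${wait_timeout_seconds}"
}

wait_for_cluster_ready_sketch          # prints timeout=300 poll=5 wait=5
wait_for_cluster_ready_sketch 600 10   # prints timeout=600 poll=10 wait=5
```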

2 changes: 1 addition & 1 deletion tools/deployment/package-helm/Chart.yaml
@@ -1,6 +1,6 @@
apiVersion: "v2"
name: "clp"
version: "0.1.2-dev.17"
version: "0.1.2-dev.18"
description: "A Helm chart for CLP's (Compressed Log Processor) package deployment"
type: "application"
appVersion: "0.7.1-dev"
62 changes: 62 additions & 0 deletions tools/deployment/package-helm/set-up-multi-dedicated-test.sh
@@ -0,0 +1,62 @@
#!/usr/bin/env bash

# Multi-node cluster setup with dedicated worker nodes for each worker type
# Demonstrates nodeSelector scheduling with separate node pools
# TODO: Migrate into integration test

script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"

CLP_HOME="${CLP_HOME:-/tmp/clp}"
CLUSTER_NAME="${CLUSTER_NAME:-clp-test}"
NUM_COMPRESSION_NODES="${NUM_COMPRESSION_NODES:-2}"
NUM_QUERY_NODES="${NUM_QUERY_NODES:-2}"
COMPRESSION_WORKER_REPLICAS="${COMPRESSION_WORKER_REPLICAS:-2}"
QUERY_WORKER_REPLICAS="${QUERY_WORKER_REPLICAS:-2}"
REDUCER_REPLICAS="${REDUCER_REPLICAS:-2}"

# shellcheck source=.set-up-common.sh
source "${script_dir}/.set-up-common.sh"

echo "=== Multi-node setup with dedicated worker nodes ==="
echo "Cluster: ${CLUSTER_NAME}"
echo "Compression nodes: ${NUM_COMPRESSION_NODES}"
echo "Query nodes: ${NUM_QUERY_NODES}"
echo "Compression workers: ${COMPRESSION_WORKER_REPLICAS}"
echo "Query workers: ${QUERY_WORKER_REPLICAS}"
echo "Reducers: ${REDUCER_REPLICAS}"
echo ""

prepare_environment "${CLUSTER_NAME}"

total_workers=$((NUM_COMPRESSION_NODES + NUM_QUERY_NODES))

echo "Creating kind cluster..."
generate_kind_config "${total_workers}" | kind create cluster --name "${CLUSTER_NAME}" --config=-

echo "Labeling worker nodes..."
mapfile -t worker_nodes < <(kubectl get nodes --selector='!node-role.kubernetes.io/control-plane' -o jsonpath='{.items[*].metadata.name}' | tr ' ' '\n')

# Label compression nodes
for ((i = 0; i < NUM_COMPRESSION_NODES; i++)); do
echo "Labeling ${worker_nodes[$i]} as compression node"
kubectl label node "${worker_nodes[$i]}" yscope.io/nodeType=compression --overwrite
done

# Label query nodes
for ((i = NUM_COMPRESSION_NODES; i < total_workers; i++)); do
echo "Labeling ${worker_nodes[$i]} as query node"
kubectl label node "${worker_nodes[$i]}" yscope.io/nodeType=query --overwrite
done
Comment on lines +36 to +49
Contributor
⚠️ Potential issue | 🟡 Minor

Add bounds checking before accessing worker_nodes array.

If the cluster has fewer worker nodes than expected (e.g., NUM_COMPRESSION_NODES + NUM_QUERY_NODES), accessing worker_nodes[$i] will silently fail or produce empty values. Consider validating that the array contains the expected number of nodes.

🔧 Suggested fix
 echo "Labeling worker nodes..."
 mapfile -t worker_nodes < <(kubectl get nodes --selector='!node-role.kubernetes.io/control-plane' -o jsonpath='{.items[*].metadata.name}' | tr ' ' '\n')
 
+if [[ ${#worker_nodes[@]} -lt ${total_workers} ]]; then
+    echo "ERROR: Expected ${total_workers} worker nodes, but found ${#worker_nodes[@]}"
+    exit 1
+fi
+
 # Label compression nodes
 for ((i = 0; i < NUM_COMPRESSION_NODES; i++)); do

echo "Installing Helm chart..."
helm uninstall test --ignore-not-found
sleep 2
Comment on lines +52 to +53
Contributor
🧹 Nitpick | 🔵 Trivial

Consider replacing magic sleep with a more robust wait.

The sleep 2 after helm uninstall is a timing-based workaround. While acceptable for a test script, a more robust approach would wait for resources to be fully removed.

♻️ Alternative approach
 helm uninstall test --ignore-not-found
-sleep 2
+helm uninstall test --ignore-not-found --wait 2>/dev/null || true

helm install test "${script_dir}" \
--set "distributedDeployment=true" \
--set "compressionWorker.replicas=${COMPRESSION_WORKER_REPLICAS}" \
--set "compressionWorker.scheduling.nodeSelector.yscope\.io/nodeType=compression" \
--set "queryWorker.replicas=${QUERY_WORKER_REPLICAS}" \
--set "queryWorker.scheduling.nodeSelector.yscope\.io/nodeType=query" \
--set "reducer.replicas=${REDUCER_REPLICAS}"

wait_for_cluster_ready
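Note the backslash in `yscope\.io/nodeType` above: Helm's `--set` parser treats an unescaped dot as a nested-key separator, so dots inside a key (here, the label's domain prefix) must be escaped. An equivalent values-file fragment (a sketch mirroring the flags above; the filename is hypothetical) sidesteps the escaping entirely:

```yaml
# values-override.yaml — passed via `helm install test . -f values-override.yaml`
compressionWorker:
  replicas: 2
  scheduling:
    nodeSelector:
      yscope.io/nodeType: "compression"
queryWorker:
  replicas: 2
  scheduling:
    nodeSelector:
      yscope.io/nodeType: "query"
```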
41 changes: 41 additions & 0 deletions tools/deployment/package-helm/set-up-multi-shared-test.sh
@@ -0,0 +1,41 @@
#!/usr/bin/env bash

# Multi-node cluster setup with shared worker nodes
# Both compression and query workers share the same node pool
# TODO: Migrate into integration test
Contributor
🧹 Nitpick | 🔵 Trivial

Acknowledge TODO for integration test migration.

The TODO comment indicates this is a temporary setup script. If you'd like, I can help create a GitHub issue to track migrating this into a proper integration test.



Comment on lines +1 to +6
Contributor
🧹 Nitpick | 🔵 Trivial

Consider enabling bash strict mode for reliability.

The script lacks `set -euo pipefail`, which would make it fail fast on errors, undefined variables, and pipeline failures. This is particularly important for setup scripts, where partial execution can leave the environment in an inconsistent state.

Proposed fix
 #!/usr/bin/env bash
 
+set -euo pipefail
+
 # Multi-node cluster setup with shared worker nodes
 # Both compression and query workers share the same node pool
 # TODO: Migrate into integration test

script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"

CLP_HOME="${CLP_HOME:-/tmp/clp}"
CLUSTER_NAME="${CLUSTER_NAME:-clp-test}"
NUM_WORKER_NODES="${NUM_WORKER_NODES:-2}"
COMPRESSION_WORKER_REPLICAS="${COMPRESSION_WORKER_REPLICAS:-2}"
QUERY_WORKER_REPLICAS="${QUERY_WORKER_REPLICAS:-2}"
REDUCER_REPLICAS="${REDUCER_REPLICAS:-2}"

# shellcheck source=.set-up-common.sh
source "${script_dir}/.set-up-common.sh"

echo "=== Multi-node setup with shared worker nodes ==="
echo "Cluster: ${CLUSTER_NAME}"
echo "Worker nodes: ${NUM_WORKER_NODES}"
echo "Compression workers: ${COMPRESSION_WORKER_REPLICAS}"
echo "Query workers: ${QUERY_WORKER_REPLICAS}"
echo "Reducers: ${REDUCER_REPLICAS}"
echo ""

prepare_environment "${CLUSTER_NAME}"

echo "Creating kind cluster..."
generate_kind_config "${NUM_WORKER_NODES}" | kind create cluster --name "${CLUSTER_NAME}" --config=-

echo "Installing Helm chart..."
helm uninstall test --ignore-not-found
sleep 2
Comment on lines +33 to +34
Contributor
🧹 Nitpick | 🔵 Trivial

Fixed sleep is fragile; consider a retry loop or event-based wait.

The sleep 2 after helm uninstall assumes 2 seconds is always sufficient for cleanup. While acceptable for a temporary setup script, a more robust approach would wait for the release to be fully removed.

Alternative approach (optional)
 helm uninstall test --ignore-not-found
-sleep 2
+helm uninstall test --ignore-not-found --wait 2>/dev/null || true

The --wait flag makes Helm wait for resources to be deleted before returning.
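Where `helm uninstall --wait` is unavailable or undesirable, a generic polling helper can serve the same purpose. A sketch under stated assumptions — the helper name, timeout, and release name are illustrative, not part of the scripts above:

```shell
# Polls until the given command stops succeeding, or the timeout elapses.
# Usage: wait_until_gone <timeout_seconds> <command...>
wait_until_gone() {
    local timeout_seconds="$1"
    shift
    local deadline=$((SECONDS + timeout_seconds))
    while "$@" &>/dev/null; do
        if ((SECONDS >= deadline)); then
            echo "ERROR: still present after ${timeout_seconds}s: $*" >&2
            return 1
        fi
        sleep 1
    done
}

# Example (hypothetical release name):
#   helm uninstall test --ignore-not-found
#   wait_until_gone 60 helm status test
```

Because `helm status` exits non-zero once the release is gone, the loop terminates as soon as cleanup completes rather than after a fixed delay.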

helm install test "${script_dir}" \
--set "distributedDeployment=true" \
--set "compressionWorker.replicas=${COMPRESSION_WORKER_REPLICAS}" \
--set "queryWorker.replicas=${QUERY_WORKER_REPLICAS}" \
--set "reducer.replicas=${REDUCER_REPLICAS}"

wait_for_cluster_ready
28 changes: 28 additions & 0 deletions tools/deployment/package-helm/set-up-test.sh
Member Author
We need easier mechanisms to load local images for debugging. Something like this has been working:

#!/usr/bin/env bash

# Single-node cluster setup for testing
# TODO: Migrate into integration test

script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"

CLP_HOME="${CLP_HOME:-/tmp/clp}"
CLUSTER_NAME="${CLUSTER_NAME:-clp-test}"

# shellcheck source=.set-up-common.sh
source "${script_dir}/.set-up-common.sh"

echo "=== Single-node setup ==="
echo "Cluster: ${CLUSTER_NAME}"
echo ""

prepare_environment "${CLUSTER_NAME}"

echo "Creating kind cluster..."
generate_kind_config 0 | kind create cluster --name "${CLUSTER_NAME}" --config=-

# Load the locally-built clp-package image into kind
repo_root="${script_dir}/../../../"
iid_file="${repo_root}/build/clp-package-image.id"
if [[ -f "$iid_file" ]]; then
    image_id=$(<"$iid_file")
    user="${USER:-$(id -un 2>/dev/null || whoami 2>/dev/null || echo unknown)}"
    short_id="${image_id#sha256:}"
    short_id="${short_id:0:4}"
    image_tag="dev-${user}-${short_id}"
    echo "Loading clp-package:${image_tag} into kind cluster..."
    kind load docker-image "clp-package:${image_tag}" --name "${CLUSTER_NAME}"
else
    echo "ERROR: No locally-built image found at ${iid_file}. Run 'task docker-images:package' first."
    exit 1
fi

echo "Installing Helm chart..."
helm uninstall test --ignore-not-found
sleep 2
helm install test "${script_dir}" \
    --set "image.clpPackage.repository=clp-package" \
    --set "image.clpPackage.tag=${image_tag}" \
    --set "image.clpPackage.pullPolicy=Never"

wait_for_cluster_ready

though we should generalize it to all the set-up scripts.

@coderabbitai create a feature request issue to track

Contributor
🧩 Analysis chain

🏁 Script executed:

#!/usr/bin/env bash

# Create a GitHub issue for the feature request
gh issue create \
  --repo y-scope/clp \
  --title "feat(helm): Add local image loading support to set-up scripts" \
  --body "## Description

The set-up scripts for Helm chart testing (e.g., \`set-up-test.sh\`, \`set-up-multi-dedicated-test.sh\`, \`set-up-multi-shared-test.sh\`) currently don't provide an easy mechanism to load locally-built Docker images into the kind cluster for debugging purposes.

## Proposed Solution

Add support for automatically detecting and loading locally-built \`clp-package\` images from \`build/clp-package-image.id\` into the kind cluster. This functionality should:

1. Check if a locally-built image exists at \`build/clp-package-image.id\`
2. Generate an appropriate dev tag (e.g., \`dev-{user}-{short-hash}\`)
3. Load the image into the kind cluster using \`kind load docker-image\`
4. Configure Helm to use the local image with \`pullPolicy=Never\`

## Reference Implementation

A working example for \`set-up-test.sh\` is provided in the PR comment showing the pattern to follow.

## Context

This feature request was raised during PR review to improve developer experience when testing Helm chart changes with locally-built images.

## Related Links

- PR: https://github.com/y-scope/clp/pull/1829
- Comment: https://github.com/y-scope/clp/pull/1829#discussion_r2744085949
- Requested by: `@junhaoliao`" \
  --assignee junhaoliao

Length of output: 42


@junhaoliao I've created a feature request issue to track this enhancement: #1937

The issue captures the need to add local image loading support across all set-up scripts, includes your reference implementation, and links back to this PR discussion.

@@ -0,0 +1,28 @@
#!/usr/bin/env bash

# Single-node cluster setup for testing
# TODO: Migrate into integration test

script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"

CLP_HOME="${CLP_HOME:-/tmp/clp}"
CLUSTER_NAME="${CLUSTER_NAME:-clp-test}"

# shellcheck source=.set-up-common.sh
source "${script_dir}/.set-up-common.sh"

echo "=== Single-node setup ==="
echo "Cluster: ${CLUSTER_NAME}"
echo ""

prepare_environment "${CLUSTER_NAME}"

echo "Creating kind cluster..."
generate_kind_config 0 | kind create cluster --name "${CLUSTER_NAME}" --config=-

echo "Installing Helm chart..."
helm uninstall test --ignore-not-found
sleep 2
helm install test "${script_dir}"
Comment on lines +23 to +26
Contributor
🧹 Nitpick | 🔵 Trivial

Consider polling instead of fixed sleep.

The 2-second sleep between helm uninstall and helm install is a workaround to allow resource finalization. For more robust behaviour, consider polling for resource deletion or using helm uninstall --wait, which blocks until resources are deleted.

♻️ Suggested improvement
 echo "Installing Helm chart..."
-helm uninstall test --ignore-not-found
-sleep 2
+helm uninstall test --ignore-not-found --wait
 helm install test "${script_dir}"


wait_for_cluster_ready