diff --git a/ansible/roles/ethlambda/tasks/main.yml b/ansible/roles/ethlambda/tasks/main.yml index 3fda6f8..d8ff446 100644 --- a/ansible/roles/ethlambda/tasks/main.yml +++ b/ansible/roles/ethlambda/tasks/main.yml @@ -87,6 +87,7 @@ --name {{ node_name }} --restart unless-stopped --network host + {{ '--init --ulimit core=-1 --workdir /data' if (enable_core_dumps | default('') == 'all') or (node_name in (enable_core_dumps | default('')).split(',')) or (node_name.split('_')[0] in (enable_core_dumps | default('')).split(',')) else '' }} -v {{ genesis_dir }}:/config:ro -v {{ data_dir }}/{{ node_name }}:/data {{ ethlambda_docker_image }} diff --git a/ansible/roles/grandine/tasks/main.yml b/ansible/roles/grandine/tasks/main.yml index 7679d50..1fd9558 100644 --- a/ansible/roles/grandine/tasks/main.yml +++ b/ansible/roles/grandine/tasks/main.yml @@ -89,6 +89,7 @@ --name {{ node_name }} --restart unless-stopped --network host + {{ '--init --ulimit core=-1 --workdir /data' if (enable_core_dumps | default('') == 'all') or (node_name in (enable_core_dumps | default('')).split(',')) or (node_name.split('_')[0] in (enable_core_dumps | default('')).split(',')) else '' }} -v {{ genesis_dir }}:/config:ro -v {{ data_dir }}/{{ node_name }}:/data {{ grandine_docker_image }} diff --git a/ansible/roles/lantern/tasks/main.yml b/ansible/roles/lantern/tasks/main.yml index 8f18357..23b9246 100644 --- a/ansible/roles/lantern/tasks/main.yml +++ b/ansible/roles/lantern/tasks/main.yml @@ -82,6 +82,7 @@ --name {{ node_name }} --restart unless-stopped --network host + {{ '--init --ulimit core=-1 --workdir /data' if (enable_core_dumps | default('') == 'all') or (node_name in (enable_core_dumps | default('')).split(',')) or (node_name.split('_')[0] in (enable_core_dumps | default('')).split(',')) else '' }} -v {{ genesis_dir }}:/config:ro -v {{ data_dir }}/{{ node_name }}:/data {{ lantern_docker_image }} diff --git a/ansible/roles/lighthouse/tasks/main.yml b/ansible/roles/lighthouse/tasks/main.yml 
index 7385715..d87c831 100644 --- a/ansible/roles/lighthouse/tasks/main.yml +++ b/ansible/roles/lighthouse/tasks/main.yml @@ -86,6 +86,7 @@ --name {{ node_name }} --restart unless-stopped --network host + {{ '--init --ulimit core=-1 --workdir /data' if (enable_core_dumps | default('') == 'all') or (node_name in (enable_core_dumps | default('')).split(',')) or (node_name.split('_')[0] in (enable_core_dumps | default('')).split(',')) else '' }} -v {{ genesis_dir }}:/config:ro -v {{ data_dir }}/{{ node_name }}:/data {{ lighthouse_docker_image }} diff --git a/ansible/roles/qlean/tasks/main.yml b/ansible/roles/qlean/tasks/main.yml index 07579b7..1cb505c 100644 --- a/ansible/roles/qlean/tasks/main.yml +++ b/ansible/roles/qlean/tasks/main.yml @@ -96,6 +96,7 @@ --restart unless-stopped --network host --platform {{ qlean_docker_platform }} + {{ '--init --ulimit core=-1 --workdir /data' if (enable_core_dumps | default('') == 'all') or (node_name in (enable_core_dumps | default('')).split(',')) or (node_name.split('_')[0] in (enable_core_dumps | default('')).split(',')) else '' }} -v {{ genesis_dir }}:/config:ro -v {{ data_dir }}/{{ node_name }}:/data {{ qlean_docker_image }} diff --git a/ansible/roles/ream/tasks/main.yml b/ansible/roles/ream/tasks/main.yml index 1810958..8008fae 100644 --- a/ansible/roles/ream/tasks/main.yml +++ b/ansible/roles/ream/tasks/main.yml @@ -82,6 +82,7 @@ --name {{ node_name }} --restart unless-stopped --network host + {{ '--init --ulimit core=-1 --workdir /data' if (enable_core_dumps | default('') == 'all') or (node_name in (enable_core_dumps | default('')).split(',')) or (node_name.split('_')[0] in (enable_core_dumps | default('')).split(',')) else '' }} -v {{ genesis_dir }}:/config:ro -v {{ data_dir }}/{{ node_name }}:/data {{ ream_docker_image }} diff --git a/ansible/roles/zeam/tasks/main.yml b/ansible/roles/zeam/tasks/main.yml index 74c0473..5026ed6 100644 --- a/ansible/roles/zeam/tasks/main.yml +++ b/ansible/roles/zeam/tasks/main.yml @@ -95,6 
+95,7 @@ --restart unless-stopped --network host --security-opt seccomp=unconfined + {{ '--init --ulimit core=-1 --workdir /data' if (enable_core_dumps | default('') == 'all') or (node_name in (enable_core_dumps | default('')).split(',')) or (node_name.split('_')[0] in (enable_core_dumps | default('')).split(',')) else '' }} -v {{ genesis_dir }}:/config:ro -v {{ data_dir }}/{{ node_name }}:/data {{ zeam_docker_image }} diff --git a/parse-env.sh b/parse-env.sh index 91b2a6a..84db521 100755 --- a/parse-env.sh +++ b/parse-env.sh @@ -80,6 +80,11 @@ while [[ $# -gt 0 ]]; do stopNodes=true shift ;; + --coreDumps) + coreDumps="$2" + shift # past argument + shift # past value + ;; *) # unknown option shift # past argument ;; @@ -110,3 +115,4 @@ echo "cleanData = $cleanData" echo "popupTerminal = $popupTerminal" echo "dockerTag = ${dockerTag:-latest}" echo "enableMetrics = $enableMetrics" +echo "coreDumps = ${coreDumps:-disabled}" diff --git a/run-ansible.sh b/run-ansible.sh index 5f4eabd..3afd48c 100755 --- a/run-ansible.sh +++ b/run-ansible.sh @@ -26,6 +26,7 @@ validator_config_file="$5" sshKeyFile="$6" useRoot="$7" # Flag to use root user (defaults to current user) action="$8" # Action: "stop" to stop nodes, otherwise deploy +coreDumps="$9" # Core dump configuration: "all", node names, or client types # Determine SSH user: use root if --useRoot flag is set, otherwise use current user if [ "$useRoot" == "true" ]; then @@ -37,7 +38,7 @@ fi # Validate required arguments if [ -z "$configDir" ] || [ -z "$validator_config_file" ]; then echo "Error: Missing required arguments" - echo "Usage: $0 [sshKeyFile] [useRoot]" + echo "Usage: $0 [sshKeyFile] [useRoot] [action] [coreDumps]" exit 1 fi @@ -107,6 +108,10 @@ if [ -n "$validatorConfig" ] && [ "$validatorConfig" != "genesis_bootnode" ]; th EXTRA_VARS="$EXTRA_VARS validator_config=$validatorConfig" fi +if [ -n "$coreDumps" ]; then + EXTRA_VARS="$EXTRA_VARS enable_core_dumps=$coreDumps" +fi + # Determine deployment mode 
#######################################
# Decide whether core dumps should be enabled for a given node.
#
# The selection spec comes from the --coreDumps option and may be:
#   - empty / unset      -> core dumps disabled for every node
#   - "all"              -> core dumps enabled for every node
#   - comma-separated    -> exact node names (e.g. "zeam_0") and/or client
#     list                  types (e.g. "zeam", which matches zeam_0, zeam_1, ...)
#
# Whitespace anywhere in the spec is ignored, so "zeam, ream_0" behaves like
# "zeam,ream_0". Empty list entries (e.g. a trailing comma) never match.
#
# Globals:
#   coreDumps (read only) - the selection spec described above
# Arguments:
#   $1 - node name, e.g. "zeam_0"; the client type is the part before the
#        first underscore
# Returns:
#   0 if core dumps should be enabled for the node, 1 otherwise
#######################################
should_enable_core_dumps() {
    local node_name="$1"
    local client_type="${node_name%%_*}"  # "zeam" from "zeam_0"
    # ${coreDumps:-} keeps the helper safe under `set -u` when the flag was
    # never passed on the command line.
    local spec="${coreDumps:-}"
    # Keep the scratch variables local so the helper cannot clobber
    # same-named variables in the calling script.
    local -a dump_targets=()
    local target

    # Node names and client types never contain whitespace, so dropping it
    # makes user input such as "zeam, ream_0" work as intended.
    spec="${spec//[[:space:]]/}"

    if [[ -z "$spec" ]]; then
        return 1
    fi
    if [[ "$spec" == "all" ]]; then
        return 0
    fi

    IFS=',' read -r -a dump_targets <<< "$spec"
    for target in "${dump_targets[@]}"; do
        # Skip empty entries produced by stray commas ("zeam,,ream").
        [[ -n "$target" ]] || continue
        # Exact node name match or client type match.
        if [[ "$target" == "$node_name" || "$target" == "$client_type" ]]; then
            return 0
        fi
    done
    return 1
}
"$scriptDir/run-ansible.sh" "$configDir" "$node" "$cleanData" "$validatorConfig" "$validator_config_file" "$sshKeyFile" "$useRoot"; then + if ! "$scriptDir/run-ansible.sh" "$configDir" "$node" "$cleanData" "$validatorConfig" "$validator_config_file" "$sshKeyFile" "$useRoot" "" "$coreDumps"; then echo "❌ Ansible deployment failed. Exiting." exit 1 fi @@ -272,7 +289,13 @@ for item in "${spin_nodes[@]}"; do # spin nodes if [ "$node_setup" == "binary" ] then - execCmd="$node_binary" + # Add core dump support if enabled for this node + if should_enable_core_dumps "$item"; then + execCmd="ulimit -c unlimited && $node_binary" + echo "Core dumps enabled for $item (binary mode)" + else + execCmd="$node_binary" + fi else # Extract image name from node_docker (find word containing ':' which is the image:tag) docker_image=$(echo "$node_docker" | grep -oE '[^ ]+:[^ ]+' | head -1) @@ -293,6 +316,15 @@ for item in "${spin_nodes[@]}"; do # to reach each other via 127.0.0.1 (as configured in nodes.yaml ENR records). # Note: Port mapping (-p) doesn't work with --network host, so metrics endpoints # are not directly accessible from the macOS host. Use 'docker exec' to access them. + + # Add core dump support if enabled for this node + # --init: forwards signals and reaps zombies (required for core dumps) + # --workdir /data: dumps land in the mounted volume + if should_enable_core_dumps "$item"; then + execCmd="$execCmd --init --ulimit core=-1 --workdir /data" + echo "Core dumps enabled for $item (dumps will be written to $dataDir/$item/)" + fi + execCmd="$execCmd --name $item --network host \ -v $configDir:/config \ -v $dataDir/$item:/data \