Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ logs/
*.db
__pycache__/
.pytest_cache/
EXPERIMENTS.md
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ The setup wizard will confirm when it receives the message and finish. Once done

> A cron job runs every 5 minutes to monitor the webhook endpoint. It verifies the webhook is registered and re-registers it if needed. If the server is unreachable, it auto-restarts the service (throttled to once per 3 minutes, max 3 attempts). After exhausting restart attempts without recovery, it sends a Telegram alert and stops retrying until the server responds with HTTP 200. The restart counter auto-clears when the health endpoint returns HTTP 200. You can also reset it manually by deleting `$HOME/.claudio/.last_restart_attempt` and `$HOME/.claudio/.restart_fail_count`.
>
> The health check also monitors: orphan `claude`/`node` processes (kills them after 30 minutes), disk usage (alerts above 90%), log file sizes (rotates files over 10MB), and backup freshness (alerts if the last backup is older than 2 hours). These thresholds are configurable via environment variables.
> The health check also monitors: disk usage (alerts above 90%), log file sizes (rotates files over 10MB), and backup freshness (alerts if the last backup is older than 2 hours). These thresholds are configurable via environment variables.

### Status

Expand Down
78 changes: 0 additions & 78 deletions lib/health-check.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
# Sends a Telegram alert after 3 restart attempts if the service never recovers
#
# Additional checks (run when service is healthy):
# - Orphan claude/node processes not belonging to the active service
# - Disk usage alerts (configurable threshold, default 90%)
# - Log rotation (configurable max size, default 10MB)
# - Backup freshness (alerts if last backup is older than threshold)
Expand Down Expand Up @@ -120,77 +119,6 @@ _clear_fail_state() {
rm -f "$RESTART_STAMP" "$FAIL_COUNT_FILE"
}

# --- Orphan process detection ---
# Finds claude/node processes that are NOT descendants of the active claudio
# service, logs each one via log_warn, and terminates it with SIGTERM.
# Outputs: the number of orphans killed, on stdout.
# Returns: 0 always (kill failures are tolerated).

# Convert a ps(1) etime string ([[dd-]hh:]mm:ss) to whole seconds.
# Arguments: $1 - etime string (may be empty)
# Outputs:   seconds on stdout
# Returns:   1 if the input is empty
_etime_to_seconds() {
  local etime=$1
  [[ -z "$etime" ]] && return 1
  local parts
  IFS=: read -ra parts <<< "${etime//-/:}"
  # 10# forces base 10 so zero-padded fields like "08" don't parse as octal.
  local secs=0
  case ${#parts[@]} in
    4) secs=$(( 10#${parts[0]} * 86400 + 10#${parts[1]} * 3600 + 10#${parts[2]} * 60 + 10#${parts[3]} )) ;;
    3) secs=$(( 10#${parts[0]} * 3600 + 10#${parts[1]} * 60 + 10#${parts[2]} )) ;;
    *) secs=$(( 10#${parts[0]} * 60 + 10#${parts[1]} )) ;;
  esac
  echo "$secs"
}

_check_orphan_processes() {
  # Resolve the main PID of the running claudio service, if any.
  local service_pid=""
  if [[ "$(uname)" == "Darwin" ]]; then
    service_pid=$(launchctl list | awk '/com\.claudio\.server/{print $1}')
  else
    service_pid=$(systemctl --user show claudio --property=MainPID --value 2>/dev/null || echo "")
  fi
  # Normalize: "0", "-" or empty all mean "no active service PID".
  # launchctl prints "-" in the PID column for a loaded-but-not-running job,
  # which previously slipped past this check.
  [[ "$service_pid" == "0" || "$service_pid" == "-" || -z "$service_pid" ]] && service_pid=""

  local orphan_count=0
  local pids
  # Find claude and node processes owned by this user. NB: pgrep -f matches
  # the full command line, so this is deliberately broad (matches "nodejs",
  # paths containing "claude", etc.) — same behavior as before.
  pids=$(pgrep -u "$(id -u)" -f '(claude|node)' 2>/dev/null || true)
  [[ -z "$pids" ]] && echo 0 && return

  for pid in $pids; do
    # Skip our own process AND our parent (cron / invoking shell) — the
    # broad pgrep pattern could match either, and killing them would abort
    # the health check itself.
    [[ "$pid" == "$$" || "$pid" == "$PPID" ]] && continue

    # Skip if it's a descendant of the service main PID.
    if [[ -n "$service_pid" ]]; then
      local ancestor="$pid"
      local is_child=false
      # Walk up the process tree (max 20 levels to avoid loops).
      for (( depth=0; depth<20; depth++ )); do
        local ppid
        ppid=$(ps -o ppid= -p "$ancestor" 2>/dev/null | tr -d ' ') || break
        [[ -z "$ppid" || "$ppid" == "0" || "$ppid" == "1" ]] && break
        if [[ "$ppid" == "$service_pid" ]]; then
          is_child=true
          break
        fi
        ancestor="$ppid"
      done
      [[ "$is_child" == true ]] && continue
    fi

    # Skip processes started less than 30 minutes ago (could be active handlers).
    local elapsed
    if [[ "$(uname)" == "Darwin" ]]; then
      # macOS ps has no etimes keyword; parse the etime form instead.
      local etime
      etime=$(ps -o etime= -p "$pid" 2>/dev/null | tr -d ' ') || continue
      elapsed=$(_etime_to_seconds "$etime") || continue
    else
      elapsed=$(ps -o etimes= -p "$pid" 2>/dev/null | tr -d ' ') || continue
    fi
    [[ -z "$elapsed" ]] && continue
    (( elapsed < 1800 )) && continue

    # This looks like an orphan: log it and send SIGTERM (not KILL — give it
    # a chance to shut down cleanly; the next cron run catches survivors).
    local cmdline
    cmdline=$(ps -o args= -p "$pid" 2>/dev/null | head -c 120) || cmdline="unknown"
    log_warn "health-check" "Orphan process found: PID=$pid age=${elapsed}s cmd=$cmdline"
    kill "$pid" 2>/dev/null || true
    orphan_count=$((orphan_count + 1))
  done

  echo "$orphan_count"
}

# --- Disk usage check ---
# Checks usage of all mounted partitions relevant to Claudio.
# Returns 0 if all OK, 1 if any partition exceeds threshold.
Expand Down Expand Up @@ -310,12 +238,6 @@ if [ "$http_code" = "200" ]; then
# --- Additional system checks (only when service is healthy) ---
alerts=""

# Orphan processes
orphans=$(_check_orphan_processes)
if (( orphans > 0 )); then
alerts="${alerts}Killed $orphans orphan process(es). "
fi

# Disk usage
if ! _check_disk_usage; then
alerts="${alerts}Disk usage above ${DISK_USAGE_THRESHOLD}%. "
Expand Down
9 changes: 0 additions & 9 deletions tests/health-check.bats
Original file line number Diff line number Diff line change
Expand Up @@ -311,15 +311,6 @@ EOF
! grep -q "Backup stale" "$CLAUDIO_PATH/claudio.log" 2>/dev/null
}

# Regression guard for the orphan-process sweep: with a healthy service
# (mocked curl returns 200) and no stray claude/node processes, the
# health-check script must exit 0 and log no "Orphan process" warnings.
@test "orphan check runs without errors when no processes found" {
create_env_file
create_mock_curl_healthy

run "$BATS_TEST_DIRNAME/../lib/health-check.sh"

# bats `run` captures exit code in $status; the sweep must be a no-op.
[ "$status" -eq 0 ]
! grep -q "Orphan process" "$CLAUDIO_PATH/claudio.log" 2>/dev/null
}

@test "cron_install adds cron entry" {
source "$BATS_TEST_DIRNAME/../lib/service.sh"
Expand Down
Loading