From 39d342bd67fd7d32872ace4dfa4434fa0a510d58 Mon Sep 17 00:00:00 2001
From: Nate Barbettini
Date: Mon, 4 Nov 2024 09:39:09 -0800
Subject: [PATCH] fix: Wait for healthy actor in arcade dev (#142)

Context:
Currently, `arcade dev` starts the actor process and then waits a hardcoded amount of time (2sec) for the actor to start up. This isn't enough time on some slower machines, which leads to the engine trying to start but failing.

Fix:
Wait until the actor is healthy according to its own `/actor/health` endpoint.
---
Reviewer amendments to `_wait_for_healthy_actor` (everything else is unchanged):
- Loop on `actor_process.poll() is None`. `not poll()` is also true for an
  exit code of 0, so a cleanly exited actor made the launcher wait forever.
- Close the HTTP connection in `finally`; it was leaked on the 200/`break`
  path and whenever the request raised.
- Do not print "Actor is healthy" when the loop ended because the actor exited.
- Tolerate any `ConnectionError` (reset/aborted), not only a refused connection.

 arcade/arcade/cli/launcher.py      | 53 ++++++++++++++++++++++++++++------
 arcade/arcade/core/config_model.py |  8 ++++++
 2 files changed, 53 insertions(+), 8 deletions(-)

diff --git a/arcade/arcade/cli/launcher.py b/arcade/arcade/cli/launcher.py
index a89530c6..2ca16013 100644
--- a/arcade/arcade/cli/launcher.py
+++ b/arcade/arcade/cli/launcher.py
@@ -1,9 +1,11 @@
+import http.client
 import io
 import ipaddress
 import logging
 import os
 import shutil
 import signal
+import socket
 import subprocess
 import sys
 import threading
@@ -28,8 +30,8 @@
 
 
 def start_servers(
-    host: str,
-    port: int,
+    actor_host: str,
+    actor_port: int,
     engine_config: str | None,
     engine_env: str | None = None,
     debug: bool = False,
@@ -45,8 +47,8 @@ def start_servers(
         debug: Whether to run in debug mode.
     """
     # Validate host and port
-    host = _validate_host(host)
-    port = _validate_port(port)
+    actor_host = _validate_host(actor_host)
+    actor_port = _validate_port(actor_port)
 
     # Ensure engine_config is provided and validated
     engine_config = _get_config_file(engine_config, default_filename="engine.yaml")
@@ -55,13 +57,13 @@ def start_servers(
     env_file = _get_config_file(engine_env, default_filename="arcade.env", optional=True)
 
     # Prepare command-line arguments for the actor server and engine
-    actor_cmd = _build_actor_command(host, port, debug)
+    actor_cmd = _build_actor_command(actor_host, actor_port, debug)
 
     # even if the user didn't pass an env file we may have found it in the default locations
     engine_cmd = _build_engine_command(engine_config, engine_env=env_file if env_file else None)
 
     # Start and manage the processes
-    _manage_processes(actor_cmd, engine_cmd, debug=debug)
+    _manage_processes(actor_cmd, actor_host, actor_port, engine_cmd, debug=debug)
 
 
 def _validate_host(host: str) -> str:
@@ -239,6 +241,8 @@ def _build_engine_command(engine_config: str | None, engine_env: str | None = No
 
 def _manage_processes(
     actor_cmd: list[str],
+    actor_host: str,
+    actor_port: int,
     engine_cmd: list[str],
     engine_env: dict[str, str] | None = None,
     debug: bool = False,
@@ -273,8 +277,7 @@ def terminate_processes(exit_program: bool = False) -> None:
         console.print("Starting actor server...", style="bold green")
         actor_process = _start_process("Actor", actor_cmd, debug=debug)
 
-        # Wait a bit to ensure actor is up
-        time.sleep(2)
+        _wait_for_healthy_actor(actor_process, actor_host, actor_port)
 
         # Start the engine
         console.print("Starting engine...", style="bold green")
@@ -356,6 +359,40 @@ def _start_process(
         raise RuntimeError(f"Failed to start {name}")
 
 
+def _wait_for_healthy_actor(
+    actor_process: subprocess.Popen, actor_host: str, actor_port: int
+) -> None:
+    """
+    Wait until an HTTP request to `host:port/actor/health` returns 200.
+
+    Polls once per second for as long as the actor process is running. If the
+    actor exits before reporting healthy, an error is printed and the function
+    returns without announcing a healthy actor.
+    """
+
+    # poll() returns None while the process is alive. Compare against None
+    # explicitly: an exit code of 0 is falsy and must still end the wait.
+    while actor_process.poll() is None:
+        time.sleep(1)
+        conn = http.client.HTTPConnection(actor_host, actor_port, timeout=1)
+        try:
+            conn.request("GET", "/actor/health")
+            if conn.getresponse().status == 200:
+                break
+        except (socket.gaierror, http.client.HTTPException, ConnectionError, TimeoutError):
+            pass  # Expected while the actor is still starting up
+        finally:
+            conn.close()  # Always release the socket, including on break
+        console.print("Waiting for actor to start...", style="bold yellow")
+    else:
+        # The loop ended without `break`: the actor exited before it was healthy
+        console.print("Actor exited before becoming healthy", style="bold red")
+        return
+
+    time.sleep(1)  # Wait just a little longer for everything to settle (discovered experimentally)
+    console.print("Actor is healthy", style="bold green")
+
+
 def _stream_output(process: subprocess.Popen, name: str) -> None:
     """
     Streams the output from a subprocess to the console.
diff --git a/arcade/arcade/core/config_model.py b/arcade/arcade/core/config_model.py
index a11813e3..5fb915b5 100644
--- a/arcade/arcade/core/config_model.py
+++ b/arcade/arcade/core/config_model.py
@@ -118,6 +118,14 @@ def load_from_file(cls) -> "Config":
 
         config_data = yaml.safe_load(config_file_path.read_text())
 
+        if config_data is None:
+            raise ValueError(
+                "Invalid credentials.yaml file. Please ensure it is a valid YAML file."
+            )
+
+        if "cloud" not in config_data:
+            raise ValueError("Invalid credentials.yaml file. Expected a 'cloud' key.")
+
         try:
             return cls(**config_data["cloud"])
         except ValidationError as e: