From e37037e9c7e456855c501786c956632f99e19214 Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Mon, 15 Dec 2025 23:53:28 -0500 Subject: [PATCH 1/2] refactor(deployment): Standardize service names to use hyphens instead of underscores for RFC 1123 compliance (resolves #1777). --- .../clp-py-utils/clp_py_utils/clp_config.py | 20 +++--- .../tests/utils/clp_mode_utils.py | 37 ++++------ .../package/docker-compose-all.yaml | 72 ++++++++----------- .../package/docker-compose.runtime.yaml | 1 - 4 files changed, 51 insertions(+), 79 deletions(-) diff --git a/components/clp-py-utils/clp_py_utils/clp_config.py b/components/clp-py-utils/clp_py_utils/clp_config.py index 6236a9dadc..91552166a1 100644 --- a/components/clp-py-utils/clp_py_utils/clp_config.py +++ b/components/clp-py-utils/clp_py_utils/clp_config.py @@ -32,19 +32,19 @@ DB_COMPONENT_NAME = "database" QUEUE_COMPONENT_NAME = "queue" REDIS_COMPONENT_NAME = "redis" -SPIDER_SCHEDULER_COMPONENT_NAME = "spider_scheduler" +SPIDER_SCHEDULER_COMPONENT_NAME = "spider-scheduler" REDUCER_COMPONENT_NAME = "reducer" -RESULTS_CACHE_COMPONENT_NAME = "results_cache" -COMPRESSION_SCHEDULER_COMPONENT_NAME = "compression_scheduler" -QUERY_SCHEDULER_COMPONENT_NAME = "query_scheduler" +RESULTS_CACHE_COMPONENT_NAME = "results-cache" +COMPRESSION_SCHEDULER_COMPONENT_NAME = "compression-scheduler" +QUERY_SCHEDULER_COMPONENT_NAME = "query-scheduler" PRESTO_COORDINATOR_COMPONENT_NAME = "presto-coordinator" -COMPRESSION_WORKER_COMPONENT_NAME = "compression_worker" -QUERY_WORKER_COMPONENT_NAME = "query_worker" -API_SERVER_COMPONENT_NAME = "api_server" -LOG_INGESTOR_COMPONENT_NAME = "log_ingestor" +COMPRESSION_WORKER_COMPONENT_NAME = "compression-worker" +QUERY_WORKER_COMPONENT_NAME = "query-worker" +API_SERVER_COMPONENT_NAME = "api-server" +LOG_INGESTOR_COMPONENT_NAME = "log-ingestor" WEBUI_COMPONENT_NAME = "webui" -MCP_SERVER_COMPONENT_NAME = "mcp_server" -GARBAGE_COLLECTOR_COMPONENT_NAME = "garbage_collector" +MCP_SERVER_COMPONENT_NAME = 
"mcp-server" +GARBAGE_COLLECTOR_COMPONENT_NAME = "garbage-collector" # Action names ARCHIVE_MANAGER_ACTION_NAME = "archive_manager" diff --git a/integration-tests/tests/utils/clp_mode_utils.py b/integration-tests/tests/utils/clp_mode_utils.py index b81f9d381e..5364cc7290 100644 --- a/integration-tests/tests/utils/clp_mode_utils.py +++ b/integration-tests/tests/utils/clp_mode_utils.py @@ -40,38 +40,27 @@ } -# TODO: This will eventually be replaced by a formalized mapping between component and service. -def _to_docker_compose_service_name(name: str) -> str: - """ - Convert a component name to a Docker Compose service name. - - :param name: - :return: Service name with underscores replaced by hyphens - """ - return name.replace("_", "-") - - # TODO: Modify these component lists when the Presto Docker Compose project is integrated with the # CLP Docker compose project. CLP_BASE_COMPONENTS = [ - _to_docker_compose_service_name(DB_COMPONENT_NAME), - _to_docker_compose_service_name(QUEUE_COMPONENT_NAME), - _to_docker_compose_service_name(REDIS_COMPONENT_NAME), - _to_docker_compose_service_name(REDUCER_COMPONENT_NAME), - _to_docker_compose_service_name(RESULTS_CACHE_COMPONENT_NAME), - _to_docker_compose_service_name(COMPRESSION_SCHEDULER_COMPONENT_NAME), - _to_docker_compose_service_name(COMPRESSION_WORKER_COMPONENT_NAME), - _to_docker_compose_service_name(WEBUI_COMPONENT_NAME), + DB_COMPONENT_NAME, + QUEUE_COMPONENT_NAME, + REDIS_COMPONENT_NAME, + REDUCER_COMPONENT_NAME, + RESULTS_CACHE_COMPONENT_NAME, + COMPRESSION_SCHEDULER_COMPONENT_NAME, + COMPRESSION_WORKER_COMPONENT_NAME, + WEBUI_COMPONENT_NAME, ] CLP_QUERY_COMPONENTS = [ - _to_docker_compose_service_name(QUERY_SCHEDULER_COMPONENT_NAME), - _to_docker_compose_service_name(QUERY_WORKER_COMPONENT_NAME), + QUERY_SCHEDULER_COMPONENT_NAME, + QUERY_WORKER_COMPONENT_NAME, ] -CLP_API_SERVER_COMPONENT = _to_docker_compose_service_name(API_SERVER_COMPONENT_NAME) -CLP_GARBAGE_COLLECTOR_COMPONENT = 
_to_docker_compose_service_name(GARBAGE_COLLECTOR_COMPONENT_NAME) -CLP_MCP_SERVER_COMPONENT = _to_docker_compose_service_name(MCP_SERVER_COMPONENT_NAME) +CLP_API_SERVER_COMPONENT = API_SERVER_COMPONENT_NAME +CLP_GARBAGE_COLLECTOR_COMPONENT = GARBAGE_COLLECTOR_COMPONENT_NAME +CLP_MCP_SERVER_COMPONENT = MCP_SERVER_COMPONENT_NAME def get_clp_config_from_mode(mode_name: str) -> ClpConfig: diff --git a/tools/deployment/package/docker-compose-all.yaml b/tools/deployment/package/docker-compose-all.yaml index 2502cf5f52..58892c9ffd 100644 --- a/tools/deployment/package/docker-compose-all.yaml +++ b/tools/deployment/package/docker-compose-all.yaml @@ -48,7 +48,6 @@ services: database: <<: *service_defaults image: "${CLP_DB_CONTAINER_IMAGE_REF:-mariadb:10-jammy}" - hostname: "database" deploy: # Value must be either 0 or 1. Set to 0 to disable the database. replicas: "${CLP_DATABASE_ENABLED:-1}" @@ -86,7 +85,6 @@ services: db-table-creator: <<: *service_defaults - hostname: "db_table_creator" environment: CLP_DB_PASS: "${CLP_DB_PASS:?Please set a value.}" CLP_DB_ROOT_PASS: "${CLP_DB_ROOT_PASS:?Please set a value.}" @@ -111,7 +109,6 @@ services: queue: <<: *service_defaults image: "rabbitmq:3.9.8" - hostname: "queue" deploy: # Value must be either 0 or 1. Set to 0 to disable the queue. replicas: "${CLP_QUEUE_ENABLED:-1}" @@ -138,7 +135,6 @@ services: redis: <<: *service_defaults image: "redis:7.2.4" - hostname: "redis" deploy: # Value must be either 0 or 1. Set to 0 to disable the redis. replicas: "${CLP_REDIS_ENABLED:-1}" @@ -177,7 +173,6 @@ services: results-cache: <<: *service_defaults image: "mongo:7.0.1" - hostname: "results_cache" deploy: # Value must be either 0 or 1. Set to 0 to disable the results cache. 
replicas: "${CLP_RESULTS_CACHE_ENABLED:-1}" @@ -192,10 +187,10 @@ services: target: "/etc/mongo/mongod.conf" read_only: true - type: "bind" - source: "${CLP_RESULTS_CACHE_DATA_DIR_HOST:-./var/data/results_cache}" + source: "${CLP_RESULTS_CACHE_DATA_DIR_HOST:-./var/data/results-cache}" target: "/data/db" - type: "bind" - source: "${CLP_RESULTS_CACHE_LOGS_DIR_HOST:-./var/log/results_cache}" + source: "${CLP_RESULTS_CACHE_LOGS_DIR_HOST:-./var/log/results-cache}" target: "/var/log/mongodb" command: [ "--config", "/etc/mongo/mongod.conf", @@ -210,7 +205,6 @@ services: results-cache-indices-creator: <<: *service_defaults - hostname: "results_cache_indices_creator" environment: PYTHONPATH: "/opt/clp/lib/python3/site-packages" depends_on: @@ -220,15 +214,14 @@ services: "python3", "-u", "-m", "clp_py_utils.initialize-results-cache", - "--uri", "mongodb://results_cache:27017/${CLP_RESULTS_CACHE_DB_NAME:-clp-query-results}", + "--uri", "mongodb://results-cache:27017/${CLP_RESULTS_CACHE_DB_NAME:-clp-query-results}", "--stream-collection", "${CLP_RESULTS_CACHE_STREAM_COLLECTION_NAME:-stream-files}", ] spider-scheduler: <<: *service_defaults - hostname: "spider_scheduler" environment: - SPIDER_LOG_FILE: "/var/log/spider_scheduler.log" + SPIDER_LOG_FILE: "/var/log/spider-scheduler.log" depends_on: db-table-creator: condition: "service_completed_successfully" @@ -238,7 +231,7 @@ services: target: 6000 command: [ "/opt/clp/bin/spider_scheduler", - "--host", "spider_scheduler", + "--host", "spider-scheduler", "--port", "6000", "--storage_url", "jdbc:mariadb://database:3306/\ ${SPIDER_DB_NAME:-spider-db}?\ @@ -248,7 +241,6 @@ services: compression-scheduler: <<: *service_defaults - hostname: "compression_scheduler" stop_grace_period: "300s" environment: BROKER_URL: "amqp://${CLP_QUEUE_USER:?Please set a value.}\ @@ -256,7 +248,7 @@ services: CLP_DB_PASS: "${CLP_DB_PASS:?Please set a value.}" CLP_DB_USER: "${CLP_DB_USER:?Please set a value.}" CLP_LOGGING_LEVEL: 
"${CLP_COMPRESSION_SCHEDULER_LOGGING_LEVEL:-INFO}" - CLP_LOGS_DIR: "/var/log/compression_scheduler" + CLP_LOGS_DIR: "/var/log/compression-scheduler" PYTHONPATH: "/opt/clp/lib/python3/site-packages" RESULT_BACKEND: "redis://default:${CLP_REDIS_PASS:?Please set a value.}@redis:6379\ /${CLP_REDIS_BACKEND_DB_COMPRESSION:-1}" @@ -281,15 +273,14 @@ services: compression-worker: <<: *service_defaults - hostname: "compression_worker" environment: BROKER_URL: "amqp://${CLP_QUEUE_USER:?Please set a value.}\ :${CLP_QUEUE_PASS:?Please set a value.}@queue:5672" CLP_CONFIG_PATH: "/etc/clp-config.yaml" CLP_HOME: "/opt/clp" CLP_LOGGING_LEVEL: "${CLP_COMPRESSION_WORKER_LOGGING_LEVEL:-INFO}" - CLP_LOGS_DIR: "/var/log/compression_worker" - CLP_WORKER_LOG_PATH: "/var/log/compression_worker/worker.log" + CLP_LOGS_DIR: "/var/log/compression-worker" + CLP_WORKER_LOG_PATH: "/var/log/compression-worker/worker.log" PYTHONPATH: "/opt/clp/lib/python3/site-packages" RESULT_BACKEND: "redis://default:${CLP_REDIS_PASS:?Please set a value.}@redis:6379\ /${CLP_REDIS_BACKEND_DB_COMPRESSION:-1}" @@ -309,22 +300,23 @@ services: "worker", "--concurrency", "${CLP_COMPRESSION_WORKER_CONCURRENCY:-1}", "--loglevel", "WARNING", - "-f", "/var/log/compression_worker/worker.log", + "-f", "/var/log/compression-worker/worker.log", "-Q", "compression", "-n", "compression-worker" ] spider-compression-worker: <<: *service_defaults - hostname: "compression_worker" + # Use same hostname as compression-worker for service discovery. 
+ hostname: "compression-worker" environment: CLP_CONFIG_PATH: "/etc/clp-config.yaml" CLP_HOME: "/opt/clp" CLP_LOGGING_LEVEL: "${CLP_COMPRESSION_WORKER_LOGGING_LEVEL:-INFO}" - CLP_LOGS_DIR: "/var/log/compression_worker" - CLP_WORKER_LOG_PATH: "/var/log/compression_worker/worker.log" + CLP_LOGS_DIR: "/var/log/compression-worker" + CLP_WORKER_LOG_PATH: "/var/log/compression-worker/worker.log" PYTHONPATH: "/opt/clp/lib/python3/site-packages" - SPIDER_LOG_DIR: "/var/log/compression_worker" + SPIDER_LOG_DIR: "/var/log/compression-worker" volumes: - *volume_clp_config_readonly - *volume_clp_logs @@ -339,7 +331,7 @@ services: command: [ "python3", "-u", "-m", "job_orchestration.executor.start-spider-worker", - "--host", "compression_worker", + "--host", "compression-worker", "--num-workers", "${CLP_COMPRESSION_WORKER_CONCURRENCY:-1}", "--storage-url", "jdbc:mariadb://database:3306/\ ${SPIDER_DB_NAME:-spider-db}?\ @@ -349,7 +341,6 @@ services: webui: <<: *service_defaults - hostname: "webui" environment: AWS_ACCESS_KEY_ID: "${CLP_STREAM_OUTPUT_AWS_ACCESS_KEY_ID:-}" AWS_SECRET_ACCESS_KEY: "${CLP_STREAM_OUTPUT_AWS_SECRET_ACCESS_KEY:-}" @@ -396,7 +387,6 @@ services: garbage-collector: <<: *service_defaults - hostname: "garbage_collector" stop_grace_period: "10s" deploy: # Value must be either 0 or 1. Set to 0 to disable the garbage collector. 
@@ -406,7 +396,7 @@ services: CLP_DB_USER: "${CLP_DB_USER:?Please set a value.}" CLP_HOME: "/opt/clp" CLP_LOGGING_LEVEL: "${CLP_GARBAGE_COLLECTOR_LOGGING_LEVEL:-INFO}" - CLP_LOGS_DIR: "/var/log/garbage_collector" + CLP_LOGS_DIR: "/var/log/garbage-collector" PYTHONPATH: "/opt/clp/lib/python3/site-packages" volumes: - *volume_clp_config_readonly @@ -427,7 +417,6 @@ services: query-scheduler: <<: *service_defaults - hostname: "query_scheduler" stop_grace_period: "10s" environment: BROKER_URL: "amqp://${CLP_QUEUE_USER:?Please set a value.}\ @@ -435,7 +424,7 @@ services: CLP_DB_PASS: "${CLP_DB_PASS:?Please set a value.}" CLP_DB_USER: "${CLP_DB_USER:?Please set a value.}" CLP_LOGGING_LEVEL: "${CLP_QUERY_SCHEDULER_LOGGING_LEVEL:-INFO}" - CLP_LOGS_DIR: "/var/log/query_scheduler" + CLP_LOGS_DIR: "/var/log/query-scheduler" PYTHONPATH: "/opt/clp/lib/python3/site-packages" RESULT_BACKEND: "redis://default:${CLP_REDIS_PASS:?Please set a value.}@redis:6379\ /${CLP_REDIS_BACKEND_DB_QUERY:-0}" @@ -461,20 +450,19 @@ services: "CMD", "bash", "-c", - "< /dev/tcp/query_scheduler/7000" + "< /dev/tcp/query-scheduler/7000" ] query-worker: <<: *service_defaults - hostname: "query_worker" environment: BROKER_URL: "amqp://${CLP_QUEUE_USER:?Please set a value.}\ :${CLP_QUEUE_PASS:?Please set a value.}@queue:5672" CLP_CONFIG_PATH: "/etc/clp-config.yaml" CLP_HOME: "/opt/clp" CLP_LOGGING_LEVEL: "${CLP_QUERY_WORKER_LOGGING_LEVEL:-INFO}" - CLP_LOGS_DIR: "/var/log/query_worker" - CLP_WORKER_LOG_PATH: "/var/log/query_worker/worker.log" + CLP_LOGS_DIR: "/var/log/query-worker" + CLP_WORKER_LOG_PATH: "/var/log/query-worker/worker.log" PYTHONPATH: "/opt/clp/lib/python3/site-packages" RESULT_BACKEND: "redis://default:${CLP_REDIS_PASS:?Please set a value.}@redis:6379\ /${CLP_REDIS_BACKEND_DB_QUERY:-0}" @@ -493,14 +481,13 @@ services: "worker", "--concurrency", "${CLP_QUERY_WORKER_CONCURRENCY:-1}", "--loglevel", "WARNING", - "-f", "/var/log/query_worker/worker.log", + "-f", 
"/var/log/query-worker/worker.log", "-Q", "query", "-n", "query-worker" ] reducer: <<: *service_defaults - hostname: "reducer" stop_grace_period: "10s" environment: CLP_HOME: "/opt/clp" @@ -525,13 +512,12 @@ services: mcp-server: <<: *service_defaults - hostname: "mcp_server" deploy: # Value must be either 0 or 1. Set to 0 to disable the MCP server. replicas: "${CLP_MCP_SERVER_ENABLED:-0}" environment: CLP_LOGGING_LEVEL: "${CLP_MCP_LOGGING_LEVEL:-INFO}" - CLP_LOGS_DIR: "/var/log/mcp_server" + CLP_LOGS_DIR: "/var/log/mcp-server" CLP_DB_PASS: "${CLP_DB_PASS:?Please set a value.}" CLP_DB_USER: "${CLP_DB_USER:?Please set a value.}" PYTHONPATH: "/opt/clp/lib/python3/site-packages" @@ -550,7 +536,7 @@ services: command: [ "python3", "-u", "-m", "clp_mcp_server.clp_mcp_server", - "--host", "mcp_server", + "--host", "mcp-server", "--port", "8000", "--config-path", "/etc/clp-config.yaml", ] @@ -560,17 +546,16 @@ services: "CMD", "curl", "-f", - "http://mcp_server:8000/health" + "http://mcp-server:8000/health" ] api-server: <<: *service_defaults - hostname: "api_server" deploy: # Value must be either 0 or 1. Set to 0 to disable the API server. replicas: "${CLP_API_SERVER_ENABLED:-1}" environment: - CLP_LOGS_DIR: "/var/log/api_server" + CLP_LOGS_DIR: "/var/log/api-server" CLP_DB_PASS: "${CLP_DB_PASS:?Please set a value.}" CLP_DB_USER: "${CLP_DB_USER:?Please set a value.}" RUST_LOG: "INFO" @@ -600,17 +585,16 @@ services: "CMD", "curl", "-f", - "http://api_server:3001/health" + "http://api-server:3001/health" ] log-ingestor: <<: *service_defaults - hostname: "log_ingestor" deploy: # Value must be either 0 or 1. Set to 0 to disable log-ingestor. 
replicas: "${CLP_LOG_INGESTOR_ENABLED:-1}" environment: - CLP_LOGS_DIR: "/var/log/log_ingestor" + CLP_LOGS_DIR: "/var/log/log-ingestor" CLP_DB_PASS: "${CLP_DB_PASS:?Please set a value.}" CLP_DB_USER: "${CLP_DB_USER:?Please set a value.}" RUST_LOG: "${CLP_LOG_INGESTOR_LOGGING_LEVEL:-INFO}" @@ -636,5 +620,5 @@ services: "CMD", "curl", "-f", - "http://log_ingestor:3002/health" + "http://log-ingestor:3002/health" ] diff --git a/tools/deployment/package/docker-compose.runtime.yaml b/tools/deployment/package/docker-compose.runtime.yaml index 75fa6d3f9b..41518adeb4 100644 --- a/tools/deployment/package/docker-compose.runtime.yaml +++ b/tools/deployment/package/docker-compose.runtime.yaml @@ -1,6 +1,5 @@ services: clp-runtime: - hostname: "clp_runtime" image: "${CLP_PACKAGE_CONTAINER_IMAGE_REF:-clp-package}" logging: driver: "local" From a44216fe826e7d6f5416b0db3a962f8963bb03d6 Mon Sep 17 00:00:00 2001 From: Junhao Liao Date: Tue, 16 Dec 2025 00:00:07 -0500 Subject: [PATCH 2/2] docs: Update service names in orchestration design docs to use hyphens for consistency with deployment standards --- .../design-deployment-orchestration.md | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/docs/src/dev-docs/design-deployment-orchestration.md b/docs/src/dev-docs/design-deployment-orchestration.md index 80bbe0c675..3fbfa6b299 100755 --- a/docs/src/dev-docs/design-deployment-orchestration.md +++ b/docs/src/dev-docs/design-deployment-orchestration.md @@ -151,19 +151,19 @@ graph LR | database | Database for archive metadata, compression jobs, and query jobs | | queue | Task queue for schedulers | | redis | Task result storage for workers | -| compression_scheduler | Scheduler for compression jobs | -| query_scheduler | Scheduler for search/aggregation jobs | -| spider_scheduler | Scheduler for Spider distributed task execution framework | -| results_cache | Storage for the workers to return search results to the UI | -| compression_worker | Worker 
processes for compression jobs using Celery | -| spider_compression_worker | Worker processes for compression jobs using Spider | -| query_worker | Worker processes for search/aggregation jobs using Celery | +| compression-scheduler | Scheduler for compression jobs | +| query-scheduler | Scheduler for search/aggregation jobs | +| spider-scheduler | Scheduler for Spider distributed task execution framework | +| results-cache | Storage for the workers to return search results to the UI | +| compression-worker | Worker processes for compression jobs using Celery | +| spider-compression-worker | Worker processes for compression jobs using Spider | +| query-worker | Worker processes for search/aggregation jobs using Celery | | reducer | Reducers for performing the final stages of aggregation jobs | -| api_server | API server for submitting queries | +| api-server | API server for submitting queries | | webui | Web server for the UI | -| mcp_server | MCP server for AI agent to access CLP functionalities | -| garbage_collector | Process to manage data retention | -| log_ingestor | Server for orchestrating and running continuous log ingestion jobs | +| mcp-server | MCP server for AI agent to access CLP functionalities | +| garbage-collector | Process to manage data retention | +| log-ingestor | Server for orchestrating and running continuous log ingestion jobs | :::