diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml deleted file mode 100644 index ce2677b4..00000000 --- a/.github/workflows/integration-tests.yml +++ /dev/null @@ -1,83 +0,0 @@ -name: Integration Tests - -on: - push: - branches: [ main, develop ] - paths: - - 'backends/advanced/src/**' - - 'backends/advanced/run-test.sh' - - '.github/workflows/integration-tests.yml' - pull_request: - branches: [ main, develop ] - paths: - - 'backends/advanced/src/**' - - 'backends/advanced/run-test.sh' - - '.github/workflows/integration-tests.yml' - -jobs: - integration-tests: - runs-on: ubuntu-latest - timeout-minutes: 20 - - services: - docker: - image: docker:dind - options: --privileged - - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Verify required secrets - env: - DEEPGRAM_API_KEY: ${{ secrets.DEEPGRAM_API_KEY }} - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - run: | - echo "Verifying required secrets..." - if [ -z "$DEEPGRAM_API_KEY" ]; then - echo "❌ ERROR: DEEPGRAM_API_KEY secret is not set" - exit 1 - fi - if [ -z "$OPENAI_API_KEY" ]; then - echo "❌ ERROR: OPENAI_API_KEY secret is not set" - exit 1 - fi - echo "✓ DEEPGRAM_API_KEY is set (length: ${#DEEPGRAM_API_KEY})" - echo "✓ OPENAI_API_KEY is set (length: ${#OPENAI_API_KEY})" - echo "✓ All required secrets verified" - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - - name: Install uv - uses: astral-sh/setup-uv@v4 - with: - version: "latest" - - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: "3.12" - - - name: Install PortAudio dependencies - run: sudo apt-get update && sudo apt-get install -y portaudio19-dev - - - name: Run Advanced Backend Integration Tests - env: - DEEPGRAM_API_KEY: ${{ secrets.DEEPGRAM_API_KEY }} - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - run: | - cd backends/advanced - chmod +x run-test.sh - ./run-test.sh - - - name: Upload test logs on failure - if: failure() - 
uses: actions/upload-artifact@v4 - with: - name: integration-test-logs - path: | - backends/advanced/test_integration.log - backends/advanced/docker-compose-test.yml - backends/advanced/.env.test - retention-days: 7 \ No newline at end of file diff --git a/.github/workflows/robot-tests.yml b/.github/workflows/robot-tests.yml index bac4c65a..3333266d 100644 --- a/.github/workflows/robot-tests.yml +++ b/.github/workflows/robot-tests.yml @@ -28,6 +28,7 @@ jobs: env: DEEPGRAM_API_KEY: ${{ secrets.DEEPGRAM_API_KEY }} OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + HF_TOKEN: ${{ secrets.HF_TOKEN }} run: | echo "Verifying required secrets..." if [ -z "$DEEPGRAM_API_KEY" ]; then @@ -38,8 +39,13 @@ jobs: echo "❌ ERROR: OPENAI_API_KEY secret is not set" exit 1 fi + if [ -z "$HF_TOKEN" ]; then + echo "❌ ERROR: HF_TOKEN secret is not set" + exit 1 + fi echo "✓ DEEPGRAM_API_KEY is set (length: ${#DEEPGRAM_API_KEY})" echo "✓ OPENAI_API_KEY is set (length: ${#OPENAI_API_KEY})" + echo "✓ HF_TOKEN is set (length: ${#HF_TOKEN})" echo "✓ All required secrets verified" - name: Set up Docker Buildx @@ -71,28 +77,6 @@ jobs: run: | uv pip install --system robotframework robotframework-requests python-dotenv websockets - - name: Create test environment file - working-directory: tests/setup - run: | - cat > .env.test << EOF - # API URLs - API_URL=http://localhost:8001 - BACKEND_URL=http://localhost:8001 - FRONTEND_URL=http://localhost:3001 - - # Test Admin Credentials - ADMIN_EMAIL=test-admin@example.com - ADMIN_PASSWORD=test-admin-password-123 - - # API Keys (from GitHub secrets) - OPENAI_API_KEY=${{ secrets.OPENAI_API_KEY }} - DEEPGRAM_API_KEY=${{ secrets.DEEPGRAM_API_KEY }} - - # Test Configuration - TEST_TIMEOUT=120 - TEST_DEVICE_NAME=robot-test - EOF - - name: Create test config.yml run: | echo "Copying test configuration file..." 
@@ -101,122 +85,17 @@ jobs: echo "✓ Test config.yml created from tests/configs/deepgram-openai.yml" ls -lh config/config.yml - - name: Start test environment - working-directory: backends/advanced - env: - DEEPGRAM_API_KEY: ${{ secrets.DEEPGRAM_API_KEY }} - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - LLM_PROVIDER: openai - TRANSCRIPTION_PROVIDER: deepgram - MEMORY_PROVIDER: friend_lite - run: | - # Debug: Check if secrets are available - echo "Checking environment variables..." - echo "DEEPGRAM_API_KEY is set: $([ -n "$DEEPGRAM_API_KEY" ] && echo 'YES' || echo 'NO')" - echo "OPENAI_API_KEY is set: $([ -n "$OPENAI_API_KEY" ] && echo 'YES' || echo 'NO')" - echo "LLM_PROVIDER: $LLM_PROVIDER" - echo "TRANSCRIPTION_PROVIDER: $TRANSCRIPTION_PROVIDER" - - # Clean any existing test containers for fresh start - echo "Cleaning up any existing test containers..." - docker compose -f docker-compose-test.yml down -v || true - - # Start ALL services in parallel - Docker Compose handles dependencies via healthchecks - echo "Starting all services in parallel (docker-compose-test.yml)..." - echo "Note: Using test compose file with source mounts for faster startup" - - # Export API keys so docker-compose can use them - export DEEPGRAM_API_KEY - export OPENAI_API_KEY - export LLM_PROVIDER - export TRANSCRIPTION_PROVIDER - export MEMORY_PROVIDER - - DOCKER_BUILDKIT=0 docker compose -f docker-compose-test.yml up -d - - # Show container status - echo "Container status:" - docker compose -f docker-compose-test.yml ps - - # Single wait for backend readiness (backend depends_on ensures infra is ready) - echo "Waiting for backend readiness (up to 120s)..." - for i in {1..40}; do - if curl -s http://localhost:8001/readiness > /dev/null 2>&1; then - echo "✓ Backend is ready (all dependencies satisfied)" - break - fi - # Show logs every 10 attempts to help debug - if [ $((i % 10)) -eq 0 ]; then - echo "Still waiting... 
showing recent logs:" - docker compose -f docker-compose-test.yml logs --tail=20 chronicle-backend-test - fi - if [ $i -eq 40 ]; then - echo "✗ Backend failed to start - showing full logs:" - docker compose -f docker-compose-test.yml logs - exit 1 - fi - echo "Attempt $i/40..." - sleep 3 - done - - echo "✓ Backend is ready!" - - # Verify workers are registered with Redis (Robot tests need stable workers) - echo "Waiting for workers to register with Redis (up to 60s)..." - for i in {1..30}; do - WORKER_COUNT=$(docker compose -f docker-compose-test.yml exec -T workers-test uv run python -c 'from rq import Worker; from redis import Redis; import os; r = Redis.from_url(os.getenv("REDIS_URL", "redis://redis-test:6379/0")); print(len(Worker.all(connection=r)))' 2>/dev/null || echo "0") - - if [ "$WORKER_COUNT" -ge 6 ]; then - echo "✓ Found $WORKER_COUNT workers registered" - # Show worker details - docker compose -f docker-compose-test.yml exec -T workers-test uv run python -c 'from rq import Worker; from redis import Redis; import os; r = Redis.from_url(os.getenv("REDIS_URL", "redis://redis-test:6379/0")); workers = Worker.all(connection=r); print(f"Total registered workers: {len(workers)}"); [print(f" - {w.name}: queues={w.queue_names()}, state={w.get_state()}") for w in workers]' - break - fi - - if [ $i -eq 30 ]; then - echo "✗ Workers failed to register after 60s" - echo "Showing worker logs:" - docker compose -f docker-compose-test.yml logs --tail=50 workers-test - exit 1 - fi - - echo "Attempt $i/30: $WORKER_COUNT workers registered (waiting for 6+)..." - sleep 2 - done - - echo "✓ All services ready!" - - - name: Verify checked out code - working-directory: tests - run: | - echo "Current git commit:" - git log -1 --oneline - echo "" - echo "Test files in current checkout:" - find . 
-name "*.robot" -type f | head -10 - echo "" - echo "Sample of tags in test files:" - grep -h "\[Tags\]" endpoints/*.robot infrastructure/*.robot integration/*.robot 2>/dev/null | head -20 || echo "No tag files found" - - - name: Clean previous test results - working-directory: tests - run: | - echo "Cleaning any previous test results..." - rm -rf results - mkdir -p results - echo "✓ Fresh results directory created" - - name: Run Robot Framework tests working-directory: tests env: - # Required for backend imports in test libraries - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - OPENAI_BASE_URL: https://api.openai.com/v1 - OPENAI_MODEL: gpt-4o-mini + # Required for test runner script DEEPGRAM_API_KEY: ${{ secrets.DEEPGRAM_API_KEY }} + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + HF_TOKEN: ${{ secrets.HF_TOKEN }} + CLEANUP_CONTAINERS: "false" # Don't cleanup in CI - handled by workflow run: | - # Run all tests (don't fail workflow to allow artifact upload) - make all OUTPUTDIR=results + # Use the unified test script that mirrors local development + ./run-robot-tests.sh TEST_EXIT_CODE=$? 
echo "test_exit_code=$TEST_EXIT_CODE" >> $GITHUB_ENV exit 0 # Don't fail here, we'll fail at the end after uploading artifacts diff --git a/backends/advanced/docker-compose-test.yml b/backends/advanced/docker-compose-test.yml index 3b0e1eaf..c699bc6b 100644 --- a/backends/advanced/docker-compose-test.yml +++ b/backends/advanced/docker-compose-test.yml @@ -39,9 +39,9 @@ services: - OPENMEMORY_USER_ID=${OPENMEMORY_USER_ID:-openmemory} - MYCELIA_URL=http://mycelia-backend-test:5173 - MYCELIA_DB=mycelia_test - # Disable speaker recognition in test environment to prevent segment duplication + # Enable speaker recognition in test environment - DISABLE_SPEAKER_RECOGNITION=false - - SPEAKER_SERVICE_URL=https://localhost:8085 + - SPEAKER_SERVICE_URL=http://speaker-service-test:8085 - CORS_ORIGINS=http://localhost:3001,http://localhost:8001,https://localhost:3001,https://localhost:8001 # Set low inactivity timeout for tests (2 seconds instead of 60) - SPEECH_INACTIVITY_THRESHOLD_SECONDS=2 @@ -54,6 +54,8 @@ services: condition: service_healthy redis-test: condition: service_started + speaker-service-test: + condition: service_healthy healthcheck: test: ["CMD", "curl", "-f", "http://localhost:8000/readiness"] interval: 10s @@ -119,6 +121,36 @@ services: timeout: 3s retries: 5 + speaker-service-test: + build: + context: ../../extras/speaker-recognition + dockerfile: Dockerfile + args: + PYTORCH_CUDA_VERSION: cpu + image: speaker-recognition-test:latest + ports: + - "8086:8085" # Avoid conflict with dev speaker service on 8085 + volumes: + - ../../extras/speaker-recognition/src:/app/src + - ../../extras/speaker-recognition/model_cache:/models + - ../../extras/speaker-recognition/audio_chunks:/app/audio_chunks + - ../../extras/speaker-recognition/debug:/app/debug + - ../../extras/speaker-recognition/speaker_data:/app/data + environment: + - HF_HOME=/models + - HF_TOKEN=${HF_TOKEN} + - SIMILARITY_THRESHOLD=0.15 + - SPEAKER_SERVICE_HOST=0.0.0.0 + - SPEAKER_SERVICE_PORT=8085 + - 
DEEPGRAM_API_KEY=${DEEPGRAM_API_KEY} + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8085/health"] + interval: 30s + timeout: 10s + retries: 5 + start_period: 60s + restart: unless-stopped + workers-test: build: context: . @@ -151,7 +183,7 @@ services: - MYCELIA_URL=http://mycelia-backend-test:5173 - MYCELIA_DB=mycelia_test - DISABLE_SPEAKER_RECOGNITION=false - - SPEAKER_SERVICE_URL=https://localhost:8085 + - SPEAKER_SERVICE_URL=http://speaker-service-test:8085 # Set low inactivity timeout for tests (2 seconds instead of 60) - SPEECH_INACTIVITY_THRESHOLD_SECONDS=2 # Wait for audio queue to drain before timing out (test mode) @@ -165,6 +197,8 @@ services: condition: service_started qdrant-test: condition: service_started + speaker-service-test: + condition: service_healthy restart: unless-stopped # Mycelia - AI memory and timeline service (test environment) @@ -237,6 +271,11 @@ services: # condition: service_healthy # restart: unless-stopped +# Use default bridge network for test isolation (no external network dependency) +networks: + default: + driver: bridge + # CI Considerations (for future implementation): # - GitHub Actions can run these services in isolated containers # - Port conflicts won't exist in CI since each job runs in isolation diff --git a/backends/advanced/run-test.sh b/backends/advanced/run-test.sh index 17773dc1..01204be6 100755 --- a/backends/advanced/run-test.sh +++ b/backends/advanced/run-test.sh @@ -166,9 +166,8 @@ if [ ! -f "diarization_config.json" ] && [ -f "diarization_config.json.template" print_success "diarization_config.json created" fi -# Install dependencies with uv -print_info "Installing dependencies with uv..." -uv sync --dev --group test +# Note: Robot Framework dependencies are managed via tests/test-requirements.txt +# The integration tests use Docker containers for service dependencies # Set up environment variables for testing print_info "Setting up test environment variables..." 
@@ -211,8 +210,9 @@ export DOCKER_BUILDKIT=0 export TEST_MODE=dev # Run the Robot Framework integration tests with extended timeout (mem0 needs time for comprehensive extraction) +# IMPORTANT: Robot tests must be run from the repository root where backends/ and tests/ are siblings print_info "Starting Robot Framework integration tests (timeout: 15 minutes)..." -if timeout 900 uv run robot --outputdir ../../test-results --loglevel INFO ../../tests/integration/integration_test.robot; then +if (cd ../.. && timeout 900 robot --outputdir test-results --loglevel INFO tests/integration/integration_test.robot); then print_success "Integration tests completed successfully!" else TEST_EXIT_CODE=$? diff --git a/backends/advanced/src/advanced_omi_backend/services/memory/providers/chronicle.py b/backends/advanced/src/advanced_omi_backend/services/memory/providers/chronicle.py index a0974e21..3fb96f00 100644 --- a/backends/advanced/src/advanced_omi_backend/services/memory/providers/chronicle.py +++ b/backends/advanced/src/advanced_omi_backend/services/memory/providers/chronicle.py @@ -84,9 +84,16 @@ async def initialize(self) -> None: vector_ok = await self.vector_store.test_connection() if not llm_ok: - raise RuntimeError("LLM provider connection failed") + raise RuntimeError( + f"LLM provider connection failed for {self.config.llm_provider.value}. " + f"Check API keys, network connectivity, and service availability. " + f"Memory processing cannot proceed without a working LLM connection." + ) if not vector_ok: - raise RuntimeError("Vector store connection failed") + raise RuntimeError( + f"Vector store connection failed for {self.config.vector_store_provider.value}. " + f"Check that Qdrant service is running and accessible." 
+ ) self._initialized = True memory_logger.info( diff --git a/backends/advanced/src/advanced_omi_backend/services/memory/providers/llm_providers.py b/backends/advanced/src/advanced_omi_backend/services/memory/providers/llm_providers.py index f72bdb81..a73f1bc8 100644 --- a/backends/advanced/src/advanced_omi_backend/services/memory/providers/llm_providers.py +++ b/backends/advanced/src/advanced_omi_backend/services/memory/providers/llm_providers.py @@ -8,15 +8,12 @@ memory action proposals using their respective APIs. """ +import asyncio import json import logging import os -import httpx from typing import Any, Dict, List, Optional -# TODO: Re-enable spacy when Docker build is fixed -# import spacy - from ..base import LLMProviderBase from ..prompts import ( FACT_RETRIEVAL_PROMPT, @@ -30,10 +27,14 @@ ) from ..utils import extract_json_from_text +# TODO: Re-enable spacy when Docker build is fixed +# import spacy + + memory_logger = logging.getLogger("memory_service") # New: config-driven model registry + universal client -from advanced_omi_backend.model_registry import get_models_registry, ModelDef +from advanced_omi_backend.model_registry import ModelDef, get_models_registry def _is_langfuse_enabled() -> bool: @@ -62,7 +63,7 @@ def _get_openai_client(api_key: str, base_url: str, is_async: bool = False): memory_logger.debug("Using OpenAI client with Langfuse tracing") else: # Use regular OpenAI client without tracing - from openai import OpenAI, AsyncOpenAI + from openai import AsyncOpenAI, OpenAI openai = type('OpenAI', (), {'OpenAI': OpenAI, 'AsyncOpenAI': AsyncOpenAI})() memory_logger.debug("Using OpenAI client without tracing") @@ -174,18 +175,33 @@ def __init__(self, config: Dict[str, Any]): if not self.llm_def: raise RuntimeError("No default LLM defined in config.yml") + # Store parameters for LLM self.api_key = self.llm_def.api_key or "" self.base_url = self.llm_def.model_url self.model = self.llm_def.model_name self.temperature = 
float(self.llm_def.model_params.get("temperature", 0.1)) self.max_tokens = int(self.llm_def.model_params.get("max_tokens", 2000)) - + # Store parameters for embeddings (use separate config if available) self.embedding_model = (self.embed_def.model_name if self.embed_def else self.llm_def.model_name) self.embedding_api_key = (self.embed_def.api_key if self.embed_def else self.api_key) self.embedding_base_url = (self.embed_def.model_url if self.embed_def else self.base_url) - + + # CRITICAL: Validate API keys are present - fail fast instead of hanging + if not self.api_key or self.api_key.strip() == "": + raise RuntimeError( + f"API key is missing or empty for LLM provider '{self.llm_def.model_provider}' (model: {self.model}). " + f"Please set the API key in config.yml or environment variables. " + f"Cannot proceed without valid API credentials." + ) + + if self.embed_def and (not self.embedding_api_key or self.embedding_api_key.strip() == ""): + raise RuntimeError( + f"API key is missing or empty for embedding provider '{self.embed_def.model_provider}' (model: {self.embedding_model}). " + f"Please set the API key in config.yml or environment variables." + ) + # Lazy client creation self._client = None @@ -285,20 +301,21 @@ async def generate_embeddings(self, texts: List[str]) -> List[List[float]]: raise async def test_connection(self) -> bool: - """Test OpenAI connection. + """Test OpenAI connection with timeout. 
Returns: True if connection successful, False otherwise """ + try: - try: + # Add 10-second timeout to prevent hanging on API calls + async with asyncio.timeout(10): client = _get_openai_client(api_key=self.api_key, base_url=self.base_url, is_async=True) await client.models.list() return True - except Exception as e: - memory_logger.error(f"OpenAI connection test failed: {e}") - return False - + except asyncio.TimeoutError: + memory_logger.error(f"OpenAI connection test timed out after 10s - check network connectivity and API endpoint") + return False except Exception as e: memory_logger.error(f"OpenAI connection test failed: {e}") return False diff --git a/backends/advanced/src/advanced_omi_backend/speaker_recognition_client.py b/backends/advanced/src/advanced_omi_backend/speaker_recognition_client.py index 50b12645..99c9e594 100644 --- a/backends/advanced/src/advanced_omi_backend/speaker_recognition_client.py +++ b/backends/advanced/src/advanced_omi_backend/speaker_recognition_client.py @@ -144,16 +144,16 @@ async def diarize_identify_match( except ClientConnectorError as e: logger.error(f"🎤 Failed to connect to speaker recognition service: {e}") - return {} + return {"error": "connection_failed", "message": str(e), "segments": []} except asyncio.TimeoutError as e: logger.error(f"🎤 Timeout connecting to speaker recognition service: {e}") - return {} + return {"error": "timeout", "message": str(e), "segments": []} except aiohttp.ClientError as e: logger.warning(f"🎤 Client error during speaker recognition: {e}") - return {} + return {"error": "client_error", "message": str(e), "segments": []} except Exception as e: logger.error(f"🎤 Error during speaker recognition: {e}") - return {} + return {"error": "unknown_error", "message": str(e), "segments": []} async def diarize_and_identify( self, audio_path: str, words: None, user_id: Optional[str] = None # NOT IMPLEMENTED @@ -265,18 +265,18 @@ async def diarize_and_identify( except ClientConnectorError as e: 
logger.error(f"🎤 [DIARIZE] ❌ Failed to connect to speaker recognition service at {self.service_url}: {e}") - return {} + return {"error": "connection_failed", "message": str(e), "segments": []} except asyncio.TimeoutError as e: logger.error(f"🎤 [DIARIZE] ❌ Timeout connecting to speaker recognition service: {e}") - return {} + return {"error": "timeout", "message": str(e), "segments": []} except aiohttp.ClientError as e: logger.warning(f"🎤 [DIARIZE] ❌ Client error during speaker recognition: {e}") - return {} + return {"error": "client_error", "message": str(e), "segments": []} except Exception as e: logger.error(f"🎤 [DIARIZE] ❌ Error during speaker diarization and identification: {e}") import traceback logger.debug(traceback.format_exc()) - return {} + return {"error": "unknown_error", "message": str(e), "segments": []} async def identify_speakers(self, audio_path: str, segments: List[Dict]) -> Dict[str, str]: """ diff --git a/backends/advanced/src/advanced_omi_backend/workers/speaker_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/speaker_jobs.py index d9165b2d..066d05c5 100644 --- a/backends/advanced/src/advanced_omi_backend/workers/speaker_jobs.py +++ b/backends/advanced/src/advanced_omi_backend/workers/speaker_jobs.py @@ -58,6 +58,23 @@ async def check_enrolled_speakers_job( transcription_results=raw_results ) + # Check for errors from speaker service + if speaker_result and speaker_result.get("error"): + error_type = speaker_result.get("error") + error_message = speaker_result.get("message", "Unknown error") + logger.error(f"🎤 [SPEAKER CHECK] Speaker service error: {error_type} - {error_message}") + + # Fail the job - don't create conversation if speaker service failed + return { + "success": False, + "session_id": session_id, + "error": f"Speaker recognition failed: {error_type}", + "error_details": error_message, + "enrolled_present": False, + "identified_speakers": [], + "processing_time_seconds": time.time() - start_time + } + # Extract 
identified speakers identified_speakers = [] if speaker_result and "segments" in speaker_result: @@ -206,7 +223,29 @@ async def recognise_speakers_job( user_id=user_id ) - if not speaker_result or "segments" not in speaker_result: + # Check for errors from speaker service + if speaker_result and speaker_result.get("error"): + error_type = speaker_result.get("error") + error_message = speaker_result.get("message", "Unknown error") + logger.error(f"🎤 Speaker recognition service error: {error_type} - {error_message}") + + # Raise exception for connection failures so dependent jobs are canceled + # This ensures RQ marks the job as "failed" instead of "completed" + if error_type in ("connection_failed", "timeout", "client_error"): + raise RuntimeError(f"Speaker recognition service unavailable: {error_type} - {error_message}") + + # For other errors (e.g., processing errors), return error dict without failing + return { + "success": False, + "conversation_id": conversation_id, + "version_id": version_id, + "error": f"Speaker recognition failed: {error_type}", + "error_details": error_message, + "processing_time_seconds": time.time() - start_time + } + + # Service worked but found no segments (legitimate empty result) + if not speaker_result or "segments" not in speaker_result or not speaker_result["segments"]: logger.warning(f"🎤 Speaker recognition returned no segments") return { "success": True, diff --git a/backends/advanced/start-workers.sh b/backends/advanced/start-workers.sh index 2ed50727..3fea5a39 100755 --- a/backends/advanced/start-workers.sh +++ b/backends/advanced/start-workers.sh @@ -181,12 +181,14 @@ monitor_worker_health & MONITOR_PID=$! echo "🩺 Self-healing monitor started: PID $MONITOR_PID" -# Wait for any worker process to exit -wait -n - -# If we get here, one worker process has exited - kill everything -echo "⚠️ One worker exited, stopping all workers..." 
-kill $MONITOR_PID 2>/dev/null || true +# Keep the script running and let the self-healing monitor handle worker failures +# Don't use wait -n (fail-fast on first worker exit) - this kills all workers when one fails +# Instead, wait for the monitor process or explicit shutdown signal +echo "⏳ Workers running - self-healing monitor will restart failed workers automatically" +wait $MONITOR_PID + +# If monitor exits (should only happen on SIGTERM/SIGINT), shut down gracefully +echo "🛑 Monitor exited, shutting down all workers..." kill $RQ_WORKER_1_PID 2>/dev/null || true kill $RQ_WORKER_2_PID 2>/dev/null || true kill $RQ_WORKER_3_PID 2>/dev/null || true @@ -198,5 +200,5 @@ kill $AUDIO_PERSISTENCE_WORKER_PID 2>/dev/null || true [ -n "$AUDIO_STREAM_PARAKEET_WORKER_PID" ] && kill $AUDIO_STREAM_PARAKEET_WORKER_PID 2>/dev/null || true wait -echo "🔄 All workers stopped" -exit 1 +echo "✅ All workers stopped gracefully" +exit 0 diff --git a/extras/speaker-recognition/Dockerfile b/extras/speaker-recognition/Dockerfile index d14baf35..0c1ccc81 100644 --- a/extras/speaker-recognition/Dockerfile +++ b/extras/speaker-recognition/Dockerfile @@ -31,6 +31,9 @@ COPY src/simple_speaker_recognition/__init__.py src/simple_speaker_recognition/ RUN uv sync --no-dev --extra ${PYTORCH_CUDA_VERSION} && \ uv cache clean +# Copy the full source code (after dependencies are cached) +COPY src/ src/ + # Create directories RUN mkdir -p /app/audio_chunks /app/debug /app/data /models diff --git a/tests/Makefile b/tests/Makefile index 707743e4..8ba002f5 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -31,7 +31,7 @@ help: # Creates a persistent fixture conversation that won't be deleted between suites all: @echo "Running all tests..." 
- CREATE_FIXTURE=true uv run robot --outputdir $(OUTPUTDIR) \ + CREATE_FIXTURE=true uv run --with-requirements test-requirements.txt robot --outputdir $(OUTPUTDIR) \ --name "All Tests" \ --console verbose \ $(TEST_DIR) @@ -39,7 +39,7 @@ all: # Run only endpoint tests endpoints: @echo "Running endpoint tests..." - uv run robot --outputdir $(OUTPUTDIR) \ + uv run --with-requirements test-requirements.txt robot --outputdir $(OUTPUTDIR) \ --name "Endpoint Tests" \ --console verbose \ endpoints @@ -47,7 +47,7 @@ endpoints: # Run only integration tests integration: @echo "Running integration tests..." - CREATE_FIXTURE=true uv run robot --outputdir $(OUTPUTDIR) \ + CREATE_FIXTURE=true uv run --with-requirements test-requirements.txt robot --outputdir $(OUTPUTDIR) \ --name "Integration Tests" \ --console verbose \ integration @@ -55,7 +55,7 @@ integration: # Run only infrastructure tests infra: @echo "Running infrastructure tests..." - uv run robot --outputdir $(OUTPUTDIR) \ + uv run --with-requirements test-requirements.txt robot --outputdir $(OUTPUTDIR) \ --name "Infrastructure Tests" \ --console verbose \ infrastructure diff --git a/tests/endpoints/rq_queue_tests.robot b/tests/endpoints/rq_queue_tests.robot index 892a1090..5206b5ff 100644 --- a/tests/endpoints/rq_queue_tests.robot +++ b/tests/endpoints/rq_queue_tests.robot @@ -19,7 +19,7 @@ Suite Teardown Suite Teardown Test Setup Test Cleanup *** Variables *** ${TEST_TIMEOUT} 20s -${COMPOSE_FILE} backends/advanced/docker-compose-test.yml +${COMPOSE_FILE} docker-compose-test.yml *** Keywords *** @@ -29,11 +29,11 @@ Restart Backend Service # Stop backend container Run Process docker compose -f ${COMPOSE_FILE} stop chronicle-backend-test - ... cwd=. timeout=30s + ... cwd=${BACKEND_DIR} timeout=30s # Start backend container again Run Process docker compose -f ${COMPOSE_FILE} start chronicle-backend-test - ... cwd=. timeout=60s + ... 
cwd=${BACKEND_DIR} timeout=60s # Wait for backend to be ready again Wait Until Keyword Succeeds ${TEST_TIMEOUT} 5s diff --git a/tests/endpoints/system_admin_tests.robot b/tests/endpoints/system_admin_tests.robot index e524c264..283c1865 100644 --- a/tests/endpoints/system_admin_tests.robot +++ b/tests/endpoints/system_admin_tests.robot @@ -79,7 +79,7 @@ Get Enrolled Speakers Test Dictionary Should Contain Key ${response_data} speakers # If service is available, verify speakers data - Should Be True isinstance($response_data[speakers], list) + Should Be True isinstance($response_data["speakers"], list) Get Speaker Service Status Test [Documentation] Test checking speaker recognition service status (admin only) diff --git a/tests/run-robot-tests.sh b/tests/run-robot-tests.sh index 462377ed..d623c8f9 100755 --- a/tests/run-robot-tests.sh +++ b/tests/run-robot-tests.sh @@ -121,9 +121,13 @@ export COMPOSE_PROJECT_NAME="advanced-backend-test" print_info "Cleaning up any existing test environment..." docker compose -f docker-compose-test.yml down -v 2>/dev/null || true -# Force remove any stuck containers with test names +# Force remove any stuck containers with test names (uses COMPOSE_PROJECT_NAME) print_info "Removing any stuck test containers..." -docker rm -f advanced-backend-test-mongo-test-1 advanced-backend-test-redis-test-1 advanced-backend-test-qdrant-test-1 advanced-backend-test-chronicle-backend-test-1 advanced-backend-test-workers-test-1 advanced-backend-test-webui-test-1 2>/dev/null || true +# Dynamically construct container names from docker-compose services +TEST_SERVICES=(mongo-test redis-test qdrant-test chronicle-backend-test workers-test webui-test speaker-service-test) +for service in "${TEST_SERVICES[@]}"; do + docker rm -f "${COMPOSE_PROJECT_NAME}-${service}-1" 2>/dev/null || true +done # Start infrastructure services (MongoDB, Redis, Qdrant) print_info "Starting MongoDB, Redis, and Qdrant (fresh containers)..." 
@@ -224,14 +228,8 @@ print_success "All services ready!" # Return to tests directory cd ../../tests -# Install Robot Framework dependencies if not in CI -if [ -z "$CI" ]; then - print_info "Installing Robot Framework dependencies..." - uv venv --quiet --python 3.12 || true # May already exist - uv pip install --quiet robotframework robotframework-requests python-dotenv websockets -fi - # Run Robot Framework tests via Makefile +# Dependencies are handled automatically by 'uv run' in Makefile print_info "Running Robot Framework tests..." print_info "Output directory: $OUTPUTDIR" diff --git a/tests/setup/setup_keywords.robot b/tests/setup/setup_keywords.robot index 3fe7bd17..656a082d 100644 --- a/tests/setup/setup_keywords.robot +++ b/tests/setup/setup_keywords.robot @@ -85,7 +85,7 @@ Prod Mode Setup Log To Console Tearing down existing containers and volumes... Stop Docker Services remove_volumes=${True} - Run Process rm -rf data/test_mongo_data data/test_qdrant_data data/test_audio_chunks cwd=backends/advanced shell=True + Run Process rm -rf data/test_mongo_data data/test_qdrant_data data/test_audio_chunks cwd=${BACKEND_DIR} shell=True Log To Console Building and starting fresh containers... Start Docker Services build=${True} @@ -95,7 +95,7 @@ Prod Mode Setup Start Docker Services [Documentation] Start Docker services using docker-compose ... 
Checks if services are already running to avoid redundant starts - [Arguments] ${compose_file}=docker-compose-test.yml ${working_dir}=backends/advanced ${build}=${False} + [Arguments] ${compose_file}=docker-compose-test.yml ${working_dir}=${BACKEND_DIR} ${build}=${False} ${is_up}= Run Keyword And Return Status Check Services Ready ${API_URL} @@ -120,7 +120,7 @@ Start Docker Services Stop Docker Services [Documentation] Stop Docker services using docker-compose - [Arguments] ${compose_file}=docker-compose-test.yml ${working_dir}=backends/advanced ${remove_volumes}=${False} + [Arguments] ${compose_file}=docker-compose-test.yml ${working_dir}=${BACKEND_DIR} ${remove_volumes}=${False} IF ${remove_volumes} Run Process docker compose -f ${compose_file} down -v cwd=${working_dir} shell=True @@ -130,7 +130,7 @@ Stop Docker Services Rebuild Docker Services [Documentation] Rebuild and restart Docker services - [Arguments] ${compose_file}=docker-compose-test.yml ${working_dir}=backends/advanced + [Arguments] ${compose_file}=docker-compose-test.yml ${working_dir}=${BACKEND_DIR} Log To Console Rebuilding containers with latest code... Run Process docker compose -f ${compose_file} up -d --build cwd=${working_dir} shell=True @@ -155,7 +155,7 @@ Start Speaker Recognition Service END Log Starting speaker-recognition-service - Run Process docker compose -f extras/speaker-recognition/docker-compose-test.yml up -d --build shell=True + Run Process docker compose -f docker-compose-test.yml up -d --build cwd=${SPEAKER_RECOGNITION_DIR} shell=True Log Waiting for speaker recognition service to start... 
Wait Until Keyword Succeeds 60s 5s Check Services Ready ${SPEAKER_RECOGNITION_URL} @@ -166,9 +166,9 @@ Stop Speaker Recognition Service [Arguments] ${remove_volumes}=${False} IF ${remove_volumes} - Run Process docker compose -f extras/speaker-recognition/docker-compose-test.yml down -v shell=True + Run Process docker compose -f docker-compose-test.yml down -v cwd=${SPEAKER_RECOGNITION_DIR} shell=True ELSE - Run Process docker compose -f extras/speaker-recognition/docker-compose-test.yml down shell=True + Run Process docker compose -f docker-compose-test.yml down cwd=${SPEAKER_RECOGNITION_DIR} shell=True END Check Environment Variables diff --git a/tests/setup/teardown_keywords.robot b/tests/setup/teardown_keywords.robot index 4553ad0a..cd4b2b5a 100644 --- a/tests/setup/teardown_keywords.robot +++ b/tests/setup/teardown_keywords.robot @@ -38,7 +38,7 @@ Dev Mode Teardown Log To Console \n=== Dev Mode Teardown (Default) === Log To Console ✓ Keeping containers running for next test run Log To Console Tip: Use 'TEST_MODE=prod' for full cleanup or run manually: - Log To Console docker compose -f backends/advanced/docker-compose-ci.yml down -v + Log To Console docker compose -f ${BACKEND_DIR}/docker-compose-ci.yml down -v # Only delete HTTP sessions Delete All Sessions @@ -52,9 +52,9 @@ Prod Mode Teardown Stop Docker Services remove_volumes=${True} # Clean up any remaining volumes - Run Process rm -rf backends/advanced/data/test_mongo_data shell=True - Run Process rm -rf ${EXECDIR}/backends/advanced/data/test_qdrant_data shell=True - Run Process rm -rf ${EXECDIR}/backends/advanced/data/test_audio_chunks shell=True + Run Process rm -rf ${BACKEND_DIR}/data/test_mongo_data shell=True + Run Process rm -rf ${BACKEND_DIR}/data/test_qdrant_data shell=True + Run Process rm -rf ${BACKEND_DIR}/data/test_audio_chunks shell=True # Delete all HTTP sessions Delete All Sessions diff --git a/tests/setup/test_env.py b/tests/setup/test_env.py index d11f2ff8..94956a14 100644 --- 
a/tests/setup/test_env.py +++ b/tests/setup/test_env.py @@ -8,7 +8,14 @@ # 2. .env.test (test-specific configuration) # 3. .env (default configuration) -backend_dir = Path(__file__).parent.parent.parent / "backends" / "advanced" +# Find repository root (tests/setup/test_env.py -> three .parent hops: setup/ -> tests/ -> repo root) +REPO_ROOT = Path(__file__).parent.parent.parent +backend_dir = REPO_ROOT / "backends" / "advanced" + +# Export absolute paths for Robot Framework keywords +BACKEND_DIR = str(backend_dir.absolute()) +REPO_ROOT_DIR = str(REPO_ROOT.absolute()) +SPEAKER_RECOGNITION_DIR = str((REPO_ROOT / "extras" / "speaker-recognition").absolute()) # Load in reverse order of precedence (since override=False won't overwrite existing vars) # Load .env.test first (will set test-specific values) diff --git a/tests/setup/test_manager_keywords.robot b/tests/setup/test_manager_keywords.robot index 65506551..8927994a 100644 --- a/tests/setup/test_manager_keywords.robot +++ b/tests/setup/test_manager_keywords.robot @@ -62,8 +62,8 @@ Clear Test Databases Log To Console Qdrant collections cleared # Clear audio files (except fixtures subfolder) - Run Process bash -c find ${EXECDIR}/backends/advanced/data/test_audio_chunks -maxdepth 1 -name "*.wav" -delete || true shell=True - Run Process bash -c rm -rf ${EXECDIR}/backends/advanced/data/test_debug_dir/* || true shell=True + Run Process bash -c find ${BACKEND_DIR}/data/test_audio_chunks -maxdepth 1 -name "*.wav" -delete || true shell=True + Run Process bash -c rm -rf ${BACKEND_DIR}/data/test_debug_dir/* || true shell=True Log To Console Audio files cleared (fixtures/ subfolder preserved) # Clear container audio files (except fixtures subfolder) @@ -90,8 +90,8 @@ Clear All Test Data Run Process curl -s -X DELETE http://localhost:6337/collections/conversations shell=True # Clear all audio files - Run Process bash -c rm -rf ${EXECDIR}/backends/advanced/data/test_audio_chunks/* || true shell=True - Run Process bash -c rm -rf 
${EXECDIR}/backends/advanced/data/test_debug_dir/* || true shell=True + Run Process bash -c rm -rf ${BACKEND_DIR}/data/test_audio_chunks/* || true shell=True + Run Process bash -c rm -rf ${BACKEND_DIR}/data/test_debug_dir/* || true shell=True # Clear all Redis data Run Process docker exec ${REDIS_CONTAINER} redis-cli FLUSHALL shell=True diff --git a/test-requirements.txt b/tests/test-requirements.txt similarity index 89% rename from test-requirements.txt rename to tests/test-requirements.txt index 48b8ad96..4efaf39b 100644 --- a/test-requirements.txt +++ b/tests/test-requirements.txt @@ -3,4 +3,5 @@ robotframework-tidy robotframework-requests robotframework-browser python-dotenv +websockets \ No newline at end of file