From 9accc1ef598f708e5ec41214e89434d4aacb0d07 Mon Sep 17 00:00:00 2001 From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com> Date: Fri, 26 Dec 2025 13:49:23 +0000 Subject: [PATCH 1/9] Enhance configuration management and add new setup scripts - Updated .gitignore to include config.yml and its template. - Added config.yml.template for default configuration settings. - Introduced restart.sh script for service management. - Enhanced services.py to load config.yml and check for Obsidian/Neo4j integration. - Updated wizard.py to prompt for Obsidian/Neo4j configuration during setup and create config.yml from template if it doesn't exist. --- .gitignore | 2 + config.yml => config.yml.template | 8 ++- restart.sh | 2 + services.py | 39 +++++++++++++- wizard.py | 90 ++++++++++++++++++++++++++++--- 5 files changed, 132 insertions(+), 9 deletions(-) rename config.yml => config.yml.template (96%) create mode 100755 restart.sh diff --git a/.gitignore b/.gitignore index b2b052b3..38cb0c88 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,8 @@ !**/.env.template **/memory_config.yaml !**/memory_config.yaml.template +config.yml +!config.yml.template example/* **/node_modules/* **/ollama-data/* diff --git a/config.yml b/config.yml.template similarity index 96% rename from config.yml rename to config.yml.template index ac412d1e..37209d4b 100644 --- a/config.yml +++ b/config.yml.template @@ -128,7 +128,7 @@ models: language: multi smart_format: 'true' punctuate: 'true' - diarize: false + diarize: 'true' encoding: linear16 sample_rate: 16000 channels: '1' @@ -191,7 +191,7 @@ memory: enabled: true prompt: 'Extract important information from this conversation and return a JSON object with an array named "facts". Include personal preferences, plans, names, - dates, locations, numbers, and key details hehehe. Keep items concise and useful. + dates, locations, numbers, and key details. Keep items concise and useful. 
' openmemory_mcp: @@ -202,3 +202,7 @@ memory: mycelia: api_url: http://localhost:5173 timeout: 30 + obsidian: + enabled: false + neo4j_host: neo4j-mem0 + timeout: 30 diff --git a/restart.sh b/restart.sh new file mode 100755 index 00000000..019518c4 --- /dev/null +++ b/restart.sh @@ -0,0 +1,2 @@ +#!/bin/bash +uv run --with-requirements setup-requirements.txt python services.py restart --all diff --git a/services.py b/services.py index 716e437e..0deeff8a 100755 --- a/services.py +++ b/services.py @@ -8,12 +8,26 @@ import subprocess from pathlib import Path +import yaml from rich.console import Console from rich.table import Table from dotenv import dotenv_values console = Console() +def load_config_yml(): + """Load config.yml from repository root""" + config_path = Path(__file__).parent / 'config.yml' + if not config_path.exists(): + return None + + try: + with open(config_path, 'r') as f: + return yaml.safe_load(f) + except Exception as e: + console.print(f"[yellow]โš ๏ธ Warning: Could not load config.yml: {e}[/yellow]") + return None + SERVICES = { 'backend': { 'path': 'backends/advanced', @@ -74,7 +88,30 @@ def run_compose_command(service_name, command, build=False): if caddyfile_path.exists() and caddyfile_path.is_file(): # Enable HTTPS profile to start Caddy service cmd.extend(['--profile', 'https']) - + + # Check if Obsidian/Neo4j is enabled + obsidian_enabled = False + + # Method 1: Check config.yml (preferred) + config_data = load_config_yml() + if config_data: + memory_config = config_data.get('memory', {}) + obsidian_config = memory_config.get('obsidian', {}) + if obsidian_config.get('enabled', False): + obsidian_enabled = True + + # Method 2: Fallback to .env for backward compatibility + if not obsidian_enabled: + env_file = service_path / '.env' + if env_file.exists(): + env_values = dotenv_values(env_file) + if env_values.get('OBSIDIAN_ENABLED', 'false').lower() == 'true': + obsidian_enabled = True + + if obsidian_enabled: + cmd.extend(['--profile', 
'obsidian']) + console.print("[blue]โ„น๏ธ Starting with Obsidian/Neo4j support[/blue]") + # Handle speaker-recognition service specially if service_name == 'speaker-recognition' and command in ['up', 'down']: # Read configuration to determine profile diff --git a/wizard.py b/wizard.py index 8e3fa041..05e97e59 100755 --- a/wizard.py +++ b/wizard.py @@ -150,7 +150,8 @@ def cleanup_unselected_services(selected_services): env_file.rename(backup_file) console.print(f"๐Ÿงน [dim]Backed up {service_name} configuration to {backup_file.name} (service not selected)[/dim]") -def run_service_setup(service_name, selected_services, https_enabled=False, server_ip=None): +def run_service_setup(service_name, selected_services, https_enabled=False, server_ip=None, + obsidian_enabled=False, neo4j_password=None): """Execute individual service setup script""" if service_name == 'advanced': service = SERVICES['backend'][service_name] @@ -165,7 +166,11 @@ def run_service_setup(service_name, selected_services, https_enabled=False, serv # Add HTTPS configuration if https_enabled and server_ip: cmd.extend(['--enable-https', '--server-ip', server_ip]) - + + # Add Obsidian configuration + if obsidian_enabled and neo4j_password: + cmd.extend(['--enable-obsidian', '--neo4j-password', neo4j_password]) + else: service = SERVICES['extras'][service_name] cmd = service['cmd'].copy() @@ -308,10 +313,28 @@ def setup_git_hooks(): except Exception as e: console.print(f"โš ๏ธ [yellow]Could not setup git hooks: {e} (optional)[/yellow]") +def setup_config_file(): + """Setup config.yml from template if it doesn't exist""" + config_file = Path("config.yml") + config_template = Path("config.yml.template") + + if not config_file.exists(): + if config_template.exists(): + import shutil + shutil.copy(config_template, config_file) + console.print("โœ… [green]Created config.yml from template[/green]") + else: + console.print("โš ๏ธ [yellow]config.yml.template not found, skipping config setup[/yellow]") + else: + 
console.print("โ„น๏ธ [blue]config.yml already exists, keeping existing configuration[/blue]") + def main(): """Main orchestration logic""" console.print("๐ŸŽ‰ [bold green]Welcome to Chronicle![/bold green]\n") + # Setup config file from template + setup_config_file() + # Setup git hooks first setup_git_hooks() @@ -371,7 +394,43 @@ def main(): break console.print(f"[green]โœ…[/green] HTTPS configured for: {server_ip}") - + + # Obsidian/Neo4j Integration + obsidian_enabled = False + neo4j_password = None + + # Check if advanced backend is selected + if 'advanced' in selected_services: + console.print("\n๐Ÿ—‚๏ธ [bold cyan]Obsidian/Neo4j Integration[/bold cyan]") + console.print("Enable graph-based knowledge management for Obsidian vault notes") + console.print() + + try: + obsidian_enabled = Confirm.ask("Enable Obsidian/Neo4j integration?", default=False) + except EOFError: + console.print("Using default: No") + obsidian_enabled = False + + if obsidian_enabled: + console.print("[blue][INFO][/blue] Neo4j will be configured for graph-based memory storage") + console.print() + + # Prompt for Neo4j password + while True: + try: + neo4j_password = console.input("Neo4j password (min 8 chars) [default: neo4jpassword]: ").strip() + if not neo4j_password: + neo4j_password = "neo4jpassword" + if len(neo4j_password) >= 8: + break + console.print("[yellow][WARNING][/yellow] Password must be at least 8 characters") + except EOFError: + neo4j_password = "neo4jpassword" + console.print(f"Using default password") + break + + console.print("[green]โœ…[/green] Obsidian/Neo4j integration will be configured") + # Pure Delegation - Run Each Service Setup console.print(f"\n๐Ÿ“‹ [bold]Setting up {len(selected_services)} services...[/bold]") @@ -382,17 +441,36 @@ def main(): failed_services = [] for service in selected_services: - if run_service_setup(service, selected_services, https_enabled, server_ip): + if run_service_setup(service, selected_services, https_enabled, server_ip, + 
obsidian_enabled, neo4j_password): success_count += 1 else: failed_services.append(service) - + + # Check for Obsidian/Neo4j configuration + obsidian_enabled = False + if 'advanced' in selected_services and 'advanced' not in failed_services: + backend_env_path = Path('backends/advanced/.env') + if backend_env_path.exists(): + neo4j_host = read_env_value(str(backend_env_path), 'NEO4J_HOST') + obsidian_enabled_flag = read_env_value(str(backend_env_path), 'OBSIDIAN_ENABLED') + if neo4j_host and not is_placeholder(neo4j_host, 'your-neo4j-host-here', 'your_neo4j_host_here'): + obsidian_enabled = True + elif obsidian_enabled_flag == 'true': + obsidian_enabled = True + # Final Summary console.print(f"\n๐ŸŽŠ [bold green]Setup Complete![/bold green]") console.print(f"โœ… {success_count}/{len(selected_services)} services configured successfully") - + if failed_services: console.print(f"โŒ Failed services: {', '.join(failed_services)}") + + # Inform about Obsidian/Neo4j if configured + if obsidian_enabled: + console.print(f"\n๐Ÿ“š [bold cyan]Obsidian Integration Detected[/bold cyan]") + console.print(" Neo4j will be started with the 'obsidian' profile") + console.print(" when you start the backend service.") # Next Steps console.print("\n๐Ÿ“– [bold]Next Steps:[/bold]") From 892b1b2a23e7a3630377ef28d000a857b221b72b Mon Sep 17 00:00:00 2001 From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com> Date: Tue, 30 Dec 2025 00:11:56 +0000 Subject: [PATCH 2/9] Refactor transcription providers and enhance configuration management - Updated Docker Compose files to include the new Neo4j service configuration. - Added support for Obsidian/Neo4j integration in the setup process. - Refactored transcription providers to utilize a registry-driven approach for Deepgram and Parakeet. - Enhanced error handling and logging in transcription processes. - Improved environment variable management in test scripts to prioritize command-line overrides. 
- Removed deprecated Parakeet provider implementation and streamlined audio stream workers. --- backends/advanced/docker-compose-test.yml | 3 +- backends/advanced/docker-compose.yml | 36 +- backends/advanced/init.py | 70 ++- backends/advanced/run-test.sh | 60 ++- .../services/transcription/__init__.py | 14 + .../services/transcription/deepgram.py | 429 +----------------- .../services/transcription/parakeet.py | 303 ------------- .../transcription/parakeet_stream_consumer.py | 19 +- .../workers/audio_stream_deepgram_worker.py | 11 +- .../workers/audio_stream_parakeet_worker.py | 11 +- .../workers/memory_jobs.py | 7 +- 11 files changed, 196 insertions(+), 767 deletions(-) delete mode 100644 backends/advanced/src/advanced_omi_backend/services/transcription/parakeet.py diff --git a/backends/advanced/docker-compose-test.yml b/backends/advanced/docker-compose-test.yml index 2498ea1a..20b4fd08 100644 --- a/backends/advanced/docker-compose-test.yml +++ b/backends/advanced/docker-compose-test.yml @@ -32,7 +32,7 @@ services: - ADMIN_EMAIL=test-admin@example.com # Transcription provider configuration - TRANSCRIPTION_PROVIDER=${TRANSCRIPTION_PROVIDER:-deepgram} - # - PARAKEET_ASR_URL=${PARAKEET_ASR_URL} + - PARAKEET_ASR_URL=${PARAKEET_ASR_URL} # Memory provider configuration - MEMORY_PROVIDER=${MEMORY_PROVIDER:-chronicle} - OPENMEMORY_MCP_URL=${OPENMEMORY_MCP_URL:-http://host.docker.internal:8765} @@ -144,6 +144,7 @@ services: - ADMIN_PASSWORD=test-admin-password-123 - ADMIN_EMAIL=test-admin@example.com - TRANSCRIPTION_PROVIDER=${TRANSCRIPTION_PROVIDER:-deepgram} + - PARAKEET_ASR_URL=${PARAKEET_ASR_URL} - MEMORY_PROVIDER=${MEMORY_PROVIDER:-chronicle} - OPENMEMORY_MCP_URL=${OPENMEMORY_MCP_URL:-http://host.docker.internal:8765} - OPENMEMORY_USER_ID=${OPENMEMORY_USER_ID:-openmemory} diff --git a/backends/advanced/docker-compose.yml b/backends/advanced/docker-compose.yml index b84d2ebe..313c0f23 100644 --- a/backends/advanced/docker-compose.yml +++ 
b/backends/advanced/docker-compose.yml @@ -177,20 +177,28 @@ services: ## Additional - # neo4j-mem0: - # image: neo4j:5.15-community - # ports: - # - "7474:7474" # HTTP - # - "7687:7687" # Bolt - # environment: - # - NEO4J_AUTH=neo4j/${NEO4J_PASSWORD:-password} - # - NEO4J_PLUGINS=["apoc"] - # - NEO4J_dbms_security_procedures_unrestricted=apoc.* - # - NEO4J_dbms_security_procedures_allowlist=apoc.* - # volumes: - # - ./data/neo4j_data:/data - # - ./data/neo4j_logs:/logs - # restart: unless-stopped + neo4j-mem0: + image: neo4j:5.15-community + hostname: neo4j-mem0 + ports: + - "7474:7474" # HTTP + - "7687:7687" # Bolt + environment: + - NEO4J_AUTH=neo4j/${NEO4J_PASSWORD:-password} + - NEO4J_PLUGINS=["apoc"] + - NEO4J_dbms_security_procedures_unrestricted=apoc.* + - NEO4J_dbms_security_procedures_allowlist=apoc.* + - NEO4J_server_default__listen__address=0.0.0.0 + - NEO4J_server_bolt_listen__address=0.0.0.0:7687 + - NEO4J_server_http_listen__address=0.0.0.0:7474 + - NEO4J_dbms_memory_heap_initial__size=512m + - NEO4J_dbms_memory_heap_max__size=2G + volumes: + - ./data/neo4j_data:/data + - ./data/neo4j_logs:/logs + restart: unless-stopped + profiles: + - obsidian # ollama: # image: ollama/ollama:latest diff --git a/backends/advanced/init.py b/backends/advanced/init.py index 25a614aa..851d56e1 100644 --- a/backends/advanced/init.py +++ b/backends/advanced/init.py @@ -410,11 +410,56 @@ def setup_optional_services(self): self.console.print("[green][SUCCESS][/green] Speaker Recognition configured") self.console.print("[blue][INFO][/blue] Start with: cd ../../extras/speaker-recognition && docker compose up -d") - # Check if ASR service URL provided via args + # Check if ASR service URL provided via args if hasattr(self.args, 'parakeet_asr_url') and self.args.parakeet_asr_url: self.config["PARAKEET_ASR_URL"] = self.args.parakeet_asr_url self.console.print(f"[green][SUCCESS][/green] Parakeet ASR configured via args: {self.args.parakeet_asr_url}") + def setup_obsidian(self): 
+ """Configure Obsidian/Neo4j integration""" + # Check if enabled via command line + if hasattr(self.args, 'enable_obsidian') and self.args.enable_obsidian: + enable_obsidian = True + neo4j_password = getattr(self.args, 'neo4j_password', None) + + if not neo4j_password: + self.console.print("[yellow][WARNING][/yellow] --enable-obsidian provided but no password") + neo4j_password = self.prompt_password("Neo4j password (min 8 chars)") + else: + # Interactive prompt (fallback) + self.console.print() + self.console.print("[bold cyan]Obsidian/Neo4j Integration[/bold cyan]") + self.console.print("Enable graph-based knowledge management for Obsidian vault notes") + self.console.print() + + try: + enable_obsidian = Confirm.ask("Enable Obsidian/Neo4j integration?", default=False) + except EOFError: + self.console.print("Using default: No") + enable_obsidian = False + + if enable_obsidian: + neo4j_password = self.prompt_password("Neo4j password (min 8 chars)") + + if enable_obsidian: + # Update .env with credentials + self.config["NEO4J_HOST"] = "neo4j-mem0" + self.config["NEO4J_USER"] = "neo4j" + self.config["NEO4J_PASSWORD"] = neo4j_password + + # Update config.yml with feature flag + if "memory" not in self.config_yml_data: + self.config_yml_data["memory"] = {} + if "obsidian" not in self.config_yml_data["memory"]: + self.config_yml_data["memory"]["obsidian"] = {} + + self.config_yml_data["memory"]["obsidian"]["enabled"] = True + self.config_yml_data["memory"]["obsidian"]["neo4j_host"] = "neo4j-mem0" + self.config_yml_data["memory"]["obsidian"]["timeout"] = 30 + + self.console.print("[green][SUCCESS][/green] Obsidian/Neo4j configured") + self.console.print("[blue][INFO][/blue] Neo4j will start automatically with --profile obsidian") + def setup_network(self): """Configure network settings""" self.print_section("Network Configuration") @@ -589,6 +634,11 @@ def show_summary(self): memory_provider = self.config_yml_data.get("memory", {}).get("provider", "chronicle") 
self.console.print(f"โœ… Memory Provider: {memory_provider} (config.yml)") + # Show Obsidian/Neo4j status + if self.config.get('OBSIDIAN_ENABLED') == 'true': + neo4j_host = self.config.get('NEO4J_HOST', 'not set') + self.console.print(f"โœ… Obsidian/Neo4j: Enabled ({neo4j_host})") + # Auto-determine URLs based on HTTPS configuration if self.config.get('HTTPS_ENABLED') == 'true': server_ip = self.config.get('SERVER_IP', 'localhost') @@ -604,9 +654,14 @@ def show_next_steps(self): """Show next steps""" self.print_section("Next Steps") self.console.print() - + self.console.print("1. Start the main services:") - self.console.print(" [cyan]docker compose up --build -d[/cyan]") + # Include --profile obsidian if Obsidian is enabled + if self.config.get('OBSIDIAN_ENABLED') == 'true': + self.console.print(" [cyan]docker compose --profile obsidian up --build -d[/cyan]") + self.console.print(" [dim](Includes Neo4j for Obsidian integration)[/dim]") + else: + self.console.print(" [cyan]docker compose up --build -d[/cyan]") self.console.print() # Auto-determine URLs for next steps @@ -653,6 +708,7 @@ def run(self): self.setup_llm() self.setup_memory() self.setup_optional_services() + self.setup_obsidian() self.setup_network() self.setup_https() @@ -695,9 +751,13 @@ def main(): help="Parakeet ASR service URL (default: prompt user)") parser.add_argument("--enable-https", action="store_true", help="Enable HTTPS configuration (default: prompt user)") - parser.add_argument("--server-ip", + parser.add_argument("--server-ip", help="Server IP/domain for SSL certificate (default: prompt user)") - + parser.add_argument("--enable-obsidian", action="store_true", + help="Enable Obsidian/Neo4j integration (default: prompt user)") + parser.add_argument("--neo4j-password", + help="Neo4j password (default: prompt user)") + args = parser.parse_args() setup = ChronicleSetup(args) diff --git a/backends/advanced/run-test.sh b/backends/advanced/run-test.sh index 925e3615..4f944256 100755 --- 
a/backends/advanced/run-test.sh +++ b/backends/advanced/run-test.sh @@ -39,8 +39,16 @@ print_info "Advanced Backend Integration Test Runner" print_info "========================================" # Load environment variables (CI or local) -# Priority: CI environment > .env.test > .env -if [ -n "$DEEPGRAM_API_KEY" ]; then +# Priority: Command-line env vars > CI environment > .env.test > .env +# Save any pre-existing environment variables to preserve command-line overrides +_TRANSCRIPTION_PROVIDER_OVERRIDE=${TRANSCRIPTION_PROVIDER} +_PARAKEET_ASR_URL_OVERRIDE=${PARAKEET_ASR_URL} +_DEEPGRAM_API_KEY_OVERRIDE=${DEEPGRAM_API_KEY} +_OPENAI_API_KEY_OVERRIDE=${OPENAI_API_KEY} +_LLM_PROVIDER_OVERRIDE=${LLM_PROVIDER} +_MEMORY_PROVIDER_OVERRIDE=${MEMORY_PROVIDER} + +if [ -n "$DEEPGRAM_API_KEY" ] && [ -z "$_TRANSCRIPTION_PROVIDER_OVERRIDE" ]; then print_info "Using environment variables from CI/environment..." elif [ -f ".env.test" ]; then print_info "Loading environment variables from .env.test..." @@ -59,6 +67,30 @@ else exit 1 fi +# Restore command-line overrides (these take highest priority) +if [ -n "$_TRANSCRIPTION_PROVIDER_OVERRIDE" ]; then + export TRANSCRIPTION_PROVIDER=$_TRANSCRIPTION_PROVIDER_OVERRIDE + print_info "Using command-line override: TRANSCRIPTION_PROVIDER=$TRANSCRIPTION_PROVIDER" +fi +if [ -n "$_PARAKEET_ASR_URL_OVERRIDE" ]; then + export PARAKEET_ASR_URL=$_PARAKEET_ASR_URL_OVERRIDE + print_info "Using command-line override: PARAKEET_ASR_URL=$PARAKEET_ASR_URL" +fi +if [ -n "$_DEEPGRAM_API_KEY_OVERRIDE" ]; then + export DEEPGRAM_API_KEY=$_DEEPGRAM_API_KEY_OVERRIDE +fi +if [ -n "$_OPENAI_API_KEY_OVERRIDE" ]; then + export OPENAI_API_KEY=$_OPENAI_API_KEY_OVERRIDE +fi +if [ -n "$_LLM_PROVIDER_OVERRIDE" ]; then + export LLM_PROVIDER=$_LLM_PROVIDER_OVERRIDE + print_info "Using command-line override: LLM_PROVIDER=$LLM_PROVIDER" +fi +if [ -n "$_MEMORY_PROVIDER_OVERRIDE" ]; then + export MEMORY_PROVIDER=$_MEMORY_PROVIDER_OVERRIDE + print_info "Using command-line 
override: MEMORY_PROVIDER=$MEMORY_PROVIDER" +fi + # Verify required environment variables based on configured providers TRANSCRIPTION_PROVIDER=${TRANSCRIPTION_PROVIDER:-deepgram} LLM_PROVIDER=${LLM_PROVIDER:-openai} @@ -161,17 +193,25 @@ else TEST_EXIT_CODE=$? print_error "Integration tests FAILED with exit code: $TEST_EXIT_CODE" - # Clean up test containers before exiting - print_info "Cleaning up test containers after failure..." - docker compose -f docker-compose-test.yml down -v || true - docker system prune -f || true + # Clean up test containers before exiting (unless CLEANUP_CONTAINERS=false) + if [ "${CLEANUP_CONTAINERS:-true}" != "false" ]; then + print_info "Cleaning up test containers after failure..." + docker compose -f docker-compose-test.yml down -v || true + docker system prune -f || true + else + print_warning "Skipping cleanup (CLEANUP_CONTAINERS=false) - containers left running for debugging" + fi exit $TEST_EXIT_CODE fi -# Clean up test containers -print_info "Cleaning up test containers..." -docker compose -f docker-compose-test.yml down -v || true -docker system prune -f || true +# Clean up test containers (unless CLEANUP_CONTAINERS=false) +if [ "${CLEANUP_CONTAINERS:-true}" != "false" ]; then + print_info "Cleaning up test containers..." + docker compose -f docker-compose-test.yml down -v || true + docker system prune -f || true +else + print_warning "Skipping cleanup (CLEANUP_CONTAINERS=false) - containers left running" +fi print_success "Advanced Backend integration tests completed!" 
diff --git a/backends/advanced/src/advanced_omi_backend/services/transcription/__init__.py b/backends/advanced/src/advanced_omi_backend/services/transcription/__init__.py index 507df738..2e20171b 100644 --- a/backends/advanced/src/advanced_omi_backend/services/transcription/__init__.py +++ b/backends/advanced/src/advanced_omi_backend/services/transcription/__init__.py @@ -126,6 +126,13 @@ async def transcribe(self, audio_data: bytes, sample_rate: int, diarize: bool = resp.raise_for_status() data = resp.json() + # DEBUG: Log Deepgram response structure + if "results" in data and "channels" in data.get("results", {}): + channels = data["results"]["channels"] + if channels and "alternatives" in channels[0]: + alt = channels[0]["alternatives"][0] + logger.info(f"DEBUG Registry: Deepgram alternative keys: {list(alt.keys())}") + # Extract normalized shape text, words, segments = "", [], [] extract = (op.get("response", {}) or {}).get("extract") or {} @@ -133,6 +140,13 @@ async def transcribe(self, audio_data: bytes, sample_rate: int, diarize: bool = text = _dotted_get(data, extract.get("text")) or "" words = _dotted_get(data, extract.get("words")) or [] segments = _dotted_get(data, extract.get("segments")) or [] + + # DEBUG: Log what we extracted + logger.info(f"DEBUG Registry: Extracted {len(segments)} segments from response") + if segments and len(segments) > 0: + logger.info(f"DEBUG Registry: First segment keys: {list(segments[0].keys()) if isinstance(segments[0], dict) else 'not a dict'}") + logger.info(f"DEBUG Registry: First segment: {segments[0]}") + return {"text": text, "words": words, "segments": segments} class RegistryStreamingTranscriptionProvider(StreamingTranscriptionProvider): diff --git a/backends/advanced/src/advanced_omi_backend/services/transcription/deepgram.py b/backends/advanced/src/advanced_omi_backend/services/transcription/deepgram.py index ee7e23fa..03b2936d 100644 --- 
a/backends/advanced/src/advanced_omi_backend/services/transcription/deepgram.py +++ b/backends/advanced/src/advanced_omi_backend/services/transcription/deepgram.py @@ -1,408 +1,13 @@ """ -Deepgram transcription provider implementations. +Deepgram transcription consumer for Redis Streams architecture. -Provides both batch and streaming transcription using Deepgram's Nova-3 model. +Uses the registry-driven transcription provider for Deepgram batch transcription. """ -import asyncio -import json import logging -import uuid -from typing import Dict, Optional - -import httpx -import websockets - -from .base import ( - BatchTranscriptionProvider, - StreamingTranscriptionProvider, -) logger = logging.getLogger(__name__) -class DeepgramProvider(BatchTranscriptionProvider): - """Deepgram batch transcription provider using Nova-3 model.""" - - def __init__(self, api_key: str): - self.api_key = api_key - self.url = "https://api.deepgram.com/v1/listen" - - @property - def name(self) -> str: - return "deepgram" - - async def transcribe(self, audio_data: bytes, sample_rate: int, diarize: bool = False) -> dict: - """Transcribe audio using Deepgram's REST API. 
- - Args: - audio_data: Raw audio bytes - sample_rate: Audio sample rate - diarize: Whether to enable speaker diarization - """ - try: - params = { - "model": "nova-3", - "language": "multi", - "smart_format": "true", - "punctuate": "true", - "diarize": "true" if diarize else "false", - "encoding": "linear16", - "sample_rate": str(sample_rate), - "channels": "1", - } - - headers = {"Authorization": f"Token {self.api_key}", "Content-Type": "audio/raw"} - - logger.debug(f"Sending {len(audio_data)} bytes to Deepgram API") - - # Calculate dynamic timeout based on audio file size - estimated_duration = len(audio_data) / (sample_rate * 2 * 1) # 16-bit mono - processing_timeout = max( - 120, int(estimated_duration * 3) - ) # Min 2 minutes, 3x audio duration - - timeout_config = httpx.Timeout( - connect=30.0, - read=processing_timeout, - write=max( - 180.0, int(len(audio_data) / (sample_rate * 2)) - ), # bytes per second for 16-bit PCM - pool=10.0, - ) - - logger.info( - f"Estimated audio duration: {estimated_duration:.1f}s, timeout: {processing_timeout}s" - ) - - async with httpx.AsyncClient(timeout=timeout_config) as client: - response = await client.post( - self.url, params=params, headers=headers, content=audio_data - ) - - if response.status_code == 200: - result = response.json() - logger.debug(f"Deepgram response: {result}") - - # Extract transcript from response - if result.get("results", {}).get("channels", []) and result["results"][ - "channels" - ][0].get("alternatives", []): - - alternative = result["results"]["channels"][0]["alternatives"][0] - - # Extract segments from diarized utterances if available - segments = [] - if "paragraphs" in alternative and alternative["paragraphs"].get("paragraphs"): - transcript = alternative["paragraphs"]["transcript"].strip() - logger.info( - f"Deepgram diarized transcription successful: {len(transcript)} characters" - ) - - # Extract speaker segments, grouping consecutive sentences from same speaker - current_speaker = None 
- current_segment = None - - for paragraph in alternative["paragraphs"]["paragraphs"]: - speaker = f"Speaker {paragraph.get('speaker', 'unknown')}" - - for sentence in paragraph.get("sentences", []): - if speaker == current_speaker and current_segment: - # Extend current segment with same speaker - current_segment["text"] += " " + sentence.get("text", "").strip() - current_segment["end"] = sentence.get("end", 0) - else: - # Save previous segment and start new one - if current_segment: - segments.append(current_segment) - current_segment = { - "text": sentence.get("text", "").strip(), - "speaker": speaker, - "start": sentence.get("start", 0), - "end": sentence.get("end", 0), - "confidence": None # Deepgram doesn't provide segment-level confidence - } - current_speaker = speaker - - # Don't forget the last segment - if current_segment: - segments.append(current_segment) - else: - transcript = alternative.get("transcript", "").strip() - logger.debug( - f"Deepgram basic transcription successful: {len(transcript)} characters" - ) - - if transcript: - # Extract speech timing information for logging - words = alternative.get("words", []) - if words: - first_word_start = words[0].get("start", 0) - last_word_end = words[-1].get("end", 0) - speech_duration = last_word_end - first_word_start - - # Calculate audio duration from data size - audio_duration = len(audio_data) / ( - sample_rate * 2 * 1 - ) # 16-bit mono - speech_percentage = ( - (speech_duration / audio_duration) * 100 - if audio_duration > 0 - else 0 - ) - - logger.info( - f"Deepgram speech analysis: {speech_duration:.1f}s speech detected in {audio_duration:.1f}s audio ({speech_percentage:.1f}%)" - ) - - # Check confidence levels - confidences = [ - w.get("confidence", 0) for w in words if "confidence" in w - ] - if confidences: - avg_confidence = sum(confidences) / len(confidences) - low_confidence_count = sum(1 for c in confidences if c < 0.5) - logger.info( - f"Deepgram confidence: avg={avg_confidence:.2f}, 
{low_confidence_count}/{len(words)} words <0.5 confidence" - ) - - # Keep raw transcript and word data without formatting - logger.info( - f"Keeping raw transcript with word-level data: {len(transcript)} characters, {len(segments)} segments" - ) - return { - "text": transcript, - "words": words, - "segments": segments, - } - else: - # No word-level data, return basic transcript - logger.info( - "No word-level data available, returning basic transcript" - ) - return {"text": transcript, "words": [], "segments": []} - else: - logger.warning("Deepgram returned empty transcript") - return {"text": "", "words": [], "segments": []} - else: - error_msg = "Deepgram response missing expected transcript structure" - logger.error(error_msg) - raise RuntimeError(error_msg) - else: - error_msg = f"Deepgram API error: {response.status_code} - {response.text}" - logger.error(error_msg) - raise RuntimeError(error_msg) - - except httpx.TimeoutException as e: - timeout_type = "unknown" - if "connect" in str(e).lower(): - timeout_type = "connection" - elif "read" in str(e).lower(): - timeout_type = "read" - elif "write" in str(e).lower(): - timeout_type = "write (upload)" - elif "pool" in str(e).lower(): - timeout_type = "connection pool" - error_msg = f"HTTP {timeout_type} timeout during Deepgram API call for {len(audio_data)} bytes: {e}" - logger.error(error_msg) - raise RuntimeError(error_msg) from e - except RuntimeError: - # Re-raise RuntimeError from above (API errors, timeouts) - raise - except Exception as e: - error_msg = f"Unexpected error calling Deepgram API: {e}" - logger.error(error_msg) - raise RuntimeError(error_msg) from e - - -class DeepgramStreamingProvider(StreamingTranscriptionProvider): - """Deepgram streaming transcription provider using WebSocket connection.""" - - def __init__(self, api_key: str): - self.api_key = api_key - self.ws_url = "wss://api.deepgram.com/v1/listen" - self._streams: Dict[str, Dict] = {} # client_id -> stream data - - @property - def 
name(self) -> str: - return "deepgram" - - async def start_stream(self, client_id: str, sample_rate: int = 16000, diarize: bool = False): - """Start a WebSocket connection for streaming transcription. - - Args: - client_id: Unique client identifier - sample_rate: Audio sample rate - diarize: Whether to enable speaker diarization - """ - try: - logger.info(f"Starting Deepgram streaming for client {client_id} (diarize={diarize})") - - # WebSocket connection parameters - params = { - "model": "nova-3", - "language": "multi", - "smart_format": "true", - "punctuate": "true", - "diarize": "true" if diarize else "false", - "encoding": "linear16", - "sample_rate": str(sample_rate), - "channels": "1", - "interim_results": "true", - "endpointing": "300", # 300ms silence for endpoint detection - } - - # Build WebSocket URL with parameters - query_string = "&".join([f"{k}={v}" for k, v in params.items()]) - ws_url = f"{self.ws_url}?{query_string}" - - # Connect to WebSocket - websocket = await websockets.connect( - ws_url, - extra_headers={"Authorization": f"Token {self.api_key}"} - ) - - # Store stream data - self._streams[client_id] = { - "websocket": websocket, - "final_transcript": "", - "words": [], - "stream_id": str(uuid.uuid4()) - } - - logger.debug(f"Deepgram WebSocket connected for client {client_id}") - - except Exception as e: - logger.error(f"Failed to start Deepgram streaming for {client_id}: {e}") - raise - - async def process_audio_chunk(self, client_id: str, audio_chunk: bytes) -> Optional[dict]: - """Send audio chunk to WebSocket and process responses.""" - if client_id not in self._streams: - logger.error(f"No active stream for client {client_id}") - return None - - try: - stream_data = self._streams[client_id] - websocket = stream_data["websocket"] - - # Send audio chunk - await websocket.send(audio_chunk) - - # Check for responses (non-blocking) - try: - while True: - response = await asyncio.wait_for(websocket.recv(), timeout=0.01) - result = 
json.loads(response) - - if result.get("type") == "Results": - channel = result.get("channel", {}) - alternatives = channel.get("alternatives", []) - - if alternatives: - alt = alternatives[0] - is_final = channel.get("is_final", False) - - if is_final: - # Accumulate final transcript and words - transcript = alt.get("transcript", "") - words = alt.get("words", []) - - if transcript.strip(): - stream_data["final_transcript"] += transcript + " " - stream_data["words"].extend(words) - - logger.debug(f"Final transcript chunk: {transcript}") - - except asyncio.TimeoutError: - # No response available, continue - pass - - return None # Streaming, no final result yet - - except Exception as e: - logger.error(f"Error processing audio chunk for {client_id}: {e}") - return None - - async def end_stream(self, client_id: str) -> dict: - """Close WebSocket connection and return final transcription.""" - if client_id not in self._streams: - logger.error(f"No active stream for client {client_id}") - return {"text": "", "words": [], "segments": []} - - try: - stream_data = self._streams[client_id] - websocket = stream_data["websocket"] - - # Send close message - close_msg = json.dumps({"type": "CloseStream"}) - await websocket.send(close_msg) - - # Wait a bit for final responses - try: - end_time = asyncio.get_event_loop().time() + 2.0 # 2 second timeout - while asyncio.get_event_loop().time() < end_time: - response = await asyncio.wait_for(websocket.recv(), timeout=0.5) - result = json.loads(response) - - if result.get("type") == "Results": - channel = result.get("channel", {}) - alternatives = channel.get("alternatives", []) - - if alternatives and channel.get("is_final", False): - alt = alternatives[0] - transcript = alt.get("transcript", "") - words = alt.get("words", []) - - if transcript.strip(): - stream_data["final_transcript"] += transcript - stream_data["words"].extend(words) - - except asyncio.TimeoutError: - pass - - # Close WebSocket - await websocket.close() - - # 
Prepare final result - final_transcript = stream_data["final_transcript"].strip() - final_words = stream_data["words"] - - logger.info(f"Deepgram streaming completed for {client_id}: {len(final_transcript)} chars, {len(final_words)} words") - - # Clean up - del self._streams[client_id] - - return { - "text": final_transcript, - "words": final_words, - "segments": [] - } - - except Exception as e: - logger.error(f"Error ending stream for {client_id}: {e}") - # Clean up on error - if client_id in self._streams: - del self._streams[client_id] - return {"text": "", "words": [], "segments": []} - - async def transcribe(self, audio_data: bytes, sample_rate: int, **kwargs) -> dict: - """For streaming provider, this method is not typically used.""" - logger.warning("transcribe() called on streaming provider - use streaming methods instead") - return {"text": "", "words": [], "segments": []} - - async def disconnect(self): - """Close all active WebSocket connections.""" - for client_id in list(self._streams.keys()): - try: - websocket = self._streams[client_id]["websocket"] - await websocket.close() - except Exception as e: - logger.error(f"Error closing WebSocket for {client_id}: {e}") - finally: - del self._streams[client_id] - - logger.info("All Deepgram streaming connections closed") - class DeepgramStreamConsumer: """ @@ -411,40 +16,42 @@ class DeepgramStreamConsumer: Reads from: specified stream (client-specific or provider-specific) Writes to: transcription:results:{session_id} - This inherits from BaseAudioStreamConsumer and implements transcribe_audio(). + Uses RegistryBatchTranscriptionProvider configured via config.yml for + Deepgram transcription. This ensures consistent behavior with batch + transcription jobs. """ - def __init__(self, redis_client, api_key: str = None, buffer_chunks: int = 30): + def __init__(self, redis_client, buffer_chunks: int = 30): """ Initialize Deepgram consumer. 
Dynamically discovers all audio:stream:* streams and claims them using Redis locks. + Uses config.yml stt-deepgram configuration for transcription. Args: redis_client: Connected Redis client - api_key: Deepgram API key (defaults to DEEPGRAM_API_KEY env var) buffer_chunks: Number of chunks to buffer before transcribing (default: 30 = ~7.5s) """ - import os from advanced_omi_backend.services.audio_stream.consumer import BaseAudioStreamConsumer + from advanced_omi_backend.services.transcription import get_transcription_provider - self.api_key = api_key or os.getenv("DEEPGRAM_API_KEY") - if not self.api_key: - raise ValueError("DEEPGRAM_API_KEY is required") - - # Initialize Deepgram provider - self.provider = DeepgramProvider(api_key=self.api_key) + # Get registry-driven transcription provider + self.provider = get_transcription_provider(mode="batch") + if not self.provider: + raise RuntimeError( + "Failed to load transcription provider. Ensure config.yml has a default 'stt' model configured." 
+ ) # Create a concrete subclass that implements transcribe_audio class _ConcreteConsumer(BaseAudioStreamConsumer): def __init__(inner_self, provider_name: str, redis_client, buffer_chunks: int): super().__init__(provider_name, redis_client, buffer_chunks) - inner_self._deepgram_provider = self.provider + inner_self._transcription_provider = self.provider async def transcribe_audio(inner_self, audio_data: bytes, sample_rate: int) -> dict: - """Transcribe using DeepgramProvider.""" + """Transcribe using registry-driven transcription provider.""" try: - result = await inner_self._deepgram_provider.transcribe( + result = await inner_self._transcription_provider.transcribe( audio_data=audio_data, sample_rate=sample_rate, diarize=True @@ -482,5 +89,3 @@ async def start_consuming(self): async def stop(self): """Delegate to base consumer.""" return await self._consumer.stop() - - diff --git a/backends/advanced/src/advanced_omi_backend/services/transcription/parakeet.py b/backends/advanced/src/advanced_omi_backend/services/transcription/parakeet.py deleted file mode 100644 index 97b5b751..00000000 --- a/backends/advanced/src/advanced_omi_backend/services/transcription/parakeet.py +++ /dev/null @@ -1,303 +0,0 @@ -""" -Parakeet (NeMo) transcription provider implementations. - -Provides both batch and streaming transcription using NeMo's Parakeet ASR models. 
-""" - -import asyncio -import json -import logging -import os -import tempfile -from typing import Dict, Optional - -import httpx -import numpy as np -import websockets -from easy_audio_interfaces.audio_interfaces import AudioChunk -from easy_audio_interfaces.filesystem import LocalFileSink - -from .base import ( - BatchTranscriptionProvider, - StreamingTranscriptionProvider, -) - -logger = logging.getLogger(__name__) - -class ParakeetProvider(BatchTranscriptionProvider): - """Parakeet HTTP batch transcription provider.""" - - def __init__(self, service_url: str): - self.service_url = service_url.rstrip('/') - self.transcribe_url = f"{self.service_url}/transcribe" - - @property - def name(self) -> str: - return "parakeet" - - async def transcribe(self, audio_data: bytes, sample_rate: int, **kwargs) -> dict: - """Transcribe audio using Parakeet HTTP service.""" - try: - - logger.info(f"Sending {len(audio_data)} bytes to Parakeet service at {self.transcribe_url}") - - # Convert PCM bytes to audio file for upload - if sample_rate != 16000: - logger.warning(f"Sample rate {sample_rate} != 16000, audio may not be optimal") - - # Assume 16-bit PCM - audio_array = np.frombuffer(audio_data, dtype=np.int16).astype(np.float32) - audio_array = audio_array / np.iinfo(np.int16).max # Normalize to [-1, 1] - - # Create temporary WAV file - with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file: - # sf.write(tmp_file.name, audio_array, 16000) # Force 16kHz - async with LocalFileSink(tmp_file.name, sample_rate, 1) as sink: - await sink.write(AudioChunk( - rate=sample_rate, - width=2, - channels=1, - audio=audio_data, - )) - - tmp_filename = tmp_file.name - - try: - # Upload file to Parakeet service - async with httpx.AsyncClient(timeout=180.0) as client: - with open(tmp_filename, "rb") as f: - files = {"file": ("audio.wav", f, "audio/wav")} - response = await client.post(self.transcribe_url, files=files) - - if response.status_code == 200: - result = 
response.json() - logger.info(f"Parakeet transcription successful: {len(result.get('text', ''))} chars, {len(result.get('words', []))} words") - return result - else: - error_msg = f"Parakeet service error: {response.status_code} - {response.text}" - logger.error(error_msg) - - # For 5xx errors, raise exception to trigger retry/failure handling - if response.status_code >= 500: - raise RuntimeError(f"Parakeet service unavailable: HTTP {response.status_code}") - - # For 4xx errors, return empty result (client error, won't retry) - return {"text": "", "words": [], "segments": []} - - finally: - # Clean up temporary file - if os.path.exists(tmp_filename): - os.unlink(tmp_filename) - - except Exception as e: - logger.error(f"Error calling Parakeet service: {e}") - raise e - - -class ParakeetStreamingProvider(StreamingTranscriptionProvider): - """Parakeet WebSocket streaming transcription provider.""" - - def __init__(self, service_url: str): - self.service_url = service_url.rstrip('/') - self.ws_url = service_url.replace("http://", "ws://").replace("https://", "wss://") + "/stream" - self._streams: Dict[str, Dict] = {} # client_id -> stream data - - @property - def name(self) -> str: - return "parakeet" - - async def start_stream(self, client_id: str, sample_rate: int = 16000, diarize: bool = False): - """Start a WebSocket connection for streaming transcription. 
- - Args: - client_id: Unique client identifier - sample_rate: Audio sample rate - diarize: Whether to enable speaker diarization (ignored - Parakeet doesn't support diarization) - """ - if diarize: - logger.warning(f"Parakeet streaming provider does not support diarization, ignoring diarize=True for client {client_id}") - try: - logger.info(f"Starting Parakeet streaming for client {client_id}") - - # Connect to WebSocket - websocket = await websockets.connect(self.ws_url) - - # Send transcribe event to start session - session_config = { - "vad_enabled": True, - "vad_silence_ms": 1000, - "time_interval_seconds": 30, - "return_interim_results": True, - "min_audio_seconds": 0.5 - } - - start_message = { - "type": "transcribe", - "session_id": client_id, - "config": session_config - } - - await websocket.send(json.dumps(start_message)) - - # Wait for session_started confirmation - response = await websocket.recv() - response_data = json.loads(response) - - if response_data.get("type") != "session_started": - raise RuntimeError(f"Failed to start session: {response_data}") - - # Store stream data - self._streams[client_id] = { - "websocket": websocket, - "sample_rate": sample_rate, - "session_id": client_id, - "interim_results": [], - "final_result": None - } - - logger.info(f"Parakeet WebSocket connected for client {client_id}") - - except Exception as e: - logger.error(f"Failed to start Parakeet streaming for {client_id}: {e}") - raise - - async def process_audio_chunk(self, client_id: str, audio_chunk: bytes) -> Optional[dict]: - """Send audio chunk to WebSocket and process responses.""" - if client_id not in self._streams: - logger.error(f"No active stream for client {client_id}") - return None - - try: - stream_data = self._streams[client_id] - websocket = stream_data["websocket"] - sample_rate = stream_data["sample_rate"] - - # Send audio_chunk event - chunk_message = { - "type": "audio_chunk", - "session_id": client_id, - "rate": sample_rate, - "width": 2, # 
16-bit - "channels": 1 - } - - await websocket.send(json.dumps(chunk_message)) - await websocket.send(audio_chunk) - - # Check for responses (non-blocking) - try: - while True: - response = await asyncio.wait_for(websocket.recv(), timeout=0.01) - result = json.loads(response) - - if result.get("type") == "interim_result": - # Store interim result but don't return it (handled by backend differently) - stream_data["interim_results"].append(result) - logger.debug(f"Received interim result: {result.get('text', '')[:50]}...") - elif result.get("type") == "final_result": - # This shouldn't happen during chunk processing, but store it - stream_data["final_result"] = result - logger.debug(f"Received final result during chunk processing: {result.get('text', '')[:50]}...") - - except asyncio.TimeoutError: - # No response available, continue - pass - - return None # Streaming, no final result yet - - except Exception as e: - logger.error(f"Error processing audio chunk for {client_id}: {e}") - return None - - async def end_stream(self, client_id: str) -> dict: - """Close WebSocket connection and return final transcription.""" - if client_id not in self._streams: - logger.error(f"No active stream for client {client_id}") - return {"text": "", "words": [], "segments": []} - - try: - stream_data = self._streams[client_id] - websocket = stream_data["websocket"] - - # Send finalize event - finalize_message = { - "type": "finalize", - "session_id": client_id - } - await websocket.send(json.dumps(finalize_message)) - - # Wait for final result - try: - end_time = asyncio.get_event_loop().time() + 5.0 # 5 second timeout - while asyncio.get_event_loop().time() < end_time: - response = await asyncio.wait_for(websocket.recv(), timeout=1.0) - result = json.loads(response) - - if result.get("type") == "final_result": - stream_data["final_result"] = result - break - - except asyncio.TimeoutError: - logger.warning(f"Timeout waiting for final result from {client_id}") - - # Close WebSocket - 
await websocket.close() - - # Prepare final result - final_result = stream_data.get("final_result") - if final_result: - result_data = { - "text": final_result.get("text", ""), - "words": final_result.get("words", []), - "segments": final_result.get("segments", []) - } - else: - # Fallback: aggregate interim results if no final result received - interim_texts = [r.get("text", "") for r in stream_data["interim_results"]] - all_words = [] - for r in stream_data["interim_results"]: - all_words.extend(r.get("words", [])) - - result_data = { - "text": " ".join(interim_texts), - "words": all_words, - "segments": [] - } - - logger.info(f"Parakeet streaming completed for {client_id}: {len(result_data.get('text', ''))} chars") - - # Clean up - del self._streams[client_id] - - return result_data - - except Exception as e: - logger.error(f"Error ending stream for {client_id}: {e}") - # Clean up on error - if client_id in self._streams: - try: - await self._streams[client_id]["websocket"].close() - except: - pass - del self._streams[client_id] - return {"text": "", "words": [], "segments": []} - - async def transcribe(self, audio_data: bytes, sample_rate: int, **kwargs) -> dict: - """For streaming provider, this method is not typically used.""" - logger.warning("transcribe() called on streaming provider - use streaming methods instead") - return {"text": "", "words": [], "segments": []} - - async def disconnect(self): - """Close all active WebSocket connections.""" - for client_id in list(self._streams.keys()): - try: - websocket = self._streams[client_id]["websocket"] - await websocket.close() - except Exception as e: - logger.error(f"Error closing WebSocket for {client_id}: {e}") - finally: - del self._streams[client_id] - - logger.info("All Parakeet streaming connections closed") - - diff --git a/backends/advanced/src/advanced_omi_backend/services/transcription/parakeet_stream_consumer.py 
b/backends/advanced/src/advanced_omi_backend/services/transcription/parakeet_stream_consumer.py index 740a5f84..f629cefd 100644 --- a/backends/advanced/src/advanced_omi_backend/services/transcription/parakeet_stream_consumer.py +++ b/backends/advanced/src/advanced_omi_backend/services/transcription/parakeet_stream_consumer.py @@ -6,10 +6,9 @@ """ import logging -import os from advanced_omi_backend.services.audio_stream.consumer import BaseAudioStreamConsumer -from advanced_omi_backend.services.transcription.parakeet import ParakeetProvider +from advanced_omi_backend.services.transcription import get_transcription_provider logger = logging.getLogger(__name__) @@ -24,23 +23,23 @@ class ParakeetStreamConsumer: This inherits from BaseAudioStreamConsumer and implements transcribe_audio(). """ - def __init__(self, redis_client, service_url: str = None, buffer_chunks: int = 30): + def __init__(self, redis_client, buffer_chunks: int = 30): """ Initialize Parakeet consumer. Dynamically discovers all audio:stream:* streams and claims them using Redis locks. + Uses config.yml stt-parakeet-batch configuration for transcription. Args: redis_client: Connected Redis client - service_url: Parakeet service URL (defaults to PARAKEET_ASR_URL env var) buffer_chunks: Number of chunks to buffer before transcribing (default: 30 = ~7.5s) """ - self.service_url = service_url or os.getenv("PARAKEET_ASR_URL") - if not self.service_url: - raise ValueError("PARAKEET_ASR_URL is required") - - # Initialize Parakeet provider - self.provider = ParakeetProvider(service_url=self.service_url) + # Get registry-driven transcription provider + self.provider = get_transcription_provider(mode="batch") + if not self.provider: + raise RuntimeError( + "Failed to load transcription provider. Ensure config.yml has a default 'stt' model configured." 
+ ) # Create a concrete subclass that implements transcribe_audio class _ConcreteConsumer(BaseAudioStreamConsumer): diff --git a/backends/advanced/src/advanced_omi_backend/workers/audio_stream_deepgram_worker.py b/backends/advanced/src/advanced_omi_backend/workers/audio_stream_deepgram_worker.py index c8866eed..a58682c1 100644 --- a/backends/advanced/src/advanced_omi_backend/workers/audio_stream_deepgram_worker.py +++ b/backends/advanced/src/advanced_omi_backend/workers/audio_stream_deepgram_worker.py @@ -27,12 +27,13 @@ async def main(): """Main worker entry point.""" logger.info("๐Ÿš€ Starting Deepgram audio stream worker") - # Get configuration from environment + # Check that config.yml has Deepgram configured + # The registry provider will load configuration from config.yml api_key = os.getenv("DEEPGRAM_API_KEY") if not api_key: - logger.warning("DEEPGRAM_API_KEY environment variable not set - Deepgram audio stream worker will not start") - logger.warning("Audio transcription will use alternative providers if configured") - return + logger.warning("DEEPGRAM_API_KEY environment variable not set") + logger.warning("Ensure config.yml has a default 'stt' model configured for Deepgram") + logger.warning("Audio transcription will use alternative providers if configured in config.yml") redis_url = os.getenv("REDIS_URL", "redis://localhost:6379/0") @@ -47,9 +48,9 @@ async def main(): # Create consumer with balanced buffer size # 20 chunks = ~5 seconds of audio # Balance between transcription accuracy and latency + # Consumer uses registry-driven provider from config.yml consumer = DeepgramStreamConsumer( redis_client=redis_client, - api_key=api_key, buffer_chunks=20 # 5 seconds - good context without excessive delay ) diff --git a/backends/advanced/src/advanced_omi_backend/workers/audio_stream_parakeet_worker.py b/backends/advanced/src/advanced_omi_backend/workers/audio_stream_parakeet_worker.py index 0c368a2b..56f2f26b 100644 --- 
a/backends/advanced/src/advanced_omi_backend/workers/audio_stream_parakeet_worker.py +++ b/backends/advanced/src/advanced_omi_backend/workers/audio_stream_parakeet_worker.py @@ -27,12 +27,13 @@ async def main(): """Main worker entry point.""" logger.info("๐Ÿš€ Starting Parakeet audio stream worker") - # Get configuration from environment + # Check that config.yml has Parakeet configured + # The registry provider will load configuration from config.yml service_url = os.getenv("PARAKEET_ASR_URL") if not service_url: - logger.warning("PARAKEET_ASR_URL environment variable not set - Parakeet audio stream worker will not start") - logger.warning("Audio transcription will use alternative providers if configured") - return + logger.warning("PARAKEET_ASR_URL environment variable not set") + logger.warning("Ensure config.yml has a default 'stt' model configured for Parakeet") + logger.warning("Audio transcription will use alternative providers if configured in config.yml") redis_url = os.getenv("REDIS_URL", "redis://localhost:6379/0") @@ -47,9 +48,9 @@ async def main(): # Create consumer with balanced buffer size # 20 chunks = ~5 seconds of audio # Balance between transcription accuracy and latency + # Consumer uses registry-driven provider from config.yml consumer = ParakeetStreamConsumer( redis_client=redis_client, - service_url=service_url, buffer_chunks=20 # 5 seconds - good context without excessive delay ) diff --git a/backends/advanced/src/advanced_omi_backend/workers/memory_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/memory_jobs.py index 31dba573..8b64d690 100644 --- a/backends/advanced/src/advanced_omi_backend/workers/memory_jobs.py +++ b/backends/advanced/src/advanced_omi_backend/workers/memory_jobs.py @@ -89,8 +89,11 @@ async def process_memory_job(conversation_id: str, *, redis_client=None) -> Dict if text: dialogue_lines.append(f"{speaker}: {text}") full_conversation = "\n".join(dialogue_lines) - elif conversation_model.transcript and 
isinstance(conversation_model.transcript, str): - # Fallback: if segments are empty but transcript text exists + + # Fallback: if segments have no text content but transcript exists, use transcript + # This handles cases where speaker recognition fails/is disabled + if len(full_conversation) < 10 and conversation_model.transcript and isinstance(conversation_model.transcript, str): + logger.info(f"Segments empty or too short, falling back to transcript text for {conversation_id}") full_conversation = conversation_model.transcript if len(full_conversation) < 10: From eb7ca433082210cc8b6cbb812673d0012ef84de1 Mon Sep 17 00:00:00 2001 From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com> Date: Fri, 2 Jan 2026 17:52:52 +0530 Subject: [PATCH 3/9] Update configuration management and enhance file structure, add test-matrix (#237) * Update configuration management and enhance file structure - Refactored configuration file paths to use a dedicated `config/` directory, including updates to `config.yml` and its template. - Modified service scripts to load the new configuration path for `config.yml`. - Enhanced `.gitignore` to include the new configuration files and templates. - Updated documentation to reflect changes in configuration file locations and usage. - Improved setup scripts to ensure proper creation and management of configuration files. - Added new test configurations for various provider combinations to streamline testing processes. * Add test requirements and clean up imports in wizard.py - Introduced a new `test-requirements.txt` file to manage testing dependencies. - Removed redundant import of `shutil` in `wizard.py` to improve code clarity. * Add ConfigManager for unified configuration management - Introduced a new `config_manager.py` module to handle reading and writing configurations from `config.yml` and `.env` files, ensuring backward compatibility. 
- Refactored `ChronicleSetup` in `backends/advanced/init.py` to utilize `ConfigManager` for loading and updating configurations, simplifying the setup process. - Removed redundant methods for loading and saving `config.yml` directly in `ChronicleSetup`, as these are now managed by `ConfigManager`. - Enhanced user feedback during configuration updates, including success messages for changes made to configuration files. * Refactor transcription provider configuration and enhance setup process - Updated `.env.template` to clarify speech-to-text configuration and removed deprecated options for Mistral. - Modified `docker-compose.yml` to streamline environment variable management by removing unused Mistral keys. - Enhanced `ChronicleSetup` in `init.py` to provide clearer user feedback and updated the transcription provider selection process to rely on `config.yml`. - Improved error handling in the websocket controller to determine the transcription provider from the model registry instead of environment variables. - Updated health check routes to reflect the new method of retrieving the transcription provider from `config.yml`. - Adjusted `config.yml.template` to include comments on transcription provider options for better user guidance. * Enhance ConfigManager with deep merge functionality - Updated the `update_memory_config` method to perform a deep merge of updates into the memory configuration, ensuring nested dictionaries are merged correctly. - Added a new `_deep_merge` method to handle recursive merging of dictionaries, improving configuration management capabilities. * Refactor run-test.sh and enhance memory extraction tests - Removed deprecated environment variable handling for TRANSCRIPTION_PROVIDER in `run-test.sh`, streamlining the configuration process. - Introduced a new `run-custom.sh` script for executing Robot tests with custom configurations, improving test flexibility. 
- Enhanced memory extraction tests in `audio_keywords.robot` and `memory_keywords.robot` to include detailed assertions and result handling. - Updated `queue_keywords.robot` to fail fast if a job is in a 'failed' state when expecting 'completed', improving error handling. - Refactored `test_env.py` to load environment variables with correct precedence, ensuring better configuration management. * unify tests to robot test, add some more clean up * Update health check configuration in docker-compose-test.yml (#241) - Increased the number of retries from 5 to 10 for improved resilience during service readiness checks. - Extended the start period from 30s to 60s to allow more time for services to initialize before health checks commence. * Add step to create test configuration file in robot-tests.yml - Introduced a new step in the GitHub Actions workflow to copy the test configuration file from tests/configs/deepgram-openai.yml to a new config/config.yml. - Added logging to confirm the creation of the test config file, improving visibility during the test setup process. * remove cache step since not required * coderabbit comments * Refactor ConfigManager error handling for configuration file loading - Updated the ConfigManager to raise RuntimeError exceptions when the configuration file is not found or is invalid, improving error visibility and user guidance. - Removed fallback behavior that previously returned the current directory, ensuring users are explicitly informed about missing or invalid configuration files. * Refactor _find_repo_root method in ConfigManager - Updated the _find_repo_root method to locate the repository root using the __file__ location instead of searching for config/config.yml, simplifying the logic and improving reliability. - Removed the previous error handling that raised a RuntimeError if the configuration file was not found, as the new approach assumes config_manager.py is always at the repo root. 
--- .github/workflows/README.md | 4 +- .github/workflows/robot-tests.yml | 9 +- .gitignore | 12 +- CLAUDE.md | 6 +- Docs/getting-started.md | 14 +- backends/advanced/.env.template | 12 +- backends/advanced/Docs/README.md | 20 +- backends/advanced/Docs/contribution.md | 4 +- backends/advanced/Docs/memories.md | 4 +- .../Docs/memory-configuration-guide.md | 6 +- backends/advanced/Docs/quickstart.md | 14 +- backends/advanced/README.md | 21 +- backends/advanced/SETUP_SCRIPTS.md | 2 +- backends/advanced/docker-compose-test.yml | 8 +- backends/advanced/docker-compose.yml | 10 +- backends/advanced/init.py | 173 +- backends/advanced/run-test.sh | 77 +- .../controllers/websocket_controller.py | 32 +- .../routers/modules/health_routes.py | 6 +- backends/advanced/start-workers.sh | 25 +- backends/advanced/tests/test_integration.py | 1591 ----------------- config/README.md | 106 ++ .../config.yml.template | 5 +- config_manager.py | 367 ++++ extras/speaker-recognition/run-test.sh | 9 +- services.py | 4 +- requirements.txt => test-requirements.txt | 0 tests/configs/README.md | 132 ++ tests/configs/deepgram-openai.yml | 84 + tests/configs/full-local.yml | 1 + tests/configs/parakeet-ollama.yml | 73 + tests/configs/parakeet-openai.yml | 73 + tests/integration/integration_test.robot | 40 + tests/resources/audio_keywords.robot | 44 + tests/resources/memory_keywords.robot | 186 ++ tests/resources/queue_keywords.robot | 7 + tests/run-custom.sh | 20 + tests/run-robot-tests.sh | 16 +- tests/setup/test_data.py | 14 + tests/setup/test_env.py | 34 +- wizard.py | 16 +- 41 files changed, 1440 insertions(+), 1841 deletions(-) delete mode 100644 backends/advanced/tests/test_integration.py create mode 100644 config/README.md rename config.yml.template => config/config.yml.template (95%) create mode 100644 config_manager.py rename requirements.txt => test-requirements.txt (100%) create mode 100644 tests/configs/README.md create mode 100644 tests/configs/deepgram-openai.yml create mode 120000 
tests/configs/full-local.yml create mode 100644 tests/configs/parakeet-ollama.yml create mode 100644 tests/configs/parakeet-openai.yml create mode 100755 tests/run-custom.sh diff --git a/.github/workflows/README.md b/.github/workflows/README.md index 3b645800..5e98cd18 100644 --- a/.github/workflows/README.md +++ b/.github/workflows/README.md @@ -86,6 +86,6 @@ uv sync --dev cp .env.template .env.test # Add your API keys to .env.test -# Run test (modify CACHED_MODE in test_integration.py if needed) -uv run pytest test_integration.py::test_full_pipeline_integration -v -s +# Run Robot Framework integration tests +uv run robot --outputdir test-results --loglevel INFO tests/integration/integration_test.robot ``` \ No newline at end of file diff --git a/.github/workflows/robot-tests.yml b/.github/workflows/robot-tests.yml index 92073f7b..bac4c65a 100644 --- a/.github/workflows/robot-tests.yml +++ b/.github/workflows/robot-tests.yml @@ -61,7 +61,6 @@ jobs: uses: actions/setup-python@v5 with: python-version: "3.12" - cache: 'pip' - name: Install uv uses: astral-sh/setup-uv@v4 @@ -94,6 +93,14 @@ jobs: TEST_DEVICE_NAME=robot-test EOF + - name: Create test config.yml + run: | + echo "Copying test configuration file..." 
+ mkdir -p config + cp tests/configs/deepgram-openai.yml config/config.yml + echo "โœ“ Test config.yml created from tests/configs/deepgram-openai.yml" + ls -lh config/config.yml + - name: Start test environment working-directory: backends/advanced env: diff --git a/.gitignore b/.gitignore index 38cb0c88..933a1165 100644 --- a/.gitignore +++ b/.gitignore @@ -4,8 +4,16 @@ !**/.env.template **/memory_config.yaml !**/memory_config.yaml.template -config.yml -!config.yml.template +tests/setup/.env.test + +# Main config (user-specific) +config/config.yml +!config/config.yml.template + +# Config backups +config/*.backup.* +config/*.backup* + example/* **/node_modules/* **/ollama-data/* diff --git a/CLAUDE.md b/CLAUDE.md index e505b25a..abe20db6 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -116,11 +116,11 @@ cp .env.template .env # Configure API keys # Manual test execution (for debugging) source .env && export DEEPGRAM_API_KEY && export OPENAI_API_KEY -uv run pytest tests/test_integration.py::test_full_pipeline_integration -v -s +uv run robot --outputdir test-results --loglevel INFO ../../tests/integration/integration_test.robot # Leave test containers running for debugging (don't auto-cleanup) CLEANUP_CONTAINERS=false source .env && export DEEPGRAM_API_KEY && export OPENAI_API_KEY -uv run pytest tests/test_integration.py::test_full_pipeline_integration -v -s +uv run robot --outputdir test-results --loglevel INFO ../../tests/integration/integration_test.robot # Manual cleanup when needed docker compose -f docker-compose-test.yml down -v @@ -390,7 +390,7 @@ docker compose up --build -d ### Testing Strategy - **Local Test Scripts**: Simplified scripts (`./run-test.sh`) mirror CI workflows for local development -- **End-to-End Integration**: `test_integration.py` validates complete audio processing pipeline +- **End-to-End Integration**: Robot Framework tests (`tests/integration/integration_test.robot`) validate complete audio processing pipeline - **Speaker Recognition Tests**: 
`test_speaker_service_integration.py` validates speaker identification - **Environment Flexibility**: Tests work with both local .env files and CI environment variables - **Automated Cleanup**: Test containers are automatically removed after execution diff --git a/Docs/getting-started.md b/Docs/getting-started.md index dfa3dabf..a923c99c 100644 --- a/Docs/getting-started.md +++ b/Docs/getting-started.md @@ -179,9 +179,9 @@ After configuration, verify everything works with the integration test suite: # Alternative: Manual test with detailed logging source .env && export DEEPGRAM_API_KEY OPENAI_API_KEY && \ - uv run pytest tests/test_integration.py -vv -s --log-cli-level=INFO + uv run robot --outputdir ../../test-results --loglevel INFO ../../tests/integration/integration_test.robot ``` -This end-to-end test validates the complete audio processing pipeline. +This end-to-end test validates the complete audio processing pipeline using Robot Framework. ## Using the System @@ -342,7 +342,7 @@ curl -X POST "http://localhost:8000/api/process-audio-files" \ **Implementation**: - **Memory System**: `src/advanced_omi_backend/memory/memory_service.py` + `src/advanced_omi_backend/controllers/memory_controller.py` -- **Configuration**: memory settings in `config.yml` (memory section) +- **Configuration**: memory settings in `config/config.yml` (memory section) ### Authentication & Security - **Email Authentication**: Login with email and password @@ -541,10 +541,10 @@ OPENMEMORY_MCP_URL=http://host.docker.internal:8765 > ๐ŸŽฏ **New to memory configuration?** Read our [Memory Configuration Guide](./memory-configuration-guide.md) for a step-by-step setup guide with examples. -The system uses **centralized configuration** via `config.yml` for all models (LLM, embeddings, vector store) and memory extraction settings. +The system uses **centralized configuration** via `config/config.yml` for all models (LLM, embeddings, vector store) and memory extraction settings. 
### Configuration File Location -- **Path**: repository `config.yml` (override with `CONFIG_FILE` env var) +- **Path**: repository `config/config.yml` (override with `CONFIG_FILE` env var) - **Hot-reload**: Changes are applied on next processing cycle (no restart required) - **Fallback**: If file is missing, system uses safe defaults with environment variables @@ -613,7 +613,7 @@ If you experience JSON parsing errors in fact extraction: 2. **Enable fact extraction** with reliable JSON output: ```yaml - # In config.yml (memory section) + # In config/config.yml (memory section) fact_extraction: enabled: true # Safe to enable with GPT-4o ``` @@ -727,5 +727,5 @@ curl -H "Authorization: Bearer $ADMIN_TOKEN" \ - **Connect audio clients** using the WebSocket API - **Explore the dashboard** to manage conversations and users - **Review the user data architecture** for understanding data organization -- **Customize memory extraction** by editing the `memory` section in `config.yml` +- **Customize memory extraction** by editing the `memory` section in `config/config.yml` - **Monitor processing performance** using debug API endpoints diff --git a/backends/advanced/.env.template b/backends/advanced/.env.template index 18a30d8a..a63ab6f5 100644 --- a/backends/advanced/.env.template +++ b/backends/advanced/.env.template @@ -45,18 +45,14 @@ OPENAI_MODEL=gpt-4o-mini # CHAT_TEMPERATURE=0.7 # ======================================== -# SPEECH-TO-TEXT CONFIGURATION (Choose one) +# SPEECH-TO-TEXT CONFIGURATION (API Keys Only) # ======================================== +# Provider selection is in config.yml (defaults.stt) -# Option 1: Deepgram (recommended for best transcription quality) +# Deepgram (cloud-based, recommended) DEEPGRAM_API_KEY= -# Option 2: Parakeet ASR service from extras/asr-services -# PARAKEET_ASR_URL=http://host.docker.internal:8767 - -# Optional: Specify which provider to use ('deepgram' or 'parakeet') -# If not set, will auto-select based on available 
configuration (Deepgram preferred) -# TRANSCRIPTION_PROVIDER= +# Note: Parakeet ASR URL configured in config.yml # ======================================== # SPEECH DETECTION CONFIGURATION diff --git a/backends/advanced/Docs/README.md b/backends/advanced/Docs/README.md index abddef9b..11e683e8 100644 --- a/backends/advanced/Docs/README.md +++ b/backends/advanced/Docs/README.md @@ -13,7 +13,7 @@ Welcome to chronicle! This guide provides the optimal reading sequence to unders - What the system does (voice → memories) - Key features and capabilities - Basic setup and configuration -- **Code References**: `src/advanced_omi_backend/main.py`, `config.yml`, `docker-compose.yml` +- **Code References**: `src/advanced_omi_backend/main.py`, `config/config.yml`, `docker-compose.yml` ### 2. **[System Architecture](./architecture.md)** **Read second** - Complete technical architecture with diagrams @@ -70,7 +70,7 @@ Welcome to chronicle! This guide provides the optimal reading sequence to unders ## 🔍 **Configuration & Customization** -### 6. **Configuration File** → `../config.yml` +### 6. **Configuration File** → `../config/config.yml` **Central configuration for all extraction** - Memory extraction settings and prompts - Quality control and debug settings @@ -86,11 +86,11 @@ Welcome to chronicle! This guide provides the optimal reading sequence to unders 1. [quickstart.md](./quickstart.md) - System overview 2. [architecture.md](./architecture.md) - Technical architecture 3. `src/advanced_omi_backend/main.py` - Core imports and setup -4. `config.yml` - Configuration overview +4. `config/config.yml` - Configuration overview ### **"I want to work on memory extraction"** 1. [memories.md](./memories.md) - Memory system details -2. `../config.yml` - Models and memory configuration +2. `../config/config.yml` - Models and memory configuration 3. `src/advanced_omi_backend/memory/memory_service.py` - Implementation 4. 
`src/advanced_omi_backend/controllers/memory_controller.py` - Processing triggers @@ -130,7 +130,7 @@ backends/advanced-backend/ │ │ └── memory_service.py # Memory system (Mem0) │ └── model_registry.py # Configuration loading │ -├── config.yml # 📋 Central configuration +├── config/config.yml # 📋 Central configuration ├── MEMORY_DEBUG_IMPLEMENTATION.md # Debug system details ``` @@ -148,7 +148,7 @@ backends/advanced-backend/ ### **Configuration** - **Loading**: `src/advanced_omi_backend/model_registry.py` -- **File**: `config.yml` +- **File**: `config/config.yml` - **Usage**: `src/advanced_omi_backend/memory/memory_service.py` ### **Authentication** @@ -162,7 +162,7 @@ backends/advanced-backend/ 1. **Follow the references**: Each doc links to specific code files and line numbers 2. **Use the debug API**: `GET /api/debug/memory/stats` shows live system status -3. **Check configuration first**: Many behaviors are controlled by `config.yml` +3. **Check configuration first**: Many behaviors are controlled by `config/config.yml` 4. **Understand the memory pipeline**: Memories (end-of-conversation) 5. **Test with curl**: All API endpoints have curl examples in the docs @@ -175,20 +175,20 @@ backends/advanced-backend/ 1. **Set up the system**: Follow [quickstart.md](./quickstart.md) to get everything running 2. **Test the API**: Use the curl examples in the documentation to test endpoints 3. **Explore the debug system**: Check `GET /api/debug/memory/stats` to see live data -4. **Modify configuration**: Edit `config.yml` (memory section) to see how it affects extraction +4. **Modify configuration**: Edit `config/config.yml` (memory section) to see how it affects extraction 5. 
**Read the code**: Start with `src/advanced_omi_backend/main.py` and follow the references in each doc ### **Contributing Guidelines** - **Add code references**: When updating docs, include file paths and line numbers - **Test your changes**: Use the debug API to verify your modifications work -- **Update configuration**: Add new settings to `config.yml` when needed +- **Update configuration**: Add new settings to `config/config.yml` when needed - **Follow the architecture**: Keep memories in their respective services ### **Getting Help** - **Debug API**: `GET /api/debug/memory/*` endpoints show real-time system status -- **Configuration**: Check `config.yml` for behavior controls +- **Configuration**: Check `config/config.yml` for behavior controls - **Logs**: Check Docker logs with `docker compose logs chronicle-backend` - **Documentation**: Each doc file links to relevant code sections diff --git a/backends/advanced/Docs/contribution.md b/backends/advanced/Docs/contribution.md index a5766828..b78f4a5a 100644 --- a/backends/advanced/Docs/contribution.md +++ b/backends/advanced/Docs/contribution.md @@ -1,12 +1,12 @@ 1. Docs/quickstart.md (15 min) 2. Docs/architecture.md (20 min) 3. main.py - just the imports and WebSocket sections (15 min) - 4. config.yml (memory section) (10 min) + 4. config/config.yml (memory section) (10 min) ๐Ÿ”ง "I want to work on memory extraction" 1. Docs/quickstart.md โ†’ Docs/memories.md - 2. config.yml (memory.extraction section) + 2. config/config.yml (memory.extraction section) 3. main.py lines 1047-1065 (trigger) 4. main.py lines 1163-1195 (processing) 5. 
src/memory/memory_service.py diff --git a/backends/advanced/Docs/memories.md b/backends/advanced/Docs/memories.md index 38eed697..cae98383 100644 --- a/backends/advanced/Docs/memories.md +++ b/backends/advanced/Docs/memories.md @@ -10,7 +10,7 @@ This document explains how to configure and customize the memory service in the - **Repository Layer**: `src/advanced_omi_backend/conversation_repository.py` (clean data access) - **Processing Manager**: `src/advanced_omi_backend/processors.py` (MemoryProcessor class) - **Conversation Management**: `src/advanced_omi_backend/conversation_manager.py` (lifecycle coordination) -- **Configuration**: `config.yml` (memory section) + `src/model_registry.py` +- **Configuration**: `config/config.yml` (memory section) + `src/model_registry.py` ## Overview @@ -180,7 +180,7 @@ OPENAI_MODEL=gpt-5-mini # Recommended for reliable JSON output # OPENAI_MODEL=gpt-3.5-turbo # Budget option ``` -Or configure via `config.yml` (memory block): +Or configure via `config/config.yml` (memory block): ```yaml memory_extraction: diff --git a/backends/advanced/Docs/memory-configuration-guide.md b/backends/advanced/Docs/memory-configuration-guide.md index 9a694ac5..12796e13 100644 --- a/backends/advanced/Docs/memory-configuration-guide.md +++ b/backends/advanced/Docs/memory-configuration-guide.md @@ -6,10 +6,10 @@ This guide helps you set up and configure the memory system for the Friend Advan 1. **Copy the template configuration**: ```bash -Edit the `memory` section of `config.yml`. +Edit the `memory` section of `config/config.yml`. ``` -2. **Edit `config.yml`** with your preferred settings in the `memory` section: +2. 
**Edit `config/config.yml`** with your preferred settings in the `memory` section: ```yaml memory: provider: "mem0" # or "basic" for simpler setup @@ -127,6 +127,6 @@ memory: ## Next Steps -- Configure action items detection in `config.yml` (memory.extraction) +- Configure action items detection in `config/config.yml` (memory.extraction) - Set up custom prompt templates for your use case - Monitor memory processing in the debug dashboard diff --git a/backends/advanced/Docs/quickstart.md b/backends/advanced/Docs/quickstart.md index 6e7f03a2..0d681978 100644 --- a/backends/advanced/Docs/quickstart.md +++ b/backends/advanced/Docs/quickstart.md @@ -177,9 +177,9 @@ After configuration, verify everything works with the integration test suite: # Alternative: Manual test with detailed logging source .env && export DEEPGRAM_API_KEY OPENAI_API_KEY && \ - uv run pytest tests/test_integration.py -vv -s --log-cli-level=INFO + uv run robot --outputdir ../../test-results --loglevel INFO ../../tests/integration/integration_test.robot ``` -This end-to-end test validates the complete audio processing pipeline. +This end-to-end test validates the complete audio processing pipeline using Robot Framework. ## Using the System @@ -340,7 +340,7 @@ curl -X POST "http://localhost:8000/api/audio/upload" \ **Implementation**: - **Memory System**: `src/advanced_omi_backend/memory/memory_service.py` + `src/advanced_omi_backend/controllers/memory_controller.py` -- **Configuration**: `config.yml` (memory + models) in repo root +- **Configuration**: `config/config.yml` (memory + models) in repo root ### Authentication & Security - **Email Authentication**: Login with email and password @@ -539,10 +539,10 @@ OPENMEMORY_MCP_URL=http://host.docker.internal:8765 > ๐ŸŽฏ **New to memory configuration?** Read our [Memory Configuration Guide](./memory-configuration-guide.md) for a step-by-step setup guide with examples. 
-The system uses **centralized configuration** via `config.yml` for all memory extraction and model settings. +The system uses **centralized configuration** via `config/config.yml` for all memory extraction and model settings. ### Configuration File Location -- **Path**: `config.yml` in repo root +- **Path**: `config/config.yml` in repo root - **Hot-reload**: Changes are applied on next processing cycle (no restart required) - **Fallback**: If file is missing, system uses safe defaults with environment variables @@ -611,7 +611,7 @@ If you experience JSON parsing errors in fact extraction: 2. **Enable fact extraction** with reliable JSON output: ```yaml - # In config.yml (memory section) + # In config/config.yml (memory section) fact_extraction: enabled: true # Safe to enable with GPT-4o ``` @@ -725,5 +725,5 @@ curl -H "Authorization: Bearer $ADMIN_TOKEN" \ - **Connect audio clients** using the WebSocket API - **Explore the dashboard** to manage conversations and users - **Review the user data architecture** for understanding data organization -- **Customize memory extraction** by editing the `memory` section in `config.yml` +- **Customize memory extraction** by editing the `memory` section in `config/config.yml` - **Monitor processing performance** using debug API endpoints diff --git a/backends/advanced/README.md b/backends/advanced/README.md index ab86a22e..d493241c 100644 --- a/backends/advanced/README.md +++ b/backends/advanced/README.md @@ -100,14 +100,21 @@ See [Docs/HTTPS_SETUP.md](Docs/HTTPS_SETUP.md) for detailed configuration. 
To run integration tests with different transcription providers: ```bash -# Test with Parakeet ASR (offline transcription) -# Automatically starts test ASR service - no manual setup required -source .env && export DEEPGRAM_API_KEY && export OPENAI_API_KEY && TRANSCRIPTION_PROVIDER=parakeet uv run pytest tests/test_integration.py::test_full_pipeline_integration -v -s --tb=short +# Test with different configurations using config.yml files +# Test configs located in tests/configs/ -# Test with Deepgram (default) -source .env && export DEEPGRAM_API_KEY && export OPENAI_API_KEY && uv run pytest tests/test_integration.py::test_full_pipeline_integration -v -s --tb=short +# Test with Parakeet ASR + Ollama (offline, no API keys) +CONFIG_FILE=../../tests/configs/parakeet-ollama.yml ./run-test.sh + +# Test with Deepgram + OpenAI (cloud-based) +CONFIG_FILE=../../tests/configs/deepgram-openai.yml ./run-test.sh + +# Manual Robot Framework test execution +source .env && export DEEPGRAM_API_KEY OPENAI_API_KEY && \ + uv run robot --outputdir ../../test-results --loglevel INFO ../../tests/integration/integration_test.robot ``` **Prerequisites:** -- API keys configured in `.env` file -- For debugging: Set `CACHED_MODE = True` in test file to keep containers running +- API keys configured in `.env` file (for cloud providers) +- Test configurations in `tests/configs/` directory +- For debugging: Set `CLEANUP_CONTAINERS=false` environment variable to keep containers running diff --git a/backends/advanced/SETUP_SCRIPTS.md b/backends/advanced/SETUP_SCRIPTS.md index 8fbc0ab2..b45c8910 100644 --- a/backends/advanced/SETUP_SCRIPTS.md +++ b/backends/advanced/SETUP_SCRIPTS.md @@ -6,7 +6,7 @@ This document explains the different setup scripts available in Friend-Lite and | Script | Purpose | When to Use | |--------|---------|-------------| -| `init.py` | **Main interactive setup wizard** | **Recommended for all users** - First time setup with guided configuration (located at repo root). 
Memory now configured in `config.yml`. | +| `init.py` | **Main interactive setup wizard** | **Recommended for all users** - First time setup with guided configuration (located at repo root). Memory now configured in `config/config.yml`. | | `setup-https.sh` | HTTPS certificate generation | **Optional** - When you need secure connections for microphone access | ## Main Setup Script: `init.py` diff --git a/backends/advanced/docker-compose-test.yml b/backends/advanced/docker-compose-test.yml index 20b4fd08..3b0e1eaf 100644 --- a/backends/advanced/docker-compose-test.yml +++ b/backends/advanced/docker-compose-test.yml @@ -14,7 +14,7 @@ services: - ./data/test_audio_chunks:/app/audio_chunks - ./data/test_debug_dir:/app/debug_dir - ./data/test_data:/app/data - - ../../config.yml:/app/config.yml:ro # Mount config.yml for model registry and memory settings + - ${CONFIG_FILE:-../../config/config.yml}:/app/config.yml:ro # Mount config.yml for model registry and memory settings environment: # Override with test-specific settings - MONGODB_URI=mongodb://mongo-test:27017/test_db @@ -58,8 +58,8 @@ services: test: ["CMD", "curl", "-f", "http://localhost:8000/readiness"] interval: 10s timeout: 5s - retries: 5 - start_period: 30s + retries: 10 + start_period: 60s restart: unless-stopped webui-test: @@ -129,7 +129,7 @@ services: - ./data/test_audio_chunks:/app/audio_chunks - ./data/test_debug_dir:/app/debug_dir - ./data/test_data:/app/data - - ../../config.yml:/app/config.yml:ro # Mount config.yml for model registry and memory settings + - ${CONFIG_FILE:-../../config/config.yml}:/app/config.yml:ro # Mount config.yml for model registry and memory settings environment: # Same environment as backend - MONGODB_URI=mongodb://mongo-test:27017/test_db diff --git a/backends/advanced/docker-compose.yml b/backends/advanced/docker-compose.yml index 77d7e756..f46a23fa 100644 --- a/backends/advanced/docker-compose.yml +++ b/backends/advanced/docker-compose.yml @@ -12,12 +12,9 @@ services: - 
./data/audio_chunks:/app/audio_chunks - ./data/debug_dir:/app/debug_dir - ./data:/app/data - - ../../config.yml:/app/config.yml # Removed :ro to allow UI config saving + - ../../config/config.yml:/app/config.yml # Removed :ro to allow UI config saving environment: - DEEPGRAM_API_KEY=${DEEPGRAM_API_KEY} - - MISTRAL_API_KEY=${MISTRAL_API_KEY} - - MISTRAL_MODEL=${MISTRAL_MODEL} - - TRANSCRIPTION_PROVIDER=${TRANSCRIPTION_PROVIDER} - PARAKEET_ASR_URL=${PARAKEET_ASR_URL} - OLLAMA_BASE_URL=${OLLAMA_BASE_URL} - HF_TOKEN=${HF_TOKEN} @@ -63,12 +60,9 @@ services: - ./start-workers.sh:/app/start-workers.sh - ./data/audio_chunks:/app/audio_chunks - ./data:/app/data - - ../../config.yml:/app/config.yml # Removed :ro for consistency + - ../../config/config.yml:/app/config.yml # Removed :ro for consistency environment: - DEEPGRAM_API_KEY=${DEEPGRAM_API_KEY} - - MISTRAL_API_KEY=${MISTRAL_API_KEY} - - MISTRAL_MODEL=${MISTRAL_MODEL} - - TRANSCRIPTION_PROVIDER=${TRANSCRIPTION_PROVIDER} - PARAKEET_ASR_URL=${PARAKEET_ASR_URL} - OPENAI_API_KEY=${OPENAI_API_KEY} - GROQ_API_KEY=${GROQ_API_KEY} diff --git a/backends/advanced/init.py b/backends/advanced/init.py index 851d56e1..c68fa10f 100644 --- a/backends/advanced/init.py +++ b/backends/advanced/init.py @@ -22,13 +22,17 @@ from rich.prompt import Confirm, Prompt from rich.text import Text +# Add repo root to path for config_manager import +sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent)) +from config_manager import ConfigManager + class ChronicleSetup: def __init__(self, args=None): self.console = Console() self.config: Dict[str, Any] = {} self.args = args or argparse.Namespace() - self.config_yml_path = Path("../../config.yml") # Repo root config.yml + self.config_yml_path = Path("../../config/config.yml") # Main config at config/config.yml self.config_yml_data = None # Check if we're in the right directory @@ -36,8 +40,15 @@ def __init__(self, args=None): self.console.print("[red][ERROR][/red] Please run this script 
from the backends/advanced directory") sys.exit(1) - # Load config.yml if it exists - self.load_config_yml() + # Initialize ConfigManager + self.config_manager = ConfigManager(service_path="backends/advanced") + self.console.print(f"[blue][INFO][/blue] Using config.yml at: {self.config_manager.config_yml_path}") + + # Load existing config or create default structure + self.config_yml_data = self.config_manager.get_full_config() + if not self.config_yml_data: + self.console.print("[yellow][WARNING][/yellow] config.yml not found, will create default structure") + self.config_yml_data = self._get_default_config_structure() def print_header(self, title: str): """Print a colorful header""" @@ -126,21 +137,6 @@ def mask_api_key(self, key: str, show_chars: int = 5) -> str: return f"{key_clean[:show_chars]}{'*' * min(15, len(key_clean) - show_chars * 2)}{key_clean[-show_chars:]}" - def load_config_yml(self): - """Load config.yml from repository root""" - if not self.config_yml_path.exists(): - self.console.print(f"[yellow][WARNING][/yellow] config.yml not found at {self.config_yml_path}") - self.console.print("[yellow]Will create a new config.yml during setup[/yellow]") - self.config_yml_data = self._get_default_config_structure() - return - - try: - with open(self.config_yml_path, 'r') as f: - self.config_yml_data = yaml.safe_load(f) - self.console.print(f"[blue][INFO][/blue] Loaded existing config.yml") - except Exception as e: - self.console.print(f"[red][ERROR][/red] Failed to load config.yml: {e}") - self.config_yml_data = self._get_default_config_structure() def _get_default_config_structure(self) -> Dict[str, Any]: """Return default config.yml structure if file doesn't exist""" @@ -163,36 +159,6 @@ def _get_default_config_structure(self) -> Dict[str, Any]: } } - def save_config_yml(self): - """Save config.yml back to repository root""" - try: - # Backup existing config.yml if it exists - if self.config_yml_path.exists(): - timestamp = 
datetime.now().strftime("%Y%m%d_%H%M%S") - backup_path = self.config_yml_path.parent / f"config.yml.backup.{timestamp}" - shutil.copy2(self.config_yml_path, backup_path) - self.console.print(f"[blue][INFO][/blue] Backed up config.yml to {backup_path.name}") - - # Write updated config - with open(self.config_yml_path, 'w') as f: - yaml.dump(self.config_yml_data, f, default_flow_style=False, sort_keys=False) - - self.console.print("[green][SUCCESS][/green] config.yml updated successfully") - except Exception as e: - self.console.print(f"[red][ERROR][/red] Failed to save config.yml: {e}") - raise - - def update_config_default(self, key: str, value: str): - """Update a default value in config.yml""" - if "defaults" not in self.config_yml_data: - self.config_yml_data["defaults"] = {} - self.config_yml_data["defaults"][key] = value - - def update_memory_config(self, updates: Dict[str, Any]): - """Update memory configuration in config.yml""" - if "memory" not in self.config_yml_data: - self.config_yml_data["memory"] = {} - self.config_yml_data["memory"].update(updates) def setup_authentication(self): """Configure authentication settings""" @@ -207,16 +173,19 @@ def setup_authentication(self): self.console.print("[green][SUCCESS][/green] Admin account configured") def setup_transcription(self): - """Configure transcription provider""" + """Configure transcription provider - updates config.yml and .env""" self.print_section("Speech-to-Text Configuration") - + + self.console.print("[blue][INFO][/blue] Provider selection is configured in config.yml (defaults.stt)") + self.console.print("[blue][INFO][/blue] API keys are stored in .env") + self.console.print() + choices = { - "1": "Deepgram (recommended - high quality, requires API key)", - "2": "Mistral (Voxtral models - requires API key)", - "3": "Offline (Parakeet ASR - requires GPU, runs locally)", - "4": "None (skip transcription setup)" + "1": "Deepgram (recommended - high quality, cloud-based)", + "2": "Offline (Parakeet 
ASR - requires GPU, runs locally)", + "3": "None (skip transcription setup)" } - + choice = self.prompt_choice("Choose your transcription provider:", choices, "1") if choice == "1": @@ -234,44 +203,34 @@ def setup_transcription(self): api_key = self.prompt_value("Deepgram API key (leave empty to skip)", "") if api_key: - self.config["TRANSCRIPTION_PROVIDER"] = "deepgram" + # Write API key to .env self.config["DEEPGRAM_API_KEY"] = api_key - self.console.print("[green][SUCCESS][/green] Deepgram configured") - else: - self.console.print("[yellow][WARNING][/yellow] No API key provided - transcription will not work") - - elif choice == "2": - self.config["TRANSCRIPTION_PROVIDER"] = "mistral" - self.console.print("[blue][INFO][/blue] Mistral selected") - self.console.print("Get your API key from: https://console.mistral.ai/") - - # Check for existing API key - existing_key = self.read_existing_env_value("MISTRAL_API_KEY") - if existing_key and existing_key not in ['your_mistral_api_key_here', 'your-mistral-key-here']: - masked_key = self.mask_api_key(existing_key) - prompt_text = f"Mistral API key ({masked_key}) [press Enter to reuse, or enter new]" - api_key_input = self.prompt_value(prompt_text, "") - api_key = api_key_input if api_key_input else existing_key - else: - api_key = self.prompt_value("Mistral API key (leave empty to skip)", "") - model = self.prompt_value("Mistral model", "voxtral-mini-2507") + # Update config.yml to use Deepgram + self.config_manager.update_config_defaults({"stt": "stt-deepgram"}) + self.config_yml_data = self.config_manager.get_full_config() # Reload - if api_key: - self.config["MISTRAL_API_KEY"] = api_key - self.config["MISTRAL_MODEL"] = model - self.console.print("[green][SUCCESS][/green] Mistral configured") + self.console.print("[green][SUCCESS][/green] Deepgram configured in config.yml and .env") + self.console.print("[blue][INFO][/blue] Set defaults.stt: stt-deepgram") else: self.console.print("[yellow][WARNING][/yellow] No API key 
provided - transcription will not work") - elif choice == "3": - self.config["TRANSCRIPTION_PROVIDER"] = "parakeet" + elif choice == "2": self.console.print("[blue][INFO][/blue] Offline Parakeet ASR selected") parakeet_url = self.prompt_value("Parakeet ASR URL", "http://host.docker.internal:8767") + + # Write URL to .env for ${PARAKEET_ASR_URL} placeholder in config.yml self.config["PARAKEET_ASR_URL"] = parakeet_url + + # Update config.yml to use Parakeet + self.config_manager.update_config_defaults({"stt": "stt-parakeet-batch"}) + self.config_yml_data = self.config_manager.get_full_config() # Reload + + self.console.print("[green][SUCCESS][/green] Parakeet configured in config.yml and .env") + self.console.print("[blue][INFO][/blue] Set defaults.stt: stt-parakeet-batch") self.console.print("[yellow][WARNING][/yellow] Remember to start Parakeet service: cd ../../extras/asr-services && docker compose up parakeet") - elif choice == "4": + elif choice == "3": self.console.print("[blue][INFO][/blue] Skipping transcription setup") def setup_llm(self): @@ -306,8 +265,8 @@ def setup_llm(self): if api_key: self.config["OPENAI_API_KEY"] = api_key # Update config.yml to use OpenAI models - self.update_config_default("llm", "openai-llm") - self.update_config_default("embedding", "openai-embed") + self.config_manager.update_config_defaults({"llm": "openai-llm", "embedding": "openai-embed"}) + self.config_yml_data = self.config_manager.get_full_config() # Reload to stay in sync self.console.print("[green][SUCCESS][/green] OpenAI configured in config.yml") self.console.print("[blue][INFO][/blue] Set defaults.llm: openai-llm") self.console.print("[blue][INFO][/blue] Set defaults.embedding: openai-embed") @@ -317,8 +276,8 @@ def setup_llm(self): elif choice == "2": self.console.print("[blue][INFO][/blue] Ollama selected") # Update config.yml to use Ollama models - self.update_config_default("llm", "local-llm") - self.update_config_default("embedding", "local-embed") + 
self.config_manager.update_config_defaults({"llm": "local-llm", "embedding": "local-embed"}) + self.config_yml_data = self.config_manager.get_full_config() # Reload to stay in sync self.console.print("[green][SUCCESS][/green] Ollama configured in config.yml") self.console.print("[blue][INFO][/blue] Set defaults.llm: local-llm") self.console.print("[blue][INFO][/blue] Set defaults.embedding: local-embed") @@ -327,7 +286,8 @@ def setup_llm(self): elif choice == "3": self.console.print("[blue][INFO][/blue] Skipping LLM setup - memory extraction disabled") # Disable memory extraction in config.yml - self.update_memory_config({"extraction": {"enabled": False}}) + self.config_manager.update_memory_config({"extraction": {"enabled": False}}) + self.config_yml_data = self.config_manager.get_full_config() # Reload to stay in sync def setup_memory(self): """Configure memory provider - updates config.yml""" @@ -347,9 +307,10 @@ def setup_memory(self): qdrant_url = self.prompt_value("Qdrant URL", "qdrant") self.config["QDRANT_BASE_URL"] = qdrant_url - # Update config.yml - self.update_memory_config({"provider": "chronicle"}) - self.console.print("[green][SUCCESS][/green] Chronicle memory provider configured in config.yml") + # Update config.yml (also updates .env automatically) + self.config_manager.update_memory_config({"provider": "chronicle"}) + self.config_yml_data = self.config_manager.get_full_config() # Reload to stay in sync + self.console.print("[green][SUCCESS][/green] Chronicle memory provider configured in config.yml and .env") elif choice == "2": self.console.print("[blue][INFO][/blue] OpenMemory MCP selected") @@ -359,8 +320,8 @@ def setup_memory(self): user_id = self.prompt_value("OpenMemory user ID", "openmemory") timeout = self.prompt_value("OpenMemory timeout (seconds)", "30") - # Update config.yml with OpenMemory MCP settings - self.update_memory_config({ + # Update config.yml with OpenMemory MCP settings (also updates .env automatically) + 
self.config_manager.update_memory_config({ "provider": "openmemory_mcp", "openmemory_mcp": { "server_url": mcp_url, @@ -369,7 +330,8 @@ def setup_memory(self): "timeout": int(timeout) } }) - self.console.print("[green][SUCCESS][/green] OpenMemory MCP configured in config.yml") + self.config_yml_data = self.config_manager.get_full_config() # Reload to stay in sync + self.console.print("[green][SUCCESS][/green] OpenMemory MCP configured in config.yml and .env") self.console.print("[yellow][WARNING][/yellow] Remember to start OpenMemory: cd ../../extras/openmemory-mcp && docker compose up -d") elif choice == "3": @@ -378,15 +340,16 @@ def setup_memory(self): mycelia_url = self.prompt_value("Mycelia API URL", "http://localhost:5173") timeout = self.prompt_value("Mycelia timeout (seconds)", "30") - # Update config.yml with Mycelia settings - self.update_memory_config({ + # Update config.yml with Mycelia settings (also updates .env automatically) + self.config_manager.update_memory_config({ "provider": "mycelia", "mycelia": { "api_url": mycelia_url, "timeout": int(timeout) } }) - self.console.print("[green][SUCCESS][/green] Mycelia memory provider configured in config.yml") + self.config_yml_data = self.config_manager.get_full_config() # Reload to stay in sync + self.console.print("[green][SUCCESS][/green] Mycelia memory provider configured in config.yml and .env") self.console.print("[yellow][WARNING][/yellow] Make sure Mycelia is running at the configured URL") def setup_optional_services(self): @@ -604,10 +567,8 @@ def generate_env_file(self): self.console.print("[green][SUCCESS][/green] .env file configured successfully with secure permissions") - # Save config.yml with all updates - self.console.print() - self.console.print("[blue][INFO][/blue] Saving configuration to config.yml...") - self.save_config_yml() + # Note: config.yml is automatically saved by ConfigManager when updates are made + self.console.print("[blue][INFO][/blue] Configuration saved to config.yml 
and .env (via ConfigManager)") def copy_config_templates(self): """Copy other configuration files""" @@ -622,7 +583,15 @@ def show_summary(self): self.console.print() self.console.print(f"โœ… Admin Account: {self.config.get('ADMIN_EMAIL', 'Not configured')}") - self.console.print(f"โœ… Transcription: {self.config.get('TRANSCRIPTION_PROVIDER', 'Not configured')}") + + # Show transcription from config.yml + stt_default = self.config_yml_data.get("defaults", {}).get("stt", "not set") + stt_model = next( + (m for m in self.config_yml_data.get("models", []) if m.get("name") == stt_default), + None + ) + stt_provider = stt_model.get("model_provider", "unknown") if stt_model else "not configured" + self.console.print(f"โœ… Transcription: {stt_provider} ({stt_default}) - config.yml") # Show LLM config from config.yml llm_default = self.config_yml_data.get("defaults", {}).get("llm", "not set") @@ -726,7 +695,7 @@ def run(self): self.console.print() self.console.print("๐Ÿ“ [bold]Configuration files updated:[/bold]") self.console.print(f" โ€ข .env - API keys and environment variables") - self.console.print(f" โ€ข ../../config.yml - Model and memory provider configuration") + self.console.print(f" โ€ข ../../config/config.yml - Model and memory provider configuration") self.console.print() self.console.print("For detailed documentation, see:") self.console.print(" โ€ข Docs/quickstart.md") diff --git a/backends/advanced/run-test.sh b/backends/advanced/run-test.sh index 4f944256..17773dc1 100755 --- a/backends/advanced/run-test.sh +++ b/backends/advanced/run-test.sh @@ -41,14 +41,14 @@ print_info "========================================" # Load environment variables (CI or local) # Priority: Command-line env vars > CI environment > .env.test > .env # Save any pre-existing environment variables to preserve command-line overrides -_TRANSCRIPTION_PROVIDER_OVERRIDE=${TRANSCRIPTION_PROVIDER} _PARAKEET_ASR_URL_OVERRIDE=${PARAKEET_ASR_URL} 
_DEEPGRAM_API_KEY_OVERRIDE=${DEEPGRAM_API_KEY} _OPENAI_API_KEY_OVERRIDE=${OPENAI_API_KEY} _LLM_PROVIDER_OVERRIDE=${LLM_PROVIDER} _MEMORY_PROVIDER_OVERRIDE=${MEMORY_PROVIDER} +_CONFIG_FILE_OVERRIDE=${CONFIG_FILE} -if [ -n "$DEEPGRAM_API_KEY" ] && [ -z "$_TRANSCRIPTION_PROVIDER_OVERRIDE" ]; then +if [ -n "$DEEPGRAM_API_KEY" ]; then print_info "Using environment variables from CI/environment..." elif [ -f ".env.test" ]; then print_info "Loading environment variables from .env.test..." @@ -68,10 +68,6 @@ else fi # Restore command-line overrides (these take highest priority) -if [ -n "$_TRANSCRIPTION_PROVIDER_OVERRIDE" ]; then - export TRANSCRIPTION_PROVIDER=$_TRANSCRIPTION_PROVIDER_OVERRIDE - print_info "Using command-line override: TRANSCRIPTION_PROVIDER=$TRANSCRIPTION_PROVIDER" -fi if [ -n "$_PARAKEET_ASR_URL_OVERRIDE" ]; then export PARAKEET_ASR_URL=$_PARAKEET_ASR_URL_OVERRIDE print_info "Using command-line override: PARAKEET_ASR_URL=$PARAKEET_ASR_URL" @@ -90,36 +86,57 @@ if [ -n "$_MEMORY_PROVIDER_OVERRIDE" ]; then export MEMORY_PROVIDER=$_MEMORY_PROVIDER_OVERRIDE print_info "Using command-line override: MEMORY_PROVIDER=$MEMORY_PROVIDER" fi +if [ -n "$_CONFIG_FILE_OVERRIDE" ]; then + export CONFIG_FILE=$_CONFIG_FILE_OVERRIDE + print_info "Using command-line override: CONFIG_FILE=$CONFIG_FILE" +fi -# Verify required environment variables based on configured providers -TRANSCRIPTION_PROVIDER=${TRANSCRIPTION_PROVIDER:-deepgram} +# Set default CONFIG_FILE if not provided +# This allows testing with different provider combinations +# Usage: CONFIG_FILE=../../tests/configs/parakeet-ollama.yml ./run-test.sh +export CONFIG_FILE=${CONFIG_FILE:-../../config/config.yml} + +print_info "Using config file: $CONFIG_FILE" + +# Read STT provider from config.yml (source of truth) +STT_PROVIDER=$(uv run python -c " +from advanced_omi_backend.model_registry import get_models_registry +registry = get_models_registry() +if registry and registry.defaults: + stt_model = 
registry.get_default('stt') + if stt_model: + print(stt_model.model_provider or '') +" 2>/dev/null || echo "") + +# Fallback to environment variable for backward compatibility (will be removed) +if [ -z "$STT_PROVIDER" ]; then + STT_PROVIDER=${TRANSCRIPTION_PROVIDER:-deepgram} + print_warning "Could not read STT provider from config.yml, using TRANSCRIPTION_PROVIDER: $STT_PROVIDER" +fi + +# LLM provider can still use env var as it's not part of this refactor LLM_PROVIDER=${LLM_PROVIDER:-openai} print_info "Configured providers:" -print_info " TRANSCRIPTION_PROVIDER: $TRANSCRIPTION_PROVIDER" -print_info " LLM_PROVIDER: $LLM_PROVIDER" +print_info " STT Provider (from config.yml): $STT_PROVIDER" +print_info " LLM Provider: $LLM_PROVIDER" -# Check transcription provider API key -case "$TRANSCRIPTION_PROVIDER" in +# Check transcription provider API key based on config.yml +case "$STT_PROVIDER" in deepgram) if [ -z "$DEEPGRAM_API_KEY" ]; then - print_error "DEEPGRAM_API_KEY not set (required for TRANSCRIPTION_PROVIDER=deepgram)" + print_error "DEEPGRAM_API_KEY not set (required for STT provider: deepgram)" exit 1 fi print_info "DEEPGRAM_API_KEY length: ${#DEEPGRAM_API_KEY}" ;; - mistral) - if [ -z "$MISTRAL_API_KEY" ]; then - print_error "MISTRAL_API_KEY not set (required for TRANSCRIPTION_PROVIDER=mistral)" - exit 1 - fi - print_info "MISTRAL_API_KEY length: ${#MISTRAL_API_KEY}" - ;; - offline|parakeet) - print_info "Using offline/local transcription - no API key required" + parakeet) + print_info "Using Parakeet (local transcription) - no API key required" + PARAKEET_ASR_URL=${PARAKEET_ASR_URL:-http://localhost:8767} + print_info "PARAKEET_ASR_URL: $PARAKEET_ASR_URL" ;; *) - print_warning "Unknown TRANSCRIPTION_PROVIDER: $TRANSCRIPTION_PROVIDER" + print_warning "Unknown STT provider from config.yml: $STT_PROVIDER" ;; esac @@ -162,6 +179,9 @@ print_info "Using environment variables from .env file for test configuration" print_info "Cleaning test environment..." 
sudo rm -rf ./test_audio_chunks/ ./test_data/ ./test_debug_dir/ ./mongo_data_test/ ./qdrant_data_test/ ./test_neo4j/ || true +# Use unique project name to avoid conflicts with development environment +export COMPOSE_PROJECT_NAME="advanced-backend-test" + # Stop any existing test containers print_info "Stopping existing test containers..." docker compose -f docker-compose-test.yml down -v || true @@ -185,9 +205,14 @@ fi # Set environment variables for the test export DOCKER_BUILDKIT=0 -# Run the integration test with extended timeout (mem0 needs time for comprehensive extraction) -print_info "Starting integration test (timeout: 15 minutes)..." -if timeout 900 uv run pytest tests/test_integration.py::test_full_pipeline_integration -v -s --tb=short --log-cli-level=INFO; then +# Configure Robot Framework test mode +# TEST_MODE=dev: Robot tests keep containers running (cleanup handled by run-test.sh) +# This allows CLEANUP_CONTAINERS flag to work as expected +export TEST_MODE=dev + +# Run the Robot Framework integration tests with extended timeout (mem0 needs time for comprehensive extraction) +print_info "Starting Robot Framework integration tests (timeout: 15 minutes)..." +if timeout 900 uv run robot --outputdir ../../test-results --loglevel INFO ../../tests/integration/integration_test.robot; then print_success "Integration tests completed successfully!" else TEST_EXIT_CODE=$? 
diff --git a/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py index b29ca88d..50ffc77f 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py @@ -302,22 +302,22 @@ async def _initialize_streaming_session( client_state.stream_audio_format = audio_format application_logger.info(f"๐Ÿ†” Created stream session: {client_state.stream_session_id}") - # Determine transcription provider from environment - transcription_provider = os.getenv("TRANSCRIPTION_PROVIDER", "").lower() - if transcription_provider == "parakeet": - provider = "parakeet" - elif transcription_provider == "deepgram": - provider = "deepgram" - else: - # Auto-detect: prefer Parakeet if URL is set, otherwise Deepgram - parakeet_url = os.getenv("PARAKEET_ASR_URL") - deepgram_key = os.getenv("DEEPGRAM_API_KEY") - if parakeet_url: - provider = "parakeet" - elif deepgram_key: - provider = "deepgram" - else: - raise ValueError("No transcription provider configured (DEEPGRAM_API_KEY or PARAKEET_ASR_URL required)") + # Determine transcription provider from config.yml + from advanced_omi_backend.model_registry import get_models_registry + + registry = get_models_registry() + if not registry: + raise ValueError("config.yml not found - cannot determine transcription provider") + + stt_model = registry.get_default("stt") + if not stt_model: + raise ValueError("No default STT model configured in config.yml (defaults.stt)") + + provider = stt_model.model_provider.lower() + if provider not in ["deepgram", "parakeet"]: + raise ValueError(f"Unsupported STT provider: {provider}. 
Expected: deepgram or parakeet") + + application_logger.info(f"๐Ÿ“‹ Using STT provider: {provider} (model: {stt_model.name})") # Initialize session tracking in Redis await audio_stream_producer.init_session( diff --git a/backends/advanced/src/advanced_omi_backend/routers/modules/health_routes.py b/backends/advanced/src/advanced_omi_backend/routers/modules/health_routes.py index 3869026a..d7a62ba9 100644 --- a/backends/advanced/src/advanced_omi_backend/routers/modules/health_routes.py +++ b/backends/advanced/src/advanced_omi_backend/routers/modules/health_routes.py @@ -128,7 +128,11 @@ async def health_check(): if transcription_provider else "Not configured" ), - "transcription_provider": _stt_name or "not set", + "transcription_provider": ( + REGISTRY.get_default("stt").name if REGISTRY and REGISTRY.get_default("stt") + else "not configured" + ), + "provider_type": ( transcription_provider.mode if transcription_provider else "none" ), diff --git a/backends/advanced/start-workers.sh b/backends/advanced/start-workers.sh index a5ca2798..2ed50727 100755 --- a/backends/advanced/start-workers.sh +++ b/backends/advanced/start-workers.sh @@ -51,23 +51,36 @@ start_workers() { uv run python -m advanced_omi_backend.workers.rq_worker_entry audio & AUDIO_PERSISTENCE_WORKER_PID=$! - # Only start Deepgram worker if DEEPGRAM_API_KEY is set - if [ -n "$DEEPGRAM_API_KEY" ]; then + # Determine which STT provider to use from config.yml + echo "๐Ÿ“‹ Checking config.yml for default STT provider..." 
+ DEFAULT_STT=$(uv run python -c " +from advanced_omi_backend.model_registry import get_models_registry +registry = get_models_registry() +if registry and registry.defaults: + stt_model = registry.get_default('stt') + if stt_model: + print(stt_model.model_provider or '') +" 2>/dev/null || echo "") + + echo "๐Ÿ“‹ Configured STT provider: ${DEFAULT_STT:-none}" + + # Only start Deepgram worker if configured as default STT + if [[ "$DEFAULT_STT" == "deepgram" ]] && [ -n "$DEEPGRAM_API_KEY" ]; then echo "๐ŸŽต Starting audio stream Deepgram worker (1 worker for sequential processing)..." uv run python -m advanced_omi_backend.workers.audio_stream_deepgram_worker & AUDIO_STREAM_DEEPGRAM_WORKER_PID=$! else - echo "โญ๏ธ Skipping Deepgram stream worker (DEEPGRAM_API_KEY not set)" + echo "โญ๏ธ Skipping Deepgram stream worker (not configured as default STT or API key missing)" AUDIO_STREAM_DEEPGRAM_WORKER_PID="" fi - # Only start Parakeet worker if PARAKEET_ASR_URL is set - if [ -n "$PARAKEET_ASR_URL" ]; then + # Only start Parakeet worker if configured as default STT + if [[ "$DEFAULT_STT" == "parakeet" ]]; then echo "๐ŸŽต Starting audio stream Parakeet worker (1 worker for sequential processing)..." uv run python -m advanced_omi_backend.workers.audio_stream_parakeet_worker & AUDIO_STREAM_PARAKEET_WORKER_PID=$! else - echo "โญ๏ธ Skipping Parakeet stream worker (PARAKEET_ASR_URL not set)" + echo "โญ๏ธ Skipping Parakeet stream worker (not configured as default STT)" AUDIO_STREAM_PARAKEET_WORKER_PID="" fi diff --git a/backends/advanced/tests/test_integration.py b/backends/advanced/tests/test_integration.py deleted file mode 100644 index 201eaafd..00000000 --- a/backends/advanced/tests/test_integration.py +++ /dev/null @@ -1,1591 +0,0 @@ -#!/usr/bin/env python3 -""" -End-to-end integration test for Chronicle backend with unified transcription support. - -This test validates the complete audio processing pipeline using isolated test environment: -1. 
Service startup with docker-compose-test.yml (isolated ports and databases) -2. ASR service startup (if Parakeet provider selected) -3. Authentication with test credentials -4. Audio file upload -5. Transcription (Deepgram API or Parakeet ASR service) -6. Memory extraction (OpenAI) -7. Data storage verification - -Run with: - # Deepgram API transcription (default) - source .env && export DEEPGRAM_API_KEY && export OPENAI_API_KEY && uv run pytest tests/test_integration.py::test_full_pipeline_integration -v -s - - # Parakeet ASR transcription (HTTP/WebSocket service) - source .env && export OPENAI_API_KEY && TRANSCRIPTION_PROVIDER=parakeet uv run pytest tests/test_integration.py::test_full_pipeline_integration -v -s - -Test Environment: -- Uses docker-compose-test.yml for service isolation -- Backend runs on port 8001 (vs dev 8000) -- MongoDB on port 27018 (vs dev 27017) -- Qdrant on ports 6335/6336 (vs dev 6333/6334) -- Parakeet ASR on port 8767 (parakeet provider) -- Test credentials configured via environment variables -- Provider selection via TRANSCRIPTION_PROVIDER environment variable -""" - -import asyncio -import json -import logging -import os -import shutil -import socket -import subprocess -import sys -import time -from pathlib import Path -from typing import Optional - -import openai -import pytest -import requests -from pymongo import MongoClient - -# Configure logging with immediate output (no buffering) -logging.basicConfig( - level=logging.INFO, - format='%(asctime)s - %(levelname)s - %(message)s', - stream=sys.stdout, - force=True -) -logger = logging.getLogger(__name__) -# Ensure immediate output -logger.handlers[0].flush() if logger.handlers else None -from dotenv import load_dotenv - -# Test Configuration Flags -# REBUILD=True: Force rebuild of containers (useful when code changes) -# FRESH_RUN=True: Start with fresh data and containers (default) -# CLEANUP_CONTAINERS=True: Stop and remove containers after test (default) -REBUILD = 
os.environ.get("REBUILD", "true").lower() == "true" -FRESH_RUN = os.environ.get("FRESH_RUN", "true").lower() == "true" -CLEANUP_CONTAINERS = os.environ.get("CLEANUP_CONTAINERS", "true").lower() == "true" - -# Transcription Provider Configuration -# TRANSCRIPTION_PROVIDER: 'deepgram' (Deepgram API) or 'parakeet' (Parakeet ASR service) -TRANSCRIPTION_PROVIDER = os.environ.get("TRANSCRIPTION_PROVIDER", "deepgram") # Default to deepgram -# Get Parakeet URL from environment, fallback to port 8080 -PARAKEET_ASR_URL = os.environ.get("PARAKEET_ASR_URL", "http://host.docker.internal:8080") - -# Test Environment Configuration -# Base configuration for both providers -# NOTE: LLM configuration is now in config.yml (defaults.llm) -TEST_ENV_VARS_BASE = { - "AUTH_SECRET_KEY": "test-jwt-signing-key-for-integration-tests", - "ADMIN_PASSWORD": "test-admin-password-123", - "ADMIN_EMAIL": "test-admin@example.com", - "MONGODB_URI": "mongodb://localhost:27018", # Test port (database specified in backend) - "QDRANT_BASE_URL": "localhost", - "DISABLE_SPEAKER_RECOGNITION": "true", # Prevent segment duplication in tests -} - -# Deepgram provider configuration (API) -TEST_ENV_VARS_DEEPGRAM = { - **TEST_ENV_VARS_BASE, - "TRANSCRIPTION_PROVIDER": "deepgram", - # Deepgram API key loaded from environment -} - -# Parakeet provider configuration (HTTP/WebSocket ASR service) -TEST_ENV_VARS_PARAKEET = { - **TEST_ENV_VARS_BASE, - "TRANSCRIPTION_PROVIDER": "parakeet", - "PARAKEET_ASR_URL": PARAKEET_ASR_URL, -} - -# Select configuration based on provider -if TRANSCRIPTION_PROVIDER == "parakeet": - TEST_ENV_VARS = TEST_ENV_VARS_PARAKEET -else: # Default to deepgram - TEST_ENV_VARS = TEST_ENV_VARS_DEEPGRAM - -tests_dir = Path(__file__).parent - -# Test constants -BACKEND_URL = "http://localhost:8001" # Test backend port -TEST_AUDIO_PATH = tests_dir.parent.parent.parent / "extras/test-audios/DIY Experts Glass Blowing_16khz_mono_4min.wav" -TEST_AUDIO_PATH_PARAKEET = tests_dir / "assets" / 
"test_clip_10s.wav" # Shorter clip for parakeet testing -MAX_STARTUP_WAIT = 60 # seconds -PROCESSING_TIMEOUT = 300 # seconds for audio processing (5 minutes) - - -# Path to expected transcript file -EXPECTED_TRANSCRIPT_PATH = tests_dir / "assets/test_transcript.txt" - -# Path to expected memories file -EXPECTED_MEMORIES_PATH = tests_dir / "assets/expected_memories.json" - - -class IntegrationTestRunner: - """Manages the integration test lifecycle.""" - - def __init__(self): - print(f"๐Ÿ”ง Initializing IntegrationTestRunner", flush=True) - print(f" FRESH_RUN={FRESH_RUN}, CLEANUP_CONTAINERS={CLEANUP_CONTAINERS}, REBUILD={REBUILD}", flush=True) - print(f" TRANSCRIPTION_PROVIDER={TRANSCRIPTION_PROVIDER}", flush=True) - sys.stdout.flush() - - self.token: Optional[str] = None - self.services_started = False - self.services_started_by_test = False # Track if WE started the services - self.mongo_client: Optional[MongoClient] = None - self.fresh_run = FRESH_RUN # Use global configuration flag - self.cleanup_containers = CLEANUP_CONTAINERS # Use global cleanup flag - self.rebuild = REBUILD # Use global rebuild flag - self.asr_services_started = False # Track ASR services for parakeet provider - self.provider = TRANSCRIPTION_PROVIDER # Store provider type - - def load_expected_transcript(self) -> str: - """Load the expected transcript from the test assets file.""" - try: - # Use provider-specific expectations if available - if self.provider == "parakeet": - transcript_path = tests_dir / "assets/test_transcript_parakeet.txt" - if not transcript_path.exists(): - transcript_path = EXPECTED_TRANSCRIPT_PATH # Fallback to default - else: - transcript_path = EXPECTED_TRANSCRIPT_PATH - - with open(transcript_path, 'r', encoding='utf-8') as f: - return f.read().strip() - except FileNotFoundError: - logger.warning(f"โš ๏ธ Expected transcript file not found: {transcript_path}") - return "" - except Exception as e: - logger.warning(f"โš ๏ธ Error loading expected transcript: {e}") - 
return "" - - def load_expected_memories(self) -> list: - """Load the expected memories from the test assets file.""" - try: - # Use provider-specific expectations if available - if self.provider == "parakeet": - memories_path = tests_dir / "assets/expected_memories_parakeet.json" - if not memories_path.exists(): - memories_path = EXPECTED_MEMORIES_PATH # Fallback to default - else: - memories_path = EXPECTED_MEMORIES_PATH - - with open(memories_path, 'r', encoding='utf-8') as f: - import json - data = json.load(f) - # Handle both formats: list or dict with 'memories' key - if isinstance(data, list): - return data - elif isinstance(data, dict) and 'memories' in data: - return data['memories'] - else: - logger.warning(f"โš ๏ธ Unexpected memories file format: {type(data)}") - return [] - except FileNotFoundError: - logger.warning(f"โš ๏ธ Expected memories file not found: {memories_path}") - return [] - except Exception as e: - logger.warning(f"โš ๏ธ Error loading expected memories: {e}") - return [] - - def cleanup_test_data(self): - """Clean up test-specific data directories using lightweight Docker container.""" - if not self.fresh_run: - logger.info("๐Ÿ—‚๏ธ Skipping test data cleanup (reusing existing data)") - return - - logger.info("๐Ÿ—‚๏ธ Cleaning up test-specific data directories...") - - # Use lightweight Docker container to clean root-owned files - try: - result = subprocess.run([ - "docker", "run", "--rm", - "-v", f"{Path.cwd()}/data:/data", - "alpine:latest", - "sh", "-c", "rm -rf /data/test_*" - ], capture_output=True, text=True, timeout=30) - - if result.returncode == 0: - logger.info("โœ… Docker cleanup successful") - else: - logger.warning(f"Error during Docker cleanup: {result.stderr}") - - except Exception as e: - logger.warning(f"โš ๏ธ Docker cleanup failed: {e}") - logger.warning("๐Ÿ’ก Ensure Docker is running and accessible") - - logger.info("โœ“ Test data cleanup complete") - - def start_asr_services(self): - """Start ASR services for 
Parakeet transcription testing.""" - if self.provider != "parakeet": - logger.info(f"๐Ÿ”„ Skipping ASR services ({self.provider} provider uses API)") - return - - logger.info(f"๐Ÿš€ Starting Parakeet ASR service...") - - try: - asr_dir = Path(__file__).parent.parent.parent.parent / "extras/asr-services" - - # Stop any existing ASR services first - subprocess.run( - ["docker", "compose", "-f", "docker-compose-test.yml", "down"], - cwd=asr_dir, - capture_output=True - ) - - # Start Parakeet ASR service - result = subprocess.run( - ["docker", "compose", "-f", "docker-compose-test.yml", "up", "--build", "-d", "parakeet-asr-test"], - cwd=asr_dir, - capture_output=True, - text=True, - timeout=300 # 5 minute timeout for service startup - ) - - if result.returncode != 0: - logger.error(f"Failed to start Parakeet ASR service: {result.stderr}") - raise RuntimeError(f"Parakeet ASR service failed to start: {result.stderr}") - - self.asr_services_started = True - logger.info("โœ… Parakeet ASR service started successfully") - - except Exception as e: - logger.error(f"Error starting Parakeet ASR service: {e}") - raise - - def wait_for_asr_ready(self): - """Wait for ASR services to be ready.""" - if self.provider != "parakeet": - logger.info(f"๐Ÿ”„ Skipping ASR readiness check ({self.provider} provider uses API)") - return - - # Cascade failure check - don't wait for ASR if backend services failed - if not hasattr(self, 'services_started') or not self.services_started: - raise RuntimeError("Backend services are not running - cannot start ASR services") - - logger.info("๐Ÿ” Waiting for Parakeet ASR service to be ready...") - - start_time = time.time() - while time.time() - start_time < MAX_STARTUP_WAIT: - try: - # Check container status directly instead of HTTP health check - # This avoids the curl dependency issue in the container - result = subprocess.run( - ["docker", "ps", "--filter", "name=asr-services-parakeet-asr-test-1", "--format", "{{.Status}}"], - capture_output=True, - 
text=True, - timeout=10 - ) - - if result.returncode == 0 and result.stdout.strip(): - status = result.stdout.strip() - logger.debug(f"Container status: {status}") - - # Early exit on unhealthy containers - if "(unhealthy)" in status: - raise RuntimeError(f"Parakeet ASR container is unhealthy: {status}") - if "Exited" in status or "Dead" in status: - raise RuntimeError(f"Parakeet ASR container failed: {status}") - - # Look for 'Up' status and ideally '(healthy)' status - if "Up" in status: - # If container is healthy, we can skip the HTTP check - if "(healthy)" in status: - logger.info("โœ“ Parakeet ASR container is healthy") - return - # Additional check: try to connect to the service - try: - import requests - - # Use the same URL that the backend will use - response = requests.get(f"{PARAKEET_ASR_URL}/health", timeout=5) - if response.status_code == 200: - health_data = response.json() - if health_data.get("status") == "healthy": - logger.info("โœ“ Parakeet ASR service is healthy and accessible") - return - elif health_data.get("status") == "unhealthy": - raise RuntimeError(f"Parakeet ASR service reports unhealthy: {health_data}") - else: - logger.debug(f"Service responding but not ready: {health_data}") - elif response.status_code >= 500: - raise RuntimeError(f"Parakeet ASR service error: HTTP {response.status_code}") - elif response.status_code >= 400: - logger.warning(f"Parakeet ASR client error: HTTP {response.status_code}") - else: - logger.debug(f"Health check failed with status {response.status_code}") - except requests.exceptions.ConnectionError as e: - logger.debug(f"Connection failed, but container is up: {e}") - except Exception as e: - logger.debug(f"HTTP health check failed, but container is up: {e}") - else: - logger.debug(f"Container not ready yet: {status}") - else: - logger.debug("Container not found or not running") - - except Exception as e: - logger.debug(f"Container status check failed: {e}") - - time.sleep(2) - - raise 
RuntimeError("Parakeet ASR service failed to become ready within timeout") - - def cleanup_asr_services(self): - """Clean up ASR services.""" - if not self.asr_services_started: - return - - if not self.fresh_run: - logger.info("๐Ÿ”„ Skipping ASR services cleanup (reusing existing services)") - return - - logger.info("๐Ÿงน Cleaning up ASR services...") - - try: - asr_dir = Path(__file__).parent.parent.parent.parent / "extras/asr-services" - subprocess.run( - ["docker", "compose", "-f", "docker-compose-test.yml", "down"], - cwd=asr_dir, - capture_output=True - ) - logger.info("โœ… ASR services stopped") - except Exception as e: - logger.warning(f"Error stopping ASR services: {e}") - - def setup_environment(self): - """Set up environment variables for testing.""" - logger.info("Setting up test environment variables...") - - # Set test environment variables directly from TEST_ENV_VARS - logger.info("Setting test environment variables from TEST_ENV_VARS...") - for key, value in TEST_ENV_VARS.items(): - os.environ.setdefault(key, value) - logger.info(f"โœ“ {key} set") - - # Load API keys from .env file if not already in environment - if not os.environ.get('DEEPGRAM_API_KEY') or not os.environ.get('OPENAI_API_KEY'): - logger.info("Loading API keys from .env file...") - try: - # Try to load .env.test first (CI environment), then fall back to .env (local development) - env_test_path = '.env.test' - env_path = '.env' - - # Check if we're in the right directory (tests directory vs backend directory) - if not os.path.exists(env_test_path) and os.path.exists('../.env.test'): - env_test_path = '../.env.test' - if not os.path.exists(env_path) and os.path.exists('../.env'): - env_path = '../.env' - - if os.path.exists(env_test_path): - logger.info(f"Loading from {env_test_path}") - load_dotenv(env_test_path) - elif os.path.exists(env_path): - logger.info(f"Loading from {env_path}") - load_dotenv(env_path) - else: - logger.warning("No .env.test or .env file found") - except 
ImportError: - logger.warning("python-dotenv not available, relying on shell environment") - - # Debug: Log API key status (masked for security) - logger.info("API key status:") - for key in ["DEEPGRAM_API_KEY", "OPENAI_API_KEY"]: - value = os.environ.get(key) - if value: - masked_value = value[:4] + "*" * (len(value) - 8) + value[-4:] if len(value) > 8 else "***" - logger.info(f" โœ“ {key}: {masked_value}") - else: - logger.warning(f" โš ๏ธ {key}: NOT SET") - - # Log environment readiness based on provider type - deepgram_key = os.environ.get('DEEPGRAM_API_KEY') - openai_key = os.environ.get('OPENAI_API_KEY') - - # Validate based on transcription provider (streaming/batch architecture) - if self.provider == "deepgram": - # Deepgram provider validation (API-based) - if deepgram_key and openai_key: - logger.info("โœ“ All required keys for Deepgram transcription are available") - else: - logger.warning("โš ๏ธ Some keys missing for Deepgram transcription - test may fail") - if not deepgram_key: - logger.warning(" Missing DEEPGRAM_API_KEY (required for Deepgram transcription)") - if not openai_key: - logger.warning(" Missing OPENAI_API_KEY (required for memory processing)") - elif self.provider == "parakeet": - # Parakeet provider validation (local ASR service) - parakeet_url = os.environ.get('PARAKEET_ASR_URL') - if parakeet_url and openai_key: - logger.info("โœ“ All required configuration for Parakeet transcription is available") - logger.info(f" Using Parakeet ASR service at: {parakeet_url}") - else: - logger.warning("โš ๏ธ Missing configuration for Parakeet transcription - test may fail") - if not parakeet_url: - logger.warning(" Missing PARAKEET_ASR_URL (required for Parakeet ASR service)") - if not openai_key: - logger.warning(" Missing OPENAI_API_KEY (required for memory processing)") - else: - # Unknown or auto-select provider - check what's available - logger.info(f"Provider '{self.provider}' - checking available configuration...") - if deepgram_key and 
openai_key: - logger.info("โœ“ Deepgram configuration available") - elif os.environ.get('PARAKEET_ASR_URL') and openai_key: - logger.info("โœ“ Parakeet configuration available") - else: - logger.warning("โš ๏ธ No valid transcription provider configuration found") - if not openai_key: - logger.warning(" Missing OPENAI_API_KEY (required for memory processing)") - - def start_services(self): - """Start all services using docker compose.""" - logger.info("๐Ÿš€ Starting services with docker compose...") - - # Change to backend directory - os.chdir(tests_dir.parent) - - # Clean up test data directories first (unless cached) - self.cleanup_test_data() - - try: - # Check if test services are already running - check_result = subprocess.run(["docker", "compose", "-f", "docker-compose-test.yml", "ps", "-q"], capture_output=True, text=True) - running_services = check_result.stdout.strip().split('\n') if check_result.stdout.strip() else [] - - if len(running_services) > 0 and not self.rebuild: - logger.info(f"๐Ÿ”„ Found {len(running_services)} running test services") - # Check if test backend is healthy (only skip if not rebuilding) - try: - health_check = subprocess.run(["docker", "compose", "-f", "docker-compose-test.yml", "ps", "chronicle-backend-test"], capture_output=True, text=True) - if "healthy" in health_check.stdout or "Up" in health_check.stdout: - logger.info("โœ… Test services already running and healthy, skipping restart") - self.services_started = True - self.services_started_by_test = True # We'll manage test services - return - except: - pass - elif self.rebuild: - logger.info("๐Ÿ”จ Rebuild flag is True, will rebuild containers with latest code") - - logger.info("๐Ÿ”„ Need to start/restart test services...") - - # Handle container management based on rebuild and cached flags - if self.rebuild: - logger.info("๐Ÿ”จ Rebuild mode: stopping containers and rebuilding with latest code...") - # Stop existing test services and remove volumes for fresh rebuild - 
subprocess.run(["docker", "compose", "-f", "docker-compose-test.yml", "down", "-v"], capture_output=True) - elif not self.fresh_run: - logger.info("๐Ÿ”„ Reuse mode: restarting existing containers...") - subprocess.run(["docker", "compose", "-f", "docker-compose-test.yml", "restart"], capture_output=True) - else: - logger.info("๐Ÿ”„ Fresh mode: stopping containers and removing volumes...") - # Stop existing test services and remove volumes for fresh start - subprocess.run(["docker", "compose", "-f", "docker-compose-test.yml", "down", "-v"], capture_output=True) - - # memory_config.yaml deprecated; memory configuration provided via config.yml - - # Check if we're in CI environment - is_ci = os.environ.get("CI") == "true" or os.environ.get("GITHUB_ACTIONS") == "true" - - if is_ci: - # In CI, use simpler build process - logger.info("๐Ÿค– CI environment detected, using optimized build...") - if self.rebuild: - # Force rebuild in CI when rebuild flag is set with BuildKit disabled - env = os.environ.copy() - env['DOCKER_BUILDKIT'] = '0' - logger.info("๐Ÿ”จ Running Docker build command...") - build_result = subprocess.run(["docker", "compose", "-f", "docker-compose-test.yml", "build"], env=env) - if build_result.returncode != 0: - logger.error(f"โŒ Build failed with exit code {build_result.returncode}") - raise RuntimeError("Docker compose build failed") - cmd = ["docker", "compose", "-f", "docker-compose-test.yml", "up", "-d", "--no-build"] - else: - # Local development - use rebuild flag to determine build behavior - if self.rebuild: - cmd = ["docker", "compose", "-f", "docker-compose-test.yml", "up", "--build", "-d"] - logger.info("๐Ÿ”จ Local rebuild: will rebuild containers with latest code") - else: - cmd = ["docker", "compose", "-f", "docker-compose-test.yml", "up", "-d"] - logger.info("๐Ÿš€ Local start: using existing container images") - - # Start test services with BuildKit disabled to avoid bake issues - env = os.environ.copy() - env['DOCKER_BUILDKIT'] = '0' - 
logger.info(f"๐Ÿš€ Running Docker compose command: {' '.join(cmd)}") - result = subprocess.run(cmd, env=env, timeout=300) - - if result.returncode != 0: - logger.error(f"โŒ Failed to start services with exit code {result.returncode}") - - # Check individual container logs for better error details - logger.error("๐Ÿ” Checking individual container logs for details...") - try: - container_logs_result = subprocess.run( - ["docker", "compose", "-f", "docker-compose-test.yml", "logs", "--tail=50"], - capture_output=True, text=True, timeout=15 - ) - if container_logs_result.stdout: - logger.error("๐Ÿ“‹ Container logs:") - logger.error(container_logs_result.stdout) - if container_logs_result.stderr: - logger.error("๐Ÿ“‹ Container logs stderr:") - logger.error(container_logs_result.stderr) - except Exception as e: - logger.warning(f"Could not fetch container logs: {e}") - - # Check container status - logger.error("๐Ÿ” Checking container status...") - try: - status_result = subprocess.run( - ["docker", "compose", "-f", "docker-compose-test.yml", "ps"], - capture_output=True, text=True, timeout=10 - ) - if status_result.stdout: - logger.error("๐Ÿ“‹ Container status:") - logger.error(status_result.stdout) - except Exception as e: - logger.warning(f"Could not fetch container status: {e}") - - # Fail fast - no retry attempts - raise RuntimeError("Docker compose failed to start") - - self.services_started = True - self.services_started_by_test = True # Mark that we started the services - logger.info("โœ… Docker compose started successfully") - - except Exception as e: - logger.error(f"Error starting services: {e}") - raise - - def wait_for_services(self): - """Wait for all services to be ready with comprehensive health checks.""" - logger.info("๐Ÿ” Performing comprehensive service health validation...") - - start_time = time.time() - services_status = { - "backend": False, - "mongo": False, - "auth": False, - "readiness": False - } - - while time.time() - start_time < 
MAX_STARTUP_WAIT: - try: - # 1. Check backend basic health - if not services_status["backend"]: - try: - health_response = requests.get(f"{BACKEND_URL}/health", timeout=5) - if health_response.status_code == 200: - logger.info("โœ“ Backend health check passed") - services_status["backend"] = True - elif health_response.status_code >= 500: - raise RuntimeError(f"Backend service error: HTTP {health_response.status_code}") - elif health_response.status_code >= 400: - logger.warning(f"Backend client error: HTTP {health_response.status_code}") - except requests.exceptions.RequestException: - pass - - # 2. Check MongoDB connection via backend health check - if not services_status["mongo"] and services_status["backend"]: - try: - health_response = requests.get(f"{BACKEND_URL}/health", timeout=5) - if health_response.status_code == 200: - data = health_response.json() - mongo_health = data.get("services", {}).get("mongodb", {}) - if mongo_health.get("healthy", False): - logger.info("โœ“ MongoDB connection validated via backend health check") - services_status["mongo"] = True - except Exception: - pass - - # 3. 
Check comprehensive readiness (includes Qdrant validation) - if not services_status["readiness"] and services_status["backend"] and services_status["auth"]: - try: - readiness_response = requests.get(f"{BACKEND_URL}/readiness", timeout=5) - if readiness_response.status_code == 200: - data = readiness_response.json() - logger.info(f"๐Ÿ“‹ Readiness report: {json.dumps(data, indent=2)}") - - # Validate readiness data - backend validates Qdrant internally - if data.get("status") in ["healthy", "ready"]: - logger.info("โœ“ Backend reports all services ready (including Qdrant)") - services_status["readiness"] = True - elif data.get("status") == "unhealthy": - raise RuntimeError(f"Backend reports unhealthy status: {data}") - else: - logger.warning(f"โš ๏ธ Backend readiness check not fully healthy: {data}") - elif readiness_response.status_code >= 500: - raise RuntimeError(f"Backend readiness error: HTTP {readiness_response.status_code}") - elif readiness_response.status_code >= 400: - logger.warning(f"Backend readiness client error: HTTP {readiness_response.status_code}") - - except requests.exceptions.RequestException as e: - logger.debug(f"Readiness endpoint not ready yet: {e}") - - # 4. Check authentication endpoint - if not services_status["auth"] and services_status["backend"]: - try: - # Just check that the auth endpoint exists (will return error without credentials) - auth_response = requests.post(f"{BACKEND_URL}/auth/jwt/login", timeout=3) - # Expecting 422 (validation error) not connection error - if auth_response.status_code in [422, 400]: - logger.info("โœ“ Authentication endpoint accessible") - services_status["auth"] = True - except requests.exceptions.RequestException: - pass - - # 5. 
Final validation - all services ready - if all(services_status.values()): - logger.info("๐ŸŽ‰ All services validated and ready!") - return True - - # Log current status - ready_services = [name for name, status in services_status.items() if status] - pending_services = [name for name, status in services_status.items() if not status] - - elapsed = time.time() - start_time - logger.info(f"โณ Health check progress ({elapsed:.1f}s): โœ“ {ready_services} | โณ {pending_services}") - - except Exception as e: - logger.warning(f"โš ๏ธ Health check error: {e}") - - time.sleep(3) - - # Final status report - logger.error("โŒ Service readiness timeout!") - failed_services = [] - for service, status in services_status.items(): - status_emoji = "โœ“" if status else "โŒ" - logger.error(f" {status_emoji} {service}: {'Ready' if status else 'Not ready'}") - if not status: - failed_services.append(service) - - # Check for cascade failures - if backend failed, everything else will fail - if not services_status["backend"]: - logger.error("๐Ÿ’ฅ CRITICAL: Backend service failed - all dependent services will fail") - logger.error(" This indicates a fundamental infrastructure issue") - elif not services_status["mongo"]: - logger.error("๐Ÿ’ฅ CRITICAL: MongoDB connection failed - memory and auth will not work") - elif not services_status["readiness"]: - logger.error("๐Ÿ’ฅ WARNING: Readiness check failed - Qdrant or other dependencies may be down") - - raise TimeoutError(f"Services did not become ready in {MAX_STARTUP_WAIT}s. 
Failed services: {failed_services}") - - def authenticate(self): - """Authenticate and get admin token.""" - logger.info("๐Ÿ”‘ Authenticating as admin...") - - # Always use test credentials for test environment - logger.info("Using test environment credentials") - admin_email = TEST_ENV_VARS["ADMIN_EMAIL"] - admin_password = TEST_ENV_VARS["ADMIN_PASSWORD"] - - logger.info(f"Authenticating with email: {admin_email}") - - auth_url = f"{BACKEND_URL}/auth/jwt/login" - - response = requests.post( - auth_url, - data={ - 'username': admin_email, - 'password': admin_password - }, - headers={'Content-Type': 'application/x-www-form-urlencoded'} - ) - - if response.status_code != 200: - logger.error(f"Authentication failed with {admin_email}") - logger.error(f"Response: {response.text}") - raise RuntimeError(f"Authentication failed: {response.text}") - - data = response.json() - self.token = data.get('access_token') - - if not self.token: - raise RuntimeError("No access token received") - - logger.info("โœ“ Authentication successful") - - def upload_test_audio(self): - """Upload test audio file and monitor processing.""" - # Use different audio file for parakeet provider (shorter for faster testing) - audio_path = TEST_AUDIO_PATH_PARAKEET if self.provider == "parakeet" else TEST_AUDIO_PATH - - logger.info(f"๐Ÿ“ค Uploading test audio: {audio_path.name}") - - if not audio_path.exists(): - raise FileNotFoundError(f"Test audio file not found: {audio_path}") - - # Log audio file details - file_size = audio_path.stat().st_size - logger.info(f"๐Ÿ“Š Audio file size: {file_size:,} bytes ({file_size / (1024*1024):.2f} MB)") - - # Upload file - with open(audio_path, 'rb') as f: - files = {'files': (audio_path.name, f, 'audio/wav')} - data = {'device_name': 'integration_test'} - headers = {'Authorization': f'Bearer {self.token}'} - - logger.info("๐Ÿ“ค Sending upload request...") - response = requests.post( - f"{BACKEND_URL}/api/audio/upload", - files=files, - data=data, - 
headers=headers, - timeout=300 - ) - - logger.info(f"๐Ÿ“ค Upload response status: {response.status_code}") - - if response.status_code != 200: - raise RuntimeError(f"Upload failed: {response.text}") - - result = response.json() - logger.info(f"๐Ÿ“ค Upload response: {json.dumps(result, indent=2)}") - - # Extract client_id from response - client_id = result.get('client_id') - if not client_id: - raise RuntimeError("No client_id in upload response") - - logger.info(f"๐Ÿ“ค Generated client_id: {client_id}") - return result # Return full response with job IDs - - def verify_processing_results(self, upload_response: dict): - """Verify that audio was processed correctly using job tracking.""" - client_id = upload_response.get('client_id') - files = upload_response.get('files', []) - - if not files: - raise RuntimeError("No files in upload response") - - file_info = files[0] - transcript_job_id = file_info.get('transcript_job_id') - conversation_id = file_info.get('conversation_id') - - logger.info(f"๐Ÿ” Verifying processing results:") - logger.info(f" - Client ID: {client_id}") - logger.info(f" - Conversation ID: {conversation_id}") - logger.info(f" - Transcript Job ID: {transcript_job_id}") - - # Wait for transcription job to complete - logger.info("๐Ÿ” Waiting for transcription job to complete...") - start_time = time.time() - job_complete = False - - while time.time() - start_time < 60: # Wait up to 60 seconds for transcription - try: - # Check job status via queue API - response = requests.get( - f"{BACKEND_URL}/api/queue/jobs/{transcript_job_id}", - headers={"Authorization": f"Bearer {self.token}"}, - timeout=10 - ) - - if response.status_code == 200: - job_data = response.json() - status = job_data.get("status") - - if status == "completed": - logger.info(f"โœ… Transcription job completed successfully") - job_complete = True - break - elif status == "failed": - error = job_data.get("exc_info", "Unknown error") - logger.error(f"โŒ Transcription job failed: 
{error}") - break - else: - logger.info(f"โณ Job status: {status} ({time.time() - start_time:.1f}s)") - - else: - logger.warning(f"โš ๏ธ Job status check returned {response.status_code}") - - except Exception as e: - logger.warning(f"โš ๏ธ Error checking job status: {e}") - - time.sleep(5) - - if not job_complete: - raise AssertionError(f"Transcription job did not complete within 60 seconds. Last status: {status if 'status' in locals() else 'unknown'}") - - # Get the conversation via API - logger.info(f"๐Ÿ” Retrieving conversation...") - conversation = None - - try: - # Get conversations list - response = requests.get( - f"{BACKEND_URL}/api/conversations", - headers={"Authorization": f"Bearer {self.token}"}, - timeout=10 - ) - - if response.status_code == 200: - data = response.json() - conversations_list = data.get("conversations", []) - - # Find our conversation by conversation_id or client_id - for conv in conversations_list: - if conv.get('conversation_id') == conversation_id or conv.get('client_id') == client_id: - conversation = conv - logger.info(f"โœ… Found conversation in list: {conv.get('conversation_id')}") - break - - if not conversation: - logger.error(f"โŒ Conversation not found in list of {len(conversations_list)} conversations") - if conversations_list: - logger.error(f"๐Ÿ“Š Available conversations: {[c.get('conversation_id') for c in conversations_list[:5]]}") - else: - # Fetch full conversation details (list endpoint excludes transcript for performance) - logger.info(f"๐Ÿ” Fetching full conversation details...") - detail_response = requests.get( - f"{BACKEND_URL}/api/conversations/{conversation['conversation_id']}", - headers={"Authorization": f"Bearer {self.token}"}, - timeout=10 - ) - - if detail_response.status_code == 200: - conversation = detail_response.json()["conversation"] - logger.info(f"โœ… Retrieved full conversation details with transcript") - else: - logger.error(f"โŒ Failed to fetch conversation details: 
{detail_response.status_code}") - logger.error(f"Response: {detail_response.text}") - - else: - logger.error(f"โŒ Conversations API returned status: {response.status_code}") - logger.error(f"Response: {response.text}") - - except Exception as e: - logger.error(f"โŒ Error retrieving conversations: {e}", exc_info=True) - - if not conversation: - raise AssertionError(f"No conversation found for conversation_id: {conversation_id}") - - logger.info(f"โœ“ Conversation found: {conversation['audio_uuid']}") - - # Log conversation details - logger.info("๐Ÿ“‹ Conversation details:") - logger.info(f" - Audio UUID: {conversation['audio_uuid']}") - logger.info(f" - Client ID: {conversation.get('client_id')}") - logger.info(f" - Audio Path: {conversation.get('audio_path', 'N/A')}") - logger.info(f" - Timestamp: {conversation.get('timestamp', 'N/A')}") - - # Verify transcription (transcript is a string, segments is an array) - transcription = conversation.get('transcript', '') - segments = conversation.get('segments', []) - - logger.info(f"๐Ÿ“ Transcription details:") - logger.info(f" - Transcript length: {len(transcription)} characters") - logger.info(f" - Word count: {len(transcription.split()) if transcription else 0}") - logger.info(f" - Speaker segments: {len(segments)}") - - if transcription: - # Show first 200 characters of transcription - preview = transcription[:200] + "..." 
if len(transcription) > 200 else transcription - logger.info(f" - Preview: {preview}") - - # Load expected transcript for comparison - expected_transcript = self.load_expected_transcript() - logger.info(f" - Expected transcript length: {len(expected_transcript)} characters") - - # Log first 200 characters for comparison - logger.info(f" - Actual start: {transcription[:200]}...") - if expected_transcript: - logger.info(f" - Expected start: {expected_transcript[:200]}...") - - # Call OpenAI to verify transcript similarity - if os.environ.get("OPENAI_API_KEY") and expected_transcript: - similarity_result = self.check_transcript_similarity_simple(transcription, expected_transcript) - logger.info(f" - AI similarity assessment:") - logger.info(f" โ€ข Similar: {similarity_result.get('similar', 'unknown')}") - logger.info(f" โ€ข Reason: {similarity_result.get('reason', 'No reason provided')}") - - # Store result for validation - self.transcript_similarity_result = similarity_result - elif not expected_transcript: - logger.warning("โš ๏ธ No expected transcript available for comparison") - self.transcript_similarity_result = None - else: - logger.error("โŒ No transcription found") - - # Verify conversation has required fields - assert conversation.get('transcript'), "Conversation missing transcript" - assert len(conversation['transcript']) > 0, "Transcript is empty" - assert transcription.strip(), "Transcription text is empty" - - # Check for memory extraction (if LLM is configured) - if os.environ.get("OPENAI_API_KEY"): - logger.info("๐Ÿง  Checking for memory extraction...") - - # Check debug tracker for memory processing - response = requests.get( - f"{BACKEND_URL}/metrics", - headers={'Authorization': f'Bearer {self.token}'} - ) - - if response.status_code == 200: - metrics = response.json() - logger.info(f"๐Ÿ“Š System metrics: {json.dumps(metrics, indent=2)}") - - logger.info("โœ… Processing verification complete") - - return conversation, transcription - - def 
validate_memory_extraction(self, upload_response: dict): - """Validate that memory extraction worked correctly.""" - client_id = upload_response.get('client_id') - files = upload_response.get('files', []) - - logger.info(f"๐Ÿง  Validating memory extraction for client: {client_id}") - - # Get memory job ID from upload response - memory_job_id = files[0].get('memory_job_id') if files else None - if not memory_job_id: - raise RuntimeError("No memory_job_id in upload response") - - # Wait for memory processing to complete - client_memories = self.wait_for_memory_processing(memory_job_id, client_id) - - # Check if we're using OpenMemory MCP provider - memory_provider = os.environ.get("MEMORY_PROVIDER", "chronicle") - - if not client_memories: - if memory_provider == "openmemory_mcp": - # For OpenMemory MCP, check if there are any memories at all (deduplication is OK) - all_memories = self.get_memories_from_api() - if all_memories: - logger.info(f"โœ… OpenMemory MCP: Found {len(all_memories)} existing memories (deduplication successful)") - client_memories = all_memories # Use existing memories for validation - else: - raise AssertionError("No memories found in OpenMemory MCP - memory processing failed") - else: - raise AssertionError("No memories were extracted - memory processing failed") - - logger.info(f"โœ… Found {len(client_memories)} memories") - - # Load expected memories and compare - expected_memories = self.load_expected_memories() - if not expected_memories: - logger.warning("โš ๏ธ No expected memories available for comparison") - return client_memories - - # Use OpenAI to check if memories are similar - if os.environ.get("OPENAI_API_KEY"): - memory_similarity = self.check_memory_similarity_simple(client_memories, expected_memories) - logger.info(f"๐Ÿง  Memory similarity assessment:") - logger.info(f" โ€ข Similar: {memory_similarity.get('similar', 'unknown')}") - logger.info(f" โ€ข Reason: {memory_similarity.get('reason', 'No reason provided')}") - - # Store 
result for validation - self.memory_similarity_result = memory_similarity - else: - logger.warning("โš ๏ธ No OpenAI API key available for memory comparison") - self.memory_similarity_result = None - - return client_memories - - def check_transcript_similarity_simple(self, actual_transcript: str, expected_transcript: str) -> dict: - """Use OpenAI to check transcript similarity with simple boolean response.""" - try: - - client = openai.OpenAI(api_key=os.environ.get("OPENAI_API_KEY")) - - prompt = f""" - Compare these two transcripts to determine if they represent the same audio content. - - EXPECTED TRANSCRIPT: - "{expected_transcript}" - - ACTUAL TRANSCRIPT: - "{actual_transcript}" - - **MARK AS SIMILAR if:** - - Core content and topics match (e.g., glass blowing class, participants, activities) - - Key facts and events are present in both (names, numbers, objects, actions) - - Overall narrative flow is recognizable - - At least 70% semantic overlap exists - - **ACCEPTABLE DIFFERENCES (still mark as similar):** - - Minor word variations or ASR errors - - Different punctuation or capitalization - - Missing or extra filler words - - Small sections missing or repeated - - Slightly different word order - - Speaker diarization differences - - **ONLY MARK AS DISSIMILAR if:** - - Core content is fundamentally different - - Major sections (>30%) are missing or wrong - - It appears to be a different audio file entirely - - Respond in JSON format: - {{ - "reason": "brief explanation (1-3 sentences)" - "similar": true/false, - }} - """ - - response = client.chat.completions.create( - model="gpt-4o-mini", - messages=[{"role": "user", "content": prompt}], - response_format={"type": "json_object"} - ) - - response_text = (response.choices[0].message.content or "").strip() - - # Try to parse JSON response - try: - result = json.loads(response_text) - return result - except json.JSONDecodeError: - # If JSON parsing fails, return a basic result - return { - "similar": False, - 
"reason": f"Could not parse response: {response_text}" - } - - except Exception as e: - logger.warning(f"โš ๏ธ Could not check transcript similarity: {e}") - return { - "similar": False, - "reason": f"API call failed: {str(e)}" - } - - def check_memory_similarity_simple(self, actual_memories: list, expected_memories: list) -> dict: - """Use OpenAI to check if extracted memories are similar to expected memories.""" - try: - import openai - - client = openai.OpenAI(api_key=os.environ.get("OPENAI_API_KEY")) - - # Extract just the memory text from actual memories - actual_memory_texts = [mem.get('memory', '') for mem in actual_memories] - - prompt = f""" - Compare these two lists of memories to determine if they represent content from the same audio source and indicate successful memory extraction. - - **KEY CRITERIA FOR SIMILARITY (Return "similar": true if ANY of these are met):** - - 1. **Topic/Context Match**: Both lists should be about the same main activity/event (e.g., glass blowing class) - 2. **Core Facts Overlap**: At least 3-4 significant factual details should overlap (people, places, numbers, objects) - 3. **Semantic Coverage**: The same general knowledge should be captured, even if from different perspectives - - **ACCEPTABLE DIFFERENCES (Do NOT mark as dissimilar for these):** - - Different focus areas (one list more personal/emotional, other more technical/factual) - - Different level of detail (one more granular, other more high-level) - - Different speakers/participants emphasized - - Different organization or memory chunking - - Emotional vs factual framing of the same events - - Missing some details in either list (as long as core overlap exists) - - **MARK AS DISSIMILAR ONLY IF:** - - The memories seem to be from completely different audio/conversations - - No meaningful factual overlap (suggests wrong audio or major transcription failure) - - Core subject matter is entirely different - - **EVALUATION APPROACH:** - 1. 
Identify overlapping factual elements (people, places, objects, numbers, activities) - 2. Count significant semantic overlaps - 3. If 3+ substantial overlaps exist AND same general topic/context โ†’ mark as similar - 4. Focus on "are these from the same source" rather than "are these identical" - - EXPECTED MEMORIES: - {expected_memories} - - EXTRACTED MEMORIES: - {actual_memory_texts} - - Respond in JSON format with: - {{ - "reasoning": "detailed analysis of overlapping elements and why they indicate same/different source", - "reason": "brief explanation of the decision", - "similar": true/false - }} - """ - - logger.info(f"Making GPT-5-mini API call for memory similarity...") - response = client.chat.completions.create( - model="gpt-4o-mini", - messages=[{"role": "user", "content": prompt}], - response_format={"type": "json_object"} - ) - - response_text = (response.choices[0].message.content or "").strip() - logger.info(f"Memory similarity GPT-5-mini response: '{response_text}'") - - try: - result = json.loads(response_text) - return result - except json.JSONDecodeError as json_err: - # If JSON parsing fails, return a basic result - logger.error(f"JSON parsing failed: {json_err}") - logger.error(f"Response text that failed to parse: '{response_text}'") - return { - "reason": f"Could not parse response: {response_text}", - "similar": False, - } - - except Exception as e: - logger.error(f"โš ๏ธ Could not check memory similarity: {e}") - logger.error(f"Exception type: {type(e)}") - logger.error(f"Exception details: {str(e)}") - return { - "similar": False, - "reason": f"API call failed: {str(e)}" - } - - def get_memories_from_api(self) -> list: - """Fetch memories from the backend API.""" - try: - headers = {'Authorization': f'Bearer {self.token}'} - response = requests.get(f"{BACKEND_URL}/api/memories", headers=headers) - - if response.status_code == 200: - data = response.json() - return data.get('memories', []) - else: - logger.error(f"Failed to fetch memories: 
{response.status_code} - {response.text}") - return [] - except Exception as e: - logger.error(f"Error fetching memories: {e}") - return [] - - def wait_for_memory_processing(self, memory_job_id: str, client_id: str, timeout: int = 120): - """Wait for memory processing to complete using queue API.""" - logger.info(f"โณ Waiting for memory job {memory_job_id} to complete...") - - start_time = time.time() - job_complete = False - - while time.time() - start_time < timeout: - try: - # Check job status via queue API - response = requests.get( - f"{BACKEND_URL}/api/queue/jobs/{memory_job_id}", - headers={"Authorization": f"Bearer {self.token}"}, - timeout=10 - ) - - if response.status_code == 200: - job_data = response.json() - status = job_data.get("status") - - if status == "completed": - logger.info(f"โœ… Memory job completed successfully") - job_complete = True - break - elif status == "failed": - error = job_data.get("exc_info", "Unknown error") - logger.error(f"โŒ Memory job failed: {error}") - break - else: - logger.info(f"โณ Memory job status: {status} ({time.time() - start_time:.1f}s)") - - else: - logger.warning(f"โš ๏ธ Memory job status check returned {response.status_code}") - - except Exception as e: - logger.warning(f"โš ๏ธ Error checking memory job status: {e}") - - time.sleep(5) - - if not job_complete: - raise AssertionError(f"Memory job did not complete within {timeout} seconds. 
Last status: {status if 'status' in locals() else 'unknown'}") - - # Now fetch the memories from the API - memories = self.get_memories_from_api() - - # Filter by client_id for test isolation in fresh mode, or get all user memories in reuse mode - if not self.fresh_run: - # In reuse mode, get all user memories (API already filters by user_id) - user_memories = memories - if user_memories: - logger.info(f"โœ… Found {len(user_memories)} total user memories (reusing existing data)") - return user_memories - else: - # In fresh mode, filter by client_id for test isolation since we cleaned all data - client_memories = [mem for mem in memories if mem.get('metadata', {}).get('client_id') == client_id] - if client_memories: - logger.info(f"โœ… Found {len(client_memories)} memories for client {client_id}") - return client_memories - - logger.warning(f"โš ๏ธ No memories found after processing") - return [] - - async def create_chat_session(self, title: str = "Integration Test Session", description: str = "Testing memory integration") -> Optional[str]: - """Create a new chat session and return session ID.""" - logger.info(f"๐Ÿ“ Creating chat session: {title}") - - try: - response = requests.post( - f"{BACKEND_URL}/api/chat/sessions", - headers={"Authorization": f"Bearer {self.token}"}, - json={ - "title": title, - "description": description - }, - timeout=10 - ) - - if response.status_code == 200: - data = response.json() - session_id = data.get("session_id") - logger.info(f"โœ… Chat session created: {session_id}") - return session_id - else: - logger.error(f"โŒ Chat session creation failed: {response.status_code} - {response.text}") - return None - - except Exception as e: - logger.error(f"โŒ Error creating chat session: {e}") - return None - - async def send_chat_message(self, session_id: str, message: str) -> dict: - """Send a message to chat session and parse response.""" - logger.info(f"๐Ÿ’ฌ Sending message: {message}") - - try: - response = requests.post( - 
f"{BACKEND_URL}/api/chat/send", - headers={"Authorization": f"Bearer {self.token}"}, - json={ - "message": message, - "session_id": session_id - }, - timeout=30 - ) - - if response.status_code == 200: - # Parse SSE response - full_response = "" - memory_ids = [] - - for line in response.text.split('\n'): - if line.startswith('data: '): - try: - event_data = json.loads(line[6:]) - event_type = event_data.get("type") - - if event_type == "memory_context": - mem_ids = event_data.get("data", {}).get("memory_ids", []) - memory_ids.extend(mem_ids) - elif event_type == "content": - content = event_data.get("data", {}).get("content", "") - full_response += content - elif event_type == "done": - break - except json.JSONDecodeError: - pass - - logger.info(f"๐Ÿค– Response received ({len(full_response)} chars)") - if memory_ids: - logger.info(f"๐Ÿ“š Memories used: {len(memory_ids)} memory IDs") - - return { - "response": full_response, - "memories_used": memory_ids, - "success": True - } - else: - logger.error(f"โŒ Chat message failed: {response.status_code} - {response.text}") - return {"success": False, "error": response.text} - - except Exception as e: - logger.error(f"โŒ Error sending chat message: {e}") - return {"success": False, "error": str(e)} - - async def run_chat_conversation(self, session_id: str) -> bool: - """Run a test conversation with memory integration.""" - logger.info("๐ŸŽญ Starting chat conversation test...") - - # Test messages designed to trigger memory retrieval - test_messages = [ - "Hello! I'm testing the chat system with memory integration.", - "What do you know about glass blowing? 
Have I mentioned anything about it?", - ] - - memories_used_total = [] - - for i, message in enumerate(test_messages, 1): - logger.info(f"๐Ÿ“จ Message {i}/{len(test_messages)}") - result = await self.send_chat_message(session_id, message) - - if not result.get("success"): - logger.error(f"โŒ Chat message {i} failed: {result.get('error')}") - return False - - # Track memory usage - memories_used = result.get("memories_used", []) - memories_used_total.extend(memories_used) - - # Small delay between messages - time.sleep(1) - - logger.info(f"โœ… Chat conversation completed. Total memories used: {len(set(memories_used_total))}") - return True - - async def extract_memories_from_chat(self, session_id: str) -> dict: - """Extract memories from the chat session.""" - logger.info(f"๐Ÿง  Extracting memories from chat session: {session_id}") - - try: - response = requests.post( - f"{BACKEND_URL}/api/chat/sessions/{session_id}/extract-memories", - headers={"Authorization": f"Bearer {self.token}"}, - timeout=30 - ) - - if response.status_code == 200: - data = response.json() - if data.get("success"): - logger.info(f"โœ… Memory extraction successful: {data.get('count', 0)} memories created") - return data - else: - logger.warning(f"โš ๏ธ Memory extraction completed but no memories: {data.get('message', 'Unknown')}") - return data - else: - logger.error(f"โŒ Memory extraction failed: {response.status_code} - {response.text}") - return {"success": False, "error": response.text} - - except Exception as e: - logger.error(f"โŒ Error extracting memories from chat: {e}") - return {"success": False, "error": str(e)} - - def cleanup(self): - """Clean up test resources based on cached and rebuild flags.""" - logger.info("Cleaning up...") - - if self.mongo_client: - self.mongo_client.close() - - # Handle container cleanup based on cleanup_containers flag (rebuild flag doesn't affect cleanup) - if self.cleanup_containers and self.services_started_by_test: - logger.info("๐Ÿ”„ Cleanup 
mode: stopping test docker compose services...") - subprocess.run(["docker", "compose", "-f", "docker-compose-test.yml", "down", "-v"], capture_output=True) - logger.info("โœ“ Test containers stopped and volumes removed") - elif not self.cleanup_containers: - logger.info("๐Ÿ—‚๏ธ No cleanup: leaving containers running for debugging") - if self.rebuild: - logger.info(" (containers were rebuilt with latest code during this test)") - else: - logger.info("๐Ÿ”„ Test services were already running, leaving them as-is") - - logger.info("โœ“ Cleanup complete") - - -@pytest.fixture -def test_runner(): - """Pytest fixture for test runner.""" - runner = IntegrationTestRunner() - yield runner - runner.cleanup() - - -@pytest.mark.integration -def test_full_pipeline_integration(test_runner): - """Test the complete audio processing pipeline.""" - # Immediate output to confirm test is starting - print("๐Ÿš€ TEST STARTING - test_full_pipeline_integration", flush=True) - sys.stdout.flush() - sys.stderr.flush() - - try: - # Test timing tracking - test_start_time = time.time() - phase_times = {} - - # Immediate logging to debug environment - print("=" * 80, flush=True) - print("๐Ÿš€ STARTING INTEGRATION TEST", flush=True) - print("=" * 80, flush=True) - logger.info(f"Current working directory: {os.getcwd()}") - logger.info(f"Files in directory: {os.listdir('.')}") - logger.info(f"CI environment: {os.environ.get('CI', 'NOT SET')}") - logger.info(f"GITHUB_ACTIONS: {os.environ.get('GITHUB_ACTIONS', 'NOT SET')}") - sys.stdout.flush() - - # Phase 1: Environment setup - phase_start = time.time() - logger.info("๐Ÿ“‹ Phase 1: Setting up test environment...") - test_runner.setup_environment() - phase_times['env_setup'] = time.time() - phase_start - logger.info(f"โœ… Environment setup completed in {phase_times['env_setup']:.2f}s") - - # Phase 2: Service startup - phase_start = time.time() - logger.info("๐Ÿณ Phase 2: Starting services...") - test_runner.start_services() - 
phase_times['service_startup'] = time.time() - phase_start - logger.info(f"โœ… Service startup completed in {phase_times['service_startup']:.2f}s") - - # Phase 2b: ASR service startup (parakeet only) - phase_start = time.time() - logger.info(f"๐ŸŽค Phase 2b: Starting ASR services ({TRANSCRIPTION_PROVIDER} provider)...") - test_runner.start_asr_services() - phase_times['asr_startup'] = time.time() - phase_start - logger.info(f"โœ… ASR service startup completed in {phase_times['asr_startup']:.2f}s") - - # Phase 3: Wait for services - phase_start = time.time() - logger.info("โณ Phase 3: Waiting for services to be ready...") - test_runner.wait_for_services() - phase_times['service_readiness'] = time.time() - phase_start - logger.info(f"โœ… Service readiness check completed in {phase_times['service_readiness']:.2f}s") - - # Phase 3b: Wait for ASR services (parakeet only) - phase_start = time.time() - logger.info("โณ Phase 3b: Waiting for ASR services to be ready...") - test_runner.wait_for_asr_ready() - phase_times['asr_readiness'] = time.time() - phase_start - logger.info(f"โœ… ASR readiness check completed in {phase_times['asr_readiness']:.2f}s") - - # Phase 4: Authentication - phase_start = time.time() - logger.info("๐Ÿ”‘ Phase 4: Authentication...") - test_runner.authenticate() - phase_times['authentication'] = time.time() - phase_start - logger.info(f"โœ… Authentication completed in {phase_times['authentication']:.2f}s") - - # Phase 5: Audio upload and processing - phase_start = time.time() - logger.info("๐Ÿ“ค Phase 5: Audio upload...") - upload_response = test_runner.upload_test_audio() - client_id = upload_response.get('client_id') - phase_times['audio_upload'] = time.time() - phase_start - logger.info(f"โœ… Audio upload completed in {phase_times['audio_upload']:.2f}s") - - # Phase 6: Transcription processing - phase_start = time.time() - logger.info("๐ŸŽค Phase 6: Transcription processing...") - conversation, transcription = 
test_runner.verify_processing_results(upload_response) - phase_times['transcription_processing'] = time.time() - phase_start - logger.info(f"โœ… Transcription processing completed in {phase_times['transcription_processing']:.2f}s") - - # Phase 7: Memory extraction - phase_start = time.time() - logger.info("๐Ÿง  Phase 7: Memory extraction...") - memories = test_runner.validate_memory_extraction(upload_response) - phase_times['memory_extraction'] = time.time() - phase_start - logger.info(f"โœ… Memory extraction completed in {phase_times['memory_extraction']:.2f}s") - - # Phase 8: Chat with Memory Integration - # phase_start = time.time() - # logger.info("๐Ÿ’ฌ Phase 8: Chat with Memory Integration...") - - # # Create chat session - # session_id = asyncio.run(test_runner.create_chat_session( - # title="Integration Test Chat", - # description="Testing chat functionality with memory retrieval" - # )) - # assert session_id is not None, "Failed to create chat session" - - # # Run chat conversation - # chat_success = asyncio.run(test_runner.run_chat_conversation(session_id)) - # assert chat_success, "Chat conversation failed" - - # # Extract memories from chat session (optional - may create additional memories) - # chat_memory_result = asyncio.run(test_runner.extract_memories_from_chat(session_id)) - - # phase_times['chat_integration'] = time.time() - phase_start - # logger.info(f"โœ… Chat integration completed in {phase_times['chat_integration']:.2f}s") - - # Basic assertions - assert conversation is not None - assert len(conversation['transcript']) > 0 - assert transcription.strip() # Ensure we have actual text content - - # Transcript similarity assertion - if hasattr(test_runner, 'transcript_similarity_result') and test_runner.transcript_similarity_result: - assert test_runner.transcript_similarity_result.get('similar') == True, f"Transcript not similar enough: {test_runner.transcript_similarity_result.get('reason')}" - - # Memory validation assertions - assert memories 
is not None and len(memories) > 0, "No memories were extracted" - - # Memory similarity assertion - if hasattr(test_runner, 'memory_similarity_result') and test_runner.memory_similarity_result: - if test_runner.memory_similarity_result.get('similar') != True: - # Log transcript for debugging before failing - logger.error("=" * 80) - logger.error("โŒ MEMORY SIMILARITY CHECK FAILED - DEBUGGING INFO") - logger.error("=" * 80) - logger.error("๐Ÿ“ Generated Transcript:") - logger.error("-" * 60) - logger.error(transcription) - logger.error("-" * 60) - - # Format detailed error with both memory sets - expected_memories = test_runner.load_expected_memories() - extracted_memories = [mem.get('memory', '') for mem in memories] - - error_msg = f""" -Memory similarity check failed: -Reason: {test_runner.memory_similarity_result.get('reason', 'No reason provided')} -Reasoning: {test_runner.memory_similarity_result.get('reasoning', 'No detailed reasoning provided')} - -Expected memories ({len(expected_memories)}): -{chr(10).join(f" {i+1}. {mem}" for i, mem in enumerate(expected_memories))} - -Extracted memories ({len(extracted_memories)}): -{chr(10).join(f" {i+1}. {mem}" for i, mem in enumerate(extracted_memories))} - -Generated Transcript ({len(transcription)} chars): -{transcription[:500]}{'...' 
if len(transcription) > 500 else ''} -""" - assert False, error_msg - - # Calculate total test time - total_test_time = time.time() - test_start_time - phase_times['total_test'] = total_test_time - - # Log success with detailed timing - logger.info("=" * 80) - logger.info("๐ŸŽ‰ INTEGRATION TEST PASSED!") - logger.info("=" * 80) - logger.info(f"โฑ๏ธ TIMING BREAKDOWN:") - logger.info(f" ๐Ÿ“‹ Environment Setup: {phase_times['env_setup']:>6.2f}s") - logger.info(f" ๐Ÿณ Service Startup: {phase_times['service_startup']:>6.2f}s") - logger.info(f" โณ Service Readiness: {phase_times['service_readiness']:>6.2f}s") - logger.info(f" ๐Ÿ”‘ Authentication: {phase_times['authentication']:>6.2f}s") - logger.info(f" ๐Ÿ“ค Audio Upload: {phase_times['audio_upload']:>6.2f}s") - logger.info(f" ๐ŸŽค Transcription: {phase_times['transcription_processing']:>6.2f}s") - logger.info(f" ๐Ÿง  Memory Extraction: {phase_times['memory_extraction']:>6.2f}s") - # logger.info(f" ๐Ÿ’ฌ Chat Integration: {phase_times['chat_integration']:>6.2f}s") - logger.info(f" {'โ”€' * 35}") - logger.info(f" ๐Ÿ TOTAL TEST TIME: {total_test_time:>6.2f}s ({total_test_time/60:.1f}m)") - logger.info("") - logger.info(f"๐Ÿ“Š Test Results:") - logger.info(f" โœ… Audio file processed successfully") - logger.info(f" โœ… Transcription generated: {len(transcription)} characters") - logger.info(f" โœ… Word count: {len(transcription.split())}") - logger.info(f" โœ… Audio UUID: {conversation.get('audio_uuid')}") - logger.info(f" โœ… Client ID: {conversation.get('client_id')}") - logger.info(f" โœ… Memories extracted: {len(memories)}") - logger.info(f" โœ… Transcript similarity: {getattr(test_runner, 'transcript_similarity_result', {}).get('similar', 'N/A')}") - logger.info(f" โœ… Memory similarity: {getattr(test_runner, 'memory_similarity_result', {}).get('similar', 'N/A')}") - logger.info("") - logger.info("๐Ÿ“ Full Transcription:") - logger.info("-" * 60) - logger.info(transcription) - logger.info("-" * 60) - 
logger.info("") - logger.info("๐Ÿง  Extracted Memories:") - logger.info("-" * 60) - for i, memory in enumerate(memories[:10], 1): # Show first 10 memories - logger.info(f"{i}. {memory.get('memory', 'No content')}") - if len(memories) > 10: - logger.info(f"... and {len(memories) - 10} more memories") - logger.info("-" * 60) - logger.info("=" * 80) - - except Exception as e: - logger.error(f"Integration test failed: {e}") - raise - finally: - # Cleanup ASR services - test_runner.cleanup_asr_services() - - -if __name__ == "__main__": - # Run the test directly - pytest.main([__file__, "-v", "-s"]) diff --git a/config/README.md b/config/README.md new file mode 100644 index 00000000..e3a5cf3c --- /dev/null +++ b/config/README.md @@ -0,0 +1,106 @@ +# Chronicle Configuration + +This directory contains Chronicle's centralized configuration files. + +## Files + +- **`config.yml`** - Main configuration file (gitignored, user-specific) + - Contains model registry (LLM, STT, TTS, embeddings, vector store) + - Memory provider settings + - Service endpoints and API keys + +- **`config.yml.template`** - Template for new setups + - Use this to create your `config.yml` + - Contains placeholders with `${ENV_VAR:-default}` patterns + - No secrets included - safe to commit + +## Setup + +### First Time Setup + +```bash +# Option 1: Run the interactive wizard (recommended) +uv run --with-requirements setup-requirements.txt python wizard.py + +# Option 2: Manual setup +cp config/config.yml.template config/config.yml +# Edit config.yml to add your API keys and configure providers +``` + +### Environment Variable Substitution + +The config system supports environment variable substitution using `${VAR:-default}` syntax: + +```yaml +models: + - name: openai-llm + api_key: ${OPENAI_API_KEY:-} # Uses env var or empty string + model_url: ${OPENAI_BASE_URL:-https://api.openai.com/v1} # With fallback +``` + +## Configuration Sections + +### Defaults + +Specifies which models to use by default: + 
+```yaml +defaults: + llm: openai-llm # Default LLM model + embedding: openai-embed # Default embedding model + stt: stt-deepgram # Default speech-to-text + vector_store: vs-qdrant # Default vector database +``` + +### Models + +Array of model definitions - each model includes: +- `name`: Unique identifier +- `model_type`: llm, embedding, stt, tts, vector_store +- `model_provider`: openai, ollama, deepgram, parakeet, etc. +- `model_name`: Provider-specific model name +- `model_url`: API endpoint +- `api_key`: Authentication (use env vars!) +- `model_params`: Temperature, max_tokens, etc. + +### Memory + +Memory extraction and storage configuration: + +```yaml +memory: + provider: chronicle # chronicle, openmemory_mcp, or mycelia + timeout_seconds: 1200 + extraction: + enabled: true + prompt: "Custom extraction prompt..." +``` + +## Test Configurations + +For testing different provider combinations, see `tests/configs/`: +- These configs are version-controlled +- Use with `CONFIG_FILE` environment variable +- No secrets - only env var placeholders + +Example: +```bash +CONFIG_FILE=tests/configs/parakeet-ollama.yml ./backends/advanced/run-test.sh +``` + +## Hot Reload + +The memory configuration section supports hot reload - changes are picked up without service restart. Model registry changes require service restart. 
+ +## Backups + +The setup wizard automatically backs up `config.yml` before making changes: +- Backups: `config.yml.backup.YYYYMMDD_HHMMSS` +- These are gitignored automatically + +## Documentation + +For detailed configuration guides, see: +- `/Docs/memory-configuration-guide.md` - Memory settings +- `/backends/advanced/Docs/quickstart.md` - Setup guide +- `/CLAUDE.md` - Project overview diff --git a/config.yml.template b/config/config.yml.template similarity index 95% rename from config.yml.template rename to config/config.yml.template index 37209d4b..7b43d042 100644 --- a/config.yml.template +++ b/config/config.yml.template @@ -2,6 +2,9 @@ defaults: llm: openai-llm embedding: openai-embed stt: stt-deepgram + # Transcription provider selection: + # - stt-deepgram: Cloud-based (requires DEEPGRAM_API_KEY in .env) + # - stt-parakeet-batch: Local ASR (requires Parakeet service running) tts: tts-http vector_store: vs-qdrant models: @@ -96,7 +99,7 @@ models: model_type: stt model_provider: parakeet api_family: http - model_url: http://172.17.0.1:8767 + model_url: http://${PARAKEET_ASR_URL:-172.17.0.1:8767} api_key: '' operations: stt_transcribe: diff --git a/config_manager.py b/config_manager.py new file mode 100644 index 00000000..6f8a85a6 --- /dev/null +++ b/config_manager.py @@ -0,0 +1,367 @@ +""" +Shared configuration manager for Chronicle. + +This module provides a unified interface for reading and writing configuration +across both config.yml (source of truth) and .env (backward compatibility). 
+ +Key principles: +- config.yml is the source of truth for memory provider and model settings +- .env files are kept in sync for backward compatibility with legacy code +- All config updates should use this module to maintain consistency + +Usage: + # From any service in the project + from config_manager import ConfigManager + + # For backend service + config = ConfigManager(service_path="backends/advanced") + provider = config.get_memory_provider() + config.set_memory_provider("openmemory_mcp") + + # Auto-detects paths from cwd + config = ConfigManager() +""" + +import logging +import os +import shutil +from datetime import datetime +from pathlib import Path +from typing import Any, Dict, Optional + +import yaml + +logger = logging.getLogger(__name__) + + +class ConfigManager: + """Manages Chronicle configuration across config.yml and .env files.""" + + def __init__(self, service_path: Optional[str] = None, repo_root: Optional[Path] = None): + """ + Initialize ConfigManager. + + Args: + service_path: Path to service directory (e.g., "backends/advanced", "extras/speaker-recognition"). + If None, auto-detects from current working directory. + repo_root: Path to repository root. If None, auto-detects by finding config.yml. 
+ """ + # Find repo root + if repo_root is None: + repo_root = self._find_repo_root() + self.repo_root = Path(repo_root) + + # Find service directory + if service_path is None: + service_path = self._detect_service_path() + self.service_path = self.repo_root / service_path if service_path else None + + # Paths + self.config_yml_path = self.repo_root / "config" / "config.yml" + self.env_path = self.service_path / ".env" if self.service_path else None + + logger.debug(f"ConfigManager initialized: repo_root={self.repo_root}, " + f"service_path={self.service_path}, config_yml={self.config_yml_path}") + + def _find_repo_root(self) -> Path: + """Find repository root using __file__ location (config_manager.py is always at repo root).""" + return Path(__file__).parent + + def _detect_service_path(self) -> Optional[str]: + """Auto-detect service path from current working directory.""" + cwd = Path.cwd() + + # Check if we're in a known service directory + known_services = [ + "backends/advanced", + "extras/speaker-recognition", + "extras/openmemory-mcp", + "extras/asr-services", + ] + + for service in known_services: + service_full_path = self.repo_root / service + if cwd == service_full_path or str(cwd).startswith(str(service_full_path)): + return service + + logger.debug("Could not auto-detect service path from cwd") + return None + + def _load_config_yml(self) -> Dict[str, Any]: + """Load config.yml file.""" + if not self.config_yml_path.exists(): + raise RuntimeError( + f"Configuration file not found at {self.config_yml_path}. " + "Please ensure config/config.yml exists in the repository root." + ) + + try: + with open(self.config_yml_path, 'r') as f: + config = yaml.safe_load(f) + if config is None: + raise RuntimeError( + f"Configuration file {self.config_yml_path} is empty or invalid. " + "Please ensure it contains valid YAML configuration." 
+ ) + return config + except yaml.YAMLError as e: + raise RuntimeError( + f"Invalid YAML in configuration file {self.config_yml_path}: {e}" + ) + except Exception as e: + raise RuntimeError( + f"Failed to load configuration file {self.config_yml_path}: {e}" + ) + + def _save_config_yml(self, config: Dict[str, Any]): + """Save config.yml file with backup.""" + try: + # Create backup + if self.config_yml_path.exists(): + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + backup_path = self.config_yml_path.parent / f"config.yml.backup.{timestamp}" + shutil.copy2(self.config_yml_path, backup_path) + logger.info(f"Backed up config.yml to {backup_path.name}") + + # Write updated config + with open(self.config_yml_path, 'w') as f: + yaml.dump(config, f, default_flow_style=False, sort_keys=False) + + logger.info(f"Saved config.yml to {self.config_yml_path}") + + except Exception as e: + logger.error(f"Failed to save config.yml: {e}") + raise + + def _update_env_file(self, key: str, value: str): + """Update a single key in .env file.""" + if self.env_path is None: + logger.debug("No service path set, skipping .env update") + return + + if not self.env_path.exists(): + logger.warning(f".env file not found at {self.env_path}") + return + + try: + # Read current .env + with open(self.env_path, 'r') as f: + lines = f.readlines() + + # Update or add line + key_found = False + updated_lines = [] + + for line in lines: + if line.strip().startswith(f"{key}="): + updated_lines.append(f"{key}={value}\n") + key_found = True + else: + updated_lines.append(line) + + # If key wasn't found, add it + if not key_found: + updated_lines.append(f"\n# Auto-updated by ConfigManager\n{key}={value}\n") + + # Create backup + backup_path = f"{self.env_path}.bak" + shutil.copy2(self.env_path, backup_path) + logger.debug(f"Backed up .env to {backup_path}") + + # Write updated file + with open(self.env_path, 'w') as f: + f.writelines(updated_lines) + + # Update environment variable for current 
process + os.environ[key] = value + + logger.info(f"Updated {key}={value} in .env file") + + except Exception as e: + logger.error(f"Failed to update .env file: {e}") + raise + + def get_memory_provider(self) -> str: + """ + Get current memory provider from config.yml. + + Returns: + Memory provider name (chronicle, openmemory_mcp, or mycelia) + """ + config = self._load_config_yml() + provider = config.get("memory", {}).get("provider", "chronicle").lower() + + # Map legacy names + if provider in ("friend-lite", "friend_lite"): + provider = "chronicle" + + return provider + + def set_memory_provider(self, provider: str) -> Dict[str, Any]: + """ + Set memory provider in both config.yml and .env. + + This updates: + 1. config.yml: memory.provider field (source of truth) + 2. .env: MEMORY_PROVIDER variable (backward compatibility, if service_path set) + + Args: + provider: Memory provider name (chronicle, openmemory_mcp, or mycelia) + + Returns: + Dict with status and details of the update + + Raises: + ValueError: If provider is invalid + """ + # Validate provider + provider = provider.lower().strip() + valid_providers = ["chronicle", "openmemory_mcp", "mycelia"] + + if provider not in valid_providers: + raise ValueError( + f"Invalid provider '{provider}'. " + f"Valid providers: {', '.join(valid_providers)}" + ) + + # Update config.yml + config = self._load_config_yml() + + if "memory" not in config: + config["memory"] = {} + + config["memory"]["provider"] = provider + self._save_config_yml(config) + + # Update .env for backward compatibility (if we have a service path) + if self.env_path and self.env_path.exists(): + self._update_env_file("MEMORY_PROVIDER", provider) + + return { + "message": ( + f"Memory provider updated to '{provider}' in config.yml" + f"{' and .env' if self.env_path else ''}. " + "Please restart services for changes to take effect." 
+ ), + "provider": provider, + "config_yml_path": str(self.config_yml_path), + "env_path": str(self.env_path) if self.env_path else None, + "requires_restart": True, + "status": "success" + } + + def get_memory_config(self) -> Dict[str, Any]: + """ + Get complete memory configuration from config.yml. + + Returns: + Full memory configuration dict + """ + config = self._load_config_yml() + return config.get("memory", {}) + + def update_memory_config(self, updates: Dict[str, Any]): + """ + Update memory configuration in config.yml. + + Args: + updates: Dict of updates to merge into memory config (deep merge) + """ + config = self._load_config_yml() + + if "memory" not in config: + config["memory"] = {} + + # Deep merge updates recursively + self._deep_merge(config["memory"], updates) + + self._save_config_yml(config) + + # If provider was updated, also update .env + if "provider" in updates and self.env_path: + self._update_env_file("MEMORY_PROVIDER", updates["provider"]) + + def _deep_merge(self, base: dict, updates: dict) -> None: + """ + Recursively merge updates into base dictionary. + + Args: + base: Base dictionary to merge into (modified in-place) + updates: Updates to merge + """ + for key, value in updates.items(): + if key in base and isinstance(base[key], dict) and isinstance(value, dict): + # Recursively merge nested dictionaries + self._deep_merge(base[key], value) + else: + # Direct assignment for non-dict values + base[key] = value + + def get_config_defaults(self) -> Dict[str, Any]: + """ + Get defaults configuration from config.yml. + + Returns: + Defaults configuration dict (llm, embedding, stt, tts, vector_store) + """ + config = self._load_config_yml() + return config.get("defaults", {}) + + def update_config_defaults(self, updates: Dict[str, str]): + """ + Update defaults configuration in config.yml. 
+ + Args: + updates: Dict of updates to merge into defaults config + (e.g., {"llm": "openai-llm", "embedding": "openai-embed"}) + """ + config = self._load_config_yml() + + if "defaults" not in config: + config["defaults"] = {} + + # Update defaults + config["defaults"].update(updates) + + self._save_config_yml(config) + + def get_full_config(self) -> Dict[str, Any]: + """ + Get complete config.yml as dictionary. + + Returns: + Full configuration dict + """ + return self._load_config_yml() + + def save_full_config(self, config: Dict[str, Any]): + """ + Save complete config.yml from dictionary. + + Args: + config: Full configuration dict to save + """ + self._save_config_yml(config) + + +# Global singleton instance +_config_manager: Optional[ConfigManager] = None + + +def get_config_manager(service_path: Optional[str] = None) -> ConfigManager: + """ + Get global ConfigManager singleton instance. + + Args: + service_path: Optional service path for .env updates. + If None, uses cached instance or creates new one. + + Returns: + ConfigManager instance + """ + global _config_manager + + if _config_manager is None or service_path is not None: + _config_manager = ConfigManager(service_path=service_path) + + return _config_manager diff --git a/extras/speaker-recognition/run-test.sh b/extras/speaker-recognition/run-test.sh index 6ac212fa..ac73de91 100755 --- a/extras/speaker-recognition/run-test.sh +++ b/extras/speaker-recognition/run-test.sh @@ -13,12 +13,12 @@ cleanup() { return fi cleanup_called=true - + print_info "Cleaning up on exit..." 
# Kill any background processes in this process group pkill -P $$ 2>/dev/null || true - # Clean up test containers - docker compose -f docker-compose-test.yml down -v 2>/dev/null || true + # Clean up test containers (use project name for consistency) + COMPOSE_PROJECT_NAME="speaker-recognition-test" docker compose -f docker-compose-test.yml down -v 2>/dev/null || true } # Set up signal traps for proper cleanup (but not EXIT to avoid double cleanup) @@ -124,6 +124,9 @@ uv sync --extra cpu --group test print_info "Environment variables configured for testing" +# Use unique project name to avoid conflicts with development environment +export COMPOSE_PROJECT_NAME="speaker-recognition-test" + # Clean test environment print_info "Cleaning test environment..." # Stop any existing test containers diff --git a/services.py b/services.py index 0deeff8a..0ffa014a 100755 --- a/services.py +++ b/services.py @@ -17,7 +17,7 @@ def load_config_yml(): """Load config.yml from repository root""" - config_path = Path(__file__).parent / 'config.yml' + config_path = Path(__file__).parent / 'config' / 'config.yml' if not config_path.exists(): return None @@ -25,7 +25,7 @@ def load_config_yml(): with open(config_path, 'r') as f: return yaml.safe_load(f) except Exception as e: - console.print(f"[yellow]โš ๏ธ Warning: Could not load config.yml: {e}[/yellow]") + console.print(f"[yellow]โš ๏ธ Warning: Could not load config/config.yml: {e}[/yellow]") return None SERVICES = { diff --git a/requirements.txt b/test-requirements.txt similarity index 100% rename from requirements.txt rename to test-requirements.txt diff --git a/tests/configs/README.md b/tests/configs/README.md new file mode 100644 index 00000000..8b1e196f --- /dev/null +++ b/tests/configs/README.md @@ -0,0 +1,132 @@ +# Test Configuration Files + +This directory contains configuration variants for testing different provider combinations. 
+ +## Available Test Configs + +### `deepgram-openai.yml` - Cloud Services +- **STT**: Deepgram Nova 3 +- **LLM**: OpenAI GPT-4o-mini +- **Embedding**: OpenAI text-embedding-3-small +- **Memory**: Chronicle native +- **Use Case**: Cloud-based testing when API credits available +- **Required**: `DEEPGRAM_API_KEY`, `OPENAI_API_KEY` + +### `parakeet-ollama.yml` - Full Local Stack +- **STT**: Parakeet ASR (local) +- **LLM**: Ollama llama3.1:latest +- **Embedding**: Ollama nomic-embed-text +- **Memory**: Chronicle native +- **Use Case**: Offline testing, no API keys needed +- **Required**: Parakeet ASR running on port 8767, Ollama running + +### `full-local.yml` - Alias +Symlink to `parakeet-ollama.yml` for convenience. + +## Usage + +### With run-test.sh + +```bash +# Test with Deepgram + OpenAI (cloud) +CONFIG_FILE=../../tests/configs/deepgram-openai.yml ./backends/advanced/run-test.sh + +# Test with Parakeet + Ollama (local) +CONFIG_FILE=../../tests/configs/parakeet-ollama.yml ./backends/advanced/run-test.sh + +# Using the full-local alias +CONFIG_FILE=../../tests/configs/full-local.yml ./backends/advanced/run-test.sh +``` + +### With Docker Compose + +```bash +# From backends/advanced/ +CONFIG_FILE=../../tests/configs/deepgram-openai.yml docker compose -f docker-compose-test.yml up +``` + +### Matrix Testing + +Test all configurations: + +```bash +for cfg in tests/configs/*.yml; do + echo "Testing with: $cfg" + CONFIG_FILE=$cfg ./backends/advanced/run-test.sh || exit 1 +done +``` + +## Creating New Test Configs + +When creating a new test configuration: + +1. **Name it descriptively**: `{stt}-{llm}.yml` (e.g., `mistral-openai.yml`) +2. **Use environment variables**: Always use `${VAR:-default}` pattern for secrets +3. **Set appropriate defaults**: Update the `defaults:` section to match your provider combo +4. **Include only required models**: Don't include models that aren't used +5. 
**Document requirements**: Update this README with required environment variables + +### Example Structure + +```yaml +# tests/configs/example-config.yml +defaults: + llm: provider-llm + embedding: provider-embed + stt: stt-provider + vector_store: vs-qdrant + +models: + - name: provider-llm + model_type: llm + model_provider: your_provider + api_key: ${YOUR_API_KEY:-} + # ... model config + + - name: stt-provider + model_type: stt + model_provider: your_stt_provider + api_key: ${YOUR_STT_API_KEY:-} + # ... stt config + +memory: + provider: chronicle + # ... memory config +``` + +## Environment Variables + +Test configs use environment variable substitution to avoid hardcoding secrets: + +- **Pattern**: `${VAR_NAME:-default_value}` +- **Example**: `api_key: ${OPENAI_API_KEY:-}` (empty string if not set) +- **Example**: `model_url: ${PARAKEET_ASR_URL:-http://localhost:8767}` (fallback to default) + +### Required by Config + +**deepgram-openai.yml**: +- `DEEPGRAM_API_KEY` - Deepgram transcription API key +- `OPENAI_API_KEY` - OpenAI LLM and embeddings API key + +**parakeet-ollama.yml**: +- `PARAKEET_ASR_URL` (optional) - Defaults to `http://localhost:8767` +- No API keys needed (all local services) + +## Best Practices + +1. **Never hardcode secrets**: Always use environment variables +2. **Test locally first**: Verify config works before adding to repo +3. **Document dependencies**: Update this README with service requirements +4. **Keep configs minimal**: Only include models actually used in tests +5. **Version control**: Test configs are tracked (no secrets), backups are ignored + +## Adding More Combinations + +As you add support for new providers, create corresponding test configs: + +- `mistral-openai.yml` - Mistral Voxtral STT + OpenAI LLM +- `deepgram-ollama.yml` - Deepgram STT + Local Ollama LLM +- `parakeet-openai.yml` - Local Parakeet STT + OpenAI LLM +- etc. + +Each new config should follow the naming convention and documentation pattern above. 
diff --git a/tests/configs/deepgram-openai.yml b/tests/configs/deepgram-openai.yml new file mode 100644 index 00000000..4cae5e7a --- /dev/null +++ b/tests/configs/deepgram-openai.yml @@ -0,0 +1,84 @@ +# Test Configuration: Deepgram (STT) + OpenAI (LLM) +# Cloud-based services - recommended for CI/testing when API credits available + +defaults: + llm: openai-llm + embedding: openai-embed + stt: stt-deepgram + vector_store: vs-qdrant + +models: + - name: openai-llm + description: OpenAI GPT-4o-mini + model_type: llm + model_provider: openai + api_family: openai + model_name: gpt-4o-mini + model_url: https://api.openai.com/v1 + api_key: ${OPENAI_API_KEY:-} + model_params: + temperature: 0.2 + max_tokens: 2000 + model_output: json + + - name: openai-embed + description: OpenAI text-embedding-3-small + model_type: embedding + model_provider: openai + api_family: openai + model_name: text-embedding-3-small + model_url: https://api.openai.com/v1 + api_key: ${OPENAI_API_KEY:-} + embedding_dimensions: 1536 + model_output: vector + + - name: vs-qdrant + description: Qdrant vector database + model_type: vector_store + model_provider: qdrant + api_family: qdrant + model_url: http://${QDRANT_BASE_URL:-qdrant}:${QDRANT_PORT:-6333} + model_params: + host: ${QDRANT_BASE_URL:-qdrant} + port: ${QDRANT_PORT:-6333} + collection_name: omi_memories + + - name: stt-deepgram + description: Deepgram Nova 3 (batch) + model_type: stt + model_provider: deepgram + api_family: http + model_url: https://api.deepgram.com/v1 + api_key: ${DEEPGRAM_API_KEY:-} + operations: + stt_transcribe: + method: POST + path: /listen + headers: + Authorization: Token ${DEEPGRAM_API_KEY:-} + Content-Type: audio/raw + query: + model: nova-3 + language: multi + smart_format: 'true' + punctuate: 'true' + diarize: 'true' + encoding: linear16 + sample_rate: 16000 + channels: '1' + response: + type: json + extract: + text: results.channels[0].alternatives[0].transcript + words: results.channels[0].alternatives[0].words 
+ segments: results.channels[0].alternatives[0].paragraphs.paragraphs + +memory: + provider: chronicle + timeout_seconds: 1200 + extraction: + enabled: true + prompt: | + Extract important information from this conversation and return a JSON object with an array named "facts". + Include personal preferences, plans, names, dates, locations, numbers, and key details. + Keep items concise and useful. diff --git a/tests/configs/full-local.yml b/tests/configs/full-local.yml new file mode 120000 index 00000000..d2e90934 --- /dev/null +++ b/tests/configs/full-local.yml @@ -0,0 +1 @@ +parakeet-ollama.yml \ No newline at end of file diff --git a/tests/configs/parakeet-ollama.yml b/tests/configs/parakeet-ollama.yml new file mode 100644 index 00000000..a4ef958d --- /dev/null +++ b/tests/configs/parakeet-ollama.yml @@ -0,0 +1,73 @@ +# Test Configuration: Parakeet (STT) + Ollama (LLM) +# Full local stack - no API keys needed, runs entirely offline + +defaults: + llm: local-llm + embedding: local-embed + stt: stt-parakeet-batch + vector_store: vs-qdrant + +models: + - name: local-llm + description: Local Ollama LLM + model_type: llm + model_provider: ollama + api_family: openai + model_name: llama3.1:latest + model_url: http://localhost:11434/v1 + api_key: ${OPENAI_API_KEY:-ollama} + model_params: + temperature: 0.2 + max_tokens: 2000 + model_output: json + + - name: local-embed + description: Local embeddings via Ollama nomic-embed-text + model_type: embedding + model_provider: ollama + api_family: openai + model_name: nomic-embed-text:latest + model_url: http://localhost:11434/v1 + api_key: ${OPENAI_API_KEY:-ollama} + embedding_dimensions: 768 + model_output: vector + + - name: vs-qdrant + description: Qdrant vector database + model_type: vector_store + model_provider: qdrant + api_family: qdrant + model_url: http://${QDRANT_BASE_URL:-qdrant}:${QDRANT_PORT:-6333} + model_params: + host: ${QDRANT_BASE_URL:-qdrant} + port: ${QDRANT_PORT:-6333} + collection_name: omi_memories + + 
- name: stt-parakeet-batch + description: Parakeet NeMo ASR (batch) - local offline transcription + model_type: stt + model_provider: parakeet + api_family: http + model_url: ${PARAKEET_ASR_URL:-http://localhost:8767} + api_key: '' + operations: + stt_transcribe: + method: POST + path: /transcribe + content_type: multipart/form-data + response: + type: json + extract: + text: text + words: words + segments: segments + +memory: + provider: chronicle + timeout_seconds: 1200 + extraction: + enabled: true + prompt: | + Extract important information from this conversation and return a JSON object with an array named "facts". + Include personal preferences, plans, names, dates, locations, numbers, and key details. + Keep items concise and useful. diff --git a/tests/configs/parakeet-openai.yml b/tests/configs/parakeet-openai.yml new file mode 100644 index 00000000..f3147c33 --- /dev/null +++ b/tests/configs/parakeet-openai.yml @@ -0,0 +1,73 @@ +# Test Configuration: Parakeet (STT) + OpenAI (LLM) +# Hybrid stack - local transcription, cloud LLM + +defaults: + llm: openai-llm + embedding: openai-embed + stt: stt-parakeet-batch + vector_store: vs-qdrant + +models: + - name: openai-llm + description: OpenAI GPT-4o-mini + model_type: llm + model_provider: openai + api_family: openai + model_name: gpt-4o-mini + model_url: https://api.openai.com/v1 + api_key: ${OPENAI_API_KEY:-} + model_params: + temperature: 0.2 + max_tokens: 2000 + model_output: json + + - name: openai-embed + description: OpenAI text-embedding-3-small + model_type: embedding + model_provider: openai + api_family: openai + model_name: text-embedding-3-small + model_url: https://api.openai.com/v1 + api_key: ${OPENAI_API_KEY:-} + embedding_dimensions: 1536 + model_output: vector + + - name: vs-qdrant + description: Qdrant vector database + model_type: vector_store + model_provider: qdrant + api_family: qdrant + model_url: http://${QDRANT_BASE_URL:-qdrant}:${QDRANT_PORT:-6333} + model_params: + host: 
${QDRANT_BASE_URL:-qdrant} + port: ${QDRANT_PORT:-6333} + collection_name: omi_memories + + - name: stt-parakeet-batch + description: Parakeet NeMo ASR (batch) - local offline transcription + model_type: stt + model_provider: parakeet + api_family: http + model_url: ${PARAKEET_ASR_URL:-http://localhost:8767} + api_key: '' + operations: + stt_transcribe: + method: POST + path: /transcribe + content_type: multipart/form-data + response: + type: json + extract: + text: text + words: words + segments: segments + +memory: + provider: chronicle + timeout_seconds: 1200 + extraction: + enabled: true + prompt: | + Extract important information from this conversation and return a JSON object with an array named "facts". + Include personal preferences, plans, names, dates, locations, numbers, and key details. + Keep items concise and useful. diff --git a/tests/integration/integration_test.robot b/tests/integration/integration_test.robot index d564a54e..4b08381b 100644 --- a/tests/integration/integration_test.robot +++ b/tests/integration/integration_test.robot @@ -11,6 +11,8 @@ Resource ../setup/teardown_keywords.robot Resource ../resources/session_keywords.robot Resource ../resources/audio_keywords.robot Resource ../resources/conversation_keywords.robot +Resource ../resources/memory_keywords.robot +Resource ../resources/queue_keywords.robot Variables ../setup/test_env.py Variables ../setup/test_data.py Suite Setup Suite Setup @@ -127,6 +129,44 @@ Audio Playback And Segment Timing Test Log All ${segment_count} segments have valid timestamps (0s - ${last_end}s) INFO Log Audio Playback And Segment Timing Test Completed Successfully INFO +End To End Pipeline With Memory Validation Test + [Documentation] Complete E2E test with memory extraction and OpenAI quality validation. + ... Provides comprehensive integration testing of the entire audio processing pipeline. + ... Separate from other tests to avoid breaking existing upload-only tests. 
+ [Tags] e2e memory + [Timeout] 600s + + Log Starting End-to-End Pipeline Test with Memory Validation INFO + + # Phase 1: Upload audio and wait for complete processing + Log Uploading audio file and waiting for full processing INFO + ${conversation} ${memories}= Upload Audio File And Wait For Memory + ... ${TEST_AUDIO_FILE} + ... ${TEST_DEVICE_NAME} + + Set Global Variable ${TEST_CONVERSATION} ${conversation} + + # Phase 2: Verify transcription quality + Log Verifying transcription quality INFO + Verify Transcription Quality ${TEST_CONVERSATION} ${EXPECTED_TRANSCRIPT} + + # Phase 3: Verify memories were extracted + ${memory_count}= Get Length ${memories} + Should Be True ${memory_count} > 0 No memories extracted + Log Extracted ${memory_count} memories INFO + + # Phase 4: Verify memory quality with OpenAI (matches Python test!) + Log Validating memory quality with OpenAI INFO + Verify Memory Quality With OpenAI ${memories} ${EXPECTED_MEMORIES} + + # Phase 5: Verify chat integration + Log Verifying chat integration INFO + Verify Chat Integration api ${TEST_CONVERSATION} + + Log End-to-End Pipeline Test Completed Successfully INFO + Log โœ… Transcript verified INFO + Log โœ… ${memory_count} memories extracted and validated with OpenAI INFO + *** Keywords *** diff --git a/tests/resources/audio_keywords.robot b/tests/resources/audio_keywords.robot index 82c3d782..2d37fcbc 100644 --- a/tests/resources/audio_keywords.robot +++ b/tests/resources/audio_keywords.robot @@ -69,6 +69,50 @@ Upload Audio File RETURN ${conversation} +Upload Audio File And Wait For Memory + [Documentation] Upload audio file and wait for complete processing including memory extraction. + ... This is for E2E testing - use Upload Audio File for upload-only tests. + ... Performs assertions inline to verify successful memory extraction. + [Arguments] ${audio_file_path} ${device_name}=robot-test ${folder}=. 
${min_memories}=1 + + # Upload file (uses existing keyword) + ${conversation}= Upload Audio File ${audio_file_path} ${device_name} ${folder} + + # Get conversation ID to find memory job + ${conversation_id}= Set Variable ${conversation}[conversation_id] + Log Conversation ID: ${conversation_id} + + # Find memory job for this conversation + ${memory_jobs}= Get Jobs By Type And Conversation process_memory_job ${conversation_id} + Should Not Be Empty ${memory_jobs} No memory job found for conversation ${conversation_id} + + ${memory_job}= Set Variable ${memory_jobs}[0] + ${memory_job_id}= Set Variable ${memory_job}[job_id] + + Log Found memory job: ${memory_job_id} + + # Wait for memory extraction (returns result dictionary) + ${result}= Wait For Memory Extraction ${memory_job_id} + + # Verify memory extraction succeeded + Should Be True ${result}[success] + ... Memory extraction failed: ${result.get('error_message', 'Unknown error')} + + # Verify job completed successfully + Should Be Equal As Strings ${result}[status] completed + ... Expected job status 'completed', got '${result}[status]' + + # Verify minimum memories were extracted + ${memory_count}= Set Variable ${result}[memory_count] + Should Be True ${memory_count} >= ${min_memories} + ... 
Expected at least ${min_memories} memories, found ${memory_count} + + ${memories}= Set Variable ${result}[memories] + Log Successfully extracted ${memory_count} memories + + RETURN ${conversation} ${memories} + + Get Cropped Audio Info [Documentation] Get cropped audio information for a conversation [Arguments] ${audio_uuid} diff --git a/tests/resources/memory_keywords.robot b/tests/resources/memory_keywords.robot index 4a02c40e..2ab79d9c 100644 --- a/tests/resources/memory_keywords.robot +++ b/tests/resources/memory_keywords.robot @@ -104,3 +104,189 @@ Verify Memory Extraction Should Be True ${api_memory_count} >= ${min_memories} Insufficient API memories: ${api_memory_count} Log Memory extraction verified: conversation=${conv_memory_count}, api=${api_memory_count} INFO + + +Wait For Memory Extraction + [Documentation] Wait for memory job to complete and fetch extracted memories. + ... Returns a result dictionary with success status, job details, and memories. + ... Does not perform assertions - calling tests should verify the results. + ... + ... Return value structure: + ... { + ... 'success': True/False, + ... 'error_message': 'Error description' (only if success=False), + ... 'status': 'completed'/'failed'/'timeout'/'not_found', + ... 'job': {job object} (if available), + ... 'memories': [list of memories] (if successful), + ... 'memory_count': int (if successful) + ... } + [Arguments] ${memory_job_id} ${timeout}=120 + + Log Waiting for memory job ${memory_job_id} to complete... + + # 1. Check if job exists before waiting + ${job_status}= Get Job Status ${memory_job_id} + IF ${job_status} == ${None} + ${result}= Create Dictionary + ... success=${False} + ... error_message=Memory job ${memory_job_id} not found in queue + ... status=not_found + RETURN ${result} + END + + # 2. 
Check if job already failed + ${current_status}= Set Variable ${job_status}[status] + IF '${current_status}' == 'failed' + ${error_info}= Evaluate $job_status.get('exc_info', 'Unknown error') + ${result}= Create Dictionary + ... success=${False} + ... error_message=Memory job already failed: ${error_info} + ... status=failed + ... job=${job_status} + RETURN ${result} + END + + # 3. Wait for job completion with status monitoring + ${start_time}= Get Time epoch + ${end_time}= Evaluate ${start_time} + ${timeout} + ${final_job}= Set Variable ${job_status} + ${final_status}= Set Variable ${current_status} + + WHILE True + # Get current job status + ${job}= Get Job Status ${memory_job_id} + + # Handle job not found (e.g., expired from queue) + IF ${job} == ${None} + ${result}= Create Dictionary + ... success=${False} + ... error_message=Memory job ${memory_job_id} disappeared from queue during wait + ... status=not_found + ... job=${final_job} + RETURN ${result} + END + + ${status}= Set Variable ${job}[status] + ${final_job}= Set Variable ${job} + ${final_status}= Set Variable ${status} + + # Success case - job completed + IF '${status}' == 'completed' or '${status}' == 'finished' + Log Memory job completed successfully + BREAK + END + + # Failure case - job failed + IF '${status}' == 'failed' + ${error_info}= Evaluate $job.get('exc_info', 'Unknown error') + ${result}= Create Dictionary + ... success=${False} + ... error_message=Memory job failed during processing: ${error_info} + ... status=failed + ... job=${job} + RETURN ${result} + END + + # Timeout check + ${current_time}= Get Time epoch + IF ${current_time} >= ${end_time} + ${result}= Create Dictionary + ... success=${False} + ... error_message=Memory job did not complete within ${timeout}s (last status: ${status}) + ... status=timeout + ... job=${job} + RETURN ${result} + END + + # Log progress every iteration + Log Memory job status: ${status} (waiting...) 
DEBUG + + # Wait before next check + Sleep 5s + END + + # 4. Fetch memories from API + TRY + ${response}= GET On Session api /api/memories expected_status=200 + ${memories_data}= Set Variable ${response.json()} + ${memories}= Set Variable ${memories_data}[memories] + ${memory_count}= Get Length ${memories} + + # Return success result + ${result}= Create Dictionary + ... success=${True} + ... status=completed + ... job=${final_job} + ... memories=${memories} + ... memory_count=${memory_count} + + Log Successfully extracted ${memory_count} memories + RETURN ${result} + EXCEPT AS ${error} + # Return error if API fetch fails + ${result}= Create Dictionary + ... success=${False} + ... error_message=Failed to fetch memories from API: ${error} + ... status=api_error + ... job=${final_job} + RETURN ${result} + END + + +Check Memory Similarity With OpenAI + [Documentation] Use OpenAI to check if extracted memories match expected memories + [Arguments] ${actual_memories} ${expected_memories} ${openai_api_key} + + # Extract just the memory text from actual memories + ${actual_memory_texts}= Evaluate [mem.get('memory', '') for mem in $actual_memories] + + # Build OpenAI prompt (same as Python test) + ${prompt}= Catenate SEPARATOR=\n + ... Compare these two lists of memories to determine if they represent content from the same audio source. + ... + ... EXPECTED MEMORIES: + ... ${expected_memories} + ... + ... EXTRACTED MEMORIES: + ... ${actual_memory_texts} + ... + ... Respond in JSON format with: + ... {"similar": true/false, "reason": "brief explanation"} + + # Call OpenAI API + ${headers}= Create Dictionary Authorization=Bearer ${openai_api_key} Content-Type=application/json + ${payload}= Create Dictionary + ... model=gpt-4o-mini + ... messages=${{ [{"role": "user", "content": """${prompt}"""}] }} + ... response_format=${{ {"type": "json_object"} }} + + ${response}= POST https://api.openai.com/v1/chat/completions + ... headers=${headers} + ... json=${payload} + ... 
expected_status=200 + + ${result_json}= Set Variable ${response.json()} + ${content}= Set Variable ${result_json}[choices][0][message][content] + ${similarity_result}= Evaluate json.loads("""${content}""") json + + Log Memory similarity: ${similarity_result}[similar] INFO + Log Reason: ${similarity_result}[reason] INFO + + RETURN ${similarity_result} + + +Verify Memory Quality With OpenAI + [Documentation] Verify extracted memories match expected memories using OpenAI + [Arguments] ${actual_memories} ${expected_memories} + + # Get OpenAI API key from environment + ${openai_key}= Get Environment Variable OPENAI_API_KEY + + # Check similarity + ${result}= Check Memory Similarity With OpenAI ${actual_memories} ${expected_memories} ${openai_key} + + # Assert memories are similar + Should Be True ${result}[similar] == ${True} + ... Memory similarity check failed: ${result}[reason] + + Log โœ… Memory quality validated INFO diff --git a/tests/resources/queue_keywords.robot b/tests/resources/queue_keywords.robot index 32f8b7fa..3d709661 100644 --- a/tests/resources/queue_keywords.robot +++ b/tests/resources/queue_keywords.robot @@ -59,6 +59,7 @@ Get Job Status Check job status [Documentation] Check the status of a specific job by ID + ... 
Fails immediately if job is in 'failed' state when expecting 'completed' [Arguments] ${job_id} ${expected_status} ${job}= Get Job status ${job_id} @@ -69,6 +70,12 @@ Check job status ${actual_status}= Set Variable ${job}[status] Log Job ${job_id} status: ${actual_status} (expected: ${expected_status}) + # Fail fast if job is in failed state when we're expecting completed + IF '${actual_status}' == 'failed' and '${expected_status}' == 'completed' + ${error_msg}= Evaluate $job.get('exc_info') or $job.get('error', 'Unknown error') + Fail Job ${job_id} failed: ${error_msg} + END + Should Be Equal As Strings ${actual_status} ${expected_status} Job status is '${actual_status}', expected '${expected_status}' RETURN ${job} diff --git a/tests/run-custom.sh b/tests/run-custom.sh new file mode 100755 index 00000000..c1ce1317 --- /dev/null +++ b/tests/run-custom.sh @@ -0,0 +1,20 @@ +#!/bin/bash +# Quick wrapper for running Robot tests with custom configs +# Usage: ./run-custom.sh [parakeet-url] +# +# Examples: +# ./run-custom.sh parakeet-openai http://host.docker.internal:8767 +# ./run-custom.sh deepgram-openai +# ./run-custom.sh parakeet-ollama http://host.docker.internal:8767 + +set -e + +CONFIG_NAME="${1:-parakeet-openai}" +PARAKEET_URL="${2:-http://host.docker.internal:8767}" + +echo "Running Robot tests with config: ${CONFIG_NAME}" +echo "Parakeet ASR URL: ${PARAKEET_URL}" + +CONFIG_FILE="../tests/configs/${CONFIG_NAME}.yml" \ + PARAKEET_ASR_URL="${PARAKEET_URL}" \ + ./run-robot-tests.sh diff --git a/tests/run-robot-tests.sh b/tests/run-robot-tests.sh index 0c264875..462377ed 100755 --- a/tests/run-robot-tests.sh +++ b/tests/run-robot-tests.sh @@ -42,6 +42,16 @@ print_info "============================" CLEANUP_CONTAINERS="${CLEANUP_CONTAINERS:-true}" OUTPUTDIR="${OUTPUTDIR:-results}" +# Set default CONFIG_FILE if not provided +# This allows testing with different provider combinations +# Usage: CONFIG_FILE=../tests/configs/parakeet-ollama.yml ./run-robot-tests.sh +export 
CONFIG_FILE="${CONFIG_FILE:-../config/config.yml}" + +# Convert CONFIG_FILE to absolute path (Docker Compose resolves relative paths from compose file location) +if [[ ! "$CONFIG_FILE" = /* ]]; then + CONFIG_FILE="$(cd "$(dirname "$CONFIG_FILE")" && pwd)/$(basename "$CONFIG_FILE")" +fi + # Load environment variables (CI or local) if [ -f "setup/.env.test" ] && [ -z "$DEEPGRAM_API_KEY" ]; then print_info "Loading environment variables from setup/.env.test..." @@ -69,6 +79,7 @@ fi print_info "DEEPGRAM_API_KEY length: ${#DEEPGRAM_API_KEY}" print_info "OPENAI_API_KEY length: ${#OPENAI_API_KEY}" +print_info "Using config file: $CONFIG_FILE" # Create test environment file if it doesn't exist if [ ! -f "setup/.env.test" ]; then @@ -100,6 +111,9 @@ cd ../backends/advanced print_info "Starting test infrastructure..." +# Use unique project name to avoid conflicts with development environment +export COMPOSE_PROJECT_NAME="advanced-backend-test" + # Ensure required config files exist # memory_config.yaml no longer used; memory settings live in config.yml @@ -109,7 +123,7 @@ docker compose -f docker-compose-test.yml down -v 2>/dev/null || true # Force remove any stuck containers with test names print_info "Removing any stuck test containers..." -docker rm -f advanced-mongo-test-1 advanced-redis-test-1 advanced-qdrant-test-1 advanced-chronicle-backend-test-1 advanced-workers-test-1 advanced-webui-test-1 2>/dev/null || true +docker rm -f advanced-backend-test-mongo-test-1 advanced-backend-test-redis-test-1 advanced-backend-test-qdrant-test-1 advanced-backend-test-chronicle-backend-test-1 advanced-backend-test-workers-test-1 advanced-backend-test-webui-test-1 2>/dev/null || true # Start infrastructure services (MongoDB, Redis, Qdrant) print_info "Starting MongoDB, Redis, and Qdrant (fresh containers)..." 
diff --git a/tests/setup/test_data.py b/tests/setup/test_data.py index 787f0399..6d73b265 100644 --- a/tests/setup/test_data.py +++ b/tests/setup/test_data.py @@ -36,6 +36,20 @@ # Expected content for transcript quality verification EXPECTED_TRANSCRIPT = "glass blowing" +# Expected memories for DIY Glass Blowing audio (from Python integration test) +# Source: backends/advanced/tests/assets/expected_memories.json +EXPECTED_MEMORIES = [ + "Nick assists significantly in the glass blowing process", + "Excitement and nervousness expressed during the process", + "Furnace contains about 400 pounds of liquid glass", + "Choice of color for the flower is light blue", + "Caitlin is mentioned as a participant", + "Class involves making a trumpet flower", + "Gravity is used as a tool in glass blowing", + "Nick did most of the turning during the demonstration", + "The video is sponsored by Squarespace." +] + # Expected segment timestamps for DIY Glass Blowing audio (4-minute version, 500 chunks) # These are the cropped audio timestamps after silence removal # Updated 2025-01-22 based on actual test output with streaming websocket processing diff --git a/tests/setup/test_env.py b/tests/setup/test_env.py index fa3e0f9d..d11f2ff8 100644 --- a/tests/setup/test_env.py +++ b/tests/setup/test_env.py @@ -1,25 +1,23 @@ # Test Environment Configuration import os from pathlib import Path +from dotenv import load_dotenv -# Load .env file from backends/advanced directory if it exists -# This allows tests to work when run from VSCode or command line -def load_env_file(): - """Load environment variables from .env file if it exists.""" - # Look for .env in backends/advanced directory - env_file = Path(__file__).parent.parent.parent / "backends" / "advanced" / ".env" - if env_file.exists(): - with open(env_file) as f: - for line in f: - line = line.strip() - if line and not line.startswith('#') and '=' in line: - key, value = line.split('=', 1) - # Only set if not already in environment (CI 
takes precedence) - if key not in os.environ: - os.environ[key] = value - -# Load .env file (CI environment variables take precedence) -load_env_file() +# Load environment files with correct precedence: +# 1. Environment variables (highest priority - from shell, CI, etc.) +# 2. .env.test (test-specific configuration) +# 3. .env (default configuration) + +backend_dir = Path(__file__).parent.parent.parent / "backends" / "advanced" + +# Load in reverse order of precedence (since override=False won't overwrite existing vars) +# Load .env.test first (will set test-specific values) +load_dotenv(backend_dir / ".env.test", override=False) + +# Load .env second (will only fill in missing values, won't override .env.test or existing env vars) +load_dotenv(backend_dir / ".env", override=False) + +# Final precedence: environment variables > .env.test > .env # API Configuration API_URL = 'http://localhost:8001' # Use BACKEND_URL from test.env diff --git a/wizard.py b/wizard.py index f9802c88..d78a910c 100755 --- a/wizard.py +++ b/wizard.py @@ -4,6 +4,7 @@ Handles service selection and delegation only - no configuration duplication """ +import shutil import subprocess import sys from datetime import datetime @@ -314,19 +315,20 @@ def setup_git_hooks(): console.print(f"โš ๏ธ [yellow]Could not setup git hooks: {e} (optional)[/yellow]") def setup_config_file(): - """Setup config.yml from template if it doesn't exist""" - config_file = Path("config.yml") - config_template = Path("config.yml.template") + """Setup config/config.yml from template if it doesn't exist""" + config_file = Path("config/config.yml") + config_template = Path("config/config.yml.template") if not config_file.exists(): if config_template.exists(): - import shutil + # Ensure config/ directory exists + config_file.parent.mkdir(parents=True, exist_ok=True) shutil.copy(config_template, config_file) - console.print("โœ… [green]Created config.yml from template[/green]") + console.print("โœ… [green]Created 
config/config.yml from template[/green]") else: - console.print("โš ๏ธ [yellow]config.yml.template not found, skipping config setup[/yellow]") + console.print("โš ๏ธ [yellow]config/config.yml.template not found, skipping config setup[/yellow]") else: - console.print("โ„น๏ธ [blue]config.yml already exists, keeping existing configuration[/blue]") + console.print("โ„น๏ธ [blue]config/config.yml already exists, keeping existing configuration[/blue]") def main(): """Main orchestration logic""" From 4a4105a73670f8056844e7d13bbe6783ab078ddf Mon Sep 17 00:00:00 2001 From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com> Date: Sat, 3 Jan 2026 02:54:20 +0530 Subject: [PATCH 4/9] Enhance speaker recognition service integration and error handling (#245) * Enhance speaker recognition service integration and error handling - Updated `docker-compose-test.yml` to enable speaker recognition in the test environment and added a new `speaker-service-test` service for testing purposes. - Refactored `run-test.sh` to improve the execution of Robot Framework tests from the repository root. - Enhanced error handling in `speaker_recognition_client.py` to return detailed error messages for connection issues. - Improved error logging in `speaker_jobs.py` to handle and report errors from the speaker recognition service more effectively. - Updated `Dockerfile` to copy the full source code after dependencies are cached, ensuring all necessary files are included in the image. * Remove integration tests workflow and enhance robot tests with HF_TOKEN verification - Deleted the `integration-tests.yml` workflow file to streamline CI processes. - Updated `robot-tests.yml` to include verification for the new `HF_TOKEN` secret, ensuring all required secrets are checked before running tests. 
* Fix key access in system admin tests to use string indexing for speakers data * Refactor Robot Framework tests and enhance error handling in memory services - Removed the creation of the test environment file from the GitHub Actions workflow to streamline setup. - Updated the Robot Framework tests to utilize a unified test script for improved consistency. - Enhanced error messages in the MemoryService class to provide more context on connection failures for LLM and vector store providers. - Added critical checks for API key presence in the OpenAIProvider class to ensure valid credentials are provided before proceeding. - Adjusted various test setup scripts to use a centralized BACKEND_DIR variable for better maintainability and clarity. * Refactor test container cleanup in run-robot-tests.sh - Updated the script to dynamically construct container names from docker-compose services, improving maintainability and reducing hardcoded values. - Enhanced the cleanup process for stuck test containers by utilizing the COMPOSE_PROJECT_NAME variable. 
--- .github/workflows/integration-tests.yml | 83 ---------- .github/workflows/robot-tests.yml | 145 ++---------------- backends/advanced/docker-compose-test.yml | 45 +++++- backends/advanced/run-test.sh | 8 +- .../services/memory/providers/chronicle.py | 11 +- .../memory/providers/llm_providers.py | 45 ++++-- .../speaker_recognition_client.py | 16 +- .../workers/speaker_jobs.py | 41 ++++- backends/advanced/start-workers.sh | 18 ++- extras/speaker-recognition/Dockerfile | 3 + tests/Makefile | 8 +- tests/endpoints/rq_queue_tests.robot | 6 +- tests/endpoints/system_admin_tests.robot | 2 +- tests/run-robot-tests.sh | 16 +- tests/setup/setup_keywords.robot | 14 +- tests/setup/teardown_keywords.robot | 8 +- tests/setup/test_env.py | 9 +- tests/setup/test_manager_keywords.robot | 8 +- .../test-requirements.txt | 1 + 19 files changed, 198 insertions(+), 289 deletions(-) delete mode 100644 .github/workflows/integration-tests.yml rename test-requirements.txt => tests/test-requirements.txt (89%) diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml deleted file mode 100644 index ce2677b4..00000000 --- a/.github/workflows/integration-tests.yml +++ /dev/null @@ -1,83 +0,0 @@ -name: Integration Tests - -on: - push: - branches: [ main, develop ] - paths: - - 'backends/advanced/src/**' - - 'backends/advanced/run-test.sh' - - '.github/workflows/integration-tests.yml' - pull_request: - branches: [ main, develop ] - paths: - - 'backends/advanced/src/**' - - 'backends/advanced/run-test.sh' - - '.github/workflows/integration-tests.yml' - -jobs: - integration-tests: - runs-on: ubuntu-latest - timeout-minutes: 20 - - services: - docker: - image: docker:dind - options: --privileged - - steps: - - name: Checkout code - uses: actions/checkout@v4 - - - name: Verify required secrets - env: - DEEPGRAM_API_KEY: ${{ secrets.DEEPGRAM_API_KEY }} - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - run: | - echo "Verifying required secrets..." 
- if [ -z "$DEEPGRAM_API_KEY" ]; then - echo "โŒ ERROR: DEEPGRAM_API_KEY secret is not set" - exit 1 - fi - if [ -z "$OPENAI_API_KEY" ]; then - echo "โŒ ERROR: OPENAI_API_KEY secret is not set" - exit 1 - fi - echo "โœ“ DEEPGRAM_API_KEY is set (length: ${#DEEPGRAM_API_KEY})" - echo "โœ“ OPENAI_API_KEY is set (length: ${#OPENAI_API_KEY})" - echo "โœ“ All required secrets verified" - - - name: Set up Docker Buildx - uses: docker/setup-buildx-action@v3 - - - name: Install uv - uses: astral-sh/setup-uv@v4 - with: - version: "latest" - - - name: Set up Python - uses: actions/setup-python@v5 - with: - python-version: "3.12" - - - name: Install PortAudio dependencies - run: sudo apt-get update && sudo apt-get install -y portaudio19-dev - - - name: Run Advanced Backend Integration Tests - env: - DEEPGRAM_API_KEY: ${{ secrets.DEEPGRAM_API_KEY }} - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - run: | - cd backends/advanced - chmod +x run-test.sh - ./run-test.sh - - - name: Upload test logs on failure - if: failure() - uses: actions/upload-artifact@v4 - with: - name: integration-test-logs - path: | - backends/advanced/test_integration.log - backends/advanced/docker-compose-test.yml - backends/advanced/.env.test - retention-days: 7 \ No newline at end of file diff --git a/.github/workflows/robot-tests.yml b/.github/workflows/robot-tests.yml index bac4c65a..3333266d 100644 --- a/.github/workflows/robot-tests.yml +++ b/.github/workflows/robot-tests.yml @@ -28,6 +28,7 @@ jobs: env: DEEPGRAM_API_KEY: ${{ secrets.DEEPGRAM_API_KEY }} OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + HF_TOKEN: ${{ secrets.HF_TOKEN }} run: | echo "Verifying required secrets..." 
if [ -z "$DEEPGRAM_API_KEY" ]; then @@ -38,8 +39,13 @@ jobs: echo "โŒ ERROR: OPENAI_API_KEY secret is not set" exit 1 fi + if [ -z "$HF_TOKEN" ]; then + echo "โŒ ERROR: HF_TOKEN secret is not set" + exit 1 + fi echo "โœ“ DEEPGRAM_API_KEY is set (length: ${#DEEPGRAM_API_KEY})" echo "โœ“ OPENAI_API_KEY is set (length: ${#OPENAI_API_KEY})" + echo "โœ“ HF_TOKEN is set (length: ${#HF_TOKEN})" echo "โœ“ All required secrets verified" - name: Set up Docker Buildx @@ -71,28 +77,6 @@ jobs: run: | uv pip install --system robotframework robotframework-requests python-dotenv websockets - - name: Create test environment file - working-directory: tests/setup - run: | - cat > .env.test << EOF - # API URLs - API_URL=http://localhost:8001 - BACKEND_URL=http://localhost:8001 - FRONTEND_URL=http://localhost:3001 - - # Test Admin Credentials - ADMIN_EMAIL=test-admin@example.com - ADMIN_PASSWORD=test-admin-password-123 - - # API Keys (from GitHub secrets) - OPENAI_API_KEY=${{ secrets.OPENAI_API_KEY }} - DEEPGRAM_API_KEY=${{ secrets.DEEPGRAM_API_KEY }} - - # Test Configuration - TEST_TIMEOUT=120 - TEST_DEVICE_NAME=robot-test - EOF - - name: Create test config.yml run: | echo "Copying test configuration file..." @@ -101,122 +85,17 @@ jobs: echo "โœ“ Test config.yml created from tests/configs/deepgram-openai.yml" ls -lh config/config.yml - - name: Start test environment - working-directory: backends/advanced - env: - DEEPGRAM_API_KEY: ${{ secrets.DEEPGRAM_API_KEY }} - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - LLM_PROVIDER: openai - TRANSCRIPTION_PROVIDER: deepgram - MEMORY_PROVIDER: friend_lite - run: | - # Debug: Check if secrets are available - echo "Checking environment variables..." 
- echo "DEEPGRAM_API_KEY is set: $([ -n "$DEEPGRAM_API_KEY" ] && echo 'YES' || echo 'NO')" - echo "OPENAI_API_KEY is set: $([ -n "$OPENAI_API_KEY" ] && echo 'YES' || echo 'NO')" - echo "LLM_PROVIDER: $LLM_PROVIDER" - echo "TRANSCRIPTION_PROVIDER: $TRANSCRIPTION_PROVIDER" - - # Clean any existing test containers for fresh start - echo "Cleaning up any existing test containers..." - docker compose -f docker-compose-test.yml down -v || true - - # Start ALL services in parallel - Docker Compose handles dependencies via healthchecks - echo "Starting all services in parallel (docker-compose-test.yml)..." - echo "Note: Using test compose file with source mounts for faster startup" - - # Export API keys so docker-compose can use them - export DEEPGRAM_API_KEY - export OPENAI_API_KEY - export LLM_PROVIDER - export TRANSCRIPTION_PROVIDER - export MEMORY_PROVIDER - - DOCKER_BUILDKIT=0 docker compose -f docker-compose-test.yml up -d - - # Show container status - echo "Container status:" - docker compose -f docker-compose-test.yml ps - - # Single wait for backend readiness (backend depends_on ensures infra is ready) - echo "Waiting for backend readiness (up to 120s)..." - for i in {1..40}; do - if curl -s http://localhost:8001/readiness > /dev/null 2>&1; then - echo "โœ“ Backend is ready (all dependencies satisfied)" - break - fi - # Show logs every 10 attempts to help debug - if [ $((i % 10)) -eq 0 ]; then - echo "Still waiting... showing recent logs:" - docker compose -f docker-compose-test.yml logs --tail=20 chronicle-backend-test - fi - if [ $i -eq 40 ]; then - echo "โœ— Backend failed to start - showing full logs:" - docker compose -f docker-compose-test.yml logs - exit 1 - fi - echo "Attempt $i/40..." - sleep 3 - done - - echo "โœ“ Backend is ready!" - - # Verify workers are registered with Redis (Robot tests need stable workers) - echo "Waiting for workers to register with Redis (up to 60s)..." 
- for i in {1..30}; do - WORKER_COUNT=$(docker compose -f docker-compose-test.yml exec -T workers-test uv run python -c 'from rq import Worker; from redis import Redis; import os; r = Redis.from_url(os.getenv("REDIS_URL", "redis://redis-test:6379/0")); print(len(Worker.all(connection=r)))' 2>/dev/null || echo "0") - - if [ "$WORKER_COUNT" -ge 6 ]; then - echo "โœ“ Found $WORKER_COUNT workers registered" - # Show worker details - docker compose -f docker-compose-test.yml exec -T workers-test uv run python -c 'from rq import Worker; from redis import Redis; import os; r = Redis.from_url(os.getenv("REDIS_URL", "redis://redis-test:6379/0")); workers = Worker.all(connection=r); print(f"Total registered workers: {len(workers)}"); [print(f" - {w.name}: queues={w.queue_names()}, state={w.get_state()}") for w in workers]' - break - fi - - if [ $i -eq 30 ]; then - echo "โœ— Workers failed to register after 60s" - echo "Showing worker logs:" - docker compose -f docker-compose-test.yml logs --tail=50 workers-test - exit 1 - fi - - echo "Attempt $i/30: $WORKER_COUNT workers registered (waiting for 6+)..." - sleep 2 - done - - echo "โœ“ All services ready!" - - - name: Verify checked out code - working-directory: tests - run: | - echo "Current git commit:" - git log -1 --oneline - echo "" - echo "Test files in current checkout:" - find . -name "*.robot" -type f | head -10 - echo "" - echo "Sample of tags in test files:" - grep -h "\[Tags\]" endpoints/*.robot infrastructure/*.robot integration/*.robot 2>/dev/null | head -20 || echo "No tag files found" - - - name: Clean previous test results - working-directory: tests - run: | - echo "Cleaning any previous test results..." 
- rm -rf results - mkdir -p results - echo "โœ“ Fresh results directory created" - - name: Run Robot Framework tests working-directory: tests env: - # Required for backend imports in test libraries - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - OPENAI_BASE_URL: https://api.openai.com/v1 - OPENAI_MODEL: gpt-4o-mini + # Required for test runner script DEEPGRAM_API_KEY: ${{ secrets.DEEPGRAM_API_KEY }} + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + HF_TOKEN: ${{ secrets.HF_TOKEN }} + CLEANUP_CONTAINERS: "false" # Don't cleanup in CI - handled by workflow run: | - # Run all tests (don't fail workflow to allow artifact upload) - make all OUTPUTDIR=results + # Use the unified test script that mirrors local development + ./run-robot-tests.sh TEST_EXIT_CODE=$? echo "test_exit_code=$TEST_EXIT_CODE" >> $GITHUB_ENV exit 0 # Don't fail here, we'll fail at the end after uploading artifacts diff --git a/backends/advanced/docker-compose-test.yml b/backends/advanced/docker-compose-test.yml index 3b0e1eaf..c699bc6b 100644 --- a/backends/advanced/docker-compose-test.yml +++ b/backends/advanced/docker-compose-test.yml @@ -39,9 +39,9 @@ services: - OPENMEMORY_USER_ID=${OPENMEMORY_USER_ID:-openmemory} - MYCELIA_URL=http://mycelia-backend-test:5173 - MYCELIA_DB=mycelia_test - # Disable speaker recognition in test environment to prevent segment duplication + # Enable speaker recognition in test environment - DISABLE_SPEAKER_RECOGNITION=false - - SPEAKER_SERVICE_URL=https://localhost:8085 + - SPEAKER_SERVICE_URL=http://speaker-service-test:8085 - CORS_ORIGINS=http://localhost:3001,http://localhost:8001,https://localhost:3001,https://localhost:8001 # Set low inactivity timeout for tests (2 seconds instead of 60) - SPEECH_INACTIVITY_THRESHOLD_SECONDS=2 @@ -54,6 +54,8 @@ services: condition: service_healthy redis-test: condition: service_started + speaker-service-test: + condition: service_healthy healthcheck: test: ["CMD", "curl", "-f", "http://localhost:8000/readiness"] interval: 10s 
@@ -119,6 +121,36 @@ services: timeout: 3s retries: 5 + speaker-service-test: + build: + context: ../../extras/speaker-recognition + dockerfile: Dockerfile + args: + PYTORCH_CUDA_VERSION: cpu + image: speaker-recognition-test:latest + ports: + - "8086:8085" # Avoid conflict with dev speaker service on 8085 + volumes: + - ../../extras/speaker-recognition/src:/app/src + - ../../extras/speaker-recognition/model_cache:/models + - ../../extras/speaker-recognition/audio_chunks:/app/audio_chunks + - ../../extras/speaker-recognition/debug:/app/debug + - ../../extras/speaker-recognition/speaker_data:/app/data + environment: + - HF_HOME=/models + - HF_TOKEN=${HF_TOKEN} + - SIMILARITY_THRESHOLD=0.15 + - SPEAKER_SERVICE_HOST=0.0.0.0 + - SPEAKER_SERVICE_PORT=8085 + - DEEPGRAM_API_KEY=${DEEPGRAM_API_KEY} + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:8085/health"] + interval: 30s + timeout: 10s + retries: 5 + start_period: 60s + restart: unless-stopped + workers-test: build: context: . 
@@ -151,7 +183,7 @@ services: - MYCELIA_URL=http://mycelia-backend-test:5173 - MYCELIA_DB=mycelia_test - DISABLE_SPEAKER_RECOGNITION=false - - SPEAKER_SERVICE_URL=https://localhost:8085 + - SPEAKER_SERVICE_URL=http://speaker-service-test:8085 # Set low inactivity timeout for tests (2 seconds instead of 60) - SPEECH_INACTIVITY_THRESHOLD_SECONDS=2 # Wait for audio queue to drain before timing out (test mode) @@ -165,6 +197,8 @@ services: condition: service_started qdrant-test: condition: service_started + speaker-service-test: + condition: service_healthy restart: unless-stopped # Mycelia - AI memory and timeline service (test environment) @@ -237,6 +271,11 @@ services: # condition: service_healthy # restart: unless-stopped +# Use default bridge network for test isolation (no external network dependency) +networks: + default: + driver: bridge + # CI Considerations (for future implementation): # - GitHub Actions can run these services in isolated containers # - Port conflicts won't exist in CI since each job runs in isolation diff --git a/backends/advanced/run-test.sh b/backends/advanced/run-test.sh index 17773dc1..01204be6 100755 --- a/backends/advanced/run-test.sh +++ b/backends/advanced/run-test.sh @@ -166,9 +166,8 @@ if [ ! -f "diarization_config.json" ] && [ -f "diarization_config.json.template" print_success "diarization_config.json created" fi -# Install dependencies with uv -print_info "Installing dependencies with uv..." -uv sync --dev --group test +# Note: Robot Framework dependencies are managed via tests/test-requirements.txt +# The integration tests use Docker containers for service dependencies # Set up environment variables for testing print_info "Setting up test environment variables..." 
@@ -211,8 +210,9 @@ export DOCKER_BUILDKIT=0 export TEST_MODE=dev # Run the Robot Framework integration tests with extended timeout (mem0 needs time for comprehensive extraction) +# IMPORTANT: Robot tests must be run from the repository root where backends/ and tests/ are siblings print_info "Starting Robot Framework integration tests (timeout: 15 minutes)..." -if timeout 900 uv run robot --outputdir ../../test-results --loglevel INFO ../../tests/integration/integration_test.robot; then +if (cd ../.. && timeout 900 robot --outputdir test-results --loglevel INFO tests/integration/integration_test.robot); then print_success "Integration tests completed successfully!" else TEST_EXIT_CODE=$? diff --git a/backends/advanced/src/advanced_omi_backend/services/memory/providers/chronicle.py b/backends/advanced/src/advanced_omi_backend/services/memory/providers/chronicle.py index a0974e21..3fb96f00 100644 --- a/backends/advanced/src/advanced_omi_backend/services/memory/providers/chronicle.py +++ b/backends/advanced/src/advanced_omi_backend/services/memory/providers/chronicle.py @@ -84,9 +84,16 @@ async def initialize(self) -> None: vector_ok = await self.vector_store.test_connection() if not llm_ok: - raise RuntimeError("LLM provider connection failed") + raise RuntimeError( + f"LLM provider connection failed for {self.config.llm_provider.value}. " + f"Check API keys, network connectivity, and service availability. " + f"Memory processing cannot proceed without a working LLM connection." + ) if not vector_ok: - raise RuntimeError("Vector store connection failed") + raise RuntimeError( + f"Vector store connection failed for {self.config.vector_store_provider.value}. " + f"Check that Qdrant service is running and accessible." 
+ ) self._initialized = True memory_logger.info( diff --git a/backends/advanced/src/advanced_omi_backend/services/memory/providers/llm_providers.py b/backends/advanced/src/advanced_omi_backend/services/memory/providers/llm_providers.py index f72bdb81..a73f1bc8 100644 --- a/backends/advanced/src/advanced_omi_backend/services/memory/providers/llm_providers.py +++ b/backends/advanced/src/advanced_omi_backend/services/memory/providers/llm_providers.py @@ -8,15 +8,12 @@ memory action proposals using their respective APIs. """ +import asyncio import json import logging import os -import httpx from typing import Any, Dict, List, Optional -# TODO: Re-enable spacy when Docker build is fixed -# import spacy - from ..base import LLMProviderBase from ..prompts import ( FACT_RETRIEVAL_PROMPT, @@ -30,10 +27,14 @@ ) from ..utils import extract_json_from_text +# TODO: Re-enable spacy when Docker build is fixed +# import spacy + + memory_logger = logging.getLogger("memory_service") # New: config-driven model registry + universal client -from advanced_omi_backend.model_registry import get_models_registry, ModelDef +from advanced_omi_backend.model_registry import ModelDef, get_models_registry def _is_langfuse_enabled() -> bool: @@ -62,7 +63,7 @@ def _get_openai_client(api_key: str, base_url: str, is_async: bool = False): memory_logger.debug("Using OpenAI client with Langfuse tracing") else: # Use regular OpenAI client without tracing - from openai import OpenAI, AsyncOpenAI + from openai import AsyncOpenAI, OpenAI openai = type('OpenAI', (), {'OpenAI': OpenAI, 'AsyncOpenAI': AsyncOpenAI})() memory_logger.debug("Using OpenAI client without tracing") @@ -174,18 +175,33 @@ def __init__(self, config: Dict[str, Any]): if not self.llm_def: raise RuntimeError("No default LLM defined in config.yml") + # Store parameters for LLM self.api_key = self.llm_def.api_key or "" self.base_url = self.llm_def.model_url self.model = self.llm_def.model_name self.temperature = 
float(self.llm_def.model_params.get("temperature", 0.1)) self.max_tokens = int(self.llm_def.model_params.get("max_tokens", 2000)) - + # Store parameters for embeddings (use separate config if available) self.embedding_model = (self.embed_def.model_name if self.embed_def else self.llm_def.model_name) self.embedding_api_key = (self.embed_def.api_key if self.embed_def else self.api_key) self.embedding_base_url = (self.embed_def.model_url if self.embed_def else self.base_url) - + + # CRITICAL: Validate API keys are present - fail fast instead of hanging + if not self.api_key or self.api_key.strip() == "": + raise RuntimeError( + f"API key is missing or empty for LLM provider '{self.llm_def.model_provider}' (model: {self.model}). " + f"Please set the API key in config.yml or environment variables. " + f"Cannot proceed without valid API credentials." + ) + + if self.embed_def and (not self.embedding_api_key or self.embedding_api_key.strip() == ""): + raise RuntimeError( + f"API key is missing or empty for embedding provider '{self.embed_def.model_provider}' (model: {self.embedding_model}). " + f"Please set the API key in config.yml or environment variables." + ) + # Lazy client creation self._client = None @@ -285,20 +301,21 @@ async def generate_embeddings(self, texts: List[str]) -> List[List[float]]: raise async def test_connection(self) -> bool: - """Test OpenAI connection. + """Test OpenAI connection with timeout. 
Returns: True if connection successful, False otherwise """ + try: - try: + # Add 10-second timeout to prevent hanging on API calls + async with asyncio.timeout(10): client = _get_openai_client(api_key=self.api_key, base_url=self.base_url, is_async=True) await client.models.list() return True - except Exception as e: - memory_logger.error(f"OpenAI connection test failed: {e}") - return False - + except asyncio.TimeoutError: + memory_logger.error(f"OpenAI connection test timed out after 10s - check network connectivity and API endpoint") + return False except Exception as e: memory_logger.error(f"OpenAI connection test failed: {e}") return False diff --git a/backends/advanced/src/advanced_omi_backend/speaker_recognition_client.py b/backends/advanced/src/advanced_omi_backend/speaker_recognition_client.py index 50b12645..99c9e594 100644 --- a/backends/advanced/src/advanced_omi_backend/speaker_recognition_client.py +++ b/backends/advanced/src/advanced_omi_backend/speaker_recognition_client.py @@ -144,16 +144,16 @@ async def diarize_identify_match( except ClientConnectorError as e: logger.error(f"๐ŸŽค Failed to connect to speaker recognition service: {e}") - return {} + return {"error": "connection_failed", "message": str(e), "segments": []} except asyncio.TimeoutError as e: logger.error(f"๐ŸŽค Timeout connecting to speaker recognition service: {e}") - return {} + return {"error": "timeout", "message": str(e), "segments": []} except aiohttp.ClientError as e: logger.warning(f"๐ŸŽค Client error during speaker recognition: {e}") - return {} + return {"error": "client_error", "message": str(e), "segments": []} except Exception as e: logger.error(f"๐ŸŽค Error during speaker recognition: {e}") - return {} + return {"error": "unknown_error", "message": str(e), "segments": []} async def diarize_and_identify( self, audio_path: str, words: None, user_id: Optional[str] = None # NOT IMPLEMENTED @@ -265,18 +265,18 @@ async def diarize_and_identify( except ClientConnectorError as e: 
logger.error(f"๐ŸŽค [DIARIZE] โŒ Failed to connect to speaker recognition service at {self.service_url}: {e}") - return {} + return {"error": "connection_failed", "message": str(e), "segments": []} except asyncio.TimeoutError as e: logger.error(f"๐ŸŽค [DIARIZE] โŒ Timeout connecting to speaker recognition service: {e}") - return {} + return {"error": "timeout", "message": str(e), "segments": []} except aiohttp.ClientError as e: logger.warning(f"๐ŸŽค [DIARIZE] โŒ Client error during speaker recognition: {e}") - return {} + return {"error": "client_error", "message": str(e), "segments": []} except Exception as e: logger.error(f"๐ŸŽค [DIARIZE] โŒ Error during speaker diarization and identification: {e}") import traceback logger.debug(traceback.format_exc()) - return {} + return {"error": "unknown_error", "message": str(e), "segments": []} async def identify_speakers(self, audio_path: str, segments: List[Dict]) -> Dict[str, str]: """ diff --git a/backends/advanced/src/advanced_omi_backend/workers/speaker_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/speaker_jobs.py index d9165b2d..066d05c5 100644 --- a/backends/advanced/src/advanced_omi_backend/workers/speaker_jobs.py +++ b/backends/advanced/src/advanced_omi_backend/workers/speaker_jobs.py @@ -58,6 +58,23 @@ async def check_enrolled_speakers_job( transcription_results=raw_results ) + # Check for errors from speaker service + if speaker_result and speaker_result.get("error"): + error_type = speaker_result.get("error") + error_message = speaker_result.get("message", "Unknown error") + logger.error(f"๐ŸŽค [SPEAKER CHECK] Speaker service error: {error_type} - {error_message}") + + # Fail the job - don't create conversation if speaker service failed + return { + "success": False, + "session_id": session_id, + "error": f"Speaker recognition failed: {error_type}", + "error_details": error_message, + "enrolled_present": False, + "identified_speakers": [], + "processing_time_seconds": time.time() - start_time 
+ } + # Extract identified speakers identified_speakers = [] if speaker_result and "segments" in speaker_result: @@ -206,7 +223,29 @@ async def recognise_speakers_job( user_id=user_id ) - if not speaker_result or "segments" not in speaker_result: + # Check for errors from speaker service + if speaker_result.get("error"): + error_type = speaker_result.get("error") + error_message = speaker_result.get("message", "Unknown error") + logger.error(f"๐ŸŽค Speaker recognition service error: {error_type} - {error_message}") + + # Raise exception for connection failures so dependent jobs are canceled + # This ensures RQ marks the job as "failed" instead of "completed" + if error_type in ("connection_failed", "timeout", "client_error"): + raise RuntimeError(f"Speaker recognition service unavailable: {error_type} - {error_message}") + + # For other errors (e.g., processing errors), return error dict without failing + return { + "success": False, + "conversation_id": conversation_id, + "version_id": version_id, + "error": f"Speaker recognition failed: {error_type}", + "error_details": error_message, + "processing_time_seconds": time.time() - start_time + } + + # Service worked but found no segments (legitimate empty result) + if not speaker_result or "segments" not in speaker_result or not speaker_result["segments"]: logger.warning(f"๐ŸŽค Speaker recognition returned no segments") return { "success": True, diff --git a/backends/advanced/start-workers.sh b/backends/advanced/start-workers.sh index 2ed50727..3fea5a39 100755 --- a/backends/advanced/start-workers.sh +++ b/backends/advanced/start-workers.sh @@ -181,12 +181,14 @@ monitor_worker_health & MONITOR_PID=$! echo "๐Ÿฉบ Self-healing monitor started: PID $MONITOR_PID" -# Wait for any worker process to exit -wait -n - -# If we get here, one worker process has exited - kill everything -echo "โš ๏ธ One worker exited, stopping all workers..." 
-kill $MONITOR_PID 2>/dev/null || true +# Keep the script running and let the self-healing monitor handle worker failures +# Don't use wait -n (fail-fast on first worker exit) - this kills all workers when one fails +# Instead, wait for the monitor process or explicit shutdown signal +echo "โณ Workers running - self-healing monitor will restart failed workers automatically" +wait $MONITOR_PID + +# If monitor exits (should only happen on SIGTERM/SIGINT), shut down gracefully +echo "๐Ÿ›‘ Monitor exited, shutting down all workers..." kill $RQ_WORKER_1_PID 2>/dev/null || true kill $RQ_WORKER_2_PID 2>/dev/null || true kill $RQ_WORKER_3_PID 2>/dev/null || true @@ -198,5 +200,5 @@ kill $AUDIO_PERSISTENCE_WORKER_PID 2>/dev/null || true [ -n "$AUDIO_STREAM_PARAKEET_WORKER_PID" ] && kill $AUDIO_STREAM_PARAKEET_WORKER_PID 2>/dev/null || true wait -echo "๐Ÿ”„ All workers stopped" -exit 1 +echo "โœ… All workers stopped gracefully" +exit 0 diff --git a/extras/speaker-recognition/Dockerfile b/extras/speaker-recognition/Dockerfile index d14baf35..0c1ccc81 100644 --- a/extras/speaker-recognition/Dockerfile +++ b/extras/speaker-recognition/Dockerfile @@ -31,6 +31,9 @@ COPY src/simple_speaker_recognition/__init__.py src/simple_speaker_recognition/ RUN uv sync --no-dev --extra ${PYTORCH_CUDA_VERSION} && \ uv cache clean +# Copy the full source code (after dependencies are cached) +COPY src/ src/ + # Create directories RUN mkdir -p /app/audio_chunks /app/debug /app/data /models diff --git a/tests/Makefile b/tests/Makefile index 707743e4..8ba002f5 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -31,7 +31,7 @@ help: # Creates a persistent fixture conversation that won't be deleted between suites all: @echo "Running all tests..." 
- CREATE_FIXTURE=true uv run robot --outputdir $(OUTPUTDIR) \ + CREATE_FIXTURE=true uv run --with-requirements test-requirements.txt robot --outputdir $(OUTPUTDIR) \ --name "All Tests" \ --console verbose \ $(TEST_DIR) @@ -39,7 +39,7 @@ all: # Run only endpoint tests endpoints: @echo "Running endpoint tests..." - uv run robot --outputdir $(OUTPUTDIR) \ + uv run --with-requirements test-requirements.txt robot --outputdir $(OUTPUTDIR) \ --name "Endpoint Tests" \ --console verbose \ endpoints @@ -47,7 +47,7 @@ endpoints: # Run only integration tests integration: @echo "Running integration tests..." - CREATE_FIXTURE=true uv run robot --outputdir $(OUTPUTDIR) \ + CREATE_FIXTURE=true uv run --with-requirements test-requirements.txt robot --outputdir $(OUTPUTDIR) \ --name "Integration Tests" \ --console verbose \ integration @@ -55,7 +55,7 @@ integration: # Run only infrastructure tests infra: @echo "Running infrastructure tests..." - uv run robot --outputdir $(OUTPUTDIR) \ + uv run --with-requirements test-requirements.txt robot --outputdir $(OUTPUTDIR) \ --name "Infrastructure Tests" \ --console verbose \ infrastructure diff --git a/tests/endpoints/rq_queue_tests.robot b/tests/endpoints/rq_queue_tests.robot index 892a1090..5206b5ff 100644 --- a/tests/endpoints/rq_queue_tests.robot +++ b/tests/endpoints/rq_queue_tests.robot @@ -19,7 +19,7 @@ Suite Teardown Suite Teardown Test Setup Test Cleanup *** Variables *** ${TEST_TIMEOUT} 20s -${COMPOSE_FILE} backends/advanced/docker-compose-test.yml +${COMPOSE_FILE} docker-compose-test.yml *** Keywords *** @@ -29,11 +29,11 @@ Restart Backend Service # Stop backend container Run Process docker compose -f ${COMPOSE_FILE} stop chronicle-backend-test - ... cwd=. timeout=30s + ... cwd=${BACKEND_DIR} timeout=30s # Start backend container again Run Process docker compose -f ${COMPOSE_FILE} start chronicle-backend-test - ... cwd=. timeout=60s + ... 
cwd=${BACKEND_DIR} timeout=60s # Wait for backend to be ready again Wait Until Keyword Succeeds ${TEST_TIMEOUT} 5s diff --git a/tests/endpoints/system_admin_tests.robot b/tests/endpoints/system_admin_tests.robot index e524c264..283c1865 100644 --- a/tests/endpoints/system_admin_tests.robot +++ b/tests/endpoints/system_admin_tests.robot @@ -79,7 +79,7 @@ Get Enrolled Speakers Test Dictionary Should Contain Key ${response_data} speakers # If service is available, verify speakers data - Should Be True isinstance($response_data[speakers], list) + Should Be True isinstance($response_data["speakers"], list) Get Speaker Service Status Test [Documentation] Test checking speaker recognition service status (admin only) diff --git a/tests/run-robot-tests.sh b/tests/run-robot-tests.sh index 462377ed..d623c8f9 100755 --- a/tests/run-robot-tests.sh +++ b/tests/run-robot-tests.sh @@ -121,9 +121,13 @@ export COMPOSE_PROJECT_NAME="advanced-backend-test" print_info "Cleaning up any existing test environment..." docker compose -f docker-compose-test.yml down -v 2>/dev/null || true -# Force remove any stuck containers with test names +# Force remove any stuck containers with test names (uses COMPOSE_PROJECT_NAME) print_info "Removing any stuck test containers..." -docker rm -f advanced-backend-test-mongo-test-1 advanced-backend-test-redis-test-1 advanced-backend-test-qdrant-test-1 advanced-backend-test-chronicle-backend-test-1 advanced-backend-test-workers-test-1 advanced-backend-test-webui-test-1 2>/dev/null || true +# Dynamically construct container names from docker-compose services +TEST_SERVICES=(mongo-test redis-test qdrant-test chronicle-backend-test workers-test webui-test speaker-service-test) +for service in "${TEST_SERVICES[@]}"; do + docker rm -f "${COMPOSE_PROJECT_NAME}-${service}-1" 2>/dev/null || true +done # Start infrastructure services (MongoDB, Redis, Qdrant) print_info "Starting MongoDB, Redis, and Qdrant (fresh containers)..." 
@@ -224,14 +228,8 @@ print_success "All services ready!" # Return to tests directory cd ../../tests -# Install Robot Framework dependencies if not in CI -if [ -z "$CI" ]; then - print_info "Installing Robot Framework dependencies..." - uv venv --quiet --python 3.12 || true # May already exist - uv pip install --quiet robotframework robotframework-requests python-dotenv websockets -fi - # Run Robot Framework tests via Makefile +# Dependencies are handled automatically by 'uv run' in Makefile print_info "Running Robot Framework tests..." print_info "Output directory: $OUTPUTDIR" diff --git a/tests/setup/setup_keywords.robot b/tests/setup/setup_keywords.robot index 3fe7bd17..656a082d 100644 --- a/tests/setup/setup_keywords.robot +++ b/tests/setup/setup_keywords.robot @@ -85,7 +85,7 @@ Prod Mode Setup Log To Console Tearing down existing containers and volumes... Stop Docker Services remove_volumes=${True} - Run Process rm -rf data/test_mongo_data data/test_qdrant_data data/test_audio_chunks cwd=backends/advanced shell=True + Run Process rm -rf data/test_mongo_data data/test_qdrant_data data/test_audio_chunks cwd=${BACKEND_DIR} shell=True Log To Console Building and starting fresh containers... Start Docker Services build=${True} @@ -95,7 +95,7 @@ Prod Mode Setup Start Docker Services [Documentation] Start Docker services using docker-compose ... 
Checks if services are already running to avoid redundant starts - [Arguments] ${compose_file}=docker-compose-test.yml ${working_dir}=backends/advanced ${build}=${False} + [Arguments] ${compose_file}=docker-compose-test.yml ${working_dir}=${BACKEND_DIR} ${build}=${False} ${is_up}= Run Keyword And Return Status Check Services Ready ${API_URL} @@ -120,7 +120,7 @@ Start Docker Services Stop Docker Services [Documentation] Stop Docker services using docker-compose - [Arguments] ${compose_file}=docker-compose-test.yml ${working_dir}=backends/advanced ${remove_volumes}=${False} + [Arguments] ${compose_file}=docker-compose-test.yml ${working_dir}=${BACKEND_DIR} ${remove_volumes}=${False} IF ${remove_volumes} Run Process docker compose -f ${compose_file} down -v cwd=${working_dir} shell=True @@ -130,7 +130,7 @@ Stop Docker Services Rebuild Docker Services [Documentation] Rebuild and restart Docker services - [Arguments] ${compose_file}=docker-compose-test.yml ${working_dir}=backends/advanced + [Arguments] ${compose_file}=docker-compose-test.yml ${working_dir}=${BACKEND_DIR} Log To Console Rebuilding containers with latest code... Run Process docker compose -f ${compose_file} up -d --build cwd=${working_dir} shell=True @@ -155,7 +155,7 @@ Start Speaker Recognition Service END Log Starting speaker-recognition-service - Run Process docker compose -f extras/speaker-recognition/docker-compose-test.yml up -d --build shell=True + Run Process docker compose -f docker-compose-test.yml up -d --build cwd=${SPEAKER_RECOGNITION_DIR} shell=True Log Waiting for speaker recognition service to start... 
Wait Until Keyword Succeeds 60s 5s Check Services Ready ${SPEAKER_RECOGNITION_URL} @@ -166,9 +166,9 @@ Stop Speaker Recognition Service [Arguments] ${remove_volumes}=${False} IF ${remove_volumes} - Run Process docker compose -f extras/speaker-recognition/docker-compose-test.yml down -v shell=True + Run Process docker compose -f docker-compose-test.yml down -v cwd=${SPEAKER_RECOGNITION_DIR} shell=True ELSE - Run Process docker compose -f extras/speaker-recognition/docker-compose-test.yml down shell=True + Run Process docker compose -f docker-compose-test.yml down cwd=${SPEAKER_RECOGNITION_DIR} shell=True END Check Environment Variables diff --git a/tests/setup/teardown_keywords.robot b/tests/setup/teardown_keywords.robot index 4553ad0a..cd4b2b5a 100644 --- a/tests/setup/teardown_keywords.robot +++ b/tests/setup/teardown_keywords.robot @@ -38,7 +38,7 @@ Dev Mode Teardown Log To Console \n=== Dev Mode Teardown (Default) === Log To Console โœ“ Keeping containers running for next test run Log To Console Tip: Use 'TEST_MODE=prod' for full cleanup or run manually: - Log To Console docker compose -f backends/advanced/docker-compose-ci.yml down -v + Log To Console docker compose -f ${BACKEND_DIR}/docker-compose-ci.yml down -v # Only delete HTTP sessions Delete All Sessions @@ -52,9 +52,9 @@ Prod Mode Teardown Stop Docker Services remove_volumes=${True} # Clean up any remaining volumes - Run Process rm -rf backends/advanced/data/test_mongo_data shell=True - Run Process rm -rf ${EXECDIR}/backends/advanced/data/test_qdrant_data shell=True - Run Process rm -rf ${EXECDIR}/backends/advanced/data/test_audio_chunks shell=True + Run Process rm -rf ${BACKEND_DIR}/data/test_mongo_data shell=True + Run Process rm -rf ${BACKEND_DIR}/data/test_qdrant_data shell=True + Run Process rm -rf ${BACKEND_DIR}/data/test_audio_chunks shell=True # Delete all HTTP sessions Delete All Sessions diff --git a/tests/setup/test_env.py b/tests/setup/test_env.py index d11f2ff8..94956a14 100644 --- 
a/tests/setup/test_env.py +++ b/tests/setup/test_env.py @@ -8,7 +8,14 @@ # 2. .env.test (test-specific configuration) # 3. .env (default configuration) -backend_dir = Path(__file__).parent.parent.parent / "backends" / "advanced" +# Find repository root (tests/setup/test_env.py -> go up 2 levels) +REPO_ROOT = Path(__file__).parent.parent.parent +backend_dir = REPO_ROOT / "backends" / "advanced" + +# Export absolute paths for Robot Framework keywords +BACKEND_DIR = str(backend_dir.absolute()) +REPO_ROOT_DIR = str(REPO_ROOT.absolute()) +SPEAKER_RECOGNITION_DIR = str((REPO_ROOT / "extras" / "speaker-recognition").absolute()) # Load in reverse order of precedence (since override=False won't overwrite existing vars) # Load .env.test first (will set test-specific values) diff --git a/tests/setup/test_manager_keywords.robot b/tests/setup/test_manager_keywords.robot index 65506551..8927994a 100644 --- a/tests/setup/test_manager_keywords.robot +++ b/tests/setup/test_manager_keywords.robot @@ -62,8 +62,8 @@ Clear Test Databases Log To Console Qdrant collections cleared # Clear audio files (except fixtures subfolder) - Run Process bash -c find ${EXECDIR}/backends/advanced/data/test_audio_chunks -maxdepth 1 -name "*.wav" -delete || true shell=True - Run Process bash -c rm -rf ${EXECDIR}/backends/advanced/data/test_debug_dir/* || true shell=True + Run Process bash -c find ${BACKEND_DIR}/data/test_audio_chunks -maxdepth 1 -name "*.wav" -delete || true shell=True + Run Process bash -c rm -rf ${BACKEND_DIR}/data/test_debug_dir/* || true shell=True Log To Console Audio files cleared (fixtures/ subfolder preserved) # Clear container audio files (except fixtures subfolder) @@ -90,8 +90,8 @@ Clear All Test Data Run Process curl -s -X DELETE http://localhost:6337/collections/conversations shell=True # Clear all audio files - Run Process bash -c rm -rf ${EXECDIR}/backends/advanced/data/test_audio_chunks/* || true shell=True - Run Process bash -c rm -rf 
${EXECDIR}/backends/advanced/data/test_debug_dir/* || true shell=True + Run Process bash -c rm -rf ${BACKEND_DIR}/data/test_audio_chunks/* || true shell=True + Run Process bash -c rm -rf ${BACKEND_DIR}/data/test_debug_dir/* || true shell=True # Clear all Redis data Run Process docker exec ${REDIS_CONTAINER} redis-cli FLUSHALL shell=True diff --git a/test-requirements.txt b/tests/test-requirements.txt similarity index 89% rename from test-requirements.txt rename to tests/test-requirements.txt index 48b8ad96..4efaf39b 100644 --- a/test-requirements.txt +++ b/tests/test-requirements.txt @@ -3,4 +3,5 @@ robotframework-tidy robotframework-requests robotframework-browser python-dotenv +websockets \ No newline at end of file From 078575af03e7f69b01b62f2468d8697d7e0399c4 Mon Sep 17 00:00:00 2001 From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com> Date: Fri, 2 Jan 2026 22:03:03 +0000 Subject: [PATCH 5/9] Enhance run-robot-tests.sh for improved logging and cleanup - Set absolute paths for consistent directory references to simplify navigation. - Capture container logs, status, and resource usage for better debugging. - Refactor cleanup process to utilize dynamic backend directory references, improving maintainability. - Ensure proper navigation back to the tests directory after operations. --- tests/run-robot-tests.sh | 65 +++++++++++++++++++++++++++++++++++----- 1 file changed, 58 insertions(+), 7 deletions(-) diff --git a/tests/run-robot-tests.sh b/tests/run-robot-tests.sh index d623c8f9..de0a9ae8 100755 --- a/tests/run-robot-tests.sh +++ b/tests/run-robot-tests.sh @@ -35,6 +35,10 @@ if [ ! -f "Makefile" ] || [ ! 
-d "endpoints" ]; then exit 1 fi +# Set absolute paths for consistent directory references +TESTS_DIR="$(pwd)" +BACKEND_DIR="$(cd ../backends/advanced && pwd)" + print_info "Robot Framework Test Runner" print_info "============================" @@ -107,7 +111,7 @@ EOF fi # Navigate to backend directory for docker compose -cd ../backends/advanced +cd "$BACKEND_DIR" print_info "Starting test infrastructure..." @@ -226,7 +230,7 @@ done print_success "All services ready!" # Return to tests directory -cd ../../tests +cd "$TESTS_DIR" # Run Robot Framework tests via Makefile # Dependencies are handled automatically by 'uv run' in Makefile @@ -243,13 +247,13 @@ fi # Show service logs if tests failed if [ $TEST_EXIT_CODE -ne 0 ]; then print_info "Showing service logs..." - cd ../backends/advanced + cd "$BACKEND_DIR" echo "=== Backend Logs (last 50 lines) ===" docker compose -f docker-compose-test.yml logs --tail=50 chronicle-backend-test echo "" echo "=== Worker Logs (last 50 lines) ===" docker compose -f docker-compose-test.yml logs --tail=50 workers-test - cd ../../tests + cd "$TESTS_DIR" fi # Display test results summary @@ -291,16 +295,63 @@ if stats is not None: PYTHON_SCRIPT fi +# Capture container logs before cleanup (always, for debugging) +print_info "Capturing container logs for debugging..." +LOG_DIR="${TESTS_DIR}/${OUTPUTDIR}/container-logs" +mkdir -p "$LOG_DIR" + +cd "$BACKEND_DIR" + +# Capture container status +print_info "Capturing container status..." +docker compose -f docker-compose-test.yml ps > "$LOG_DIR/container-status.txt" 2>&1 || true + +# Capture worker registration status +print_info "Capturing worker registration status..." 
+docker compose -f docker-compose-test.yml exec -T workers-test uv run python -c ' +from rq import Worker +from redis import Redis +import os + +redis_url = os.getenv("REDIS_URL", "redis://redis-test:6379/0") +r = Redis.from_url(redis_url) +workers = Worker.all(connection=r) + +print(f"Total workers: {len(workers)}") +print(f"\nWorker details:") +for i, worker in enumerate(workers, 1): + print(f" {i}. {worker.name}") + print(f" State: {worker.state}") + print(f" Queues: {[q.name for q in worker.queues]}") + print(f" Current job: {worker.get_current_job()}") + print() +' > "$LOG_DIR/worker-status.txt" 2>&1 || echo "Failed to capture worker status" > "$LOG_DIR/worker-status.txt" + +# Capture logs from all services +print_info "Capturing service logs..." +SERVICES=(chronicle-backend-test workers-test mongo-test redis-test qdrant-test speaker-service-test) +for service in "${SERVICES[@]}"; do + docker compose -f docker-compose-test.yml logs --tail=200 "$service" > "$LOG_DIR/${service}.log" 2>&1 || true +done + +# Capture container resource usage +print_info "Capturing container resource usage..." +docker stats --no-stream --format "table {{.Name}}\t{{.CPUPerc}}\t{{.MemUsage}}\t{{.MemPerc}}\t{{.NetIO}}\t{{.BlockIO}}" > "$LOG_DIR/container-stats.txt" 2>&1 || true + +print_success "Container logs saved to: $LOG_DIR" + +cd "$TESTS_DIR" + # Cleanup test containers if [ "$CLEANUP_CONTAINERS" = "true" ]; then print_info "Cleaning up test containers..." 
- cd ../backends/advanced + cd "$BACKEND_DIR" docker compose -f docker-compose-test.yml down -v - cd ../../tests + cd "$TESTS_DIR" print_success "Cleanup complete" else print_warning "Skipping container cleanup (CLEANUP_CONTAINERS=false)" - print_info "To cleanup manually: cd backends/advanced && docker compose -f docker-compose-test.yml down -v" + print_info "To cleanup manually: cd $BACKEND_DIR && docker compose -f docker-compose-test.yml down -v" fi if [ $TEST_EXIT_CODE -eq 0 ]; then From f41237702a65b35497adfebae25530a059c55135 Mon Sep 17 00:00:00 2001 From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com> Date: Fri, 2 Jan 2026 22:26:58 +0000 Subject: [PATCH 6/9] Add speaker recognition configuration and update test script defaults - Introduced speaker recognition settings in config.yml.template, allowing for easy enable/disable and service URL configuration. - Updated run-robot-tests.sh to use a test-specific configuration file that disables speaker recognition for improved CI performance. - Modified deepgram-openai.yml to disable speaker recognition during CI tests to enhance execution speed. 
--- config/config.yml.template | 8 ++++++++ tests/configs/deepgram-openai.yml | 5 +++++ tests/run-robot-tests.sh | 6 +++--- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/config/config.yml.template b/config/config.yml.template index 7b43d042..3670a6bb 100644 --- a/config/config.yml.template +++ b/config/config.yml.template @@ -209,3 +209,11 @@ memory: enabled: false neo4j_host: neo4j-mem0 timeout: 30 + +speaker_recognition: + # Enable/disable speaker recognition (overrides DISABLE_SPEAKER_RECOGNITION env var) + enabled: true + # Service URL (defaults to SPEAKER_SERVICE_URL env var if not specified) + service_url: null + # Request timeout in seconds + timeout: 60 diff --git a/tests/configs/deepgram-openai.yml b/tests/configs/deepgram-openai.yml index 4cae5e7a..46c8ddef 100644 --- a/tests/configs/deepgram-openai.yml +++ b/tests/configs/deepgram-openai.yml @@ -82,3 +82,8 @@ memory: Extract important information from this conversation and return a JSON object with an array named "facts". Include personal preferences, plans, names, dates, locations, numbers, and key details. Keep items concise and useful. 
+ +speaker_recognition: + # Disable speaker recognition in CI tests (too slow, blocks workers) + enabled: false + timeout: 60 diff --git a/tests/run-robot-tests.sh b/tests/run-robot-tests.sh index de0a9ae8..b5af8682 100755 --- a/tests/run-robot-tests.sh +++ b/tests/run-robot-tests.sh @@ -47,9 +47,9 @@ CLEANUP_CONTAINERS="${CLEANUP_CONTAINERS:-true}" OUTPUTDIR="${OUTPUTDIR:-results}" # Set default CONFIG_FILE if not provided -# This allows testing with different provider combinations -# Usage: CONFIG_FILE=../tests/configs/parakeet-ollama.yml ./run-robot-tests.sh -export CONFIG_FILE="${CONFIG_FILE:-../config/config.yml}" +# Use test config by default (disables speaker recognition for CI performance) +# Override: CONFIG_FILE=../config/config.yml ./run-robot-tests.sh +export CONFIG_FILE="${CONFIG_FILE:-configs/deepgram-openai.yml}" # Convert CONFIG_FILE to absolute path (Docker Compose resolves relative paths from compose file location) if [[ ! "$CONFIG_FILE" = /* ]]; then From efc942871ae788f8608070fc60b0391e95d1287a Mon Sep 17 00:00:00 2001 From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com> Date: Fri, 2 Jan 2026 22:52:45 +0000 Subject: [PATCH 7/9] Refactor speaker recognition configuration management - Updated docker-compose-test.yml to clarify speaker recognition settings, now controlled via config.yml for improved CI performance. - Enhanced model_registry.py to include a dedicated speaker_recognition field for better configuration handling. - Modified speaker_recognition_client.py to load configuration from config.yml, allowing for dynamic enabling/disabling of the speaker recognition service based on the configuration. 
--- backends/advanced/docker-compose-test.yml | 5 +-- .../advanced_omi_backend/model_registry.py | 4 ++ .../speaker_recognition_client.py | 45 ++++++++++++++----- 3 files changed, 40 insertions(+), 14 deletions(-) diff --git a/backends/advanced/docker-compose-test.yml b/backends/advanced/docker-compose-test.yml index c699bc6b..867edc5f 100644 --- a/backends/advanced/docker-compose-test.yml +++ b/backends/advanced/docker-compose-test.yml @@ -39,8 +39,7 @@ services: - OPENMEMORY_USER_ID=${OPENMEMORY_USER_ID:-openmemory} - MYCELIA_URL=http://mycelia-backend-test:5173 - MYCELIA_DB=mycelia_test - # Enable speaker recognition in test environment - - DISABLE_SPEAKER_RECOGNITION=false + # Speaker recognition controlled by config.yml (disabled in test config for CI performance) - SPEAKER_SERVICE_URL=http://speaker-service-test:8085 - CORS_ORIGINS=http://localhost:3001,http://localhost:8001,https://localhost:3001,https://localhost:8001 # Set low inactivity timeout for tests (2 seconds instead of 60) @@ -182,7 +181,7 @@ services: - OPENMEMORY_USER_ID=${OPENMEMORY_USER_ID:-openmemory} - MYCELIA_URL=http://mycelia-backend-test:5173 - MYCELIA_DB=mycelia_test - - DISABLE_SPEAKER_RECOGNITION=false + # Speaker recognition controlled by config.yml (disabled in test config for CI performance) - SPEAKER_SERVICE_URL=http://speaker-service-test:8085 # Set low inactivity timeout for tests (2 seconds instead of 60) - SPEECH_INACTIVITY_THRESHOLD_SECONDS=2 diff --git a/backends/advanced/src/advanced_omi_backend/model_registry.py b/backends/advanced/src/advanced_omi_backend/model_registry.py index 47bef4ba..05704457 100644 --- a/backends/advanced/src/advanced_omi_backend/model_registry.py +++ b/backends/advanced/src/advanced_omi_backend/model_registry.py @@ -181,6 +181,10 @@ class AppModels(BaseModel): default_factory=dict, description="Memory service configuration" ) + speaker_recognition: Dict[str, Any] = Field( + default_factory=dict, + description="Speaker recognition service 
configuration" + ) def get_by_name(self, name: str) -> Optional[ModelDef]: """Get a model by its unique name. diff --git a/backends/advanced/src/advanced_omi_backend/speaker_recognition_client.py b/backends/advanced/src/advanced_omi_backend/speaker_recognition_client.py index 99c9e594..e829eff5 100644 --- a/backends/advanced/src/advanced_omi_backend/speaker_recognition_client.py +++ b/backends/advanced/src/advanced_omi_backend/speaker_recognition_client.py @@ -3,6 +3,8 @@ This module provides an optional integration with the speaker recognition service to enhance transcripts with actual speaker names instead of generic labels. + +Configuration is managed via config.yml (speaker_recognition section). """ import asyncio @@ -15,6 +17,8 @@ import aiohttp from aiohttp import ClientConnectorError +from advanced_omi_backend.model_registry import get_models_registry + logger = logging.getLogger(__name__) @@ -25,23 +29,42 @@ def __init__(self, service_url: Optional[str] = None): """ Initialize the speaker recognition client. + Configuration is read from config.yml (speaker_recognition section). + The 'enabled' flag controls whether speaker recognition is active. 
+ Args: service_url: URL of the speaker recognition service (e.g., http://speaker-service:8085) - If not provided, uses SPEAKER_SERVICE_URL env var + If not provided, uses config.yml service_url or SPEAKER_SERVICE_URL env var """ - # Check if speaker recognition is explicitly disabled - if os.getenv("DISABLE_SPEAKER_RECOGNITION", "").lower() in ["true", "1", "yes"]: + # Load speaker recognition config from config.yml + registry = get_models_registry() + if not registry or not registry.speaker_recognition: + # No config found, default to disabled + self.enabled = False self.service_url = None + logger.info("Speaker recognition client disabled (no configuration found)") + return + + speaker_config = registry.speaker_recognition + if not speaker_config.get("enabled", True): + # Disabled in config self.enabled = False - logger.info("Speaker recognition client disabled (DISABLE_SPEAKER_RECOGNITION=true)") - else: - self.service_url = service_url or os.getenv("SPEAKER_SERVICE_URL") - self.enabled = bool(self.service_url) + self.service_url = None + logger.info("Speaker recognition client disabled (config.yml enabled=false)") + return + + # Enabled - determine URL (priority: param > config > env var) + self.service_url = ( + service_url + or speaker_config.get("service_url") + or os.getenv("SPEAKER_SERVICE_URL") + ) + self.enabled = bool(self.service_url) - if self.enabled: - logger.info(f"Speaker recognition client initialized with URL: {self.service_url}") - else: - logger.info("Speaker recognition client disabled (no service URL configured)") + if self.enabled: + logger.info(f"Speaker recognition client initialized with URL: {self.service_url}") + else: + logger.info("Speaker recognition client disabled (no service URL configured)") async def diarize_identify_match( self, audio_path: str, transcript_data: Dict, user_id: Optional[str] = None From 5738ef487c3f6555de1fe3eca9130b03a7e0383f Mon Sep 17 00:00:00 2001 From: Ankush Malaker 
<43288948+AnkushMalaker@users.noreply.github.com> Date: Fri, 2 Jan 2026 23:13:05 +0000 Subject: [PATCH 8/9] Add minimum worker count verification to infrastructure tests - Introduced a new keyword to verify that the minimum number of workers are registered, enhancing the robustness of health checks. - Updated the worker count validation test to include a wait mechanism for worker registration, improving test reliability. - Clarified comments regarding expected worker counts to reflect the distinction between RQ and audio stream workers. --- tests/infrastructure/infra_tests.robot | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/tests/infrastructure/infra_tests.robot b/tests/infrastructure/infra_tests.robot index 48b1a057..11dad0c7 100644 --- a/tests/infrastructure/infra_tests.robot +++ b/tests/infrastructure/infra_tests.robot @@ -44,6 +44,14 @@ Get Worker Count From Health Endpoint ${worker_count}= Set Variable ${redis_service}[worker_count] RETURN ${worker_count} +Verify Minimum Worker Count + [Documentation] Verify that at least the minimum number of workers are registered + [Arguments] ${min_count} + ${worker_count}= Get Worker Count From Health Endpoint + Log To Console Current worker count: ${worker_count} + Should Be True ${worker_count} >= ${min_count} msg=Expected at least ${min_count} workers, got ${worker_count} + RETURN ${worker_count} + Simulate Worker Registration Loss [Documentation] Simulate the scenario where workers lose Redis registration ... This happens when: @@ -176,6 +184,13 @@ Worker Count Validation Test ... - Worker state information is accurate [Tags] health queue + # Wait for workers to register (up to 20s, checking every 2s) + Log To Console \nโณ Waiting for workers to register (up to 20s)... + ${worker_count}= Wait Until Keyword Succeeds 20s 2s + ... 
Verify Minimum Worker Count 6 + Log To Console โœ… Workers registered: ${worker_count} + + # Get full health details for validation ${response}= GET On Session api /health Should Be Equal As Integers ${response.status_code} 200 @@ -191,7 +206,7 @@ Worker Count Validation Test Dictionary Should Contain Key ${redis_service} active_workers Dictionary Should Contain Key ${redis_service} idle_workers - # Verify worker count is reasonable (7 workers: 6 RQ + 1 audio stream) + # Get worker state details ${worker_count}= Set Variable ${redis_service}[worker_count] ${active_workers}= Set Variable ${redis_service}[active_workers] ${idle_workers}= Set Variable ${redis_service}[idle_workers] @@ -201,8 +216,8 @@ Worker Count Validation Test Log To Console Active workers: ${active_workers} Log To Console Idle workers: ${idle_workers} - # Verify expected worker count (should be 7: 6 RQ workers + 1 audio stream worker) - # Note: Audio stream worker might not register in RQ, so we expect 6-7 workers + # Verify expected worker count (should be 7: 6 RQ workers + 1 audio persistence worker) + # Audio stream workers (Deepgram/Parakeet) are NOT RQ workers, so they don't register Should Be True ${worker_count} >= 6 msg=Expected at least 6 RQ workers registered Should Be True ${worker_count} <= 8 msg=Expected no more than 8 workers From 02a5c9e0bdcda20b16bbaf48c2fd6b9b4db894af Mon Sep 17 00:00:00 2001 From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com> Date: Fri, 2 Jan 2026 23:27:52 +0000 Subject: [PATCH 9/9] Update configuration management and enhance model handling - Added OBSIDIAN_ENABLED configuration to ChronicleSetup for improved feature toggling. - Introduced speaker_recognition configuration handling in model_registry.py to streamline model loading. - Refactored imports in deepgram.py to improve clarity and reduce redundancy. 
--- backends/advanced/init.py | 1 + .../advanced/src/advanced_omi_backend/model_registry.py | 6 ++++-- .../services/transcription/deepgram.py | 5 +++-- tests/infrastructure/infra_tests.robot | 9 +++++---- 4 files changed, 13 insertions(+), 8 deletions(-) diff --git a/backends/advanced/init.py b/backends/advanced/init.py index c68fa10f..f093bf4d 100644 --- a/backends/advanced/init.py +++ b/backends/advanced/init.py @@ -406,6 +406,7 @@ def setup_obsidian(self): if enable_obsidian: # Update .env with credentials + self.config["OBSIDIAN_ENABLED"] = "true" self.config["NEO4J_HOST"] = "neo4j-mem0" self.config["NEO4J_USER"] = "neo4j" self.config["NEO4J_PASSWORD"] = neo4j_password diff --git a/backends/advanced/src/advanced_omi_backend/model_registry.py b/backends/advanced/src/advanced_omi_backend/model_registry.py index 05704457..53d919ca 100644 --- a/backends/advanced/src/advanced_omi_backend/model_registry.py +++ b/backends/advanced/src/advanced_omi_backend/model_registry.py @@ -317,7 +317,8 @@ def load_models_config(force_reload: bool = False) -> Optional[AppModels]: defaults = raw.get("defaults", {}) or {} model_list = raw.get("models", []) or [] memory_settings = raw.get("memory", {}) or {} - + speaker_recognition_cfg = raw.get("speaker_recognition", {}) or {} + # Parse and validate models using Pydantic models: Dict[str, ModelDef] = {} for m in model_list: @@ -334,7 +335,8 @@ def load_models_config(force_reload: bool = False) -> Optional[AppModels]: _REGISTRY = AppModels( defaults=defaults, models=models, - memory=memory_settings + memory=memory_settings, + speaker_recognition=speaker_recognition_cfg ) return _REGISTRY diff --git a/backends/advanced/src/advanced_omi_backend/services/transcription/deepgram.py b/backends/advanced/src/advanced_omi_backend/services/transcription/deepgram.py index 03b2936d..ef54a3d9 100644 --- a/backends/advanced/src/advanced_omi_backend/services/transcription/deepgram.py +++ 
b/backends/advanced/src/advanced_omi_backend/services/transcription/deepgram.py @@ -6,6 +6,9 @@ import logging +from advanced_omi_backend.services.audio_stream.consumer import BaseAudioStreamConsumer +from advanced_omi_backend.services.transcription import get_transcription_provider + logger = logging.getLogger(__name__) @@ -32,8 +35,6 @@ def __init__(self, redis_client, buffer_chunks: int = 30): redis_client: Connected Redis client buffer_chunks: Number of chunks to buffer before transcribing (default: 30 = ~7.5s) """ - from advanced_omi_backend.services.audio_stream.consumer import BaseAudioStreamConsumer - from advanced_omi_backend.services.transcription import get_transcription_provider # Get registry-driven transcription provider self.provider = get_transcription_provider(mode="batch") diff --git a/tests/infrastructure/infra_tests.robot b/tests/infrastructure/infra_tests.robot index 11dad0c7..c0d401db 100644 --- a/tests/infrastructure/infra_tests.robot +++ b/tests/infrastructure/infra_tests.robot @@ -216,10 +216,11 @@ Worker Count Validation Test Log To Console Active workers: ${active_workers} Log To Console Idle workers: ${idle_workers} - # Verify expected worker count (should be 7: 6 RQ workers + 1 audio persistence worker) - # Audio stream workers (Deepgram/Parakeet) are NOT RQ workers, so they don't register - Should Be True ${worker_count} >= 6 msg=Expected at least 6 RQ workers registered - Should Be True ${worker_count} <= 8 msg=Expected no more than 8 workers + # Verify exact worker count + # Expected: 7 RQ workers (6 general workers + 1 audio persistence worker) + # Note: Audio stream workers (Deepgram/Parakeet) are NOT RQ workers - they don't register + # We wait up to 20s for registration, so all workers should be present + Should Be Equal As Integers ${worker_count} 7 msg=Expected exactly 7 RQ workers (6 general + 1 audio persistence) # Verify active + idle = total ${sum}= Evaluate ${active_workers} + ${idle_workers}