From 3aaacdfb74ffc309d2b25793c7957c4970074071 Mon Sep 17 00:00:00 2001 From: AJASU Date: Wed, 14 Jan 2026 13:57:35 +0900 Subject: [PATCH 01/10] Update quickstart.md (#268) --- quickstart.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/quickstart.md b/quickstart.md index 86d4851b..100a4fe2 100644 --- a/quickstart.md +++ b/quickstart.md @@ -48,6 +48,10 @@ Think of it like having Siri/Alexa, but it's **your own AI** running on **your h - **Linux**: [Install Docker](https://docs.docker.com/engine/install/) - **After install**: Make sure Docker Desktop is running +> **WSL Users**: Chronicle services will fail to start unless Docker +> is installed and running inside WSL2 +> (or Docker Desktop with **WSL integration enabled**). + **uv** (Python package manager): ```bash curl -LsSf https://astral.sh/uv/install.sh | sh @@ -319,4 +323,4 @@ Before connecting your phone, make sure everything works: - **Full Documentation**: [CLAUDE.md](CLAUDE.md) - Complete technical reference - **Architecture Details**: [Docs/features.md](Docs/features.md) - How everything works -- **Advanced Setup**: [Docs/init-system.md](Docs/init-system.md) - Power user options \ No newline at end of file +- **Advanced Setup**: [Docs/init-system.md](Docs/init-system.md) - Power user options From 00cd589de7ba71a468b90fd909af2fbb6fd2202c Mon Sep 17 00:00:00 2001 From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com> Date: Fri, 23 Jan 2026 21:07:29 +0000 Subject: [PATCH 02/10] Add plugin UI configuration panel and refactor plugin management This commit introduces a comprehensive plugin configuration UI with the following enhancements: - Add PluginSettingsForm component for plugin configuration - Create modular plugin configuration components: * EnvVarsSection - manage plugin environment variables * FormField - reusable form field component * OrchestrationSection - configure plugin orchestration settings * PluginConfigPanel - main plugin configuration panel * 
PluginListSidebar - plugin list and navigation - Update plugin service to support new configuration endpoints - Enhance system controller and routes for plugin management - Update Plugins page with new UI components - Enhance API service with plugin configuration methods --- .../controllers/system_controller.py | 278 +++++++++++++++ .../plugins/email_summarizer/plugin.py | 92 +++++ .../plugins/homeassistant/plugin.py | 94 +++++ .../routers/modules/system_routes.py | 72 ++++ .../services/plugin_service.py | 280 ++++++++++++++- .../src/components/PluginSettingsForm.tsx | 320 ++++++++++++++++++ .../src/components/plugins/EnvVarsSection.tsx | 91 +++++ .../src/components/plugins/FormField.tsx | 216 ++++++++++++ .../plugins/OrchestrationSection.tsx | 239 +++++++++++++ .../components/plugins/PluginConfigPanel.tsx | 287 ++++++++++++++++ .../components/plugins/PluginListSidebar.tsx | 162 +++++++++ backends/advanced/webui/src/pages/Plugins.tsx | 34 +- backends/advanced/webui/src/services/api.ts | 23 +- 13 files changed, 2184 insertions(+), 4 deletions(-) create mode 100644 backends/advanced/webui/src/components/PluginSettingsForm.tsx create mode 100644 backends/advanced/webui/src/components/plugins/EnvVarsSection.tsx create mode 100644 backends/advanced/webui/src/components/plugins/FormField.tsx create mode 100644 backends/advanced/webui/src/components/plugins/OrchestrationSection.tsx create mode 100644 backends/advanced/webui/src/components/plugins/PluginConfigPanel.tsx create mode 100644 backends/advanced/webui/src/components/plugins/PluginListSidebar.tsx diff --git a/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py index bff60037..7831fc40 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py @@ -962,3 +962,281 @@ async def 
validate_plugins_config_yaml(yaml_content: str) -> dict: except Exception as e: logger.error(f"Error validating plugins config: {e}") return {"valid": False, "error": f"Validation error: {str(e)}"} + + +# Structured Plugin Configuration Management Functions (Form-based UI) + +async def get_plugins_metadata() -> dict: + """Get plugin metadata for form-based configuration UI. + + Returns complete metadata for all discovered plugins including: + - Plugin information (name, description, enabled status) + - Auto-generated schemas from config.yml (or explicit schema.yml) + - Current configuration with masked secrets + - Orchestration settings (events, conditions) + + Returns: + Dict with plugins list containing metadata for each plugin + """ + try: + from advanced_omi_backend.services.plugin_service import ( + discover_plugins, + get_plugin_metadata, + ) + + # Discover all available plugins + discovered_plugins = discover_plugins() + + # Load orchestration config from plugins.yml + plugins_yml_path = get_plugins_yml_path() + orchestration_configs = {} + + if plugins_yml_path.exists(): + with open(plugins_yml_path, 'r') as f: + plugins_data = yaml.safe_load(f) or {} + orchestration_configs = plugins_data.get('plugins', {}) + + # Build metadata for each plugin + plugins_metadata = [] + for plugin_id, plugin_class in discovered_plugins.items(): + # Get orchestration config (or empty dict if not configured) + orchestration_config = orchestration_configs.get(plugin_id, { + 'enabled': False, + 'events': [], + 'condition': {'type': 'always'} + }) + + # Get complete metadata including schema + metadata = get_plugin_metadata(plugin_id, plugin_class, orchestration_config) + plugins_metadata.append(metadata) + + logger.info(f"Retrieved metadata for {len(plugins_metadata)} plugins") + + return { + "plugins": plugins_metadata, + "status": "success" + } + + except Exception as e: + logger.exception("Error getting plugins metadata") + raise e + + +async def 
update_plugin_config_structured(plugin_id: str, config: dict) -> dict: + """Update plugin configuration from structured JSON (form data). + + Updates the three-file plugin architecture: + 1. config/plugins.yml - Orchestration (enabled, events, condition) + 2. plugins/{plugin_id}/config.yml - Settings with ${ENV_VAR} references + 3. backends/advanced/.env - Actual secret values + + Args: + plugin_id: Plugin identifier + config: Structured configuration with 'orchestration', 'settings', 'env_vars' sections + + Returns: + Success message with list of updated files + """ + try: + from advanced_omi_backend.services.plugin_service import discover_plugins + import advanced_omi_backend.plugins + + # Validate plugin exists + discovered_plugins = discover_plugins() + if plugin_id not in discovered_plugins: + raise ValueError(f"Plugin '{plugin_id}' not found") + + updated_files = [] + + # 1. Update config/plugins.yml (orchestration) + if 'orchestration' in config: + plugins_yml_path = get_plugins_yml_path() + + # Load current plugins.yml + if plugins_yml_path.exists(): + with open(plugins_yml_path, 'r') as f: + plugins_data = yaml.safe_load(f) or {} + else: + plugins_data = {} + + if 'plugins' not in plugins_data: + plugins_data['plugins'] = {} + + # Update orchestration config + orchestration = config['orchestration'] + plugins_data['plugins'][plugin_id] = { + 'enabled': orchestration.get('enabled', False), + 'events': orchestration.get('events', []), + 'condition': orchestration.get('condition', {'type': 'always'}) + } + + # Create backup + if plugins_yml_path.exists(): + backup_path = str(plugins_yml_path) + '.backup' + shutil.copy2(plugins_yml_path, backup_path) + + # Create config directory if needed + plugins_yml_path.parent.mkdir(parents=True, exist_ok=True) + + # Write updated plugins.yml + with open(plugins_yml_path, 'w') as f: + yaml.dump(plugins_data, f, default_flow_style=False, sort_keys=False) + + updated_files.append(str(plugins_yml_path)) + 
logger.info(f"Updated orchestration config for '{plugin_id}' in {plugins_yml_path}") + + # 2. Update plugins/{plugin_id}/config.yml (settings with env var references) + if 'settings' in config: + plugins_dir = Path(advanced_omi_backend.plugins.__file__).parent + plugin_config_path = plugins_dir / plugin_id / "config.yml" + + # Load current config.yml + if plugin_config_path.exists(): + with open(plugin_config_path, 'r') as f: + plugin_config_data = yaml.safe_load(f) or {} + else: + plugin_config_data = {} + + # Update settings (preserve ${ENV_VAR} references) + settings = config['settings'] + plugin_config_data.update(settings) + + # Create backup + if plugin_config_path.exists(): + backup_path = str(plugin_config_path) + '.backup' + shutil.copy2(plugin_config_path, backup_path) + + # Write updated config.yml + with open(plugin_config_path, 'w') as f: + yaml.dump(plugin_config_data, f, default_flow_style=False, sort_keys=False) + + updated_files.append(str(plugin_config_path)) + logger.info(f"Updated settings for '{plugin_id}' in {plugin_config_path}") + + # 3. 
Update .env (only changed env vars) + if 'env_vars' in config and config['env_vars']: + env_path = os.path.join(os.getcwd(), ".env") + + if not os.path.exists(env_path): + raise FileNotFoundError(f".env file not found at {env_path}") + + # Read current .env + with open(env_path, 'r') as f: + env_lines = f.readlines() + + # Create backup + backup_path = f"{env_path}.backup" + shutil.copy2(env_path, backup_path) + + # Update env vars (only if not masked) + env_vars = config['env_vars'] + updated_env_lines = [] + updated_vars = set() + + for line in env_lines: + line_updated = False + for env_var, value in env_vars.items(): + # Skip if value is masked (not actually changed) + if value == '••••••••••••': + continue + + if line.strip().startswith(f"{env_var}="): + updated_env_lines.append(f"{env_var}={value}\n") + updated_vars.add(env_var) + line_updated = True + break + + if not line_updated: + updated_env_lines.append(line) + + # Add new env vars that weren't found in file + for env_var, value in env_vars.items(): + if value != '••••••••••••' and env_var not in updated_vars: + updated_env_lines.append(f"{env_var}={value}\n") + updated_vars.add(env_var) + + # Write updated .env + if updated_vars: + with open(env_path, 'w') as f: + f.writelines(updated_env_lines) + + updated_files.append(env_path) + logger.info(f"Updated {len(updated_vars)} environment variables in {env_path}") + + return { + "success": True, + "message": f"Plugin '{plugin_id}' configuration updated successfully. Restart backend for changes to take effect.", + "updated_files": updated_files, + "requires_restart": True, + "status": "success" + } + + except Exception as e: + logger.exception(f"Error updating structured config for plugin '{plugin_id}'") + raise e + + +async def test_plugin_connection(plugin_id: str, config: dict) -> dict: + """Test plugin connection/configuration without saving. 
+ + Calls the plugin's test_connection method if available to validate + configuration (e.g., SMTP connection, Home Assistant API). + + Args: + plugin_id: Plugin identifier + config: Configuration to test (same structure as update_plugin_config_structured) + + Returns: + Test result with success status and details + """ + try: + from advanced_omi_backend.services.plugin_service import discover_plugins, expand_env_vars + + # Validate plugin exists + discovered_plugins = discover_plugins() + if plugin_id not in discovered_plugins: + raise ValueError(f"Plugin '{plugin_id}' not found") + + plugin_class = discovered_plugins[plugin_id] + + # Check if plugin supports testing + if not hasattr(plugin_class, 'test_connection'): + return { + "success": False, + "message": f"Plugin '{plugin_id}' does not support connection testing", + "status": "unsupported" + } + + # Build complete config from provided data + test_config = {} + + # Merge settings + if 'settings' in config: + test_config.update(config['settings']) + + # Add env vars (expand any ${ENV_VAR} references with test values) + if 'env_vars' in config: + for key, value in config['env_vars'].items(): + # Skip masked values + if value == '••••••••••••': + # Use actual env var value + value = os.getenv(key, '') + test_config[key.lower()] = value + + # Expand any remaining env var references + test_config = expand_env_vars(test_config) + + # Call plugin's test_connection static method + result = await plugin_class.test_connection(test_config) + + logger.info(f"Test connection for '{plugin_id}': {result.get('message', 'No message')}") + + return result + + except Exception as e: + logger.exception(f"Error testing connection for plugin '{plugin_id}'") + return { + "success": False, + "message": f"Connection test failed: {str(e)}", + "status": "error" + } diff --git a/backends/advanced/src/advanced_omi_backend/plugins/email_summarizer/plugin.py b/backends/advanced/src/advanced_omi_backend/plugins/email_summarizer/plugin.py 
index 02521d29..bb3965dc 100644 --- a/backends/advanced/src/advanced_omi_backend/plugins/email_summarizer/plugin.py +++ b/backends/advanced/src/advanced_omi_backend/plugins/email_summarizer/plugin.py @@ -46,6 +46,9 @@ class EmailSummarizerPlugin(BasePlugin): SUPPORTED_ACCESS_LEVELS: List[str] = ['conversation'] + name = "Email Summarizer" + description = "Sends email summaries when conversations complete" + def __init__(self, config: Dict[str, Any]): """ Initialize Email Summarizer plugin. @@ -286,3 +289,92 @@ def _format_subject(self, created_at: Optional[datetime] = None) -> str: return f"{self.subject_prefix} - {date_str}" else: return self.subject_prefix + + @staticmethod + async def test_connection(config: Dict[str, Any]) -> Dict[str, Any]: + """ + Test SMTP connection with provided configuration. + + This static method tests the SMTP connection without fully initializing the plugin. + Used by the form-based configuration UI to validate settings before saving. + + Args: + config: Configuration dictionary with SMTP settings + + Returns: + Dict with success status, message, and optional details + + Example: + >>> result = await EmailSummarizerPlugin.test_connection({ + ... 'smtp_host': 'smtp.gmail.com', + ... 'smtp_port': 587, + ... 'smtp_username': 'user@gmail.com', + ... 'smtp_password': 'password', + ... 'smtp_use_tls': True, + ... 'from_email': 'noreply@example.com', + ... 'from_name': 'Test' + ... 
}) + >>> result['success'] + True + """ + import time + + try: + # Validate required config fields + required_fields = ['smtp_host', 'smtp_username', 'smtp_password', 'from_email'] + missing_fields = [field for field in required_fields if not config.get(field)] + + if missing_fields: + return { + "success": False, + "message": f"Missing required fields: {', '.join(missing_fields)}", + "status": "error" + } + + # Build SMTP config + smtp_config = { + 'smtp_host': config.get('smtp_host'), + 'smtp_port': config.get('smtp_port', 587), + 'smtp_username': config.get('smtp_username'), + 'smtp_password': config.get('smtp_password'), + 'smtp_use_tls': config.get('smtp_use_tls', True), + 'from_email': config.get('from_email'), + 'from_name': config.get('from_name', 'Chronicle AI'), + } + + # Create temporary email service instance + email_service = SMTPEmailService(smtp_config) + + # Test connection + logger.info(f"Testing SMTP connection to {smtp_config['smtp_host']}...") + start_time = time.time() + + connection_success = await email_service.test_connection() + connection_time_ms = int((time.time() - start_time) * 1000) + + if connection_success: + return { + "success": True, + "message": f"Successfully connected to SMTP server at {smtp_config['smtp_host']}", + "status": "success", + "details": { + "smtp_host": smtp_config['smtp_host'], + "smtp_port": smtp_config['smtp_port'], + "connection_time_ms": connection_time_ms, + "use_tls": smtp_config['smtp_use_tls'] + } + } + else: + return { + "success": False, + "message": "SMTP connection test failed", + "status": "error" + } + + except Exception as e: + logger.error(f"SMTP connection test failed: {e}", exc_info=True) + return { + "success": False, + "message": f"Connection test failed: {str(e)}", + "status": "error" + } diff --git a/backends/advanced/src/advanced_omi_backend/plugins/homeassistant/plugin.py b/backends/advanced/src/advanced_omi_backend/plugins/homeassistant/plugin.py index 931dd813..13683194 100644 --- 
a/backends/advanced/src/advanced_omi_backend/plugins/homeassistant/plugin.py +++ b/backends/advanced/src/advanced_omi_backend/plugins/homeassistant/plugin.py @@ -30,6 +30,9 @@ class HomeAssistantPlugin(BasePlugin): SUPPORTED_ACCESS_LEVELS: List[str] = ['transcript'] + name = "Home Assistant" + description = "Wake word device control with Home Assistant integration" + def __init__(self, config: Dict[str, Any]): """ Initialize Home Assistant plugin. @@ -596,3 +599,94 @@ async def _parse_command_hybrid(self, command: str) -> Optional['ParsedCommand'] except Exception as e: logger.error(f"Fallback parsing failed: {e}", exc_info=True) return None + + @staticmethod + async def test_connection(config: Dict[str, Any]) -> Dict[str, Any]: + """ + Test Home Assistant API connection with provided configuration. + + This static method tests the HA API connection without fully initializing the plugin. + Used by the form-based configuration UI to validate settings before saving. + + Args: + config: Configuration dictionary with HA settings: + - ha_url: Home Assistant URL + - ha_token: Long-lived access token + - timeout: Request timeout (optional, default 30) + + Returns: + Dict with success status, message, and optional details + + Example: + >>> result = await HomeAssistantPlugin.test_connection({ + ... 'ha_url': 'http://homeassistant.local:8123', + ... 'ha_token': 'your_long_lived_token' + ... 
}) + >>> result['success'] + True + """ + import time + + try: + # Validate required config fields + required_fields = ['ha_url', 'ha_token'] + missing_fields = [field for field in required_fields if not config.get(field)] + + if missing_fields: + return { + "success": False, + "message": f"Missing required fields: {', '.join(missing_fields)}", + "status": "error" + } + + ha_url = config.get('ha_url') + ha_token = config.get('ha_token') + timeout = config.get('timeout', 30) + + # Create temporary MCP client + mcp_client = HAMCPClient( + base_url=ha_url, + token=ha_token, + timeout=timeout + ) + + # Test API connectivity with Template API + logger.info(f"Testing Home Assistant API connection to {ha_url}...") + start_time = time.time() + + test_result = await mcp_client._render_template("{{ 1 + 1 }}") + connection_time_ms = int((time.time() - start_time) * 1000) + + if str(test_result).strip() != "2": + return { + "success": False, + "message": f"Unexpected template result: {test_result}", + "status": "error" + } + + # Try to fetch entities count for additional info + try: + entities = await mcp_client.get_all_entities() + entity_count = len(entities) + except Exception: + entity_count = None + + return { + "success": True, + "message": f"Successfully connected to Home Assistant at {ha_url}", + "status": "success", + "details": { + "ha_url": ha_url, + "connection_time_ms": connection_time_ms, + "entity_count": entity_count, + "api_test": "Template rendering successful" + } + } + + except Exception as e: + logger.error(f"Home Assistant connection test failed: {e}", exc_info=True) + return { + "success": False, + "message": f"Connection test failed: {str(e)}", + "status": "error" + } diff --git a/backends/advanced/src/advanced_omi_backend/routers/modules/system_routes.py b/backends/advanced/src/advanced_omi_backend/routers/modules/system_routes.py index d68843ae..3e84ae57 100644 --- a/backends/advanced/src/advanced_omi_backend/routers/modules/system_routes.py +++ 
b/backends/advanced/src/advanced_omi_backend/routers/modules/system_routes.py @@ -251,6 +251,78 @@ async def validate_plugins_config( raise HTTPException(status_code=500, detail=str(e)) +# Structured Plugin Configuration Endpoints (Form-based UI) + +@router.get("/admin/plugins/metadata") +async def get_plugins_metadata(current_user: User = Depends(current_superuser)): + """Get plugin metadata for form-based configuration UI. Admin only. + + Returns: + - Plugin information (name, description, enabled status) + - Auto-generated schemas from config.yml + - Current configuration with masked secrets + - Orchestration settings (events, conditions) + """ + try: + return await system_controller.get_plugins_metadata() + except Exception as e: + logger.error(f"Failed to get plugins metadata: {e}") + raise HTTPException(status_code=500, detail=str(e)) + + +class PluginConfigRequest(BaseModel): + """Request model for structured plugin configuration updates.""" + orchestration: Optional[dict] = None + settings: Optional[dict] = None + env_vars: Optional[dict] = None + + +@router.post("/admin/plugins/config/structured/{plugin_id}") +async def update_plugin_config_structured( + plugin_id: str, + config: PluginConfigRequest, + current_user: User = Depends(current_superuser) +): + """Update plugin configuration from structured JSON (form data). Admin only. + + Updates the three-file plugin architecture: + 1. config/plugins.yml - Orchestration (enabled, events, condition) + 2. plugins/{plugin_id}/config.yml - Settings with ${ENV_VAR} references + 3. 
backends/advanced/.env - Actual secret values + """ + try: + config_dict = config.dict(exclude_none=True) + result = await system_controller.update_plugin_config_structured(plugin_id, config_dict) + return JSONResponse(content=result) + except ValueError as e: + raise HTTPException(status_code=400, detail=str(e)) + except Exception as e: + logger.error(f"Failed to update plugin config: {e}") + raise HTTPException(status_code=500, detail=str(e)) + + +@router.post("/admin/plugins/test-connection/{plugin_id}") +async def test_plugin_connection( + plugin_id: str, + config: PluginConfigRequest, + current_user: User = Depends(current_superuser) +): + """Test plugin connection/configuration without saving. Admin only. + + Calls the plugin's test_connection method to validate configuration + (e.g., SMTP connection, Home Assistant API). + """ + try: + config_dict = config.dict(exclude_none=True) + result = await system_controller.test_plugin_connection(plugin_id, config_dict) + return JSONResponse(content=result) + except ValueError as e: + raise HTTPException(status_code=400, detail=str(e)) + except Exception as e: + logger.error(f"Failed to test plugin connection: {e}") + raise HTTPException(status_code=500, detail=str(e)) + + @router.get("/streaming/status") async def get_streaming_status(request: Request, current_user: User = Depends(current_superuser)): """Get status of active streaming sessions and Redis Streams health. 
Admin only.""" diff --git a/backends/advanced/src/advanced_omi_backend/services/plugin_service.py b/backends/advanced/src/advanced_omi_backend/services/plugin_service.py index 1c661b92..1ca086f2 100644 --- a/backends/advanced/src/advanced_omi_backend/services/plugin_service.py +++ b/backends/advanced/src/advanced_omi_backend/services/plugin_service.py @@ -10,7 +10,7 @@ import os import re from pathlib import Path -from typing import Any, Dict, Optional, Type +from typing import Any, Dict, List, Optional, Type import yaml @@ -163,6 +163,284 @@ def set_plugin_router(router: PluginRouter) -> None: logger.info("Plugin router registered with plugin service") +def extract_env_var_name(value: str) -> Optional[str]: + """Extract environment variable name from ${ENV_VAR} or ${ENV_VAR:-default} syntax. + + Args: + value: String potentially containing ${ENV_VAR} reference + + Returns: + Environment variable name if found, None otherwise + + Examples: + >>> extract_env_var_name('${SMTP_HOST}') + 'SMTP_HOST' + >>> extract_env_var_name('${SMTP_PORT:-587}') + 'SMTP_PORT' + >>> extract_env_var_name('plain text') + None + """ + if not isinstance(value, str): + return None + + match = re.search(r'\$\{([^}:]+)', value) + if match: + return match.group(1).strip() + return None + + +def infer_field_type(key: str, value: Any) -> Dict[str, Any]: + """Infer field schema from config key and value. + + Args: + key: Configuration field key (e.g., 'smtp_password') + value: Configuration field value + + Returns: + Field schema dictionary with type, label, default, etc. 
+ + Examples: + >>> infer_field_type('smtp_password', '${SMTP_PASSWORD}') + {'type': 'password', 'label': 'SMTP Password', 'secret': True, 'env_var': 'SMTP_PASSWORD', 'required': True} + + >>> infer_field_type('max_sentences', 3) + {'type': 'number', 'label': 'Max Sentences', 'default': 3} + """ + # Generate human-readable label from key + label = key.replace('_', ' ').title() + + # Check for environment variable reference + if isinstance(value, str) and '${' in value: + env_var = extract_env_var_name(value) + if not env_var: + return {'type': 'string', 'label': label, 'default': value} + + # Determine if this is a secret based on env var name + secret_keywords = ['PASSWORD', 'TOKEN', 'KEY', 'SECRET', 'APIKEY', 'API_KEY'] + is_secret = any(keyword in env_var.upper() for keyword in secret_keywords) + + # Extract default value if present (${VAR:-default}) + default_value = None + if ':-' in value: + default_match = re.search(r':-([^}]+)', value) + if default_match: + default_value = default_match.group(1).strip() + # Try to parse boolean/number defaults + if default_value.lower() in ('true', 'false'): + default_value = default_value.lower() == 'true' + elif default_value.isdigit(): + default_value = int(default_value) + + schema = { + 'type': 'password' if is_secret else 'string', + 'label': label, + 'secret': is_secret, + 'env_var': env_var, + 'required': is_secret, # Secrets are required + } + + if default_value is not None: + schema['default'] = default_value + schema['required'] = False + + return schema + + # Boolean values + elif isinstance(value, bool): + return {'type': 'boolean', 'label': label, 'default': value} + + # Numeric values + elif isinstance(value, int): + return {'type': 'number', 'label': label, 'default': value} + + elif isinstance(value, float): + return {'type': 'number', 'label': label, 'default': value, 'step': 0.1} + + # List values + elif isinstance(value, list): + return {'type': 'array', 'label': label, 'default': value} + + # 
Object/dict values + elif isinstance(value, dict): + return {'type': 'object', 'label': label, 'default': value} + + # String values (fallback) + else: + return {'type': 'string', 'label': label, 'default': str(value) if value is not None else ''} + + +def load_schema_yml(plugin_id: str) -> Optional[Dict[str, Any]]: + """Load optional schema.yml override for a plugin. + + Args: + plugin_id: Plugin identifier + + Returns: + Schema dictionary if schema.yml exists, None otherwise + """ + try: + import advanced_omi_backend.plugins + plugins_dir = Path(advanced_omi_backend.plugins.__file__).parent + schema_path = plugins_dir / plugin_id / "schema.yml" + + if schema_path.exists(): + logger.debug(f"Loading schema override from: {schema_path}") + with open(schema_path, 'r') as f: + return yaml.safe_load(f) or {} + except Exception as e: + logger.warning(f"Failed to load schema.yml for plugin '{plugin_id}': {e}") + + return None + + +def infer_schema_from_config(plugin_id: str, config_dict: Dict[str, Any]) -> Dict[str, Any]: + """Infer configuration schema from plugin config.yml. + + This function analyzes the config.yml file to generate a JSON schema + for rendering forms in the frontend. It can be overridden by providing + a schema.yml file in the plugin directory. 
+ + Args: + plugin_id: Plugin identifier + config_dict: Configuration dictionary from config.yml + + Returns: + Schema dictionary with 'settings' and 'env_vars' sections + + Example: + >>> config = {'subject_prefix': 'Summary', 'smtp_password': '${SMTP_PASSWORD}'} + >>> schema = infer_schema_from_config('email_summarizer', config) + >>> schema['settings']['subject_prefix']['type'] + 'string' + >>> schema['env_vars']['SMTP_PASSWORD']['type'] + 'password' + """ + # Check for explicit schema.yml override + explicit_schema = load_schema_yml(plugin_id) + if explicit_schema: + logger.info(f"Using explicit schema.yml for plugin '{plugin_id}'") + return explicit_schema + + # Infer schema from config values + settings_schema = {} + env_vars_schema = {} + + for key, value in config_dict.items(): + field_schema = infer_field_type(key, value) + + # Separate env vars from regular settings + if field_schema.get('env_var'): + env_var_name = field_schema['env_var'] + env_vars_schema[env_var_name] = field_schema + else: + settings_schema[key] = field_schema + + return { + 'settings': settings_schema, + 'env_vars': env_vars_schema + } + + +def mask_secrets_in_config(config: Dict[str, Any], schema: Dict[str, Any]) -> Dict[str, Any]: + """Mask secret values in configuration for frontend display. 
+ + Args: + config: Configuration dictionary with actual values + schema: Schema dictionary identifying secret fields + + Returns: + Configuration with secrets masked as '••••••••••••' + + Example: + >>> config = {'smtp_password': 'actual_password'} + >>> schema = {'env_vars': {'SMTP_PASSWORD': {'secret': True}}} + >>> masked = mask_secrets_in_config(config, schema) + >>> masked['smtp_password'] + '••••••••••••' + """ + masked_config = config.copy() + + # Get list of secret environment variable names + secret_env_vars = set() + for env_var, field_schema in schema.get('env_vars', {}).items(): + if field_schema.get('secret', False): + secret_env_vars.add(env_var) + + # Mask values that reference secret environment variables + for key, value in masked_config.items(): + if isinstance(value, str): + env_var = extract_env_var_name(value) + if env_var and env_var in secret_env_vars: + # Check if env var is actually set + is_set = bool(os.environ.get(env_var)) + masked_config[key] = '••••••••••••' if is_set else '' + + return masked_config + + +def get_plugin_metadata(plugin_id: str, plugin_class: Type[BasePlugin], + orchestration_config: Dict[str, Any]) -> Dict[str, Any]: + """Get complete metadata for a plugin including schema and current config. 
+ + Args: + plugin_id: Plugin identifier + plugin_class: Plugin class type + orchestration_config: Orchestration config from plugins.yml + + Returns: + Complete plugin metadata for frontend + """ + # Load plugin config.yml + try: + import advanced_omi_backend.plugins + plugins_dir = Path(advanced_omi_backend.plugins.__file__).parent + plugin_config_path = plugins_dir / plugin_id / "config.yml" + + config_dict = {} + if plugin_config_path.exists(): + with open(plugin_config_path, 'r') as f: + config_dict = yaml.safe_load(f) or {} + except Exception as e: + logger.error(f"Failed to load config for plugin '{plugin_id}': {e}") + config_dict = {} + + # Infer schema + config_schema = infer_schema_from_config(plugin_id, config_dict) + + # Get plugin metadata from class + plugin_name = getattr(plugin_class, 'name', plugin_id.replace('_', ' ').title()) + plugin_description = getattr(plugin_class, 'description', '') + supports_testing = hasattr(plugin_class, 'test_connection') + + # Mask secrets in current config + current_config = load_plugin_config(plugin_id, orchestration_config) + masked_config = mask_secrets_in_config(current_config, config_schema) + + # Mark which env vars are set + for env_var_name, env_var_schema in config_schema.get('env_vars', {}).items(): + env_var_schema['is_set'] = bool(os.environ.get(env_var_name)) + if env_var_schema.get('secret') and env_var_schema['is_set']: + env_var_schema['value'] = '••••••••••••' + else: + env_var_schema['value'] = os.environ.get(env_var_name, '') + + return { + 'plugin_id': plugin_id, + 'name': plugin_name, + 'description': plugin_description, + 'enabled': orchestration_config.get('enabled', False), + 'status': 'active' if orchestration_config.get('enabled', False) else 'disabled', + 'supports_testing': supports_testing, + 'config_schema': config_schema, + 'current_config': masked_config, + 'orchestration': { + 'enabled': orchestration_config.get('enabled', False), + 'events': orchestration_config.get('events', []), + 
'condition': orchestration_config.get('condition', {'type': 'always'}) + } + } + + def discover_plugins() -> Dict[str, Type[BasePlugin]]: """ Discover plugins in the plugins directory. diff --git a/backends/advanced/webui/src/components/PluginSettingsForm.tsx b/backends/advanced/webui/src/components/PluginSettingsForm.tsx new file mode 100644 index 00000000..9cfb9290 --- /dev/null +++ b/backends/advanced/webui/src/components/PluginSettingsForm.tsx @@ -0,0 +1,320 @@ +import { useState, useEffect } from 'react' +import { RefreshCw, AlertCircle } from 'lucide-react' +import { systemApi } from '../services/api' +import PluginListSidebar from './plugins/PluginListSidebar' +import PluginConfigPanel from './plugins/PluginConfigPanel' + +interface PluginMetadata { + plugin_id: string + name: string + description: string + enabled: boolean + status: string + supports_testing: boolean + config_schema: { + orchestration: any + settings: Record + env_vars: Record + } +} + +interface PluginConfig { + orchestration: { + enabled: boolean + events: string[] + condition: { + type: 'always' | 'wake_word' + wake_words?: string[] + } + } + settings: Record + env_vars: Record +} + +interface PluginSettingsFormProps { + className?: string +} + +export default function PluginSettingsForm({ className }: PluginSettingsFormProps) { + const [plugins, setPlugins] = useState([]) + const [selectedPluginId, setSelectedPluginId] = useState(null) + const [currentConfig, setCurrentConfig] = useState(null) + const [originalConfig, setOriginalConfig] = useState(null) + const [loading, setLoading] = useState(false) + const [testing, setTesting] = useState(false) + const [saving, setSaving] = useState(false) + const [errors, setErrors] = useState>({}) + const [message, setMessage] = useState('') + const [error, setError] = useState('') + const [testResult, setTestResult] = useState(null) + + const selectedPlugin = plugins.find((p) => p.plugin_id === selectedPluginId) + + useEffect(() => { + 
loadPlugins() + }, []) + + useEffect(() => { + if (selectedPluginId) { + loadPluginConfig(selectedPluginId) + } + }, [selectedPluginId]) + + const loadPlugins = async () => { + setLoading(true) + setError('') + setMessage('') + + try { + const response = await systemApi.getPluginsMetadata() + const pluginsData = response.data.plugins || [] + setPlugins(pluginsData) + + // Auto-select first plugin if none selected + if (!selectedPluginId && pluginsData.length > 0) { + setSelectedPluginId(pluginsData[0].plugin_id) + } + + setMessage('Plugins loaded successfully') + setTimeout(() => setMessage(''), 3000) + } catch (err: any) { + const status = err.response?.status + if (status === 401) { + setError('Unauthorized: admin privileges required') + } else if (status === 404 || status === 405) { + setError('Backend does not expose plugin configuration endpoints') + } else { + setError(err.response?.data?.detail || 'Failed to load plugins') + } + } finally { + setLoading(false) + } + } + + const loadPluginConfig = (pluginId: string) => { + const plugin = plugins.find((p) => p.plugin_id === pluginId) + if (!plugin) return + + // Extract current configuration from plugin metadata + const config: PluginConfig = { + orchestration: { + enabled: plugin.enabled || false, + events: [], + condition: { type: 'always' } + }, + settings: {}, + env_vars: {} + } + + // Load settings with defaults + Object.keys(plugin.config_schema.settings || {}).forEach((key) => { + const schema = plugin.config_schema.settings[key] + config.settings[key] = schema.default ?? '' + }) + + // Load env vars (will be masked values from backend) + Object.keys(plugin.config_schema.env_vars || {}).forEach((key) => { + const schema = plugin.config_schema.env_vars[key] + config.env_vars[key] = schema.value ?? 
'' + }) + + setCurrentConfig(config) + setOriginalConfig(JSON.parse(JSON.stringify(config))) + setErrors({}) + setTestResult(null) + } + + const handlePluginSelect = (pluginId: string) => { + setSelectedPluginId(pluginId) + } + + const handleToggleEnabled = async (pluginId: string, enabled: boolean) => { + try { + // Update the plugin's enabled state + const plugin = plugins.find((p) => p.plugin_id === pluginId) + if (!plugin) return + + await systemApi.updatePluginConfigStructured(pluginId, { + orchestration: { + enabled, + events: plugin.config_schema.orchestration?.events || [], + condition: plugin.config_schema.orchestration?.condition || { type: 'always' } + } + }) + + // Reload plugins to reflect changes + await loadPlugins() + setMessage(`Plugin ${enabled ? 'enabled' : 'disabled'} successfully`) + setTimeout(() => setMessage(''), 3000) + } catch (err: any) { + setError(err.response?.data?.detail || `Failed to ${enabled ? 'enable' : 'disable'} plugin`) + } + } + + const handleConfigChange = (config: PluginConfig) => { + setCurrentConfig(config) + setErrors({}) + } + + const handleTestConnection = async () => { + if (!selectedPluginId || !currentConfig) return + + setTesting(true) + setTestResult(null) + setError('') + + try { + const response = await systemApi.testPluginConnection(selectedPluginId, { + orchestration: currentConfig.orchestration, + settings: currentConfig.settings, + env_vars: currentConfig.env_vars + }) + + setTestResult(response.data) + + if (response.data.success) { + setMessage('Connection test successful') + setTimeout(() => setMessage(''), 3000) + } + } catch (err: any) { + const errorMessage = err.response?.data?.detail || 'Connection test failed' + setTestResult({ + success: false, + message: errorMessage + }) + setError(errorMessage) + } finally { + setTesting(false) + } + } + + const handleSave = async () => { + if (!selectedPluginId || !currentConfig) return + + setSaving(true) + setError('') + setMessage('') + setErrors({}) + + try 
{ + // Filter out masked env vars (don't send unchanged secrets) + const envVarsToSend: Record = {} + Object.keys(currentConfig.env_vars).forEach((key) => { + const value = currentConfig.env_vars[key] + // Only send if value is not masked + if (typeof value !== 'string' || !value.includes('••••')) { + envVarsToSend[key] = value + } + }) + + const response = await systemApi.updatePluginConfigStructured(selectedPluginId, { + orchestration: currentConfig.orchestration, + settings: currentConfig.settings, + env_vars: Object.keys(envVarsToSend).length > 0 ? envVarsToSend : undefined + }) + + setMessage('Configuration saved successfully. Restart backend to apply changes.') + setTimeout(() => setMessage(''), 5000) + + // Reload plugins to reflect changes + await loadPlugins() + } catch (err: any) { + setError(err.response?.data?.detail || 'Failed to save configuration') + } finally { + setSaving(false) + } + } + + const handleReset = () => { + if (originalConfig) { + setCurrentConfig(JSON.parse(JSON.stringify(originalConfig))) + setErrors({}) + setTestResult(null) + setMessage('Configuration reset to original values') + setTimeout(() => setMessage(''), 3000) + } + } + + const handleRefresh = async () => { + await loadPlugins() + } + + return ( +
+
+ {/* Header */} +
+
+

+ Plugin Configuration +

+

+ Configure plugins, manage orchestration, and test connections +

+
+ +
+ + {/* Status Messages */} + {message && ( +
+

{message}

+
+ )} + + {error && ( +
+
+ +

{error}

+
+
+ )} + + {/* Main Content */} +
+ {/* Sidebar */} +
+ +
+ + {/* Config Panel */} +
+ {selectedPlugin && currentConfig ? ( + + ) : ( +
+

Select a plugin to configure

+
+ )} +
+
+
+
+ ) +} diff --git a/backends/advanced/webui/src/components/plugins/EnvVarsSection.tsx b/backends/advanced/webui/src/components/plugins/EnvVarsSection.tsx new file mode 100644 index 00000000..382baeca --- /dev/null +++ b/backends/advanced/webui/src/components/plugins/EnvVarsSection.tsx @@ -0,0 +1,91 @@ +import { Key } from 'lucide-react' +import FormField, { FieldSchema } from './FormField' + +interface EnvVarsSectionProps { + schema: Record + values: Record + onChange: (envVars: Record) => void + errors?: Record + disabled?: boolean +} + +export default function EnvVarsSection({ + schema, + values, + onChange, + errors = {}, + disabled = false +}: EnvVarsSectionProps) { + const envVarKeys = Object.keys(schema) + + if (envVarKeys.length === 0) { + return null + } + + const handleChange = (key: string, value: any) => { + onChange({ + ...values, + [key]: value + }) + } + + return ( +
+ {/* Section Header */} +
+ +

+ Secrets & Environment Variables +

+
+ +

+ Environment variables and secrets for this plugin. Values are stored securely and masked for display. +

+ +
+ {envVarKeys.map((key) => { + const fieldSchema = schema[key] + const value = values[key] + const error = errors[key] + + return ( +
+ handleChange(key, newValue)} + error={error} + disabled={disabled} + /> + + {fieldSchema.env_var && ( +
+ + ${fieldSchema.env_var} + + {fieldSchema.secret && ( + + 🔒 Stored securely in .env file + + )} +
+ )} +
+ ) + })} +
+ +
+

+ Note: Changing environment variables requires a backend restart to take effect. + Existing values are masked with •••••••• for security. +

+
+
+ ) +} diff --git a/backends/advanced/webui/src/components/plugins/FormField.tsx b/backends/advanced/webui/src/components/plugins/FormField.tsx new file mode 100644 index 00000000..60ebf0b2 --- /dev/null +++ b/backends/advanced/webui/src/components/plugins/FormField.tsx @@ -0,0 +1,216 @@ +import { useState } from 'react' +import { AlertCircle, Eye, EyeOff } from 'lucide-react' + +export interface FieldSchema { + type: 'string' | 'number' | 'boolean' | 'password' | 'enum' | 'array' + label: string + default?: any + required?: boolean + secret?: boolean + env_var?: string + min?: number + max?: number + help_text?: string + options?: Array<{ value: string; label: string }> +} + +interface FormFieldProps { + fieldKey: string + schema: FieldSchema + value: any + onChange: (value: any) => void + error?: string + disabled?: boolean +} + +export default function FormField({ + fieldKey, + schema, + value, + onChange, + error, + disabled = false +}: FormFieldProps) { + const [showPassword, setShowPassword] = useState(false) + const [isEditing, setIsEditing] = useState(false) + + const isMaskedValue = typeof value === 'string' && value.includes('••••') + + const renderField = () => { + switch (schema.type) { + case 'boolean': + return ( +
+ onChange(e.target.checked)} + disabled={disabled} + className="h-4 w-4 text-blue-600 focus:ring-blue-500 border-gray-300 rounded disabled:opacity-50" + /> + +
+ ) + + case 'number': + return ( +
+ + onChange(e.target.valueAsNumber || parseInt(e.target.value))} + min={schema.min} + max={schema.max} + disabled={disabled} + className="w-full px-3 py-2 border border-gray-300 dark:border-gray-600 rounded-md bg-white dark:bg-gray-700 text-gray-900 dark:text-gray-100 focus:outline-none focus:ring-2 focus:ring-blue-500 focus:border-blue-500 disabled:opacity-50 disabled:cursor-not-allowed" + /> + {schema.help_text && ( +

+ {schema.help_text} +

+ )} +
+ ) + + case 'password': + const displayValue = isMaskedValue && !isEditing ? value : value || '' + + return ( +
+ +
+ { + setIsEditing(true) + onChange(e.target.value) + }} + disabled={disabled} + placeholder={isMaskedValue ? 'Enter new value to change' : ''} + className="w-full px-3 py-2 pr-10 border border-gray-300 dark:border-gray-600 rounded-md bg-white dark:bg-gray-700 text-gray-900 dark:text-gray-100 focus:outline-none focus:ring-2 focus:ring-blue-500 focus:border-blue-500 disabled:opacity-50 disabled:cursor-not-allowed" + /> + +
+ {schema.help_text && ( +

+ {schema.help_text} +

+ )} + {isMaskedValue && !isEditing && ( +

+ Value is set. Enter new value to change. +

+ )} +
+ ) + + case 'enum': + return ( +
+ + + {schema.help_text && ( +

+ {schema.help_text} +

+ )} +
+ ) + + case 'string': + default: + return ( +
+ + onChange(e.target.value)} + disabled={disabled} + className="w-full px-3 py-2 border border-gray-300 dark:border-gray-600 rounded-md bg-white dark:bg-gray-700 text-gray-900 dark:text-gray-100 focus:outline-none focus:ring-2 focus:ring-blue-500 focus:border-blue-500 disabled:opacity-50 disabled:cursor-not-allowed" + /> + {schema.help_text && ( +

+ {schema.help_text} +

+ )} +
+ ) + } + } + + return ( +
+ {renderField()} + {error && ( +
+ +

{error}

+
+ )} +
+ ) +} diff --git a/backends/advanced/webui/src/components/plugins/OrchestrationSection.tsx b/backends/advanced/webui/src/components/plugins/OrchestrationSection.tsx new file mode 100644 index 00000000..f667143c --- /dev/null +++ b/backends/advanced/webui/src/components/plugins/OrchestrationSection.tsx @@ -0,0 +1,239 @@ +import { Zap } from 'lucide-react' + +interface OrchestrationConfig { + enabled: boolean + events: string[] + condition: { + type: 'always' | 'wake_word' + wake_words?: string[] + } +} + +interface OrchestrationSectionProps { + config: OrchestrationConfig + onChange: (config: OrchestrationConfig) => void + disabled?: boolean +} + +const AVAILABLE_EVENTS = [ + { value: 'conversation.complete', label: 'Conversation Complete' }, + { value: 'transcript.streaming', label: 'Transcript Streaming' }, + { value: 'memory.created', label: 'Memory Created' }, +] + +export default function OrchestrationSection({ + config, + onChange, + disabled = false +}: OrchestrationSectionProps) { + const handleEnabledChange = (enabled: boolean) => { + onChange({ ...config, enabled }) + } + + const handleEventToggle = (event: string) => { + const events = config.events.includes(event) + ? config.events.filter((e) => e !== event) + : [...config.events, event] + onChange({ ...config, events }) + } + + const handleConditionTypeChange = (type: 'always' | 'wake_word') => { + onChange({ + ...config, + condition: { + type, + wake_words: type === 'wake_word' ? config.condition.wake_words || [] : undefined + } + }) + } + + const handleWakeWordsChange = (value: string) => { + const wake_words = value.split(',').map((w) => w.trim()).filter(Boolean) + onChange({ + ...config, + condition: { + ...config.condition, + wake_words + } + }) + } + + return ( +
+ {/* Section Header */} +
+ +

+ Orchestration +

+
+ + {/* Enable Plugin Toggle */} +
+
+ +

+ Activate this plugin for event processing +

+
+ +
+ + {/* Events Selection */} +
+ +

+ Select which events should trigger this plugin +

+
+ {AVAILABLE_EVENTS.map((event) => ( + + ))} +
+
+ + {/* Condition Type */} +
+ +

+ When should this plugin execute? +

+
+ + + +
+
+ + {/* Wake Words Input (conditional) */} + {config.condition.type === 'wake_word' && ( +
+ + !disabled && handleWakeWordsChange(e.target.value)} + placeholder="e.g., hey jarvis, ok assistant" + disabled={disabled} + className="w-full px-3 py-2 border border-gray-300 dark:border-gray-600 rounded-md bg-white dark:bg-gray-700 text-gray-900 dark:text-gray-100 focus:outline-none focus:ring-2 focus:ring-blue-500 focus:border-blue-500 disabled:opacity-50 disabled:cursor-not-allowed" + /> +

+ Comma-separated list of wake words (case-insensitive) +

+
+ )} +
+ ) +} diff --git a/backends/advanced/webui/src/components/plugins/PluginConfigPanel.tsx b/backends/advanced/webui/src/components/plugins/PluginConfigPanel.tsx new file mode 100644 index 00000000..e3cf2089 --- /dev/null +++ b/backends/advanced/webui/src/components/plugins/PluginConfigPanel.tsx @@ -0,0 +1,287 @@ +import { useState } from 'react' +import { Settings, CheckCircle, XCircle, Loader2 } from 'lucide-react' +import OrchestrationSection from './OrchestrationSection' +import EnvVarsSection from './EnvVarsSection' +import FormField, { FieldSchema } from './FormField' + +interface PluginMetadata { + plugin_id: string + name: string + description: string + enabled: boolean + status: string + supports_testing: boolean + config_schema: { + orchestration: any + settings: Record + env_vars: Record + } +} + +interface PluginConfig { + orchestration: { + enabled: boolean + events: string[] + condition: { + type: 'always' | 'wake_word' + wake_words?: string[] + } + } + settings: Record + env_vars: Record +} + +interface PluginConfigPanelProps { + plugin: PluginMetadata + config: PluginConfig + onChange: (config: PluginConfig) => void + onTestConnection?: () => Promise + onSave: () => Promise + onReset: () => void + errors?: Record + testResult?: { success: boolean; message: string; details?: any } | null + testing?: boolean + saving?: boolean + disabled?: boolean +} + +export default function PluginConfigPanel({ + plugin, + config, + onChange, + onTestConnection, + onSave, + onReset, + errors = {}, + testResult = null, + testing = false, + saving = false, + disabled = false +}: PluginConfigPanelProps) { + const [activeTab, setActiveTab] = useState<'orchestration' | 'settings' | 'secrets'>('orchestration') + + const handleOrchestrationChange = (orchestration: any) => { + onChange({ ...config, orchestration }) + } + + const handleSettingsChange = (key: string, value: any) => { + onChange({ + ...config, + settings: { ...config.settings, [key]: value } + }) + } + + const 
handleEnvVarsChange = (envVars: Record) => { + onChange({ ...config, env_vars: envVars }) + } + + const settingsKeys = Object.keys(plugin.config_schema.settings || {}) + const hasSettings = settingsKeys.length > 0 + const hasEnvVars = Object.keys(plugin.config_schema.env_vars || {}).length > 0 + + return ( +
+ {/* Plugin Header */} +
+

+ {plugin.name} +

+

+ {plugin.description} +

+
+ + {/* Tabs */} +
+ + {hasSettings && ( + + )} + {hasEnvVars && ( + + )} +
+ + {/* Tab Content */} +
+ {activeTab === 'orchestration' && ( + + )} + + {activeTab === 'settings' && hasSettings && ( +
+
+ +

+ Plugin Settings +

+
+ +
+ {settingsKeys.map((key) => { + const fieldSchema = plugin.config_schema.settings[key] + const value = config.settings[key] + const error = errors[`settings.${key}`] + + return ( + handleSettingsChange(key, newValue)} + error={error} + disabled={disabled} + /> + ) + })} +
+
+ )} + + {activeTab === 'secrets' && hasEnvVars && ( + + )} +
+ + {/* Test Result Display */} + {testResult && ( +
+
+ {testResult.success ? ( + + ) : ( + + )} +
+

+ {testResult.message} +

+ {testResult.details && ( +
+                  {JSON.stringify(testResult.details, null, 2)}
+                
+ )} +
+
+
+ )} + + {/* Action Buttons */} +
+
+ {plugin.supports_testing && onTestConnection && ( + + )} + + + + +
+
+
+ ) +} diff --git a/backends/advanced/webui/src/components/plugins/PluginListSidebar.tsx b/backends/advanced/webui/src/components/plugins/PluginListSidebar.tsx new file mode 100644 index 00000000..bf842620 --- /dev/null +++ b/backends/advanced/webui/src/components/plugins/PluginListSidebar.tsx @@ -0,0 +1,162 @@ +import { CheckCircle, Circle, AlertTriangle } from 'lucide-react' + +interface Plugin { + plugin_id: string + name: string + description: string + enabled: boolean + status: 'active' | 'disabled' | 'error' +} + +interface PluginListSidebarProps { + plugins: Plugin[] + selectedPluginId: string | null + onSelectPlugin: (pluginId: string) => void + onToggleEnabled: (pluginId: string, enabled: boolean) => void + loading?: boolean +} + +export default function PluginListSidebar({ + plugins, + selectedPluginId, + onSelectPlugin, + onToggleEnabled, + loading = false +}: PluginListSidebarProps) { + const getStatusIcon = (status: string, enabled: boolean) => { + if (!enabled) { + return + } + + switch (status) { + case 'active': + return + case 'error': + return + default: + return + } + } + + const getStatusBadge = (status: string, enabled: boolean) => { + if (!enabled) { + return ( + + Disabled + + ) + } + + switch (status) { + case 'active': + return ( + + Active + + ) + case 'error': + return ( + + Error + + ) + default: + return ( + + Unknown + + ) + } + } + + if (loading) { + return ( +
+ {[1, 2, 3].map((i) => ( +
+ ))} +
+ ) + } + + if (plugins.length === 0) { + return ( +
+

No plugins found

+
+ ) + } + + return ( +
+ {plugins.map((plugin) => { + const isSelected = selectedPluginId === plugin.plugin_id + + return ( +
onSelectPlugin(plugin.plugin_id)} + className={` + p-4 rounded-lg border cursor-pointer transition-all + ${ + isSelected + ? 'border-blue-500 bg-blue-50 dark:bg-blue-900/20' + : 'border-gray-200 dark:border-gray-700 hover:border-gray-300 dark:hover:border-gray-600 bg-white dark:bg-gray-800' + } + `} + > + {/* Plugin Header */} +
+
+ {getStatusIcon(plugin.status, plugin.enabled)} +
+

+ {plugin.name} +

+
+
+
+ + {/* Plugin Description */} +

+ {plugin.description} +

+ + {/* Plugin Status and Toggle */} +
+ {getStatusBadge(plugin.status, plugin.enabled)} + + +
+
+ ) + })} +
+ ) +} diff --git a/backends/advanced/webui/src/pages/Plugins.tsx b/backends/advanced/webui/src/pages/Plugins.tsx index f28921f5..adb85930 100644 --- a/backends/advanced/webui/src/pages/Plugins.tsx +++ b/backends/advanced/webui/src/pages/Plugins.tsx @@ -1,9 +1,39 @@ +import { useState } from 'react' +import { Code, Layout } from 'lucide-react' import PluginSettings from '../components/PluginSettings' +import PluginSettingsForm from '../components/PluginSettingsForm' export default function Plugins() { + const [useFormUI, setUseFormUI] = useState(true) + return ( -
- +
+ {/* Toggle Button */} +
+ +
+ + {/* Content */} + {useFormUI ? ( + + ) : ( + + )}
) } diff --git a/backends/advanced/webui/src/services/api.ts b/backends/advanced/webui/src/services/api.ts index 88ce8f58..cd54473c 100644 --- a/backends/advanced/webui/src/services/api.ts +++ b/backends/advanced/webui/src/services/api.ts @@ -217,7 +217,7 @@ export const systemApi = { headers: { 'Content-Type': 'text/plain' } }), - // Plugin Configuration Management + // Plugin Configuration Management (YAML-based) getPluginsConfigRaw: () => api.get('/api/admin/plugins/config'), updatePluginsConfigRaw: (configYaml: string) => api.post('/api/admin/plugins/config', configYaml, { @@ -228,6 +228,27 @@ export const systemApi = { headers: { 'Content-Type': 'text/plain' } }), + // Plugin Configuration Management (Structured/Form-based) + getPluginsMetadata: () => api.get('/api/admin/plugins/metadata'), + updatePluginConfigStructured: (pluginId: string, config: { + orchestration?: { + enabled: boolean + events: string[] + condition: { type: string; wake_words?: string[] } + } + settings?: Record + env_vars?: Record + }) => api.post(`/api/admin/plugins/config/structured/${pluginId}`, config), + testPluginConnection: (pluginId: string, config: { + orchestration?: { + enabled: boolean + events: string[] + condition: { type: string; wake_words?: string[] } + } + settings?: Record + env_vars?: Record + }) => api.post(`/api/admin/plugins/test-connection/${pluginId}`, config), + // Memory Provider Management getMemoryProvider: () => api.get('/api/admin/memory/provider'), setMemoryProvider: (provider: string) => api.post('/api/admin/memory/provider', { provider }), From e32ea9f290228c7cc86c48f8a627a2aee00147de Mon Sep 17 00:00:00 2001 From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com> Date: Fri, 23 Jan 2026 21:48:53 +0000 Subject: [PATCH 03/10] fix --- backends/advanced/webui/src/components/PluginSettingsForm.tsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backends/advanced/webui/src/components/PluginSettingsForm.tsx 
b/backends/advanced/webui/src/components/PluginSettingsForm.tsx index 9cfb9290..1611a025 100644 --- a/backends/advanced/webui/src/components/PluginSettingsForm.tsx +++ b/backends/advanced/webui/src/components/PluginSettingsForm.tsx @@ -9,7 +9,7 @@ interface PluginMetadata { name: string description: string enabled: boolean - status: string + status: 'active' | 'disabled' | 'error' supports_testing: boolean config_schema: { orchestration: any @@ -207,7 +207,7 @@ export default function PluginSettingsForm({ className }: PluginSettingsFormProp } }) - const response = await systemApi.updatePluginConfigStructured(selectedPluginId, { + await systemApi.updatePluginConfigStructured(selectedPluginId, { orchestration: currentConfig.orchestration, settings: currentConfig.settings, env_vars: Object.keys(envVarsToSend).length > 0 ? envVarsToSend : undefined From fa10c5e40d7a35b3c31666495e40f146809bcce9 Mon Sep 17 00:00:00 2001 From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com> Date: Tue, 27 Jan 2026 13:42:35 +0000 Subject: [PATCH 04/10] Enhance audio processing and conversation management with always_persist feature - Updated Docker Compose configuration to include mock streaming services for testing. - Introduced `always_persist` flag in audio stream and conversation management, ensuring audio is saved even if transcription fails. - Enhanced conversation model to track processing status and persist audio data, improving reliability in audio handling. - Added integration tests to verify the functionality of the always_persist feature, ensuring audio is correctly stored in various scenarios. - Improved logging for audio processing and conversation state transitions to facilitate debugging and monitoring. 
--- backends/advanced/docker-compose-test.yml | 26 ++ .../clients/audio_stream_client.py | 24 +- .../controllers/conversation_controller.py | 8 + .../controllers/queue_controller.py | 5 +- .../controllers/system_controller.py | 6 +- .../controllers/websocket_controller.py | 269 +++++++++++++- .../models/conversation.py | 32 ++ .../plugins/email_summarizer/email_service.py | 42 ++- .../plugins/email_summarizer/plugin.py | 23 +- .../services/transcription/__init__.py | 20 ++ .../services/transcription/mock_provider.py | 41 ++- .../transcription/streaming_consumer.py | 4 +- .../utils/logging_utils.py | 262 ++++++++++++++ .../workers/audio_jobs.py | 55 ++- .../workers/conversation_jobs.py | 99 +++++- .../workers/transcription_jobs.py | 66 +++- .../ConversationVersionDropdown.tsx | 14 +- .../components/ConversationVersionHeader.tsx | 2 + .../src/components/plugins/FormField.tsx | 19 +- .../src/hooks/useSimpleAudioRecording.ts | 42 ++- .../webui/src/pages/Conversations.tsx | 6 +- .../advanced/webui/src/pages/LiveRecord.tsx | 29 +- config/defaults.yml | 4 + tests/Makefile | 94 ++++- tests/bin/start-containers.sh | 8 + tests/configs/mock-services.yml | 58 +++- .../always_persist_audio_tests.robot | 321 +++++++++++++++++ tests/libs/audio_stream_library.py | 4 + tests/libs/mock_llm_server.py | 328 ++++++++++++++++++ tests/libs/mock_streaming_stt_server.py | 190 ++++++++++ tests/resources/conversation_keywords.robot | 42 +++ tests/resources/redis_keywords.robot | 47 +++ tests/resources/system_keywords.robot | 48 ++- tests/resources/websocket_keywords.robot | 39 +++ 34 files changed, 2186 insertions(+), 91 deletions(-) create mode 100644 backends/advanced/src/advanced_omi_backend/utils/logging_utils.py create mode 100644 tests/integration/always_persist_audio_tests.robot create mode 100755 tests/libs/mock_llm_server.py create mode 100755 tests/libs/mock_streaming_stt_server.py diff --git a/backends/advanced/docker-compose-test.yml b/backends/advanced/docker-compose-test.yml 
index 7e0ad0b6..73be45be 100644 --- a/backends/advanced/docker-compose-test.yml +++ b/backends/advanced/docker-compose-test.yml @@ -164,6 +164,32 @@ services: profiles: - speaker # Optional service - only start when explicitly enabled + mock-streaming-stt: + build: + context: ../.. + dockerfile: tests/Dockerfile.mock-streaming-stt + ports: + - "9999:9999" + healthcheck: + test: ["CMD", "python", "-c", "import socket; s=socket.socket(); s.connect(('localhost',9999)); s.close()"] + interval: 10s + timeout: 5s + retries: 3 + restart: unless-stopped + + mock-llm: + build: + context: ../.. + dockerfile: tests/Dockerfile.mock-llm + ports: + - "11435:11435" + healthcheck: + test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:11435/health').read()"] + interval: 10s + timeout: 5s + retries: 3 + restart: unless-stopped + workers-test: build: context: . diff --git a/backends/advanced/src/advanced_omi_backend/clients/audio_stream_client.py b/backends/advanced/src/advanced_omi_backend/clients/audio_stream_client.py index ee33b86c..1f3c695a 100644 --- a/backends/advanced/src/advanced_omi_backend/clients/audio_stream_client.py +++ b/backends/advanced/src/advanced_omi_backend/clients/audio_stream_client.py @@ -123,6 +123,7 @@ async def send_audio_start( sample_rate: int = OMI_SAMPLE_RATE, sample_width: int = OMI_SAMPLE_WIDTH, channels: int = OMI_CHANNELS, + always_persist: bool = False, ) -> None: """Send Wyoming audio-start event. 
@@ -131,6 +132,7 @@ async def send_audio_start( sample_rate: Audio sample rate in Hz (default: 16000) sample_width: Bytes per sample (default: 2 for 16-bit) channels: Number of audio channels (default: 1) + always_persist: Save audio even if transcription fails (default: False) Note: The mode is inside the "data" dict, matching _handle_audio_session_start @@ -146,11 +148,15 @@ async def send_audio_start( "width": sample_width, "channels": channels, "mode": recording_mode, + "always_persist": always_persist, }, "payload_length": None, } + print(f"🔵 CLIENT: Sending audio-start message: {header}") + logger.info(f"🔵 CLIENT: Sending audio-start message: {header}") await self.ws.send(json.dumps(header) + "\n") - logger.info(f"Sent audio-start with mode={recording_mode}") + print(f"✅ CLIENT: Sent audio-start with mode={recording_mode}, always_persist={always_persist}") + logger.info(f"✅ CLIENT: Sent audio-start with mode={recording_mode}, always_persist={always_persist}") async def send_audio_chunk_wyoming( self, @@ -232,6 +238,7 @@ async def stream_wav_file( use_wyoming: bool = True, recording_mode: str = "streaming", realtime_factor: float = 0.1, + always_persist: bool = False, ) -> int: """Stream a WAV file in chunks, simulating real-time audio. @@ -241,6 +248,7 @@ async def stream_wav_file( use_wyoming: If True, use Wyoming protocol; if False, send raw binary recording_mode: "streaming" or "batch" realtime_factor: Fraction of real-time to simulate (0.1 = 10x speed) + always_persist: Save audio even if transcription fails (default: False) Returns: Number of chunks sent @@ -268,6 +276,7 @@ async def stream_wav_file( sample_rate=sample_rate, sample_width=sample_width, channels=channels, + always_persist=always_persist, ) # Reset counters @@ -335,6 +344,7 @@ def stream_audio_file( device_name: str = "robot-test", recording_mode: str = "streaming", use_wyoming: bool = True, + always_persist: bool = False, ) -> int: """Synchronous wrapper for streaming audio file. 
@@ -348,6 +358,7 @@ def stream_audio_file( device_name: Device name for client identification recording_mode: "streaming" or "batch" use_wyoming: If True, use Wyoming protocol + always_persist: Save audio even if transcription fails (default: False) Returns: Number of chunks sent @@ -359,6 +370,7 @@ async def _run() -> int: wav_path, use_wyoming=use_wyoming, recording_mode=recording_mode, + always_persist=always_persist, ) return asyncio.run(_run()) @@ -407,6 +419,7 @@ def start_stream( token: str, device_name: str = "robot-test", recording_mode: str = "streaming", + always_persist: bool = False, ) -> str: """Start a new audio stream (non-blocking). @@ -415,6 +428,7 @@ def start_stream( token: JWT token device_name: Device name for client ID recording_mode: "streaming" or "batch" + always_persist: Save audio even if transcription fails (default: False) Returns: stream_id: Unique ID for this stream session @@ -440,14 +454,16 @@ def run_loop(): # Connect and send audio-start async def _connect_and_start(): try: + logger.info(f"🔵 CLIENT: Stream {stream_id} connecting for {device_name}...") await client.connect() session.connected = True - await client.send_audio_start(recording_mode=recording_mode) + logger.info(f"✅ CLIENT: Stream {stream_id} connected, sending audio-start...") + await client.send_audio_start(recording_mode=recording_mode, always_persist=always_persist) session.audio_started = True - logger.info(f"Stream {stream_id} started for {device_name}") + logger.info(f"✅ CLIENT: Stream {stream_id} started for {device_name}") except Exception as e: session.error = str(e) - logger.error(f"Stream {stream_id} failed to start: {e}") + logger.error(f"❌ CLIENT: Stream {stream_id} failed to start: {e}") future = asyncio.run_coroutine_threadsafe(_connect_and_start(), loop) future.result(timeout=10) # Wait for connection diff --git a/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py 
b/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py index e6aac85a..c142aeee 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py @@ -118,6 +118,8 @@ async def get_conversation(conversation_id: str, user: User): "deleted": conversation.deleted, "deletion_reason": conversation.deletion_reason, "deleted_at": conversation.deleted_at.isoformat() if conversation.deleted_at else None, + "processing_status": conversation.processing_status, + "always_persist": conversation.always_persist, "end_reason": conversation.end_reason.value if conversation.end_reason else None, "completed_at": conversation.completed_at.isoformat() if conversation.completed_at else None, "title": conversation.title, @@ -133,6 +135,8 @@ async def get_conversation(conversation_id: str, user: User): "active_memory_version": conversation.active_memory_version, "transcript_version_count": conversation.transcript_version_count, "memory_version_count": conversation.memory_version_count, + "active_transcript_version_number": conversation.active_transcript_version_number, + "active_memory_version_number": conversation.active_memory_version_number, } return {"conversation": response} @@ -182,6 +186,8 @@ async def get_conversations(user: User, include_deleted: bool = False): "deleted": conv.deleted, "deletion_reason": conv.deletion_reason, "deleted_at": conv.deleted_at.isoformat() if conv.deleted_at else None, + "processing_status": conv.processing_status, + "always_persist": conv.always_persist, "title": conv.title, "summary": conv.summary, "detailed_summary": conv.detailed_summary, @@ -193,6 +199,8 @@ async def get_conversations(user: User, include_deleted: bool = False): "memory_count": conv.memory_count, "transcript_version_count": conv.transcript_version_count, "memory_version_count": conv.memory_version_count, + 
"active_transcript_version_number": conv.active_transcript_version_number, + "active_memory_version_number": conv.active_memory_version_number, }) return {"conversations": conversations} diff --git a/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py index 70b9b336..fdf1ec7b 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py @@ -325,7 +325,8 @@ def is_job_complete(job): def start_streaming_jobs( session_id: str, user_id: str, - client_id: str + client_id: str, + always_persist: bool = False ) -> Dict[str, str]: """ Enqueue jobs for streaming audio session (initial session setup). @@ -338,6 +339,7 @@ def start_streaming_jobs( session_id: Stream session ID (equals client_id for streaming) user_id: User identifier client_id: Client identifier + always_persist: Whether to create placeholder conversation immediately (default: False) Returns: Dict with job IDs: {'speech_detection': job_id, 'audio_persistence': job_id} @@ -386,6 +388,7 @@ def start_streaming_jobs( session_id, user_id, client_id, + always_persist, job_timeout=86400, # 24 hours for all-day sessions ttl=None, # No pre-run expiry (job can wait indefinitely in queue) result_ttl=JOB_RESULT_TTL, # Cleanup AFTER completion diff --git a/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py index 7831fc40..9cceb509 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py @@ -127,7 +127,7 @@ async def get_config_diagnostics(): "component": "STT (Batch)", "severity": "warning", "message": f"{stt.name} ({stt.model_provider}) - No API key configured", - "resolution": "Transcription will fail 
without API key" + "resolution": "Transcription can fail without API key" }) else: diagnostics["issues"].append({ @@ -150,7 +150,7 @@ async def get_config_diagnostics(): "component": "STT (Streaming)", "severity": "warning", "message": f"{stt_stream.name} ({stt_stream.model_provider}) - No API key configured", - "resolution": "Real-time transcription will fail without API key" + "resolution": "Real-time transcription can fail without API key" }) else: diagnostics["warnings"].append({ @@ -172,7 +172,7 @@ async def get_config_diagnostics(): "component": "LLM", "severity": "warning", "message": f"{llm.name} ({llm.model_provider}) - No API key configured", - "resolution": "Memory extraction will fail without API key" + "resolution": "Memory extraction can fail without API key" }) else: diff --git a/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py index 8cd3319b..541210c3 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py @@ -16,6 +16,7 @@ from typing import Optional from fastapi import WebSocket, WebSocketDisconnect, Query +from starlette.websockets import WebSocketState from friend_lite.decoder import OmiOpusDecoder import redis.asyncio as redis @@ -266,6 +267,26 @@ async def cleanup_client_state(client_id: str): stream_pattern = f"audio:stream:{client_id}" stream_key = await async_redis.exists(stream_pattern) if stream_key: + # Check how many messages are in the stream + stream_length = await async_redis.xlen(stream_pattern) + + # Check for pending messages in consumer groups + pending_count = 0 + try: + # Check streaming-transcription consumer group for pending messages + pending_info = await async_redis.xpending(stream_pattern, "streaming-transcription") + if pending_info: + pending_count = pending_info.get('pending', 0) + except 
Exception as e: + # Consumer group might not exist yet - that's ok + logger.debug(f"No consumer group for {stream_pattern}: {e}") + + if stream_length > 0 or pending_count > 0: + logger.warning( + f"⚠️ Closing {stream_pattern} with unprocessed data: " + f"{stream_length} messages in stream, {pending_count} pending in consumer group" + ) + await async_redis.expire(stream_pattern, 60) # 60 second TTL for consumer group fan-out logger.info(f"⏰ Set 60s TTL on Redis stream: {stream_pattern}") else: @@ -378,6 +399,10 @@ async def _initialize_streaming_session( Returns: Interim results subscriber task if websocket provided and session initialized, None otherwise """ + application_logger.info( + f"🔴 BACKEND: _initialize_streaming_session called for {client_id}" + ) + if hasattr(client_state, 'stream_session_id'): application_logger.debug(f"Session already initialized for {client_id}") return None @@ -426,10 +451,14 @@ async def _initialize_streaming_session( # Enqueue streaming jobs (speech detection + audio persistence) from advanced_omi_backend.controllers.queue_controller import start_streaming_jobs + # Get always_persist flag from client state + always_persist_flag = getattr(client_state, 'always_persist', False) + job_ids = start_streaming_jobs( session_id=client_state.stream_session_id, user_id=user_id, - client_id=client_id + client_id=client_id, + always_persist=always_persist_flag ) # Store job IDs in Redis session (not in ClientState) @@ -439,6 +468,9 @@ async def _initialize_streaming_session( audio_persistence_job_id=job_ids['audio_persistence'] ) + # Note: Placeholder conversation creation (if always_persist=True) is now handled + # by the audio persistence job itself, making it self-sufficient. + # Launch interim results subscriber if WebSocket provided subscriber_task = None if websocket: @@ -682,7 +714,7 @@ async def _handle_batch_mode_audio( client_id: str ) -> None: """ - Handle audio chunk in batch mode - accumulate in memory. 
+ Handle audio chunk in batch mode with rolling 30-minute limit. Args: client_state: Client state object @@ -694,14 +726,53 @@ async def _handle_batch_mode_audio( if not hasattr(client_state, 'batch_audio_chunks'): client_state.batch_audio_chunks = [] client_state.batch_audio_format = audio_format + client_state.batch_audio_bytes = 0 # Track total bytes + client_state.batch_chunks_processed = 0 # Track how many batches processed application_logger.info(f"📦 Started batch audio accumulation for {client_id}") # Accumulate audio client_state.batch_audio_chunks.append(audio_data) + client_state.batch_audio_bytes += len(audio_data) application_logger.debug( f"📦 Accumulated chunk #{len(client_state.batch_audio_chunks)} ({len(audio_data)} bytes) for {client_id}" ) + # Calculate duration: sample_rate * width * channels = bytes/second + sample_rate = audio_format.get("rate", 16000) + width = audio_format.get("width", 2) + channels = audio_format.get("channels", 1) + bytes_per_second = sample_rate * width * channels + + accumulated_seconds = client_state.batch_audio_bytes / bytes_per_second + MAX_BATCH_SECONDS = 30 * 60 # 30 minutes + + # Check if we've hit the 30-minute limit + if accumulated_seconds >= MAX_BATCH_SECONDS: + application_logger.warning( + f"⚠️ Batch accumulation reached 30-minute limit " + f"({accumulated_seconds:.1f}s, {client_state.batch_audio_bytes / 1024 / 1024:.1f} MB). " + f"Processing batch #{client_state.batch_chunks_processed + 1}..." 
+ ) + + # Process this batch (will create conversation and transcribe) + await _process_rolling_batch( + client_state, + user_id=client_state.user_id, # Need to store these on session start + user_email=client_state.user_email, + client_id=client_state.client_id, + batch_number=client_state.batch_chunks_processed + 1 + ) + + # Clear buffer for next batch + client_state.batch_audio_chunks = [] + client_state.batch_audio_bytes = 0 + client_state.batch_chunks_processed += 1 + + application_logger.info( + f"✅ Rolled batch #{client_state.batch_chunks_processed}. " + f"Starting fresh accumulation for next 30 minutes." + ) + async def _handle_audio_chunk( client_state, @@ -747,28 +818,88 @@ async def _handle_audio_chunk( async def _handle_audio_session_start( client_state, audio_format: dict, - client_id: str + client_id: str, + websocket: Optional[WebSocket] = None ) -> tuple[bool, str]: """ - Handle audio-start event - set mode and switch to audio streaming. + Handle audio-start event - validate mode, set recording mode, and extract always_persist flag. 
Args: client_state: Client state object - audio_format: Audio format dict with mode + audio_format: Audio format dict with mode and always_persist client_id: Client ID + websocket: Optional WebSocket connection (for WebUI error messages) Returns: (audio_streaming_flag, recording_mode) """ + from advanced_omi_backend.services.transcription import is_transcription_available + recording_mode = audio_format.get("mode", "batch") + always_persist = audio_format.get("always_persist", False) + + application_logger.info( + f"🔴 BACKEND: Received audio-start for {client_id} - " + f"mode={recording_mode}, always_persist={always_persist}, full format={audio_format}" + ) + + # Store on client state for later use client_state.recording_mode = recording_mode + client_state.always_persist = always_persist + + # VALIDATION: Check if streaming mode is available + if recording_mode == "streaming": + if not is_transcription_available("streaming"): + error_msg = ( + "Streaming transcription not available. " + "Please use Batch mode or configure a streaming STT provider (defaults.stt_stream in config.yml)." 
+ ) + + application_logger.warning( + f"⚠️ Streaming mode requested but stt_stream not configured for {client_id}" + ) + + # Send error to WebSocket client (for WebUI display) + if websocket and websocket.client_state == WebSocketState.CONNECTED: + try: + error_response = { + "type": "error", + "error": "streaming_not_configured", + "message": error_msg, + "code": 400 + } + await websocket.send_json(error_response) + application_logger.info(f"📤 Sent streaming error to WebUI client {client_id}") + + # Close the websocket connection after sending error + await websocket.close(code=1008, reason="Streaming transcription not configured") + application_logger.info(f"🔌 Closed WebSocket connection for {client_id} due to streaming config error") + + # Raise ValueError to exit the handler completely + raise ValueError(error_msg) + except ValueError: + # Re-raise ValueError to exit handler + raise + except Exception as e: + application_logger.error(f"Failed to send error to client: {e}") + # Still raise ValueError to exit handler + raise ValueError(error_msg) + + # For OMI devices (no websocket), fall back to batch mode silently + if not websocket: + application_logger.warning( + f"🔄 OMI device {client_id} requested streaming but falling back to batch mode" + ) + recording_mode = "batch" + client_state.recording_mode = recording_mode application_logger.info( f"🎙️ Audio session started for {client_id} - " f"Format: {audio_format.get('rate')}Hz, " f"{audio_format.get('width')}bytes, " f"{audio_format.get('channels')}ch, " - f"Mode: {recording_mode}" + f"Mode: {recording_mode}, " + f"Always Persist: {always_persist}" ) return True, recording_mode # Switch to audio streaming mode @@ -810,6 +941,99 @@ async def _handle_audio_session_stop( return False # Switch back to control mode +async def _process_rolling_batch( + client_state, + user_id: str, + user_email: str, + client_id: str, + batch_number: int +) -> None: + """ + Process accumulated batch audio as a rolling segment. 
+ + Creates conversation titled "Recording Part {batch_number}" and enqueues transcription. + + Args: + client_state: Client state with batch_audio_chunks + user_id: User ID + user_email: User email + client_id: Client ID + batch_number: Sequential batch number (1, 2, 3...) + """ + if not hasattr(client_state, 'batch_audio_chunks') or not client_state.batch_audio_chunks: + application_logger.warning(f"⚠️ No audio chunks to process for rolling batch") + return + + try: + from advanced_omi_backend.models.conversation import create_conversation + from advanced_omi_backend.utils.audio_chunk_utils import convert_audio_to_chunks + + # Combine chunks + complete_audio = b''.join(client_state.batch_audio_chunks) + application_logger.info( + f"📦 Rolling batch #{batch_number}: Combined {len(client_state.batch_audio_chunks)} chunks " + f"into {len(complete_audio)} bytes" + ) + + # Get audio format + audio_format = getattr(client_state, 'batch_audio_format', {}) + sample_rate = audio_format.get("rate", 16000) + width = audio_format.get("width", 2) + channels = audio_format.get("channels", 1) + + # Create conversation with batch number in title + conversation = create_conversation( + user_id=user_id, + client_id=client_id, + title=f"Recording Part {batch_number}", + summary="Rolling batch processing..." 
+ ) + await conversation.insert() + conversation_id = conversation.conversation_id # Get the auto-generated ID + + # Convert to MongoDB chunks + num_chunks = await convert_audio_to_chunks( + conversation_id=conversation_id, + audio_data=complete_audio, + sample_rate=sample_rate, + channels=channels, + sample_width=width + ) + + # Enqueue transcription job + from advanced_omi_backend.controllers.queue_controller import ( + transcription_queue, + JOB_RESULT_TTL + ) + from advanced_omi_backend.workers.transcription_jobs import transcribe_full_audio_job + + version_id = str(uuid.uuid4()) + transcribe_job_id = f"transcribe_rolling_{conversation_id[:12]}_{batch_number}" + + transcription_job = transcription_queue.enqueue( + transcribe_full_audio_job, + conversation_id, + version_id, + f"rolling_batch_{batch_number}", # trigger + job_timeout=1800, # 30 minutes + result_ttl=JOB_RESULT_TTL, + job_id=transcribe_job_id, + description=f"Transcribe rolling batch #{batch_number} {conversation_id[:8]}", + meta={'conversation_id': conversation_id, 'client_id': client_id, 'batch_number': batch_number} + ) + + application_logger.info( + f"✅ Rolling batch #{batch_number} created conversation {conversation_id}, " + f"enqueued transcription job {transcription_job.id}" + ) + + except Exception as e: + application_logger.error( + f"❌ Failed to process rolling batch #{batch_number}: {e}", + exc_info=True + ) + + async def _process_batch_audio_complete( client_state, user_id: str, @@ -977,7 +1201,14 @@ async def handle_omi_websocket( if header["type"] == "audio-start": # Handle audio session start + application_logger.info(f"🔴 BACKEND: Received audio-start in OMI MODE for {client_id} (header={header})") application_logger.info(f"🎙️ OMI audio session started for {client_id}") + + # Store user context on client state + client_state.user_id = user.user_id + client_state.user_email = user.email + client_state.client_id = client_id + interim_subscriber_task = await 
_initialize_streaming_session( client_state, audio_stream_producer, @@ -1111,13 +1342,35 @@ async def handle_pcm_websocket( application_logger.debug(f"✅ Received message type: {header.get('type')} for {client_id}") if header["type"] == "audio-start": + application_logger.info(f"🔴 BACKEND: Received audio-start in CONTROL MODE for {client_id}") application_logger.debug(f"🎙️ Processing audio-start for {client_id}") - # Handle audio session start using helper function + + # Store user context on client state for rolling batch processing + client_state.user_id = user.user_id + client_state.user_email = user.email + client_state.client_id = client_id + + # Handle audio session start using helper function (pass websocket for error handling) audio_streaming, recording_mode = await _handle_audio_session_start( client_state, header.get("data", {}), - client_id + client_id, + websocket=ws # Pass websocket for WebUI error display ) + + # Initialize streaming session (for always_persist and job setup) + if recording_mode == "streaming": + application_logger.info(f"🔴 BACKEND: Initializing streaming session for {client_id}") + interim_subscriber_task = await _initialize_streaming_session( + client_state, + audio_stream_producer, + user.user_id, + user.email, + client_id, + header.get("data", {}), + websocket=ws + ) + continue # Continue to audio streaming mode elif header["type"] == "ping": diff --git a/backends/advanced/src/advanced_omi_backend/models/conversation.py b/backends/advanced/src/advanced_omi_backend/models/conversation.py index 1ee9a57a..e4446f0f 100644 --- a/backends/advanced/src/advanced_omi_backend/models/conversation.py +++ b/backends/advanced/src/advanced_omi_backend/models/conversation.py @@ -125,6 +125,16 @@ class MemoryVersion(BaseModel): deletion_reason: Optional[str] = Field(None, description="Reason for deletion (no_meaningful_speech, audio_file_not_ready, etc.)") deleted_at: Optional[datetime] = Field(None, description="When the conversation was marked as 
deleted") + # Always persist audio flag and processing status + processing_status: Optional[str] = Field( + None, + description="Processing status: pending_transcription, transcription_failed, completed" + ) + always_persist: bool = Field( + default=False, + description="Flag indicating conversation was created for audio persistence" + ) + # Conversation completion tracking end_reason: Optional["Conversation.EndReason"] = Field(None, description="Reason why the conversation ended") completed_at: Optional[datetime] = Field(None, description="When the conversation was completed/closed") @@ -256,6 +266,28 @@ def memory_version_count(self) -> int: """Get count of memory versions.""" return len(self.memory_versions) + @computed_field + @property + def active_transcript_version_number(self) -> Optional[int]: + """Get 1-based version number of the active transcript version.""" + if not self.active_transcript_version: + return None + for i, version in enumerate(self.transcript_versions): + if version.version_id == self.active_transcript_version: + return i + 1 + return None + + @computed_field + @property + def active_memory_version_number(self) -> Optional[int]: + """Get 1-based version number of the active memory version.""" + if not self.active_memory_version: + return None + for i, version in enumerate(self.memory_versions): + if version.version_id == self.active_memory_version: + return i + 1 + return None + def add_transcript_version( self, version_id: str, diff --git a/backends/advanced/src/advanced_omi_backend/plugins/email_summarizer/email_service.py b/backends/advanced/src/advanced_omi_backend/plugins/email_summarizer/email_service.py index be2d389e..b51de0b5 100644 --- a/backends/advanced/src/advanced_omi_backend/plugins/email_summarizer/email_service.py +++ b/backends/advanced/src/advanced_omi_backend/plugins/email_summarizer/email_service.py @@ -14,6 +14,8 @@ from email.mime.text import MIMEText from typing import Any, Dict, Optional +from 
advanced_omi_backend.utils.logging_utils import mask_dict + logger = logging.getLogger(__name__) @@ -49,10 +51,13 @@ def __init__(self, config: Dict[str, Any]): "smtp_password, from_email" ) + # Log configuration with masked secrets + masked_config = mask_dict(config) logger.info( f"SMTP Email Service initialized: {self.username}@{self.host}:{self.port} " f"(TLS: {self.use_tls})" ) + logger.debug(f"SMTP config: {masked_config}") async def send_email( self, @@ -151,25 +156,38 @@ def _test_smtp_connection(self) -> None: Raises: Exception: If connection fails """ - if self.use_tls: - smtp_server = smtplib.SMTP(self.host, self.port, timeout=10) - smtp_server.ehlo() - smtp_server.starttls() - smtp_server.ehlo() - else: - smtp_server = smtplib.SMTP(self.host, self.port, timeout=10) - try: - smtp_server.login(self.username, self.password) - logger.debug("SMTP authentication successful") - finally: - smtp_server.quit() + if self.use_tls: + smtp_server = smtplib.SMTP(self.host, self.port, timeout=10) + smtp_server.ehlo() + smtp_server.starttls() + smtp_server.ehlo() + else: + smtp_server = smtplib.SMTP(self.host, self.port, timeout=10) + + try: + smtp_server.login(self.username, self.password) + logger.debug("SMTP authentication successful") + finally: + smtp_server.quit() + except smtplib.SMTPAuthenticationError as e: + # Note: Error message from smtplib should not contain password, but be cautious + raise Exception(f"SMTP Authentication failed for {self.username}. Check credentials. For Gmail, use an App Password instead of your regular password. Error: {str(e)}") + except smtplib.SMTPConnectError as e: + raise Exception(f"Failed to connect to SMTP server {self.host}:{self.port}. Check host and port. Error: {str(e)}") + except smtplib.SMTPServerDisconnected as e: + raise Exception(f"SMTP server disconnected unexpectedly. Check TLS settings (port 587 needs TLS, port 465 needs SSL). 
Error: {str(e)}") + except TimeoutError as e: + raise Exception(f"Connection to {self.host}:{self.port} timed out. Check firewall/network settings. Error: {str(e)}") + except Exception as e: + raise Exception(f"SMTP connection test failed: {type(e).__name__}: {str(e)}") # Test script for development/debugging async def main(): """Test the SMTP email service.""" import os + from dotenv import load_dotenv load_dotenv() diff --git a/backends/advanced/src/advanced_omi_backend/plugins/email_summarizer/plugin.py b/backends/advanced/src/advanced_omi_backend/plugins/email_summarizer/plugin.py index bb3965dc..a61a915d 100644 --- a/backends/advanced/src/advanced_omi_backend/plugins/email_summarizer/plugin.py +++ b/backends/advanced/src/advanced_omi_backend/plugins/email_summarizer/plugin.py @@ -9,6 +9,7 @@ from advanced_omi_backend.database import get_database from advanced_omi_backend.llm_client import async_generate +from advanced_omi_backend.utils.logging_utils import mask_dict from ..base import BasePlugin, PluginContext, PluginResult from .email_service import SMTPEmailService @@ -342,6 +343,9 @@ async def test_connection(config: Dict[str, Any]) -> Dict[str, Any]: 'from_name': config.get('from_name', 'Chronicle AI'), } + # Log config with masked secrets for debugging + logger.debug(f"SMTP config for testing: {mask_dict(smtp_config)}") + # Create temporary email service instance email_service = SMTPEmailService(smtp_config) @@ -373,8 +377,23 @@ async def test_connection(config: Dict[str, Any]) -> Dict[str, Any]: except Exception as e: logger.error(f"SMTP connection test failed: {e}", exc_info=True) + error_msg = str(e) + + # Provide helpful hints based on error type + hints = [] + if "Authentication" in error_msg or "535" in error_msg: + hints.append("For Gmail: Enable 2FA and create an App Password at https://myaccount.google.com/apppasswords") + hints.append("Verify your username and password are correct") + elif "Connection" in error_msg or "timeout" in 
error_msg.lower(): + hints.append("Check your SMTP host and port settings") + hints.append("Verify firewall/network allows outbound SMTP connections") + elif "TLS" in error_msg or "SSL" in error_msg: + hints.append("For port 587: Enable TLS") + hints.append("For port 465: Disable TLS (uses implicit SSL)") + return { "success": False, - "message": f"Connection test failed: {str(e)}", - "status": "error" + "message": f"Connection test failed: {error_msg}", + "status": "error", + "hints": hints } diff --git a/backends/advanced/src/advanced_omi_backend/services/transcription/__init__.py b/backends/advanced/src/advanced_omi_backend/services/transcription/__init__.py index 4bfae311..99b79a6f 100644 --- a/backends/advanced/src/advanced_omi_backend/services/transcription/__init__.py +++ b/backends/advanced/src/advanced_omi_backend/services/transcription/__init__.py @@ -70,6 +70,12 @@ def name(self) -> str: return self._name async def transcribe(self, audio_data: bytes, sample_rate: int, diarize: bool = False) -> dict: + # Special handling for mock provider (no HTTP server needed) + if self.model.model_provider == "mock": + from .mock_provider import MockTranscriptionProvider + mock = MockTranscriptionProvider(fail_mode=False) + return await mock.transcribe(audio_data, sample_rate, diarize) + op = (self.model.operations or {}).get("stt_transcribe") or {} method = (op.get("method") or "POST").upper() path = (op.get("path") or "/listen") @@ -372,9 +378,23 @@ def is_transcription_available(mode: str = "batch") -> bool: return provider is not None +def get_mock_transcription_provider(fail_mode: bool = False) -> BaseTranscriptionProvider: + """Return a mock transcription provider (for testing only). 
+ + Args: + fail_mode: If True, transcribe() will raise an exception to simulate transcription failure + + Returns: + MockTranscriptionProvider instance + """ + from .mock_provider import MockTranscriptionProvider + return MockTranscriptionProvider(fail_mode=fail_mode) + + __all__ = [ "get_transcription_provider", "is_transcription_available", + "get_mock_transcription_provider", "RegistryBatchTranscriptionProvider", "RegistryStreamingTranscriptionProvider", "BaseTranscriptionProvider", diff --git a/backends/advanced/src/advanced_omi_backend/services/transcription/mock_provider.py b/backends/advanced/src/advanced_omi_backend/services/transcription/mock_provider.py index f6a2d9c0..04b192df 100644 --- a/backends/advanced/src/advanced_omi_backend/services/transcription/mock_provider.py +++ b/backends/advanced/src/advanced_omi_backend/services/transcription/mock_provider.py @@ -17,9 +17,15 @@ class MockTranscriptionProvider(BatchTranscriptionProvider): Useful for testing API contracts and data flow without external APIs. """ - def __init__(self): - """Initialize the mock transcription provider.""" + def __init__(self, fail_mode: bool = False): + """ + Initialize the mock transcription provider. + + Args: + fail_mode: If True, transcribe() will raise an exception to simulate transcription failure + """ self._is_connected = False + self.fail_mode = fail_mode @property def name(self) -> str: @@ -28,7 +34,7 @@ def name(self) -> str: async def transcribe(self, audio_data: bytes, sample_rate: int, diarize: bool = False) -> dict: """ - Return a predefined mock transcript. + Return a predefined mock transcript or raise exception in fail mode. 
Args: audio_data: Raw audio bytes (ignored in mock) @@ -37,15 +43,26 @@ async def transcribe(self, audio_data: bytes, sample_rate: int, diarize: bool = Returns: Dictionary containing predefined transcript with words and segments + + Raises: + RuntimeError: If fail_mode is True (simulates transcription failure) """ + # Simulate transcription failure if fail_mode is enabled + if self.fail_mode: + raise RuntimeError("Mock transcription failure (test mode)") + # Calculate audio duration from bytes (assuming 16-bit PCM) audio_duration = len(audio_data) / (sample_rate * 2) # 2 bytes per sample # Return a mock transcript with word-level timestamps # This simulates a real transcription result - mock_transcript = "This is a mock transcription for testing purposes." + # Note: Made longer to pass test requirements (>100 chars) + mock_transcript = ( + "This is a mock transcription for testing purposes. " + "It contains enough words to meet minimum length requirements for automated testing." + ) - # Generate mock words with timestamps + # Generate mock words with timestamps (spread across audio duration) words = [ {"word": "This", "start": 0.0, "end": 0.3, "confidence": 0.99, "speaker": 0}, {"word": "is", "start": 0.3, "end": 0.5, "confidence": 0.99, "speaker": 0}, @@ -55,6 +72,18 @@ async def transcribe(self, audio_data: bytes, sample_rate: int, diarize: bool = {"word": "for", "start": 1.5, "end": 1.7, "confidence": 0.99, "speaker": 0}, {"word": "testing", "start": 1.7, "end": 2.1, "confidence": 0.99, "speaker": 0}, {"word": "purposes", "start": 2.1, "end": 2.6, "confidence": 0.97, "speaker": 0}, + {"word": "It", "start": 2.6, "end": 2.8, "confidence": 0.99, "speaker": 0}, + {"word": "contains", "start": 2.8, "end": 3.2, "confidence": 0.99, "speaker": 0}, + {"word": "enough", "start": 3.2, "end": 3.5, "confidence": 0.99, "speaker": 0}, + {"word": "words", "start": 3.5, "end": 3.8, "confidence": 0.99, "speaker": 0}, + {"word": "to", "start": 3.8, "end": 3.9, "confidence": 0.99, 
"speaker": 0}, + {"word": "meet", "start": 3.9, "end": 4.1, "confidence": 0.99, "speaker": 0}, + {"word": "minimum", "start": 4.1, "end": 4.5, "confidence": 0.98, "speaker": 0}, + {"word": "length", "start": 4.5, "end": 4.8, "confidence": 0.99, "speaker": 0}, + {"word": "requirements", "start": 4.8, "end": 5.4, "confidence": 0.98, "speaker": 0}, + {"word": "for", "start": 5.4, "end": 5.6, "confidence": 0.99, "speaker": 0}, + {"word": "automated", "start": 5.6, "end": 6.1, "confidence": 0.98, "speaker": 0}, + {"word": "testing", "start": 6.1, "end": 6.5, "confidence": 0.99, "speaker": 0}, ] # Mock segments (single speaker for simplicity) @@ -62,7 +91,7 @@ async def transcribe(self, audio_data: bytes, sample_rate: int, diarize: bool = { "speaker": 0, "start": 0.0, - "end": 2.6, + "end": 6.5, "text": mock_transcript } ] diff --git a/backends/advanced/src/advanced_omi_backend/services/transcription/streaming_consumer.py b/backends/advanced/src/advanced_omi_backend/services/transcription/streaming_consumer.py index 83cb9d12..b6c05ae8 100644 --- a/backends/advanced/src/advanced_omi_backend/services/transcription/streaming_consumer.py +++ b/backends/advanced/src/advanced_omi_backend/services/transcription/streaming_consumer.py @@ -524,8 +524,8 @@ async def start_consuming(self): asyncio.create_task(self.process_stream(stream_name)) logger.info(f"✅ Now consuming from {stream_name} (group: {self.group_name})") - # Sleep before next discovery cycle - await asyncio.sleep(5) + # Sleep before next discovery cycle (1s for fast discovery) + await asyncio.sleep(1) except Exception as e: logger.error(f"Fatal error in consumer main loop: {e}", exc_info=True) diff --git a/backends/advanced/src/advanced_omi_backend/utils/logging_utils.py b/backends/advanced/src/advanced_omi_backend/utils/logging_utils.py new file mode 100644 index 00000000..3d15f49c --- /dev/null +++ b/backends/advanced/src/advanced_omi_backend/utils/logging_utils.py @@ -0,0 +1,262 @@ +""" +Logging utilities for 
secure logging with secret masking. + +Provides functions to mask sensitive information in logs to prevent +accidental exposure of credentials, tokens, and other secrets. +""" +import re +from typing import Any, Dict, List, Optional, Set, Union + + +# Common patterns for identifying secret field names +SECRET_KEYWORDS = [ + 'PASSWORD', 'PASSWD', 'PWD', + 'TOKEN', 'AUTH', 'AUTHORIZATION', + 'KEY', 'APIKEY', 'API_KEY', 'SECRET', + 'CREDENTIAL', 'CRED', + 'PRIVATE', 'CERTIFICATE', 'CERT' +] + +# Default mask for secrets +SECRET_MASK = '••••••••' + + +def is_secret_field(field_name: str, additional_keywords: Optional[List[str]] = None) -> bool: + """ + Check if a field name indicates a secret value. + + Args: + field_name: The field/key name to check + additional_keywords: Optional additional keywords to check for + + Returns: + True if field name matches secret patterns + + Examples: + >>> is_secret_field('smtp_password') + True + >>> is_secret_field('smtp_host') + False + >>> is_secret_field('api_token') + True + """ + field_upper = field_name.upper() + + # Check default keywords + for keyword in SECRET_KEYWORDS: + if keyword in field_upper: + return True + + # Check additional keywords if provided + if additional_keywords: + for keyword in additional_keywords: + if keyword.upper() in field_upper: + return True + + return False + + +def mask_dict( + data: Dict[str, Any], + mask: str = SECRET_MASK, + secret_fields: Optional[Set[str]] = None, + additional_keywords: Optional[List[str]] = None +) -> Dict[str, Any]: + """ + Mask secret values in a dictionary for safe logging. 
+ + Args: + data: Dictionary to mask + mask: String to use for masked values (default: '••••••••') + secret_fields: Explicit set of field names to mask (case-insensitive) + additional_keywords: Additional keywords to identify secret fields + + Returns: + New dictionary with secrets masked + + Examples: + >>> config = {'smtp_host': 'smtp.gmail.com', 'smtp_password': 'secret123'} + >>> mask_dict(config) + {'smtp_host': 'smtp.gmail.com', 'smtp_password': '••••••••'} + + >>> mask_dict({'token': 'abc123'}, secret_fields={'token'}) + {'token': '••••••••'} + """ + masked = {} + secret_fields_lower = {f.lower() for f in (secret_fields or set())} + + for key, value in data.items(): + # Check if this is a secret field + is_secret = ( + key.lower() in secret_fields_lower or + is_secret_field(key, additional_keywords) + ) + + if is_secret and value: + # Mask non-empty secret values + masked[key] = mask + elif isinstance(value, dict): + # Recursively mask nested dictionaries + masked[key] = mask_dict(value, mask, secret_fields, additional_keywords) + elif isinstance(value, list): + # Handle lists of dictionaries + masked[key] = [ + mask_dict(item, mask, secret_fields, additional_keywords) + if isinstance(item, dict) else item + for item in value + ] + else: + # Keep non-secret values as-is + masked[key] = value + + return masked + + +def mask_string( + text: str, + patterns: Optional[List[str]] = None, + mask: str = SECRET_MASK +) -> str: + """ + Mask sensitive patterns in strings (e.g., tokens in error messages). 
+ + Args: + text: String to mask + patterns: List of regex patterns to match and mask + mask: String to use for masked values + + Returns: + String with matched patterns masked + + Examples: + >>> mask_string('Token: abc123def456', patterns=[r'Token: \w+']) + 'Token: ••••••••' + + >>> mask_string('password=secret123', patterns=[r'password=\S+']) + 'password=••••••••' + """ + if not patterns: + # Default patterns for common secret formats + patterns = [ + r'password[=:]\s*\S+', + r'token[=:]\s*\S+', + r'key[=:]\s*\S+', + r'secret[=:]\s*\S+', + r'api[_-]?key[=:]\s*\S+', + ] + + masked_text = text + for pattern in patterns: + # Replace the value part after the = or : with mask + masked_text = re.sub( + pattern, + lambda m: re.sub(r'([=:])\s*\S+', r'\1' + mask, m.group(0)), + masked_text, + flags=re.IGNORECASE + ) + + return masked_text + + +def safe_log_config( + config: Dict[str, Any], + name: str = "Configuration", + mask: str = SECRET_MASK, + secret_fields: Optional[Set[str]] = None, + additional_keywords: Optional[List[str]] = None +) -> str: + """ + Create a safe log message for configuration with masked secrets. + + Args: + config: Configuration dictionary + name: Name for the configuration (e.g., "SMTP Config") + mask: String to use for masked values + secret_fields: Explicit set of field names to mask + additional_keywords: Additional keywords to identify secret fields + + Returns: + Formatted string safe for logging + + Examples: + >>> config = {'host': 'smtp.gmail.com', 'password': 'secret', 'port': 587} + >>> safe_log_config(config, "SMTP") + "SMTP: {'host': 'smtp.gmail.com', 'password': '••••••••', 'port': 587}" + """ + masked = mask_dict(config, mask, secret_fields, additional_keywords) + return f"{name}: {masked}" + + +def mask_connection_string(connection_string: str, mask: str = SECRET_MASK) -> str: + """ + Mask credentials in connection strings (URLs, DSNs). 
+ + Args: + connection_string: Connection string that may contain credentials + mask: String to use for masked values + + Returns: + Connection string with credentials masked + + Examples: + >>> mask_connection_string('mongodb://user:pass123@localhost:27017/db') + 'mongodb://user:••••••••@localhost:27017/db' + + >>> mask_connection_string('postgresql://admin:secret@db.example.com/mydb') + 'postgresql://admin:••••••••@db.example.com/mydb' + """ + # Pattern: protocol://username:password@host + return re.sub( + r'([a-zA-Z][a-zA-Z0-9+.-]*://[^:]+:)[^@]+(@)', + r'\1' + mask + r'\2', + connection_string + ) + + +def create_masked_repr( + obj: Any, + secret_attrs: Set[str], + mask: str = SECRET_MASK +) -> str: + """ + Create a string representation of an object with masked secret attributes. + + Useful for __repr__ methods in classes that contain secrets. + + Args: + obj: Object to represent + secret_attrs: Set of attribute names that are secrets + mask: String to use for masked values + + Returns: + String representation with secrets masked + + Examples: + >>> class Config: + ... def __init__(self): + ... self.host = 'smtp.gmail.com' + ... 
self.password = 'secret123' + >>> + >>> config = Config() + >>> create_masked_repr(config, {'password'}) + "Config(host='smtp.gmail.com', password='••••••••')" + """ + class_name = obj.__class__.__name__ + attrs = [] + + for key in dir(obj): + # Skip private/magic attributes and methods + if key.startswith('_') or callable(getattr(obj, key)): + continue + + value = getattr(obj, key) + + # Mask secret attributes + if key in secret_attrs: + value_repr = f"'{mask}'" + else: + value_repr = repr(value) + + attrs.append(f"{key}={value_repr}") + + return f"{class_name}({', '.join(attrs)})" diff --git a/backends/advanced/src/advanced_omi_backend/workers/audio_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/audio_jobs.py index 8505d547..26089c2a 100644 --- a/backends/advanced/src/advanced_omi_backend/workers/audio_jobs.py +++ b/backends/advanced/src/advanced_omi_backend/workers/audio_jobs.py @@ -26,6 +26,7 @@ async def audio_streaming_persistence_job( session_id: str, user_id: str, client_id: str, + always_persist: bool = False, *, redis_client=None ) -> Dict[str, Any]: @@ -41,6 +42,7 @@ async def audio_streaming_persistence_job( session_id: Stream session ID user_id: User ID client_id: Client ID + always_persist: Whether to create placeholder conversation immediately (default: False) redis_client: Redis client (injected by decorator) Returns: @@ -68,6 +70,53 @@ async def audio_streaming_persistence_job( logger.warning(f"Failed to create audio consumer group: {e}") logger.debug(f"Audio consumer group already exists for {audio_stream_name}") + # If always_persist enabled, create placeholder conversation if it doesn't exist + if always_persist: + conversation_key = f"conversation:current:{session_id}" + existing_conversation_id = await redis_client.get(conversation_key) + + if not existing_conversation_id: + logger.info( + f"📝 always_persist=True - creating placeholder conversation for session {session_id[:12]}" + ) + + # Import conversation model + from 
advanced_omi_backend.models.conversation import Conversation + + # Create placeholder conversation + conversation = Conversation( + user_id=user_id, + client_id=client_id, + title="Audio Recording (Processing...)", + summary="Transcription in progress...", + transcript_versions=[], + memory_versions=[], + processing_status="pending_transcription", + always_persist=True + ) + await conversation.insert() + + # Set conversation:current Redis key + await redis_client.set( + conversation_key, + conversation.conversation_id, + ex=3600 # 1 hour expiry + ) + + logger.info( + f"✅ Created placeholder conversation {conversation.conversation_id} " + f"and set Redis key {conversation_key}" + ) + else: + logger.info( + f"📋 always_persist=True - placeholder conversation already exists: " + f"{existing_conversation_id.decode()}" + ) + else: + logger.info( + f"🔍 always_persist=False - will wait for speech detection to create conversation" + ) + # Job control session_key = f"audio:session:{session_id}" max_runtime = 86340 # 24 hours - 60 seconds (graceful exit before RQ timeout) @@ -384,8 +433,10 @@ async def flush_pcm_buffer(): # Clean up Redis tracking keys audio_job_key = f"audio_persistence:session:{session_id}" await redis_client.delete(audio_job_key) - conversation_key = f"conversation:current:{session_id}" - await redis_client.delete(conversation_key) + + # NOTE: Do NOT delete conversation:current:{session_id} key here! + # It's needed for speech detection to reuse placeholder conversations (always_persist feature). + # The key already has a TTL (3600s) set when created and will expire automatically. 
logger.info(f"🧹 Cleaned up tracking keys for session {session_id}") return { diff --git a/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py index febdfbd8..86c7b464 100644 --- a/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py +++ b/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py @@ -222,19 +222,66 @@ async def open_conversation_job( current_job = get_current_job() current_job.meta = {} current_job.save_meta() - - # Create minimal streaming conversation (conversation_id auto-generated) - conversation = create_conversation( - user_id=user_id, - client_id=client_id, - title="Recording...", - summary="Transcribing audio...", - ) - # Save to database - await conversation.insert() - conversation_id = conversation.conversation_id # Get the auto-generated ID - logger.info(f"✅ Created streaming conversation {conversation_id} for session {session_id}") + # Check if a placeholder conversation already exists for this session + conversation_key = f"conversation:current:{session_id}" + existing_conversation_id_bytes = await redis_client.get(conversation_key) + + logger.info(f"🔍 Checking for placeholder: key={conversation_key}, found={existing_conversation_id_bytes is not None}") + + conversation = None + if existing_conversation_id_bytes: + existing_conversation_id = existing_conversation_id_bytes.decode() + logger.info(f"🔍 Found Redis key with conversation_id={existing_conversation_id}") + + # Try to fetch the existing conversation by conversation_id + conversation = await Conversation.find_one( + Conversation.conversation_id == existing_conversation_id + ) + + if conversation: + always_persist = getattr(conversation, 'always_persist', False) + processing_status = getattr(conversation, 'processing_status', None) + logger.info( + f"🔍 Found conversation in DB: always_persist={always_persist}, " + f"processing_status={processing_status}" + 
) + else: + logger.warning(f"⚠️ Conversation {existing_conversation_id} not found in database!") + + # Verify it's a placeholder conversation (always_persist=True, processing_status='pending_transcription') + if conversation and getattr(conversation, 'always_persist', False) and \ + getattr(conversation, 'processing_status', None) == 'pending_transcription': + logger.info( + f"🔄 Reusing placeholder conversation {conversation.conversation_id} for session {session_id}" + ) + # Update placeholder with active recording status + conversation.title = "Recording..." + conversation.summary = "Transcribing audio..." + await conversation.save() + conversation_id = conversation.conversation_id + else: + if conversation: + logger.info( + f"⚠️ Found conversation {existing_conversation_id} but not a valid placeholder " + f"(always_persist={getattr(conversation, 'always_persist', False)}, " + f"processing_status={getattr(conversation, 'processing_status', None)}), creating new" + ) + conversation = None + else: + logger.info(f"🔍 No Redis key found for {conversation_key}, creating new conversation") + + # If no valid placeholder found, create new conversation + if not conversation: + conversation = create_conversation( + user_id=user_id, + client_id=client_id, + title="Recording...", + summary="Transcribing audio...", + ) + await conversation.insert() + conversation_id = conversation.conversation_id + logger.info(f"✅ Created streaming conversation {conversation_id} for session {session_id}") # Link job metadata to conversation (cascading updates) current_job.meta["conversation_id"] = conversation_id @@ -625,6 +672,14 @@ async def open_conversation_job( set_as_active=True ) + # Update placeholder conversation if it exists + if getattr(conversation, 'always_persist', False) and getattr(conversation, 'processing_status', None) == "pending_transcription": + # Keep placeholder status - will be updated by title_summary_job + logger.info( + f"📝 Placeholder conversation {conversation_id} 
has transcript, " + f"waiting for title/summary generation" + ) + # Save conversation with streaming transcript await conversation.save() logger.info( @@ -751,8 +806,28 @@ async def generate_title_summary_job(conversation_id: str, *, redis_client=None) logger.info(f"✅ Generated summary: '{conversation.summary}'") logger.info(f"✅ Generated detailed summary: {len(conversation.detailed_summary)} chars") + # Update processing status for placeholder conversations + if getattr(conversation, 'processing_status', None) == "pending_transcription": + conversation.processing_status = "completed" + logger.info( + f"✅ Updated placeholder conversation {conversation_id} " + f"processing_status to 'completed'" + ) + except Exception as gen_error: logger.error(f"❌ Title/summary generation failed: {gen_error}") + + # Mark placeholder conversation as failed + if getattr(conversation, 'processing_status', None) == "pending_transcription": + conversation.title = "Audio Recording (Transcription Failed)" + conversation.summary = f"Title/summary generation failed: {str(gen_error)}" + conversation.processing_status = "transcription_failed" + await conversation.save() + logger.warning( + f"⚠️ Marked placeholder conversation {conversation_id} " + f"as transcription_failed (title/summary generation error). Audio is still saved." 
+ ) + return { "success": False, "error": str(gen_error), diff --git a/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py index a3676383..d0ec47d9 100644 --- a/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py +++ b/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py @@ -21,6 +21,7 @@ ) from advanced_omi_backend.utils.conversation_utils import analyze_speech, mark_conversation_deleted from advanced_omi_backend.services.plugin_service import get_plugin_router +from advanced_omi_backend.config import get_backend_config logger = logging.getLogger(__name__) @@ -357,10 +358,33 @@ async def transcribe_full_audio_job( # Calculate processing time (transcription only) processing_time = time.time() - start_time - # Transcription only provides text + words with timestamps - # Speaker service will create segments via diarization - speaker_segments = [] - logger.info(f"📊 Transcription complete: {len(words)} words (segments will be created by speaker service)") + # Check if we should use provider segments as fallback + transcription_config = get_backend_config('transcription') + use_provider_segments = transcription_config.get('use_provider_segments', False) + + # If flag enabled and provider returned segments, use them + # Otherwise, speaker service will create segments via diarization + if use_provider_segments and segments: + # Convert dict segments to SpeakerSegment objects + speaker_segments = [ + Conversation.SpeakerSegment( + speaker=str(seg.get("speaker", "0")), # Convert to string for Pydantic validation + start=seg.get("start", 0.0), + end=seg.get("end", 0.0), + text=seg.get("text", "") + ) + for seg in segments + ] + logger.info( + f"✅ Using {len(speaker_segments)} segments from transcription provider " + f"(use_provider_segments=true)" + ) + else: + speaker_segments = [] + logger.info( + f"📊 Transcription complete: {len(words)} 
words " + f"(segments will be created by speaker service)" + ) # Add new transcript version provider_normalized = provider_name.lower() if provider_name else "unknown" @@ -376,12 +400,12 @@ async def transcribe_full_audio_job( for w in words ] - # Prepare metadata (transcription only - speaker service will add segments and metadata) + # Prepare metadata metadata = { "trigger": trigger, "audio_file_size": len(wav_data), "word_count": len(words), - "segments_created_by": "speaker_service", # Speaker service creates segments via diarization + "segments_created_by": "provider" if (use_provider_segments and segments) else "speaker_service", } conversation.add_transcript_version( @@ -819,6 +843,36 @@ async def stream_speech_detection_job( f" Reason: {reason}\n" f" Runtime: {time.time() - start_time:.1f}s" ) + + # Check if this is an always_persist conversation that needs to be marked as failed + # NOTE: We check MongoDB directly because the conversation:current Redis key might have been + # deleted by the audio persistence job cleanup (which runs in parallel). 
+ from advanced_omi_backend.models.conversation import Conversation + + logger.info(f"🔍 Checking MongoDB for always_persist conversation with client_id: {client_id}") + + # Find conversation by client_id that matches this session + # session_id == client_id for streaming sessions (set in _initialize_streaming_session) + conversation = await Conversation.find_one( + Conversation.client_id == session_id, + Conversation.always_persist == True, + Conversation.processing_status == "pending_transcription" + ) + + if conversation: + logger.info(f"🔴 Found always_persist placeholder conversation {conversation.conversation_id} for failed session {session_id[:12]}") + + # Update conversation with failure status + conversation.processing_status = "transcription_failed" + conversation.title = "Audio Recording (Transcription Failed)" + conversation.summary = f"Transcription failed: {reason}" + + await conversation.save() + + logger.info(f"✅ Marked conversation {conversation.conversation_id} as transcription_failed") + else: + logger.info(f"ℹ️ No always_persist placeholder conversation found for session {session_id[:12]}") + return { "session_id": session_id, "user_id": user_id, diff --git a/backends/advanced/webui/src/components/ConversationVersionDropdown.tsx b/backends/advanced/webui/src/components/ConversationVersionDropdown.tsx index 30ea4f1f..ed21f69c 100644 --- a/backends/advanced/webui/src/components/ConversationVersionDropdown.tsx +++ b/backends/advanced/webui/src/components/ConversationVersionDropdown.tsx @@ -38,6 +38,8 @@ interface ConversationVersionDropdownProps { memory_count: number active_transcript_version?: string active_memory_version?: string + active_transcript_version_number?: number + active_memory_version_number?: number } onVersionChange: () => void } @@ -135,9 +137,9 @@ export default function ConversationVersionDropdown({ className="flex items-center space-x-1 px-3 py-1 bg-blue-50 dark:bg-blue-900/20 border border-blue-200 dark:border-blue-600 rounded 
text-blue-700 dark:text-blue-300 hover:bg-blue-100 dark:hover:bg-blue-900/30" > - Transcript: v{versionHistory ? - versionHistory.transcript_versions.findIndex(v => v.version_id === versionHistory.active_transcript_version) + 1 : - 1 + Transcript: {versionHistory ? + `v${versionHistory.transcript_versions.findIndex(v => v.version_id === versionHistory.active_transcript_version) + 1}` : + (versionInfo?.active_transcript_version_number ? `v${versionInfo.active_transcript_version_number}` : '-') } @@ -197,9 +199,9 @@ export default function ConversationVersionDropdown({ className="flex items-center space-x-1 px-3 py-1 bg-green-50 dark:bg-green-900/20 border border-green-200 dark:border-green-600 rounded text-green-700 dark:text-green-300 hover:bg-green-100 dark:hover:bg-green-900/30" > - Memory: v{versionHistory ? - versionHistory.memory_versions.findIndex(v => v.version_id === versionHistory.active_memory_version) + 1 : - 1 + Memory: {versionHistory ? + `v${versionHistory.memory_versions.findIndex(v => v.version_id === versionHistory.active_memory_version) + 1}` : + (versionInfo?.active_memory_version_number ? 
`v${versionInfo.active_memory_version_number}` : '-') } diff --git a/backends/advanced/webui/src/components/ConversationVersionHeader.tsx b/backends/advanced/webui/src/components/ConversationVersionHeader.tsx index 9e7c5e09..55627c4f 100644 --- a/backends/advanced/webui/src/components/ConversationVersionHeader.tsx +++ b/backends/advanced/webui/src/components/ConversationVersionHeader.tsx @@ -10,6 +10,8 @@ interface ConversationVersionHeaderProps { memory_count: number; active_transcript_version?: string; active_memory_version?: string; + active_transcript_version_number?: number; + active_memory_version_number?: number; }; onVersionChange?: () => void; } diff --git a/backends/advanced/webui/src/components/plugins/FormField.tsx b/backends/advanced/webui/src/components/plugins/FormField.tsx index 60ebf0b2..81896804 100644 --- a/backends/advanced/webui/src/components/plugins/FormField.tsx +++ b/backends/advanced/webui/src/components/plugins/FormField.tsx @@ -112,14 +112,22 @@ export default function FormField({ setIsEditing(true) onChange(e.target.value) }} + onFocus={() => { + // When focusing on a masked field, clear it to allow entering new value + if (isMaskedValue && !isEditing) { + setIsEditing(true) + onChange('') + } + }} disabled={disabled} - placeholder={isMaskedValue ? 'Enter new value to change' : ''} + placeholder={isMaskedValue ? 'Enter new password to change' : 'Enter password'} className="w-full px-3 py-2 pr-10 border border-gray-300 dark:border-gray-600 rounded-md bg-white dark:bg-gray-700 text-gray-900 dark:text-gray-100 focus:outline-none focus:ring-2 focus:ring-blue-500 focus:border-blue-500 disabled:opacity-50 disabled:cursor-not-allowed" />
diff --git a/backends/advanced/webui/src/hooks/useSimpleAudioRecording.ts b/backends/advanced/webui/src/hooks/useSimpleAudioRecording.ts index 91f394c9..4c25df53 100644 --- a/backends/advanced/webui/src/hooks/useSimpleAudioRecording.ts +++ b/backends/advanced/webui/src/hooks/useSimpleAudioRecording.ts @@ -21,11 +21,13 @@ export interface SimpleAudioRecordingReturn { recordingDuration: number error: string | null mode: RecordingMode + alwaysPersist: boolean // Actions startRecording: () => Promise stopRecording: () => void setMode: (mode: RecordingMode) => void + setAlwaysPersist: (value: boolean) => void // For components analyser: AnalyserNode | null @@ -43,6 +45,7 @@ export const useSimpleAudioRecording = (): SimpleAudioRecordingReturn => { const [recordingDuration, setRecordingDuration] = useState(0) const [error, setError] = useState(null) const [mode, setMode] = useState('streaming') + const [alwaysPersist, setAlwaysPersist] = useState(false) // Debug stats const [debugStats, setDebugStats] = useState({ @@ -228,6 +231,38 @@ export const useSimpleAudioRecording = (): SimpleAudioRecordingReturn => { ws.onmessage = (event) => { console.log('📨 Received message from server:', event.data) setDebugStats(prev => ({ ...prev, messagesReceived: prev.messagesReceived + 1 })) + + // Parse server messages + try { + const message = JSON.parse(event.data) + + // Handle error messages from backend + if (message.type === 'error') { + const errorMsg = message.message || 'Unknown error from server' + console.error('❌ Server error:', errorMsg) + + setError(errorMsg) + setCurrentStep('error') + setDebugStats(prev => ({ + ...prev, + lastError: errorMsg, + lastErrorTime: new Date() + })) + + // Stop recording and cleanup + cleanup() + setIsRecording(false) + } + + // Handle other message types (interim_transcript, etc.) 
+ else if (message.type === 'interim_transcript') { + console.log('📝 Received interim transcript:', message.data) + } + + } catch (e) { + // Not JSON, ignore + console.log('📨 Non-JSON message:', event.data) + } } }) }, []) @@ -246,13 +281,14 @@ export const useSimpleAudioRecording = (): SimpleAudioRecordingReturn => { rate: 16000, width: 2, channels: 1, - mode: mode // Pass recording mode to backend + mode: mode, // Pass recording mode to backend + always_persist: alwaysPersist // Pass always_persist flag }, payload_length: null } ws.send(JSON.stringify(startMessage) + '\n') - console.log('✅ Audio-start message sent with mode:', mode) + console.log('✅ Audio-start message sent with mode:', mode, 'always_persist:', alwaysPersist) }, [mode]) // Step 4: Start audio streaming @@ -471,9 +507,11 @@ export const useSimpleAudioRecording = (): SimpleAudioRecordingReturn => { recordingDuration, error, mode, + alwaysPersist, startRecording, stopRecording, setMode, + setAlwaysPersist, analyser: analyserRef.current, debugStats, formatDuration, diff --git a/backends/advanced/webui/src/pages/Conversations.tsx b/backends/advanced/webui/src/pages/Conversations.tsx index ad1de51c..ef57e738 100644 --- a/backends/advanced/webui/src/pages/Conversations.tsx +++ b/backends/advanced/webui/src/pages/Conversations.tsx @@ -31,6 +31,8 @@ interface Conversation { active_memory_version?: string transcript_version_count?: number memory_version_count?: number + active_transcript_version_number?: number + active_memory_version_number?: number deleted?: boolean deletion_reason?: string deleted_at?: string @@ -719,7 +721,9 @@ export default function Conversations() { transcript_count: conversation.transcript_version_count || 0, memory_count: conversation.memory_version_count || 0, active_transcript_version: conversation.active_transcript_version, - active_memory_version: conversation.active_memory_version + active_memory_version: conversation.active_memory_version, + active_transcript_version_number: 
conversation.active_transcript_version_number, + active_memory_version_number: conversation.active_memory_version_number }} onVersionChange={async () => { // Update only this specific conversation without reloading all conversations diff --git a/backends/advanced/webui/src/pages/LiveRecord.tsx b/backends/advanced/webui/src/pages/LiveRecord.tsx index 4b763746..202a02e8 100644 --- a/backends/advanced/webui/src/pages/LiveRecord.tsx +++ b/backends/advanced/webui/src/pages/LiveRecord.tsx @@ -1,4 +1,4 @@ -import { Radio, Zap, Archive } from 'lucide-react' +import { Radio, Zap, Archive, Database } from 'lucide-react' import { useSimpleAudioRecording } from '../hooks/useSimpleAudioRecording' import SimplifiedControls from '../components/audio/SimplifiedControls' import StatusDisplay from '../components/audio/StatusDisplay' @@ -54,6 +54,33 @@ export default function LiveRecord() {
+ {/* Always Persist Audio Toggle */} +
+ +
+ {/* Mode Description */}

diff --git a/config/defaults.yml b/config/defaults.yml index b3f5a8f0..69b72d94 100644 --- a/config/defaults.yml +++ b/config/defaults.yml @@ -316,6 +316,10 @@ backend: provider: deepgram # or parakeet api_key: ${oc.env:DEEPGRAM_API_KEY,''} base_url: https://api.deepgram.com + # Fallback to provider segments when speaker service unavailable + # When true: Use segments from transcription provider (e.g., mock provider in tests) + # When false: Expect speaker service to create segments via diarization (default production behavior) + use_provider_segments: false # Diarization settings diarization: diff --git a/tests/Makefile b/tests/Makefile index 34fce4a2..303261bd 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -13,11 +13,28 @@ OUTPUTDIR ?= results TEST_DIR = endpoints integration infrastructure SERVICE ?= chronicle-backend-test +# Test configuration file (set this to use different configs) +# Can be overridden with CONFIG variable for convenience +# Examples: +# make test CONFIG=deepgram-openai.yml +# make test-quick CONFIG=mock-services.yml +# make start CONFIG=/app/test-configs/custom.yml +ifdef CONFIG + # If CONFIG is just a filename, prepend the path + ifeq ($(findstring /,$(CONFIG)),) + export TEST_CONFIG_FILE = /app/test-configs/$(CONFIG) + else + export TEST_CONFIG_FILE = $(CONFIG) + endif +else + export TEST_CONFIG_FILE ?= /app/test-configs/mock-services.yml +endif + help: @echo "Chronicle Test Targets:" @echo "" @echo "Quick Commands:" - @echo " make test - Start containers + run all tests (excludes slow/sdk)" + @echo " make test - Start containers + run tests (excludes slow/sdk/API)" @echo " make test-quick - Run tests on existing containers" @echo " make start - Start test containers" @echo " make stop - Stop containers (keep volumes)" @@ -27,7 +44,7 @@ help: @echo " make status - Show container status" @echo "" @echo "Running Tests:" - @echo " make all - Run all tests (excludes slow/sdk)" + @echo " make all - Run all tests (excludes slow/sdk/API)" 
@echo " make endpoints - Run only endpoint tests" @echo " make integration - Run only integration tests" @echo " make infra - Run only infrastructure tests" @@ -35,6 +52,7 @@ help: @echo "Special Test Tags:" @echo " make test-slow - Run ONLY slow tests (backend restarts)" @echo " make test-sdk - Run ONLY SDK tests (unreleased)" + @echo " make test-with-api-keys - Run ONLY tests requiring API keys" @echo " make test-all-with-slow-and-sdk - Run ALL tests including excluded" @echo "" @echo "Container Management:" @@ -57,25 +75,35 @@ help: @echo " make clean-all - Clean results + containers (saves logs)" @echo "" @echo "Environment Variables:" - @echo " OUTPUTDIR - Output directory (default: results)" - @echo " SERVICE - Service name for logs (default: chronicle-backend-test)" + @echo " OUTPUTDIR - Output directory (default: results)" + @echo " SERVICE - Service name for logs (default: chronicle-backend-test)" + @echo " CONFIG - Config file to use (e.g., deepgram-openai.yml or full path)" + @echo "" + @echo "Config Options:" + @echo " mock-services.yml - No API keys (default, excludes API tests)" + @echo " deepgram-openai.yml - Real API keys (required for API tests)" + @echo " mock-transcription-failure.yml - Test transcription failure scenarios" @echo "" @echo "Examples:" - @echo " make test # Full workflow" - @echo " make endpoints # Only endpoint tests" - @echo " make start-rebuild # After code changes" + @echo " make test # Default (no API keys)" + @echo " make test-with-api-keys # Auto-switches to deepgram config" + @echo " make test CONFIG=deepgram-openai.yml # Custom config" + @echo " make endpoints CONFIG=mock-services.yml # Endpoint tests with mock" + @echo " make start-rebuild CONFIG=custom.yml # Rebuild with custom config" @echo " make containers-logs SERVICE=workers-test # View worker logs" + @echo " make show-config # Show current config" -# Run all tests (excludes slow and sdk tests for faster feedback) +# Run all tests (excludes slow, sdk, and 
requires-api-keys tests for faster feedback) # Creates a persistent fixture conversation that won't be deleted between suites all: - @echo "Running all tests (excluding slow and sdk tests)..." + @echo "Running all tests (excluding slow, sdk, and requires-api-keys tests)..." CREATE_FIXTURE=true uv run --with-requirements test-requirements.txt robot --outputdir $(OUTPUTDIR) \ --name "All Tests" \ --console verbose \ --loglevel INFO:INFO \ --exclude slow \ --exclude sdk \ + --exclude requires-api-keys \ $(TEST_DIR) # Run only endpoint tests @@ -105,6 +133,15 @@ infra: --loglevel INFO:INFO \ infrastructure +# Show current test configuration +show-config: + @echo "Current Test Configuration:" + @echo " TEST_CONFIG_FILE = $(TEST_CONFIG_FILE)" + @echo "" + @echo "To change config:" + @echo " make test CONFIG=deepgram-openai.yml" + @echo " make test CONFIG=/path/to/custom.yml" + # Clean up test output files clean: @echo "Cleaning test outputs..." @@ -165,9 +202,23 @@ logs: containers-logs # ============================================================================ # Full workflow: start containers + run all tests -test: containers-start all +# If CONFIG is specified and differs from running containers, recreates them +test: + @if docker compose -f ../backends/advanced/docker-compose-test.yml ps chronicle-backend-test 2>/dev/null | grep -q "Up"; then \ + echo "ℹ️ Containers already running"; \ + if [ "$(CONFIG)" != "" ]; then \ + echo "🔄 CONFIG specified - will recreate containers to apply new config"; \ + $(MAKE) containers-stop; \ + $(MAKE) containers-start; \ + else \ + echo "✅ Using existing containers (use CONFIG=... 
to switch config)"; \ + fi \ + else \ + $(MAKE) containers-start; \ + fi + @$(MAKE) all -# Quick workflow: run tests on existing containers +# Quick workflow: run tests on existing containers (ignores CONFIG changes) test-quick: all # Run ONLY slow tests (backend restarts, long timeouts) @@ -190,6 +241,27 @@ test-sdk: --include sdk \ $(TEST_DIR) +# Run ONLY tests that require API keys (Deepgram + OpenAI) +# Automatically switches to deepgram-openai.yml config +test-with-api-keys: + @echo "🔄 Switching to deepgram-openai.yml config..." + @if [ -z "$$DEEPGRAM_API_KEY" ] || [ -z "$$OPENAI_API_KEY" ]; then \ + echo "❌ Error: DEEPGRAM_API_KEY and OPENAI_API_KEY must be set"; \ + echo " export DEEPGRAM_API_KEY='your-key-here'"; \ + echo " export OPENAI_API_KEY='your-key-here'"; \ + exit 1; \ + fi + @$(MAKE) containers-stop + @TEST_CONFIG_FILE=/app/test-configs/deepgram-openai.yml $(MAKE) containers-start + @echo "✅ Containers running with deepgram-openai.yml" + @echo "🧪 Running API key tests..." + CREATE_FIXTURE=true uv run --with-requirements test-requirements.txt robot --outputdir $(OUTPUTDIR) \ + --name "API Key Tests" \ + --console verbose \ + --loglevel INFO:INFO \ + --include requires-api-keys \ + $(TEST_DIR) + # Run ALL tests including slow and SDK tests test-all-with-slow-and-sdk: @echo "Running ALL tests including slow and SDK tests..." diff --git a/tests/bin/start-containers.sh b/tests/bin/start-containers.sh index b3fed506..0918b141 100755 --- a/tests/bin/start-containers.sh +++ b/tests/bin/start-containers.sh @@ -23,6 +23,14 @@ if [ ! -f "$TESTS_DIR/setup/.env.test" ]; then fi fi +# Load environment variables from .env.test (API keys, etc.) +if [ -f "$TESTS_DIR/setup/.env.test" ]; then + echo "📝 Loading environment variables from .env.test..." + set -a + source "$TESTS_DIR/setup/.env.test" + set +a +fi + # Start containers echo "🐳 Starting Docker containers..." 
docker compose -f docker-compose-test.yml up -d diff --git a/tests/configs/mock-services.yml b/tests/configs/mock-services.yml index a86e11c1..28644097 100644 --- a/tests/configs/mock-services.yml +++ b/tests/configs/mock-services.yml @@ -4,17 +4,19 @@ chat: defaults: embedding: mock-embed llm: mock-llm + stt: mock-stt + stt_stream: mock-stt-stream vector_store: vs-qdrant memory: extraction: - enabled: false + enabled: true prompt: '' provider: chronicle timeout_seconds: 1200 models: - api_family: openai api_key: dummy-key-not-used - description: Dummy LLM for testing (not called) + description: Mock LLM server for testing (local) model_name: gpt-4o-mini model_output: json model_params: @@ -22,17 +24,17 @@ models: temperature: 0.2 model_provider: openai model_type: llm - model_url: https://api.openai.com/v1 + model_url: http://host.docker.internal:11435 name: mock-llm - api_family: openai api_key: dummy-key-not-used - description: Dummy embeddings for testing (not called) - embedding_dimensions: 384 + description: Mock embedding server for testing (local) + embedding_dimensions: 1536 model_name: text-embedding-3-small model_output: vector model_provider: openai model_type: embedding - model_url: https://api.openai.com/v1 + model_url: http://host.docker.internal:11435 name: mock-embed - api_family: qdrant description: Qdrant vector database (local) @@ -44,6 +46,50 @@ models: model_type: vector_store model_url: http://${oc.env:QDRANT_BASE_URL,qdrant}:${oc.env:QDRANT_PORT,6333} name: vs-qdrant +- api_family: mock + api_key: mock-key-not-used + description: Mock STT for testing (batch) + model_provider: mock + model_type: stt + model_url: http://localhost:9999 + name: mock-stt + operations: + stt_transcribe: + headers: + Content-Type: audio/raw + method: POST + path: /transcribe + response: + extract: + text: text + words: words + segments: segments + type: json +- api_family: mock + api_key: mock-key-not-used + description: Mock STT for testing (streaming) + 
model_provider: mock + model_type: stt_stream + model_url: ws://host.docker.internal:9999 + name: mock-stt-stream + operations: + chunk_header: + message: {} + end: + message: + type: CloseStream + expect: + extract: + text: channel.alternatives[0].transcript + words: channel.alternatives[0].words + segments: [] + final_type: Results + interim_type: Results + start: + message: {} +backend: + transcription: + use_provider_segments: true speaker_recognition: enabled: false timeout: 60 diff --git a/tests/integration/always_persist_audio_tests.robot b/tests/integration/always_persist_audio_tests.robot new file mode 100644 index 00000000..9d3677bf --- /dev/null +++ b/tests/integration/always_persist_audio_tests.robot @@ -0,0 +1,321 @@ +*** Settings *** +Documentation Always Persist Audio Feature Tests +... +... Tests that verify the always_persist flag ensures audio is saved +... to MongoDB even when transcription fails. +... +... Critical scenarios: +... - Placeholder conversation created immediately +... - Audio chunks persisted despite transcription failure +... - Processing status transitions correctly +... 
- Normal behavior preserved when always_persist=false + +Resource ../resources/websocket_keywords.robot +Resource ../resources/conversation_keywords.robot +Resource ../resources/mongodb_keywords.robot +Resource ../resources/redis_keywords.robot +Resource ../resources/queue_keywords.robot +Resource ../resources/session_keywords.robot +Resource ../resources/system_keywords.robot +Variables ../setup/test_env.py + +Suite Setup Suite Setup Actions +Suite Teardown Suite Teardown Actions +Test Teardown Test Cleanup + +*** Variables *** +${TEST_AUDIO_FILE} ${CURDIR}/../test_assets/DIY_Experts_Glass_Blowing_16khz_mono_1min.wav + +*** Keywords *** +Suite Setup Actions + [Documentation] Setup actions before running tests + # Start mock transcription server + Start Mock Transcription Server + + # Initialize API session for test user + ${session}= Get Admin API Session + Set Suite Variable ${API_SESSION} ${session} + +Suite Teardown Actions + [Documentation] Cleanup after all tests complete + # Cleanup any remaining audio streams + Cleanup All Audio Streams + + # Stop mock transcription server + Stop Mock Transcription Server + +Test Cleanup + [Documentation] Cleanup after each test + # Stop any active audio streams + Cleanup All Audio Streams + Sleep 2s # Allow backend to finalize processing + +*** Test Cases *** + +Placeholder Conversation Created Immediately With Always Persist + [Documentation] Verify that when always_persist=true, a conversation is created + ... immediately (before speech detection) with placeholder title and + ... processing_status="pending_transcription". 
+ [Tags] conversation audio-streaming + + ${device_name}= Set Variable test-placeholder + ${client_id}= Get Client ID From Device Name ${device_name} + + # Get baseline conversation count + ${convs_before}= Get User Conversations + ${count_before}= Get Length ${convs_before} + + # Start stream with always_persist=true + ${stream_id}= Open Audio Stream With Always Persist device_name=${device_name} + + # Conversation created by audio persistence job (takes 3-5s to start) + Sleep 5s # Wait for audio persistence job to create placeholder + ${convs_after}= Get User Conversations + ${count_after}= Get Length ${convs_after} + + # Verify new conversation created + Should Be True ${count_after} == ${count_before} + 1 + ... Expected 1 new conversation, found ${count_after} - ${count_before} + + # Find the new conversation (most recent) + ${new_conv}= Set Variable ${convs_after}[0] + ${conversation_id}= Set Variable ${new_conv}[conversation_id] + + # Verify placeholder title + Verify Placeholder Conversation Title ${conversation_id} + + # Verify processing_status + Verify Conversation Processing Status ${conversation_id} pending_transcription + + # Verify always_persist flag + Verify Conversation Always Persist Flag ${conversation_id} + + # Close stream + Close Audio Stream ${stream_id} + + Log ✅ Placeholder conversation created immediately with always_persist=true + + +Normal Behavior Preserved When Always Persist Disabled + [Documentation] Verify that when always_persist=false (default), the system + ... behaves as before: no conversation created until speech detected. 
+ [Tags] conversation audio-streaming + + ${device_name}= Set Variable test-normal + ${client_id}= Get Client ID From Device Name ${device_name} + + # Get baseline conversation count + ${convs_before}= Get User Conversations + ${count_before}= Get Length ${convs_before} + + # Start stream with always_persist=false (default behavior) + ${stream_id}= Open Audio Stream device_name=${device_name} + + # Conversation should NOT exist immediately + Sleep 3s + ${convs_after}= Get User Conversations + ${count_after}= Get Length ${convs_after} + + # Verify no new conversation created yet + Should Be Equal As Integers ${count_after} ${count_before} + ... Expected no conversation until speech detected, but found ${count_after} - ${count_before} new conversations + + Log ✅ No placeholder conversation created (always_persist=false) + + # Close stream + Close Audio Stream ${stream_id} + + +Redis Key Set Immediately With Always Persist + [Documentation] Verify that conversation:current:{session_id} Redis key is set + ... immediately when always_persist=true, allowing audio persistence + ... job to start saving chunks. + [Tags] audio-streaming infra + + ${device_name}= Set Variable test-redis-key + ${client_id}= Get Client ID From Device Name ${device_name} + + # Get baseline conversation count + ${convs_before}= Get User Conversations + ${count_before}= Get Length ${convs_before} + + # Start stream with always_persist=true + ${stream_id}= Open Audio Stream With Always Persist device_name=${device_name} + + # session_id == client_id for streaming mode (not stream_id!) + ${session_id}= Set Variable ${client_id} + + # Get conversation (created by audio persistence job) + Sleep 5s # Wait for audio persistence job to create placeholder + ${convs_after}= Get User Conversations + ${count_after}= Get Length ${convs_after} + + # Verify new conversation created + Should Be True ${count_after} == ${count_before} + 1 + ... 
Expected 1 new conversation, found ${count_after} - ${count_before} + + # Get the new conversation (most recent) + ${conversation}= Set Variable ${convs_after}[0] + ${conversation_id}= Set Variable ${conversation}[conversation_id] + + # Verify Redis key exists and points to the conversation + ${redis_conv_id}= Verify Conversation Current Key ${session_id} ${conversation_id} + + Should Be Equal As Strings ${redis_conv_id} ${conversation_id} + ... Redis key should point to placeholder conversation + + Log ✅ Redis key conversation:current:${session_id} correctly set to ${conversation_id} + + # Close stream + Close Audio Stream ${stream_id} + + +Multiple Sessions Create Separate Conversations + [Documentation] Verify that starting multiple audio sessions with always_persist=true + ... creates separate placeholder conversations for each session. + [Tags] conversation audio-streaming + + ${device_name}= Set Variable test-multi + + # Get baseline conversation count + ${convs_before}= Get User Conversations + ${count_before}= Get Length ${convs_before} + + # Start 3 separate sessions + ${stream_1}= Open Audio Stream With Always Persist device_name=${device_name}-1 + Sleep 1s + ${stream_2}= Open Audio Stream With Always Persist device_name=${device_name}-2 + Sleep 1s + ${stream_3}= Open Audio Stream With Always Persist device_name=${device_name}-3 + Sleep 5s # Wait for all audio persistence jobs to create placeholders + + # Verify 3 new conversations created + ${convs_after}= Get User Conversations + ${count_after}= Get Length ${convs_after} + + ${new_count}= Evaluate ${count_after} - ${count_before} + Should Be Equal As Integers ${new_count} 3 + ... Expected 3 new conversations, found ${new_count} + + # Verify each conversation has unique conversation_id + ${conv_ids}= Create List + FOR ${i} IN RANGE 3 + ${conv}= Set Variable ${convs_after}[${i}] + ${conv_id}= Set Variable ${conv}[conversation_id] + List Should Not Contain Value ${conv_ids} ${conv_id} + ... 
Duplicate conversation_id found: ${conv_id} + Append To List ${conv_ids} ${conv_id} + END + + Log ✅ 3 separate conversations created with unique IDs + + # Close all streams + Close Audio Stream ${stream_1} + Close Audio Stream ${stream_2} + Close Audio Stream ${stream_3} + + +Audio Chunks Persisted Despite Transcription Failure + [Documentation] Verify that when transcription fails (e.g., invalid Deepgram key), + ... audio chunks are still saved to MongoDB. + ... + ... NOTE: This test requires misconfigured transcription service to trigger failure. + ... Test uses mock-transcription-failure.yml config with invalid API key. + [Tags] audio-streaming mongodb requires-api-keys + + ${device_name}= Set Variable test-persist-fail + ${client_id}= Get Client ID From Device Name ${device_name} + + # Start stream with always_persist=true + ${stream_id}= Open Audio Stream With Always Persist device_name=${device_name} + + # Wait for audio persistence job to start consuming from Redis Stream + Sleep 2s + + # Send audio chunks (transcription will fail due to invalid API key in config) + # Use realtime pacing to ensure chunks arrive while persistence job is running + Send Audio Chunks To Stream ${stream_id} ${TEST_AUDIO_FILE} num_chunks=50 realtime_pacing=True + + # Close stream + ${total_chunks}= Close Audio Stream ${stream_id} + Log Sent ${total_chunks} total chunks + + # Wait for processing to attempt and fail + Sleep 15s + + # Get the conversation (most recent) + ${conversations}= Get User Conversations + ${conversation}= Set Variable ${conversations}[0] + ${conversation_id}= Set Variable ${conversation}[conversation_id] + + # Verify processing_status is transcription_failed + Verify Conversation Processing Status ${conversation_id} transcription_failed + + # Verify title indicates failure + ${title}= Set Variable ${conversation}[title] + ${title_lower}= Convert To Lower Case ${title} + Should Contain ${title_lower} transcription + Should Contain ${title_lower} fail + ... 
Expected title to contain 'transcription' and 'fail', got: ${title} + + # CRITICAL: Verify audio chunks were saved despite transcription failure + ${chunks}= Verify Audio Chunks Exist ${conversation_id} min_chunks=1 + + ${chunk_count}= Get Length ${chunks} + Should Be True ${chunk_count} > 0 + ... Expected audio chunks to be saved despite transcription failure + + Log ✅ Audio chunks persisted despite transcription failure (${chunk_count} chunks saved) + + +Conversation Updates To Completed When Transcription Succeeds + [Documentation] Verify that when transcription succeeds, the placeholder conversation + ... updates from processing_status="pending_transcription" to "completed", + ... and the title updates from placeholder to actual summary. + [Tags] conversation audio-streaming requires-api-keys + + ${device_name}= Set Variable test-complete + ${client_id}= Get Client ID From Device Name ${device_name} + + # Get baseline conversation count + ${convs_before}= Get User Conversations + ${count_before}= Get Length ${convs_before} + + # Start stream with always_persist=true + ${stream_id}= Open Audio Stream With Always Persist device_name=${device_name} + + # Verify placeholder conversation exists (created by audio persistence job) + Sleep 5s + ${convs_after}= Get User Conversations + ${conversation}= Set Variable ${convs_after}[0] + ${conversation_id}= Set Variable ${conversation}[conversation_id] + + # Verify initial placeholder state + Verify Conversation Processing Status ${conversation_id} pending_transcription + Verify Placeholder Conversation Title ${conversation_id} + + # Send audio chunks with speech (transcription will succeed) + # Use realtime pacing so Deepgram can finalize segments + Send Audio Chunks To Stream ${stream_id} ${TEST_AUDIO_FILE} num_chunks=200 realtime_pacing=True + + # Close stream + Close Audio Stream ${stream_id} + + # Wait for transcription and title generation to complete + Wait Until Keyword Succeeds 90s 5s + ... 
Verify Conversation Processing Status ${conversation_id} completed + + # Verify title updated from placeholder to actual summary + ${updated_conv}= Get Conversation By ID ${conversation_id} + ${title}= Set Variable ${updated_conv}[title] + + # Title should NOT contain placeholder text + ${title_lower}= Convert To Lower Case ${title} + ${has_processing}= Run Keyword And Return Status Should Contain ${title_lower} processing + ${has_failed}= Run Keyword And Return Status Should Contain ${title_lower} transcription failed + + ${is_placeholder}= Evaluate ${has_processing} or ${has_failed} + Should Not Be True ${is_placeholder} + ... Expected title to be updated, but still has placeholder: ${title} + + Log ✅ Conversation updated to completed with title: ${title} diff --git a/tests/libs/audio_stream_library.py b/tests/libs/audio_stream_library.py index e14a174e..f2fb1aab 100644 --- a/tests/libs/audio_stream_library.py +++ b/tests/libs/audio_stream_library.py @@ -44,6 +44,7 @@ def stream_audio_file( device_name: str = "robot-test", recording_mode: str = "streaming", use_wyoming: bool = True, + always_persist: bool = False, ) -> int: """Stream a WAV file via WebSocket (blocking).""" return _stream_audio_file( @@ -53,6 +54,7 @@ def stream_audio_file( device_name=device_name, recording_mode=recording_mode, use_wyoming=use_wyoming, + always_persist=always_persist, ) @@ -65,6 +67,7 @@ def start_audio_stream( token: str, device_name: str = "robot-test", recording_mode: str = "streaming", + always_persist: bool = False, ) -> str: """Start a new audio stream (non-blocking).""" return _manager.start_stream( @@ -72,6 +75,7 @@ def start_audio_stream( token=token, device_name=device_name, recording_mode=recording_mode, + always_persist=always_persist, ) diff --git a/tests/libs/mock_llm_server.py b/tests/libs/mock_llm_server.py new file mode 100755 index 00000000..d7eea5d9 --- /dev/null +++ b/tests/libs/mock_llm_server.py @@ -0,0 +1,328 @@ +#!/usr/bin/env python3 +""" +Mock LLM Server 
def generate_deterministic_embedding(text: str, dimensions: int = 1536) -> List[float]:
    """Return a reproducible, unit-norm embedding vector for *text*.

    The SHA-256 digest of the text seeds NumPy's generator, so identical
    input always yields the identical vector across runs and processes —
    a requirement for deterministic test assertions. The result is
    L2-normalised so cosine similarity behaves like a plain dot product.

    Args:
        text: Input string to embed.
        dimensions: Size of the returned vector (default 1536, matching
            OpenAI's text-embedding-3-small).

    Returns:
        A list of ``dimensions`` floats with unit L2 norm.
    """
    digest = hashlib.sha256(text.encode("utf-8")).digest()
    # First 4 digest bytes (big-endian) seed the RNG deterministically.
    rng = np.random.default_rng(int.from_bytes(digest[:4], "big"))
    raw = rng.standard_normal(dimensions)
    return (raw / np.linalg.norm(raw)).tolist()


def detect_request_type(messages: List[dict]) -> str:
    """Classify an incoming chat request by its first (system) message.

    Returns:
        "fact_extraction" for fact-retrieval prompts,
        "memory_update" for memory-manager prompts,
        "general" for anything else (including an empty message list).
    """
    if not messages:
        return "general"

    # The system prompt is conventionally the first message; matching is
    # case-insensitive on the marker phrases used by the backend's prompts.
    system_prompt = messages[0].get("content", "").lower()

    if "fact_retrieval_prompt" in system_prompt or "extract facts" in system_prompt:
        return "fact_extraction"
    if "update_memory_prompt" in system_prompt or "memory manager" in system_prompt:
        return "memory_update"
    return "general"


def _completion_envelope(response_id: str, content: str,
                         prompt_tokens: int, completion_tokens: int) -> dict:
    """Wrap *content* in an OpenAI-style ``chat.completion`` payload.

    Shared by the three canned-response builders below so the envelope
    shape stays consistent in one place.
    """
    return {
        "id": response_id,
        "object": "chat.completion",
        "created": 1234567890,
        "model": "gpt-4o-mini",
        "choices": [{
            "index": 0,
            "message": {
                "role": "assistant",
                "content": content
            },
            "finish_reason": "stop"
        }],
        "usage": {
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "total_tokens": prompt_tokens + completion_tokens
        }
    }


def create_fact_extraction_response() -> dict:
    """Build the canned fact-extraction completion.

    The message content is a JSON object ``{"facts": [...]}`` — the shape
    the backend's fact-extraction path parses.
    """
    facts = [
        "User likes hiking",
        "User met with John",
        "Discussed project timeline",
        "User prefers morning meetings",
        "User is working on Chronicle project"
    ]
    return _completion_envelope(
        "chatcmpl-mock-fact", json.dumps({"facts": facts}), 100, 50
    )


def create_memory_update_response() -> dict:
    """Build the canned memory-update completion (XML content).

    The backend's memory-update parser accepts plain XML, markdown
    ```xml fenced blocks, and DeepSeek-style think-tag wrappers; the
    plain-XML form is returned here.

    NOTE(review): the original XML tag names were lost when this file was
    transcribed (angle-bracket markup stripped); the tags below are a
    best-effort reconstruction — confirm them against the backend's
    UPDATE_MEMORY_PROMPT parser before relying on this response in
    memory-update tests.
    """
    xml_content = """<memory_updates>
  <update>
    <old_memory>User likes hiking in the mountains</old_memory>
    <new_memory>User likes hiking</new_memory>
  </update>
  <add>
    <memory>User prefers morning meetings before 10am</memory>
  </add>
</memory_updates>"""
    return _completion_envelope("chatcmpl-mock-memory", xml_content, 150, 80)


def create_general_response(user_message: str) -> dict:
    """Build the fallback completion echoing the user's last message."""
    return _completion_envelope(
        "chatcmpl-mock-general",
        f"This is a mock response to: {user_message}",
        50,
        20,
    )
async def handle_chat_completions(request: web.Request) -> web.Response:
    """POST /v1/chat/completions — dispatch to a canned response by prompt type."""
    try:
        payload = await request.json()
        messages = payload.get("messages", [])

        request_type = detect_request_type(messages)
        logger.info(f"Chat completion request detected as: {request_type}")

        # Dispatch table for the two specialised prompt types.
        builders = {
            "fact_extraction": (create_fact_extraction_response, "Returning fact extraction response"),
            "memory_update": (create_memory_update_response, "Returning memory update response"),
        }
        if request_type in builders:
            build, note = builders[request_type]
            body = build()
            logger.info(note)
        else:
            last_content = messages[-1].get("content", "") if messages else ""
            body = create_general_response(last_content)
            logger.info("Returning general response")

        return web.json_response(body)

    except Exception as e:
        logger.error(f"Error handling chat completions: {e}", exc_info=True)
        return web.json_response(
            {"error": {"message": str(e), "type": "server_error"}},
            status=500
        )


async def handle_embeddings(request: web.Request) -> web.Response:
    """POST /v1/embeddings — one deterministic vector per input text."""
    try:
        payload = await request.json()
        raw_input = payload.get("input", [])

        # OpenAI accepts either a single string or a list of strings.
        texts = [raw_input] if isinstance(raw_input, str) else raw_input

        embeddings_data = [
            {
                "object": "embedding",
                "embedding": generate_deterministic_embedding(text, dimensions=1536),
                "index": idx,
            }
            for idx, text in enumerate(texts)
        ]

        logger.info(f"Generated {len(embeddings_data)} embeddings")

        return web.json_response({
            "object": "list",
            "data": embeddings_data,
            "model": "text-embedding-3-small",
            "usage": {
                "prompt_tokens": len(texts) * 10,
                "total_tokens": len(texts) * 10
            }
        })

    except Exception as e:
        logger.error(f"Error handling embeddings: {e}", exc_info=True)
        return web.json_response(
            {"error": {"message": str(e), "type": "server_error"}},
            status=500
        )


async def handle_models(request: web.Request) -> web.Response:
    """GET /v1/models — advertise the two mock model IDs."""
    listing = {
        "object": "list",
        "data": [
            {"id": model_id, "object": "model", "created": 1234567890, "owned_by": "mock-llm"}
            for model_id in ("gpt-4o-mini", "text-embedding-3-small")
        ],
    }
    logger.info("Returning available models")
    return web.json_response(listing)


async def handle_health(request: web.Request) -> web.Response:
    """GET /health — liveness probe."""
    return web.json_response({"status": "healthy"})


def create_app() -> web.Application:
    """Wire the OpenAI-compatible routes plus the health probe."""
    app = web.Application()

    app.router.add_post('/v1/chat/completions', handle_chat_completions)
    app.router.add_post('/v1/embeddings', handle_embeddings)
    app.router.add_get('/v1/models', handle_models)
    app.router.add_get('/health', handle_health)

    return app


def main(host: str, port: int):
    """Log the served endpoints and block on the aiohttp server."""
    logger.info(f"Starting Mock LLM Server on {host}:{port}")
    logger.info(f"OpenAI-compatible endpoints:")
    logger.info(f"  - POST /v1/chat/completions")
    logger.info(f"  - POST /v1/embeddings")
    logger.info(f"  - GET  /v1/models")
    logger.info(f"  - GET  /health")
    logger.info(f"Deterministic embeddings: 1536 dimensions")

    web.run_app(create_app(), host=host, port=port, access_log=logger)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Mock LLM Server")
    parser.add_argument("--host", default="0.0.0.0", help="Server host (default: 0.0.0.0)")
    parser.add_argument("--port", type=int, default=11435, help="Server port (default: 11435)")
    parser.add_argument("--debug", action="store_true", help="Enable debug logging")

    args = parser.parse_args()

    if args.debug:
        logger.setLevel(logging.DEBUG)

    try:
        main(args.host, args.port)
    except KeyboardInterrupt:
        logger.info("Server stopped by user")
def create_deepgram_response(
    transcript: str,
    is_final: bool,
    words: Optional[list] = None,
    confidence: float = 0.99
) -> dict:
    """Build a Deepgram-shaped transcription result.

    The nesting mirrors the extraction paths configured by the backend
    (channel.alternatives[0].transcript / .words). When *words* is None,
    per-word timestamps are synthesised from the transcript: each word
    spans 0.3s followed by a 0.05s gap.
    """
    if words is None:
        # Lay words out on an accumulating timeline.
        words = []
        cursor = 0.0
        for token in transcript.split():
            words.append({
                "word": token,
                "start": cursor,
                "end": cursor + 0.3,
                "confidence": confidence
            })
            cursor += 0.35  # 0.3s word + 0.05s gap

    alternative = {
        "transcript": transcript,
        "confidence": confidence,
        "words": words
    }
    return {
        "type": "Results",
        "is_final": is_final,
        "channel": {"alternatives": [alternative]}
    }


def create_final_response() -> dict:
    """Build the end-of-stream result that satisfies speech detection.

    Thresholds (docker-compose-test.yml):
      - SPEECH_DETECTION_MIN_DURATION: 2.0s
      - SPEECH_DETECTION_MIN_WORDS: 5
    Seven words are placed on a 0.4s grid (0.35s word + 0.05s gap), so
    the last word's end time lands past 2.0s.
    """
    transcript_words = ["This", "is", "a", "test", "conversation", "about", "hiking"]

    words = []
    cursor = 0.0
    for token in transcript_words:
        words.append({
            "word": token,
            "start": cursor,
            "end": cursor + 0.35,
            "confidence": 0.99
        })
        cursor += 0.4  # 0.35s word + 0.05s gap

    # Guard against regressions in the timing layout above.
    assert words[-1]["end"] > 2.0, f"Duration {words[-1]['end']}s must be >2.0s"

    return create_deepgram_response(
        transcript=" ".join(transcript_words),
        is_final=True,
        words=words,
        confidence=0.99
    )
create_final_response() + await websocket.send(json.dumps(final)) + logger.info(f"Sent final result to {client_id}: {final['channel']['alternatives'][0]['transcript']}") + + # Close connection gracefully + await websocket.close() + break + + else: + logger.warning(f"Unknown message type from {client_id}: {msg_type}") + + except json.JSONDecodeError: + logger.error(f"Invalid JSON from {client_id}: {message}") + + except websockets.exceptions.ConnectionClosed: + logger.info(f"Client disconnected: {client_id}") + + except Exception as e: + logger.error(f"Error handling client {client_id}: {e}", exc_info=True) + + finally: + logger.info(f"Connection closed: {client_id}, processed {chunk_count} chunks") + + +async def main(host: str, port: int): + """Start WebSocket server.""" + logger.info(f"Starting Mock Streaming STT Server on {host}:{port}") + logger.info(f"Deepgram-compatible nested response format") + logger.info(f"Speech detection: >2.0s duration, >5 words") + + async with websockets.serve(handle_client, host, port): + logger.info(f"Server ready and listening on ws://{host}:{port}") + await asyncio.Future() # Run forever + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Mock Streaming STT Server") + parser.add_argument("--host", default="0.0.0.0", help="Server host (default: 0.0.0.0)") + parser.add_argument("--port", type=int, default=9999, help="Server port (default: 9999)") + parser.add_argument("--debug", action="store_true", help="Enable debug logging") + + args = parser.parse_args() + + if args.debug: + logger.setLevel(logging.DEBUG) + + try: + asyncio.run(main(args.host, args.port)) + except KeyboardInterrupt: + logger.info("Server stopped by user") diff --git a/tests/resources/conversation_keywords.robot b/tests/resources/conversation_keywords.robot index 746565ee..be75b13b 100644 --- a/tests/resources/conversation_keywords.robot +++ b/tests/resources/conversation_keywords.robot @@ -178,3 +178,45 @@ Conversation Should Have 
End Reason ${actual_end_reason}= Set Variable ${conversation}[end_reason] Should Be Equal As Strings ${actual_end_reason} ${expected_end_reason} ... msg=Expected end_reason '${expected_end_reason}', got '${actual_end_reason}' + +Verify Conversation Processing Status + [Documentation] Verify conversation has expected processing_status value + [Arguments] ${conversation_id} ${expected_status} + + ${conversation}= Get Conversation By ID ${conversation_id} + + Should Contain ${conversation} processing_status + Should Be Equal As Strings ${conversation}[processing_status] ${expected_status} + ... Expected processing_status='${expected_status}', got '${conversation}[processing_status]' + + Log ✅ Conversation ${conversation_id} has processing_status='${expected_status}' + +Verify Conversation Always Persist Flag + [Documentation] Verify conversation has always_persist=True + [Arguments] ${conversation_id} + + ${conversation}= Get Conversation By ID ${conversation_id} + + Should Contain ${conversation} always_persist + Should Be True ${conversation}[always_persist] + ... Expected always_persist=True, got ${conversation}[always_persist] + + Log ✅ Conversation ${conversation_id} has always_persist=True + +Verify Placeholder Conversation Title + [Documentation] Verify conversation has placeholder title + [Arguments] ${conversation_id} + + ${conversation}= Get Conversation By ID ${conversation_id} + + # Placeholder title can be either "Processing..." or "Transcription Failed" + ${title}= Set Variable ${conversation}[title] + ${has_processing}= Run Keyword And Return Status Should Contain ${title} Processing + ${has_failed}= Run Keyword And Return Status Should Contain ${title} Transcription Failed + + ${is_placeholder}= Evaluate ${has_processing} or ${has_failed} + + Should Be True ${is_placeholder} + ... 
Expected placeholder title, got: ${title} + + Log ✅ Conversation has placeholder title: ${title} diff --git a/tests/resources/redis_keywords.robot b/tests/resources/redis_keywords.robot index 48aaeed0..e6179afd 100644 --- a/tests/resources/redis_keywords.robot +++ b/tests/resources/redis_keywords.robot @@ -107,3 +107,50 @@ Get Backend Logs ... shell=True stderr=STDOUT RETURN ${result.stdout} + +Verify Redis Key Exists + [Documentation] Verify that a Redis key exists + [Arguments] ${redis_key} + + # Use Redis EXISTS command (returns 1 if key exists, 0 otherwise) + ${exists}= Redis Command EXISTS ${redis_key} + + Should Be Equal As Integers ${exists} 1 + ... Redis key does not exist: ${redis_key} + + Log ✅ Redis key exists: ${redis_key} + +Verify Conversation Current Key + [Documentation] Verify conversation:current:{session_id} key exists and has correct value + ... Uses pattern matching to handle counter suffixes (-2, -3, etc) + [Arguments] ${session_id} ${expected_conversation_id}=${None} + + # Use KEYS pattern to find matching key (handles counter suffixes like -2, -3) + ${pattern}= Set Variable conversation:current:${session_id}* + ${result}= Run Process docker exec ${REDIS_CONTAINER} + ... redis-cli KEYS ${pattern} + Should Be Equal As Integers ${result.rc} 0 + + # Get matching keys + @{keys}= Split String ${result.stdout} \n + ${keys_list}= Evaluate [k for k in ${keys} if k.strip()] + ${num_keys}= Get Length ${keys_list} + + Should Be True ${num_keys} > 0 + ... Redis key not found for pattern: ${pattern} + + # Get the first matching key + ${redis_key}= Get From List ${keys_list} 0 + Log Found Redis key: ${redis_key} + + # Get the conversation_id value + ${conversation_id}= Redis Command GET ${redis_key} + + # Optionally verify it matches expected value + IF '${expected_conversation_id}' != '${None}' + Should Be Equal As Strings ${conversation_id} ${expected_conversation_id} + ... 
Redis key value mismatch: expected ${expected_conversation_id}, got ${conversation_id} + END + + Log ✅ ${redis_key} = ${conversation_id} + RETURN ${conversation_id} diff --git a/tests/resources/system_keywords.robot b/tests/resources/system_keywords.robot index 0bdfcb56..6fb6b3d4 100644 --- a/tests/resources/system_keywords.robot +++ b/tests/resources/system_keywords.robot @@ -1,19 +1,22 @@ *** Settings *** -Documentation Health check and service readiness verification keywords +Documentation Health check, service readiness, and mock service management keywords ... -... This file contains keywords for checking service health and readiness. -... Keywords in this file handle API endpoint health checks and service status verification. +... This file contains keywords for checking service health and managing mock services. +... Keywords in this file handle API endpoint health checks, service status verification, +... and starting/stopping mock services for testing. ... ... Keywords in this file handle: ... - Health endpoint checks ... - Readiness endpoint checks ... - Service availability verification +... - Mock service lifecycle management ... ... Keywords that should NOT be in this file: ... - Docker service management (belong in setup_env_keywords.robot) ... - Data management (belong in test_manager_keywords.robot) ... - User/session management (belong in respective resource files) Library RequestsLibrary +Library Process Variables ../setup/test_env.py @@ -36,3 +39,42 @@ Health Check ${response}= GET ${base_url}/health expected_status=200 timeout=2 Should Be Equal As Integers ${response.status_code} 200 RETURN ${True} + + +Start Mock Transcription Server + [Documentation] Start the mock WebSocket transcription server on port 9999 + ... Used for testing transcription workflows without external API dependencies. + + # Start mock server as background process + ${handle}= Start Process + ... 
python3 ${CURDIR}/../scripts/mock_transcription_server.py --host 0.0.0.0 --port 9999 + ... alias=mock_transcription_server + ... stdout=${OUTPUTDIR}/mock_transcription_server.log + ... stderr=STDOUT + + # Store process handle for cleanup + Set Suite Variable ${MOCK_TRANSCRIPTION_HANDLE} ${handle} + + # Wait for server to start + Sleep 2s + + Log ✅ Started Mock Transcription Server on ws://localhost:9999 + + +Stop Mock Transcription Server + [Documentation] Stop the mock WebSocket transcription server + + # Check if handle exists + ${handle_exists}= Run Keyword And Return Status Variable Should Exist ${MOCK_TRANSCRIPTION_HANDLE} + + IF ${handle_exists} + # Terminate the process gracefully + Terminate Process ${MOCK_TRANSCRIPTION_HANDLE} + + # Wait for process to exit + ${result}= Wait For Process ${MOCK_TRANSCRIPTION_HANDLE} timeout=5s on_timeout=kill + + Log ✅ Stopped Mock Transcription Server (exit code: ${result.rc}) + ELSE + Log ⚠️ Mock Transcription Server handle not found (may not have been started) + END diff --git a/tests/resources/websocket_keywords.robot b/tests/resources/websocket_keywords.robot index f1fbf378..57b9f3ee 100644 --- a/tests/resources/websocket_keywords.robot +++ b/tests/resources/websocket_keywords.robot @@ -98,6 +98,45 @@ Open Audio Stream Log Started audio stream ${stream_id} for device ${device_name} RETURN ${stream_id} +Open Audio Stream With Always Persist + [Documentation] Start a WebSocket audio stream with always_persist=True + ... This ensures audio is saved to MongoDB even if transcription fails. + ... Returns stream_id for sending chunks. + [Arguments] ${device_name}=robot-test ${recording_mode}=streaming + + ${token}= Get Authentication Token api ${ADMIN_EMAIL} ${ADMIN_PASSWORD} + + ${stream_id}= Start Audio Stream + ... base_url=${API_URL} + ... token=${token} + ... device_name=${device_name} + ... recording_mode=${recording_mode} + ... 
always_persist=${True} + + Log Started audio stream ${stream_id} with always_persist=True + RETURN ${stream_id} + +Stream Audio File With Always Persist + [Documentation] Stream a WAV file via WebSocket with always_persist=True + ... This ensures audio is saved to MongoDB even if transcription fails. + [Arguments] ${audio_file_path} ${device_name}=robot-test ${recording_mode}=streaming + + File Should Exist ${audio_file_path} + + ${token}= Get Authentication Token api ${ADMIN_EMAIL} ${ADMIN_PASSWORD} + + ${chunks_sent}= Stream Audio File + ... base_url=${API_URL} + ... token=${token} + ... wav_path=${audio_file_path} + ... device_name=${device_name} + ... recording_mode=${recording_mode} + ... always_persist=${True} + + Log Streamed ${chunks_sent} chunks with always_persist=True + Should Be True ${chunks_sent} > 0 + RETURN ${chunks_sent} + Send Audio Chunks To Stream [Documentation] Send audio chunks from a file to an open stream [Arguments] ${stream_id} ${audio_file_path} ${num_chunks}=${None} ${realtime_pacing}=False From 67b0400850b47cb086c4f513d41cd680a1034b36 Mon Sep 17 00:00:00 2001 From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com> Date: Tue, 27 Jan 2026 14:08:39 +0000 Subject: [PATCH 05/10] Add mock transcription failure configuration for testing - Introduced a new YAML configuration file to simulate transcription failures using invalid API keys for Deepgram services. - Configured both standard and streaming speech-to-text models with invalid credentials to facilitate testing of error handling in audio processing. - Enhanced the testing framework by providing mock models for LLM and embeddings, ensuring comprehensive coverage of failure scenarios. 
--- tests/configs/mock-transcription-failure.yml | 111 +++++++++++++++++++ 1 file changed, 111 insertions(+) create mode 100644 tests/configs/mock-transcription-failure.yml diff --git a/tests/configs/mock-transcription-failure.yml b/tests/configs/mock-transcription-failure.yml new file mode 100644 index 00000000..f9b03575 --- /dev/null +++ b/tests/configs/mock-transcription-failure.yml @@ -0,0 +1,111 @@ +chat: + system_prompt: You are a helpful AI assistant with access to the user's personal + memories and conversation history. +defaults: + embedding: mock-embed + llm: mock-llm + stt: stt-deepgram-invalid + stt_stream: stt-deepgram-stream-invalid + vector_store: vs-qdrant +memory: + extraction: + enabled: false + provider: chronicle + timeout_seconds: 1200 +models: +- api_family: openai + api_key: dummy-key-not-used + description: Dummy LLM for testing (not called) + model_name: gpt-4o-mini + model_output: json + model_params: + max_tokens: 2000 + temperature: 0.2 + model_provider: openai + model_type: llm + model_url: https://api.openai.com/v1 + name: mock-llm +- api_family: openai + api_key: dummy-key-not-used + description: Dummy embeddings for testing (not called) + embedding_dimensions: 384 + model_name: text-embedding-3-small + model_output: vector + model_provider: openai + model_type: embedding + model_url: https://api.openai.com/v1 + name: mock-embed +- api_family: qdrant + description: Qdrant vector database (local) + model_params: + collection_name: omi_memories + host: ${oc.env:QDRANT_BASE_URL,qdrant} + port: ${oc.env:QDRANT_PORT,6333} + model_provider: qdrant + model_type: vector_store + model_url: http://${oc.env:QDRANT_BASE_URL,qdrant}:${oc.env:QDRANT_PORT,6333} + name: vs-qdrant +# Deepgram with invalid API key to trigger transcription failures +- api_family: http + api_key: invalid-key-for-testing + description: Deepgram Nova 3 with invalid key (for testing transcription failure) + model_provider: deepgram + model_type: stt + model_url: 
https://api.deepgram.com/v1 + name: stt-deepgram-invalid + operations: + stt_transcribe: + headers: + Authorization: Token invalid-key-for-testing + Content-Type: audio/raw + method: POST + path: /listen + query: + channels: '1' + diarize: 'true' + encoding: linear16 + language: multi + model: nova-3 + punctuate: 'true' + sample_rate: 16000 + smart_format: 'true' + response: + extract: + segments: results.channels[0].alternatives[0].paragraphs.paragraphs + text: results.channels[0].alternatives[0].transcript + words: results.channels[0].alternatives[0].words + type: json +- api_family: websocket + api_key: invalid-key-for-testing + description: Deepgram Nova 3 streaming with invalid key (for testing transcription failure) + model_provider: deepgram + model_type: stt_stream + model_url: wss://api.deepgram.com/v1/listen + name: stt-deepgram-stream-invalid + operations: + chunk_header: + message: {} + end: + message: + type: CloseStream + expect: + extract: + segments: [] + text: channel.alternatives[0].transcript + words: channel.alternatives[0].words + final_type: Results + interim_type: Results + query: + channels: 1 + encoding: linear16 + interim_results: true + language: multi + model: nova-3 + punctuate: true + sample_rate: 16000 + smart_format: true + start: + message: {} +speaker_recognition: + enabled: false + timeout: 60 From 98149f897c421dd6bbe65ffaa9b832f9e2114b40 Mon Sep 17 00:00:00 2001 From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com> Date: Tue, 27 Jan 2026 14:14:23 +0000 Subject: [PATCH 06/10] Improve logging for transcription job failures and session handling - Updated logging levels for transcription errors to use error severity, providing clearer insights into issues. - Distinguish between transcription service failures and legitimate no speech scenarios in session termination logs. - Enhanced session failure messages to guide users in checking transcription service configurations. 
--- backends/advanced/docker-compose-test.yml | 4 ++- .../workers/transcription_jobs.py | 29 ++++++++++++------- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/backends/advanced/docker-compose-test.yml b/backends/advanced/docker-compose-test.yml index 73be45be..43aa1a83 100644 --- a/backends/advanced/docker-compose-test.yml +++ b/backends/advanced/docker-compose-test.yml @@ -59,6 +59,7 @@ services: # Wait for audio queue to drain before timing out (test mode) - WAIT_FOR_AUDIO_QUEUE_DRAIN=true # Mock speaker recognition for tests (avoids resource-intensive ML service) + # To test with REAL speaker recognition: set to 'false' and start extras/speaker-recognition service - USE_MOCK_SPEAKER_CLIENT=true depends_on: qdrant-test: @@ -137,7 +138,7 @@ services: context: ../../extras/speaker-recognition dockerfile: Dockerfile args: - PYTORCH_CUDA_VERSION: cpu + PYTORCH_CUDA_VERSION: cu12.6 image: speaker-recognition-test:latest ports: - "8086:8085" # Avoid conflict with dev speaker service on 8085 @@ -238,6 +239,7 @@ services: # Wait for audio queue to drain before timing out (test mode) - WAIT_FOR_AUDIO_QUEUE_DRAIN=true # Mock speaker recognition for tests (avoids resource-intensive ML service) + # To test with REAL speaker recognition: set to 'false' and start extras/speaker-recognition service - USE_MOCK_SPEAKER_CLIENT=true depends_on: chronicle-backend-test: diff --git a/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py index d0ec47d9..bab8adb2 100644 --- a/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py +++ b/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py @@ -628,16 +628,16 @@ async def stream_speech_detection_job( error_status = await redis_client.hget(session_key, "transcription_error") if error_status: error_msg = error_status.decode() - logger.warning(f"❌ Transcription error detected: {error_msg}") - 
logger.info(f"✅ Session ended without speech (transcription error)") + logger.error(f"❌ Transcription service error: {error_msg}") + logger.error(f"❌ Session failed - transcription service unavailable") break # Check if we've been waiting too long with no results at all grace_elapsed = time.time() - session_closed_at if grace_elapsed > 5 and not combined.get("chunk_count", 0): # 5+ seconds with no transcription activity at all - likely API key issue - logger.warning(f"⚠️ No transcription activity after {grace_elapsed:.1f}s - possible API key or connectivity issue") - logger.info(f"✅ Session ended without speech (no transcription activity)") + logger.error(f"❌ No transcription activity after {grace_elapsed:.1f}s - possible API key or connectivity issue") + logger.error(f"❌ Session failed - check transcription service configuration") break await asyncio.sleep(2) @@ -838,11 +838,20 @@ async def stream_speech_detection_job( # Session ended without speech reason = last_speech_analysis.get('reason', 'No transcription received') if last_speech_analysis else 'No transcription received' - logger.warning( - f"❌ Session ended without meaningful speech detected\n" - f" Reason: {reason}\n" - f" Runtime: {time.time() - start_time:.1f}s" - ) + + # Distinguish between transcription failures (error) vs legitimate no speech (info) + if reason == 'No transcription received': + logger.error( + f"❌ Session failed - transcription service did not respond\n" + f" Reason: {reason}\n" + f" Runtime: {time.time() - start_time:.1f}s" + ) + else: + logger.info( + f"✅ Session ended without meaningful speech\n" + f" Reason: {reason}\n" + f" Runtime: {time.time() - start_time:.1f}s" + ) # Check if this is an always_persist conversation that needs to be marked as failed # NOTE: We check MongoDB directly because the conversation:current Redis key might have been @@ -869,7 +878,7 @@ async def stream_speech_detection_job( await conversation.save() - logger.info(f"✅ Marked conversation 
{conversation.conversation_id} as transcription_failed") + logger.warning(f"🔴 Marked conversation {conversation.conversation_id} as transcription_failed") else: logger.info(f"ℹ️ No always_persist placeholder conversation found for session {session_id[:12]}") From 0e7357628af8735802bc884abfa61150709e637e Mon Sep 17 00:00:00 2001 From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com> Date: Wed, 28 Jan 2026 02:48:41 +0000 Subject: [PATCH 07/10] Implement miscellaneous settings management and enhance audio processing - Introduced functions to retrieve and save miscellaneous settings, including `always_persist_enabled` and `use_provider_segments`, using OmegaConf. - Updated the system controller and routes to handle new endpoints for managing miscellaneous settings, ensuring admin access control. - Refactored audio processing jobs to read the `always_persist_enabled` setting from global configuration, improving audio persistence behavior. - Enhanced the web UI to allow administrators to view and modify miscellaneous settings, providing better control over audio processing features. - Added integration tests to verify the functionality of the new settings management, ensuring robust handling of audio persistence scenarios. 
--- .../src/advanced_omi_backend/config.py | 54 ++- .../controllers/queue_controller.py | 10 +- .../controllers/system_controller.py | 64 ++++ .../controllers/websocket_controller.py | 23 +- .../routers/modules/queue_routes.py | 9 +- .../routers/modules/system_routes.py | 15 + .../workers/audio_jobs.py | 14 +- .../workers/conversation_jobs.py | 2 + .../workers/memory_jobs.py | 202 ++++++----- .../workers/transcription_jobs.py | 342 +++++++++++++++++- .../src/hooks/useSimpleAudioRecording.ts | 10 +- .../advanced/webui/src/pages/LiveRecord.tsx | 29 +- backends/advanced/webui/src/pages/System.tsx | 123 ++++++- backends/advanced/webui/src/services/api.ts | 5 + config/defaults.yml | 6 + tests/bin/start-rebuild-containers.sh | 9 + tests/configs/deepgram-openai.yml | 11 +- tests/configs/mock-services.yml | 3 + .../always_persist_audio_tests.robot | 155 ++++---- tests/resources/conversation_keywords.robot | 17 + tests/resources/system_keywords.robot | 23 ++ 21 files changed, 884 insertions(+), 242 deletions(-) diff --git a/backends/advanced/src/advanced_omi_backend/config.py b/backends/advanced/src/advanced_omi_backend/config.py index 20a08afe..77a842ce 100644 --- a/backends/advanced/src/advanced_omi_backend/config.py +++ b/backends/advanced/src/advanced_omi_backend/config.py @@ -176,4 +176,56 @@ def get_audio_storage_settings() -> dict: Dict with audio_base_path, audio_chunks_path """ cfg = get_backend_config('audio_storage') - return OmegaConf.to_container(cfg, resolve=True) \ No newline at end of file + return OmegaConf.to_container(cfg, resolve=True) + + +# ============================================================================ +# Miscellaneous Settings (OmegaConf-based) +# ============================================================================ + +def get_misc_settings() -> dict: + """ + Get miscellaneous configuration settings using OmegaConf. 
+ + Returns: + Dict with always_persist_enabled and use_provider_segments + """ + # Get audio settings for always_persist_enabled + audio_cfg = get_backend_config('audio') + audio_settings = OmegaConf.to_container(audio_cfg, resolve=True) if audio_cfg else {} + + # Get transcription settings for use_provider_segments + transcription_cfg = get_backend_config('transcription') + transcription_settings = OmegaConf.to_container(transcription_cfg, resolve=True) if transcription_cfg else {} + + return { + 'always_persist_enabled': audio_settings.get('always_persist_enabled', False), + 'use_provider_segments': transcription_settings.get('use_provider_segments', False) + } + + +def save_misc_settings(settings: dict) -> bool: + """ + Save miscellaneous settings to config.yml using OmegaConf. + + Args: + settings: Dict with always_persist_enabled and/or use_provider_segments + + Returns: + True if saved successfully, False otherwise + """ + success = True + + # Save audio settings if always_persist_enabled is provided + if 'always_persist_enabled' in settings: + audio_settings = {'always_persist_enabled': settings['always_persist_enabled']} + if not save_config_section('backend.audio', audio_settings): + success = False + + # Save transcription settings if use_provider_segments is provided + if 'use_provider_segments' in settings: + transcription_settings = {'use_provider_segments': settings['use_provider_segments']} + if not save_config_section('backend.transcription', transcription_settings): + success = False + + return success \ No newline at end of file diff --git a/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py index fdf1ec7b..2d0577e7 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py @@ -325,8 +325,7 @@ def is_job_complete(job): def 
start_streaming_jobs( session_id: str, user_id: str, - client_id: str, - always_persist: bool = False + client_id: str ) -> Dict[str, str]: """ Enqueue jobs for streaming audio session (initial session setup). @@ -339,12 +338,13 @@ def start_streaming_jobs( session_id: Stream session ID (equals client_id for streaming) user_id: User identifier client_id: Client identifier - always_persist: Whether to create placeholder conversation immediately (default: False) Returns: Dict with job IDs: {'speech_detection': job_id, 'audio_persistence': job_id} - Note: user_email is fetched from the database when needed. + Note: + - user_email is fetched from the database when needed. + - always_persist setting is read from global config by the audio persistence job. """ from advanced_omi_backend.workers.transcription_jobs import stream_speech_detection_job from advanced_omi_backend.workers.audio_jobs import audio_streaming_persistence_job @@ -383,12 +383,12 @@ def start_streaming_jobs( # Enqueue audio persistence job on dedicated audio queue # NOTE: This job handles file rotation for multiple conversations automatically # Runs for entire session, not tied to individual conversations + # The job reads always_persist_enabled from global config internally audio_job = audio_queue.enqueue( audio_streaming_persistence_job, session_id, user_id, client_id, - always_persist, job_timeout=86400, # 24 hours for all-day sessions ttl=None, # No pre-run expiry (job can wait indefinitely in queue) result_ttl=JOB_RESULT_TTL, # Cleanup AFTER completion diff --git a/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py index 9cceb509..55a4b43e 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py @@ -15,6 +15,8 @@ from advanced_omi_backend.config import ( get_diarization_settings as 
load_diarization_settings, + get_misc_settings as load_misc_settings, + save_misc_settings, ) from advanced_omi_backend.config import ( save_diarization_settings, @@ -333,6 +335,68 @@ async def save_diarization_settings_controller(settings: dict): raise e +async def get_misc_settings(): + """Get current miscellaneous settings.""" + try: + # Get settings using OmegaConf + settings = load_misc_settings() + return { + "settings": settings, + "status": "success" + } + except Exception as e: + logger.exception("Error getting misc settings") + raise e + + +async def save_misc_settings_controller(settings: dict): + """Save miscellaneous settings.""" + try: + # Validate settings + valid_keys = {"always_persist_enabled", "use_provider_segments"} + + # Filter to only valid keys + filtered_settings = {} + for key, value in settings.items(): + if key not in valid_keys: + continue # Skip unknown keys + + # Type validation + if not isinstance(value, bool): + raise HTTPException(status_code=400, detail=f"Invalid value for {key}: must be boolean") + + filtered_settings[key] = value + + # Reject if NO valid keys provided + if not filtered_settings: + raise HTTPException(status_code=400, detail="No valid misc settings provided") + + # Save using OmegaConf + if save_misc_settings(filtered_settings): + # Get updated settings + updated_settings = load_misc_settings() + logger.info(f"Updated and saved misc settings: {filtered_settings}") + + return { + "message": "Miscellaneous settings saved successfully", + "settings": updated_settings, + "status": "success" + } + else: + logger.warning("Settings save failed") + return { + "message": "Settings save failed", + "settings": load_misc_settings(), + "status": "error" + } + + except HTTPException: + raise + except Exception as e: + logger.exception("Error saving misc settings") + raise e + + async def get_cleanup_settings_controller(user: User) -> dict: """ Get current cleanup settings (admin only). 
diff --git a/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py index 541210c3..89e5b46f 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py @@ -451,14 +451,10 @@ async def _initialize_streaming_session( # Enqueue streaming jobs (speech detection + audio persistence) from advanced_omi_backend.controllers.queue_controller import start_streaming_jobs - # Get always_persist flag from client state - always_persist_flag = getattr(client_state, 'always_persist', False) - job_ids = start_streaming_jobs( session_id=client_state.stream_session_id, user_id=user_id, - client_id=client_id, - always_persist=always_persist_flag + client_id=client_id ) # Store job IDs in Redis session (not in ClientState) @@ -468,8 +464,8 @@ async def _initialize_streaming_session( audio_persistence_job_id=job_ids['audio_persistence'] ) - # Note: Placeholder conversation creation (if always_persist=True) is now handled - # by the audio persistence job itself, making it self-sufficient. + # Note: Placeholder conversation creation is handled by the audio persistence job, + # which reads the always_persist_enabled setting from global config. # Launch interim results subscriber if WebSocket provided subscriber_task = None @@ -822,11 +818,11 @@ async def _handle_audio_session_start( websocket: Optional[WebSocket] = None ) -> tuple[bool, str]: """ - Handle audio-start event - validate mode, set recording mode, and extract always_persist flag. + Handle audio-start event - validate mode and set recording mode. 
Args: client_state: Client state object - audio_format: Audio format dict with mode and always_persist + audio_format: Audio format dict with mode client_id: Client ID websocket: Optional WebSocket connection (for WebUI error messages) @@ -836,16 +832,14 @@ async def _handle_audio_session_start( from advanced_omi_backend.services.transcription import is_transcription_available recording_mode = audio_format.get("mode", "batch") - always_persist = audio_format.get("always_persist", False) application_logger.info( f"🔴 BACKEND: Received audio-start for {client_id} - " - f"mode={recording_mode}, always_persist={always_persist}, full format={audio_format}" + f"mode={recording_mode}, full format={audio_format}" ) # Store on client state for later use client_state.recording_mode = recording_mode - client_state.always_persist = always_persist # VALIDATION: Check if streaming mode is available if recording_mode == "streaming": @@ -898,8 +892,7 @@ async def _handle_audio_session_start( f"Format: {audio_format.get('rate')}Hz, " f"{audio_format.get('width')}bytes, " f"{audio_format.get('channels')}ch, " - f"Mode: {recording_mode}, " - f"Always Persist: {always_persist}" + f"Mode: {recording_mode}" ) return True, recording_mode # Switch to audio streaming mode @@ -1358,7 +1351,7 @@ async def handle_pcm_websocket( websocket=ws # Pass websocket for WebUI error display ) - # Initialize streaming session (for always_persist and job setup) + # Initialize streaming session if recording_mode == "streaming": application_logger.info(f"🔴 BACKEND: Initializing streaming session for {client_id}") interim_subscriber_task = await _initialize_streaming_session( diff --git a/backends/advanced/src/advanced_omi_backend/routers/modules/queue_routes.py b/backends/advanced/src/advanced_omi_backend/routers/modules/queue_routes.py index 14c7ee0e..51c07097 100644 --- a/backends/advanced/src/advanced_omi_backend/routers/modules/queue_routes.py +++ 
b/backends/advanced/src/advanced_omi_backend/routers/modules/queue_routes.py @@ -72,11 +72,18 @@ async def get_job_status( logger.error(f"Failed to determine status for job {job_id}: {e}") raise HTTPException(status_code=500, detail=str(e)) - return { + response = { "job_id": job.id, "status": status } + # Include error information for failed jobs + if status == "failed" and job.exc_info: + response["error_message"] = str(job.exc_info) + response["exc_info"] = str(job.exc_info) + + return response + except HTTPException: # Re-raise HTTPException unchanged (e.g., 403 Forbidden) raise diff --git a/backends/advanced/src/advanced_omi_backend/routers/modules/system_routes.py b/backends/advanced/src/advanced_omi_backend/routers/modules/system_routes.py index 3e84ae57..aa7a63cd 100644 --- a/backends/advanced/src/advanced_omi_backend/routers/modules/system_routes.py +++ b/backends/advanced/src/advanced_omi_backend/routers/modules/system_routes.py @@ -63,6 +63,21 @@ async def save_diarization_settings( return await system_controller.save_diarization_settings_controller(settings) +@router.get("/misc-settings") +async def get_misc_settings(current_user: User = Depends(current_superuser)): + """Get miscellaneous configuration settings. Admin only.""" + return await system_controller.get_misc_settings() + + +@router.post("/misc-settings") +async def save_misc_settings( + settings: dict, + current_user: User = Depends(current_superuser) +): + """Save miscellaneous configuration settings. 
Admin only.""" + return await system_controller.save_misc_settings_controller(settings) + + @router.get("/cleanup-settings") async def get_cleanup_settings( current_user: User = Depends(current_superuser) diff --git a/backends/advanced/src/advanced_omi_backend/workers/audio_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/audio_jobs.py index 26089c2a..de563069 100644 --- a/backends/advanced/src/advanced_omi_backend/workers/audio_jobs.py +++ b/backends/advanced/src/advanced_omi_backend/workers/audio_jobs.py @@ -26,7 +26,6 @@ async def audio_streaming_persistence_job( session_id: str, user_id: str, client_id: str, - always_persist: bool = False, *, redis_client=None ) -> Dict[str, Any]: @@ -42,15 +41,22 @@ async def audio_streaming_persistence_job( session_id: Stream session ID user_id: User ID client_id: Client ID - always_persist: Whether to create placeholder conversation immediately (default: False) redis_client: Redis client (injected by decorator) Returns: Dict with chunk_count, total_bytes, compressed_bytes, duration_seconds - Note: Replaces disk-based WAV file storage with MongoDB chunk storage. + Note: + - Replaces disk-based WAV file storage with MongoDB chunk storage. + - Reads always_persist_enabled from global config to determine whether to + create placeholder conversations immediately. 
""" - logger.info(f"🎵 Starting MongoDB audio persistence for session {session_id}") + # Read always_persist setting from global config + from advanced_omi_backend.config import get_misc_settings + misc_settings = get_misc_settings() + always_persist = misc_settings.get('always_persist_enabled', False) + + logger.info(f"🎵 Starting MongoDB audio persistence for session {session_id} (always_persist={always_persist})") # Setup audio persistence consumer group (separate from transcription consumer) audio_stream_name = f"audio:stream:{client_id}" diff --git a/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py index 86c7b464..fd5875e2 100644 --- a/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py +++ b/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py @@ -691,10 +691,12 @@ async def open_conversation_job( # Enqueue post-conversation processing pipeline (no batch transcription needed - using streaming transcript) client_id = conversation.client_id if conversation else None + # Enqueue post-conversation jobs directly (no fallback dependency in success case) job_ids = start_post_conversation_jobs( conversation_id=conversation_id, user_id=user_id, transcript_version_id=version_id, # Pass the streaming transcript version ID + depends_on_job=None, # No dependency - streaming already succeeded client_id=client_id, # Pass client_id for UI tracking end_reason=end_reason # Pass the determined end_reason (websocket_disconnect, inactivity_timeout, etc.) 
) diff --git a/backends/advanced/src/advanced_omi_backend/workers/memory_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/memory_jobs.py index 1a5700bd..94581024 100644 --- a/backends/advanced/src/advanced_omi_backend/workers/memory_jobs.py +++ b/backends/advanced/src/advanced_omi_backend/workers/memory_jobs.py @@ -137,111 +137,117 @@ async def process_memory_job(conversation_id: str, *, redis_client=None) -> Dict if memory_result: success, created_memory_ids = memory_result - if success and created_memory_ids: - # Add memory version to conversation - conversation_model = await Conversation.find_one( - Conversation.conversation_id == conversation_id - ) - if conversation_model: - processing_time = time.time() - start_time - - # Get active transcript version for reference - transcript_version_id = conversation_model.active_transcript_version or "unknown" - - # Determine memory provider from memory service - memory_provider = conversation_model.MemoryProvider.CHRONICLE # Default - try: - memory_service_obj = get_memory_service() - provider_name = memory_service_obj.__class__.__name__ - if "OpenMemory" in provider_name: - memory_provider = conversation_model.MemoryProvider.OPENMEMORY_MCP - except Exception: - pass - - # Create version ID for this memory extraction - version_id = str(uuid.uuid4()) - - # Add memory version with metadata - conversation_model.add_memory_version( - version_id=version_id, - memory_count=len(created_memory_ids), - transcript_version_id=transcript_version_id, - provider=memory_provider, - processing_time_seconds=processing_time, - metadata={"memory_ids": created_memory_ids}, - set_as_active=True, + if success: + processing_time = time.time() - start_time + + # Determine memory provider from memory service + memory_provider = "chronicle" # Default + try: + memory_service_obj = get_memory_service() + provider_name = memory_service_obj.__class__.__name__ + if "OpenMemory" in provider_name: + memory_provider = "openmemory_mcp" + 
except Exception: + pass + + # Only create memory version if new memories were created + if created_memory_ids: + # Add memory version to conversation + conversation_model = await Conversation.find_one( + Conversation.conversation_id == conversation_id ) - await conversation_model.save() + if conversation_model: + # Get active transcript version for reference + transcript_version_id = conversation_model.active_transcript_version or "unknown" + + # Create version ID for this memory extraction + version_id = str(uuid.uuid4()) + + # Add memory version with metadata + conversation_model.add_memory_version( + version_id=version_id, + memory_count=len(created_memory_ids), + transcript_version_id=transcript_version_id, + provider=conversation_model.MemoryProvider.OPENMEMORY_MCP if memory_provider == "openmemory_mcp" else conversation_model.MemoryProvider.CHRONICLE, + processing_time_seconds=processing_time, + metadata={"memory_ids": created_memory_ids}, + set_as_active=True, + ) + await conversation_model.save() - logger.info( - f"✅ Completed memory processing for conversation {conversation_id} - created {len(created_memory_ids)} memories in {processing_time:.2f}s" - ) + logger.info( + f"✅ Completed memory processing for conversation {conversation_id} - created {len(created_memory_ids)} memories in {processing_time:.2f}s" + ) - # Update job metadata with memory information - from rq import get_current_job - - current_job = get_current_job() - if current_job: - if not current_job.meta: - current_job.meta = {} - - # Fetch memory details to display in UI - memory_details = [] - try: - for memory_id in created_memory_ids[:5]: # Limit to first 5 for display - memory_entry = await memory_service.get_memory(memory_id, user_id) - if memory_entry: - # Handle different return types from memory service - memory_text: str - if isinstance(memory_entry, MemoryEntry): - # MemoryEntry object with content attribute - memory_text = memory_entry.content - elif isinstance(memory_entry, 
dict): - # Dictionary with "content" key - if "content" in memory_entry: - memory_text = memory_entry["content"] + # Update job metadata with memory information + from rq import get_current_job + + current_job = get_current_job() + if current_job: + if not current_job.meta: + current_job.meta = {} + + # Fetch memory details to display in UI + memory_details = [] + try: + for memory_id in created_memory_ids[:5]: # Limit to first 5 for display + memory_entry = await memory_service.get_memory(memory_id, user_id) + if memory_entry: + # Handle different return types from memory service + memory_text: str + if isinstance(memory_entry, MemoryEntry): + # MemoryEntry object with content attribute + memory_text = memory_entry.content + elif isinstance(memory_entry, dict): + # Dictionary with "content" key + if "content" in memory_entry: + memory_text = memory_entry["content"] + else: + logger.error( + f"Dict memory entry missing 'content' key for {memory_id}: {list(memory_entry.keys())}" + ) + raise ValueError( + f"Dict memory entry missing 'content' key for memory {memory_id}" + ) + elif isinstance(memory_entry, str): + # String content directly + memory_text = memory_entry else: + # Unexpected type logger.error( - f"Dict memory entry missing 'content' key for {memory_id}: {list(memory_entry.keys())}" + f"Unexpected memory entry type for {memory_id}: {type(memory_entry).__name__}" ) - raise ValueError( - f"Dict memory entry missing 'content' key for memory {memory_id}" + raise TypeError( + f"Unexpected memory entry type: {type(memory_entry).__name__}" ) - elif isinstance(memory_entry, str): - # String content directly - memory_text = memory_entry - else: - # Unexpected type - logger.error( - f"Unexpected memory entry type for {memory_id}: {type(memory_entry).__name__}" - ) - raise TypeError( - f"Unexpected memory entry type: {type(memory_entry).__name__}" - ) - # Truncate to 200 chars - memory_details.append( - {"memory_id": memory_id, "text": memory_text[:200]} - ) - 
except Exception as e: - logger.warning(f"Failed to fetch memory details for UI: {e}") - - current_job.meta.update( - { - "conversation_id": conversation_id, - "memories_created": len(created_memory_ids), - "memory_ids": created_memory_ids[:5], # Store first 5 IDs - "memory_details": memory_details, - "processing_time": processing_time, - } + # Truncate to 200 chars + memory_details.append( + {"memory_id": memory_id, "text": memory_text[:200]} + ) + except Exception as e: + logger.warning(f"Failed to fetch memory details for UI: {e}") + + current_job.meta.update( + { + "conversation_id": conversation_id, + "memories_created": len(created_memory_ids), + "memory_ids": created_memory_ids[:5], # Store first 5 IDs + "memory_details": memory_details, + "processing_time": processing_time, + } + ) + current_job.save_meta() + else: + logger.info( + f"ℹ️ Memory processing completed for conversation {conversation_id} - no new memories created (deduplication) in {processing_time:.2f}s" ) - current_job.save_meta() # NOTE: Listening jobs are restarted by open_conversation_job (not here) # This allows users to resume talking immediately after conversation closes, # without waiting for memory processing to complete. 
- # Trigger memory-level plugins + # Trigger memory-level plugins (ALWAYS dispatch when success, even with 0 new memories) try: # Get or initialize plugin router (same pattern as conversation_jobs.py) plugin_router = get_plugin_router() @@ -260,20 +266,20 @@ async def process_memory_job(conversation_id: str, *, redis_client=None) -> Dict if plugin_router: plugin_data = { - 'memories': created_memory_ids, + 'memories': created_memory_ids or [], 'conversation': { 'conversation_id': conversation_id, 'client_id': client_id, 'user_id': user_id, 'user_email': user_email, }, - 'memory_count': len(created_memory_ids), + 'memory_count': len(created_memory_ids) if created_memory_ids else 0, 'conversation_id': conversation_id, } logger.info( f"🔌 DISPATCH: memory.processed event " - f"(conversation={conversation_id[:12]}, memories={len(created_memory_ids)})" + f"(conversation={conversation_id[:12]}, memories={len(created_memory_ids) if created_memory_ids else 0})" ) plugin_results = await plugin_router.dispatch_event( @@ -282,7 +288,7 @@ async def process_memory_job(conversation_id: str, *, redis_client=None) -> Dict data=plugin_data, metadata={ 'processing_time': processing_time, - 'memory_provider': str(memory_provider), + 'memory_provider': memory_provider, } ) @@ -301,12 +307,12 @@ async def process_memory_job(conversation_id: str, *, redis_client=None) -> Dict return { "success": True, - "memories_created": len(created_memory_ids), + "memories_created": len(created_memory_ids) if created_memory_ids else 0, "processing_time": processing_time, } else: - # No memories created - still successful - return {"success": True, "memories_created": 0, "skipped": True} + # Memory extraction failed + return {"success": False, "error": "Memory extraction returned failure"} else: return {"success": False, "error": "Memory service returned False"} diff --git a/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py 
b/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py index bab8adb2..0ce9d77e 100644 --- a/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py +++ b/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py @@ -8,19 +8,31 @@ import os import logging import time +import uuid +from datetime import datetime +from pathlib import Path from typing import Dict, Any +from rq import get_current_job +from rq.job import Job from rq.exceptions import NoSuchJobError from advanced_omi_backend.models.job import JobPriority, BaseRQJob, async_job +from advanced_omi_backend.models.conversation import Conversation +from advanced_omi_backend.models.audio_chunk import AudioChunkDocument +from beanie.operators import In from advanced_omi_backend.controllers.queue_controller import ( transcription_queue, redis_conn, JOB_RESULT_TTL, REDIS_URL, + start_post_conversation_jobs, ) from advanced_omi_backend.utils.conversation_utils import analyze_speech, mark_conversation_deleted +from advanced_omi_backend.utils.audio_chunk_utils import reconstruct_wav_from_conversation, convert_audio_to_chunks from advanced_omi_backend.services.plugin_service import get_plugin_router +from advanced_omi_backend.services.transcription import get_transcription_provider, is_transcription_available +from advanced_omi_backend.services.audio_stream import TranscriptionResultsAggregator from advanced_omi_backend.config import get_backend_config logger = logging.getLogger(__name__) @@ -152,10 +164,6 @@ async def transcribe_full_audio_job( Returns: Dict with processing results including transcript data for next job """ - from pathlib import Path - from advanced_omi_backend.services.transcription import get_transcription_provider - from advanced_omi_backend.models.conversation import Conversation - logger.info( f"🔄 RQ: Starting transcript processing for conversation {conversation_id} (trigger: {trigger})" ) @@ -180,8 +188,6 @@ async def transcribe_full_audio_job( 
logger.info(f"Using transcription provider: {provider_name}") # Reconstruct audio from MongoDB chunks - from advanced_omi_backend.utils.audio_chunk_utils import reconstruct_wav_from_conversation - logger.info(f"📦 Reconstructing audio from MongoDB chunks for conversation {conversation_id}") try: @@ -296,9 +302,7 @@ async def transcribe_full_audio_job( ) # Cancel all dependent jobs (cropping, speaker recognition, memory, title/summary) - from rq import get_current_job - from rq.job import Job - + # Note: get_current_job and Job are already imported at module level current_job = get_current_job() if current_job: # Get all jobs that depend on this transcription job @@ -493,8 +497,6 @@ async def transcribe_full_audio_job( ) # Update job metadata with title and summary for UI display - from rq import get_current_job - current_job = get_current_job() if current_job: if not current_job.meta: @@ -525,6 +527,293 @@ async def transcribe_full_audio_job( } +async def create_audio_only_conversation( + session_id: str, + user_id: str, + client_id: str +) -> "Conversation": + """ + Create or reuse conversation for batch transcription fallback. + + Handles two scenarios: + 1. always_persist=True - Reuses existing placeholder conversation + 2. 
always_persist=False - Creates new conversation from audio chunks + """ + # CASE 1: Check if always_persist placeholder conversation exists + # The audio_streaming_persistence_job may have created it already + placeholder_conversation = await Conversation.find_one( + Conversation.client_id == session_id, + Conversation.always_persist == True, + In(Conversation.processing_status, ["pending_transcription", "transcription_failed"]) + ) + + if placeholder_conversation: + logger.info( + f"✅ Found always_persist placeholder conversation {placeholder_conversation.conversation_id[:12]} " + f"for session {session_id[:12]}, reusing for batch transcription" + ) + # Update status to show batch transcription is starting + placeholder_conversation.processing_status = "batch_transcription" + placeholder_conversation.title = "Audio Recording (Batch Transcription...)" + placeholder_conversation.summary = "Processing audio with offline transcription..." + await placeholder_conversation.save() + + # Audio chunks are already linked to this conversation_id + # (stored by audio_streaming_persistence_job) + return placeholder_conversation + + # CASE 2: No placeholder exists - create new conversation using session_id + # This happens when always_persist=False or audio_persistence_job didn't run + # We reuse session_id as conversation_id to avoid unnecessary UUID generation + logger.info( + f"✅ No placeholder found, creating new conversation for session {session_id[:12]} " + f"using session_id as conversation_id" + ) + + conversation = Conversation( + conversation_id=session_id, + user_id=user_id, + client_id=client_id, + title="Audio Recording (Batch Transcription...)", + summary="Processing audio with offline transcription...", + processing_status="batch_transcription", + always_persist=False, # Mark as False since this is fallback + created_at=datetime.utcnow(), + ) + await conversation.insert() + + logger.info( + f"✅ Created batch transcription conversation {session_id[:12]} for 
fallback" + ) + return conversation + + +@async_job(redis=True, beanie=True) +async def transcription_fallback_check_job( + session_id: str, + user_id: str, + client_id: str, + timeout_seconds: int = 1800, + *, + redis_client=None +) -> Dict[str, Any]: + """ + Check if streaming transcription succeeded, fallback to batch if needed. + + This job acts as a gate for post-conversation jobs: + - If streaming transcript exists → Pass through immediately + - If no transcript → Trigger batch transcription, wait for completion, enqueue post-jobs + + Args: + session_id: Stream session ID + user_id: User ID + client_id: Client ID + timeout_seconds: Max wait time for batch transcription (default 30 minutes) + redis_client: Redis client (injected by decorator) + + Returns: + Dict with status (pass_through or batch_fallback_completed) and conversation details + """ + logger.info(f"🔍 Checking transcription status for session {session_id[:12]}") + + # Find conversation by session_id (client_id for streaming sessions) + conversation = await Conversation.find_one( + Conversation.client_id == session_id + ) + + # Check if transcript exists (streaming succeeded) + if conversation and conversation.active_transcript and conversation.transcript: + logger.info( + f"✅ Streaming transcript exists for session {session_id[:12]}, " + f"passing through (conversation {conversation.conversation_id[:12]})" + ) + return { + "status": "pass_through", + "transcript_source": "streaming", + "conversation_id": conversation.conversation_id + } + + # No transcript → Trigger batch fallback + logger.warning( + f"⚠️ No streaming transcript found for session {session_id[:12]}, " + f"attempting batch transcription fallback" + ) + + # Check if batch provider available + if not is_transcription_available(mode="batch"): + raise ValueError( + "No batch transcription provider available for fallback. " + "Configure a batch STT provider (e.g., Parakeet) or fix streaming provider." 
+ ) + + # If no conversation exists, check if we have audio chunks to transcribe + if not conversation: + chunks_count = await AudioChunkDocument.find( + AudioChunkDocument.conversation_id == session_id + ).count() + + if chunks_count == 0: + # No MongoDB chunks - check if Redis stream has unprocessed audio + logger.info( + f"📦 No MongoDB chunks found for session {session_id[:12]}, " + f"checking Redis stream for unprocessed audio..." + ) + + stream_name = f"audio:stream:{client_id}" + + # Check if stream exists and has messages + try: + stream_length = await redis_client.xlen(stream_name) + + if stream_length == 0: + logger.info( + f"ℹ️ No audio found in Redis stream {stream_name}. " + f"Session ended without audio capture. Skipping fallback." + ) + return { + "status": "skipped", + "reason": "no_audio", + "message": "No audio was captured for this session", + "session_id": session_id + } + + logger.info( + f"📡 Found {stream_length} messages in Redis stream {stream_name}, " + f"extracting audio for batch transcription..." + ) + + # Read all audio messages from stream + messages = await redis_client.xrange(stream_name) + + # Collect PCM audio chunks in order + audio_chunks = {} # {chunk_num: audio_data} + + for msg_id, fields in messages: + # Check if this message belongs to our session + msg_session_id = fields.get(b"session_id", b"").decode() + if msg_session_id != session_id: + continue + + # Get chunk ID + msg_chunk_id = fields.get(b"chunk_id", b"").decode() + if not msg_chunk_id or msg_chunk_id == "END": + continue + + try: + chunk_num = int(msg_chunk_id) + except ValueError: + continue + + # Get PCM audio data + audio_data = fields.get(b"audio_data", b"") + if audio_data: + audio_chunks[chunk_num] = audio_data + + if not audio_chunks: + logger.warning( + f"⚠️ Redis stream has {stream_length} messages but no audio chunks " + f"matched session {session_id[:12]}. Skipping fallback." 
+ ) + return { + "status": "skipped", + "reason": "no_matching_audio", + "message": "No audio matched this session in Redis stream", + "session_id": session_id + } + + # Combine audio chunks in order + sorted_chunks = sorted(audio_chunks.items()) + combined_audio = b"".join(data for _, data in sorted_chunks) + + logger.info( + f"✅ Extracted {len(sorted_chunks)} audio chunks from Redis stream " + f"({len(combined_audio)} bytes, ~{len(combined_audio)/32000:.1f}s)" + ) + + # Create conversation placeholder + conversation = await create_audio_only_conversation(session_id, user_id, client_id) + + # Save audio to MongoDB chunks for batch transcription + num_chunks = await convert_audio_to_chunks( + conversation_id=conversation.conversation_id, + audio_data=combined_audio, + sample_rate=16000, + channels=1, + sample_width=2, + ) + + logger.info( + f"💾 Persisted {num_chunks} MongoDB chunks for batch transcription " + f"(conversation {conversation.conversation_id[:12]})" + ) + + except Exception as e: + logger.error(f"❌ Failed to extract audio from Redis stream: {e}", exc_info=True) + raise + else: + logger.info( + f"✅ Found {chunks_count} MongoDB chunks for session {session_id[:12]}, " + f"creating conversation placeholder" + ) + + # Create conversation placeholder for batch transcription + conversation = await create_audio_only_conversation(session_id, user_id, client_id) + + # Enqueue batch transcription job + version_id = f"batch_fallback_{session_id[:12]}" + batch_job = transcription_queue.enqueue( + transcribe_full_audio_job, + conversation.conversation_id, + version_id, + "batch_fallback", + job_timeout=1800, + job_id=f"transcribe_{conversation.conversation_id[:12]}", + description=f"Batch transcription fallback for {session_id[:8]}", + meta={"session_id": session_id, "client_id": client_id} + ) + + logger.info(f"🔄 Enqueued batch transcription fallback job {batch_job.id}") + + # Wait for batch transcription to complete + max_wait = timeout_seconds + waited = 0 + 
while waited < max_wait: + batch_job.refresh() + if batch_job.is_finished: + if batch_job.is_failed: + raise Exception(f"Batch transcription failed: {batch_job.exc_info}") + logger.info(f"✅ Batch transcription completed successfully") + break + await asyncio.sleep(2) + waited += 2 + + if waited >= max_wait: + raise TimeoutError(f"Batch transcription timed out after {max_wait}s") + + # Enqueue post-conversation jobs (same as file upload flow) + post_jobs = start_post_conversation_jobs( + conversation_id=conversation.conversation_id, + user_id=user_id, + transcript_version_id=version_id, + depends_on_job=None, # Batch already completed (we waited for it) + client_id=client_id, + end_reason="batch_fallback" + ) + + logger.info( + f"📋 Enqueued {len(post_jobs)} post-conversation jobs for " + f"batch fallback conversation {conversation.conversation_id[:12]}" + ) + + return { + "status": "batch_fallback_completed", + "transcript_source": "batch", + "conversation_id": conversation.conversation_id, + "batch_job_id": batch_job.id, + "post_job_ids": post_jobs + } + + @async_job(redis=True, beanie=True) async def stream_speech_detection_job( session_id: str, user_id: str, client_id: str, *, redis_client=None @@ -549,10 +838,7 @@ async def stream_speech_detection_job( Note: user_email is fetched from the database when needed. """ - from advanced_omi_backend.services.audio_stream import TranscriptionResultsAggregator - from advanced_omi_backend.utils.conversation_utils import analyze_speech from .conversation_jobs import open_conversation_job - from rq import get_current_job logger.info(f"🔍 Starting speech detection for session {session_id[:12]}") @@ -856,8 +1142,6 @@ async def stream_speech_detection_job( # Check if this is an always_persist conversation that needs to be marked as failed # NOTE: We check MongoDB directly because the conversation:current Redis key might have been # deleted by the audio persistence job cleanup (which runs in parallel). 
- from advanced_omi_backend.models.conversation import Conversation - logger.info(f"🔍 Checking MongoDB for always_persist conversation with client_id: {client_id}") # Find conversation by client_id that matches this session @@ -882,11 +1166,37 @@ async def stream_speech_detection_job( else: logger.info(f"ℹ️ No always_persist placeholder conversation found for session {session_id[:12]}") + # Enqueue fallback check job for failed streaming sessions + # This will attempt batch transcription as a fallback + fallback_job = transcription_queue.enqueue( + transcription_fallback_check_job, + session_id, + user_id, + client_id, + timeout_seconds=1800, # 30 minutes for batch transcription + job_timeout=2400, # 40 minutes job timeout + job_id=f"fallback_check_{session_id[:12]}", + description=f"Transcription fallback check for {session_id[:8]} (no speech)", + meta={"session_id": session_id, "client_id": client_id, "no_speech": True} + ) + + logger.info( + f"📋 Enqueued transcription fallback check job {fallback_job.id} " + f"for failed session {session_id[:12]} (no speech detected)" + ) + + # The fallback job will: + # 1. Check for always_persist placeholder conversation + # 2. If found, trigger batch transcription using stored audio chunks + # 3. Wait for batch completion and enqueue post-conversation jobs + # 4. 
If no placeholder or no audio chunks, fail gracefully with clear error + return { "session_id": session_id, "user_id": user_id, "client_id": client_id, "no_speech_detected": True, + "fallback_job_id": fallback_job.id, "reason": reason, "runtime_seconds": time.time() - start_time, } diff --git a/backends/advanced/webui/src/hooks/useSimpleAudioRecording.ts b/backends/advanced/webui/src/hooks/useSimpleAudioRecording.ts index 4c25df53..d34c8ea6 100644 --- a/backends/advanced/webui/src/hooks/useSimpleAudioRecording.ts +++ b/backends/advanced/webui/src/hooks/useSimpleAudioRecording.ts @@ -21,13 +21,11 @@ export interface SimpleAudioRecordingReturn { recordingDuration: number error: string | null mode: RecordingMode - alwaysPersist: boolean // Actions startRecording: () => Promise stopRecording: () => void setMode: (mode: RecordingMode) => void - setAlwaysPersist: (value: boolean) => void // For components analyser: AnalyserNode | null @@ -45,7 +43,6 @@ export const useSimpleAudioRecording = (): SimpleAudioRecordingReturn => { const [recordingDuration, setRecordingDuration] = useState(0) const [error, setError] = useState(null) const [mode, setMode] = useState('streaming') - const [alwaysPersist, setAlwaysPersist] = useState(false) // Debug stats const [debugStats, setDebugStats] = useState({ @@ -281,14 +278,13 @@ export const useSimpleAudioRecording = (): SimpleAudioRecordingReturn => { rate: 16000, width: 2, channels: 1, - mode: mode, // Pass recording mode to backend - always_persist: alwaysPersist // Pass always_persist flag + mode: mode // Pass recording mode to backend }, payload_length: null } ws.send(JSON.stringify(startMessage) + '\n') - console.log('✅ Audio-start message sent with mode:', mode, 'always_persist:', alwaysPersist) + console.log('✅ Audio-start message sent with mode:', mode) }, [mode]) // Step 4: Start audio streaming @@ -507,11 +503,9 @@ export const useSimpleAudioRecording = (): SimpleAudioRecordingReturn => { recordingDuration, error, mode, - 
alwaysPersist, startRecording, stopRecording, setMode, - setAlwaysPersist, analyser: analyserRef.current, debugStats, formatDuration, diff --git a/backends/advanced/webui/src/pages/LiveRecord.tsx b/backends/advanced/webui/src/pages/LiveRecord.tsx index 202a02e8..4b763746 100644 --- a/backends/advanced/webui/src/pages/LiveRecord.tsx +++ b/backends/advanced/webui/src/pages/LiveRecord.tsx @@ -1,4 +1,4 @@ -import { Radio, Zap, Archive, Database } from 'lucide-react' +import { Radio, Zap, Archive } from 'lucide-react' import { useSimpleAudioRecording } from '../hooks/useSimpleAudioRecording' import SimplifiedControls from '../components/audio/SimplifiedControls' import StatusDisplay from '../components/audio/StatusDisplay' @@ -54,33 +54,6 @@ export default function LiveRecord() {

- {/* Always Persist Audio Toggle */} -
- -
- {/* Mode Description */}

diff --git a/backends/advanced/webui/src/pages/System.tsx b/backends/advanced/webui/src/pages/System.tsx index a628d70d..dfe662f7 100644 --- a/backends/advanced/webui/src/pages/System.tsx +++ b/backends/advanced/webui/src/pages/System.tsx @@ -1,5 +1,5 @@ import { useState, useEffect } from 'react' -import { Settings, RefreshCw, CheckCircle, XCircle, AlertCircle, Activity, Users, Database, Server, Volume2, Mic, Brain } from 'lucide-react' +import { Settings, RefreshCw, CheckCircle, XCircle, AlertCircle, Activity, Users, Database, Server, Volume2, Mic, Brain, Sliders } from 'lucide-react' import { systemApi, speakerApi } from '../services/api' import { useAuth } from '../contexts/AuthContext' import MemorySettings from '../components/MemorySettings' @@ -92,6 +92,14 @@ export default function System() { const [providerLoading, setProviderLoading] = useState(false) const [providerMessage, setProviderMessage] = useState('') + // Miscellaneous settings state + const [miscSettings, setMiscSettings] = useState({ + always_persist_enabled: false, + use_provider_segments: false + }) + const [miscLoading, setMiscLoading] = useState(false) + const [miscMessage, setMiscMessage] = useState('') + const { isAdmin } = useAuth() const loadSystemData = async () => { @@ -167,6 +175,38 @@ export default function System() { } } + const loadMiscSettings = async () => { + try { + setMiscLoading(true) + const response = await systemApi.getMiscSettings() + if (response.data.status === 'success') { + setMiscSettings(response.data.settings) + } + } catch (err: any) { + console.error('Failed to load misc settings:', err) + } finally { + setMiscLoading(false) + } + } + + const saveMiscSettings = async () => { + try { + setMiscLoading(true) + setMiscMessage('') + const response = await systemApi.saveMiscSettings(miscSettings) + if (response.data.status === 'success') { + setMiscMessage('Settings saved successfully') + setTimeout(() => setMiscMessage(''), 3000) + } else { + setMiscMessage('Failed 
to save settings') + } + } catch (err: any) { + setMiscMessage('Error: ' + (err.response?.data?.detail || err.message)) + } finally { + setMiscLoading(false) + } + } + const saveMemoryProvider = async () => { if (selectedProvider === currentProvider) { setProviderMessage('Provider is already set to ' + selectedProvider) @@ -211,6 +251,7 @@ export default function System() { loadSystemData() loadDiarizationSettings() loadMemoryProvider() + loadMiscSettings() }, [isAdmin]) const getStatusIcon = (healthy: boolean) => { @@ -804,6 +845,86 @@ export default function System() {

+ {/* Miscellaneous Configuration */} +
+

+ + Miscellaneous Configuration +

+ +
+ {/* Always Persist Audio Toggle */} +
+
+
+ Always Persist Audio +
+
+ Create conversations for all audio sessions, even when no speech is detected +
+
+ +
+ + {/* Use Provider Segments Toggle */} +
+
+
+ Use Provider Segments +
+
+ Use speech segments from transcription provider instead of speaker service diarization +
+
+ +
+ + {/* Status Message */} + {miscMessage && ( +
+ {miscMessage} +
+ )} + + {/* Save Button */} +
+ +
+
+
+ {/* Speaker Configuration */} diff --git a/backends/advanced/webui/src/services/api.ts b/backends/advanced/webui/src/services/api.ts index 3f34b75f..d8486c6a 100644 --- a/backends/advanced/webui/src/services/api.ts +++ b/backends/advanced/webui/src/services/api.ts @@ -232,6 +232,11 @@ export const systemApi = { getActiveClients: () => api.get('/api/clients/active'), getDiarizationSettings: () => api.get('/api/diarization-settings'), saveDiarizationSettings: (settings: any) => api.post('/api/diarization-settings', settings), + + // Miscellaneous Configuration Settings + getMiscSettings: () => api.get('/api/misc-settings'), + saveMiscSettings: (settings: { always_persist_enabled?: boolean; use_provider_segments?: boolean }) => + api.post('/api/misc-settings', settings), // Memory Configuration Management getMemoryConfigRaw: () => api.get('/api/admin/memory/config/raw'), diff --git a/config/defaults.yml b/config/defaults.yml index 69b72d94..83a9c405 100644 --- a/config/defaults.yml +++ b/config/defaults.yml @@ -311,6 +311,12 @@ backend: model: gpt-4o-mini timeout: 60 + # Audio processing settings + audio: + # When enabled, always persist audio even if no speech is detected + # This creates conversations for all audio sessions regardless of speech content + always_persist_enabled: false + # Transcription provider configuration transcription: provider: deepgram # or parakeet diff --git a/tests/bin/start-rebuild-containers.sh b/tests/bin/start-rebuild-containers.sh index cc348f0d..28cb0667 100755 --- a/tests/bin/start-rebuild-containers.sh +++ b/tests/bin/start-rebuild-containers.sh @@ -5,6 +5,7 @@ set -e SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)" +TESTS_DIR="$SCRIPT_DIR/.." BACKEND_DIR="$SCRIPT_DIR/../../backends/advanced" cd "$BACKEND_DIR" @@ -16,6 +17,14 @@ echo " 2. Rebuild images with latest code" echo " 3. Start containers" echo "" +# Load environment variables from .env.test (API keys, etc.) 
+if [ -f "$TESTS_DIR/setup/.env.test" ]; then + echo "📝 Loading environment variables from .env.test..." + set -a + source "$TESTS_DIR/setup/.env.test" + set +a +fi + # Stop containers echo "🛑 Stopping containers..." docker compose -f docker-compose-test.yml stop diff --git a/tests/configs/deepgram-openai.yml b/tests/configs/deepgram-openai.yml index e15ec756..9e039a9a 100644 --- a/tests/configs/deepgram-openai.yml +++ b/tests/configs/deepgram-openai.yml @@ -114,6 +114,15 @@ models: smart_format: true start: message: {} +backend: + audio: + # Enable always_persist for testing - creates placeholder conversations immediately + always_persist_enabled: true + speaker_recognition: - enabled: false + enabled: true # Using mock client by default (USE_MOCK_SPEAKER_CLIENT=true in docker-compose-test.yml) timeout: 60 + # To test with REAL speaker recognition service: + # 1. Start speaker-recognition service: cd extras/speaker-recognition && docker compose up -d + # 2. Set USE_MOCK_SPEAKER_CLIENT=false in docker-compose-test.yml (line 62 and 241) + # 3. 
Rebuild test containers: make start-rebuild diff --git a/tests/configs/mock-services.yml b/tests/configs/mock-services.yml index 28644097..dd6a097c 100644 --- a/tests/configs/mock-services.yml +++ b/tests/configs/mock-services.yml @@ -88,6 +88,9 @@ models: start: message: {} backend: + audio: + # Enable always_persist for testing - creates placeholder conversations immediately + always_persist_enabled: true transcription: use_provider_segments: true speaker_recognition: diff --git a/tests/integration/always_persist_audio_tests.robot b/tests/integration/always_persist_audio_tests.robot index 9d3677bf..824907e0 100644 --- a/tests/integration/always_persist_audio_tests.robot +++ b/tests/integration/always_persist_audio_tests.robot @@ -29,9 +29,6 @@ ${TEST_AUDIO_FILE} ${CURDIR}/../test_assets/DIY_Experts_Glass_Blowing_16khz_m *** Keywords *** Suite Setup Actions [Documentation] Setup actions before running tests - # Start mock transcription server - Start Mock Transcription Server - # Initialize API session for test user ${session}= Get Admin API Session Set Suite Variable ${API_SESSION} ${session} @@ -41,9 +38,6 @@ Suite Teardown Actions # Cleanup any remaining audio streams Cleanup All Audio Streams - # Stop mock transcription server - Stop Mock Transcription Server - Test Cleanup [Documentation] Cleanup after each test # Stop any active audio streams @@ -61,8 +55,8 @@ Placeholder Conversation Created Immediately With Always Persist ${device_name}= Set Variable test-placeholder ${client_id}= Get Client ID From Device Name ${device_name} - # Get baseline conversation count - ${convs_before}= Get User Conversations + # Get baseline conversation count for THIS client_id only + ${convs_before}= Get Conversations By Client ID ${client_id} ${count_before}= Get Length ${convs_before} # Start stream with always_persist=true @@ -70,12 +64,12 @@ Placeholder Conversation Created Immediately With Always Persist # Conversation created by audio persistence job (takes 3-5s to 
start) Sleep 5s # Wait for audio persistence job to create placeholder - ${convs_after}= Get User Conversations + ${convs_after}= Get Conversations By Client ID ${client_id} ${count_after}= Get Length ${convs_after} - # Verify new conversation created + # Verify new conversation created for this client Should Be True ${count_after} == ${count_before} + 1 - ... Expected 1 new conversation, found ${count_after} - ${count_before} + ... Expected 1 new conversation for client ${client_id}, found ${count_after} - ${count_before} # Find the new conversation (most recent) ${new_conv}= Set Variable ${convs_after}[0] @@ -97,33 +91,42 @@ Placeholder Conversation Created Immediately With Always Persist Normal Behavior Preserved When Always Persist Disabled - [Documentation] Verify that when always_persist=false (default), the system + [Documentation] Verify that when always_persist=false, the system ... behaves as before: no conversation created until speech detected. + ... This test temporarily disables the global always_persist setting. 
[Tags] conversation audio-streaming ${device_name}= Set Variable test-normal ${client_id}= Get Client ID From Device Name ${device_name} - # Get baseline conversation count - ${convs_before}= Get User Conversations - ${count_before}= Get Length ${convs_before} + # Temporarily disable always_persist for this test + Set Always Persist Enabled ${API_SESSION} ${False} - # Start stream with always_persist=false (default behavior) - ${stream_id}= Open Audio Stream device_name=${device_name} + TRY + # Get baseline conversation count for THIS client_id only + ${convs_before}= Get Conversations By Client ID ${client_id} + ${count_before}= Get Length ${convs_before} - # Conversation should NOT exist immediately - Sleep 3s - ${convs_after}= Get User Conversations - ${count_after}= Get Length ${convs_after} + # Start stream with always_persist=false (disabled via API above) + ${stream_id}= Open Audio Stream device_name=${device_name} - # Verify no new conversation created yet - Should Be Equal As Integers ${count_after} ${count_before} - ... Expected no conversation until speech detected, but found ${count_after} - ${count_before} new conversations + # Conversation should NOT exist immediately for this client + Sleep 3s + ${convs_after}= Get Conversations By Client ID ${client_id} + ${count_after}= Get Length ${convs_after} - Log ✅ No placeholder conversation created (always_persist=false) + # Verify no new conversation created yet for this client + Should Be Equal As Integers ${count_after} ${count_before} + ... 
Expected no conversation for client ${client_id}, but found ${count_after} - ${count_before} new conversations - # Close stream - Close Audio Stream ${stream_id} + Log ✅ No placeholder conversation created (always_persist=false) + + # Close stream + Close Audio Stream ${stream_id} + FINALLY + # Re-enable always_persist for other tests + Set Always Persist Enabled ${API_SESSION} ${True} + END Redis Key Set Immediately With Always Persist @@ -135,8 +138,8 @@ Redis Key Set Immediately With Always Persist ${device_name}= Set Variable test-redis-key ${client_id}= Get Client ID From Device Name ${device_name} - # Get baseline conversation count - ${convs_before}= Get User Conversations + # Get baseline conversation count for THIS client_id only + ${convs_before}= Get Conversations By Client ID ${client_id} ${count_before}= Get Length ${convs_before} # Start stream with always_persist=true @@ -147,12 +150,12 @@ Redis Key Set Immediately With Always Persist # Get conversation (created by audio persistence job) Sleep 5s # Wait for audio persistence job to create placeholder - ${convs_after}= Get User Conversations + ${convs_after}= Get Conversations By Client ID ${client_id} ${count_after}= Get Length ${convs_after} - # Verify new conversation created + # Verify new conversation created for this client Should Be True ${count_after} == ${count_before} + 1 - ... Expected 1 new conversation, found ${count_after} - ${count_before} + ... 
Expected 1 new conversation for client ${client_id}, found ${count_after} - ${count_before} # Get the new conversation (most recent) ${conversation}= Set Variable ${convs_after}[0] @@ -177,9 +180,18 @@ Multiple Sessions Create Separate Conversations ${device_name}= Set Variable test-multi - # Get baseline conversation count - ${convs_before}= Get User Conversations - ${count_before}= Get Length ${convs_before} + # Get client IDs for each device + ${client_id_1}= Get Client ID From Device Name ${device_name}-1 + ${client_id_2}= Get Client ID From Device Name ${device_name}-2 + ${client_id_3}= Get Client ID From Device Name ${device_name}-3 + + # Get baseline conversation counts for each client + ${convs_before_1}= Get Conversations By Client ID ${client_id_1} + ${convs_before_2}= Get Conversations By Client ID ${client_id_2} + ${convs_before_3}= Get Conversations By Client ID ${client_id_3} + ${count_before_1}= Get Length ${convs_before_1} + ${count_before_2}= Get Length ${convs_before_2} + ${count_before_3}= Get Length ${convs_before_3} # Start 3 separate sessions ${stream_1}= Open Audio Stream With Always Persist device_name=${device_name}-1 @@ -189,23 +201,36 @@ Multiple Sessions Create Separate Conversations ${stream_3}= Open Audio Stream With Always Persist device_name=${device_name}-3 Sleep 5s # Wait for all audio persistence jobs to create placeholders - # Verify 3 new conversations created - ${convs_after}= Get User Conversations - ${count_after}= Get Length ${convs_after} - - ${new_count}= Evaluate ${count_after} - ${count_before} - Should Be Equal As Integers ${new_count} 3 - ... 
Expected 3 new conversations, found ${new_count} + # Verify each client has exactly 1 new conversation + ${convs_after_1}= Get Conversations By Client ID ${client_id_1} + ${convs_after_2}= Get Conversations By Client ID ${client_id_2} + ${convs_after_3}= Get Conversations By Client ID ${client_id_3} + ${count_after_1}= Get Length ${convs_after_1} + ${count_after_2}= Get Length ${convs_after_2} + ${count_after_3}= Get Length ${convs_after_3} + + ${new_count_1}= Evaluate ${count_after_1} - ${count_before_1} + ${new_count_2}= Evaluate ${count_after_2} - ${count_before_2} + ${new_count_3}= Evaluate ${count_after_3} - ${count_before_3} + + Should Be Equal As Integers ${new_count_1} 1 + ... Expected 1 new conversation for client ${client_id_1}, found ${new_count_1} + Should Be Equal As Integers ${new_count_2} 1 + ... Expected 1 new conversation for client ${client_id_2}, found ${new_count_2} + Should Be Equal As Integers ${new_count_3} 1 + ... Expected 1 new conversation for client ${client_id_3}, found ${new_count_3} # Verify each conversation has unique conversation_id - ${conv_ids}= Create List - FOR ${i} IN RANGE 3 - ${conv}= Set Variable ${convs_after}[${i}] - ${conv_id}= Set Variable ${conv}[conversation_id] - List Should Not Contain Value ${conv_ids} ${conv_id} - ... Duplicate conversation_id found: ${conv_id} - Append To List ${conv_ids} ${conv_id} - END + ${conv_id_1}= Set Variable ${convs_after_1}[0][conversation_id] + ${conv_id_2}= Set Variable ${convs_after_2}[0][conversation_id] + ${conv_id_3}= Set Variable ${convs_after_3}[0][conversation_id] + + Should Not Be Equal ${conv_id_1} ${conv_id_2} + ... Duplicate conversation_id found: ${conv_id_1} + Should Not Be Equal ${conv_id_2} ${conv_id_3} + ... Duplicate conversation_id found: ${conv_id_2} + Should Not Be Equal ${conv_id_1} ${conv_id_3} + ... 
Duplicate conversation_id found: ${conv_id_1} Log ✅ 3 separate conversations created with unique IDs @@ -219,9 +244,10 @@ Audio Chunks Persisted Despite Transcription Failure [Documentation] Verify that when transcription fails (e.g., invalid Deepgram key), ... audio chunks are still saved to MongoDB. ... - ... NOTE: This test requires misconfigured transcription service to trigger failure. - ... Test uses mock-transcription-failure.yml config with invalid API key. - [Tags] audio-streaming mongodb requires-api-keys + ... IMPORTANT: This test requires the mock-transcription-failure.yml config. + ... Run with: make test CONFIG=mock-transcription-failure.yml + ... The test will SKIP if transcription succeeds (real API keys). + [Tags] audio-streaming infra slow ${device_name}= Set Variable test-persist-fail ${client_id}= Get Client ID From Device Name ${device_name} @@ -240,19 +266,20 @@ Audio Chunks Persisted Despite Transcription Failure ${total_chunks}= Close Audio Stream ${stream_id} Log Sent ${total_chunks} total chunks - # Wait for processing to attempt and fail - Sleep 15s - - # Get the conversation (most recent) - ${conversations}= Get User Conversations + # Get the conversation for this client - already created by audio persistence job + ${conversations}= Get Conversations By Client ID ${client_id} ${conversation}= Set Variable ${conversations}[0] ${conversation_id}= Set Variable ${conversation}[conversation_id] - # Verify processing_status is transcription_failed - Verify Conversation Processing Status ${conversation_id} transcription_failed + # Wait for transcription to attempt and fail (poll instead of fixed sleep) + Wait Until Keyword Succeeds 60s 5s + ... 
Verify Conversation Processing Status ${conversation_id} transcription_failed + + # Refresh conversation data after status change (title may have updated) + ${updated_conv}= Get Conversation By ID ${conversation_id} # Verify title indicates failure - ${title}= Set Variable ${conversation}[title] + ${title}= Set Variable ${updated_conv}[title] ${title_lower}= Convert To Lower Case ${title} Should Contain ${title_lower} transcription Should Contain ${title_lower} fail @@ -277,8 +304,8 @@ Conversation Updates To Completed When Transcription Succeeds ${device_name}= Set Variable test-complete ${client_id}= Get Client ID From Device Name ${device_name} - # Get baseline conversation count - ${convs_before}= Get User Conversations + # Get baseline conversation count for THIS client_id only + ${convs_before}= Get Conversations By Client ID ${client_id} ${count_before}= Get Length ${convs_before} # Start stream with always_persist=true @@ -286,7 +313,7 @@ Conversation Updates To Completed When Transcription Succeeds # Verify placeholder conversation exists (created by audio persistence job) Sleep 5s - ${convs_after}= Get User Conversations + ${convs_after}= Get Conversations By Client ID ${client_id} ${conversation}= Set Variable ${convs_after}[0] ${conversation_id}= Set Variable ${conversation}[conversation_id] diff --git a/tests/resources/conversation_keywords.robot b/tests/resources/conversation_keywords.robot index be75b13b..741ca6a8 100644 --- a/tests/resources/conversation_keywords.robot +++ b/tests/resources/conversation_keywords.robot @@ -16,6 +16,23 @@ Get User Conversations ${response}= GET On Session api /api/conversations expected_status=200 RETURN ${response.json()}[conversations] +Get Conversations By Client ID + [Documentation] Get conversations filtered by client_id + ... 
Returns only conversations matching the specified client_id + [Arguments] ${client_id} + + ${all_conversations}= Get User Conversations + ${filtered}= Create List + + FOR ${conv} IN @{all_conversations} + ${conv_client_id}= Set Variable ${conv}[client_id] + IF '${conv_client_id}' == '${client_id}' + Append To List ${filtered} ${conv} + END + END + + RETURN ${filtered} + Get Conversation By ID [Documentation] Get a specific conversation by ID [Arguments] ${conversation_id} diff --git a/tests/resources/system_keywords.robot b/tests/resources/system_keywords.robot index 6fb6b3d4..c084b2d0 100644 --- a/tests/resources/system_keywords.robot +++ b/tests/resources/system_keywords.robot @@ -78,3 +78,26 @@ Stop Mock Transcription Server ELSE Log ⚠️ Mock Transcription Server handle not found (may not have been started) END + + +Set Always Persist Enabled + [Documentation] Set the always_persist_enabled setting via API. + ... Requires admin session. + [Arguments] ${session} ${enabled}=${True} + + ${settings}= Create Dictionary always_persist_enabled=${enabled} + ${response}= POST On Session ${session} /api/misc-settings json=${settings} + Should Be Equal As Integers ${response.status_code} 200 + Log ✅ Set always_persist_enabled=${enabled} + + +Get Always Persist Enabled + [Documentation] Get the current always_persist_enabled setting via API. + ... Requires admin session. 
+ [Arguments] ${session} + + ${response}= GET On Session ${session} /api/misc-settings + Should Be Equal As Integers ${response.status_code} 200 + ${settings}= Set Variable ${response.json()} + ${enabled}= Set Variable ${settings}[always_persist_enabled] + RETURN ${enabled} From caafa1e3a5461c72daba5123ede18daf8b7ef453 Mon Sep 17 00:00:00 2001 From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com> Date: Wed, 28 Jan 2026 03:32:57 +0000 Subject: [PATCH 08/10] Enhance test framework and conversation handling for audio persistence - Updated the Makefile to introduce new test commands for running tests with and without API keys, improving CI integration. - Refactored integration tests to replace static sleep calls with polling mechanisms for conversation creation, enhancing reliability and reducing flakiness. - Added a new keyword to wait for conversations by client ID, streamlining test logic and improving readability. - Updated documentation in the Makefile to reflect changes in test commands and configurations. 
--- tests/Makefile | 51 +++++++++----- .../always_persist_audio_tests.robot | 70 ++++++++++--------- tests/resources/conversation_keywords.robot | 14 ++++ 3 files changed, 84 insertions(+), 51 deletions(-) diff --git a/tests/Makefile b/tests/Makefile index 303261bd..1927338b 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -5,7 +5,7 @@ containers-start containers-stop containers-restart containers-rebuild \ containers-start-rebuild containers-clean containers-status containers-logs \ start stop restart rebuild start-rebuild status logs \ - test test-quick test-slow test-sdk test-all-with-slow-and-sdk clean-all \ + test test-quick test-slow test-sdk test-no-api test-with-api-keys test-all-with-slow-and-sdk clean-all \ results results-path results-detailed # Default output directory @@ -27,14 +27,15 @@ ifdef CONFIG export TEST_CONFIG_FILE = $(CONFIG) endif else - export TEST_CONFIG_FILE ?= /app/test-configs/mock-services.yml + export TEST_CONFIG_FILE ?= /app/test-configs/deepgram-openai.yml endif help: @echo "Chronicle Test Targets:" @echo "" @echo "Quick Commands:" - @echo " make test - Start containers + run tests (excludes slow/sdk/API)" + @echo " make test - Start containers + run tests (uses real APIs)" + @echo " make test-no-api - Run tests without API keys (CI mode)" @echo " make test-quick - Run tests on existing containers" @echo " make start - Start test containers" @echo " make stop - Stop containers (keep volumes)" @@ -44,7 +45,7 @@ help: @echo " make status - Show container status" @echo "" @echo "Running Tests:" - @echo " make all - Run all tests (excludes slow/sdk/API)" + @echo " make all - Run all tests (excludes slow/sdk)" @echo " make endpoints - Run only endpoint tests" @echo " make integration - Run only integration tests" @echo " make infra - Run only infrastructure tests" @@ -52,7 +53,7 @@ help: @echo "Special Test Tags:" @echo " make test-slow - Run ONLY slow tests (backend restarts)" @echo " make test-sdk - Run ONLY SDK tests (unreleased)" 
- @echo " make test-with-api-keys - Run ONLY tests requiring API keys" + @echo " make test-no-api - Run tests without API keys (CI mode)" @echo " make test-all-with-slow-and-sdk - Run ALL tests including excluded" @echo "" @echo "Container Management:" @@ -80,30 +81,29 @@ help: @echo " CONFIG - Config file to use (e.g., deepgram-openai.yml or full path)" @echo "" @echo "Config Options:" - @echo " mock-services.yml - No API keys (default, excludes API tests)" - @echo " deepgram-openai.yml - Real API keys (required for API tests)" + @echo " deepgram-openai.yml - Real API keys (default)" + @echo " mock-services.yml - No API keys (for CI)" @echo " mock-transcription-failure.yml - Test transcription failure scenarios" @echo "" @echo "Examples:" - @echo " make test # Default (no API keys)" - @echo " make test-with-api-keys # Auto-switches to deepgram config" - @echo " make test CONFIG=deepgram-openai.yml # Custom config" + @echo " make test # Default (uses real APIs)" + @echo " make test-no-api # CI mode (no API keys)" + @echo " make test CONFIG=mock-services.yml # Custom config" @echo " make endpoints CONFIG=mock-services.yml # Endpoint tests with mock" @echo " make start-rebuild CONFIG=custom.yml # Rebuild with custom config" @echo " make containers-logs SERVICE=workers-test # View worker logs" @echo " make show-config # Show current config" -# Run all tests (excludes slow, sdk, and requires-api-keys tests for faster feedback) +# Run all tests (excludes slow and sdk tests for faster feedback) # Creates a persistent fixture conversation that won't be deleted between suites all: - @echo "Running all tests (excluding slow, sdk, and requires-api-keys tests)..." + @echo "Running all tests (excluding slow and sdk tests)..." 
CREATE_FIXTURE=true uv run --with-requirements test-requirements.txt robot --outputdir $(OUTPUTDIR) \ --name "All Tests" \ --console verbose \ --loglevel INFO:INFO \ --exclude slow \ --exclude sdk \ - --exclude requires-api-keys \ $(TEST_DIR) # Run only endpoint tests @@ -242,19 +242,15 @@ test-sdk: $(TEST_DIR) # Run ONLY tests that require API keys (Deepgram + OpenAI) -# Automatically switches to deepgram-openai.yml config +# Uses default deepgram-openai.yml config test-with-api-keys: - @echo "🔄 Switching to deepgram-openai.yml config..." + @echo "🧪 Running tests that require API keys..." @if [ -z "$$DEEPGRAM_API_KEY" ] || [ -z "$$OPENAI_API_KEY" ]; then \ echo "❌ Error: DEEPGRAM_API_KEY and OPENAI_API_KEY must be set"; \ echo " export DEEPGRAM_API_KEY='your-key-here'"; \ echo " export OPENAI_API_KEY='your-key-here'"; \ exit 1; \ fi - @$(MAKE) containers-stop - @TEST_CONFIG_FILE=/app/test-configs/deepgram-openai.yml $(MAKE) containers-start - @echo "✅ Containers running with deepgram-openai.yml" - @echo "🧪 Running API key tests..." CREATE_FIXTURE=true uv run --with-requirements test-requirements.txt robot --outputdir $(OUTPUTDIR) \ --name "API Key Tests" \ --console verbose \ @@ -262,6 +258,23 @@ test-with-api-keys: --include requires-api-keys \ $(TEST_DIR) +# Run tests without API keys (CI mode) +# Switches to mock-services.yml config and excludes requires-api-keys tests +test-no-api: + @echo "🔄 Running tests without API keys (CI mode)..." + @$(MAKE) containers-stop + @TEST_CONFIG_FILE=/app/test-configs/mock-services.yml $(MAKE) containers-start + @echo "✅ Containers running with mock-services.yml" + @echo "🧪 Running tests (excluding requires-api-keys)..." 
+ CREATE_FIXTURE=true uv run --with-requirements test-requirements.txt robot --outputdir $(OUTPUTDIR) \ + --name "No API Tests" \ + --console verbose \ + --loglevel INFO:INFO \ + --exclude slow \ + --exclude sdk \ + --exclude requires-api-keys \ + $(TEST_DIR) + # Run ALL tests including slow and SDK tests test-all-with-slow-and-sdk: @echo "Running ALL tests including slow and SDK tests..." diff --git a/tests/integration/always_persist_audio_tests.robot b/tests/integration/always_persist_audio_tests.robot index 824907e0..f4c31e09 100644 --- a/tests/integration/always_persist_audio_tests.robot +++ b/tests/integration/always_persist_audio_tests.robot @@ -58,18 +58,19 @@ Placeholder Conversation Created Immediately With Always Persist # Get baseline conversation count for THIS client_id only ${convs_before}= Get Conversations By Client ID ${client_id} ${count_before}= Get Length ${convs_before} + ${expected_count}= Evaluate ${count_before} + 1 # Start stream with always_persist=true ${stream_id}= Open Audio Stream With Always Persist device_name=${device_name} - # Conversation created by audio persistence job (takes 3-5s to start) - Sleep 5s # Wait for audio persistence job to create placeholder - ${convs_after}= Get Conversations By Client ID ${client_id} + # Poll for conversation to be created by audio persistence job (may take 10-15s to start) + ${convs_after}= Wait Until Keyword Succeeds 30s 2s + ... Wait For Conversation By Client ID ${client_id} ${expected_count} ${count_after}= Get Length ${convs_after} # Verify new conversation created for this client - Should Be True ${count_after} == ${count_before} + 1 - ... Expected 1 new conversation for client ${client_id}, found ${count_after} - ${count_before} + Should Be True ${count_after} >= ${expected_count} + ... 
Expected at least ${expected_count} conversation(s) for client ${client_id}, found ${count_after} # Find the new conversation (most recent) ${new_conv}= Set Variable ${convs_after}[0] @@ -141,6 +142,7 @@ Redis Key Set Immediately With Always Persist # Get baseline conversation count for THIS client_id only ${convs_before}= Get Conversations By Client ID ${client_id} ${count_before}= Get Length ${convs_before} + ${expected_count}= Evaluate ${count_before} + 1 # Start stream with always_persist=true ${stream_id}= Open Audio Stream With Always Persist device_name=${device_name} @@ -148,14 +150,14 @@ Redis Key Set Immediately With Always Persist # session_id == client_id for streaming mode (not stream_id!) ${session_id}= Set Variable ${client_id} - # Get conversation (created by audio persistence job) - Sleep 5s # Wait for audio persistence job to create placeholder - ${convs_after}= Get Conversations By Client ID ${client_id} + # Poll for conversation to be created by audio persistence job + ${convs_after}= Wait Until Keyword Succeeds 30s 2s + ... Wait For Conversation By Client ID ${client_id} ${expected_count} ${count_after}= Get Length ${convs_after} # Verify new conversation created for this client - Should Be True ${count_after} == ${count_before} + 1 - ... Expected 1 new conversation for client ${client_id}, found ${count_after} - ${count_before} + Should Be True ${count_after} >= ${expected_count} + ... 
Expected at least ${expected_count} conversation(s) for client ${client_id}, found ${count_after} # Get the new conversation (most recent) ${conversation}= Set Variable ${convs_after}[0] @@ -192,6 +194,9 @@ Multiple Sessions Create Separate Conversations ${count_before_1}= Get Length ${convs_before_1} ${count_before_2}= Get Length ${convs_before_2} ${count_before_3}= Get Length ${convs_before_3} + ${expected_count_1}= Evaluate ${count_before_1} + 1 + ${expected_count_2}= Evaluate ${count_before_2} + 1 + ${expected_count_3}= Evaluate ${count_before_3} + 1 # Start 3 separate sessions ${stream_1}= Open Audio Stream With Always Persist device_name=${device_name}-1 @@ -199,26 +204,26 @@ Multiple Sessions Create Separate Conversations ${stream_2}= Open Audio Stream With Always Persist device_name=${device_name}-2 Sleep 1s ${stream_3}= Open Audio Stream With Always Persist device_name=${device_name}-3 - Sleep 5s # Wait for all audio persistence jobs to create placeholders - # Verify each client has exactly 1 new conversation - ${convs_after_1}= Get Conversations By Client ID ${client_id_1} - ${convs_after_2}= Get Conversations By Client ID ${client_id_2} - ${convs_after_3}= Get Conversations By Client ID ${client_id_3} + # Poll for each conversation to be created (audio persistence jobs may take 10-15s) + ${convs_after_1}= Wait Until Keyword Succeeds 30s 2s + ... Wait For Conversation By Client ID ${client_id_1} ${expected_count_1} + ${convs_after_2}= Wait Until Keyword Succeeds 30s 2s + ... Wait For Conversation By Client ID ${client_id_2} ${expected_count_2} + ${convs_after_3}= Wait Until Keyword Succeeds 30s 2s + ... 
Wait For Conversation By Client ID ${client_id_3} ${expected_count_3} + ${count_after_1}= Get Length ${convs_after_1} ${count_after_2}= Get Length ${convs_after_2} ${count_after_3}= Get Length ${convs_after_3} - ${new_count_1}= Evaluate ${count_after_1} - ${count_before_1} - ${new_count_2}= Evaluate ${count_after_2} - ${count_before_2} - ${new_count_3}= Evaluate ${count_after_3} - ${count_before_3} - - Should Be Equal As Integers ${new_count_1} 1 - ... Expected 1 new conversation for client ${client_id_1}, found ${new_count_1} - Should Be Equal As Integers ${new_count_2} 1 - ... Expected 1 new conversation for client ${client_id_2}, found ${new_count_2} - Should Be Equal As Integers ${new_count_3} 1 - ... Expected 1 new conversation for client ${client_id_3}, found ${new_count_3} + # Verify each client has at least 1 new conversation + Should Be True ${count_after_1} >= ${expected_count_1} + ... Expected at least ${expected_count_1} conversation(s) for client ${client_id_1}, found ${count_after_1} + Should Be True ${count_after_2} >= ${expected_count_2} + ... Expected at least ${expected_count_2} conversation(s) for client ${client_id_2}, found ${count_after_2} + Should Be True ${count_after_3} >= ${expected_count_3} + ... Expected at least ${expected_count_3} conversation(s) for client ${client_id_3}, found ${count_after_3} # Verify each conversation has unique conversation_id ${conv_id_1}= Set Variable ${convs_after_1}[0][conversation_id] @@ -255,8 +260,9 @@ Audio Chunks Persisted Despite Transcription Failure # Start stream with always_persist=true ${stream_id}= Open Audio Stream With Always Persist device_name=${device_name} - # Wait for audio persistence job to start consuming from Redis Stream - Sleep 2s + # Poll for conversation to be created by audio persistence job + ${conversations}= Wait Until Keyword Succeeds 30s 2s + ... 
Wait For Conversation By Client ID ${client_id} 1 # Send audio chunks (transcription will fail due to invalid API key in config) # Use realtime pacing to ensure chunks arrive while persistence job is running @@ -266,8 +272,7 @@ Audio Chunks Persisted Despite Transcription Failure ${total_chunks}= Close Audio Stream ${stream_id} Log Sent ${total_chunks} total chunks - # Get the conversation for this client - already created by audio persistence job - ${conversations}= Get Conversations By Client ID ${client_id} + # Get the conversation for this client ${conversation}= Set Variable ${conversations}[0] ${conversation_id}= Set Variable ${conversation}[conversation_id] @@ -307,13 +312,14 @@ Conversation Updates To Completed When Transcription Succeeds # Get baseline conversation count for THIS client_id only ${convs_before}= Get Conversations By Client ID ${client_id} ${count_before}= Get Length ${convs_before} + ${expected_count}= Evaluate ${count_before} + 1 # Start stream with always_persist=true ${stream_id}= Open Audio Stream With Always Persist device_name=${device_name} - # Verify placeholder conversation exists (created by audio persistence job) - Sleep 5s - ${convs_after}= Get Conversations By Client ID ${client_id} + # Poll for placeholder conversation to be created by audio persistence job + ${convs_after}= Wait Until Keyword Succeeds 30s 2s + ... Wait For Conversation By Client ID ${client_id} ${expected_count} ${conversation}= Set Variable ${convs_after}[0] ${conversation_id}= Set Variable ${conversation}[conversation_id] diff --git a/tests/resources/conversation_keywords.robot b/tests/resources/conversation_keywords.robot index 741ca6a8..f83efc6e 100644 --- a/tests/resources/conversation_keywords.robot +++ b/tests/resources/conversation_keywords.robot @@ -33,6 +33,20 @@ Get Conversations By Client ID RETURN ${filtered} +Wait For Conversation By Client ID + [Documentation] Wait for at least one conversation to exist for the given client_id. + ... 
Performs a single check and fails if the count is not yet reached; + ... callers wrap it in Wait Until Keyword Succeeds to poll. + ... Returns the list of conversations for that client. + [Arguments] ${client_id} ${expected_count}=1 + + ${conversations}= Get Conversations By Client ID ${client_id} + ${count}= Get Length ${conversations} + + Should Be True ${count} >= ${expected_count} + ... Expected at least ${expected_count} conversation(s) for client ${client_id}, found ${count} + + RETURN ${conversations} + Get Conversation By ID [Documentation] Get a specific conversation by ID [Arguments] ${conversation_id} From 39fbd2fdcd4d9683002b54ebb9dc64c774e0db62 Mon Sep 17 00:00:00 2001 From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com> Date: Wed, 28 Jan 2026 05:31:18 +0000 Subject: [PATCH 09/10] Implement OpenMemory user registration and enhance MCP client functionality - Added an asynchronous function to initialize and register an OpenMemory user if the OpenMemory MCP provider is configured, improving user management. - Enhanced the MCPClient to accept custom metadata when adding memories, allowing for better tracking and filtering of memories by user. - Updated the OpenMemoryMCPService to utilize the configured OpenMemory user for memory operations, ensuring accurate user context in memory processing. - Modified integration tests to use shorter device names for consistency and to avoid truncation issues, improving test reliability. 
--- .../src/advanced_omi_backend/app_factory.py | 49 ++++++++++++ .../services/memory/providers/mcp_client.py | 31 ++++---- .../memory/providers/openmemory_mcp.py | 78 +++++++++---------- tests/infrastructure/infra_tests.robot | 2 +- .../always_persist_audio_tests.robot | 15 ++-- 5 files changed, 109 insertions(+), 66 deletions(-) diff --git a/backends/advanced/src/advanced_omi_backend/app_factory.py b/backends/advanced/src/advanced_omi_backend/app_factory.py index 79f893c0..4458ed9e 100644 --- a/backends/advanced/src/advanced_omi_backend/app_factory.py +++ b/backends/advanced/src/advanced_omi_backend/app_factory.py @@ -42,6 +42,52 @@ application_logger = logging.getLogger("audio_processing") +async def initialize_openmemory_user() -> None: + """Initialize and register OpenMemory user if using OpenMemory MCP provider. + + This function: + - Checks if OpenMemory MCP is configured as the memory provider + - Registers the configured user with OpenMemory server + - Creates a test memory and deletes it to trigger user creation + - Logs success or warning if OpenMemory is not reachable + """ + from advanced_omi_backend.services.memory.config import build_memory_config_from_env, MemoryProvider + + memory_provider_config = build_memory_config_from_env() + + if memory_provider_config.memory_provider != MemoryProvider.OPENMEMORY_MCP: + return + + try: + from advanced_omi_backend.services.memory.providers.mcp_client import MCPClient + + # Get configured user_id and server_url + openmemory_config = memory_provider_config.openmemory_config + user_id = openmemory_config.get("user_id", "openmemory") if openmemory_config else "openmemory" + server_url = openmemory_config.get("server_url", "http://host.docker.internal:8765") if openmemory_config else "http://host.docker.internal:8765" + client_name = openmemory_config.get("client_name", "chronicle") if openmemory_config else "chronicle" + + application_logger.info(f"Registering OpenMemory user: {user_id} at {server_url}") + + # 
Make a lightweight registration call (create and delete dummy memory) + async with MCPClient(server_url=server_url, client_name=client_name, user_id=user_id) as client: + # Test connection first + is_connected = await client.test_connection() + if is_connected: + # Create and immediately delete a dummy memory to trigger user creation + memory_ids = await client.add_memories("Chronicle initialization - user registration test") + if memory_ids: + # Delete the test memory + await client.delete_memory(memory_ids[0]) + application_logger.info(f"✅ Registered OpenMemory user: {user_id}") + else: + application_logger.warning(f"⚠️ OpenMemory MCP not reachable at {server_url}") + application_logger.info("User will be auto-created on first memory operation") + except Exception as e: + application_logger.warning(f"⚠️ Could not register OpenMemory user: {e}") + application_logger.info("User will be auto-created on first memory operation") + + @asynccontextmanager async def lifespan(app: FastAPI): """Manage application lifespan events.""" @@ -126,6 +172,9 @@ async def lifespan(app: FastAPI): # Memory service will be lazily initialized when first used application_logger.info("Memory service will be initialized on first use (lazy loading)") + # Register OpenMemory user if using openmemory_mcp provider + await initialize_openmemory_user() + # SystemTracker is used for monitoring and debugging application_logger.info("Using SystemTracker for monitoring and debugging") diff --git a/backends/advanced/src/advanced_omi_backend/services/memory/providers/mcp_client.py b/backends/advanced/src/advanced_omi_backend/services/memory/providers/mcp_client.py index 971c41f3..8c5b5389 100644 --- a/backends/advanced/src/advanced_omi_backend/services/memory/providers/mcp_client.py +++ b/backends/advanced/src/advanced_omi_backend/services/memory/providers/mcp_client.py @@ -61,7 +61,7 @@ async def __aenter__(self): async def __aexit__(self, exc_type, exc_val, exc_tb): await self.close() - async def 
add_memories(self, text: str) -> List[str]: + async def add_memories(self, text: str, metadata: Dict[str, Any] = None) -> List[str]: """Add memories to the OpenMemory server. Uses the REST API to create memories. OpenMemory will handle: @@ -109,17 +109,22 @@ async def add_memories(self, text: str) -> List[str]: memory_logger.error("No apps found in OpenMemory - cannot create memory") raise MCPError("No apps found in OpenMemory") + # Merge custom metadata with default metadata + default_metadata = { + "source": "chronicle", + "client": self.client_name, + "user_email": self.user_email + } + if metadata: + default_metadata.update(metadata) + # Use REST API endpoint for creating memories # The 'app' field can be either app name (string) or app UUID payload = { "user_id": self.user_id, "text": text, - "app": self.client_name, # Use app name (OpenMemory accepts name or UUID) - "metadata": { - "source": "friend_lite", - "client": self.client_name, - "user_email": self.user_email - }, + "app": self.client_name, + "metadata": default_metadata, "infer": True } @@ -127,17 +132,7 @@ async def add_memories(self, text: str) -> List[str]: response = await self.client.post( f"{self.server_url}/api/v1/memories/", - json={ - "user_id": self.user_id, - "text": text, - "app": self.client_name, # Use app name (OpenMemory accepts name or UUID) - "metadata": { - "source": "chronicle", - "client": self.client_name, - "user_email": self.user_email - }, - "infer": True - } + json=payload ) response_body = response.text[:500] if response.status_code != 200 else "..." 
diff --git a/backends/advanced/src/advanced_omi_backend/services/memory/providers/openmemory_mcp.py b/backends/advanced/src/advanced_omi_backend/services/memory/providers/openmemory_mcp.py index 510dd019..922f2555 100644 --- a/backends/advanced/src/advanced_omi_backend/services/memory/providers/openmemory_mcp.py +++ b/backends/advanced/src/advanced_omi_backend/services/memory/providers/openmemory_mcp.py @@ -142,25 +142,21 @@ async def add_memory( memory_logger.info(f"Skipping empty transcript for {source_id}") return True, [] - # Pass Friend-Lite user details to OpenMemory for proper user tracking - # OpenMemory will auto-create users if they don't exist - original_user_id = self.mcp_client.user_id - original_user_email = self.mcp_client.user_email - self.mcp_client.user_id = user_id # Use the actual Chronicle user's ID - self.mcp_client.user_email = user_email # Use the actual user's email + # Use configured OpenMemory user (from config) for all Chronicle users + # Chronicle user_id and email are stored in metadata for filtering + enriched_transcript = f"[Source: {source_id}, Client: {client_id}] {transcript}" - try: - # Thin client approach: Send raw transcript to OpenMemory MCP server - # OpenMemory handles: extraction, deduplication, vector storage, ACL - enriched_transcript = f"[Source: {source_id}, Client: {client_id}] {transcript}" + memory_logger.info(f"Delegating memory processing to OpenMemory for user {user_id} (email: {user_email}), source {source_id}") - memory_logger.info(f"Delegating memory processing to OpenMemory for user {user_id} (email: {user_email}), source {source_id}") - memory_ids = await self.mcp_client.add_memories(text=enriched_transcript) + # Pass Chronicle user details in metadata for filtering/search + metadata = { + "chronicle_user_id": user_id, + "chronicle_user_email": user_email, + "source_id": source_id, + "client_id": client_id + } - finally: - # Restore original user context - self.mcp_client.user_id = original_user_id - 
self.mcp_client.user_email = original_user_email + memory_ids = await self.mcp_client.add_memories(text=enriched_transcript, metadata=metadata) # Update database relationships if helper provided if memory_ids and db_helper: @@ -204,23 +200,27 @@ async def search_memories( """ if not self._initialized: await self.initialize() - - # Update MCP client user context for this search operation - original_user_id = self.mcp_client.user_id - self.mcp_client.user_id = user_id # Use the actual Chronicle user's ID + # Use configured OpenMemory user (not Chronicle user_id) + # Search all memories, then filter by chronicle_user_id in metadata try: + # Get more results since we'll filter by user results = await self.mcp_client.search_memory( query=query, - limit=limit + limit=limit * 3 # Get extra to account for filtering ) - # Convert MCP results to MemoryEntry objects + # Convert MCP results to MemoryEntry objects and filter by user memory_entries = [] for result in results: - memory_entry = self._mcp_result_to_memory_entry(result, user_id) - if memory_entry: - memory_entries.append(memory_entry) + # Check if memory belongs to this Chronicle user via metadata + metadata = result.get("metadata", {}) + if metadata.get("chronicle_user_id") == user_id: + memory_entry = self._mcp_result_to_memory_entry(result, user_id) + if memory_entry: + memory_entries.append(memory_entry) + if len(memory_entries) >= limit: + break # Got enough results memory_logger.info(f"🔍 Found {len(memory_entries)} memories for query '{query}' (user: {user_id})") return memory_entries @@ -231,9 +231,6 @@ async def search_memories( except Exception as e: memory_logger.error(f"Search memories failed: {e}") return [] - finally: - # Restore original user context - self.mcp_client.user_id = original_user_id async def get_all_memories( self, @@ -254,20 +251,24 @@ async def get_all_memories( """ if not self._initialized: await self.initialize() - - # Update MCP client user context for this operation - 
original_user_id = self.mcp_client.user_id - self.mcp_client.user_id = user_id # Use the actual Chronicle user's ID + # Use configured OpenMemory user (not Chronicle user_id) + # List all memories, then filter by chronicle_user_id in metadata try: - results = await self.mcp_client.list_memories(limit=limit) + # Get more results since we'll filter by user + results = await self.mcp_client.list_memories(limit=limit * 3) - # Convert MCP results to MemoryEntry objects + # Convert MCP results to MemoryEntry objects and filter by user memory_entries = [] for result in results: - memory_entry = self._mcp_result_to_memory_entry(result, user_id) - if memory_entry: - memory_entries.append(memory_entry) + # Check if memory belongs to this Chronicle user via metadata + metadata = result.get("metadata", {}) + if metadata.get("chronicle_user_id") == user_id: + memory_entry = self._mcp_result_to_memory_entry(result, user_id) + if memory_entry: + memory_entries.append(memory_entry) + if len(memory_entries) >= limit: + break # Got enough results memory_logger.info(f"📚 Retrieved {len(memory_entries)} memories for user {user_id}") return memory_entries @@ -278,9 +279,6 @@ async def get_all_memories( except Exception as e: memory_logger.error(f"Get all memories failed: {e}") return [] - finally: - # Restore original user_id - self.mcp_client.user_id = original_user_id async def get_memory(self, memory_id: str, user_id: Optional[str] = None) -> Optional[MemoryEntry]: """Get a specific memory by ID. diff --git a/tests/infrastructure/infra_tests.robot b/tests/infrastructure/infra_tests.robot index 783329ae..53d90f3a 100644 --- a/tests/infrastructure/infra_tests.robot +++ b/tests/infrastructure/infra_tests.robot @@ -159,7 +159,7 @@ Worker Registration Loss Detection Test ... - Health endpoint reports 0 workers when registration is lost ... - Self-healing mechanism detects the issue ... 
- Workers automatically re-register within monitoring interval - [Tags] infra queue + [Tags] infra queue slow # Step 1: Verify workers are initially registered Log To Console \n📊 Step 1: Check initial worker registration diff --git a/tests/integration/always_persist_audio_tests.robot b/tests/integration/always_persist_audio_tests.robot index f4c31e09..d27dee81 100644 --- a/tests/integration/always_persist_audio_tests.robot +++ b/tests/integration/always_persist_audio_tests.robot @@ -180,12 +180,13 @@ Multiple Sessions Create Separate Conversations ... creates separate placeholder conversations for each session. [Tags] conversation audio-streaming - ${device_name}= Set Variable test-multi + # NOTE: Device names must be <=10 chars to be unique (backend truncates to 10 chars) + # Using short names: multi-1, multi-2, multi-3 (7 chars each) # Get client IDs for each device - ${client_id_1}= Get Client ID From Device Name ${device_name}-1 - ${client_id_2}= Get Client ID From Device Name ${device_name}-2 - ${client_id_3}= Get Client ID From Device Name ${device_name}-3 + ${client_id_1}= Get Client ID From Device Name multi-1 + ${client_id_2}= Get Client ID From Device Name multi-2 + ${client_id_3}= Get Client ID From Device Name multi-3 # Get baseline conversation counts for each client ${convs_before_1}= Get Conversations By Client ID ${client_id_1} @@ -199,11 +200,11 @@ Multiple Sessions Create Separate Conversations ${expected_count_3}= Evaluate ${count_before_3} + 1 # Start 3 separate sessions - ${stream_1}= Open Audio Stream With Always Persist device_name=${device_name}-1 + ${stream_1}= Open Audio Stream With Always Persist device_name=multi-1 Sleep 1s - ${stream_2}= Open Audio Stream With Always Persist device_name=${device_name}-2 + ${stream_2}= Open Audio Stream With Always Persist device_name=multi-2 Sleep 1s - ${stream_3}= Open Audio Stream With Always Persist device_name=${device_name}-3 + ${stream_3}= Open Audio Stream With Always Persist device_name=multi-3 
# Poll for each conversation to be created (audio persistence jobs may take 10-15s) ${convs_after_1}= Wait Until Keyword Succeeds 30s 2s From b641f0a4e1156c790b7c0cf98c4e6163c1a95bf5 Mon Sep 17 00:00:00 2001 From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com> Date: Wed, 28 Jan 2026 05:59:15 +0000 Subject: [PATCH 10/10] Add Dockerfiles for mock LLM and streaming STT servers - Created Dockerfile for a mock LLM server, including dependencies and configuration for running the server on port 11435. - Created Dockerfile for a mock streaming STT server, including dependencies and configuration for running the server on port 9999. - Both Dockerfiles streamline the setup process for testing related functionalities. --- tests/Dockerfile.mock-llm | 15 +++++++++++++++ tests/Dockerfile.mock-streaming-stt | 15 +++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 tests/Dockerfile.mock-llm create mode 100644 tests/Dockerfile.mock-streaming-stt diff --git a/tests/Dockerfile.mock-llm b/tests/Dockerfile.mock-llm new file mode 100644 index 00000000..ef9efef5 --- /dev/null +++ b/tests/Dockerfile.mock-llm @@ -0,0 +1,15 @@ +FROM python:3.12-slim + +WORKDIR /app + +# Install dependencies +RUN pip install --no-cache-dir aiohttp numpy + +# Copy mock server script +COPY tests/libs/mock_llm_server.py . + +# Expose HTTP port +EXPOSE 11435 + +# Run server +CMD ["python", "mock_llm_server.py", "--host", "0.0.0.0", "--port", "11435"] diff --git a/tests/Dockerfile.mock-streaming-stt b/tests/Dockerfile.mock-streaming-stt new file mode 100644 index 00000000..964946f6 --- /dev/null +++ b/tests/Dockerfile.mock-streaming-stt @@ -0,0 +1,15 @@ +FROM python:3.12-slim + +WORKDIR /app + +# Install websockets dependency +RUN pip install --no-cache-dir websockets + +# Copy mock server script +COPY tests/libs/mock_streaming_stt_server.py . 
+ +# Expose WebSocket port +EXPOSE 9999 + +# Run server +CMD ["python", "mock_streaming_stt_server.py", "--host", "0.0.0.0", "--port", "9999"]