From d4a08fc7e036cf1337968211cbf101959c6e199d Mon Sep 17 00:00:00 2001 From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com> Date: Fri, 6 Feb 2026 20:30:20 +0000 Subject: [PATCH 1/3] Enhance test environment setup and configuration - Added a new interactive setup script for configuring test API keys (Deepgram, OpenAI) to streamline the testing process. - Introduced a template for the .env.test file to guide users in setting up their API keys. - Updated the Makefile to include a new 'configure' target for setting up API keys. - Enhanced the start-containers script to warn users if API keys are still set to placeholder values, improving user awareness during testing. - Updated .gitignore to include the new .env.test.template file. --- .gitignore | 1 + tests/Makefile | 7 +- tests/bin/start-containers.sh | 9 +++ tests/setup/.env.test.template | 5 ++ tests/setup/init.py | 121 +++++++++++++++++++++++++++++++++ 5 files changed, 142 insertions(+), 1 deletion(-) create mode 100644 tests/setup/.env.test.template create mode 100644 tests/setup/init.py diff --git a/.gitignore b/.gitignore index 8f39a10a..e860fd86 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,7 @@ **/memory_config.yaml !**/memory_config.yaml.template tests/setup/.env.test +!tests/setup/.env.test.template # Main config (user-specific) config/config.yml diff --git a/tests/Makefile b/tests/Makefile index b7c1c0ca..83d9126d 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -1,7 +1,7 @@ # Chronicle Test Makefile # Shortcuts for running tests and managing test containers -.PHONY: help all clean \ +.PHONY: help all clean configure \ containers-start containers-stop containers-restart containers-rebuild \ containers-start-rebuild containers-clean containers-status containers-logs \ start stop restart rebuild start-rebuild status logs \ @@ -35,6 +35,7 @@ help: @echo "Chronicle Test Targets:" @echo "" @echo "Quick Commands:" + @echo " make configure - Set up test API keys (Deepgram, OpenAI)" 
@echo " make test - Start containers + run tests (uses real APIs)" @echo " make test-no-api - Run tests without API keys (CI mode)" @echo " make test-quick - Run tests on existing containers" @@ -155,6 +156,10 @@ clean: rm -rf $(OUTPUTDIR) @echo "Clean complete!" +# Configure test API keys (interactive setup) +configure: + @uv run --with-requirements ../setup-requirements.txt python setup/init.py + # ============================================================================ # Container Management Targets # ============================================================================ diff --git a/tests/bin/start-containers.sh b/tests/bin/start-containers.sh index 0918b141..3b2e69c2 100755 --- a/tests/bin/start-containers.sh +++ b/tests/bin/start-containers.sh @@ -29,6 +29,15 @@ if [ -f "$TESTS_DIR/setup/.env.test" ]; then set -a source "$TESTS_DIR/setup/.env.test" set +a + + # Warn if API keys are still placeholders + if echo "$DEEPGRAM_API_KEY" | grep -qi "your-.*-here" || echo "$OPENAI_API_KEY" | grep -qi "your-.*-here"; then + echo "" + echo "โš ๏ธ WARNING: API keys in .env.test are still placeholder values." + echo " Tests tagged 'requires-api-keys' will fail." + echo " Run 'make configure' from tests/ to set your API keys." + echo "" + fi fi # Start containers diff --git a/tests/setup/.env.test.template b/tests/setup/.env.test.template new file mode 100644 index 00000000..470c09eb --- /dev/null +++ b/tests/setup/.env.test.template @@ -0,0 +1,5 @@ +# Test API Keys - Configure with: make configure (from tests/) +# Or run: uv run --with-requirements ../setup-requirements.txt python setup/init.py + +DEEPGRAM_API_KEY=your-deepgram-api-key-here +OPENAI_API_KEY=your-openai-api-key-here diff --git a/tests/setup/init.py b/tests/setup/init.py new file mode 100644 index 00000000..8223e619 --- /dev/null +++ b/tests/setup/init.py @@ -0,0 +1,121 @@ +""" +Chronicle Test Environment Setup Script. + +Interactive configuration for test API keys (Deepgram, OpenAI). 
+Follows the same pattern as backends/advanced/init.py. +""" + +import argparse +import shutil +import sys +from pathlib import Path + +from dotenv import set_key +from rich.console import Console +from rich.panel import Panel +from rich.text import Text + +# Add repo root to path for imports +sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent)) +from setup_utils import is_placeholder, prompt_with_existing_masked + +SETUP_DIR = Path(__file__).resolve().parent +ENV_TEST_PATH = SETUP_DIR / ".env.test" +ENV_TEST_TEMPLATE = SETUP_DIR / ".env.test.template" + +DEEPGRAM_PLACEHOLDERS = ["your-deepgram-api-key-here", "your_deepgram_api_key_here"] +OPENAI_PLACEHOLDERS = ["your-openai-api-key-here", "your_openai_api_key_here"] + + +def main(): + parser = argparse.ArgumentParser(description="Chronicle Test Environment Setup") + parser.add_argument( + "--deepgram-api-key", help="Deepgram API key (skips interactive prompt)" + ) + parser.add_argument( + "--openai-api-key", help="OpenAI API key (skips interactive prompt)" + ) + args = parser.parse_args() + + console = Console() + + console.print() + panel = Panel( + Text("Chronicle Test Environment Setup", style="cyan bold"), + style="cyan", + expand=False, + ) + console.print(panel) + console.print() + + # Ensure template exists + if not ENV_TEST_TEMPLATE.exists(): + console.print( + f"[red][ERROR][/red] Template not found: {ENV_TEST_TEMPLATE}" + ) + sys.exit(1) + + # Copy template to .env.test if it doesn't exist + if not ENV_TEST_PATH.exists(): + shutil.copy2(ENV_TEST_TEMPLATE, ENV_TEST_PATH) + console.print("[blue][INFO][/blue] Created .env.test from template") + else: + console.print("[blue][INFO][/blue] Found existing .env.test") + + env_path_str = str(ENV_TEST_PATH) + + # --- Deepgram API Key --- + if args.deepgram_api_key: + deepgram_key = args.deepgram_api_key + console.print("[green][OK][/green] Deepgram API key provided via argument") + else: + deepgram_key = prompt_with_existing_masked( + 
prompt_text="Deepgram API key", + env_file_path=env_path_str, + env_key="DEEPGRAM_API_KEY", + placeholders=DEEPGRAM_PLACEHOLDERS, + is_password=True, + ) + + if deepgram_key and not is_placeholder(deepgram_key, *DEEPGRAM_PLACEHOLDERS): + set_key(env_path_str, "DEEPGRAM_API_KEY", deepgram_key) + console.print("[green][OK][/green] Deepgram API key saved") + else: + console.print( + "[yellow][WARNING][/yellow] No Deepgram key configured - " + "tests tagged requires-api-keys will fail" + ) + + # --- OpenAI API Key --- + if args.openai_api_key: + openai_key = args.openai_api_key + console.print("[green][OK][/green] OpenAI API key provided via argument") + else: + openai_key = prompt_with_existing_masked( + prompt_text="OpenAI API key", + env_file_path=env_path_str, + env_key="OPENAI_API_KEY", + placeholders=OPENAI_PLACEHOLDERS, + is_password=True, + ) + + if openai_key and not is_placeholder(openai_key, *OPENAI_PLACEHOLDERS): + set_key(env_path_str, "OPENAI_API_KEY", openai_key) + console.print("[green][OK][/green] OpenAI API key saved") + else: + console.print( + "[yellow][WARNING][/yellow] No OpenAI key configured - " + "tests tagged requires-api-keys will fail" + ) + + console.print() + console.print("[green][DONE][/green] Test environment configured") + console.print(f" Config file: {ENV_TEST_PATH}") + console.print() + console.print("Next steps:") + console.print(" [cyan]make start[/cyan] - Start test containers") + console.print(" [cyan]make test[/cyan] - Start containers + run tests") + + +if __name__ == "__main__": + main() From 3b516c5837a197f9152b681804a270d400c56eab Mon Sep 17 00:00:00 2001 From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com> Date: Fri, 6 Feb 2026 22:42:54 +0000 Subject: [PATCH 2/3] Remove outdated documentation and restructure feature overview - Deleted the `features.md` file, consolidating its content into the new `overview.md` for a more streamlined documentation structure. 
- Updated `init-system.md` to link to the new `overview.md` instead of the removed `features.md`. - Removed `ports-and-access.md` as its content was integrated into other documentation files, enhancing clarity and reducing redundancy. - Revised the `README.md` in the advanced backend to reflect the new naming conventions and updated links to documentation. - Introduced a new `plugin-development-guide.md` to assist users in creating custom plugins, expanding the documentation for developers. --- CLAUDE.md | 5 + Docs/features.md | 282 ------------------ Docs/init-system.md | 36 ++- Docs/overview.md | 127 ++++++++ Docs/ports-and-access.md | 173 ----------- README-K8S.md | 6 +- README.md | 2 +- backends/advanced/Docs/README.md | 6 - .../Docs/memory-configuration-guide.md | 132 -------- .../plugin-development-guide.md | 0 backends/advanced/README.md | 10 +- .../services/transcription/__init__.py | 2 +- .../workers/transcription_jobs.py | 39 ++- config/README.md | 2 +- .../asr-services/providers/vibevoice/impl.md | 17 -- quickstart.md | 6 +- setup_utils.py | 9 +- tests/MOCK_SPEAKER_IMPLEMENTATION.md | 206 ------------- 18 files changed, 201 insertions(+), 859 deletions(-) delete mode 100644 Docs/features.md create mode 100644 Docs/overview.md delete mode 100644 Docs/ports-and-access.md delete mode 100644 backends/advanced/Docs/memory-configuration-guide.md rename backends/advanced/{docs => Docs}/plugin-development-guide.md (100%) delete mode 100644 extras/asr-services/providers/vibevoice/impl.md delete mode 100644 tests/MOCK_SPEAKER_IMPLEMENTATION.md diff --git a/CLAUDE.md b/CLAUDE.md index faed99c2..19684385 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -373,6 +373,11 @@ curl -s -H "Authorization: Bearer YOUR_TOKEN" \ http://localhost:8000/api/conversations ``` +### Backend API Interaction Rules +- **Get token first**: Always authenticate in a separate Bash call, store the token, then use it in subsequent calls. Never chain login + API call in one command. 
+- **Read .env with Read tool**: Use the Read tool to get values from `.env` files. Don't use `grep | sed | cut` in Bash to extract env values. +- **Keep Bash simple**: Each Bash call should do one thing. Don't string together complex piped commands for backend queries. + ### Development Reset Commands ```bash # Reset all data (development only) diff --git a/Docs/features.md b/Docs/features.md deleted file mode 100644 index 0332c6ee..00000000 --- a/Docs/features.md +++ /dev/null @@ -1,282 +0,0 @@ -# Chronicle Features & Architecture - -## Core Features - -Chronicle supports AI-powered personal systems through multiple OMI-compatible audio devices: - -**Memory System:** -- **Advanced memory system** with pluggable providers (Chronicle native or OpenMemory MCP) -- **Memory extraction** from conversations with individual fact storage -- **Semantic memory search** with relevance threshold filtering and live results -- **Memory count display** with total count tracking from native providers -- **Speaker-based memory filtering** to control processing based on participant presence - -**Audio Processing:** -- **Action item detection** and tracking -- **Multi-device support** for comprehensive audio capture -- **Cross-client compatibility** (optional with OpenMemory MCP) - -**Device Support:** -- OMI pendants and wearables -- Smart glasses with audio capture -- Any Bluetooth-enabled audio device - -## Architecture Overview - -![Architecture Diagram](../.assets/plan.png) - -DevKit2 streams audio via Bluetooth using OPUS codec. 
The processing pipeline includes: - -**Audio Processing:** -- Bluetooth audio capture from OMI devices -- OPUS codec streaming to backend services -- WebSocket-based real-time audio transport - -**Transcription Services:** -- Cloud-based: Deepgram API for high-quality transcription -- Self-hosted: Local ASR services (Parakeet, Moonshine) - -**AI Processing:** -- LLM-based conversation analysis (OpenAI or local Ollama) -- **Dual memory system**: Chronicle native or OpenMemory MCP integration -- Enhanced memory extraction with individual fact storage -- **Semantic search** with relevance scoring and threshold filtering -- Smart deduplication and memory updates (ADD/UPDATE/DELETE) -- Action item detection - -**Data Storage:** -- MongoDB: User data, conversations, and transcripts -- Qdrant: Vector storage for semantic memory search -- Audio files: Optional conversation recording - -## Repository Structure - -### ๐Ÿ“ฑ Mobile App (`app/`) -- **React Native app** for connecting to OMI devices via Bluetooth -- Streams audio in OPUS format to selected backend -- Cross-platform (iOS/Android) support -- Uses React Native Bluetooth SDK - -### ๐Ÿ–ฅ๏ธ Backends (`backends/`) - -Choose one based on your needs: - -#### **Simple Backend** (`backends/simple-backend/`) -**Use case:** Getting started, basic audio processing, learning - -**Features:** -- โœ… Basic audio ingestion (OPUS โ†’ PCM โ†’ WAV chunks) -- โœ… File-based storage (30-second segments) -- โœ… Minimal dependencies -- โœ… Quick setup - -**Requirements:** -- Minimal resource usage -- No external services - -**Limitations:** -- No transcription -- No memory/conversation management -- No speaker recognition -- Manual file management - ---- - -#### **Advanced Backend** (`backends/advanced/`) **RECOMMENDED** -**Use case:** Production use, full feature set - -**Features:** -- Audio processing pipeline with real-time WebSocket support -- **Pluggable memory system**: Choose between Chronicle native or OpenMemory MCP -- 
Enhanced memory extraction with individual fact storage (no generic fallbacks) -- **Semantic memory search** with relevance threshold filtering and total count display -- **Speaker-based memory filtering**: Optional control over processing based on participant presence -- Smart memory updates with LLM-driven action proposals (ADD/UPDATE/DELETE) -- Speaker recognition and enrollment -- Action items extraction from conversations -- Audio cropping (removes silence, keeps speech) -- Conversation management with session timeouts -- Modern React web UI with live recording and advanced search -- Multiple ASR options (Deepgram API + offline ASR) -- MongoDB for structured data storage -- RESTful API for all operations -- **Cross-client compatibility** (with OpenMemory MCP provider) - -**Requirements:** -- Multiple services (MongoDB, Qdrant, Ollama) -- Higher resource usage -- Authentication configuration - ---- - -#### **OMI-Webhook-Compatible Backend** (`backends/omi-webhook-compatible/`) -**Use case:** Existing OMI users, migration from official OMI backend - -**Features:** -- โœ… Compatible with official OMI app webhook system -- โœ… Drop-in replacement for OMI backend -- โœ… Audio file storage -- โœ… ngrok integration for public endpoints - -**Requirements:** -- ngrok for public access - -**Limitations:** -- Limited features compared to advanced backend -- No built-in AI features - ---- - -#### **Example Satellite Backend** (`backends/example-satellite/`) -**Use case:** Distributed setups, external ASR integration - -**Features:** -- โœ… Audio streaming satellite -- โœ… Streams audio to remote ASR servers -- โœ… Bluetooth OMI device discovery -- โœ… Integration with external voice processing systems - -**Requirements:** -- Separate ASR server - -**Limitations:** -- Limited standalone functionality - -### ๐Ÿ”ง Additional Services (`extras/`) - -#### **ASR Services** (`extras/asr-services/`) -- **Self-hosted** ASR services -- **Moonshine** - Fast offline ASR -- 
**Parakeet** - Alternative offline ASR -- Self-hosted transcription options - -#### **Speaker Recognition Service** (`extras/speaker-recognition/`) -- Standalone speaker identification service -- Used by advanced backend -- REST API for speaker operations - -#### **HAVPE Relay** (`extras/havpe-relay/`) -- Audio relay service -- Protocol bridging capabilities - -## Audio Streaming Protocol - -Backends and ASR services use standardized audio streaming: -- Consistent audio streaming format -- Interoperable with external systems -- Modular ASR service architecture -- Easy to swap ASR providers - -## Deployment Scenarios - -### Single Machine (Recommended for beginners) -1. **Clone the repository** -2. **Run interactive setup**: `./wizard.sh` -3. **Start all services**: `./start.sh` -4. **Access WebUI**: `http://localhost:5173` for the React web dashboard - -### Distributed Setup (Advanced users with multiple machines) -1. **GPU Machine**: Deploy LLM services (Ollama, ASR, Speaker Recognition) - ```bash - # Ollama with GPU - docker run -d --gpus=all -p 11434:11434 ollama/ollama:latest - - # ASR services - cd extras/asr-services && docker compose up moonshine -d - - # Speaker recognition - cd extras/speaker-recognition && docker compose up --build -d - ``` - -2. **Backend Machine**: Deploy lightweight services - ```bash - cd backends/advanced - - # Configure distributed services in .env - OLLAMA_BASE_URL=http://[gpu-machine-tailscale-ip]:11434 - SPEAKER_SERVICE_URL=http://[gpu-machine-tailscale-ip]:8001 - - docker compose up --build -d - ``` - -3. **Tailscale Networking**: Connect machines securely - ```bash - # On each machine - curl -fsSL https://tailscale.com/install.sh | sh - sudo tailscale up - ``` - -## Use Case Recommendations - -### For Beginners -1. Start with **Simple Backend** to understand the basics -2. Use **mobile app** to connect your OMI device -3. Examine saved audio chunks in `./audio_chunks/` - -### For Production Use -1. 
Use **Advanced Backend** for full features -2. Run the orchestrated setup: `./wizard.sh` -3. Start all services: `./start.sh` -4. Access the Web UI at http://localhost:5173 for conversation management - -### For OMI Users -1. Use **OMI-Webhook-Compatible Backend** for easy migration -2. Configure ngrok for public webhook access -3. Point your OMI app to the webhook URL - -### For Home Assistant Users -1. Use **Example Satellite Backend** for audio streaming -2. Set up ASR services from `extras/asr-services/` -3. Configure external voice processing integration - -### For Distributed/Self-Hosting Users -1. Use **Advanced Backend** for full feature set -2. **Separate GPU services**: Run LLM/ASR on dedicated GPU machine -3. **Lightweight backend**: Deploy FastAPI/WebUI on VPS or Raspberry Pi -4. **Tailscale networking**: Secure VPN connection between services (automatic CORS support) -5. **Service examples**: Ollama on GPU machine, backend on lightweight server - -## Service Ports & Access - -### Advanced Backend (Primary) - -**HTTP Mode (Default):** -``` -Web Dashboard: http://localhost:5173 -Backend API: http://localhost:8000 -MongoDB: localhost:27017 -Qdrant: localhost:6333 (HTTP), 6334 (gRPC) -``` - -**HTTPS Mode:** -``` -Web Dashboard: https://localhost/ or https://your-ip/ -Backend API: https://localhost/api/ or https://your-ip/api/ -(Internal services same as HTTP mode) -``` - -### Speaker Recognition Service - -**HTTP Mode:** -``` -Web UI: http://localhost:5174 -API: http://localhost:8085 -``` - -**HTTPS Mode (nginx proxy):** -``` -Web UI: https://localhost:8444/ or https://your-ip:8444/ -API: https://localhost:8444/api/ -HTTP: http://localhost:8081/ (redirects to HTTPS) -``` - -### Additional Services - -``` -Parakeet ASR: http://localhost:8767 -OpenMemory MCP: http://localhost:8765 (API + WebUI) -``` - -**Note:** HTTPS mode requires SSL certificate setup. See individual service documentation for SSL configuration details. 
- -For detailed port configuration, see [ports-and-access.md](ports-and-access.md). \ No newline at end of file diff --git a/Docs/init-system.md b/Docs/init-system.md index 14d7cb3f..be7d2ae6 100644 --- a/Docs/init-system.md +++ b/Docs/init-system.md @@ -4,7 +4,7 @@ - **๐Ÿ‘‰ [Start Here: Quick Start Guide](../quickstart.md)** - Main setup path for new users - **๐Ÿ“š [Full Documentation](../CLAUDE.md)** - Comprehensive reference -- **๐Ÿ—๏ธ [Architecture Details](features.md)** - Technical deep dive +- **๐Ÿ—๏ธ [Architecture Details](overview.md)** - Technical deep dive --- @@ -118,20 +118,36 @@ Note (Linux): If `host.docker.internal` is unavailable, add `extra_hosts: - "hos โœ… **Unified Control** - Single command to start/stop all services โœ… **Selective Starting** - Choose which services to run based on your current needs -## Service URLs +## Ports & Access -### Default Service Endpoints -- **Backend API**: http://localhost:8000 -- **Backend WebUI**: http://localhost:5173 -- **Speaker Recognition**: http://localhost:8085 -- **Speaker Recognition WebUI**: http://localhost:5173 -- **Parakeet ASR**: http://localhost:8767 -- **OpenMemory MCP**: http://localhost:8765 +### HTTP Mode (Default - No SSL Required) + +| Service | API Port | Web UI Port | Access URL | +|---------|----------|-------------|------------| +| **Advanced Backend** | 8000 | 5173 | http://localhost:8000 (API), http://localhost:5173 (Dashboard) | +| **Speaker Recognition** | 8085 | 5175* | http://localhost:8085 (API), http://localhost:5175 (WebUI) | +| **Parakeet ASR** | 8767 | - | http://localhost:8767 (API) | +| **OpenMemory MCP** | 8765 | 8765 | http://localhost:8765 (API + WebUI) | + +*Speaker Recognition WebUI port is configurable via REACT_UI_PORT + +Note: Browsers require HTTPS for microphone access over network. 
+ +### HTTPS Mode (For Microphone Access) + +| Service | HTTP Port | HTTPS Port | Access URL | +|---------|-----------|------------|------------| +| **Advanced Backend** | 80->443 | 443 | https://localhost/ (Main), https://localhost/api/ (API) | +| **Speaker Recognition** | 8081->8444 | 8444 | https://localhost:8444/ (Main), https://localhost:8444/api/ (API) | + +nginx services start automatically with the standard docker compose command. + +See [ssl-certificates.md](ssl-certificates.md) for HTTPS/SSL setup details. ### Container-to-Container Communication Services use `host.docker.internal` for inter-container communication: - `http://127.0.0.1:8085` - Speaker Recognition -- `http://host.docker.internal:8767` - Parakeet ASR +- `http://host.docker.internal:8767` - Parakeet ASR - `http://host.docker.internal:8765` - OpenMemory MCP ## Service Management diff --git a/Docs/overview.md b/Docs/overview.md new file mode 100644 index 00000000..927bbf16 --- /dev/null +++ b/Docs/overview.md @@ -0,0 +1,127 @@ +# Chronicle Overview + +Chronicle is an open-source, self-hosted system for building a personal timeline of your life. It captures events โ€” conversations, audio, images, and more โ€” processes them with AI, and extracts memories and facts that accumulate over time into a personal knowledge base. + +The goal is a personal AI that gets better the more you use it: the more context it has about you, the more useful it becomes. + +## Core Ideas + +- **Timeline of events**: Your life is a sequence of things that happen โ€” someone talks, music plays, a photo is taken. Chronicle models these as timestamped events on a timeline. +- **Multimodal**: Audio is the primary input today, but the architecture supports images, visual context, and other data sources. +- **Memories from everything**: Events produce memories. A conversation yields facts about people, plans, and preferences. A photo yields location, context, and associations. 
+- **Self-hosted**: Runs on your hardware, your data stays with you. +- **Hackable**: Designed to be forked, modified, and extended. Pluggable providers for transcription, LLM, memory storage, and analysis. + +## How It Works + +``` +Audio/Images/Data โ†’ Ingestion โ†’ Processing โ†’ Memories + โ†“ + Vector Store + โ†“ + Retrieval & Search +``` + +### Audio Pipeline (Primary) + +1. **Capture**: OMI devices, microphones, or uploaded files stream audio +2. **Transcription**: Deepgram (cloud) or Parakeet (local) converts speech to text +3. **Speaker Recognition**: Optional identification of who said what (pyannote) +4. **Memory Extraction**: LLM extracts facts, preferences, and context from transcripts +5. **Storage**: Memories stored as vectors in Qdrant for semantic search + +### Image Pipeline (In Development) + +1. **Import**: Zip upload, or sync from external services (e.g., Immich) +2. **Analysis**: Extract EXIF metadata, captions, detected objects +3. **Memory Extraction**: Same LLM pipeline, different source type +4. 
**Storage**: Same vector store, queryable alongside conversation memories + +## Architecture + +``` +โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” +โ”‚ Chronicle System โ”‚ +โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค +โ”‚ โ”‚ +โ”‚ โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”‚ +โ”‚ โ”‚ Mobile App โ”‚โ—„โ”€โ”€โ–บโ”‚ Backend โ”‚โ—„โ–บโ”‚ MongoDB โ”‚ โ”‚ +โ”‚ โ”‚ (React โ”‚ โ”‚ (FastAPI) โ”‚ โ”‚ โ”‚ โ”‚ +โ”‚ โ”‚ Native) โ”‚ โ”‚ โ”‚ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ”‚ +โ”‚ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”ฌโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ”‚ +โ”‚ โ”‚ โ”‚ +โ”‚ โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ–ผโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” โ”‚ +โ”‚ โ”‚ Web UI โ”‚ โ”‚ Workers โ”‚ โ”‚ Qdrant โ”‚ โ”‚ +โ”‚ โ”‚ (React) โ”‚ โ”‚ (RQ/Redis) โ”‚ โ”‚ (Vector) โ”‚ โ”‚ +โ”‚ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ โ”‚ +โ”‚ โ”‚ +โ”‚ Transcription: Deepgram (cloud) or Parakeet (local) โ”‚ +โ”‚ LLM: OpenAI (cloud) or Ollama (local) โ”‚ +โ”‚ Optional: Speaker Recognition, OpenMemory MCP โ”‚ +โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ +``` + +### Key Components + +| Component | Location | Purpose | +|-----------|----------|---------| +| **Backend** | `backends/advanced/` | FastAPI server, audio processing, API | +| **Web UI** | `backends/advanced/webui/` | React dashboard for conversations and memories | +| **Mobile App** | `app/` | React Native app for 
OMI device pairing | +| **Speaker Recognition** | `extras/speaker-recognition/` | Voice identification service | +| **ASR Services** | `extras/asr-services/` | Local speech-to-text (Parakeet) | +| **OpenMemory MCP** | `extras/openmemory-mcp/` | Cross-client memory compatibility | +| **HAVPE Relay** | `extras/havpe-relay/` | ESP32 audio bridge | + +### Pluggable Providers + +Chronicle is designed around swappable providers: + +- **Transcription**: Deepgram API or local Parakeet ASR +- **LLM**: OpenAI or local Ollama +- **Memory Storage**: Chronicle native (Qdrant) or OpenMemory MCP +- **Speaker Recognition**: pyannote-based service (optional) + +## Repository Structure + +``` +chronicle/ +โ”œโ”€โ”€ app/ # React Native mobile app +โ”œโ”€โ”€ backends/ +โ”‚ โ”œโ”€โ”€ advanced/ # Main backend (FastAPI + WebUI) +โ”‚ โ”œโ”€โ”€ simple/ # Minimal backend for learning +โ”‚ โ””โ”€โ”€ other-backends/ # Example/alternative implementations +โ”œโ”€โ”€ extras/ +โ”‚ โ”œโ”€โ”€ speaker-recognition/ # Voice identification +โ”‚ โ”œโ”€โ”€ asr-services/ # Local ASR (Parakeet) +โ”‚ โ”œโ”€โ”€ openmemory-mcp/ # External memory server +โ”‚ โ””โ”€โ”€ havpe-relay/ # ESP32 audio bridge +โ”œโ”€โ”€ config/ # Central configuration +โ”œโ”€โ”€ Docs/ # Documentation +โ”œโ”€โ”€ tests/ # Integration tests (Robot Framework) +โ”œโ”€โ”€ wizard.py # Setup wizard +โ””โ”€โ”€ services.py # Service lifecycle manager +``` + +## Getting Started + +See [quickstart.md](../quickstart.md) for setup instructions. 
+ +```bash +# Setup +./wizard.sh + +# Start +./start.sh + +# Access +open http://localhost:5173 +``` + +## Further Reading + +- [Quick Start Guide](../quickstart.md) — Step-by-step setup +- [Initialization System](init-system.md) — Setup wizard internals and port configuration +- [Audio Pipeline Architecture](audio-pipeline-architecture.md) — Deep technical reference +- [SSL Certificates](ssl-certificates.md) — HTTPS setup +- [Backend Architecture](../backends/advanced/Docs/architecture.md) — Backend internals diff --git a/Docs/ports-and-access.md b/Docs/ports-and-access.md deleted file mode 100644 index 00f5ee64..00000000 --- a/Docs/ports-and-access.md +++ /dev/null @@ -1,173 +0,0 @@ -# Chronicle Port Configuration & User Journey - -## User Journey: Git Clone to Running Services - -### 1. Clone & Setup -```bash -git clone -cd chronicle - -# Configure all services (using convenience script) -./wizard.sh - -# Start all configured services -./start.sh -``` - -### 2. Service Access Points - -## HTTP Mode (Default - No SSL Required) - -| Service | API Port | Web UI Port | Access URL | -|---------|----------|-------------|------------| -| **Advanced Backend** | 8000 | 5173 | http://localhost:8000 (API)
http://localhost:5173 (Dashboard) | -| **Speaker Recognition** | 8085 | 5175* | http://localhost:8085 (API)
http://localhost:5175 (WebUI) | -| **Parakeet ASR** | 8767 | - | http://localhost:8767 (API) | -| **OpenMemory MCP** | 8765 | 8765 | http://localhost:8765 (API + WebUI) | - -*Note: Speaker Recognition WebUI port is configurable via REACT_UI_PORT (default varies by mode) - -**🌐 Main Dashboard**: http://localhost:5173 -**🎤 Speaker Recognition**: http://localhost:5174 -**❌ No microphone access** - browsers require HTTPS for microphone - ---- - -## HTTPS Mode (For Microphone Access) - -| Service | HTTP Port | HTTPS Port | Access URL | Microphone Access | -|---------|-----------|------------|------------|-------------------| -| **Advanced Backend** | 80→443 | 443 | https://localhost/ (Main)
https://localhost/api/ (API) | ✅ Yes | -| **Speaker Recognition** | 8081→8444 | 8444 | https://localhost:8444/ (Main)
https://localhost:8444/api/ (API) | โœ… Yes | - -**IMPORTANT**: nginx services start automatically with the standard docker compose command - -**๐ŸŒ Main Dashboard**: https://localhost/ (Advanced Backend with SSL) -**๐ŸŽค Speaker Recognition**: https://localhost:8444/ (Speaker Recognition with SSL) -**โœ… Full microphone access** - both services secured with SSL - -### Port Details (HTTPS Mode) -- **Advanced Backend nginx**: Ports 80 (HTTP redirect) + 443 (HTTPS) -- **Speaker Recognition nginx**: Ports 8081 (HTTP redirect) + 8444 (HTTPS) -- **No port conflicts** - different port ranges for each service - ---- - -## Why Two Modes? - -### HTTP Mode (Default) -โœ… **Simple setup** - No SSL certificates needed -โœ… **Development friendly** - Quick start for testing -โŒ **No microphone access** - Browsers require HTTPS for microphone - -### HTTPS Mode (Advanced) -โœ… **Microphone access** - Browsers allow mic access over HTTPS -โœ… **Production ready** - Secure for real deployments -โŒ **Complex setup** - Requires SSL certificate generation - ---- - -## Configuration Files - -### Speaker Recognition Modes - -The speaker recognition service supports both modes via configuration: - -**HTTP Mode (.env)**: -```bash -REACT_UI_PORT=5174 # Direct HTTP access -REACT_UI_HTTPS=false -``` - -**HTTPS Mode (.env)**: -```bash -REACT_UI_PORT=5175 # Internal HTTPS port (proxied through nginx) -REACT_UI_HTTPS=true -# nginx provides external access on ports 8081 (HTTP redirect) and 8444 (HTTPS) -# Start with: docker compose up -d -``` - ---- - -## Service Management Commands - -**Convenience Scripts (Recommended):** -```bash -# Check what's running -./status.sh - -# Start all configured services -./start.sh - -# Restart all services -./restart.sh - -# Stop all services -./stop.sh -``` - -**Note**: Convenience scripts wrap the longer `uv run --with-requirements setup-requirements.txt python` commands for ease of use. - -
-Full commands (click to expand) - -```bash -# Check what's running -uv run --with-requirements setup-requirements.txt python services.py status - -# Start all services -uv run --with-requirements setup-requirements.txt python services.py start --all --build - -# Start only specific services -uv run --with-requirements setup-requirements.txt python services.py start backend speaker-recognition - -# Restart all services -uv run --with-requirements setup-requirements.txt python services.py restart --all - -# Restart specific services -uv run --with-requirements setup-requirements.txt python services.py restart backend - -# Stop all services -uv run --with-requirements setup-requirements.txt python services.py stop --all -``` - -
- -**Important:** Use `restart` for configuration changes (.env updates). For code changes, use `stop` + `start --build` to rebuild images. - ---- - -## Microphone Access Requirements - -For **speaker recognition** and **live audio features** to work: - -1. **Local development**: Use HTTP mode, access via `http://localhost:5174` - - Some browsers allow localhost microphone access over HTTP - -2. **Production/Remote access**: Use HTTPS mode, access via `https://localhost:8444` - - All browsers require HTTPS for microphone access over network - -3. **Mixed setup**: Keep backend on HTTP, only enable HTTPS for speaker recognition when needed - ---- - -## Port Conflict Resolution - -If you encounter port conflicts: - -1. **Check running services**: `uv run --with-requirements setup-requirements.txt python services.py status` -2. **Stop conflicting services**: `uv run --with-requirements setup-requirements.txt python services.py stop --all` -3. **Change ports in .env files** if needed -4. **Restart services**: `uv run --with-requirements setup-requirements.txt python services.py restart --all` or `./restart.sh` - ---- - -## Summary: Default User Experience - -After `git clone` and running init + services: - -๐ŸŒ **Main Application**: http://localhost:5173 -๐ŸŽค **Speaker Recognition**: http://localhost:5174 (HTTP) or https://localhost:8444 (HTTPS) -๐Ÿ”ง **Backend API**: http://localhost:8000 -๐Ÿ“ **ASR Service**: http://localhost:8767 -๐Ÿง  **Memory Service**: http://localhost:8765 \ No newline at end of file diff --git a/README-K8S.md b/README-K8S.md index 0e8358c1..8bbe22fa 100644 --- a/README-K8S.md +++ b/README-K8S.md @@ -266,9 +266,9 @@ chronicle/ 1. 
**Clone Repository** ```bash - # Clone Friend-Lite repository with submodules + # Clone Chronicle repository with submodules git clone --recursive https://github.com/chronicle-ai/chronicle.git - cd friend-lite + cd chronicle # If you already cloned without --recursive, initialize submodules: # git submodule update --init --recursive @@ -278,7 +278,7 @@ chronicle/ ls -la backends/advanced/.env.template ``` - > **Note:** The `--recursive` flag downloads the optional Mycelia submodule (an alternative memory backend with timeline visualization). Most deployments use the default Friend-Lite memory system and don't need Mycelia. + > **Note:** The `--recursive` flag downloads the optional Mycelia submodule (an alternative memory backend with timeline visualization). Most deployments use the default Chronicle memory system and don't need Mycelia. 2. **Install Required Tools** diff --git a/README.md b/README.md index b70f4255..7e342210 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ Run setup wizard, start services, access at http://localhost:5173 - **๐Ÿ“š [Setup Guide](quickstart.md)** - Start here - **๐Ÿ”ง [Full Documentation](CLAUDE.md)** - Comprehensive reference -- **๐Ÿ—๏ธ [Architecture Details](Docs/features.md)** - Technical deep dive +- **๐Ÿ—๏ธ [Project Overview](Docs/overview.md)** - Architecture and vision - **๐Ÿณ [Docker/K8s](README-K8S.md)** - Container deployment ## Project Structure diff --git a/backends/advanced/Docs/README.md b/backends/advanced/Docs/README.md index 11e683e8..e58f94ee 100644 --- a/backends/advanced/Docs/README.md +++ b/backends/advanced/Docs/README.md @@ -29,12 +29,6 @@ Welcome to chronicle! This guide provides the optimal reading sequence to unders ### 3. **[Memory System](./memories.md)** **Memory extraction and semantic search** -### 3a. 
**[Memory Configuration Guide](./memory-configuration-guide.md)** 🎯 *NEW USER GUIDE* -**Easy guide for configuring memory extraction** -- 3-step setup for memory extraction -- Understanding memory types (general, facts, categories) -- Customization examples and troubleshooting -- **Perfect for**: New users wanting to customize memory behavior - How conversations become memories - Mem0 integration and vector storage - Configuration and customization options diff --git a/backends/advanced/Docs/memory-configuration-guide.md b/backends/advanced/Docs/memory-configuration-guide.md deleted file mode 100644 index 66244003..00000000 --- a/backends/advanced/Docs/memory-configuration-guide.md +++ /dev/null @@ -1,132 +0,0 @@ -# Memory Configuration Guide - -This guide helps you set up and configure the memory system for the Friend Advanced Backend. - -## Quick Start - -1. **Copy the template configuration**: -```bash -Edit the `memory` section of `config/config.yml`. -``` - -2. **Edit `config/config.yml`** with your preferred settings in the `memory` section: -```yaml -memory: - provider: "mem0" # or "basic" for simpler setup - - # Provider-specific configuration - mem0: - model_provider: "openai" # or "ollama" for local - embedding_model: "text-embedding-3-small" - llm_model: "gpt-5-mini" -``` - -3. **Set environment variables** in `.env`: -```bash -# For OpenAI -OPENAI_API_KEY=your-api-key - -# For Ollama (local) -OLLAMA_BASE_URL=http://ollama:11434 -``` - -## Configuration Options - -### Memory Providers - -#### mem0 (Recommended) -Advanced memory system with semantic search and context awareness. - -**Configuration**: -```yaml -memory: - provider: "mem0" - mem0: - model_provider: "openai" # or "ollama" - embedding_model: "text-embedding-3-small" - llm_model: "gpt-5-mini" - prompt_template: "custom_prompt_here" # Optional -``` - -#### basic -Simple memory storage without advanced features. 
- -**Configuration**: -```yaml -memory: - provider: "basic" - # No additional configuration needed -``` - -### Model Selection - -#### OpenAI Models -- **LLM**: `gpt-5-mini`, `gpt-5-mini`, `gpt-3.5-turbo` -- **Embeddings**: `text-embedding-3-small`, `text-embedding-3-large` - -#### Ollama Models (Local) -- **LLM**: `llama3`, `qwen2.5` -- **Embeddings**: `nomic-embed-text`, `all-minilm` - -## Hot Reload - -The configuration supports hot reloading - changes are applied automatically without restarting the service. - -## Validation - -The system validates your configuration on startup and logs any issues: -- Missing required fields -- Invalid provider names -- Incompatible model combinations - -## Troubleshooting - -### Common Issues - -1. **"Provider not found"**: Check spelling in `provider` field -2. **"API key missing"**: Ensure environment variables are set -3. **"Model not available"**: Verify model names match provider's available models -4. **"Connection refused"**: Check Ollama is running if using local models - -### Debug Mode - -Enable debug logging by setting: -```bash -DEBUG=true -``` - -This provides detailed information about memory processing and configuration loading. 
- -## Examples - -### OpenAI Setup -```yaml -memory: - provider: "mem0" - mem0: - model_provider: "openai" - embedding_model: "text-embedding-3-small" - llm_model: "gpt-5-mini" -``` - -### Local Ollama Setup -```yaml -memory: - provider: "mem0" - mem0: - model_provider: "ollama" - embedding_model: "nomic-embed-text" - llm_model: "llama3" -``` - -### Minimal Setup -```yaml -memory: - provider: "basic" -``` - -## Next Steps - -- Configure action items detection in `config/config.yml` (memory.extraction) -- Set up custom prompt templates for your use case -- Monitor memory processing in the debug dashboard diff --git a/backends/advanced/docs/plugin-development-guide.md b/backends/advanced/Docs/plugin-development-guide.md similarity index 100% rename from backends/advanced/docs/plugin-development-guide.md rename to backends/advanced/Docs/plugin-development-guide.md diff --git a/backends/advanced/README.md b/backends/advanced/README.md index 7f3d5a24..104137b3 100644 --- a/backends/advanced/README.md +++ b/backends/advanced/README.md @@ -1,13 +1,13 @@ -# Friend-Lite Advanced Backend +# Chronicle Advanced Backend A FastAPI backend with pluggable memory providers, real-time audio processing, and comprehensive conversation management. 
-[QuickStart](https://github.com/AnkushMalaker/friend-lite/blob/main/backends/advanced-backend/Docs/quickstart.md) | [Memory Providers](./MEMORY_PROVIDERS.md) | [Configuration Guide](./Docs/memory-configuration-guide.md) +[QuickStart](../../quickstart.md) | [Memory Providers](./MEMORY_PROVIDERS.md) | [Configuration Guide](./Docs/memories.md) ## Key Features ### Memory System -- **Pluggable Memory Providers**: Choose between Friend-Lite native or OpenMemory MCP +- **Pluggable Memory Providers**: Choose between Chronicle native or OpenMemory MCP - **Enhanced Memory Extraction**: Individual facts instead of generic transcripts - **Smart Memory Updates**: LLM-driven ADD/UPDATE/DELETE actions - **Cross-client Compatibility**: Use OpenMemory with Claude Desktop, Cursor, etc. @@ -33,7 +33,7 @@ Modern React-based web dashboard located in `./webui/` with: - **Authentication**: Admin email/password setup with secure keys - **Transcription Provider**: Choose between Deepgram or Offline (Parakeet) - **LLM Provider**: Choose between OpenAI (recommended) or Ollama for memory extraction -- **Memory Provider**: Choose between Friend-Lite Native or OpenMemory MCP +- **Memory Provider**: Choose between Chronicle Native or OpenMemory MCP - **HTTPS Configuration**: Optional SSL setup for microphone access (uses Caddy) - **Optional Services**: Speaker Recognition, network configuration - **API Keys**: Prompts for all required keys with helpful links @@ -58,7 +58,7 @@ docker compose --profile https up --build -d #### 3. 
Access URLs -**Friend-Lite Advanced Backend (Primary - ports 80/443):** +**Chronicle Advanced Backend (Primary - ports 80/443):** - **HTTPS Dashboard**: https://localhost/ or https://your-ip/ - **HTTP**: http://localhost/ (redirects to HTTPS) - **Live Recording**: Available at `/live-record` page diff --git a/backends/advanced/src/advanced_omi_backend/services/transcription/__init__.py b/backends/advanced/src/advanced_omi_backend/services/transcription/__init__.py index 9a53e483..5c5c2296 100644 --- a/backends/advanced/src/advanced_omi_backend/services/transcription/__init__.py +++ b/backends/advanced/src/advanced_omi_backend/services/transcription/__init__.py @@ -148,7 +148,7 @@ async def transcribe(self, audio_data: bytes, sample_rate: int, diarize: bool = if "diarize" in query: query["diarize"] = "true" if diarize else "false" - timeout = op.get("timeout", 120) + timeout = op.get("timeout", 300) async with httpx.AsyncClient(timeout=timeout) as client: if method == "POST": if use_multipart: diff --git a/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py index 5897d3a2..15a6cdfb 100644 --- a/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py +++ b/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py @@ -208,20 +208,23 @@ async def transcribe_full_audio_job( logger.info( f"๐Ÿ“ฆ Reconstructed audio from MongoDB chunks: " f"{len(wav_data) / 1024 / 1024:.2f} MB" ) + except ValueError as e: + # No chunks found for conversation + raise FileNotFoundError(f"No audio chunks found for conversation {conversation_id}: {e}") + except Exception as e: + logger.error(f"Failed to reconstruct audio from MongoDB: {e}", exc_info=True) + raise RuntimeError(f"Audio reconstruction failed: {e}") + try: # Transcribe the audio directly from memory (no disk I/O needed) transcription_result = await provider.transcribe( audio_data=wav_data, # Pass bytes 
directly, already in memory sample_rate=16000, diarize=True, ) - - except ValueError as e: - # No chunks found for conversation - raise FileNotFoundError(f"No audio chunks found for conversation {conversation_id}: {e}") except Exception as e: - logger.error(f"Failed to reconstruct audio from MongoDB: {e}", exc_info=True) - raise RuntimeError(f"Audio reconstruction failed: {e}") + logger.error(f"Transcription failed for conversation {conversation_id}: {type(e).__name__}: {e}", exc_info=True) + raise RuntimeError(f"Transcription failed ({type(e).__name__}): {e}") # Extract results transcript_text = transcription_result.get("text", "") @@ -408,15 +411,23 @@ async def transcribe_full_audio_job( if segments: # Provider returned segments - use them - speaker_segments = [ - Conversation.SpeakerSegment( - speaker=str(seg.get("speaker", "Speaker 0")), - start=seg.get("start", 0.0), - end=seg.get("end", 0.0), - text=seg.get("text", ""), + speaker_segments = [] + for seg in segments: + raw_speaker = seg.get("speaker") + if raw_speaker is None: + speaker = "Speaker 0" + elif isinstance(raw_speaker, int): + speaker = f"Speaker {raw_speaker}" + else: + speaker = str(raw_speaker) + speaker_segments.append( + Conversation.SpeakerSegment( + speaker=speaker, + start=seg.get("start", 0.0), + end=seg.get("end", 0.0), + text=seg.get("text", ""), + ) ) - for seg in segments - ] if provider_has_diarization: # Provider did diarization (e.g., VibeVoice, Deepgram) diff --git a/config/README.md b/config/README.md index 44ec2330..634417e6 100644 --- a/config/README.md +++ b/config/README.md @@ -148,6 +148,6 @@ Before committing `config/plugins.yml`, manually verify: ## Documentation For detailed configuration guides, see: -- `/Docs/memory-configuration-guide.md` - Memory settings +- `/backends/advanced/Docs/memories.md` - Memory settings - `/quickstart.md` - Setup guide - `/CLAUDE.md` - Project overview and technical reference diff --git a/extras/asr-services/providers/vibevoice/impl.md 
b/extras/asr-services/providers/vibevoice/impl.md deleted file mode 100644 index 6f687079..00000000 --- a/extras/asr-services/providers/vibevoice/impl.md +++ /dev/null @@ -1,17 +0,0 @@ -❯ Can you can form if the integration is working, that is, I want to run the wizard and select the specific provider, and it should configure it to the - default.yaml and config.yaml. When I do @start.sh from the root of the repository, it should be able to start up these services. Here we also need to change - how the architecture works.Because earlier we had a system where we had a separate speech-to-text and speaker recognition system. Now with vibevoice ASR, - we have a combined speech-to-text plus diarization.So we really need to make sureWe are appropriately adding it and not just adding it as a half-baked - feature. - - - I think right now the conversation model has words and segments, and segments have a speaker tag. Even words can have a speaker tag, I think. Please check - the actual models. This can be a breaking change; that's okay.What we need to do now is make sure that in this Change: we have a cohesive system. - - We can do it in many ways, but let's think about which way makes sense here. - The end result is we kind of want segments of speech, ideally with timing, but with speaker recognition, for sure. There is vibevoice ASR, which is doing it - combined. It does not give word time stamps but gives a unified transcript plus diarisation. - - On the other hand, we have ASR plus diarization. - Here we use parakeet and pyannote respectively. - let's think about it. diff --git a/quickstart.md b/quickstart.md index 100a4fe2..377b9cbb 100644 --- a/quickstart.md +++ b/quickstart.md @@ -126,14 +126,14 @@ git submodule update --init --recursive 📦 About the Mycelia Submodule (Optional - Click to expand) **What is Mycelia?** -Mycelia is an optional self-hosted AI memory system that Friend-Lite can use as an alternative memory backend. 
It provides: +Mycelia is an optional self-hosted AI memory system that Chronicle can use as an alternative memory backend. It provides: - Timeline-based memory visualization - Advanced audio processing with speaker diarization - MongoDB-based full-text search - MCP (Model Context Protocol) integration **Do I need it?** -**Most users don't need Mycelia!** The default Friend-Lite memory system works great for most use cases. Only consider Mycelia if you: +**Most users don't need Mycelia!** The default Chronicle memory system works great for most use cases. Only consider Mycelia if you: - Want timeline-based memory visualization - Need advanced speaker diarization features - Want to use MongoDB for memory storage instead of Qdrant @@ -322,5 +322,5 @@ Before connecting your phone, make sure everything works: ## Need Help? - **Full Documentation**: [CLAUDE.md](CLAUDE.md) - Complete technical reference -- **Architecture Details**: [Docs/features.md](Docs/features.md) - How everything works +- **Architecture Details**: [Docs/overview.md](Docs/overview.md) - How everything works - **Advanced Setup**: [Docs/init-system.md](Docs/init-system.md) - Power user options diff --git a/setup_utils.py b/setup_utils.py index e9072c76..8a906c7b 100644 --- a/setup_utils.py +++ b/setup_utils.py @@ -396,13 +396,12 @@ def detect_cuda_version(default: str = "cu126") -> str: if result.returncode == 0: match = re.search(r'CUDA Version:\s*(\d+)\.(\d+)', result.stdout) if match: - major, minor = match.groups() - cuda_ver = f"{major}.{minor}" - if cuda_ver >= "12.8": + major, minor = int(match.group(1)), int(match.group(2)) + if (major, minor) >= (12, 8): return "cu128" - elif cuda_ver >= "12.6": + elif (major, minor) >= (12, 6): return "cu126" - elif cuda_ver >= "12.1": + elif (major, minor) >= (12, 1): return "cu121" except (subprocess.SubprocessError, FileNotFoundError): pass diff --git a/tests/MOCK_SPEAKER_IMPLEMENTATION.md b/tests/MOCK_SPEAKER_IMPLEMENTATION.md deleted file mode 100644 
index 0b76487f..00000000 --- a/tests/MOCK_SPEAKER_IMPLEMENTATION.md +++ /dev/null @@ -1,206 +0,0 @@ -# Mock Speaker Recognition Implementation - -## Summary - -Implemented a lightweight mock speaker recognition client to enable CI tests without running resource-intensive ML models. This allows the 2 failing tests to pass in CI environments. - -## Problem Solved - -- **Before**: 2 tests failed because `conversation['segments']` was empty (speaker recognition disabled in CI) -- **After**: Tests pass with mock segments provided by `MockSpeakerRecognitionClient` -- **Benefit**: No GPU/heavy CPU required, deterministic results, fast test execution - -## Implementation Details - -### Files Created - -1. **`tests/mocks/__init__.py`** - - Package initialization for mocks directory - -2. **`tests/mocks/mock_speaker_client.py`** - - Mock speaker recognition client with pre-computed segments - - Returns 9 segments for DIY Glass Blowing audio (based on `test_data.py`) - - Fallback to single generic segment for unknown audio - -### Files Modified - -1. **`backends/advanced/src/advanced_omi_backend/speaker_recognition_client.py`** - - Added mock detection in `__init__()` via `USE_MOCK_SPEAKER_CLIENT` env var - - Modified `diarize_identify_match()` to delegate to mock when enabled - - Minimal changes, transparent to callers - -2. **`backends/advanced/docker-compose-test.yml`** - - Added `USE_MOCK_SPEAKER_CLIENT=true` to both services: - - `chronicle-backend-test` (line 62) - - `workers-test` (line 215) - -3. **`tests/configs/deepgram-openai.yml`** - - Changed `speaker_recognition.enabled` from `false` to `true` - - Updated comment to reference mock usage - -4. 
**`tests/.gitignore`** - - Added `mocks/__pycache__/` and `mocks/*.pyc` - -## How It Works - -### Environment Detection - -```python -# In speaker_recognition_client.py __init__() -if os.getenv("USE_MOCK_SPEAKER_CLIENT") == "true": - # Import and use MockSpeakerRecognitionClient - self._mock_client = MockSpeakerRecognitionClient() - self.enabled = True -``` - -### Mock Segment Data - -The mock returns pre-computed segments for the DIY Glass Blowing test audio: - -```python -MOCK_SEGMENTS = { - "DIY_Experts_Glass_Blowing_16khz_mono_1min.wav": [ - {"start": 0.0, "end": 10.08, "speaker": 0, "identified_as": "Unknown", "text": "...", "confidence": 0.95}, - {"start": 10.28, "end": 20.255, "speaker": 0, "identified_as": "Unknown", "text": "...", "confidence": 0.93}, - # ... 7 more segments (9 total) - ] -} -``` - -### Transcript Matching - -The mock identifies test audio by transcript content: - -```python -if "glass blowing" in transcript_text or "glass" in transcript_text: - return {"segments": MOCK_SEGMENTS["DIY_Experts_Glass_Blowing_16khz_mono_1min.wav"]} -``` - -### Fallback Behavior - -For unknown audio, creates a single generic segment: - -```python -return { - "segments": [{ - "start": 0.0, - "end": duration, - "speaker": 0, - "identified_as": "Unknown", - "text": transcript_data.get("text", ""), - "confidence": 0.85 - }] -} -``` - -## Validation - -### Pre-Test Validation - -Run the validation script to verify mock setup: - -```bash -cd tests -python3 validate_mock.py -``` - -**Expected Output:** -``` -✅ Mock client initialized successfully -✅ Correct number of segments! (9 for glass blowing) -✅ All required fields present -✅ All mock client tests passed! 
-``` - -### Integration Tests - -Run the previously failing tests: - -```bash -cd tests - -# Start test containers -make start - -# Run individual tests -robot --test "Audio Upload Job Tracking Test" endpoints/audio_upload_tests.robot -robot --test "Audio Playback And Segment Timing Test" integration/integration_test.robot - -# Or run full test suite -make test-all -``` - -### Expected Logs - -When tests run, you should see: - -``` -๐ŸŽค Using MOCK speaker recognition client for tests -๐ŸŽค Mock speaker client processing conversation: ... -๐ŸŽค Mock returning 9 segments for DIY Glass Blowing audio -``` - -## Benefits - -โœ… **No CI Resource Requirements** - Speaker service not needed -โœ… **Fast Test Execution** - No ML model loading or GPU processing -โœ… **Deterministic Results** - Same segments every test run -โœ… **Easy to Maintain** - Mock data in single Python file -โœ… **Test Coverage Restored** - Segment-dependent tests run in CI -โœ… **Zero Test Code Changes** - Tests work transparently with mock -โœ… **Production Unaffected** - Mock only activates in test environment - -## Rollback Plan - -If issues arise: - -1. Remove `USE_MOCK_SPEAKER_CLIENT=true` from `docker-compose-test.yml` -2. Change `speaker_recognition.enabled` back to `false` in `tests/configs/deepgram-openai.yml` -3. Delete `tests/mocks/` directory -4. Revert changes to `speaker_recognition_client.py` - -The mock is isolated and safe to remove without affecting production code. - -## Future Enhancements (Optional) - -### Adding More Test Audio Files - -If you need to add mock data for new audio files: - -1. Add segment data to `MOCK_SEGMENTS` dict in `mock_speaker_client.py` -2. Update transcript matching logic in `diarize_identify_match()` -3. Run `validate_mock.py` to verify - -### Auto-Generate Mock Segments - -Create a script that: -1. Uploads test audio to real speaker service -2. Captures segments from response -3. 
Saves to `mock_speaker_client.py` - -This is **not needed** for current implementation since we have segment times from `test_data.py`. - -## Testing Checklist - -- [x] Mock client imports successfully -- [x] Returns 9 segments for glass blowing audio -- [x] All required fields present (start, end, speaker, identified_as, text, confidence) -- [x] Fallback to generic segment for unknown audio -- [x] Environment variable set in docker-compose-test.yml -- [x] Speaker recognition enabled in test config -- [x] .gitignore updated for Python cache -- [ ] Integration tests pass with mock enabled - -## Next Steps - -1. **Run validation**: `cd tests && python3 validate_mock.py` -2. **Start test containers**: `make start` -3. **Run failing tests**: See commands above -4. **Verify segments**: Check logs for "๐ŸŽค Mock returning 9 segments" -5. **Run full suite**: `make test-all` to ensure no regressions - -## Documentation - -- **Plan**: See `/home/ankush/workspaces/friend-lite/tests/TODO_MOCK_SPEAKER_RECOGNITION.md` for detailed implementation plan -- **Test Data**: See `tests/setup/test_data.py` for expected segment times -- **Mock Client**: See `tests/mocks/mock_speaker_client.py` for implementation From 2c3bb536a0d7ab4df1d141c834c39eabbdc17554 Mon Sep 17 00:00:00 2001 From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com> Date: Sat, 7 Feb 2026 02:00:11 +0000 Subject: [PATCH 3/3] tech debt --- .../src/advanced_omi_backend/app_config.py | 8 +- .../src/advanced_omi_backend/app_factory.py | 79 ++- .../src/advanced_omi_backend/chat_service.py | 2 +- .../src/advanced_omi_backend/client.py | 1 - .../advanced_omi_backend/client_manager.py | 46 +- .../clients/audio_stream_client.py | 6 +- .../clients/gdrive_audio_client.py | 4 +- .../controllers/__init__.py | 6 +- .../controllers/conversation_controller.py | 519 +++++++++++------- .../controllers/queue_controller.py | 22 +- .../controllers/session_controller.py | 9 +- .../controllers/system_controller.py | 13 +- 
.../controllers/user_controller.py | 4 +- .../controllers/websocket_controller.py | 39 +- .../src/advanced_omi_backend/llm_client.py | 9 +- .../advanced_omi_backend/model_registry.py | 13 +- .../advanced_omi_backend/models/annotation.py | 4 +- .../models/audio_chunk.py | 3 +- .../models/conversation.py | 7 +- .../src/advanced_omi_backend/models/job.py | 14 +- .../src/advanced_omi_backend/plugins/base.py | 2 +- .../plugins/email_summarizer/setup.py | 5 +- .../plugins/homeassistant/entity_cache.py | 2 +- .../plugins/test_event/plugin.py | 1 + .../routers/modules/admin_routes.py | 9 +- .../routers/modules/annotation_routes.py | 104 ++-- .../routers/modules/audio_routes.py | 24 +- .../routers/modules/health_routes.py | 8 +- .../routers/modules/memory_routes.py | 2 +- .../routers/modules/obsidian_routes.py | 12 +- .../routers/modules/queue_routes.py | 69 ++- .../routers/modules/test_routes.py | 1 + .../routers/modules/websocket_routes.py | 5 +- .../services/audio_stream/consumer.py | 1 + .../services/audio_stream/producer.py | 1 + .../services/knowledge_graph/__init__.py | 6 +- .../knowledge_graph/entity_extractor.py | 6 +- .../services/knowledge_graph/models.py | 3 +- .../services/knowledge_graph/service.py | 1 - .../services/memory/base.py | 186 +++---- .../services/memory/config.py | 5 +- .../services/memory/prompts.py | 3 +- .../services/memory/providers/__init__.py | 6 +- .../services/memory/providers/chronicle.py | 214 +++++--- .../services/memory/providers/mycelia.py | 22 +- .../memory/providers/openmemory_mcp.py | 4 + .../services/memory/service_factory.py | 41 +- .../services/memory/update_memory_utils.py | 8 +- .../services/neo4j_client.py | 3 +- .../services/obsidian_service.py | 15 +- .../services/plugin_service.py | 187 ++++--- .../services/transcription/mock_provider.py | 1 + .../transcription/streaming_consumer.py | 2 +- .../speaker_recognition_client.py | 14 +- .../advanced_omi_backend/utils/audio_utils.py | 3 +- .../utils/conversation_utils.py | 54 +- 
.../utils/gdrive_audio_utils.py | 7 +- .../utils/logging_utils.py | 1 - .../advanced_omi_backend/utils/model_utils.py | 2 +- .../advanced_omi_backend/workers/__init__.py | 60 +- .../workers/annotation_jobs.py | 2 +- .../workers/audio_jobs.py | 84 ++- .../workers/audio_stream_worker.py | 6 +- .../workers/cleanup_jobs.py | 6 +- .../workers/conversation_jobs.py | 209 +++---- .../workers/memory_jobs.py | 164 ++---- .../workers/orchestrator/__init__.py | 6 +- .../workers/orchestrator/config.py | 2 +- .../workers/orchestrator/worker_registry.py | 2 +- .../workers/rq_worker_entry.py | 2 +- .../workers/speaker_jobs.py | 1 + .../workers/transcription_jobs.py | 38 +- .../workers/waveform_jobs.py | 4 +- .../advanced/src/scripts/cleanup_state.py | 13 +- tests/bin/start-rebuild-containers.sh | 8 + 75 files changed, 1348 insertions(+), 1107 deletions(-) diff --git a/backends/advanced/src/advanced_omi_backend/app_config.py b/backends/advanced/src/advanced_omi_backend/app_config.py index c87398f3..5ed50618 100644 --- a/backends/advanced/src/advanced_omi_backend/app_config.py +++ b/backends/advanced/src/advanced_omi_backend/app_config.py @@ -13,9 +13,13 @@ from dotenv import load_dotenv from motor.motor_asyncio import AsyncIOMotorClient -from advanced_omi_backend.constants import OMI_CHANNELS, OMI_SAMPLE_RATE, OMI_SAMPLE_WIDTH -from advanced_omi_backend.services.transcription import get_transcription_provider +from advanced_omi_backend.constants import ( + OMI_CHANNELS, + OMI_SAMPLE_RATE, + OMI_SAMPLE_WIDTH, +) from advanced_omi_backend.model_registry import get_models_registry +from advanced_omi_backend.services.transcription import get_transcription_provider # Load environment variables load_dotenv() diff --git a/backends/advanced/src/advanced_omi_backend/app_factory.py b/backends/advanced/src/advanced_omi_backend/app_factory.py index 4458ed9e..763967f1 100644 --- a/backends/advanced/src/advanced_omi_backend/app_factory.py +++ 
b/backends/advanced/src/advanced_omi_backend/app_factory.py @@ -23,20 +23,25 @@ fastapi_users, websocket_auth, ) +from advanced_omi_backend.client_manager import get_client_manager +from advanced_omi_backend.middleware.app_middleware import setup_middleware +from advanced_omi_backend.routers.api_router import router as api_router +from advanced_omi_backend.routers.modules.health_routes import router as health_router +from advanced_omi_backend.routers.modules.websocket_routes import ( + router as websocket_router, +) +from advanced_omi_backend.services.audio_service import get_audio_stream_service +from advanced_omi_backend.services.memory import ( + get_memory_service, + shutdown_memory_service, +) +from advanced_omi_backend.task_manager import get_task_manager, init_task_manager from advanced_omi_backend.users import ( User, UserRead, UserUpdate, register_client_to_user, ) -from advanced_omi_backend.client_manager import get_client_manager -from advanced_omi_backend.services.memory import get_memory_service, shutdown_memory_service -from advanced_omi_backend.middleware.app_middleware import setup_middleware -from advanced_omi_backend.routers.api_router import router as api_router -from advanced_omi_backend.routers.modules.health_routes import router as health_router -from advanced_omi_backend.routers.modules.websocket_routes import router as websocket_router -from advanced_omi_backend.services.audio_service import get_audio_stream_service -from advanced_omi_backend.task_manager import init_task_manager, get_task_manager logger = logging.getLogger(__name__) application_logger = logging.getLogger("audio_processing") @@ -51,7 +56,10 @@ async def initialize_openmemory_user() -> None: - Creates a test memory and deletes it to trigger user creation - Logs success or warning if OpenMemory is not reachable """ - from advanced_omi_backend.services.memory.config import build_memory_config_from_env, MemoryProvider + from advanced_omi_backend.services.memory.config import ( + 
MemoryProvider, + build_memory_config_from_env, + ) memory_provider_config = build_memory_config_from_env() @@ -99,11 +107,12 @@ async def lifespan(app: FastAPI): # Initialize Beanie for all document models try: from beanie import init_beanie - from advanced_omi_backend.models.conversation import Conversation + + from advanced_omi_backend.models.annotation import Annotation from advanced_omi_backend.models.audio_chunk import AudioChunkDocument + from advanced_omi_backend.models.conversation import Conversation from advanced_omi_backend.models.user import User from advanced_omi_backend.models.waveform import WaveformData - from advanced_omi_backend.models.annotation import Annotation await init_beanie( database=config.db, @@ -139,6 +148,27 @@ async def lifespan(app: FastAPI): application_logger.error(f"Failed to connect to Redis for RQ: {e}") application_logger.warning("RQ queue system will not be available - check Redis connection") + # Initialize BackgroundTaskManager (must happen before any code path uses it) + try: + task_manager = init_task_manager() + await task_manager.start() + application_logger.info("BackgroundTaskManager initialized and started") + except Exception as e: + application_logger.error(f"Failed to initialize task manager: {e}") + raise # Task manager is essential + + # Initialize ClientManager eagerly (prevents lazy race on first WebSocket connect) + get_client_manager() + application_logger.info("ClientManager initialized") + + # Initialize LLM client eagerly (catch config errors at startup, not on first request) + try: + from advanced_omi_backend.llm_client import get_llm_client + get_llm_client() + application_logger.info("LLM client initialized from config.yml") + except Exception as e: + application_logger.warning(f"LLM client initialization deferred: {e}") + # Initialize audio stream service for Redis Streams try: audio_service = get_audio_stream_service() @@ -161,7 +191,9 @@ async def lifespan(app: FastAPI): application_logger.info("โœ… 
Redis client for audio streaming producer initialized") # Initialize ClientManager Redis for cross-container client→user mapping - from advanced_omi_backend.client_manager import initialize_redis_for_client_manager + from advanced_omi_backend.client_manager import ( + initialize_redis_for_client_manager, + ) initialize_redis_for_client_manager(config.redis_url) except Exception as e: @@ -180,7 +212,10 @@ async def lifespan(app: FastAPI): # Initialize plugins using plugin service try: - from advanced_omi_backend.services.plugin_service import init_plugin_router, set_plugin_router + from advanced_omi_backend.services.plugin_service import ( + init_plugin_router, + set_plugin_router, + ) plugin_router = init_plugin_router() @@ -221,11 +256,23 @@ async def lifespan(app: FastAPI): client_manager = get_client_manager() for client_id in client_manager.get_all_client_ids(): try: - from advanced_omi_backend.controllers.websocket_controller import cleanup_client_state + from advanced_omi_backend.controllers.websocket_controller import ( + cleanup_client_state, + ) await cleanup_client_state(client_id) except Exception as e: application_logger.error(f"Error cleaning up client {client_id}: {e}") + # Shutdown BackgroundTaskManager + try: + task_mgr = get_task_manager() + await task_mgr.shutdown() + application_logger.info("BackgroundTaskManager shut down") + except RuntimeError: + pass # Never initialized + except Exception as e: + application_logger.error(f"Error shutting down task manager: {e}") + # RQ workers shut down automatically when process ends # No special cleanup needed for Redis connections @@ -250,7 +297,9 @@ async def lifespan(app: FastAPI): # Shutdown plugins try: - from advanced_omi_backend.services.plugin_service import cleanup_plugin_router + from advanced_omi_backend.services.plugin_service import ( + cleanup_plugin_router, + ) await cleanup_plugin_router() application_logger.info("Plugins shut down") except Exception as e: diff --git
a/backends/advanced/src/advanced_omi_backend/chat_service.py b/backends/advanced/src/advanced_omi_backend/chat_service.py index 16cba331..f3184b74 100644 --- a/backends/advanced/src/advanced_omi_backend/chat_service.py +++ b/backends/advanced/src/advanced_omi_backend/chat_service.py @@ -26,8 +26,8 @@ from advanced_omi_backend.services.memory import get_memory_service from advanced_omi_backend.services.memory.base import MemoryEntry from advanced_omi_backend.services.obsidian_service import ( - get_obsidian_service, ObsidianSearchError, + get_obsidian_service, ) from advanced_omi_backend.users import User diff --git a/backends/advanced/src/advanced_omi_backend/client.py b/backends/advanced/src/advanced_omi_backend/client.py index be92716e..a92fbc10 100644 --- a/backends/advanced/src/advanced_omi_backend/client.py +++ b/backends/advanced/src/advanced_omi_backend/client.py @@ -12,7 +12,6 @@ from pathlib import Path from typing import Dict, List, Optional, Tuple -from advanced_omi_backend.task_manager import get_task_manager from wyoming.audio import AudioChunk # Get loggers diff --git a/backends/advanced/src/advanced_omi_backend/client_manager.py b/backends/advanced/src/advanced_omi_backend/client_manager.py index e55b3502..68fd6ef8 100644 --- a/backends/advanced/src/advanced_omi_backend/client_manager.py +++ b/backends/advanced/src/advanced_omi_backend/client_manager.py @@ -9,6 +9,7 @@ import logging import uuid from typing import TYPE_CHECKING, Dict, Optional + import redis.asyncio as redis if TYPE_CHECKING: @@ -39,17 +40,6 @@ def __init__(self): self._initialized = True # Self-initializing, no external dict needed logger.info("ClientManager initialized as single source of truth") - def initialize(self, active_clients_dict: Optional[Dict[str, "ClientState"]] = None): - """ - Legacy initialization method for backward compatibility. - - New design: ClientManager is self-initializing and doesn't need external dict. 
- This method is kept for compatibility but does nothing. - """ - if active_clients_dict is not None: - logger.warning("ClientManager no longer uses external dictionaries - ignoring active_clients_dict") - logger.info("ClientManager initialization (legacy compatibility mode)") - def is_initialized(self) -> bool: """Check if the client manager has been initialized.""" return self._initialized @@ -314,40 +304,6 @@ def get_client_manager() -> ClientManager: return _client_manager -def init_client_manager(active_clients_dict: Dict[str, "ClientState"]): - """ - Initialize the global client manager with active_clients reference. - - This should be called from main.py during startup. - - Args: - active_clients_dict: Reference to the global active_clients dictionary - """ - client_manager = get_client_manager() - client_manager.initialize(active_clients_dict) - return client_manager - - -# Client-user relationship initialization and utility functions -def init_client_user_mapping( - active_mapping_dict: Dict[str, str], all_mapping_dict: Optional[Dict[str, str]] = None -): - """ - Initialize the client-user mapping with references to the global mappings. - - This should be called from main.py during startup. - - Args: - active_mapping_dict: Reference to the active client_to_user_mapping dictionary - all_mapping_dict: Reference to the all_client_user_mappings dictionary (optional) - """ - global _client_to_user_mapping, _all_client_user_mappings - _client_to_user_mapping = active_mapping_dict - if all_mapping_dict is not None: - _all_client_user_mappings = all_mapping_dict - logger.info("Client-user mapping initialized") - - def register_client_user_mapping(client_id: str, user_id: str): """ Register a client-user mapping for active clients. 
diff --git a/backends/advanced/src/advanced_omi_backend/clients/audio_stream_client.py b/backends/advanced/src/advanced_omi_backend/clients/audio_stream_client.py index 1f3c695a..5686f1ef 100644 --- a/backends/advanced/src/advanced_omi_backend/clients/audio_stream_client.py +++ b/backends/advanced/src/advanced_omi_backend/clients/audio_stream_client.py @@ -48,7 +48,11 @@ async def main(): import websockets from websockets.client import WebSocketClientProtocol -from advanced_omi_backend.constants import OMI_CHANNELS, OMI_SAMPLE_RATE, OMI_SAMPLE_WIDTH +from advanced_omi_backend.constants import ( + OMI_CHANNELS, + OMI_SAMPLE_RATE, + OMI_SAMPLE_WIDTH, +) logger = logging.getLogger(__name__) diff --git a/backends/advanced/src/advanced_omi_backend/clients/gdrive_audio_client.py b/backends/advanced/src/advanced_omi_backend/clients/gdrive_audio_client.py index 5a6271e1..9d93d884 100644 --- a/backends/advanced/src/advanced_omi_backend/clients/gdrive_audio_client.py +++ b/backends/advanced/src/advanced_omi_backend/clients/gdrive_audio_client.py @@ -1,6 +1,8 @@ -import os +import os + from google.oauth2.service_account import Credentials from googleapiclient.discovery import build + from advanced_omi_backend.app_config import get_app_config _drive_client_cache = None diff --git a/backends/advanced/src/advanced_omi_backend/controllers/__init__.py b/backends/advanced/src/advanced_omi_backend/controllers/__init__.py index 25d660f9..f40145ed 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/__init__.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/__init__.py @@ -3,11 +3,11 @@ """ from . 
import ( - memory_controller, - user_controller, - conversation_controller, client_controller, + conversation_controller, + memory_controller, system_controller, + user_controller, ) __all__ = [ diff --git a/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py index 1456d1be..f327a545 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py @@ -35,6 +35,7 @@ logger = logging.getLogger(__name__) audio_logger = logging.getLogger("audio_processing") + async def close_current_conversation(client_id: str, user: User, client_manager: ClientManager): """Close the current conversation for a specific client. Users can only close their own conversations.""" # Validate client ownership @@ -122,7 +123,9 @@ async def get_conversation(conversation_id: str, user: User): "processing_status": conversation.processing_status, "always_persist": conversation.always_persist, "end_reason": conversation.end_reason.value if conversation.end_reason else None, - "completed_at": conversation.completed_at.isoformat() if conversation.completed_at else None, + "completed_at": ( + conversation.completed_at.isoformat() if conversation.completed_at else None + ), "title": conversation.title, "summary": conversation.summary, "detailed_summary": conversation.detailed_summary, @@ -155,54 +158,65 @@ async def get_conversations(user: User, include_deleted: bool = False): # Regular users can only see their own conversations # Filter by deleted status if not include_deleted: - user_conversations = await Conversation.find( - Conversation.user_id == str(user.user_id), - Conversation.deleted == False - ).sort(-Conversation.created_at).to_list() + user_conversations = ( + await Conversation.find( + Conversation.user_id == str(user.user_id), Conversation.deleted == False + ) 
+ .sort(-Conversation.created_at) + .to_list() + ) else: - user_conversations = await Conversation.find( - Conversation.user_id == str(user.user_id) - ).sort(-Conversation.created_at).to_list() + user_conversations = ( + await Conversation.find(Conversation.user_id == str(user.user_id)) + .sort(-Conversation.created_at) + .to_list() + ) else: # Admins see all conversations # Filter by deleted status if not include_deleted: - user_conversations = await Conversation.find( - Conversation.deleted == False - ).sort(-Conversation.created_at).to_list() + user_conversations = ( + await Conversation.find(Conversation.deleted == False) + .sort(-Conversation.created_at) + .to_list() + ) else: - user_conversations = await Conversation.find_all().sort(-Conversation.created_at).to_list() + user_conversations = ( + await Conversation.find_all().sort(-Conversation.created_at).to_list() + ) # Build response with explicit curated fields - minimal for list view conversations = [] for conv in user_conversations: - conversations.append({ - "conversation_id": conv.conversation_id, - "user_id": conv.user_id, - "client_id": conv.client_id, - "audio_chunks_count": conv.audio_chunks_count, - "audio_total_duration": conv.audio_total_duration, - "audio_compression_ratio": conv.audio_compression_ratio, - "created_at": conv.created_at.isoformat() if conv.created_at else None, - "deleted": conv.deleted, - "deletion_reason": conv.deletion_reason, - "deleted_at": conv.deleted_at.isoformat() if conv.deleted_at else None, - "processing_status": conv.processing_status, - "always_persist": conv.always_persist, - "title": conv.title, - "summary": conv.summary, - "detailed_summary": conv.detailed_summary, - "active_transcript_version": conv.active_transcript_version, - "active_memory_version": conv.active_memory_version, - # Computed fields (counts only, no heavy data) - "segment_count": conv.segment_count, - "has_memory": conv.has_memory, - "memory_count": conv.memory_count, - 
"transcript_version_count": conv.transcript_version_count, - "memory_version_count": conv.memory_version_count, - "active_transcript_version_number": conv.active_transcript_version_number, - "active_memory_version_number": conv.active_memory_version_number, - }) + conversations.append( + { + "conversation_id": conv.conversation_id, + "user_id": conv.user_id, + "client_id": conv.client_id, + "audio_chunks_count": conv.audio_chunks_count, + "audio_total_duration": conv.audio_total_duration, + "audio_compression_ratio": conv.audio_compression_ratio, + "created_at": conv.created_at.isoformat() if conv.created_at else None, + "deleted": conv.deleted, + "deletion_reason": conv.deletion_reason, + "deleted_at": conv.deleted_at.isoformat() if conv.deleted_at else None, + "processing_status": conv.processing_status, + "always_persist": conv.always_persist, + "title": conv.title, + "summary": conv.summary, + "detailed_summary": conv.detailed_summary, + "active_transcript_version": conv.active_transcript_version, + "active_memory_version": conv.active_memory_version, + # Computed fields (counts only, no heavy data) + "segment_count": conv.segment_count, + "has_memory": conv.has_memory, + "memory_count": conv.memory_count, + "transcript_version_count": conv.transcript_version_count, + "memory_version_count": conv.memory_version_count, + "active_transcript_version_number": conv.active_transcript_version_number, + "active_memory_version_number": conv.active_memory_version_number, + } + ) return {"conversations": conversations} @@ -212,31 +226,43 @@ async def get_conversations(user: User, include_deleted: bool = False): async def _soft_delete_conversation(conversation: Conversation, user: User) -> JSONResponse: - """Mark conversation and chunks as deleted (soft delete).""" - conversation_id = conversation.conversation_id - - # Mark conversation as deleted - conversation.deleted = True - conversation.deletion_reason = "user_deleted" - conversation.deleted_at = datetime.utcnow() - 
await conversation.save() + """Mark conversation and chunks as deleted (soft delete). - logger.info(f"Soft deleted conversation {conversation_id} for user {user.user_id}") + Chunks are soft-deleted first so that a crash between the two writes + leaves chunks deleted but the conversation still active — a safe state + where a retry will complete the operation. + """ + conversation_id = conversation.conversation_id + deleted_at = datetime.utcnow() - # Soft delete all associated audio chunks + # 1. Soft delete audio chunks FIRST (safe failure mode: orphaned-deleted chunks) result = await AudioChunkDocument.find( AudioChunkDocument.conversation_id == conversation_id, - AudioChunkDocument.deleted == False # Only update non-deleted chunks - ).update_many({ - "$set": { - "deleted": True, - "deleted_at": datetime.utcnow() - } - }) + AudioChunkDocument.deleted == False, + ).update_many({"$set": {"deleted": True, "deleted_at": deleted_at}}) deleted_chunks = result.modified_count logger.info(f"Soft deleted {deleted_chunks} audio chunks for conversation {conversation_id}") + # 2.
Mark conversation as deleted + conversation.deleted = True + conversation.deletion_reason = "user_deleted" + conversation.deleted_at = deleted_at + try: + await conversation.save() + except Exception: + # Rollback: undo chunk soft-delete using the exact timestamp we set + logger.error( + f"Failed to soft-delete conversation {conversation_id}, rolling back chunk deletes" + ) + await AudioChunkDocument.find( + AudioChunkDocument.conversation_id == conversation_id, + AudioChunkDocument.deleted_at == deleted_at, + ).update_many({"$set": {"deleted": False, "deleted_at": None}}) + raise + + logger.info(f"Soft deleted conversation {conversation_id} for user {user.user_id}") + return JSONResponse( status_code=200, content={ @@ -244,21 +270,22 @@ async def _soft_delete_conversation(conversation: Conversation, user: User) -> J "deleted_chunks": deleted_chunks, "conversation_id": conversation_id, "client_id": conversation.client_id, - "deleted_at": conversation.deleted_at.isoformat() if conversation.deleted_at else None - } + "deleted_at": conversation.deleted_at.isoformat() if conversation.deleted_at else None, + }, ) async def _hard_delete_conversation(conversation: Conversation) -> JSONResponse: - """Permanently delete conversation and chunks (admin only).""" + """Permanently delete conversation and chunks (admin only). + + Chunks are deleted first so that a crash between the two writes + leaves the conversation document intact — an admin can retry the + delete since the conversation still exists. + """ conversation_id = conversation.conversation_id client_id = conversation.client_id - # Delete conversation document - await conversation.delete() - logger.info(f"Hard deleted conversation {conversation_id}") - - # Delete all audio chunks + # 1.
Delete audio chunks FIRST (no rollback possible for hard deletes) result = await AudioChunkDocument.find( AudioChunkDocument.conversation_id == conversation_id ).delete() @@ -266,14 +293,26 @@ async def _hard_delete_conversation(conversation: Conversation) -> JSONResponse: deleted_chunks = result.deleted_count logger.info(f"Hard deleted {deleted_chunks} audio chunks for conversation {conversation_id}") + # 2. Delete conversation document + try: + await conversation.delete() + except Exception: + logger.error( + f"Failed to hard-delete conversation {conversation_id} after " + f"deleting {deleted_chunks} chunks. Conversation document remains — retry delete." + ) + raise + + logger.info(f"Hard deleted conversation {conversation_id}") + return JSONResponse( status_code=200, content={ "message": f"Successfully permanently deleted conversation '{conversation_id}'", "deleted_chunks": deleted_chunks, "conversation_id": conversation_id, - "client_id": client_id - } + "client_id": client_id, + }, ) @@ -288,16 +327,21 @@ async def delete_conversation(conversation_id: str, user: User, permanent: bool """ try: # Create masked identifier for logging - masked_id = f"{conversation_id[:8]}...{conversation_id[-4:]}" if len(conversation_id) > 12 else "***" - logger.info(f"Attempting to {'permanently ' if permanent else ''}delete conversation: {masked_id}") + masked_id = ( + f"{conversation_id[:8]}...{conversation_id[-4:]}" + if len(conversation_id) > 12 + else "***" + ) + logger.info( + f"Attempting to {'permanently ' if permanent else ''}delete conversation: {masked_id}" + ) # Find the conversation using Beanie conversation = await Conversation.find_one(Conversation.conversation_id == conversation_id) if not conversation: return JSONResponse( - status_code=404, - content={"error": f"Conversation '{conversation_id}' not found"} + status_code=404, content={"error": f"Conversation '{conversation_id}' not found"} ) # Check ownership for non-admin users @@ -309,8 +353,8 @@ async def
delete_conversation(conversation_id: str, user: User, permanent: bool status_code=403, content={ "error": "Access forbidden. You can only delete your own conversations.", - "details": f"Conversation '{conversation_id}' does not belong to your account." - } + "details": f"Conversation '{conversation_id}' does not belong to your account.", + }, ) # Hard delete (admin only, permanent flag) @@ -323,8 +367,7 @@ async def delete_conversation(conversation_id: str, user: User, permanent: bool except Exception as e: logger.error(f"Error deleting conversation {conversation_id}: {e}") return JSONResponse( - status_code=500, - content={"error": f"Failed to delete conversation: {str(e)}"} + status_code=500, content={"error": f"Failed to delete conversation: {str(e)}"} ) @@ -337,48 +380,45 @@ async def restore_conversation(conversation_id: str, user: User) -> JSONResponse user: Requesting user """ try: - conversation = await Conversation.find_one( - Conversation.conversation_id == conversation_id - ) + conversation = await Conversation.find_one(Conversation.conversation_id == conversation_id) if not conversation: - return JSONResponse( - status_code=404, - content={"error": "Conversation not found"} - ) + return JSONResponse(status_code=404, content={"error": "Conversation not found"}) # Permission check if not user.is_superuser and conversation.user_id != str(user.user_id): - return JSONResponse( - status_code=403, - content={"error": "Access denied"} - ) + return JSONResponse(status_code=403, content={"error": "Access denied"}) if not conversation.deleted: - return JSONResponse( - status_code=400, - content={"error": "Conversation is not deleted"} - ) - - # Restore conversation - conversation.deleted = False - conversation.deletion_reason = None - conversation.deleted_at = None - await conversation.save() + return JSONResponse(status_code=400, content={"error": "Conversation is not deleted"}) - # Restore audio chunks + # 1. 
Restore audio chunks FIRST (safe failure mode: restored chunks, conversation still deleted) + original_deleted_at = conversation.deleted_at result = await AudioChunkDocument.find( AudioChunkDocument.conversation_id == conversation_id, - AudioChunkDocument.deleted == True - ).update_many({ - "$set": { - "deleted": False, - "deleted_at": None - } - }) + AudioChunkDocument.deleted == True, + ).update_many({"$set": {"deleted": False, "deleted_at": None}}) restored_chunks = result.modified_count + # 2. Restore conversation + conversation.deleted = False + conversation.deletion_reason = None + conversation.deleted_at = None + try: + await conversation.save() + except Exception: + # Rollback: re-soft-delete the chunks we just restored + logger.error( + f"Failed to restore conversation {conversation_id}, " + f"rolling back {restored_chunks} chunk restores" + ) + await AudioChunkDocument.find( + AudioChunkDocument.conversation_id == conversation_id, + AudioChunkDocument.deleted == False, + ).update_many({"$set": {"deleted": True, "deleted_at": original_deleted_at}}) + raise + logger.info( f"Restored conversation {conversation_id} " f"({restored_chunks} chunks) for user {user.user_id}" @@ -390,14 +430,13 @@ async def restore_conversation(conversation_id: str, user: User) -> JSONResponse "message": f"Successfully restored conversation '{conversation_id}'", "restored_chunks": restored_chunks, "conversation_id": conversation_id, - } + }, ) except Exception as e: logger.error(f"Error restoring conversation {conversation_id}: {e}") return JSONResponse( - status_code=500, - content={"error": f"Failed to restore conversation: {str(e)}"} + status_code=500, content={"error": f"Failed to restore conversation: {str(e)}"} ) @@ -405,13 +444,20 @@ async def reprocess_transcript(conversation_id: str, user: User): """Reprocess transcript for a conversation. 
Users can only reprocess their own conversations.""" try: # Find the conversation using Beanie - conversation_model = await Conversation.find_one(Conversation.conversation_id == conversation_id) + conversation_model = await Conversation.find_one( + Conversation.conversation_id == conversation_id + ) if not conversation_model: return JSONResponse(status_code=404, content={"error": "Conversation not found"}) # Check ownership for non-admin users if not user.is_superuser and conversation_model.user_id != str(user.user_id): - return JSONResponse(status_code=403, content={"error": "Access forbidden. You can only reprocess your own conversations."}) + return JSONResponse( + status_code=403, + content={ + "error": "Access forbidden. You can only reprocess your own conversations." + }, + ) # Get audio_uuid from conversation # Validate audio chunks exist in MongoDB @@ -424,8 +470,8 @@ async def reprocess_transcript(conversation_id: str, user: User): status_code=404, content={ "error": "No audio data found for this conversation", - "details": f"Conversation '{conversation_id}' exists but has no audio chunks in MongoDB" - } + "details": f"Conversation '{conversation_id}' exists but has no audio chunks in MongoDB", + }, ) # Create new transcript version ID @@ -446,13 +492,15 @@ async def reprocess_transcript(conversation_id: str, user: User): result_ttl=JOB_RESULT_TTL, job_id=f"reprocess_{conversation_id[:8]}", description=f"Transcribe audio for {conversation_id[:8]}", - meta={'conversation_id': conversation_id} + meta={"conversation_id": conversation_id}, ) logger.info(f"📥 RQ: Enqueued transcription job {transcript_job.id}") # Check if speaker recognition is enabled - speaker_config = get_service_config('speaker_recognition') - speaker_enabled = speaker_config.get('enabled', True) # Default to True for backward compatibility + speaker_config = get_service_config("speaker_recognition") + speaker_enabled = speaker_config.get( + "enabled", True + ) # Default to True for
backward compatibility # Job 2: Recognize speakers (conditional - only if enabled) speaker_dependency = transcript_job # Start with transcription job @@ -468,12 +516,16 @@ async def reprocess_transcript(conversation_id: str, user: User): result_ttl=JOB_RESULT_TTL, job_id=f"speaker_{conversation_id[:8]}", description=f"Recognize speakers for {conversation_id[:8]}", - meta={'conversation_id': conversation_id} + meta={"conversation_id": conversation_id}, ) speaker_dependency = speaker_job # Chain for next job - logger.info(f"📥 RQ: Enqueued speaker recognition job {speaker_job.id} (depends on {transcript_job.id})") + logger.info( + f"📥 RQ: Enqueued speaker recognition job {speaker_job.id} (depends on {transcript_job.id})" + ) else: - logger.info(f"⏭️ Speaker recognition disabled, skipping speaker job for conversation {conversation_id[:8]}") + logger.info( + f"⏭️ Speaker recognition disabled, skipping speaker job for conversation {conversation_id[:8]}" + ) # Job 3: Extract memories # Depends on speaker job if it was created, otherwise depends on transcription @@ -486,12 +538,16 @@ async def reprocess_transcript(conversation_id: str, user: User): result_ttl=JOB_RESULT_TTL, job_id=f"memory_{conversation_id[:8]}", description=f"Extract memories for {conversation_id[:8]}", - meta={'conversation_id': conversation_id} + meta={"conversation_id": conversation_id}, ) if speaker_job: - logger.info(f"📥 RQ: Enqueued memory job {memory_job.id} (depends on speaker job {speaker_job.id})") + logger.info( + f"📥 RQ: Enqueued memory job {memory_job.id} (depends on speaker job {speaker_job.id})" + ) else: - logger.info(f"📥 RQ: Enqueued memory job {memory_job.id} (depends on transcript job {transcript_job.id})") + logger.info( + f"📥 RQ: Enqueued memory job {memory_job.id} (depends on transcript job {transcript_job.id})" + ) # Job 4: Regenerate title/summary (depends on memory job to avoid race condition # and to ensure fresh memories are available for
context-enriched summaries) @@ -503,37 +559,52 @@ async def reprocess_transcript(conversation_id: str, user: User): depends_on=memory_job, job_id=f"title_summary_{conversation_id[:8]}", description=f"Regenerate title/summary for {conversation_id[:8]}", - meta={'conversation_id': conversation_id, 'trigger': 'reprocess_transcript'} + meta={"conversation_id": conversation_id, "trigger": "reprocess_transcript"}, + ) + logger.info( + f"📥 RQ: Enqueued title/summary job {title_summary_job.id} (depends on memory job {memory_job.id})" ) - logger.info(f"📥 RQ: Enqueued title/summary job {title_summary_job.id} (depends on memory job {memory_job.id})") job = transcript_job # For backward compatibility with return value - logger.info(f"Created transcript reprocessing job {job.id} (version: {version_id}) for conversation {conversation_id}") + logger.info( + f"Created transcript reprocessing job {job.id} (version: {version_id}) for conversation {conversation_id}" + ) - return JSONResponse(content={ - "message": f"Transcript reprocessing started for conversation {conversation_id}", - "job_id": job.id, - "title_summary_job_id": title_summary_job.id, - "version_id": version_id, - "status": "queued" - }) + return JSONResponse( + content={ + "message": f"Transcript reprocessing started for conversation {conversation_id}", + "job_id": job.id, + "title_summary_job_id": title_summary_job.id, + "version_id": version_id, + "status": "queued", + } + ) except Exception as e: logger.error(f"Error starting transcript reprocessing: {e}") - return JSONResponse(status_code=500, content={"error": "Error starting transcript reprocessing"}) + return JSONResponse( + status_code=500, content={"error": "Error starting transcript reprocessing"} + ) async def reprocess_memory(conversation_id: str, transcript_version_id: str, user: User): """Reprocess memory extraction for a specific transcript version.
Users can only reprocess their own conversations.""" try: # Find the conversation using Beanie - conversation_model = await Conversation.find_one(Conversation.conversation_id == conversation_id) + conversation_model = await Conversation.find_one( + Conversation.conversation_id == conversation_id + ) if not conversation_model: return JSONResponse(status_code=404, content={"error": "Conversation not found"}) # Check ownership for non-admin users if not user.is_superuser and conversation_model.user_id != str(user.user_id): - return JSONResponse(status_code=403, content={"error": "Access forbidden. You can only reprocess your own conversations."}) + return JSONResponse( + status_code=403, + content={ + "error": "Access forbidden. You can only reprocess your own conversations." + }, + ) # Resolve transcript version ID # Handle special "active" version ID @@ -554,7 +625,8 @@ async def reprocess_memory(conversation_id: str, transcript_version_id: str, use if not transcript_version: return JSONResponse( - status_code=404, content={"error": f"Transcript version '{transcript_version_id}' not found"} + status_code=404, + content={"error": f"Transcript version '{transcript_version_id}' not found"}, ) # Create new memory version ID @@ -563,33 +635,32 @@ async def reprocess_memory(conversation_id: str, transcript_version_id: str, use # Enqueue memory processing job with RQ (RQ handles job tracking) job = enqueue_memory_processing( - client_id=conversation_model.client_id, - user_id=str(user.user_id), - user_email=user.email, conversation_id=conversation_id, - priority=JobPriority.NORMAL + priority=JobPriority.NORMAL, ) - logger.info(f"Created memory reprocessing job {job.id} (version {version_id}) for conversation {conversation_id}") + logger.info( + f"Created memory reprocessing job {job.id} (version {version_id}) for conversation {conversation_id}" + ) - return JSONResponse(content={ - "message": f"Memory reprocessing started for conversation {conversation_id}", - "job_id": 
job.id, - "version_id": version_id, - "transcript_version_id": transcript_version_id, - "status": "queued" - }) + return JSONResponse( + content={ + "message": f"Memory reprocessing started for conversation {conversation_id}", + "job_id": job.id, + "version_id": version_id, + "transcript_version_id": transcript_version_id, + "status": "queued", + } + ) except Exception as e: logger.error(f"Error starting memory reprocessing: {e}") - return JSONResponse(status_code=500, content={"error": "Error starting memory reprocessing"}) + return JSONResponse( + status_code=500, content={"error": "Error starting memory reprocessing"} + ) -async def reprocess_speakers( - conversation_id: str, - transcript_version_id: str, - user: User -): +async def reprocess_speakers(conversation_id: str, transcript_version_id: str, user: User): """ Reprocess speaker identification for a specific transcript version. Users can only reprocess their own conversations. @@ -603,16 +674,15 @@ async def reprocess_speakers( Conversation.conversation_id == conversation_id ) if not conversation_model: - return JSONResponse( - status_code=404, - content={"error": "Conversation not found"} - ) + return JSONResponse(status_code=404, content={"error": "Conversation not found"}) # Check ownership for non-admin users if not user.is_superuser and conversation_model.user_id != str(user.user_id): return JSONResponse( status_code=403, - content={"error": "Access forbidden. You can only reprocess your own conversations."} + content={ + "error": "Access forbidden. You can only reprocess your own conversations." + }, ) # 2. 
Resolve source transcript version ID (handle "active" special case) @@ -621,8 +691,7 @@ async def reprocess_speakers( active_version_id = conversation_model.active_transcript_version if not active_version_id: return JSONResponse( - status_code=404, - content={"error": "No active transcript version found"} + status_code=404, content={"error": "No active transcript version found"} ) source_version_id = active_version_id @@ -636,14 +705,16 @@ async def reprocess_speakers( if not source_version: return JSONResponse( status_code=404, - content={"error": f"Transcript version '{source_version_id}' not found"} + content={"error": f"Transcript version '{source_version_id}' not found"}, ) # 4. Validate transcript has content and words (or provider-diarized segments) if not source_version.transcript: return JSONResponse( status_code=400, - content={"error": "Cannot re-diarize empty transcript. Transcript version has no text."} + content={ + "error": "Cannot re-diarize empty transcript. Transcript version has no text." + }, ) provider_capabilities = source_version.metadata.get("provider_capabilities", {}) @@ -655,18 +726,20 @@ async def reprocess_speakers( if not source_version.words and not (provider_has_diarization and source_version.segments): return JSONResponse( status_code=400, - content={"error": "Cannot re-diarize transcript without word timings. Words are required for diarization."} + content={ + "error": "Cannot re-diarize transcript without word timings. Words are required for diarization." + }, ) # 5. 
Check if speaker recognition is enabled - speaker_config = get_service_config('speaker_recognition') - if not speaker_config.get('enabled', True): + speaker_config = get_service_config("speaker_recognition") + if not speaker_config.get("enabled", True): return JSONResponse( status_code=400, content={ "error": "Speaker recognition is disabled", - "details": "Enable speaker service in config to use this feature" - } + "details": "Enable speaker service in config to use this feature", + }, ) # 6. Create NEW transcript version (copy text/words, segments for provider-diarized) @@ -678,7 +751,7 @@ async def reprocess_speakers( new_metadata = { "reprocessing_type": "speaker_diarization", "source_version_id": source_version_id, - "trigger": "manual_reprocess" + "trigger": "manual_reprocess", } if provider_has_diarization: new_segments = source_version.segments # COPY provider segments @@ -695,7 +768,7 @@ async def reprocess_speakers( model=source_version.model, processing_time_seconds=None, # Will be updated by job metadata=new_metadata, - set_as_active=True # Set new version as active + set_as_active=True, # Set new version as active ) # Carry over diarization_source so speaker job knows to use segment identification @@ -720,11 +793,11 @@ async def reprocess_speakers( job_id=f"reprocess_speaker_{conversation_id[:12]}", description=f"Re-diarize speakers for {conversation_id[:8]}", meta={ - 'conversation_id': conversation_id, - 'version_id': new_version_id, - 'source_version_id': source_version_id, - 'trigger': 'reprocess' - } + "conversation_id": conversation_id, + "version_id": new_version_id, + "source_version_id": source_version_id, + "trigger": "reprocess", + }, ) logger.info( @@ -741,10 +814,7 @@ async def reprocess_speakers( result_ttl=JOB_RESULT_TTL, job_id=f"memory_{conversation_id[:12]}", description=f"Extract memories for {conversation_id[:8]}", - meta={ - 'conversation_id': conversation_id, - 'trigger': 'reprocess_after_speaker' - } + meta={"conversation_id": 
conversation_id, "trigger": "reprocess_after_speaker"}, ) logger.info( @@ -763,30 +833,30 @@ async def reprocess_speakers( depends_on=memory_job, job_id=f"title_summary_{conversation_id[:12]}", description=f"Regenerate title/summary for {conversation_id[:8]}", - meta={'conversation_id': conversation_id, 'trigger': 'reprocess_after_speaker'} + meta={"conversation_id": conversation_id, "trigger": "reprocess_after_speaker"}, ) logger.info( - f"Chained title/summary job {title_summary_job.id} " - f"after memory job {memory_job.id}" + f"Chained title/summary job {title_summary_job.id} " f"after memory job {memory_job.id}" ) # 9. Return job information - return JSONResponse(content={ - "message": "Speaker reprocessing started", - "job_id": speaker_job.id, - "memory_job_id": memory_job.id, - "title_summary_job_id": title_summary_job.id, - "version_id": new_version_id, # NEW version ID - "source_version_id": source_version_id, # Original version used as source - "status": "queued" - }) + return JSONResponse( + content={ + "message": "Speaker reprocessing started", + "job_id": speaker_job.id, + "memory_job_id": memory_job.id, + "title_summary_job_id": title_summary_job.id, + "version_id": new_version_id, # NEW version ID + "source_version_id": source_version_id, # Original version used as source + "status": "queued", + } + ) except Exception as e: logger.error(f"Error starting speaker reprocessing: {e}") return JSONResponse( - status_code=500, - content={"error": "Error starting speaker reprocessing"} + status_code=500, content={"error": "Error starting speaker reprocessing"} ) @@ -794,13 +864,18 @@ async def activate_transcript_version(conversation_id: str, version_id: str, use """Activate a specific transcript version. 
Users can only modify their own conversations.""" try: # Find the conversation using Beanie - conversation_model = await Conversation.find_one(Conversation.conversation_id == conversation_id) + conversation_model = await Conversation.find_one( + Conversation.conversation_id == conversation_id + ) if not conversation_model: return JSONResponse(status_code=404, content={"error": "Conversation not found"}) # Check ownership for non-admin users if not user.is_superuser and conversation_model.user_id != str(user.user_id): - return JSONResponse(status_code=403, content={"error": "Access forbidden. You can only modify your own conversations."}) + return JSONResponse( + status_code=403, + content={"error": "Access forbidden. You can only modify your own conversations."}, + ) # Activate the transcript version using Beanie model method success = conversation_model.set_active_transcript_version(version_id) @@ -814,29 +889,40 @@ async def activate_transcript_version(conversation_id: str, version_id: str, use # TODO: Trigger speaker recognition if configured # This would integrate with existing speaker recognition logic - logger.info(f"Activated transcript version {version_id} for conversation {conversation_id} by user {user.user_id}") + logger.info( + f"Activated transcript version {version_id} for conversation {conversation_id} by user {user.user_id}" + ) - return JSONResponse(content={ - "message": f"Transcript version {version_id} activated successfully", - "active_transcript_version": version_id - }) + return JSONResponse( + content={ + "message": f"Transcript version {version_id} activated successfully", + "active_transcript_version": version_id, + } + ) except Exception as e: logger.error(f"Error activating transcript version: {e}") - return JSONResponse(status_code=500, content={"error": "Error activating transcript version"}) + return JSONResponse( + status_code=500, content={"error": "Error activating transcript version"} + ) async def 
activate_memory_version(conversation_id: str, version_id: str, user: User): """Activate a specific memory version. Users can only modify their own conversations.""" try: # Find the conversation using Beanie - conversation_model = await Conversation.find_one(Conversation.conversation_id == conversation_id) + conversation_model = await Conversation.find_one( + Conversation.conversation_id == conversation_id + ) if not conversation_model: return JSONResponse(status_code=404, content={"error": "Conversation not found"}) # Check ownership for non-admin users if not user.is_superuser and conversation_model.user_id != str(user.user_id): - return JSONResponse(status_code=403, content={"error": "Access forbidden. You can only modify your own conversations."}) + return JSONResponse( + status_code=403, + content={"error": "Access forbidden. You can only modify your own conversations."}, + ) # Activate the memory version using Beanie model method success = conversation_model.set_active_memory_version(version_id) @@ -847,12 +933,16 @@ async def activate_memory_version(conversation_id: str, version_id: str, user: U await conversation_model.save() - logger.info(f"Activated memory version {version_id} for conversation {conversation_id} by user {user.user_id}") + logger.info( + f"Activated memory version {version_id} for conversation {conversation_id} by user {user.user_id}" + ) - return JSONResponse(content={ - "message": f"Memory version {version_id} activated successfully", - "active_memory_version": version_id - }) + return JSONResponse( + content={ + "message": f"Memory version {version_id} activated successfully", + "active_memory_version": version_id, + } + ) except Exception as e: logger.error(f"Error activating memory version: {e}") @@ -863,28 +953,33 @@ async def get_conversation_version_history(conversation_id: str, user: User): """Get version history for a conversation. 
Users can only access their own conversations.""" try: # Find the conversation using Beanie to check ownership - conversation_model = await Conversation.find_one(Conversation.conversation_id == conversation_id) + conversation_model = await Conversation.find_one( + Conversation.conversation_id == conversation_id + ) if not conversation_model: return JSONResponse(status_code=404, content={"error": "Conversation not found"}) # Check ownership for non-admin users if not user.is_superuser and conversation_model.user_id != str(user.user_id): - return JSONResponse(status_code=403, content={"error": "Access forbidden. You can only access your own conversations."}) + return JSONResponse( + status_code=403, + content={"error": "Access forbidden. You can only access your own conversations."}, + ) # Get version history from model # Convert datetime objects to ISO strings for JSON serialization transcript_versions = [] for v in conversation_model.transcript_versions: version_dict = v.model_dump() - if version_dict.get('created_at'): - version_dict['created_at'] = version_dict['created_at'].isoformat() + if version_dict.get("created_at"): + version_dict["created_at"] = version_dict["created_at"].isoformat() transcript_versions.append(version_dict) memory_versions = [] for v in conversation_model.memory_versions: version_dict = v.model_dump() - if version_dict.get('created_at'): - version_dict['created_at'] = version_dict['created_at'].isoformat() + if version_dict.get("created_at"): + version_dict["created_at"] = version_dict["created_at"].isoformat() memory_versions.append(version_dict) history = { @@ -892,7 +987,7 @@ async def get_conversation_version_history(conversation_id: str, user: User): "active_transcript_version": conversation_model.active_transcript_version, "active_memory_version": conversation_model.active_memory_version, "transcript_versions": transcript_versions, - "memory_versions": memory_versions + "memory_versions": memory_versions, } return 
JSONResponse(content=history) diff --git a/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py index c3fef8fa..76075d62 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py @@ -9,20 +9,20 @@ """ import asyncio -import os import logging +import os import uuid from datetime import datetime -from typing import Dict, Any, Optional +from typing import Any, Dict, Optional import redis from rq import Queue, Worker from rq.job import Job, JobStatus -from rq.registry import ScheduledJobRegistry, DeferredJobRegistry +from rq.registry import DeferredJobRegistry, ScheduledJobRegistry -from advanced_omi_backend.models.job import JobPriority -from advanced_omi_backend.models.conversation import Conversation from advanced_omi_backend.config_loader import get_service_config +from advanced_omi_backend.models.conversation import Conversation +from advanced_omi_backend.models.job import JobPriority logger = logging.getLogger(__name__) @@ -346,8 +346,10 @@ def start_streaming_jobs( - user_email is fetched from the database when needed. - always_persist setting is read from global config by the audio persistence job. 
""" - from advanced_omi_backend.workers.transcription_jobs import stream_speech_detection_job from advanced_omi_backend.workers.audio_jobs import audio_streaming_persistence_job + from advanced_omi_backend.workers.transcription_jobs import ( + stream_speech_detection_job, + ) # Enqueue speech detection job speech_job = transcription_queue.enqueue( @@ -444,9 +446,12 @@ def start_post_conversation_jobs( Returns: Dict with job IDs for speaker_recognition, memory, title_summary, event_dispatch """ - from advanced_omi_backend.workers.speaker_jobs import recognise_speakers_job + from advanced_omi_backend.workers.conversation_jobs import ( + dispatch_conversation_complete_event_job, + generate_title_summary_job, + ) from advanced_omi_backend.workers.memory_jobs import process_memory_job - from advanced_omi_backend.workers.conversation_jobs import generate_title_summary_job, dispatch_conversation_complete_event_job + from advanced_omi_backend.workers.speaker_jobs import recognise_speakers_job version_id = transcript_version_id or str(uuid.uuid4()) @@ -643,6 +648,7 @@ def get_queue_health() -> Dict[str, Any]: async def cleanup_stuck_stream_workers(request): """Clean up stuck Redis Stream consumers and pending messages from all active streams.""" import time + from fastapi.responses import JSONResponse try: diff --git a/backends/advanced/src/advanced_omi_backend/controllers/session_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/session_controller.py index fe9b87cd..7d7d5f2e 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/session_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/session_controller.py @@ -9,7 +9,7 @@ import logging import time -from typing import Dict, List, Optional, Literal +from typing import Dict, List, Literal, Optional from fastapi.responses import JSONResponse @@ -197,10 +197,10 @@ async def increment_session_conversation_count(redis_client, session_id: str) -> async def 
get_streaming_status(request): """Get status of active streaming sessions and Redis Streams health.""" from advanced_omi_backend.controllers.queue_controller import ( - transcription_queue, - memory_queue, + all_jobs_complete_for_client, default_queue, - all_jobs_complete_for_client + memory_queue, + transcription_queue, ) try: @@ -497,6 +497,7 @@ async def get_streaming_status(request): async def cleanup_old_sessions(request, max_age_seconds: int = 3600): """Clean up old session tracking metadata and old audio streams from Redis.""" import time + from fastapi.responses import JSONResponse try: diff --git a/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py index 55a4b43e..53e8ff95 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py @@ -15,11 +15,11 @@ from advanced_omi_backend.config import ( get_diarization_settings as load_diarization_settings, - get_misc_settings as load_misc_settings, - save_misc_settings, ) +from advanced_omi_backend.config import get_misc_settings as load_misc_settings from advanced_omi_backend.config import ( save_diarization_settings, + save_misc_settings, ) from advanced_omi_backend.config_loader import get_plugins_yml_path from advanced_omi_backend.model_registry import _find_config_path, load_models_config @@ -47,7 +47,7 @@ async def get_config_diagnostics(): # Test OmegaConf configuration loading try: from advanced_omi_backend.config_loader import load_config - + # Capture warnings during config load with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") @@ -1102,8 +1102,8 @@ async def update_plugin_config_structured(plugin_id: str, config: dict) -> dict: Success message with list of updated files """ try: - from advanced_omi_backend.services.plugin_service import discover_plugins import 
advanced_omi_backend.plugins + from advanced_omi_backend.services.plugin_service import discover_plugins # Validate plugin exists discovered_plugins = discover_plugins() @@ -1254,7 +1254,10 @@ async def test_plugin_connection(plugin_id: str, config: dict) -> dict: Test result with success status and details """ try: - from advanced_omi_backend.services.plugin_service import discover_plugins, expand_env_vars + from advanced_omi_backend.services.plugin_service import ( + discover_plugins, + expand_env_vars, + ) # Validate plugin exists discovered_plugins = discover_plugins() diff --git a/backends/advanced/src/advanced_omi_backend/controllers/user_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/user_controller.py index a1b9c140..ce801327 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/user_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/user_controller.py @@ -11,13 +11,13 @@ from advanced_omi_backend.auth import ( ADMIN_EMAIL, - get_user_db, UserManager, + get_user_db, ) from advanced_omi_backend.client_manager import get_user_clients_all from advanced_omi_backend.database import db, users_col -from advanced_omi_backend.services.memory import get_memory_service from advanced_omi_backend.models.conversation import Conversation +from advanced_omi_backend.services.memory import get_memory_service from advanced_omi_backend.users import User, UserCreate, UserUpdate logger = logging.getLogger(__name__) diff --git a/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py index 89e5b46f..fcf80de4 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py @@ -15,18 +15,24 @@ from functools import partial from typing import Optional -from fastapi import WebSocket, WebSocketDisconnect, Query 
-from starlette.websockets import WebSocketState -from friend_lite.decoder import OmiOpusDecoder import redis.asyncio as redis +from fastapi import Query, WebSocket, WebSocketDisconnect +from friend_lite.decoder import OmiOpusDecoder +from starlette.websockets import WebSocketState from advanced_omi_backend.auth import websocket_auth from advanced_omi_backend.client_manager import generate_client_id, get_client_manager -from advanced_omi_backend.constants import OMI_CHANNELS, OMI_SAMPLE_RATE, OMI_SAMPLE_WIDTH +from advanced_omi_backend.constants import ( + OMI_CHANNELS, + OMI_SAMPLE_RATE, + OMI_SAMPLE_WIDTH, +) from advanced_omi_backend.controllers.session_controller import mark_session_complete -from advanced_omi_backend.utils.audio_utils import process_audio_chunk from advanced_omi_backend.services.audio_stream import AudioStreamProducer -from advanced_omi_backend.services.audio_stream.producer import get_audio_stream_producer +from advanced_omi_backend.services.audio_stream.producer import ( + get_audio_stream_producer, +) +from advanced_omi_backend.utils.audio_utils import process_audio_chunk # Thread pool executors for audio decoding _DEC_IO_EXECUTOR = concurrent.futures.ThreadPoolExecutor( @@ -227,7 +233,9 @@ async def cleanup_client_state(client_id: str): async_redis = redis.from_url(redis_url, decode_responses=False) # Get audio stream producer for finalization - from advanced_omi_backend.services.audio_stream.producer import get_audio_stream_producer + from advanced_omi_backend.services.audio_stream.producer import ( + get_audio_stream_producer, + ) audio_stream_producer = get_audio_stream_producer() # Find all session keys for this client and mark them complete @@ -442,8 +450,11 @@ async def _initialize_streaming_session( ) # Store audio format in Redis session (not in ClientState) - from advanced_omi_backend.services.audio_stream.producer import get_audio_stream_producer import json + + from advanced_omi_backend.services.audio_stream.producer import ( + 
get_audio_stream_producer, + ) session_key = f"audio:session:{client_state.stream_session_id}" redis_client = audio_stream_producer.redis_client await redis_client.hset(session_key, "audio_format", json.dumps(audio_format)) @@ -995,10 +1006,12 @@ async def _process_rolling_batch( # Enqueue transcription job from advanced_omi_backend.controllers.queue_controller import ( + JOB_RESULT_TTL, transcription_queue, - JOB_RESULT_TTL ) - from advanced_omi_backend.workers.transcription_jobs import transcribe_full_audio_job + from advanced_omi_backend.workers.transcription_jobs import ( + transcribe_full_audio_job, + ) version_id = str(uuid.uuid4()) transcribe_job_id = f"transcribe_rolling_{conversation_id[:12]}_{batch_number}" @@ -1108,11 +1121,13 @@ async def _process_batch_audio_complete( # Enqueue batch transcription job first (file uploads need transcription) from advanced_omi_backend.controllers.queue_controller import ( + JOB_RESULT_TTL, start_post_conversation_jobs, transcription_queue, - JOB_RESULT_TTL ) - from advanced_omi_backend.workers.transcription_jobs import transcribe_full_audio_job + from advanced_omi_backend.workers.transcription_jobs import ( + transcribe_full_audio_job, + ) version_id = str(uuid.uuid4()) transcribe_job_id = f"transcribe_{conversation_id[:12]}" diff --git a/backends/advanced/src/advanced_omi_backend/llm_client.py b/backends/advanced/src/advanced_omi_backend/llm_client.py index e6b5a14d..417eed5f 100644 --- a/backends/advanced/src/advanced_omi_backend/llm_client.py +++ b/backends/advanced/src/advanced_omi_backend/llm_client.py @@ -9,12 +9,13 @@ import logging import os from abc import ABC, abstractmethod -from typing import Dict, Any, Optional - -from advanced_omi_backend.services.memory.config import load_config_yml as _load_root_config -from advanced_omi_backend.services.memory.config import resolve_value as _resolve_value +from typing import Any, Dict, Optional from advanced_omi_backend.model_registry import get_models_registry +from 
advanced_omi_backend.services.memory.config import ( + load_config_yml as _load_root_config, +) +from advanced_omi_backend.services.memory.config import resolve_value as _resolve_value logger = logging.getLogger(__name__) diff --git a/backends/advanced/src/advanced_omi_backend/model_registry.py b/backends/advanced/src/advanced_omi_backend/model_registry.py index 53bc912e..5e88d91a 100644 --- a/backends/advanced/src/advanced_omi_backend/model_registry.py +++ b/backends/advanced/src/advanced_omi_backend/model_registry.py @@ -9,12 +9,19 @@ from __future__ import annotations -import yaml +import logging from pathlib import Path from typing import Any, Dict, List, Optional -import logging -from pydantic import BaseModel, Field, field_validator, model_validator, ConfigDict, ValidationError +import yaml +from pydantic import ( + BaseModel, + ConfigDict, + Field, + ValidationError, + field_validator, + model_validator, +) # Import config merging for defaults.yml + config.yml integration # OmegaConf handles environment variable resolution (${VAR:-default} syntax) diff --git a/backends/advanced/src/advanced_omi_backend/models/annotation.py b/backends/advanced/src/advanced_omi_backend/models/annotation.py index b2a986a5..ac8ceefe 100644 --- a/backends/advanced/src/advanced_omi_backend/models/annotation.py +++ b/backends/advanced/src/advanced_omi_backend/models/annotation.py @@ -5,10 +5,10 @@ Enables both user edits and AI-powered suggestions. 
""" +import uuid +from datetime import datetime, timezone from enum import Enum from typing import Optional -from datetime import datetime, timezone -import uuid from beanie import Document, Indexed from pydantic import BaseModel, Field diff --git a/backends/advanced/src/advanced_omi_backend/models/audio_chunk.py b/backends/advanced/src/advanced_omi_backend/models/audio_chunk.py index cea20ef7..5f3b4c1d 100644 --- a/backends/advanced/src/advanced_omi_backend/models/audio_chunk.py +++ b/backends/advanced/src/advanced_omi_backend/models/audio_chunk.py @@ -8,9 +8,10 @@ from datetime import datetime from typing import Optional -from pydantic import ConfigDict, Field, field_serializer + from beanie import Document, Indexed from bson import Binary +from pydantic import ConfigDict, Field, field_serializer class AudioChunkDocument(Document): diff --git a/backends/advanced/src/advanced_omi_backend/models/conversation.py b/backends/advanced/src/advanced_omi_backend/models/conversation.py index fd97b14a..2ec45f33 100644 --- a/backends/advanced/src/advanced_omi_backend/models/conversation.py +++ b/backends/advanced/src/advanced_omi_backend/models/conversation.py @@ -5,13 +5,13 @@ transcript versions, and memory versions. 
""" +import uuid from datetime import datetime -from typing import Dict, List, Optional, Any, Union -from pydantic import BaseModel, Field, model_validator, computed_field, field_validator from enum import Enum -import uuid +from typing import Any, Dict, List, Optional, Union from beanie import Document, Indexed +from pydantic import BaseModel, Field, computed_field, field_validator, model_validator from pymongo import IndexModel @@ -56,6 +56,7 @@ class SpeakerSegment(BaseModel): end: float = Field(description="End time in seconds") text: str = Field(description="Transcript text for this segment") speaker: str = Field(description="Speaker identifier") + identified_as: Optional[str] = Field(None, description="Speaker name from speaker recognition (None if not identified)") confidence: Optional[float] = Field(None, description="Confidence score (0-1)") words: List["Conversation.Word"] = Field(default_factory=list, description="Word-level timestamps for this segment") diff --git a/backends/advanced/src/advanced_omi_backend/models/job.py b/backends/advanced/src/advanced_omi_backend/models/job.py index 5d906865..a3d93f96 100644 --- a/backends/advanced/src/advanced_omi_backend/models/job.py +++ b/backends/advanced/src/advanced_omi_backend/models/job.py @@ -13,8 +13,8 @@ from abc import ABC, abstractmethod from datetime import datetime, timezone from enum import Enum -from typing import Any, Dict, Optional, Callable from functools import wraps +from typing import Any, Callable, Dict, Optional import redis.asyncio as redis_async @@ -32,13 +32,15 @@ async def _ensure_beanie_initialized(): return try: import os - from motor.motor_asyncio import AsyncIOMotorClient + from beanie import init_beanie - from advanced_omi_backend.models.conversation import Conversation + from motor.motor_asyncio import AsyncIOMotorClient + from pymongo.errors import ConfigurationError + from advanced_omi_backend.models.audio_chunk import AudioChunkDocument + from 
advanced_omi_backend.models.conversation import Conversation from advanced_omi_backend.models.user import User from advanced_omi_backend.models.waveform import WaveformData - from pymongo.errors import ConfigurationError # Get MongoDB URI from environment mongodb_uri = os.getenv("MONGODB_URI", "mongodb://localhost:27017") @@ -254,7 +256,9 @@ async def process(): # Create Redis client if requested if redis: - from advanced_omi_backend.controllers.queue_controller import REDIS_URL + from advanced_omi_backend.controllers.queue_controller import ( + REDIS_URL, + ) redis_client = redis_async.from_url(REDIS_URL) kwargs['redis_client'] = redis_client logger.debug(f"Redis client created") diff --git a/backends/advanced/src/advanced_omi_backend/plugins/base.py b/backends/advanced/src/advanced_omi_backend/plugins/base.py index dbd13301..fefcc6a0 100644 --- a/backends/advanced/src/advanced_omi_backend/plugins/base.py +++ b/backends/advanced/src/advanced_omi_backend/plugins/base.py @@ -7,8 +7,8 @@ - BasePlugin: Abstract base class for all plugins """ from abc import ABC, abstractmethod -from typing import Optional, Dict, Any, List from dataclasses import dataclass, field +from typing import Any, Dict, List, Optional @dataclass diff --git a/backends/advanced/src/advanced_omi_backend/plugins/email_summarizer/setup.py b/backends/advanced/src/advanced_omi_backend/plugins/email_summarizer/setup.py index 728ae607..df179e26 100755 --- a/backends/advanced/src/advanced_omi_backend/plugins/email_summarizer/setup.py +++ b/backends/advanced/src/advanced_omi_backend/plugins/email_summarizer/setup.py @@ -23,10 +23,7 @@ project_root = Path(__file__).resolve().parents[6] sys.path.insert(0, str(project_root)) -from setup_utils import ( - prompt_with_existing_masked, - prompt_value -) +from setup_utils import prompt_value, prompt_with_existing_masked console = Console() diff --git a/backends/advanced/src/advanced_omi_backend/plugins/homeassistant/entity_cache.py 
b/backends/advanced/src/advanced_omi_backend/plugins/homeassistant/entity_cache.py index e8624f1b..90c952e5 100644 --- a/backends/advanced/src/advanced_omi_backend/plugins/homeassistant/entity_cache.py +++ b/backends/advanced/src/advanced_omi_backend/plugins/homeassistant/entity_cache.py @@ -4,10 +4,10 @@ This module provides caching and lookup functionality for Home Assistant areas and entities. """ +import logging from dataclasses import dataclass, field from datetime import datetime from typing import Dict, List, Optional -import logging logger = logging.getLogger(__name__) diff --git a/backends/advanced/src/advanced_omi_backend/plugins/test_event/plugin.py b/backends/advanced/src/advanced_omi_backend/plugins/test_event/plugin.py index 59dd652e..d75a88df 100644 --- a/backends/advanced/src/advanced_omi_backend/plugins/test_event/plugin.py +++ b/backends/advanced/src/advanced_omi_backend/plugins/test_event/plugin.py @@ -8,6 +8,7 @@ from typing import Any, Dict, List, Optional from advanced_omi_backend.plugins.base import BasePlugin, PluginContext, PluginResult + from .event_storage import EventStorage logger = logging.getLogger(__name__) diff --git a/backends/advanced/src/advanced_omi_backend/routers/modules/admin_routes.py b/backends/advanced/src/advanced_omi_backend/routers/modules/admin_routes.py index a2ef4398..49594dd0 100644 --- a/backends/advanced/src/advanced_omi_backend/routers/modules/admin_routes.py +++ b/backends/advanced/src/advanced_omi_backend/routers/modules/admin_routes.py @@ -7,7 +7,7 @@ import logging from typing import Optional -from fastapi import APIRouter, Depends, Query, HTTPException +from fastapi import APIRouter, Depends, HTTPException, Query from fastapi.responses import JSONResponse from advanced_omi_backend.auth import current_active_user @@ -50,8 +50,10 @@ async def trigger_cleanup( ): """Manually trigger cleanup of soft-deleted conversations (admin only).""" try: - from advanced_omi_backend.workers.cleanup_jobs import 
purge_old_deleted_conversations from advanced_omi_backend.controllers.queue_controller import get_queue + from advanced_omi_backend.workers.cleanup_jobs import ( + purge_old_deleted_conversations, + ) # Enqueue cleanup job queue = get_queue("default") @@ -90,9 +92,10 @@ async def preview_cleanup( ): """Preview what would be deleted by cleanup (admin only).""" try: + from datetime import datetime, timedelta + from advanced_omi_backend.config import get_cleanup_settings from advanced_omi_backend.models.conversation import Conversation - from datetime import datetime, timedelta # Use provided retention or default from config if retention_days is None: diff --git a/backends/advanced/src/advanced_omi_backend/routers/modules/annotation_routes.py b/backends/advanced/src/advanced_omi_backend/routers/modules/annotation_routes.py index ebee7634..f85a99ed 100644 --- a/backends/advanced/src/advanced_omi_backend/routers/modules/annotation_routes.py +++ b/backends/advanced/src/advanced_omi_backend/routers/modules/annotation_routes.py @@ -80,15 +80,11 @@ async def create_memory_annotation( content=annotation_data.corrected_text, user_id=current_user.user_id, ) - logger.info( - f"Updated memory {annotation_data.memory_id} with corrected text" - ) + logger.info(f"Updated memory {annotation_data.memory_id} with corrected text") except Exception as e: logger.error(f"Error updating memory: {e}") # Annotation is saved, but memory update failed - log but don't fail the request - logger.warning( - f"Memory annotation {annotation.id} saved but memory update failed" - ) + logger.warning(f"Memory annotation {annotation.id} saved but memory update failed") return AnnotationResponse.model_validate(annotation) @@ -127,9 +123,8 @@ async def create_transcript_annotation( # Validate segment index active_transcript = conversation.active_transcript - if ( - not active_transcript - or annotation_data.segment_index >= len(active_transcript.segments) + if not active_transcript or 
annotation_data.segment_index >= len( + active_transcript.segments ): raise HTTPException(status_code=400, detail="Invalid segment index") @@ -237,10 +232,7 @@ async def update_annotation_status( annotation.updated_at = datetime.now(timezone.utc) # If accepting a pending suggestion, apply the correction - if ( - status == AnnotationStatus.ACCEPTED - and old_status == AnnotationStatus.PENDING - ): + if status == AnnotationStatus.ACCEPTED and old_status == AnnotationStatus.PENDING: if annotation.is_memory_annotation(): # Update memory try: @@ -250,9 +242,7 @@ async def update_annotation_status( content=annotation.corrected_text, user_id=current_user.user_id, ) - logger.info( - f"Applied suggestion to memory {annotation.memory_id}" - ) + logger.info(f"Applied suggestion to memory {annotation.memory_id}") except Exception as e: logger.error(f"Error applying memory suggestion: {e}") # Don't fail the status update if memory update fails @@ -261,17 +251,14 @@ async def update_annotation_status( try: conversation = await Conversation.find_one( Conversation.conversation_id == annotation.conversation_id, - Conversation.user_id == annotation.user_id + Conversation.user_id == annotation.user_id, ) if conversation: transcript = conversation.active_transcript - if ( - transcript - and annotation.segment_index < len(transcript.segments) - ): - transcript.segments[ - annotation.segment_index - ].text = annotation.corrected_text + if transcript and annotation.segment_index < len(transcript.segments): + transcript.segments[annotation.segment_index].text = ( + annotation.corrected_text + ) await conversation.save() logger.info( f"Applied suggestion to transcript segment {annotation.segment_index}" @@ -322,9 +309,8 @@ async def create_diarization_annotation( # Validate segment index active_transcript = conversation.active_transcript - if ( - not active_transcript - or annotation_data.segment_index >= len(active_transcript.segments) + if not active_transcript or 
annotation_data.segment_index >= len( + active_transcript.segments ): raise HTTPException(status_code=400, detail="Invalid segment index") @@ -425,6 +411,7 @@ async def apply_diarization_annotations( # Create NEW transcript version with corrected speakers import uuid + new_version_id = str(uuid.uuid4()) # Copy segments and apply corrections @@ -479,19 +466,18 @@ async def apply_diarization_annotations( from advanced_omi_backend.workers.memory_jobs import enqueue_memory_processing enqueue_memory_processing( - client_id=conversation.client_id, - user_id=current_user.user_id, - user_email=current_user.email, conversation_id=conversation_id, priority=JobPriority.NORMAL, ) - return JSONResponse(content={ - "message": "Diarization annotations applied", - "version_id": new_version_id, - "applied_count": len(annotations), - "status": "success" - }) + return JSONResponse( + content={ + "message": "Diarization annotations applied", + "version_id": new_version_id, + "applied_count": len(annotations), + "status": "success", + } + ) except HTTPException: raise @@ -533,15 +519,21 @@ async def apply_all_annotations( ).to_list() if not annotations: - return JSONResponse(content={ - "message": "No pending annotations to apply", - "diarization_count": 0, - "transcript_count": 0, - }) + return JSONResponse( + content={ + "message": "No pending annotations to apply", + "diarization_count": 0, + "transcript_count": 0, + } + ) # Separate by type - diarization_annotations = [a for a in annotations if a.annotation_type == AnnotationType.DIARIZATION] - transcript_annotations = [a for a in annotations if a.annotation_type == AnnotationType.TRANSCRIPT] + diarization_annotations = [ + a for a in annotations if a.annotation_type == AnnotationType.DIARIZATION + ] + transcript_annotations = [ + a for a in annotations if a.annotation_type == AnnotationType.TRANSCRIPT + ] # Get active transcript active_transcript = conversation.active_transcript @@ -550,6 +542,7 @@ async def apply_all_annotations( 
# Create new version with ALL corrections applied import uuid + new_version_id = str(uuid.uuid4()) corrected_segments = [] @@ -558,16 +551,14 @@ async def apply_all_annotations( # Apply diarization correction (if exists) diar_annotation = next( - (a for a in diarization_annotations if a.segment_index == segment_idx), - None + (a for a in diarization_annotations if a.segment_index == segment_idx), None ) if diar_annotation: corrected_segment.speaker = diar_annotation.corrected_speaker # Apply transcript correction (if exists) transcript_annotation = next( - (a for a in transcript_annotations if a.segment_index == segment_idx), - None + (a for a in transcript_annotations if a.segment_index == segment_idx), None ) if transcript_annotation: corrected_segment.text = transcript_annotation.corrected_text @@ -610,20 +601,19 @@ async def apply_all_annotations( from advanced_omi_backend.workers.memory_jobs import enqueue_memory_processing enqueue_memory_processing( - client_id=conversation.client_id, - user_id=current_user.user_id, - user_email=current_user.email, conversation_id=conversation_id, priority=JobPriority.NORMAL, ) - return JSONResponse(content={ - "message": f"Applied {len(diarization_annotations)} diarization and {len(transcript_annotations)} transcript annotations", - "version_id": new_version_id, - "diarization_count": len(diarization_annotations), - "transcript_count": len(transcript_annotations), - "status": "success", - }) + return JSONResponse( + content={ + "message": f"Applied {len(diarization_annotations)} diarization and {len(transcript_annotations)} transcript annotations", + "version_id": new_version_id, + "diarization_count": len(diarization_annotations), + "transcript_count": len(transcript_annotations), + "status": "success", + } + ) except HTTPException: raise diff --git a/backends/advanced/src/advanced_omi_backend/routers/modules/audio_routes.py b/backends/advanced/src/advanced_omi_backend/routers/modules/audio_routes.py index 
7cef955a..94a57e1e 100644 --- a/backends/advanced/src/advanced_omi_backend/routers/modules/audio_routes.py +++ b/backends/advanced/src/advanced_omi_backend/routers/modules/audio_routes.py @@ -7,20 +7,28 @@ import io from typing import Optional -from fastapi import APIRouter, Depends, File, HTTPException, Query, UploadFile, Request -from fastapi.responses import FileResponse, StreamingResponse, Response -from advanced_omi_backend.auth import current_superuser, current_active_user_optional, get_user_from_token_param +from fastapi import APIRouter, Depends, File, HTTPException, Query, Request, UploadFile +from fastapi.responses import FileResponse, Response, StreamingResponse + +from advanced_omi_backend.app_config import get_audio_chunk_dir +from advanced_omi_backend.auth import ( + current_active_user_optional, + current_superuser, + get_user_from_token_param, +) from advanced_omi_backend.controllers import audio_controller -from advanced_omi_backend.models.user import User from advanced_omi_backend.models.conversation import Conversation -from advanced_omi_backend.app_config import get_audio_chunk_dir -from advanced_omi_backend.utils.gdrive_audio_utils import download_audio_files_from_drive, AudioValidationError +from advanced_omi_backend.models.user import User from advanced_omi_backend.utils.audio_chunk_utils import ( + build_wav_from_pcm, + concatenate_chunks_to_pcm, reconstruct_wav_from_conversation, retrieve_audio_chunks, - concatenate_chunks_to_pcm, - build_wav_from_pcm, +) +from advanced_omi_backend.utils.gdrive_audio_utils import ( + AudioValidationError, + download_audio_files_from_drive, ) router = APIRouter(prefix="/audio", tags=["audio"]) diff --git a/backends/advanced/src/advanced_omi_backend/routers/modules/health_routes.py b/backends/advanced/src/advanced_omi_backend/routers/modules/health_routes.py index 96ee72fe..cd11c6ff 100644 --- a/backends/advanced/src/advanced_omi_backend/routers/modules/health_routes.py +++ 
b/backends/advanced/src/advanced_omi_backend/routers/modules/health_routes.py @@ -8,19 +8,19 @@ import logging import os import time -from typing import Dict, Any +from typing import Any, Dict import aiohttp -from fastapi import APIRouter, Request, HTTPException +from fastapi import APIRouter, HTTPException, Request from fastapi.responses import JSONResponse from motor.motor_asyncio import AsyncIOMotorClient -from advanced_omi_backend.controllers.queue_controller import redis_conn from advanced_omi_backend.client_manager import get_client_manager +from advanced_omi_backend.controllers.queue_controller import redis_conn from advanced_omi_backend.llm_client import async_health_check +from advanced_omi_backend.model_registry import get_models_registry from advanced_omi_backend.services.memory import get_memory_service from advanced_omi_backend.services.transcription import get_transcription_provider -from advanced_omi_backend.model_registry import get_models_registry # Create router router = APIRouter(tags=["health"]) diff --git a/backends/advanced/src/advanced_omi_backend/routers/modules/memory_routes.py b/backends/advanced/src/advanced_omi_backend/routers/modules/memory_routes.py index 185f55ec..409f7b85 100644 --- a/backends/advanced/src/advanced_omi_backend/routers/modules/memory_routes.py +++ b/backends/advanced/src/advanced_omi_backend/routers/modules/memory_routes.py @@ -7,7 +7,7 @@ import logging from typing import Optional -from fastapi import APIRouter, Depends, Query, Body +from fastapi import APIRouter, Body, Depends, Query from pydantic import BaseModel from advanced_omi_backend.auth import current_active_user, current_superuser diff --git a/backends/advanced/src/advanced_omi_backend/routers/modules/obsidian_routes.py b/backends/advanced/src/advanced_omi_backend/routers/modules/obsidian_routes.py index f6a46a38..b02ed426 100644 --- a/backends/advanced/src/advanced_omi_backend/routers/modules/obsidian_routes.py +++ 
b/backends/advanced/src/advanced_omi_backend/routers/modules/obsidian_routes.py @@ -1,21 +1,21 @@ +import json import logging import os import uuid -import json +import zipfile from pathlib import Path -from fastapi import APIRouter, Depends, HTTPException, UploadFile, File, Body +from fastapi import APIRouter, Body, Depends, File, HTTPException, UploadFile +from pydantic import BaseModel from rq.exceptions import NoSuchJobError from rq.job import Job -from pydantic import BaseModel -import zipfile from advanced_omi_backend.auth import current_active_user, current_superuser from advanced_omi_backend.controllers.queue_controller import default_queue, redis_conn -from advanced_omi_backend.users import User from advanced_omi_backend.services.obsidian_service import obsidian_service -from advanced_omi_backend.utils.file_utils import extract_zip, ZipExtractionError +from advanced_omi_backend.users import User +from advanced_omi_backend.utils.file_utils import ZipExtractionError, extract_zip from advanced_omi_backend.workers.obsidian_jobs import ( count_markdown_files, ingest_obsidian_vault_job, diff --git a/backends/advanced/src/advanced_omi_backend/routers/modules/queue_routes.py b/backends/advanced/src/advanced_omi_backend/routers/modules/queue_routes.py index 51c07097..934cf0b1 100644 --- a/backends/advanced/src/advanced_omi_backend/routers/modules/queue_routes.py +++ b/backends/advanced/src/advanced_omi_backend/routers/modules/queue_routes.py @@ -4,15 +4,22 @@ """ import logging -from fastapi import APIRouter, Depends, Query, HTTPException, Request -from pydantic import BaseModel from typing import List, Optional +import redis.asyncio as aioredis +from fastapi import APIRouter, Depends, HTTPException, Query, Request +from pydantic import BaseModel +from rq.job import Job + from advanced_omi_backend.auth import current_active_user -from advanced_omi_backend.controllers.queue_controller import get_jobs, get_job_stats, redis_conn, QUEUE_NAMES, get_job_status_from_rq 
+from advanced_omi_backend.controllers.queue_controller import ( + QUEUE_NAMES, + get_job_stats, + get_job_status_from_rq, + get_jobs, + redis_conn, +) from advanced_omi_backend.users import User -from rq.job import Job -import redis.asyncio as aioredis logger = logging.getLogger(__name__) router = APIRouter(prefix="/queue", tags=["queue"]) @@ -187,7 +194,15 @@ async def get_jobs_by_client( ): """Get all jobs associated with a specific client device.""" try: - from rq.registry import FinishedJobRegistry, FailedJobRegistry, StartedJobRegistry, CanceledJobRegistry, DeferredJobRegistry, ScheduledJobRegistry + from rq.registry import ( + CanceledJobRegistry, + DeferredJobRegistry, + FailedJobRegistry, + FinishedJobRegistry, + ScheduledJobRegistry, + StartedJobRegistry, + ) + from advanced_omi_backend.controllers.queue_controller import get_queue from advanced_omi_backend.models.conversation import Conversation @@ -326,10 +341,10 @@ async def get_queue_worker_details( ): """Get detailed queue and worker status including task manager health.""" try: - from advanced_omi_backend.controllers.queue_controller import get_queue_health - from advanced_omi_backend.task_manager import get_task_manager import time + from advanced_omi_backend.controllers.queue_controller import get_queue_health + # Get queue health directly queue_health = get_queue_health() @@ -488,7 +503,13 @@ async def flush_jobs( try: from datetime import datetime, timedelta, timezone - from rq.registry import FinishedJobRegistry, FailedJobRegistry, CanceledJobRegistry + + from rq.registry import ( + CanceledJobRegistry, + FailedJobRegistry, + FinishedJobRegistry, + ) + from advanced_omi_backend.controllers.queue_controller import get_queue cutoff_time = datetime.now(timezone.utc) - timedelta(hours=request.older_than_hours) @@ -563,13 +584,14 @@ async def flush_all_jobs( try: from rq.registry import ( - FinishedJobRegistry, - FailedJobRegistry, CanceledJobRegistry, - StartedJobRegistry, DeferredJobRegistry, - 
ScheduledJobRegistry + FailedJobRegistry, + FinishedJobRegistry, + ScheduledJobRegistry, + StartedJobRegistry, ) + from advanced_omi_backend.controllers.queue_controller import get_queue total_removed = 0 @@ -705,6 +727,7 @@ async def get_redis_sessions( """Get Redis session tracking information.""" try: import redis.asyncio as aioredis + from advanced_omi_backend.controllers.queue_controller import REDIS_URL redis_client = aioredis.from_url(REDIS_URL) @@ -769,8 +792,10 @@ async def clear_old_sessions( raise HTTPException(status_code=403, detail="Admin access required") try: - import redis.asyncio as aioredis import time + + import redis.asyncio as aioredis + from advanced_omi_backend.controllers.queue_controller import REDIS_URL redis_client = aioredis.from_url(REDIS_URL) @@ -827,8 +852,13 @@ async def get_dashboard_data( - Client jobs for expanded clients """ try: + from rq.registry import ( + FailedJobRegistry, + FinishedJobRegistry, + StartedJobRegistry, + ) + from advanced_omi_backend.controllers import system_controller - from rq.registry import FinishedJobRegistry, FailedJobRegistry, StartedJobRegistry from advanced_omi_backend.controllers.queue_controller import get_queue # Parse expanded clients list @@ -912,6 +942,7 @@ async def fetch_streaming_status(): try: # Import session_controller for streaming status from advanced_omi_backend.controllers import session_controller + # Use the actual request object from the parent function return await session_controller.get_streaming_status(request) except Exception as e: @@ -943,8 +974,12 @@ def get_job_status(job): # Check all registries from rq.registry import ( - FinishedJobRegistry, FailedJobRegistry, StartedJobRegistry, - CanceledJobRegistry, DeferredJobRegistry, ScheduledJobRegistry + CanceledJobRegistry, + DeferredJobRegistry, + FailedJobRegistry, + FinishedJobRegistry, + ScheduledJobRegistry, + StartedJobRegistry, ) registries = [ diff --git 
a/backends/advanced/src/advanced_omi_backend/routers/modules/test_routes.py b/backends/advanced/src/advanced_omi_backend/routers/modules/test_routes.py index 6255b6d6..349fe33d 100644 --- a/backends/advanced/src/advanced_omi_backend/routers/modules/test_routes.py +++ b/backends/advanced/src/advanced_omi_backend/routers/modules/test_routes.py @@ -7,6 +7,7 @@ import logging from typing import Optional + from fastapi import APIRouter, HTTPException from advanced_omi_backend.services.plugin_service import get_plugin_router diff --git a/backends/advanced/src/advanced_omi_backend/routers/modules/websocket_routes.py b/backends/advanced/src/advanced_omi_backend/routers/modules/websocket_routes.py index 2671d7f6..4b244343 100644 --- a/backends/advanced/src/advanced_omi_backend/routers/modules/websocket_routes.py +++ b/backends/advanced/src/advanced_omi_backend/routers/modules/websocket_routes.py @@ -5,12 +5,13 @@ """ import logging -from fastapi import APIRouter, WebSocket, WebSocketDisconnect, Query from typing import Optional +from fastapi import APIRouter, Query, WebSocket, WebSocketDisconnect + from advanced_omi_backend.controllers.websocket_controller import ( handle_omi_websocket, - handle_pcm_websocket + handle_pcm_websocket, ) logger = logging.getLogger(__name__) diff --git a/backends/advanced/src/advanced_omi_backend/services/audio_stream/consumer.py b/backends/advanced/src/advanced_omi_backend/services/audio_stream/consumer.py index aeb12e02..455ebebe 100644 --- a/backends/advanced/src/advanced_omi_backend/services/audio_stream/consumer.py +++ b/backends/advanced/src/advanced_omi_backend/services/audio_stream/consumer.py @@ -11,6 +11,7 @@ import redis.asyncio as redis from redis import exceptions as redis_exceptions + logger = logging.getLogger(__name__) diff --git a/backends/advanced/src/advanced_omi_backend/services/audio_stream/producer.py b/backends/advanced/src/advanced_omi_backend/services/audio_stream/producer.py index 224d69f4..e7fae522 100644 --- 
a/backends/advanced/src/advanced_omi_backend/services/audio_stream/producer.py +++ b/backends/advanced/src/advanced_omi_backend/services/audio_stream/producer.py @@ -465,6 +465,7 @@ def get_audio_stream_producer() -> AudioStreamProducer: if _producer_instance is None: import os + import redis.asyncio as redis_async redis_url = os.getenv("REDIS_URL", "redis://localhost:6379/0") diff --git a/backends/advanced/src/advanced_omi_backend/services/knowledge_graph/__init__.py b/backends/advanced/src/advanced_omi_backend/services/knowledge_graph/__init__.py index 2bb7f4e0..da675616 100644 --- a/backends/advanced/src/advanced_omi_backend/services/knowledge_graph/__init__.py +++ b/backends/advanced/src/advanced_omi_backend/services/knowledge_graph/__init__.py @@ -9,11 +9,11 @@ from .models import ( Entity, EntityType, - Relationship, - RelationshipType, + ExtractionResult, Promise, PromiseStatus, - ExtractionResult, + Relationship, + RelationshipType, ) from .service import KnowledgeGraphService, get_knowledge_graph_service diff --git a/backends/advanced/src/advanced_omi_backend/services/knowledge_graph/entity_extractor.py b/backends/advanced/src/advanced_omi_backend/services/knowledge_graph/entity_extractor.py index f6985d4f..dc4724f2 100644 --- a/backends/advanced/src/advanced_omi_backend/services/knowledge_graph/entity_extractor.py +++ b/backends/advanced/src/advanced_omi_backend/services/knowledge_graph/entity_extractor.py @@ -13,10 +13,10 @@ from .models import ( EntityType, - ExtractionResult, ExtractedEntity, ExtractedPromise, ExtractedRelationship, + ExtractionResult, RelationshipType, ) @@ -83,7 +83,9 @@ def _get_llm_client(): """Get async OpenAI client from model registry.""" - from advanced_omi_backend.services.memory.providers.llm_providers import _get_openai_client + from advanced_omi_backend.services.memory.providers.llm_providers import ( + _get_openai_client, + ) registry = get_models_registry() if not registry: diff --git 
a/backends/advanced/src/advanced_omi_backend/services/knowledge_graph/models.py b/backends/advanced/src/advanced_omi_backend/services/knowledge_graph/models.py index 8ab82583..c4cf533c 100644 --- a/backends/advanced/src/advanced_omi_backend/services/knowledge_graph/models.py +++ b/backends/advanced/src/advanced_omi_backend/services/knowledge_graph/models.py @@ -4,11 +4,12 @@ service for storing and retrieving entities, relationships, and promises. """ +import uuid from datetime import datetime from enum import Enum from typing import Any, Dict, List, Optional + from pydantic import BaseModel, Field -import uuid class EntityType(str, Enum): diff --git a/backends/advanced/src/advanced_omi_backend/services/knowledge_graph/service.py b/backends/advanced/src/advanced_omi_backend/services/knowledge_graph/service.py index 4ab05445..6562dccc 100644 --- a/backends/advanced/src/advanced_omi_backend/services/knowledge_graph/service.py +++ b/backends/advanced/src/advanced_omi_backend/services/knowledge_graph/service.py @@ -15,7 +15,6 @@ from typing import Any, Dict, List, Optional from ..neo4j_client import Neo4jClient, Neo4jReadInterface, Neo4jWriteInterface - from . 
import queries from .entity_extractor import extract_entities_from_transcript, parse_natural_datetime from .models import ( diff --git a/backends/advanced/src/advanced_omi_backend/services/memory/base.py b/backends/advanced/src/advanced_omi_backend/services/memory/base.py index 4abdb5b0..7df7748e 100644 --- a/backends/advanced/src/advanced_omi_backend/services/memory/base.py +++ b/backends/advanced/src/advanced_omi_backend/services/memory/base.py @@ -2,7 +2,7 @@ This module defines the core abstractions and interfaces for: - Memory service operations -- LLM provider integration +- LLM provider integration - Vector store backends - Memory entry data structures @@ -12,14 +12,9 @@ import time from abc import ABC, abstractmethod from dataclasses import dataclass, field -from typing import Dict, List, Any, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple -__all__ = [ - "MemoryEntry", - "MemoryServiceBase", - "LLMProviderBase", - "VectorStoreBase" -] +__all__ = ["MemoryEntry", "MemoryServiceBase", "LLMProviderBase", "VectorStoreBase"] @dataclass @@ -38,6 +33,7 @@ class MemoryEntry: created_at: Timestamp when memory was created updated_at: Timestamp when memory was last updated """ + id: str content: str metadata: Dict[str, Any] = field(default_factory=dict) @@ -65,30 +61,36 @@ def to_dict(self) -> Dict[str, Any]: "score": self.score, "created_at": self.created_at, "updated_at": self.updated_at, - "user_id": self.metadata.get("user_id") # Extract user_id from metadata + "user_id": self.metadata.get("user_id"), # Extract user_id from metadata } class MemoryServiceBase(ABC): """Abstract base class defining the core memory service interface. - + This class defines all the essential operations that any memory service implementation must provide. Concrete implementations should inherit from this class and implement all abstract methods. 
""" - + + @property + @abstractmethod + def provider_identifier(self) -> str: + """Return the provider identifier (e.g., 'chronicle', 'openmemory_mcp', 'mycelia').""" + ... + @abstractmethod async def initialize(self) -> None: """Initialize the memory service and all its components. - + This should set up connections to LLM providers, vector stores, and any other required dependencies. - + Raises: RuntimeError: If initialization fails """ pass - + @abstractmethod async def add_memory( self, @@ -98,62 +100,54 @@ async def add_memory( user_id: str, user_email: str, allow_update: bool = False, - db_helper: Any = None + db_helper: Any = None, ) -> Tuple[bool, List[str]]: """Add memories extracted from a transcript. - + Args: transcript: Raw transcript text to extract memories from - client_id: Client identifier + client_id: Client identifier source_id: Unique identifier for the source (audio session, chat session, etc.) user_id: User identifier user_email: User email address allow_update: Whether to allow updating existing memories db_helper: Optional database helper for tracking relationships - + Returns: Tuple of (success: bool, created_memory_ids: List[str]) """ pass - + @abstractmethod async def search_memories( - self, - query: str, - user_id: str, - limit: int = 10, - score_threshold: float = 0.0 + self, query: str, user_id: str, limit: int = 10, score_threshold: float = 0.0 ) -> List[MemoryEntry]: """Search memories using semantic similarity. - + Args: query: Search query text user_id: User identifier to filter memories limit: Maximum number of results to return score_threshold: Minimum similarity score (0.0 = no threshold) - + Returns: List of matching MemoryEntry objects ordered by relevance """ pass - + @abstractmethod - async def get_all_memories( - self, - user_id: str, - limit: int = 100 - ) -> List[MemoryEntry]: + async def get_all_memories(self, user_id: str, limit: int = 100) -> List[MemoryEntry]: """Get all memories for a specific user. 
- + Args: user_id: User identifier limit: Maximum number of memories to return - + Returns: List of MemoryEntry objects for the user """ pass - + async def count_memories(self, user_id: str) -> Optional[int]: """Count total number of memories for a user. @@ -168,7 +162,9 @@ async def count_memories(self, user_id: str) -> Optional[int]: """ return None - async def get_memory(self, memory_id: str, user_id: Optional[str] = None) -> Optional[MemoryEntry]: + async def get_memory( + self, memory_id: str, user_id: Optional[str] = None + ) -> Optional[MemoryEntry]: """Get a specific memory by ID. This is an optional method that providers can implement for fetching @@ -190,7 +186,7 @@ async def update_memory( content: Optional[str] = None, metadata: Optional[Dict[str, Any]] = None, user_id: Optional[str] = None, - user_email: Optional[str] = None + user_email: Optional[str] = None, ) -> bool: """Update a specific memory's content and/or metadata. @@ -210,7 +206,9 @@ async def update_memory( return False @abstractmethod - async def delete_memory(self, memory_id: str, user_id: Optional[str] = None, user_email: Optional[str] = None) -> bool: + async def delete_memory( + self, memory_id: str, user_id: Optional[str] = None, user_email: Optional[str] = None + ) -> bool: """Delete a specific memory by ID. Args: @@ -222,31 +220,31 @@ async def delete_memory(self, memory_id: str, user_id: Optional[str] = None, use True if successfully deleted, False otherwise """ pass - + @abstractmethod async def delete_all_user_memories(self, user_id: str) -> int: """Delete all memories for a specific user. - + Args: user_id: User identifier - + Returns: Number of memories that were deleted """ pass - + @abstractmethod async def test_connection(self) -> bool: """Test if the memory service and its dependencies are working. - + Returns: True if all connections are healthy, False otherwise """ pass - + def shutdown(self) -> None: """Shutdown the memory service and clean up resources. 
- + Default implementation does nothing. Subclasses should override if they need to perform cleanup operations. """ @@ -254,19 +252,19 @@ def shutdown(self) -> None: def __init__(self): """Initialize base memory service state. - + Subclasses should call super().__init__() in their constructors. """ self._initialized = False - + async def _ensure_initialized(self) -> None: """Ensure the memory service is initialized before use. - + This method provides lazy initialization - it will automatically call initialize() the first time it's needed. This is critical for services used in RQ workers where the service instance is created in one process but used in another. - + This should be called at the start of any method that requires the service to be initialized (e.g., add_memory, search_memories). """ @@ -276,38 +274,38 @@ async def _ensure_initialized(self) -> None: class LLMProviderBase(ABC): """Abstract base class for LLM provider implementations. - + LLM providers handle: - Memory extraction from text using prompts - Text embedding generation - Memory action proposals (add/update/delete decisions) """ - + @abstractmethod async def extract_memories(self, text: str, prompt: str) -> List[str]: """Extract meaningful fact memories from text using an LLM. - + Args: text: Input text to extract memories from prompt: System prompt to guide the extraction process - + Returns: List of extracted fact memory strings """ pass - + @abstractmethod async def generate_embeddings(self, texts: List[str]) -> List[List[float]]: """Generate vector embeddings for the given texts. - + Args: texts: List of text strings to embed - + Returns: List of embedding vectors (one per input text) """ pass - + @abstractmethod async def propose_memory_actions( self, @@ -316,27 +314,27 @@ async def propose_memory_actions( custom_prompt: Optional[str] = None, ) -> Dict[str, Any]: """Propose memory management actions based on existing and new information. 
- + This method uses the LLM to decide whether new facts should: - ADD: Create new memories - - UPDATE: Modify existing memories + - UPDATE: Modify existing memories - DELETE: Remove outdated memories - NONE: No action needed - + Args: retrieved_old_memory: List of existing memories for context new_facts: List of new facts to process custom_prompt: Optional custom prompt to use instead of default - + Returns: Dictionary containing proposed actions in structured format """ pass - + @abstractmethod async def test_connection(self) -> bool: """Test connection to the LLM provider. - + Returns: True if connection is working, False otherwise """ @@ -345,82 +343,78 @@ async def test_connection(self) -> bool: class VectorStoreBase(ABC): """Abstract base class for vector store implementations. - + Vector stores handle: - Storing memory embeddings with metadata - Semantic search using vector similarity - CRUD operations on memory entries """ - + @abstractmethod async def initialize(self) -> None: """Initialize the vector store (create collections, etc.). - + Raises: RuntimeError: If initialization fails """ pass - + @abstractmethod async def add_memories(self, memories: List[MemoryEntry]) -> List[str]: """Add multiple memory entries to the vector store. - + Args: memories: List of MemoryEntry objects to store - + Returns: List of created memory IDs """ pass - + @abstractmethod async def search_memories( - self, - query_embedding: List[float], - user_id: str, - limit: int, - score_threshold: float = 0.0 + self, query_embedding: List[float], user_id: str, limit: int, score_threshold: float = 0.0 ) -> List[MemoryEntry]: """Search memories using vector similarity. 
- + Args: query_embedding: Query vector for similarity search user_id: User identifier to filter results limit: Maximum number of results to return score_threshold: Minimum similarity score (0.0 = no threshold) - + Returns: List of matching MemoryEntry objects with similarity scores """ pass - + @abstractmethod async def get_memories(self, user_id: str, limit: int) -> List[MemoryEntry]: """Get all memories for a user without similarity filtering. - + Args: user_id: User identifier limit: Maximum number of memories to return - + Returns: List of MemoryEntry objects for the user """ pass - + async def count_memories(self, user_id: str) -> Optional[int]: """Count total number of memories for a user. - + Default implementation returns None to indicate counting is unsupported. Vector stores should override this method to provide efficient counting if supported. - + Args: user_id: User identifier - + Returns: Total count of memories for the user, or None if counting is not supported by this store """ return None - + @abstractmethod async def update_memory( self, @@ -430,47 +424,47 @@ async def update_memory( new_metadata: Dict[str, Any], ) -> bool: """Update an existing memory with new content and metadata. - + Args: memory_id: ID of the memory to update new_content: Updated memory content new_embedding: Updated embedding vector new_metadata: Updated metadata - + Returns: True if update succeeded, False otherwise """ pass - + @abstractmethod async def delete_memory(self, memory_id: str) -> bool: """Delete a specific memory from the store. - + Args: memory_id: ID of the memory to delete - + Returns: True if deletion succeeded, False otherwise """ pass - + @abstractmethod async def delete_user_memories(self, user_id: str) -> int: """Delete all memories for a specific user. - + Args: user_id: User identifier - + Returns: Number of memories that were deleted """ pass - + @abstractmethod async def test_connection(self) -> bool: """Test connection to the vector store. 
- + Returns: True if connection is working, False otherwise """ - pass \ No newline at end of file + pass diff --git a/backends/advanced/src/advanced_omi_backend/services/memory/config.py b/backends/advanced/src/advanced_omi_backend/services/memory/config.py index 19b47bd7..db3b98e0 100644 --- a/backends/advanced/src/advanced_omi_backend/services/memory/config.py +++ b/backends/advanced/src/advanced_omi_backend/services/memory/config.py @@ -2,12 +2,13 @@ import logging import os -import yaml -from pathlib import Path from dataclasses import dataclass from enum import Enum +from pathlib import Path from typing import Any, Dict, Optional, Union +import yaml + from advanced_omi_backend.model_registry import get_models_registry from advanced_omi_backend.utils.config_utils import resolve_value diff --git a/backends/advanced/src/advanced_omi_backend/services/memory/prompts.py b/backends/advanced/src/advanced_omi_backend/services/memory/prompts.py index 4b41a51a..4325fd13 100644 --- a/backends/advanced/src/advanced_omi_backend/services/memory/prompts.py +++ b/backends/advanced/src/advanced_omi_backend/services/memory/prompts.py @@ -8,9 +8,10 @@ 5. 
Temporal and entity extraction (get_temporal_entity_extraction_prompt()) """ -from datetime import datetime, timedelta import json +from datetime import datetime, timedelta from typing import List, Optional + from pydantic import BaseModel, Field MEMORY_ANSWER_PROMPT = """ diff --git a/backends/advanced/src/advanced_omi_backend/services/memory/providers/__init__.py b/backends/advanced/src/advanced_omi_backend/services/memory/providers/__init__.py index 3a71f7cf..7a46dd33 100644 --- a/backends/advanced/src/advanced_omi_backend/services/memory/providers/__init__.py +++ b/backends/advanced/src/advanced_omi_backend/services/memory/providers/__init__.py @@ -10,11 +10,11 @@ """ from .chronicle import MemoryService as ChronicleMemoryService -from .openmemory_mcp import OpenMemoryMCPService -from .mycelia import MyceliaMemoryService from .llm_providers import OpenAIProvider -from .vector_stores import QdrantVectorStore from .mcp_client import MCPClient, MCPError +from .mycelia import MyceliaMemoryService +from .openmemory_mcp import OpenMemoryMCPService +from .vector_stores import QdrantVectorStore __all__ = [ "ChronicleMemoryService", diff --git a/backends/advanced/src/advanced_omi_backend/services/memory/providers/chronicle.py b/backends/advanced/src/advanced_omi_backend/services/memory/providers/chronicle.py index 1984b281..1eddae93 100644 --- a/backends/advanced/src/advanced_omi_backend/services/memory/providers/chronicle.py +++ b/backends/advanced/src/advanced_omi_backend/services/memory/providers/chronicle.py @@ -22,16 +22,16 @@ class MemoryService(MemoryServiceBase): """Main memory service that orchestrates LLM and vector store operations. 
- + This class implements the core memory management functionality including: - Memory extraction from transcripts using LLM providers - Semantic storage and retrieval using vector stores - Memory updates and deduplication - User-scoped memory management - + The service supports multiple LLM providers (OpenAI, Ollama) and vector stores (Qdrant), providing a flexible and extensible architecture. - + Attributes: config: Memory service configuration llm_provider: Active LLM provider instance @@ -39,9 +39,13 @@ class MemoryService(MemoryServiceBase): _initialized: Whether the service has been initialized """ + @property + def provider_identifier(self) -> str: + return "chronicle" + def __init__(self, config: MemoryConfig): """Initialize the memory service with configuration. - + Args: config: MemoryConfig instance with provider settings """ @@ -52,10 +56,10 @@ def __init__(self, config: MemoryConfig): async def initialize(self) -> None: """Initialize the memory service and all its components. - + Sets up LLM provider and vector store based on configuration, tests connections, and marks the service as ready for use. - + Raises: ValueError: If unsupported provider is configured RuntimeError: If initialization or connection tests fail @@ -74,7 +78,9 @@ async def initialize(self) -> None: if self.config.vector_store_provider == VectorStoreProvider.QDRANT: self.vector_store = QdrantVectorStore(self.config.vector_store_config) else: - raise ValueError(f"Unsupported vector store provider: {self.config.vector_store_provider}") + raise ValueError( + f"Unsupported vector store provider: {self.config.vector_store_provider}" + ) # Initialize vector store await self.vector_store.initialize() @@ -113,14 +119,14 @@ async def add_memory( user_id: str, user_email: str, allow_update: bool = False, - db_helper: Any = None + db_helper: Any = None, ) -> Tuple[bool, List[str]]: """Add memories extracted from a transcript. 
- + Processes a transcript to extract meaningful memories using the LLM, generates embeddings, and stores them in the vector database. Optionally allows updating existing memories through LLM-driven action proposals. - + Args: transcript: Raw transcript text to extract memories from client_id: Client identifier for tracking @@ -129,10 +135,10 @@ async def add_memory( user_email: User email address allow_update: Whether to allow updating existing memories db_helper: Optional database helper for relationship tracking - + Returns: Tuple of (success: bool, created_memory_ids: List[str]) - + Raises: asyncio.TimeoutError: If processing exceeds timeout """ @@ -149,14 +155,18 @@ async def add_memory( if self.config.extraction_enabled and self.config.extraction_prompt: fact_memories_text = await asyncio.wait_for( self.llm_provider.extract_memories(transcript, self.config.extraction_prompt), - timeout=self.config.timeout_seconds + timeout=self.config.timeout_seconds, + ) + memory_logger.info( + f"๐Ÿง  Extracted {len(fact_memories_text)} memories from transcript for {source_id}" ) - memory_logger.info(f"๐Ÿง  Extracted {len(fact_memories_text)} memories from transcript for {source_id}") - + # Fallback to storing raw transcript if no memories extracted if not fact_memories_text: fact_memories_text = [transcript] - memory_logger.info(f"๐Ÿ’พ No memories extracted, storing raw transcript for {source_id}") + memory_logger.info( + f"๐Ÿ’พ No memories extracted, storing raw transcript for {source_id}" + ) memory_logger.debug(f"๐Ÿง  fact_memories_text: {fact_memories_text}") # Simple deduplication of extracted memories within the same call @@ -165,14 +175,14 @@ async def add_memory( # Generate embeddings embeddings = await asyncio.wait_for( self.llm_provider.generate_embeddings(fact_memories_text), - timeout=self.config.timeout_seconds + timeout=self.config.timeout_seconds, ) memory_logger.info(f"embeddings generated") if not embeddings or len(embeddings) != len(fact_memories_text): 
error_msg = f"โŒ Embedding generation failed for {source_id}: got {len(embeddings) if embeddings else 0} embeddings for {len(fact_memories_text)} memories" memory_logger.error(error_msg) raise RuntimeError(error_msg) - + # Create or update memory entries memory_entries = [] created_ids: List[str] = [] @@ -204,7 +214,9 @@ async def add_memory( return True, created_ids # No memories created - this is a valid outcome (duplicates, no extractable facts, etc.) - memory_logger.info(f"โ„น๏ธ No new memories created for {source_id}: memory_entries={len(memory_entries) if memory_entries else 0}, allow_update={allow_update}") + memory_logger.info( + f"โ„น๏ธ No new memories created for {source_id}: memory_entries={len(memory_entries) if memory_entries else 0}, allow_update={allow_update}" + ) return True, [] except asyncio.TimeoutError as e: @@ -214,18 +226,20 @@ async def add_memory( memory_logger.error(f"โŒ Add memory failed for {source_id}: {e}") raise e - async def search_memories(self, query: str, user_id: str, limit: int = 10, score_threshold: float = 0.0) -> List[MemoryEntry]: + async def search_memories( + self, query: str, user_id: str, limit: int = 10, score_threshold: float = 0.0 + ) -> List[MemoryEntry]: """Search memories using semantic similarity. - + Generates an embedding for the query and searches the vector store for similar memories belonging to the specified user. 
- + Args: query: Search query text user_id: User identifier to filter memories limit: Maximum number of results to return score_threshold: Minimum similarity score (0.0 = no threshold) - + Returns: List of matching MemoryEntry objects ordered by relevance """ @@ -244,7 +258,9 @@ async def search_memories(self, query: str, user_id: str, limit: int = 10, score query_embeddings[0], user_id, limit, score_threshold ) - memory_logger.info(f"๐Ÿ” Found {len(results)} memories for query '{query}' (user: {user_id})") + memory_logger.info( + f"๐Ÿ” Found {len(results)} memories for query '{query}' (user: {user_id})" + ) return results except Exception as e: @@ -253,14 +269,14 @@ async def search_memories(self, query: str, user_id: str, limit: int = 10, score async def get_all_memories(self, user_id: str, limit: int = 100) -> List[MemoryEntry]: """Get all memories for a specific user. - + Retrieves all stored memories for the given user without similarity filtering. - + Args: user_id: User identifier limit: Maximum number of memories to return - + Returns: List of MemoryEntry objects for the user """ @@ -297,7 +313,9 @@ async def count_memories(self, user_id: str) -> Optional[int]: memory_logger.error(f"Count memories failed: {e}") return None - async def get_memory(self, memory_id: str, user_id: Optional[str] = None) -> Optional[MemoryEntry]: + async def get_memory( + self, memory_id: str, user_id: Optional[str] = None + ) -> Optional[MemoryEntry]: """Get a specific memory by ID. Args: @@ -327,7 +345,7 @@ async def update_memory( content: Optional[str] = None, metadata: Optional[dict[str, Any]] = None, user_id: Optional[str] = None, - user_email: Optional[str] = None + user_email: Optional[str] = None, ) -> bool: """Update a specific memory's content and/or metadata. 
@@ -380,7 +398,7 @@ async def update_memory( memory_id=memory_id, new_content=new_content, new_embedding=new_embedding, - new_metadata=new_metadata + new_metadata=new_metadata, ) if success: @@ -394,12 +412,14 @@ async def update_memory( memory_logger.error(f"Error updating memory {memory_id}: {e}", exc_info=True) return False - async def delete_memory(self, memory_id: str, user_id: Optional[str] = None, user_email: Optional[str] = None) -> bool: + async def delete_memory( + self, memory_id: str, user_id: Optional[str] = None, user_email: Optional[str] = None + ) -> bool: """Delete a specific memory by ID. - + Args: memory_id: Unique identifier of the memory to delete - + Returns: True if successfully deleted, False otherwise """ @@ -417,10 +437,10 @@ async def delete_memory(self, memory_id: str, user_id: Optional[str] = None, use async def delete_all_user_memories(self, user_id: str) -> int: """Delete all memories for a specific user. - + Args: user_id: User identifier - + Returns: Number of memories that were deleted """ @@ -437,7 +457,7 @@ async def delete_all_user_memories(self, user_id: str) -> int: async def test_connection(self) -> bool: """Test if the memory service and its dependencies are working. - + Returns: True if all connections are healthy, False otherwise """ @@ -460,13 +480,14 @@ def shutdown(self) -> None: def _deduplicate_memories(self, memories_text: List[str]) -> List[str]: """Remove near-duplicate memories from the same extraction session. 
- + Args: memories_text: List of extracted memory strings - + Returns: Deduplicated list of memory strings """ + def _collapse_text_for_dedup(text: str) -> str: """Normalize text for deduplication by removing common words and punctuation.""" t = text.lower() @@ -480,16 +501,18 @@ def _collapse_text_for_dedup(text: str) -> str: seen_collapsed = set() deduped_text: List[str] = [] - + for memory_text in memories_text: key = _collapse_text_for_dedup(memory_text) if key not in seen_collapsed: seen_collapsed.add(key) deduped_text.append(memory_text) - + if len(deduped_text) != len(memories_text): - memory_logger.info(f"๐Ÿงน Deduplicated memories: {len(memories_text)} -> {len(deduped_text)}") - + memory_logger.info( + f"๐Ÿงน Deduplicated memories: {len(memories_text)} -> {len(deduped_text)}" + ) + return deduped_text def _create_memory_entries( @@ -499,10 +522,10 @@ def _create_memory_entries( client_id: str, source_id: str, user_id: str, - user_email: str + user_email: str, ) -> List[MemoryEntry]: """Create MemoryEntry objects from extracted memories. - + Args: fact_memories_text: List of factmemory content strings embeddings: Corresponding embedding vectors @@ -510,7 +533,7 @@ def _create_memory_entries( source_id: Source session identifier user_id: User identifier user_email: User email - + Returns: List of MemoryEntry objects ready for storage """ @@ -547,14 +570,14 @@ async def _process_memory_updates( user_id: str, client_id: str, source_id: str, - user_email: str + user_email: str, ) -> List[str]: """Process memory updates using LLM-driven action proposals. - + This method implements the intelligent memory (can be fact or summarized facts) updating logic that decides whether to add, update, or delete memories based on existing context and new information. 
- + Args: memories_text: List of new memory content embeddings: Corresponding embeddings @@ -562,16 +585,16 @@ async def _process_memory_updates( client_id: Client identifier source_id: Source session identifier user_email: User email - + Returns: List of created/updated memory IDs """ created_ids: List[str] = [] - + # For each new fact, find top-5 existing memories as retrieval set retrieved_old_memory = [] new_message_embeddings = {} - + for new_mem, emb in zip(memories_text, embeddings): new_message_embeddings[new_mem] = emb try: @@ -604,7 +627,7 @@ async def _process_memory_updates( f"and {len(memories_text)} new facts" ) memory_logger.debug(f"๐Ÿง  Individual facts being sent to LLM: {memories_text}") - + # add update or delete etc actions using DEFAULT_UPDATE_MEMORY_PROMPT actions_obj = await self.llm_provider.propose_memory_actions( retrieved_old_memory=retrieved_old_memory, @@ -619,23 +642,28 @@ async def _process_memory_updates( # Process the proposed actions actions_list = self._normalize_actions(actions_obj) created_ids = await self._apply_memory_actions( - actions_list, new_message_embeddings, temp_uuid_mapping, - client_id, source_id, user_id, user_email + actions_list, + new_message_embeddings, + temp_uuid_mapping, + client_id, + source_id, + user_id, + user_email, ) return created_ids def _normalize_actions(self, actions_obj: Any) -> List[dict]: """Normalize LLM response into a list of action dictionaries. 
- + Args: actions_obj: Raw LLM response object - + Returns: List of normalized action dictionaries """ actions_list = [] - + try: memory_logger.debug(f"Normalizing actions from: {actions_obj}") if isinstance(actions_obj, dict): @@ -652,12 +680,12 @@ def _normalize_actions(self, actions_obj: Any) -> List[dict]: break elif isinstance(actions_obj, list): actions_list = actions_obj - + memory_logger.info(f"๐Ÿ“‹ Normalized to {len(actions_list)} actions: {actions_list}") except Exception as normalize_err: memory_logger.warning(f"Failed to normalize actions: {normalize_err}") actions_list = [] - + return actions_list async def _apply_memory_actions( @@ -668,10 +696,10 @@ async def _apply_memory_actions( client_id: str, source_id: str, user_id: str, - user_email: str + user_email: str, ) -> List[str]: """Apply the proposed memory actions. - + Args: actions_list: List of action dictionaries new_message_embeddings: Pre-computed embeddings for new content @@ -680,15 +708,15 @@ async def _apply_memory_actions( source_id: Source session identifier user_id: User identifier user_email: User email - + Returns: List of created/updated memory IDs """ created_ids: List[str] = [] memory_entries = [] - + memory_logger.info(f"โšก Processing {len(actions_list)} actions") - + for resp in actions_list: # Allow plain string entries โ†’ ADD action if isinstance(resp, str): @@ -698,7 +726,7 @@ async def _apply_memory_actions( event_type = resp.get("event", "ADD") action_text = resp.get("text") or resp.get("memory") - + if not action_text or not isinstance(action_text, str): memory_logger.warning(f"Skipping action with no text: {resp}") continue @@ -730,7 +758,9 @@ async def _apply_memory_actions( if event_type == "ADD": if emb is None: - memory_logger.warning(f"Skipping ADD action due to missing embedding: {action_text}") + memory_logger.warning( + f"Skipping ADD action due to missing embedding: {action_text}" + ) continue memory_id = str(uuid.uuid4()) @@ -746,11 +776,11 @@ async def 
_apply_memory_actions( ) ) memory_logger.info(f"โž• Added new memory: {memory_id} - {action_text[:50]}...") - + elif event_type == "UPDATE": provided_id = resp.get("id") actual_id = temp_uuid_mapping.get(str(provided_id), provided_id) - + if actual_id and emb is not None: try: updated = await self.vector_store.update_memory( @@ -761,14 +791,16 @@ async def _apply_memory_actions( ) if updated: created_ids.append(str(actual_id)) - memory_logger.info(f"๐Ÿ”„ Updated memory: {actual_id} - {action_text[:50]}...") + memory_logger.info( + f"๐Ÿ”„ Updated memory: {actual_id} - {action_text[:50]}..." + ) else: memory_logger.warning(f"Failed to update memory {actual_id}") except Exception as update_err: memory_logger.error(f"Update memory failed: {update_err}") else: memory_logger.warning(f"Skipping UPDATE due to missing ID or embedding") - + elif event_type == "DELETE": provided_id = resp.get("id") actual_id = temp_uuid_mapping.get(str(provided_id), provided_id) @@ -783,24 +815,28 @@ async def _apply_memory_actions( memory_logger.error(f"Delete memory failed: {delete_err}") else: memory_logger.warning(f"Skipping DELETE due to missing ID: {provided_id}") - + elif event_type == "NONE": memory_logger.debug(f"NONE action - no changes for: {action_text[:50]}...") continue else: memory_logger.warning(f"Unknown event type: {event_type}") - + # Store new entries if memory_entries: stored_ids = await self.vector_store.add_memories(memory_entries) created_ids.extend(stored_ids) - memory_logger.info(f"โœ… Actions processed: {len(memory_entries)} new entries, {len(created_ids)} total changes") + memory_logger.info( + f"โœ… Actions processed: {len(memory_entries)} new entries, {len(created_ids)} total changes" + ) return created_ids - async def _update_database_relationships(self, db_helper: Any, source_id: str, created_ids: List[str]) -> None: + async def _update_database_relationships( + self, db_helper: Any, source_id: str, created_ids: List[str] + ) -> None: """Update database 
relationships for created memories. - + Args: db_helper: Database helper instance source_id: Source session identifier @@ -820,46 +856,42 @@ async def example_usage(): # Build config from environment config = build_memory_config_from_env() - + # Initialize service memory_service = MemoryService(config) await memory_service.initialize() - + # Add memory success, memory_ids = await memory_service.add_memory( transcript="User discussed their goals for the next quarter.", client_id="client123", source_id="audio456", user_id="user789", - user_email="user@example.com" + user_email="user@example.com", ) - + if success: print(f"โœ… Added memories: {memory_ids}") - + # Search memories results = await memory_service.search_memories( - query="quarterly goals", - user_id="user789", - limit=5 + query="quarterly goals", user_id="user789", limit=5 ) print(f"๐Ÿ” Found {len(results)} search results") - + # Get all memories - all_memories = await memory_service.get_all_memories( - user_id="user789", - limit=100 - ) + all_memories = await memory_service.get_all_memories(user_id="user789", limit=100) print(f"๐Ÿ“š Total memories: {len(all_memories)}") - + # Clean up test data for memory_id in memory_ids: await memory_service.delete_memory(memory_id) print("๐Ÿงน Cleaned up test data") - + memory_service.shutdown() if __name__ == "__main__": import asyncio - asyncio.run(example_usage()) \ No newline at end of file + + asyncio.run(example_usage()) diff --git a/backends/advanced/src/advanced_omi_backend/services/memory/providers/mycelia.py b/backends/advanced/src/advanced_omi_backend/services/memory/providers/mycelia.py index 6289f035..067dd954 100644 --- a/backends/advanced/src/advanced_omi_backend/services/memory/providers/mycelia.py +++ b/backends/advanced/src/advanced_omi_backend/services/memory/providers/mycelia.py @@ -13,6 +13,7 @@ import httpx from advanced_omi_backend.auth import generate_jwt_for_user +from advanced_omi_backend.model_registry import get_models_registry from 
advanced_omi_backend.users import User from ..base import MemoryEntry, MemoryServiceBase @@ -23,7 +24,6 @@ get_temporal_entity_extraction_prompt, ) from .llm_providers import _get_openai_client -from advanced_omi_backend.model_registry import get_models_registry memory_logger = logging.getLogger("memory_service") @@ -61,6 +61,10 @@ class MyceliaMemoryService(MemoryServiceBase): **kwargs: Additional configuration parameters """ + @property + def provider_identifier(self) -> str: + return "mycelia" + def __init__(self, config: MemoryConfig): """Initialize Mycelia memory service. @@ -253,7 +257,9 @@ async def _extract_memories_via_llm( if not llm_def: memory_logger.warning("No default LLM in config.yml; cannot extract facts") return [] - client = _get_openai_client(api_key=llm_def.api_key or "", base_url=llm_def.model_url, is_async=True) + client = _get_openai_client( + api_key=llm_def.api_key or "", base_url=llm_def.model_url, is_async=True + ) response = await client.chat.completions.create( model=llm_def.model_name, messages=[ @@ -302,13 +308,19 @@ async def _extract_temporal_entity_via_llm( # Use registry-driven default LLM with OpenAI SDK reg = get_models_registry() if not reg: - memory_logger.warning("No registry available for LLM; cannot extract temporal entity") + memory_logger.warning( + "No registry available for LLM; cannot extract temporal entity" + ) return None llm_def = reg.get_default("llm") if not llm_def: - memory_logger.warning("No default LLM in config.yml; cannot extract temporal entity") + memory_logger.warning( + "No default LLM in config.yml; cannot extract temporal entity" + ) return None - client = _get_openai_client(api_key=llm_def.api_key or "", base_url=llm_def.model_url, is_async=True) + client = _get_openai_client( + api_key=llm_def.api_key or "", base_url=llm_def.model_url, is_async=True + ) response = await client.chat.completions.create( model=llm_def.model_name, messages=[ diff --git 
a/backends/advanced/src/advanced_omi_backend/services/memory/providers/openmemory_mcp.py b/backends/advanced/src/advanced_omi_backend/services/memory/providers/openmemory_mcp.py index d5061a2c..babe9bf6 100644 --- a/backends/advanced/src/advanced_omi_backend/services/memory/providers/openmemory_mcp.py +++ b/backends/advanced/src/advanced_omi_backend/services/memory/providers/openmemory_mcp.py @@ -40,6 +40,10 @@ class OpenMemoryMCPService(MemoryServiceBase): _initialized: Whether the service has been initialized """ + @property + def provider_identifier(self) -> str: + return "openmemory_mcp" + def __init__( self, server_url: Optional[str] = None, diff --git a/backends/advanced/src/advanced_omi_backend/services/memory/service_factory.py b/backends/advanced/src/advanced_omi_backend/services/memory/service_factory.py index 5607d8ff..d30778e0 100644 --- a/backends/advanced/src/advanced_omi_backend/services/memory/service_factory.py +++ b/backends/advanced/src/advanced_omi_backend/services/memory/service_factory.py @@ -11,7 +11,7 @@ from typing import Optional from .base import MemoryServiceBase -from .config import build_memory_config_from_env, MemoryConfig, MemoryProvider +from .config import MemoryConfig, MemoryProvider, build_memory_config_from_env memory_logger = logging.getLogger("memory_service") @@ -23,22 +23,23 @@ def create_memory_service(config: MemoryConfig) -> MemoryServiceBase: """Create a memory service instance based on configuration. 
- + Args: config: Memory service configuration - + Returns: Configured memory service instance - + Raises: ValueError: If unsupported memory provider is specified RuntimeError: If required dependencies are missing """ memory_logger.info(f"๐Ÿง  Creating memory service with provider: {config.memory_provider.value}") - + if config.memory_provider == MemoryProvider.CHRONICLE: # Use the sophisticated Chronicle implementation from .providers.chronicle import MemoryService as ChronicleMemoryService + return ChronicleMemoryService(config) elif config.memory_provider == MemoryProvider.OPENMEMORY_MCP: @@ -99,9 +100,13 @@ def get_memory_service() -> MemoryServiceBase: # Don't initialize here - let it happen lazily on first use # This prevents orphaned tasks that cause "Task was destroyed but it is pending" errors - memory_logger.debug(f"Memory service created but not initialized: {type(_memory_service).__name__}") + memory_logger.debug( + f"Memory service created but not initialized: {type(_memory_service).__name__}" + ) - memory_logger.info(f"โœ… Global memory service created: {type(_memory_service).__name__}") + memory_logger.info( + f"โœ… Global memory service created: {type(_memory_service).__name__}" + ) except Exception as e: memory_logger.error(f"โŒ Failed to create memory service: {e}") @@ -113,7 +118,7 @@ def get_memory_service() -> MemoryServiceBase: def shutdown_memory_service() -> None: """Shutdown the global memory service and clean up resources.""" global _memory_service - + if _memory_service is not None: try: _memory_service.shutdown() @@ -135,30 +140,24 @@ def reset_memory_service() -> None: def get_service_info() -> dict: """Get information about the current memory service. 
- + Returns: Dictionary with service information """ global _memory_service - + info = { "service_created": _memory_service is not None, "service_type": None, "service_initialized": False, - "memory_provider": None + "memory_provider": None, } - + if _memory_service is not None: info["service_type"] = type(_memory_service).__name__ # All memory services should have _initialized attribute per the base class info["service_initialized"] = _memory_service._initialized - # Try to determine provider from service type - if "OpenMemoryMCP" in info["service_type"]: - info["memory_provider"] = "openmemory_mcp" - elif info["service_type"] == "ChronicleMemoryService": - info["memory_provider"] = "chronicle" - elif info["service_type"] == "MyceliaMemoryService": - info["memory_provider"] = "mycelia" - - return info \ No newline at end of file + info["memory_provider"] = _memory_service.provider_identifier + + return info diff --git a/backends/advanced/src/advanced_omi_backend/services/memory/update_memory_utils.py b/backends/advanced/src/advanced_omi_backend/services/memory/update_memory_utils.py index 1fce1079..b0c6c9db 100644 --- a/backends/advanced/src/advanced_omi_backend/services/memory/update_memory_utils.py +++ b/backends/advanced/src/advanced_omi_backend/services/memory/update_memory_utils.py @@ -1,8 +1,8 @@ -from dataclasses import dataclass, asdict -from typing import List, Optional, Literal, Dict, Any, Union -import xml.etree.ElementTree as ET import re +import xml.etree.ElementTree as ET +from dataclasses import asdict, dataclass +from typing import Any, Dict, List, Literal, Optional, Union Event = Literal["ADD", "UPDATE", "DELETE", "NONE"] NUMERIC_ID = re.compile(r"^\d+$") @@ -25,7 +25,7 @@ def extract_xml_from_content(content: str) -> str: """ # Try to find XML block within the content import re - + # Look for ... 
block xml_match = re.search(r'.*?', content, re.DOTALL) if xml_match: diff --git a/backends/advanced/src/advanced_omi_backend/services/neo4j_client.py b/backends/advanced/src/advanced_omi_backend/services/neo4j_client.py index c0e3e633..263d01cd 100644 --- a/backends/advanced/src/advanced_omi_backend/services/neo4j_client.py +++ b/backends/advanced/src/advanced_omi_backend/services/neo4j_client.py @@ -1,7 +1,8 @@ """Shared Neo4j client utilities for the advanced OMI backend.""" from typing import Optional -from neo4j import GraphDatabase, Driver, READ_ACCESS, WRITE_ACCESS + +from neo4j import READ_ACCESS, WRITE_ACCESS, Driver, GraphDatabase class Neo4jClient: diff --git a/backends/advanced/src/advanced_omi_backend/services/obsidian_service.py b/backends/advanced/src/advanced_omi_backend/services/obsidian_service.py index 07f841b7..b02a6fa0 100644 --- a/backends/advanced/src/advanced_omi_backend/services/obsidian_service.py +++ b/backends/advanced/src/advanced_omi_backend/services/obsidian_service.py @@ -12,24 +12,27 @@ - Vector similarity search via Neo4j vector indexes """ +import hashlib import logging import os import re -import hashlib -from typing import TypedDict, List, Optional, Literal from pathlib import Path +from typing import List, Literal, Optional, TypedDict + +from advanced_omi_backend.services.memory.config import ( + load_config_yml as load_root_config, +) from advanced_omi_backend.services.memory.providers.llm_providers import ( - generate_openai_embeddings, chunk_text_with_spacy, + generate_openai_embeddings, ) -from advanced_omi_backend.services.memory.config import load_config_yml as load_root_config -from advanced_omi_backend.utils.model_utils import get_model_config -from advanced_omi_backend.utils.config_utils import resolve_value from advanced_omi_backend.services.neo4j_client import ( Neo4jClient, Neo4jReadInterface, Neo4jWriteInterface, ) +from advanced_omi_backend.utils.config_utils import resolve_value +from 
advanced_omi_backend.utils.model_utils import get_model_config logger = logging.getLogger(__name__) diff --git a/backends/advanced/src/advanced_omi_backend/services/plugin_service.py b/backends/advanced/src/advanced_omi_backend/services/plugin_service.py index 1ca086f2..fb3956db 100644 --- a/backends/advanced/src/advanced_omi_backend/services/plugin_service.py +++ b/backends/advanced/src/advanced_omi_backend/services/plugin_service.py @@ -48,8 +48,8 @@ def expand_env_vars(value: Any) -> Any: def replacer(match): var_expr = match.group(1) # Support default values: ${VAR:-default} - if ':-' in var_expr: - var_name, default = var_expr.split(':-', 1) + if ":-" in var_expr: + var_name, default = var_expr.split(":-", 1) return os.environ.get(var_name.strip(), default.strip()) else: var_name = var_expr.strip() @@ -62,7 +62,7 @@ def replacer(match): return match.group(0) # Keep original placeholder return env_value - return re.sub(r'\$\{([^}]+)\}', replacer, value) + return re.sub(r"\$\{([^}]+)\}", replacer, value) elif isinstance(value, dict): return {k: expand_env_vars(v) for k, v in value.items()} @@ -106,12 +106,13 @@ def load_plugin_config(plugin_id: str, orchestration_config: Dict[str, Any]) -> # 1. Load plugin-specific config.yml if it exists try: import advanced_omi_backend.plugins + plugins_dir = Path(advanced_omi_backend.plugins.__file__).parent plugin_config_path = plugins_dir / plugin_id / "config.yml" if plugin_config_path.exists(): logger.debug(f"Loading plugin config from: {plugin_config_path}") - with open(plugin_config_path, 'r') as f: + with open(plugin_config_path, "r") as f: plugin_config = yaml.safe_load(f) or {} config.update(plugin_config) logger.debug(f"Loaded {len(plugin_config)} config keys for '{plugin_id}'") @@ -125,12 +126,12 @@ def load_plugin_config(plugin_id: str, orchestration_config: Dict[str, Any]) -> config = expand_env_vars(config) # 3. 
Merge orchestration settings from config/plugins.yml - config['enabled'] = orchestration_config.get('enabled', False) - config['events'] = orchestration_config.get('events', []) - config['condition'] = orchestration_config.get('condition', {'type': 'always'}) + config["enabled"] = orchestration_config.get("enabled", False) + config["events"] = orchestration_config.get("events", []) + config["condition"] = orchestration_config.get("condition", {"type": "always"}) # Add plugin ID for reference - config['plugin_id'] = plugin_id + config["plugin_id"] = plugin_id logger.debug( f"Plugin '{plugin_id}' config merged: enabled={config['enabled']}, " @@ -183,7 +184,7 @@ def extract_env_var_name(value: str) -> Optional[str]: if not isinstance(value, str): return None - match = re.search(r'\$\{([^}:]+)', value) + match = re.search(r"\$\{([^}:]+)", value) if match: return match.group(1).strip() return None @@ -207,66 +208,70 @@ def infer_field_type(key: str, value: Any) -> Dict[str, Any]: {'type': 'number', 'label': 'Max Sentences', 'default': 3} """ # Generate human-readable label from key - label = key.replace('_', ' ').title() + label = key.replace("_", " ").title() # Check for environment variable reference - if isinstance(value, str) and '${' in value: + if isinstance(value, str) and "${" in value: env_var = extract_env_var_name(value) if not env_var: - return {'type': 'string', 'label': label, 'default': value} + return {"type": "string", "label": label, "default": value} # Determine if this is a secret based on env var name - secret_keywords = ['PASSWORD', 'TOKEN', 'KEY', 'SECRET', 'APIKEY', 'API_KEY'] + secret_keywords = ["PASSWORD", "TOKEN", "KEY", "SECRET", "APIKEY", "API_KEY"] is_secret = any(keyword in env_var.upper() for keyword in secret_keywords) # Extract default value if present (${VAR:-default}) default_value = None - if ':-' in value: - default_match = re.search(r':-([^}]+)', value) + if ":-" in value: + default_match = re.search(r":-([^}]+)", value) if 
default_match: default_value = default_match.group(1).strip() # Try to parse boolean/number defaults - if default_value.lower() in ('true', 'false'): - default_value = default_value.lower() == 'true' + if default_value.lower() in ("true", "false"): + default_value = default_value.lower() == "true" elif default_value.isdigit(): default_value = int(default_value) schema = { - 'type': 'password' if is_secret else 'string', - 'label': label, - 'secret': is_secret, - 'env_var': env_var, - 'required': is_secret, # Secrets are required + "type": "password" if is_secret else "string", + "label": label, + "secret": is_secret, + "env_var": env_var, + "required": is_secret, # Secrets are required } if default_value is not None: - schema['default'] = default_value - schema['required'] = False + schema["default"] = default_value + schema["required"] = False return schema # Boolean values elif isinstance(value, bool): - return {'type': 'boolean', 'label': label, 'default': value} + return {"type": "boolean", "label": label, "default": value} # Numeric values elif isinstance(value, int): - return {'type': 'number', 'label': label, 'default': value} + return {"type": "number", "label": label, "default": value} elif isinstance(value, float): - return {'type': 'number', 'label': label, 'default': value, 'step': 0.1} + return {"type": "number", "label": label, "default": value, "step": 0.1} # List values elif isinstance(value, list): - return {'type': 'array', 'label': label, 'default': value} + return {"type": "array", "label": label, "default": value} # Object/dict values elif isinstance(value, dict): - return {'type': 'object', 'label': label, 'default': value} + return {"type": "object", "label": label, "default": value} # String values (fallback) else: - return {'type': 'string', 'label': label, 'default': str(value) if value is not None else ''} + return { + "type": "string", + "label": label, + "default": str(value) if value is not None else "", + } def 
load_schema_yml(plugin_id: str) -> Optional[Dict[str, Any]]: @@ -280,12 +285,13 @@ def load_schema_yml(plugin_id: str) -> Optional[Dict[str, Any]]: """ try: import advanced_omi_backend.plugins + plugins_dir = Path(advanced_omi_backend.plugins.__file__).parent schema_path = plugins_dir / plugin_id / "schema.yml" if schema_path.exists(): logger.debug(f"Loading schema override from: {schema_path}") - with open(schema_path, 'r') as f: + with open(schema_path, "r") as f: return yaml.safe_load(f) or {} except Exception as e: logger.warning(f"Failed to load schema.yml for plugin '{plugin_id}': {e}") @@ -329,16 +335,13 @@ def infer_schema_from_config(plugin_id: str, config_dict: Dict[str, Any]) -> Dic field_schema = infer_field_type(key, value) # Separate env vars from regular settings - if field_schema.get('env_var'): - env_var_name = field_schema['env_var'] + if field_schema.get("env_var"): + env_var_name = field_schema["env_var"] env_vars_schema[env_var_name] = field_schema else: settings_schema[key] = field_schema - return { - 'settings': settings_schema, - 'env_vars': env_vars_schema - } + return {"settings": settings_schema, "env_vars": env_vars_schema} def mask_secrets_in_config(config: Dict[str, Any], schema: Dict[str, Any]) -> Dict[str, Any]: @@ -362,8 +365,8 @@ def mask_secrets_in_config(config: Dict[str, Any], schema: Dict[str, Any]) -> Di # Get list of secret environment variable names secret_env_vars = set() - for env_var, field_schema in schema.get('env_vars', {}).items(): - if field_schema.get('secret', False): + for env_var, field_schema in schema.get("env_vars", {}).items(): + if field_schema.get("secret", False): secret_env_vars.add(env_var) # Mask values that reference secret environment variables @@ -373,13 +376,14 @@ def mask_secrets_in_config(config: Dict[str, Any], schema: Dict[str, Any]) -> Di if env_var and env_var in secret_env_vars: # Check if env var is actually set is_set = bool(os.environ.get(env_var)) - masked_config[key] = 
'โ€ขโ€ขโ€ขโ€ขโ€ขโ€ขโ€ขโ€ขโ€ขโ€ขโ€ขโ€ข' if is_set else '' + masked_config[key] = "โ€ขโ€ขโ€ขโ€ขโ€ขโ€ขโ€ขโ€ขโ€ขโ€ขโ€ขโ€ข" if is_set else "" return masked_config -def get_plugin_metadata(plugin_id: str, plugin_class: Type[BasePlugin], - orchestration_config: Dict[str, Any]) -> Dict[str, Any]: +def get_plugin_metadata( + plugin_id: str, plugin_class: Type[BasePlugin], orchestration_config: Dict[str, Any] +) -> Dict[str, Any]: """Get complete metadata for a plugin including schema and current config. Args: @@ -393,12 +397,13 @@ def get_plugin_metadata(plugin_id: str, plugin_class: Type[BasePlugin], # Load plugin config.yml try: import advanced_omi_backend.plugins + plugins_dir = Path(advanced_omi_backend.plugins.__file__).parent plugin_config_path = plugins_dir / plugin_id / "config.yml" config_dict = {} if plugin_config_path.exists(): - with open(plugin_config_path, 'r') as f: + with open(plugin_config_path, "r") as f: config_dict = yaml.safe_load(f) or {} except Exception as e: logger.error(f"Failed to load config for plugin '{plugin_id}': {e}") @@ -408,36 +413,36 @@ def get_plugin_metadata(plugin_id: str, plugin_class: Type[BasePlugin], config_schema = infer_schema_from_config(plugin_id, config_dict) # Get plugin metadata from class - plugin_name = getattr(plugin_class, 'name', plugin_id.replace('_', ' ').title()) - plugin_description = getattr(plugin_class, 'description', '') - supports_testing = hasattr(plugin_class, 'test_connection') + plugin_name = getattr(plugin_class, "name", plugin_id.replace("_", " ").title()) + plugin_description = getattr(plugin_class, "description", "") + supports_testing = hasattr(plugin_class, "test_connection") # Mask secrets in current config current_config = load_plugin_config(plugin_id, orchestration_config) masked_config = mask_secrets_in_config(current_config, config_schema) # Mark which env vars are set - for env_var_name, env_var_schema in config_schema.get('env_vars', {}).items(): - env_var_schema['is_set'] = 
bool(os.environ.get(env_var_name)) - if env_var_schema.get('secret') and env_var_schema['is_set']: - env_var_schema['value'] = 'โ€ขโ€ขโ€ขโ€ขโ€ขโ€ขโ€ขโ€ขโ€ขโ€ขโ€ขโ€ข' + for env_var_name, env_var_schema in config_schema.get("env_vars", {}).items(): + env_var_schema["is_set"] = bool(os.environ.get(env_var_name)) + if env_var_schema.get("secret") and env_var_schema["is_set"]: + env_var_schema["value"] = "โ€ขโ€ขโ€ขโ€ขโ€ขโ€ขโ€ขโ€ขโ€ขโ€ขโ€ขโ€ข" else: - env_var_schema['value'] = os.environ.get(env_var_name, '') + env_var_schema["value"] = os.environ.get(env_var_name, "") return { - 'plugin_id': plugin_id, - 'name': plugin_name, - 'description': plugin_description, - 'enabled': orchestration_config.get('enabled', False), - 'status': 'active' if orchestration_config.get('enabled', False) else 'disabled', - 'supports_testing': supports_testing, - 'config_schema': config_schema, - 'current_config': masked_config, - 'orchestration': { - 'enabled': orchestration_config.get('enabled', False), - 'events': orchestration_config.get('events', []), - 'condition': orchestration_config.get('condition', {'type': 'always'}) - } + "plugin_id": plugin_id, + "name": plugin_name, + "description": plugin_description, + "enabled": orchestration_config.get("enabled", False), + "status": "active" if orchestration_config.get("enabled", False) else "disabled", + "supports_testing": supports_testing, + "config_schema": config_schema, + "current_config": masked_config, + "orchestration": { + "enabled": orchestration_config.get("enabled", False), + "events": orchestration_config.get("events", []), + "condition": orchestration_config.get("condition", {"type": "always"}), + }, } @@ -467,6 +472,7 @@ def discover_plugins() -> Dict[str, Type[BasePlugin]]: # Get the plugins directory path try: import advanced_omi_backend.plugins + plugins_dir = Path(advanced_omi_backend.plugins.__file__).parent except Exception as e: logger.error(f"Failed to locate plugins directory: {e}") @@ -475,7 +481,7 @@ def 
discover_plugins() -> Dict[str, Type[BasePlugin]]: logger.info(f"๐Ÿ” Scanning for plugins in: {plugins_dir}") # Skip these known system directories/files - skip_items = {'__pycache__', '__init__.py', 'base.py', 'router.py'} + skip_items = {"__pycache__", "__init__.py", "base.py", "router.py"} # Scan for plugin directories for item in plugins_dir.iterdir(): @@ -483,7 +489,7 @@ def discover_plugins() -> Dict[str, Type[BasePlugin]]: continue plugin_id = item.name - plugin_file = item / 'plugin.py' + plugin_file = item / "plugin.py" if not plugin_file.exists(): logger.debug(f"Skipping '{plugin_id}': no plugin.py found") @@ -492,10 +498,10 @@ def discover_plugins() -> Dict[str, Type[BasePlugin]]: try: # Convert snake_case directory name to PascalCase class name # e.g., email_summarizer -> EmailSummarizerPlugin - class_name = ''.join(word.capitalize() for word in plugin_id.split('_')) + 'Plugin' + class_name = "".join(word.capitalize() for word in plugin_id.split("_")) + "Plugin" # Import the plugin module - module_path = f'advanced_omi_backend.plugins.{plugin_id}' + module_path = f"advanced_omi_backend.plugins.{plugin_id}" logger.debug(f"Attempting to import plugin from: {module_path}") # Import the plugin package (which should export the class in __init__.py) @@ -558,24 +564,28 @@ def init_plugin_router() -> Optional[PluginRouter]: logger.info(f"๐Ÿ” File exists: {plugins_yml.exists()}") if plugins_yml.exists(): - with open(plugins_yml, 'r') as f: + with open(plugins_yml, "r") as f: plugins_config = yaml.safe_load(f) # Expand environment variables in configuration plugins_config = expand_env_vars(plugins_config) - plugins_data = plugins_config.get('plugins', {}) + plugins_data = plugins_config.get("plugins", {}) - logger.info(f"๐Ÿ” Loaded plugins config with {len(plugins_data)} plugin(s): {list(plugins_data.keys())}") + logger.info( + f"๐Ÿ” Loaded plugins config with {len(plugins_data)} plugin(s): {list(plugins_data.keys())}" + ) # Discover all plugins via 
auto-discovery discovered_plugins = discover_plugins() # Core plugin names (for informational logging only) - CORE_PLUGIN_NAMES = {'homeassistant', 'test_event'} + CORE_PLUGIN_NAMES = {"homeassistant", "test_event"} # Initialize each plugin listed in config/plugins.yml for plugin_id, orchestration_config in plugins_data.items(): - logger.info(f"๐Ÿ” Processing plugin '{plugin_id}', enabled={orchestration_config.get('enabled', False)}") - if not orchestration_config.get('enabled', False): + logger.info( + f"๐Ÿ” Processing plugin '{plugin_id}', enabled={orchestration_config.get('enabled', False)}" + ) + if not orchestration_config.get("enabled", False): continue try: @@ -603,7 +613,9 @@ def init_plugin_router() -> Optional[PluginRouter]: except Exception as e: logger.error(f"Failed to register plugin '{plugin_id}': {e}", exc_info=True) - logger.info(f"๐ŸŽ‰ Plugin registration complete: {len(_plugin_router.plugins)} plugin(s) registered") + logger.info( + f"๐ŸŽ‰ Plugin registration complete: {len(_plugin_router.plugins)} plugin(s) registered" + ) else: logger.info("No plugins.yml found, plugins disabled") @@ -615,6 +627,31 @@ def init_plugin_router() -> Optional[PluginRouter]: return None +async def ensure_plugin_router() -> Optional[PluginRouter]: + """Get or initialize the plugin router with all plugins initialized. + + This is the standard pattern for worker processes that need the plugin router. + It handles the get-or-init-then-initialize sequence in one call. 
+ + Returns: + Initialized plugin router, or None if no plugins configured + """ + plugin_router = get_plugin_router() + if plugin_router: + return plugin_router + + logger.info("Initializing plugin router in worker process...") + plugin_router = init_plugin_router() + if plugin_router: + for plugin_id, plugin in plugin_router.plugins.items(): + try: + await plugin.initialize() + logger.info(f"Plugin '{plugin_id}' initialized") + except Exception as e: + logger.error(f"Failed to initialize plugin '{plugin_id}': {e}") + return plugin_router + + async def cleanup_plugin_router() -> None: """Clean up the plugin router and all registered plugins.""" global _plugin_router diff --git a/backends/advanced/src/advanced_omi_backend/services/transcription/mock_provider.py b/backends/advanced/src/advanced_omi_backend/services/transcription/mock_provider.py index 04b192df..f596214f 100644 --- a/backends/advanced/src/advanced_omi_backend/services/transcription/mock_provider.py +++ b/backends/advanced/src/advanced_omi_backend/services/transcription/mock_provider.py @@ -6,6 +6,7 @@ """ from typing import Optional + from .base import BatchTranscriptionProvider diff --git a/backends/advanced/src/advanced_omi_backend/services/transcription/streaming_consumer.py b/backends/advanced/src/advanced_omi_backend/services/transcription/streaming_consumer.py index a58e886a..052680d2 100644 --- a/backends/advanced/src/advanced_omi_backend/services/transcription/streaming_consumer.py +++ b/backends/advanced/src/advanced_omi_backend/services/transcription/streaming_consumer.py @@ -19,9 +19,9 @@ import redis.asyncio as redis from redis import exceptions as redis_exceptions +from advanced_omi_backend.client_manager import get_client_owner_async from advanced_omi_backend.plugins.router import PluginRouter from advanced_omi_backend.services.transcription import get_transcription_provider -from advanced_omi_backend.client_manager import get_client_owner_async logger = logging.getLogger(__name__) diff 
--git a/backends/advanced/src/advanced_omi_backend/speaker_recognition_client.py b/backends/advanced/src/advanced_omi_backend/speaker_recognition_client.py index 97f1cce1..7c14cccd 100644 --- a/backends/advanced/src/advanced_omi_backend/speaker_recognition_client.py +++ b/backends/advanced/src/advanced_omi_backend/speaker_recognition_client.py @@ -40,7 +40,9 @@ def __init__(self, service_url: Optional[str] = None): if os.getenv("USE_MOCK_SPEAKER_CLIENT") == "true": try: # Import mock client from testing module - from advanced_omi_backend.testing.mock_speaker_client import MockSpeakerRecognitionClient + from advanced_omi_backend.testing.mock_speaker_client import ( + MockSpeakerRecognitionClient, + ) self._mock_client = MockSpeakerRecognitionClient() self.enabled = True @@ -331,7 +333,9 @@ async def identify_provider_segments( return {"segments": []} from advanced_omi_backend.config import get_diarization_settings - from advanced_omi_backend.utils.audio_chunk_utils import reconstruct_audio_segment + from advanced_omi_backend.utils.audio_chunk_utils import ( + reconstruct_audio_segment, + ) config = get_diarization_settings() similarity_threshold = config.get("similarity_threshold", 0.15) @@ -852,7 +856,7 @@ async def enroll_new_speaker( try: import uuid - + # Generate speaker ID: user_{user_id}_speaker_{random_hex} speaker_id = f"user_{user_id}_speaker_{uuid.uuid4().hex[:12]}" @@ -963,7 +967,9 @@ async def check_if_enrolled_speaker_present( - enrolled_present: True if enrolled speaker detected, False otherwise - speaker_result: Full speaker recognition result dict with segments """ - from advanced_omi_backend.utils.audio_extraction import extract_audio_for_results + from advanced_omi_backend.utils.audio_extraction import ( + extract_audio_for_results, + ) logger.info(f"๐ŸŽค [SPEAKER CHECK] Starting speaker check for session {session_id}") logger.info(f"๐ŸŽค [SPEAKER CHECK] Client: {client_id}, User: {user_id}") diff --git 
a/backends/advanced/src/advanced_omi_backend/utils/audio_utils.py b/backends/advanced/src/advanced_omi_backend/utils/audio_utils.py index f8ba07a3..5b5fa992 100644 --- a/backends/advanced/src/advanced_omi_backend/utils/audio_utils.py +++ b/backends/advanced/src/advanced_omi_backend/utils/audio_utils.py @@ -234,6 +234,7 @@ async def write_audio_file( AudioValidationError: If validation fails (when validate=True) """ from easy_audio_interfaces.filesystem.filesystem_interfaces import LocalFileSink + from advanced_omi_backend.config import CHUNK_DIR # Validate and prepare audio if needed @@ -374,8 +375,8 @@ def pcm_to_wav_bytes( Returns: WAV file data as bytes """ - import wave import io + import wave logger.debug( f"Converting PCM to WAV in memory: {len(pcm_data)} bytes " diff --git a/backends/advanced/src/advanced_omi_backend/utils/conversation_utils.py b/backends/advanced/src/advanced_omi_backend/utils/conversation_utils.py index a1f34285..2b69a47f 100644 --- a/backends/advanced/src/advanced_omi_backend/utils/conversation_utils.py +++ b/backends/advanced/src/advanced_omi_backend/utils/conversation_utils.py @@ -9,7 +9,7 @@ import time from datetime import datetime from pathlib import Path -from typing import Optional, Dict, Any, List +from typing import Any, Dict, List, Optional from advanced_omi_backend.config import get_speech_detection_settings from advanced_omi_backend.llm_client import async_generate @@ -178,7 +178,7 @@ async def generate_title(text: str, segments: Optional[list] = None) -> str: if segments: conversation_text = "" for segment in segments[:10]: # Use first 10 segments for title generation - segment_text = segment.get("text", "").strip() + segment_text = segment.text.strip() if segment.text else "" if segment_text: conversation_text += f"{segment_text}\n" text = conversation_text if conversation_text.strip() else text @@ -232,8 +232,8 @@ async def generate_short_summary(text: str, segments: Optional[list] = None) -> formatted_text = "" 
speakers_in_conv = set() for segment in segments: - speaker = segment.get("speaker", "") - segment_text = segment.get("text", "").strip() + speaker = segment.speaker or "" + segment_text = segment.text.strip() if segment.text else "" if segment_text: if speaker: formatted_text += f"{speaker}: {segment_text}\n" @@ -281,15 +281,6 @@ async def generate_short_summary(text: str, segments: Optional[list] = None) -> ) -# Backward compatibility alias -async def generate_summary(text: str) -> str: - """ - Backward compatibility wrapper for generate_short_summary. - - Deprecated: Use generate_short_summary instead. - """ - return await generate_short_summary(text) - async def generate_detailed_summary( text: str, @@ -324,8 +315,8 @@ async def generate_detailed_summary( formatted_text = "" speakers_in_conv = set() for segment in segments: - speaker = segment.get("speaker", "") - segment_text = segment.get("text", "").strip() + speaker = segment.speaker or "" + segment_text = segment.text.strip() if segment.text else "" if segment_text: if speaker: formatted_text += f"{speaker}: {segment_text}\n" @@ -396,45 +387,18 @@ async def generate_detailed_summary( ) -# Backward compatibility aliases for deprecated speaker-specific methods -async def generate_title_with_speakers(segments: list) -> str: - """ - Deprecated: Use generate_title(text, segments=segments) instead. - - Backward compatibility wrapper. - """ - if not segments: - return "Conversation" - # Extract text from segments for compatibility - text = "\n".join(s.get("text", "") for s in segments if s.get("text")) - return await generate_title(text, segments=segments) - - -async def generate_summary_with_speakers(segments: list) -> str: - """ - Deprecated: Use generate_short_summary(text, segments=segments) instead. - - Backward compatibility wrapper. 
- """ - if not segments: - return "No content" - # Extract text from segments for compatibility - text = "\n".join(s.get("text", "") for s in segments if s.get("text")) - return await generate_short_summary(text, segments=segments) - - # ============================================================================ # Conversation Job Helpers # ============================================================================ -def extract_speakers_from_segments(segments: List[Dict[str, Any]]) -> List[str]: +def extract_speakers_from_segments(segments: list) -> List[str]: """ Extract unique speaker names from segments. Args: - segments: List of segments with speaker information + segments: List of segments (dict or SpeakerSegment objects) Returns: List of unique speaker names (excluding "Unknown") @@ -442,7 +406,7 @@ def extract_speakers_from_segments(segments: List[Dict[str, Any]]) -> List[str]: speakers = [] if segments: for seg in segments: - speaker = seg.get("speaker", "Unknown") + speaker = seg.get("speaker", "Unknown") if isinstance(seg, dict) else (seg.speaker or "Unknown") if speaker and speaker != "Unknown" and speaker not in speakers: speakers.append(speaker) return speakers diff --git a/backends/advanced/src/advanced_omi_backend/utils/gdrive_audio_utils.py b/backends/advanced/src/advanced_omi_backend/utils/gdrive_audio_utils.py index 785091e1..d9e39163 100644 --- a/backends/advanced/src/advanced_omi_backend/utils/gdrive_audio_utils.py +++ b/backends/advanced/src/advanced_omi_backend/utils/gdrive_audio_utils.py @@ -1,14 +1,15 @@ import io +import logging import tempfile from typing import List -import logging -from starlette.datastructures import UploadFile as StarletteUploadFile + from googleapiclient.http import MediaIoBaseDownload +from starlette.datastructures import UploadFile as StarletteUploadFile + from advanced_omi_backend.clients.gdrive_audio_client import get_google_drive_client from advanced_omi_backend.models.conversation import Conversation from 
advanced_omi_backend.utils.audio_utils import AudioValidationError - logger = logging.getLogger(__name__) audio_logger = logging.getLogger("audio_processing") diff --git a/backends/advanced/src/advanced_omi_backend/utils/logging_utils.py b/backends/advanced/src/advanced_omi_backend/utils/logging_utils.py index 3d15f49c..d0c69e34 100644 --- a/backends/advanced/src/advanced_omi_backend/utils/logging_utils.py +++ b/backends/advanced/src/advanced_omi_backend/utils/logging_utils.py @@ -7,7 +7,6 @@ import re from typing import Any, Dict, List, Optional, Set, Union - # Common patterns for identifying secret field names SECRET_KEYWORDS = [ 'PASSWORD', 'PASSWD', 'PWD', diff --git a/backends/advanced/src/advanced_omi_backend/utils/model_utils.py b/backends/advanced/src/advanced_omi_backend/utils/model_utils.py index 97b0cba5..96042ba0 100644 --- a/backends/advanced/src/advanced_omi_backend/utils/model_utils.py +++ b/backends/advanced/src/advanced_omi_backend/utils/model_utils.py @@ -5,7 +5,7 @@ from config.yml that can be used across different LLM services. 
""" -from typing import Dict, Any +from typing import Any, Dict def get_model_config(config_data: Dict[str, Any], model_role: str) -> Dict[str, Any]: diff --git a/backends/advanced/src/advanced_omi_backend/workers/__init__.py b/backends/advanced/src/advanced_omi_backend/workers/__init__.py index ea82056b..d4792805 100644 --- a/backends/advanced/src/advanced_omi_backend/workers/__init__.py +++ b/backends/advanced/src/advanced_omi_backend/workers/__init__.py @@ -11,16 +11,29 @@ Queue configuration and utilities are in controllers/queue_controller.py """ -# Import from transcription_jobs -from .transcription_jobs import ( - transcribe_full_audio_job, - stream_speech_detection_job, +# Import from queue_controller +from advanced_omi_backend.controllers.queue_controller import ( + DEFAULT_QUEUE, + JOB_RESULT_TTL, + MEMORY_QUEUE, + REDIS_URL, + TRANSCRIPTION_QUEUE, + default_queue, + get_job_stats, + get_jobs, + get_queue, + get_queue_health, + memory_queue, + redis_conn, + transcription_queue, ) -# Import from speaker_jobs -from .speaker_jobs import ( - check_enrolled_speakers_job, - recognise_speakers_job, +# Import from job models +from advanced_omi_backend.models.job import _ensure_beanie_initialized + +# Import from audio_jobs +from .audio_jobs import ( + audio_streaming_persistence_job, ) # Import from conversation_jobs @@ -30,35 +43,22 @@ # Import from memory_jobs from .memory_jobs import ( - process_memory_job, enqueue_memory_processing, + process_memory_job, ) -# Import from audio_jobs -from .audio_jobs import ( - audio_streaming_persistence_job, +# Import from speaker_jobs +from .speaker_jobs import ( + check_enrolled_speakers_job, + recognise_speakers_job, ) -# Import from queue_controller -from advanced_omi_backend.controllers.queue_controller import ( - get_queue, - get_job_stats, - get_jobs, - get_queue_health, - transcription_queue, - memory_queue, - default_queue, - redis_conn, - REDIS_URL, - JOB_RESULT_TTL, - TRANSCRIPTION_QUEUE, - MEMORY_QUEUE, - 
DEFAULT_QUEUE, +# Import from transcription_jobs +from .transcription_jobs import ( + stream_speech_detection_job, + transcribe_full_audio_job, ) -# Import from job models -from advanced_omi_backend.models.job import _ensure_beanie_initialized - __all__ = [ # Transcription jobs "transcribe_full_audio_job", diff --git a/backends/advanced/src/advanced_omi_backend/workers/annotation_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/annotation_jobs.py index 8ad1a8ba..3681ab5f 100644 --- a/backends/advanced/src/advanced_omi_backend/workers/annotation_jobs.py +++ b/backends/advanced/src/advanced_omi_backend/workers/annotation_jobs.py @@ -9,7 +9,7 @@ """ import logging -from datetime import datetime, timezone, timedelta +from datetime import datetime, timedelta, timezone from typing import List from advanced_omi_backend.models.annotation import ( diff --git a/backends/advanced/src/advanced_omi_backend/workers/audio_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/audio_jobs.py index de563069..f124feb8 100644 --- a/backends/advanced/src/advanced_omi_backend/workers/audio_jobs.py +++ b/backends/advanced/src/advanced_omi_backend/workers/audio_jobs.py @@ -5,18 +5,20 @@ """ import asyncio -import os import logging +import os import time -from typing import Dict, Any, Optional - -from advanced_omi_backend.models.job import JobPriority, async_job +from typing import Any, Dict, Optional from advanced_omi_backend.controllers.queue_controller import ( - default_queue, JOB_RESULT_TTL, + default_queue, +) +from advanced_omi_backend.models.job import ( + JobPriority, + _ensure_beanie_initialized, + async_job, ) -from advanced_omi_backend.models.job import _ensure_beanie_initialized logger = logging.getLogger(__name__) @@ -81,6 +83,25 @@ async def audio_streaming_persistence_job( conversation_key = f"conversation:current:{session_id}" existing_conversation_id = await redis_client.get(conversation_key) + # Guard against stale Redis keys: the conversation:current 
key has a 1-hour + # TTL and can survive container rebuilds (Redis uses appendonly persistence + # with a bind mount). If the key points to a MongoDB document that was deleted + # (e.g., data directory cleared during rebuild), we must create a fresh + # placeholder instead of silently reusing a non-existent conversation. + if existing_conversation_id: + existing_id_str = existing_conversation_id.decode() + from advanced_omi_backend.models.conversation import Conversation + existing_conv = await Conversation.find_one( + Conversation.conversation_id == existing_id_str + ) + if not existing_conv: + logger.warning( + f"โš ๏ธ Stale Redis key: conversation {existing_id_str} not found in MongoDB. " + f"Clearing key and creating fresh placeholder." + ) + await redis_client.delete(conversation_key) + existing_conversation_id = None + if not existing_conversation_id: logger.info( f"๐Ÿ“ always_persist=True - creating placeholder conversation for session {session_id[:12]}" @@ -129,10 +150,11 @@ async def audio_streaming_persistence_job( start_time = time.time() # Import MongoDB chunk utilities + from bson import Binary + from advanced_omi_backend.models.audio_chunk import AudioChunkDocument from advanced_omi_backend.models.conversation import Conversation from advanced_omi_backend.utils.audio_chunk_utils import encode_pcm_to_opus - from bson import Binary # Conversation rotation state current_conversation_id = None @@ -162,20 +184,23 @@ async def audio_streaming_persistence_job( # Get current job for zombie detection from rq import get_current_job + from advanced_omi_backend.utils.job_utils import check_job_alive current_job = get_current_job() - async def flush_pcm_buffer(): + async def flush_pcm_buffer() -> bool: """ Flush current PCM buffer to MongoDB as Opus-compressed chunk. Updates conversation metadata with chunk count and compression stats. + Returns True on success, False on failure. 
On failure the buffer is + NOT cleared so the caller can retry on the next incoming message. """ nonlocal pcm_buffer, chunk_index, chunk_start_time nonlocal total_pcm_bytes, total_compressed_bytes, total_mongo_chunks_written if len(pcm_buffer) == 0 or not current_conversation_id: - return + return True try: # Encode PCM โ†’ Opus @@ -244,8 +269,11 @@ async def flush_pcm_buffer(): f"{chunk_index + 1} chunks, {total_duration:.1f}s total" ) + return True + except Exception as e: logger.error(f"โŒ Failed to save audio chunk {chunk_index}: {e}", exc_info=True) + return False while True: # Check if job still exists in Redis (detect zombie state) @@ -290,11 +318,10 @@ async def flush_pcm_buffer(): # Flush if buffer reaches chunk size if len(pcm_buffer) >= CHUNK_SIZE_BYTES: - await flush_pcm_buffer() - # Reset for next chunk - pcm_buffer = bytearray() - chunk_index += 1 - chunk_start_time += CHUNK_DURATION_SECONDS + if await flush_pcm_buffer(): + pcm_buffer = bytearray() + chunk_index += 1 + chunk_start_time += CHUNK_DURATION_SECONDS await redis_client.xack(audio_stream_name, audio_group_name, message_id) @@ -320,11 +347,17 @@ async def flush_pcm_buffer(): if new_conversation_id != current_conversation_id: # Flush remaining buffer from previous conversation if len(pcm_buffer) > 0 and current_conversation_id: - await flush_pcm_buffer() - logger.info( - f"โœ… Finalized conversation {current_conversation_id[:12]}: " - f"{chunk_index + 1} chunks saved to MongoDB" - ) + if await flush_pcm_buffer(): + logger.info( + f"โœ… Finalized conversation {current_conversation_id[:12]}: " + f"{chunk_index + 1} chunks saved to MongoDB" + ) + else: + logger.warning( + f"โš ๏ธ Failed to flush final chunk for conversation " + f"{current_conversation_id[:12]} during rotation โ€” " + f"{len(pcm_buffer)} bytes lost" + ) # Start new conversation current_conversation_id = new_conversation_id @@ -388,12 +421,13 @@ async def flush_pcm_buffer(): # Flush if buffer reaches 10-second chunk size if 
len(pcm_buffer) >= CHUNK_SIZE_BYTES: - await flush_pcm_buffer() - - # Reset for next chunk - pcm_buffer = bytearray() - chunk_index += 1 - chunk_start_time += CHUNK_DURATION_SECONDS + if await flush_pcm_buffer(): + # Reset for next chunk only on success; + # on failure the buffer is retained and + # the next message triggers a retry. + pcm_buffer = bytearray() + chunk_index += 1 + chunk_start_time += CHUNK_DURATION_SECONDS # ACK the message await redis_client.xack(audio_stream_name, audio_group_name, message_id) diff --git a/backends/advanced/src/advanced_omi_backend/workers/audio_stream_worker.py b/backends/advanced/src/advanced_omi_backend/workers/audio_stream_worker.py index df133de4..dcad10dc 100644 --- a/backends/advanced/src/advanced_omi_backend/workers/audio_stream_worker.py +++ b/backends/advanced/src/advanced_omi_backend/workers/audio_stream_worker.py @@ -17,9 +17,11 @@ import redis.asyncio as redis -from advanced_omi_backend.services.plugin_service import init_plugin_router -from advanced_omi_backend.services.transcription.streaming_consumer import StreamingTranscriptionConsumer from advanced_omi_backend.client_manager import initialize_redis_for_client_manager +from advanced_omi_backend.services.plugin_service import init_plugin_router +from advanced_omi_backend.services.transcription.streaming_consumer import ( + StreamingTranscriptionConsumer, +) logging.basicConfig( level=logging.INFO, diff --git a/backends/advanced/src/advanced_omi_backend/workers/cleanup_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/cleanup_jobs.py index 65240af4..99ea5869 100644 --- a/backends/advanced/src/advanced_omi_backend/workers/cleanup_jobs.py +++ b/backends/advanced/src/advanced_omi_backend/workers/cleanup_jobs.py @@ -8,11 +8,11 @@ from datetime import datetime, timedelta from typing import Optional -from advanced_omi_backend.models.conversation import Conversation +from advanced_omi_backend.config import CleanupSettings, get_cleanup_settings from 
advanced_omi_backend.models.audio_chunk import AudioChunkDocument -from advanced_omi_backend.models.waveform import WaveformData +from advanced_omi_backend.models.conversation import Conversation from advanced_omi_backend.models.job import async_job -from advanced_omi_backend.config import get_cleanup_settings, CleanupSettings +from advanced_omi_backend.models.waveform import WaveformData logger = logging.getLogger(__name__) diff --git a/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py index 69927da4..6e10122f 100644 --- a/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py +++ b/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py @@ -6,31 +6,33 @@ import asyncio import logging -import time, os +import os +import time from datetime import datetime -from typing import Dict, Any, Optional -from rq.job import Job +from typing import Any, Dict, Optional + from rq.exceptions import NoSuchJobError +from rq.job import Job -from advanced_omi_backend.models.job import async_job -from advanced_omi_backend.controllers.queue_controller import redis_conn +from advanced_omi_backend.controllers.queue_controller import ( + redis_conn, + start_post_conversation_jobs, +) from advanced_omi_backend.controllers.session_controller import mark_session_complete -from advanced_omi_backend.services.plugin_service import get_plugin_router, init_plugin_router -from datetime import datetime - +from advanced_omi_backend.models.job import async_job +from advanced_omi_backend.services.plugin_service import ( + ensure_plugin_router, + get_plugin_router, +) from advanced_omi_backend.utils.conversation_utils import ( analyze_speech, extract_speakers_from_segments, - track_speech_activity, - update_job_progress_metadata, -) -from advanced_omi_backend.utils.conversation_utils import ( is_meaningful_speech, mark_conversation_deleted, + track_speech_activity, + 
update_job_progress_metadata, ) -from advanced_omi_backend.controllers.queue_controller import start_post_conversation_jobs - logger = logging.getLogger(__name__) @@ -98,9 +100,10 @@ async def handle_end_of_conversation( logger.info(f"๐Ÿงน Deleted conversation:current signal for session {session_id[:12]}") # Update conversation in database with end reason and completion time - from advanced_omi_backend.models.conversation import Conversation from datetime import datetime + from advanced_omi_backend.models.conversation import Conversation + conversation = await Conversation.find_one(Conversation.conversation_id == conversation_id) if conversation: # Convert string to enum @@ -112,7 +115,9 @@ async def handle_end_of_conversation( conversation.completed_at = datetime.utcnow() await conversation.save() - logger.info(f"๐Ÿ’พ Saved conversation {conversation_id[:12]} end_reason: {conversation.end_reason}") + logger.info( + f"๐Ÿ’พ Saved conversation {conversation_id[:12]} end_reason: {conversation.end_reason}" + ) else: logger.warning(f"โš ๏ธ Conversation {conversation_id} not found for end reason tracking") @@ -137,11 +142,13 @@ async def handle_end_of_conversation( ) from advanced_omi_backend.controllers.queue_controller import ( - transcription_queue, - redis_conn, JOB_RESULT_TTL, + redis_conn, + transcription_queue, + ) + from advanced_omi_backend.workers.transcription_jobs import ( + stream_speech_detection_job, ) - from advanced_omi_backend.workers.transcription_jobs import stream_speech_detection_job # Enqueue speech detection job for next conversation (audio persistence keeps running) speech_job = transcription_queue.enqueue( @@ -158,7 +165,9 @@ async def handle_end_of_conversation( # Store job ID for cleanup (keyed by client_id for WebSocket cleanup) try: - redis_conn.set(f"speech_detection_job:{client_id}", speech_job.id, ex=86400) # 24 hours + redis_conn.set( + f"speech_detection_job:{client_id}", speech_job.id, ex=86400 + ) # 24 hours logger.info(f"๐Ÿ“Œ Stored 
speech detection job ID for client {client_id}") except Exception as e: logger.warning(f"โš ๏ธ Failed to store job ID for {client_id}: {e}") @@ -210,10 +219,16 @@ async def open_conversation_job( Note: user_email is fetched from the database when needed. """ - from advanced_omi_backend.services.audio_stream import TranscriptionResultsAggregator - from advanced_omi_backend.models.conversation import Conversation, create_conversation from rq import get_current_job + from advanced_omi_backend.models.conversation import ( + Conversation, + create_conversation, + ) + from advanced_omi_backend.services.audio_stream import ( + TranscriptionResultsAggregator, + ) + logger.info( f"๐Ÿ“ Creating and opening conversation for session {session_id} (speech detected at {speech_detected_at})" ) @@ -227,7 +242,9 @@ async def open_conversation_job( conversation_key = f"conversation:current:{session_id}" existing_conversation_id_bytes = await redis_client.get(conversation_key) - logger.info(f"๐Ÿ” Checking for placeholder: key={conversation_key}, found={existing_conversation_id_bytes is not None}") + logger.info( + f"๐Ÿ” Checking for placeholder: key={conversation_key}, found={existing_conversation_id_bytes is not None}" + ) conversation = None if existing_conversation_id_bytes: @@ -240,8 +257,8 @@ async def open_conversation_job( ) if conversation: - always_persist = getattr(conversation, 'always_persist', False) - processing_status = getattr(conversation, 'processing_status', None) + always_persist = getattr(conversation, "always_persist", False) + processing_status = getattr(conversation, "processing_status", None) logger.info( f"๐Ÿ” Found conversation in DB: always_persist={always_persist}, " f"processing_status={processing_status}" @@ -250,8 +267,11 @@ async def open_conversation_job( logger.warning(f"โš ๏ธ Conversation {existing_conversation_id} not found in database!") # Verify it's a placeholder conversation (always_persist=True, processing_status='pending_transcription') 
- if conversation and getattr(conversation, 'always_persist', False) and \ - getattr(conversation, 'processing_status', None) == 'pending_transcription': + if ( + conversation + and getattr(conversation, "always_persist", False) + and getattr(conversation, "processing_status", None) == "pending_transcription" + ): logger.info( f"๐Ÿ”„ Reusing placeholder conversation {conversation.conversation_id} for session {session_id}" ) @@ -315,7 +335,7 @@ async def open_conversation_job( ) else: raise - + # Signal audio persistence job to rotate to this conversation's file rotation_signal_key = f"conversation:current:{session_id}" await redis_client.set(rotation_signal_key, conversation_id, ex=86400) # 24 hour TTL @@ -337,9 +357,13 @@ async def open_conversation_job( # Inactivity timeout configuration inactivity_timeout_seconds = float(os.getenv("SPEECH_INACTIVITY_THRESHOLD_SECONDS", "60")) inactivity_timeout_minutes = inactivity_timeout_seconds / 60 - last_meaningful_speech_time = 0.0 # Initialize with audio time 0 (will be updated with first speech) + last_meaningful_speech_time = ( + 0.0 # Initialize with audio time 0 (will be updated with first speech) + ) timeout_triggered = False # Track if closure was due to timeout - last_inactivity_log_time = time.time() # Track when we last logged inactivity (wall-clock for logging) + last_inactivity_log_time = ( + time.time() + ) # Track when we last logged inactivity (wall-clock for logging) last_word_count = 0 # Track word count to detect actual new speech # Test mode: wait for audio queue to drain before timing out @@ -355,6 +379,7 @@ async def open_conversation_job( while True: # Check if job still exists in Redis (detect zombie state) from advanced_omi_backend.utils.job_utils import check_job_alive + if not await check_job_alive(redis_client, current_job, session_id): break @@ -368,7 +393,9 @@ async def open_conversation_job( # Get completion reason (guaranteed to exist with unified API) completion_reason = await 
redis_client.hget(session_key, "completion_reason") - completion_reason_str = completion_reason.decode() if completion_reason else "unknown" + completion_reason_str = ( + completion_reason.decode() if completion_reason else "unknown" + ) if completion_reason_str == "websocket_disconnect": logger.warning( @@ -394,7 +421,6 @@ async def open_conversation_job( # Analyze speech content using detailed analysis - transcript_data = {"text": combined["text"], "words": combined.get("words", [])} speech_analysis = analyze_speech(transcript_data) @@ -419,7 +445,9 @@ async def open_conversation_job( start = seg.get("start", 0.0) end = seg.get("end", 0.0) if end <= start: - logger.debug(f"Segment {i} has invalid timing (start={start}, end={end}), correcting") + logger.debug( + f"Segment {i} has invalid timing (start={start}, end={end}), correcting" + ) # Auto-correct: estimate duration from text length estimated_duration = len(text.split()) * 0.5 # ~0.5 seconds per word seg["end"] = start + estimated_duration @@ -519,15 +547,15 @@ async def open_conversation_job( plugin_router = get_plugin_router() if plugin_router: # Get the latest transcript text for plugin processing - transcript_text = combined.get('text', '') + transcript_text = combined.get("text", "") if transcript_text: plugin_data = { - 'transcript': transcript_text, - 'segment_id': f"{session_id}_{current_count}", - 'conversation_id': conversation_id, - 'segments': validated_segments, - 'word_count': speech_analysis.get('word_count', 0), + "transcript": transcript_text, + "segment_id": f"{session_id}_{current_count}", + "conversation_id": conversation_id, + "segments": validated_segments, + "word_count": speech_analysis.get("word_count", 0), } logger.info( @@ -536,10 +564,10 @@ async def open_conversation_job( ) plugin_results = await plugin_router.dispatch_event( - event='transcript.streaming', + event="transcript.streaming", user_id=user_id, data=plugin_data, - metadata={'client_id': client_id} + 
metadata={"client_id": client_id}, ) logger.info( @@ -547,7 +575,9 @@ async def open_conversation_job( ) if plugin_results: - logger.info(f"๐Ÿ“Œ Triggered {len(plugin_results)} streaming transcript plugins") + logger.info( + f"๐Ÿ“Œ Triggered {len(plugin_results)} streaming transcript plugins" + ) for result in plugin_results: if result.message: logger.info(f" Plugin: {result.message}") @@ -591,7 +621,9 @@ async def open_conversation_job( # This prevents empty/noise-only conversations from being processed and saved # NOTE: Speech was already validated during streaming, so we skip this check # to avoid false negatives from aggregated results lacking proper word-level data - logger.info("โœ… Conversation has meaningful speech (validated during streaming), proceeding with post-processing") + logger.info( + "โœ… Conversation has meaningful speech (validated during streaming), proceeding with post-processing" + ) # Wait for streaming transcription consumer to complete before reading transcript # This fixes the race condition where conversation job reads transcript before @@ -602,9 +634,15 @@ async def open_conversation_job( while waited_streaming < max_wait_streaming: completion_status = await redis_client.get(completion_key) if completion_status: - status_str = completion_status.decode() if isinstance(completion_status, bytes) else completion_status + status_str = ( + completion_status.decode() + if isinstance(completion_status, bytes) + else completion_status + ) if status_str == "error": - logger.warning(f"โš ๏ธ Streaming transcription ended with error for {session_id}, proceeding anyway") + logger.warning( + f"โš ๏ธ Streaming transcription ended with error for {session_id}, proceeding anyway" + ) else: logger.info(f"โœ… Streaming transcription confirmed complete for {session_id}") break @@ -667,7 +705,7 @@ async def open_conversation_job( word=w.get("word", ""), start=w.get("start", 0.0), end=w.get("end", 0.0), - confidence=w.get("confidence") + 
confidence=w.get("confidence"), ) for w in words_data ] @@ -693,11 +731,14 @@ async def open_conversation_job( "chunk_count": final_transcript.get("chunk_count", 0), "word_count": len(words), }, - set_as_active=True + set_as_active=True, ) # Update placeholder conversation if it exists - if getattr(conversation, 'always_persist', False) and getattr(conversation, 'processing_status', None) == "pending_transcription": + if ( + getattr(conversation, "always_persist", False) + and getattr(conversation, "processing_status", None) == "pending_transcription" + ): # Keep placeholder status - will be updated by title_summary_job logger.info( f"๐Ÿ“ Placeholder conversation {conversation_id} has transcript, " @@ -722,7 +763,7 @@ async def open_conversation_job( transcript_version_id=version_id, # Pass the streaming transcript version ID depends_on_job=None, # No dependency - streaming already succeeded client_id=client_id, # Pass client_id for UI tracking - end_reason=end_reason # Pass the determined end_reason (websocket_disconnect, inactivity_timeout, etc.) + end_reason=end_reason, # Pass the determined end_reason (websocket_disconnect, inactivity_timeout, etc.) 
) logger.info( @@ -774,9 +815,9 @@ async def generate_title_summary_job(conversation_id: str, *, redis_client=None) """ from advanced_omi_backend.models.conversation import Conversation from advanced_omi_backend.utils.conversation_utils import ( - generate_title, - generate_short_summary, generate_detailed_summary, + generate_short_summary, + generate_title, ) logger.info(f"๐Ÿ“ Starting title/summary generation for conversation {conversation_id}") @@ -807,14 +848,6 @@ async def generate_title_summary_job(conversation_id: str, *, redis_client=None) f"๐Ÿค– Generating title/summary/detailed_summary using LLM for conversation {conversation_id}" ) - # Convert segments to dict format expected by utils - segment_dicts = None - if segments and len(segments) > 0: - segment_dicts = [ - {"speaker": seg.speaker, "text": seg.text, "start": seg.start, "end": seg.end} - for seg in segments - ] - # Fetch memory context for richer detailed summaries # Use the entire transcript as the search query for best semantic matching # so all key topics/entities in the conversation can find relevant memories @@ -840,10 +873,10 @@ async def generate_title_summary_job(conversation_id: str, *, redis_client=None) import asyncio title, short_summary, detailed_summary = await asyncio.gather( - generate_title(transcript_text, segments=segment_dicts), - generate_short_summary(transcript_text, segments=segment_dicts), + generate_title(transcript_text, segments=segments), + generate_short_summary(transcript_text, segments=segments), generate_detailed_summary( - transcript_text, segments=segment_dicts, memory_context=memory_context + transcript_text, segments=segments, memory_context=memory_context ), ) @@ -856,7 +889,7 @@ async def generate_title_summary_job(conversation_id: str, *, redis_client=None) logger.info(f"โœ… Generated detailed summary: {len(conversation.detailed_summary)} chars") # Update processing status for placeholder conversations - if getattr(conversation, 'processing_status', None) 
== "pending_transcription": + if getattr(conversation, "processing_status", None) == "pending_transcription": conversation.processing_status = "completed" logger.info( f"โœ… Updated placeholder conversation {conversation_id} " @@ -867,7 +900,7 @@ async def generate_title_summary_job(conversation_id: str, *, redis_client=None) logger.error(f"โŒ Title/summary generation failed: {gen_error}") # Mark placeholder conversation as failed - if getattr(conversation, 'processing_status', None) == "pending_transcription": + if getattr(conversation, "processing_status", None) == "pending_transcription": conversation.title = "Audio Recording (Transcription Failed)" conversation.summary = f"Title/summary generation failed: {str(gen_error)}" conversation.processing_status = "transcription_failed" @@ -931,7 +964,7 @@ async def dispatch_conversation_complete_event_job( user_id: str, end_reason: Optional[str] = None, *, - redis_client=None + redis_client=None, ) -> Dict[str, Any]: """ Dispatch conversation.complete plugin event for all conversation sources. 
@@ -976,35 +1009,19 @@ async def dispatch_conversation_complete_event_job( conversation.completed_at = datetime.utcnow() await conversation.save() - logger.info(f"๐Ÿ’พ Saved end_reason={conversation.end_reason} to conversation {conversation_id[:12]} in event dispatch job") + logger.info( + f"๐Ÿ’พ Saved end_reason={conversation.end_reason} to conversation {conversation_id[:12]} in event dispatch job" + ) # Get user email for event data from advanced_omi_backend.models.user import User + user = await User.get(user_id) user_email = user.email if user else "" # Prepare plugin event data (same format as open_conversation_job) try: - # Get or initialize plugin router (same pattern as transcription_jobs.py) - plugin_router = get_plugin_router() - - if not plugin_router: - logger.warning("๐Ÿ”ง Plugin router not found in worker process - attempting initialization...") - plugin_router = init_plugin_router() - - if plugin_router: - logger.info(f"๐Ÿ”ง Plugin router initialized with {len(plugin_router.plugins)} plugin(s)") - - # Initialize all plugins - for plugin_id, plugin in plugin_router.plugins.items(): - try: - logger.info(f" Initializing plugin '{plugin_id}'...") - await plugin.initialize() - logger.info(f" โœ“ Plugin '{plugin_id}' initialized") - except Exception as e: - logger.error(f" โœ— Failed to initialize plugin '{plugin_id}': {e}", exc_info=True) - else: - logger.error("๐Ÿ”ง Plugin router initialization FAILED - router is None") + plugin_router = await ensure_plugin_router() # CRITICAL CHECK: Fail loudly if no router if not plugin_router: @@ -1019,21 +1036,21 @@ async def dispatch_conversation_complete_event_job( "skipped": True, "reason": "No plugin router", "conversation_id": conversation_id, - "error": error_msg + "error": error_msg, } plugin_data = { - 'conversation': { - 'client_id': client_id, - 'user_id': user_id, + "conversation": { + "client_id": client_id, + "user_id": user_id, }, - 'transcript': conversation.transcript if conversation else "", - 
'duration': 0, # Duration not tracked for file uploads - 'conversation_id': conversation_id, + "transcript": conversation.transcript if conversation else "", + "duration": 0, # Duration not tracked for file uploads + "conversation_id": conversation_id, } # Use provided end_reason or default to 'file_upload' for backward compatibility - actual_end_reason = end_reason or 'file_upload' + actual_end_reason = end_reason or "file_upload" logger.info( f"๐Ÿ”Œ DISPATCH: conversation.complete event for {conversation_id[:12]} " @@ -1041,10 +1058,10 @@ async def dispatch_conversation_complete_event_job( ) plugin_results = await plugin_router.dispatch_event( - event='conversation.complete', + event="conversation.complete", user_id=user_id, data=plugin_data, - metadata={'end_reason': actual_end_reason} + metadata={"end_reason": actual_end_reason}, ) logger.info( diff --git a/backends/advanced/src/advanced_omi_backend/workers/memory_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/memory_jobs.py index 1dafc823..9c227bd9 100644 --- a/backends/advanced/src/advanced_omi_backend/workers/memory_jobs.py +++ b/backends/advanced/src/advanced_omi_backend/workers/memory_jobs.py @@ -7,19 +7,19 @@ import logging import time import uuid -from datetime import UTC, datetime from typing import Any, Dict from advanced_omi_backend.controllers.queue_controller import ( JOB_RESULT_TTL, memory_queue, ) -from advanced_omi_backend.models.job import BaseRQJob, JobPriority, async_job -from advanced_omi_backend.services.memory.base import MemoryEntry -from advanced_omi_backend.services.plugin_service import get_plugin_router, init_plugin_router +from advanced_omi_backend.models.job import JobPriority, async_job +from advanced_omi_backend.services.plugin_service import ensure_plugin_router logger = logging.getLogger(__name__) +MIN_CONVERSATION_LENGTH = 10 + @async_job(redis=True, beanie=True) async def process_memory_job(conversation_id: str, *, redis_client=None) -> Dict[str, Any]: @@ -73,48 
+73,38 @@ async def process_memory_job(conversation_id: str, *, redis_client=None) -> Dict f"๐Ÿ”„ Processing memory for conversation {conversation_id}, client={client_id}, user={user_id}" ) - # Extract conversation text from transcript segments - full_conversation = "" + # Extract conversation text and speakers from transcript segments in a single pass + dialogue_lines = [] + transcript_speakers = set() segments = conversation_model.segments if segments: - dialogue_lines = [] for segment in segments: - # Handle both dict and object segments - if isinstance(segment, dict): - text = segment.get("text", "").strip() - speaker = segment.get("speaker", "Unknown") - else: - text = getattr(segment, "text", "").strip() - speaker = getattr(segment, "speaker", "Unknown") - + text = segment.text.strip() + speaker = segment.speaker if text: dialogue_lines.append(f"{speaker}: {text}") - full_conversation = "\n".join(dialogue_lines) + if speaker and speaker != "Unknown": + transcript_speakers.add(speaker.strip().lower()) + full_conversation = "\n".join(dialogue_lines) # Fallback: if segments have no text content but transcript exists, use transcript # This handles cases where speaker recognition fails/is disabled - if len(full_conversation) < 10 and conversation_model.transcript and isinstance(conversation_model.transcript, str): - logger.info(f"Segments empty or too short, falling back to transcript text for {conversation_id}") + if ( + len(full_conversation) < MIN_CONVERSATION_LENGTH + and conversation_model.transcript + and isinstance(conversation_model.transcript, str) + ): + logger.info( + f"Segments empty or too short, falling back to transcript text for {conversation_id}" + ) full_conversation = conversation_model.transcript - if len(full_conversation) < 10: + if len(full_conversation) < MIN_CONVERSATION_LENGTH: logger.warning(f"Conversation too short for memory processing: {conversation_id}") return {"success": False, "error": "Conversation too short"} - # Check primary 
speakers filter - user = await get_user_by_id(user_id) + # Check primary speakers filter (reuse `user` from above โ€” no duplicate DB call) if user and user.primary_speakers: - transcript_speakers = set() - for segment in conversation_model.segments: - # Handle both dict and object segments - if isinstance(segment, dict): - identified_as = segment.get("identified_as") - else: - identified_as = getattr(segment, "identified_as", None) - - if identified_as and identified_as != "Unknown": - transcript_speakers.add(identified_as.strip().lower()) - primary_speaker_names = {ps["name"].strip().lower() for ps in user.primary_speakers} if transcript_speakers and not transcript_speakers.intersection(primary_speaker_names): @@ -141,14 +131,7 @@ async def process_memory_job(conversation_id: str, *, redis_client=None) -> Dict processing_time = time.time() - start_time # Determine memory provider from memory service - memory_provider = "chronicle" # Default - try: - memory_service_obj = get_memory_service() - provider_name = memory_service_obj.__class__.__name__ - if "OpenMemory" in provider_name: - memory_provider = "openmemory_mcp" - except Exception: - pass + memory_provider = memory_service.provider_identifier # Only create memory version if new memories were created if created_memory_ids: @@ -158,7 +141,9 @@ async def process_memory_job(conversation_id: str, *, redis_client=None) -> Dict ) if conversation_model: # Get active transcript version for reference - transcript_version_id = conversation_model.active_transcript_version or "unknown" + transcript_version_id = ( + conversation_model.active_transcript_version or "unknown" + ) # Create version ID for this memory extraction version_id = str(uuid.uuid4()) @@ -168,7 +153,11 @@ async def process_memory_job(conversation_id: str, *, redis_client=None) -> Dict version_id=version_id, memory_count=len(created_memory_ids), transcript_version_id=transcript_version_id, - provider=conversation_model.MemoryProvider.OPENMEMORY_MCP if 
memory_provider == "openmemory_mcp" else conversation_model.MemoryProvider.CHRONICLE, + provider=( + conversation_model.MemoryProvider.OPENMEMORY_MCP + if memory_provider == "openmemory_mcp" + else conversation_model.MemoryProvider.CHRONICLE + ), processing_time_seconds=processing_time, metadata={"memory_ids": created_memory_ids}, set_as_active=True, @@ -193,37 +182,8 @@ async def process_memory_job(conversation_id: str, *, redis_client=None) -> Dict for memory_id in created_memory_ids[:5]: # Limit to first 5 for display memory_entry = await memory_service.get_memory(memory_id, user_id) if memory_entry: - # Handle different return types from memory service - memory_text: str - if isinstance(memory_entry, MemoryEntry): - # MemoryEntry object with content attribute - memory_text = memory_entry.content - elif isinstance(memory_entry, dict): - # Dictionary with "content" key - if "content" in memory_entry: - memory_text = memory_entry["content"] - else: - logger.error( - f"Dict memory entry missing 'content' key for {memory_id}: {list(memory_entry.keys())}" - ) - raise ValueError( - f"Dict memory entry missing 'content' key for memory {memory_id}" - ) - elif isinstance(memory_entry, str): - # String content directly - memory_text = memory_entry - else: - # Unexpected type - logger.error( - f"Unexpected memory entry type for {memory_id}: {type(memory_entry).__name__}" - ) - raise TypeError( - f"Unexpected memory entry type: {type(memory_entry).__name__}" - ) - - # Truncate to 200 chars memory_details.append( - {"memory_id": memory_id, "text": memory_text[:200]} + {"memory_id": memory_id, "text": memory_entry.content[:200]} ) except Exception as e: logger.warning(f"Failed to fetch memory details for UI: {e}") @@ -250,18 +210,27 @@ async def process_memory_job(conversation_id: str, *, redis_client=None) -> Dict # Extract entities and relationships to knowledge graph (if enabled) try: from advanced_omi_backend.model_registry import get_config + config = get_config() - 
kg_enabled = config.get("memory", {}).get("knowledge_graph", {}).get("enabled", False) + kg_enabled = ( + config.get("memory", {}).get("knowledge_graph", {}).get("enabled", False) + ) if kg_enabled: - from advanced_omi_backend.services.knowledge_graph import get_knowledge_graph_service + from advanced_omi_backend.services.knowledge_graph import ( + get_knowledge_graph_service, + ) kg_service = get_knowledge_graph_service() kg_result = await kg_service.process_conversation( conversation_id=conversation_id, transcript=full_conversation, user_id=user_id, - conversation_name=conversation_model.title if hasattr(conversation_model, 'title') else None, + conversation_name=( + conversation_model.title + if hasattr(conversation_model, "title") + else None + ), ) if kg_result.get("entities", 0) > 0: logger.info( @@ -277,32 +246,19 @@ async def process_memory_job(conversation_id: str, *, redis_client=None) -> Dict # Trigger memory-level plugins (ALWAYS dispatch when success, even with 0 new memories) try: - # Get or initialize plugin router (same pattern as conversation_jobs.py) - plugin_router = get_plugin_router() - if not plugin_router: - logger.info("๐Ÿ”ง Initializing plugin router in worker process...") - plugin_router = init_plugin_router() - - # Initialize all plugins asynchronously (same as app_factory.py) - if plugin_router: - for plugin_id, plugin in plugin_router.plugins.items(): - try: - await plugin.initialize() - logger.info(f"โœ… Plugin '{plugin_id}' initialized") - except Exception as e: - logger.error(f"Failed to initialize plugin '{plugin_id}': {e}") + plugin_router = await ensure_plugin_router() if plugin_router: plugin_data = { - 'memories': created_memory_ids or [], - 'conversation': { - 'conversation_id': conversation_id, - 'client_id': client_id, - 'user_id': user_id, - 'user_email': user_email, + "memories": created_memory_ids or [], + "conversation": { + "conversation_id": conversation_id, + "client_id": client_id, + "user_id": user_id, + 
"user_email": user_email, }, - 'memory_count': len(created_memory_ids) if created_memory_ids else 0, - 'conversation_id': conversation_id, + "memory_count": len(created_memory_ids) if created_memory_ids else 0, + "conversation_id": conversation_id, } logger.info( @@ -311,13 +267,13 @@ async def process_memory_job(conversation_id: str, *, redis_client=None) -> Dict ) plugin_results = await plugin_router.dispatch_event( - event='memory.processed', + event="memory.processed", user_id=user_id, data=plugin_data, metadata={ - 'processing_time': processing_time, - 'memory_provider': memory_provider, - } + "processing_time": processing_time, + "memory_provider": memory_provider, + }, ) logger.info( @@ -346,15 +302,15 @@ async def process_memory_job(conversation_id: str, *, redis_client=None) -> Dict def enqueue_memory_processing( - client_id: str, - user_id: str, - user_email: str, conversation_id: str, priority: JobPriority = JobPriority.NORMAL, ): """ Enqueue a memory processing job. + The job fetches all needed data (client_id, user_id, user_email) from the + conversation document internally, so only conversation_id is needed. + Returns RQ Job object for tracking. 
""" timeout_mapping = { diff --git a/backends/advanced/src/advanced_omi_backend/workers/orchestrator/__init__.py b/backends/advanced/src/advanced_omi_backend/workers/orchestrator/__init__.py index 1c7b0d7a..43870b06 100644 --- a/backends/advanced/src/advanced_omi_backend/workers/orchestrator/__init__.py +++ b/backends/advanced/src/advanced_omi_backend/workers/orchestrator/__init__.py @@ -11,10 +11,10 @@ - health_monitor: Health checks and self-healing """ -from .config import WorkerDefinition, OrchestratorConfig, WorkerType -from .worker_registry import build_worker_definitions -from .process_manager import ManagedWorker, ProcessManager, WorkerState +from .config import OrchestratorConfig, WorkerDefinition, WorkerType from .health_monitor import HealthMonitor +from .process_manager import ManagedWorker, ProcessManager, WorkerState +from .worker_registry import build_worker_definitions __all__ = [ "WorkerDefinition", diff --git a/backends/advanced/src/advanced_omi_backend/workers/orchestrator/config.py b/backends/advanced/src/advanced_omi_backend/workers/orchestrator/config.py index 633d366a..b63e454c 100644 --- a/backends/advanced/src/advanced_omi_backend/workers/orchestrator/config.py +++ b/backends/advanced/src/advanced_omi_backend/workers/orchestrator/config.py @@ -7,7 +7,7 @@ import os from dataclasses import dataclass, field from enum import Enum -from typing import Optional, Callable, List +from typing import Callable, List, Optional class WorkerType(Enum): diff --git a/backends/advanced/src/advanced_omi_backend/workers/orchestrator/worker_registry.py b/backends/advanced/src/advanced_omi_backend/workers/orchestrator/worker_registry.py index a5cf4b74..c28b2727 100644 --- a/backends/advanced/src/advanced_omi_backend/workers/orchestrator/worker_registry.py +++ b/backends/advanced/src/advanced_omi_backend/workers/orchestrator/worker_registry.py @@ -5,8 +5,8 @@ Reuses model_registry.py for config.yml parsing. 
""" -import os import logging +import os from typing import List from .config import WorkerDefinition, WorkerType diff --git a/backends/advanced/src/advanced_omi_backend/workers/rq_worker_entry.py b/backends/advanced/src/advanced_omi_backend/workers/rq_worker_entry.py index c402005f..d9da1c6a 100755 --- a/backends/advanced/src/advanced_omi_backend/workers/rq_worker_entry.py +++ b/backends/advanced/src/advanced_omi_backend/workers/rq_worker_entry.py @@ -22,8 +22,8 @@ def main(): """Start RQ worker with proper logging configuration.""" - from rq import Worker from redis import Redis + from rq import Worker # Get Redis URL from environment redis_url = os.getenv('REDIS_URL', 'redis://localhost:6379/0') diff --git a/backends/advanced/src/advanced_omi_backend/workers/speaker_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/speaker_jobs.py index 74cbde34..8c90701e 100644 --- a/backends/advanced/src/advanced_omi_backend/workers/speaker_jobs.py +++ b/backends/advanced/src/advanced_omi_backend/workers/speaker_jobs.py @@ -477,6 +477,7 @@ async def recognise_speakers_job( end=seg.get("end", 0), text=text, speaker=speaker_name, + identified_as=seg.get("identified_as"), confidence=seg.get("confidence"), words=segment_words # Use words from speaker service ) diff --git a/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py index 15a6cdfb..fa755bac 100644 --- a/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py +++ b/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py @@ -30,7 +30,7 @@ from advanced_omi_backend.models.conversation import Conversation from advanced_omi_backend.models.job import BaseRQJob, JobPriority, async_job from advanced_omi_backend.services.audio_stream import TranscriptionResultsAggregator -from advanced_omi_backend.services.plugin_service import get_plugin_router +from 
advanced_omi_backend.services.plugin_service import ensure_plugin_router from advanced_omi_backend.services.transcription import ( get_transcription_provider, is_transcription_available, @@ -223,7 +223,10 @@ async def transcribe_full_audio_job( diarize=True, ) except Exception as e: - logger.error(f"Transcription failed for conversation {conversation_id}: {type(e).__name__}: {e}", exc_info=True) + logger.error( + f"Transcription failed for conversation {conversation_id}: {type(e).__name__}: {e}", + exc_info=True, + ) raise RuntimeError(f"Transcription failed ({type(e).__name__}): {e}") # Extract results @@ -242,33 +245,7 @@ async def transcribe_full_audio_job( ) if transcript_text: try: - from advanced_omi_backend.services.plugin_service import init_plugin_router - - # Initialize plugin router if not already initialized (worker context) - plugin_router = get_plugin_router() - logger.info(f"๐Ÿ” DEBUG: Plugin router from service: {plugin_router is not None}") - - if not plugin_router: - logger.info("๐Ÿ”ง Initializing plugin router in worker process...") - plugin_router = init_plugin_router() - logger.info( - f"๐Ÿ”ง After init, plugin_router: {plugin_router is not None}, plugins count: {len(plugin_router.plugins) if plugin_router else 0}" - ) - - # Initialize async plugins - if plugin_router: - for plugin_id, plugin in plugin_router.plugins.items(): - try: - await plugin.initialize() - logger.info(f"โœ… Plugin '{plugin_id}' initialized in worker") - except Exception as e: - logger.exception( - f"Failed to initialize plugin '{plugin_id}' in worker: {e}" - ) - - logger.info( - f"๐Ÿ” DEBUG: Plugin router final check: {plugin_router is not None}, has {len(plugin_router.plugins) if plugin_router else 0} plugins" - ) + plugin_router = await ensure_plugin_router() if plugin_router: logger.info( @@ -392,7 +369,7 @@ async def transcribe_full_audio_job( # Get provider capabilities for downstream processing decisions # Capabilities determine whether pyannote diarization is 
needed or can be skipped provider_capabilities = {} - if hasattr(provider, 'get_capabilities_dict'): + if hasattr(provider, "get_capabilities_dict"): provider_capabilities = provider.get_capabilities_dict() logger.info(f"๐Ÿ“Š Provider capabilities: {list(provider_capabilities.keys())}") @@ -401,6 +378,7 @@ async def transcribe_full_audio_job( # Check speaker recognition configuration from advanced_omi_backend.speaker_recognition_client import SpeakerRecognitionClient + speaker_client = SpeakerRecognitionClient() speaker_recognition_enabled = speaker_client.enabled diff --git a/backends/advanced/src/advanced_omi_backend/workers/waveform_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/waveform_jobs.py index 911b651d..f58387cd 100644 --- a/backends/advanced/src/advanced_omi_backend/workers/waveform_jobs.py +++ b/backends/advanced/src/advanced_omi_backend/workers/waveform_jobs.py @@ -9,7 +9,7 @@ import logging import struct import time -from typing import Dict, Any, List +from typing import Any, Dict, List logger = logging.getLogger(__name__) @@ -43,8 +43,8 @@ async def generate_waveform_data( """ from advanced_omi_backend.models.waveform import WaveformData from advanced_omi_backend.utils.audio_chunk_utils import ( - retrieve_audio_chunks, decode_opus_to_pcm, + retrieve_audio_chunks, ) start_time = time.time() diff --git a/backends/advanced/src/scripts/cleanup_state.py b/backends/advanced/src/scripts/cleanup_state.py index b028ecde..f04f2c76 100644 --- a/backends/advanced/src/scripts/cleanup_state.py +++ b/backends/advanced/src/scripts/cleanup_state.py @@ -15,30 +15,31 @@ - Confirmation prompts with detailed warnings """ -import asyncio import argparse +import asyncio import json import logging import os import shutil +import struct import sys from datetime import datetime from pathlib import Path -from typing import Dict, List, Any, Optional, Tuple -import struct +from typing import Any, Dict, List, Optional, Tuple # Add parent directory to path for 
imports sys.path.insert(0, str(Path(__file__).parent.parent.parent)) try: import redis - from rq import Queue + from beanie import init_beanie from motor.motor_asyncio import AsyncIOMotorClient from qdrant_client import AsyncQdrantClient from qdrant_client.models import Distance, VectorParams - from beanie import init_beanie - from advanced_omi_backend.models.conversation import Conversation + from rq import Queue + from advanced_omi_backend.models.audio_chunk import AudioChunkDocument + from advanced_omi_backend.models.conversation import Conversation from advanced_omi_backend.models.user import User from advanced_omi_backend.models.waveform import WaveformData from advanced_omi_backend.services.memory.config import build_memory_config_from_env diff --git a/tests/bin/start-rebuild-containers.sh b/tests/bin/start-rebuild-containers.sh index 28cb0667..39dbfb1e 100755 --- a/tests/bin/start-rebuild-containers.sh +++ b/tests/bin/start-rebuild-containers.sh @@ -33,6 +33,14 @@ docker compose -f docker-compose-test.yml stop echo "๐Ÿ—๏ธ Rebuilding images..." docker compose -f docker-compose-test.yml up -d --build +# Flush Redis to clear stale keys from previous test runs. +# Redis uses appendonly persistence with a bind mount, so data survives +# stop/rebuild cycles. Stale conversation:current:* keys can cause test +# failures when the audio persistence job finds a Redis key pointing to +# a MongoDB document that no longer exists. +echo "๐Ÿ—‘๏ธ Flushing Redis for clean test state..." +docker compose -f docker-compose-test.yml exec -T redis-test redis-cli FLUSHALL > /dev/null 2>&1 || true + # Wait for services echo "โณ Waiting for services to be ready..." sleep 5