From 6242362af01671d9a7f38a45317c8296c654e012 Mon Sep 17 00:00:00 2001 From: 0xrushi <6279035+0xrushi@users.noreply.github.com> Date: Fri, 6 Feb 2026 23:23:57 -0500 Subject: [PATCH 01/11] Enhance model management and transcription provider setup - Added `add_or_update_model` method in `ConfigManager` to facilitate adding or updating models in the configuration. - Updated `ChronicleSetup` to support a new OpenAI-Compatible transcription provider, allowing users to configure custom endpoints and API keys. - Enhanced user prompts for API base URL and model name during setup, improving the configuration experience. - Introduced unit tests for the new model management functionality and transcription provider setup, ensuring robust validation of the changes. - Improved Docker configurations for ASR services, including support for customizable CUDA versions and DNS settings. --- backends/advanced/init.py | 82 ++++- .../tests/test_setup_llm_custom_provider.py | 286 ++++++++++++++++++ .../tests/test_transcription_url_config.py | 183 +++++++++++ config_manager.py | 19 ++ extras/asr-services/docker-compose.yml | 5 + .../providers/vibevoice/Dockerfile | 4 +- .../tests/test_cuda_version_config.py | 220 ++++++++++++++ 7 files changed, 797 insertions(+), 2 deletions(-) create mode 100644 backends/advanced/tests/test_setup_llm_custom_provider.py create mode 100644 backends/advanced/tests/test_transcription_url_config.py create mode 100644 extras/asr-services/tests/test_cuda_version_config.py diff --git a/backends/advanced/init.py b/backends/advanced/init.py index aad7ff0e..e70a1369 100644 --- a/backends/advanced/init.py +++ b/backends/advanced/init.py @@ -309,7 +309,8 @@ def setup_llm(self): choices = { "1": "OpenAI (GPT-4, GPT-3.5 - requires API key)", "2": "Ollama (local models - runs locally)", - "3": "Skip (no memory extraction)" + "3": "OpenAI-Compatible (custom endpoint - Groq, Together AI, LM Studio, etc.)", + "4": "Skip (no memory extraction)" } choice = self.prompt_choice("Which LLM provider will you use?", choices, "1") @@ -347,6 +348,85 @@ def setup_llm(self): self.console.print("[yellow][WARNING][/yellow] Make sure Ollama is running and models are pulled") elif choice == "3": + self.console.print("[blue][INFO][/blue] OpenAI-Compatible custom endpoint selected") + self.console.print("This works with any provider that exposes an OpenAI-compatible API") + self.console.print("(e.g., Groq, Together AI, LM Studio, vLLM, etc.)") + self.console.print() + + # Prompt for base URL (required) + base_url = self.prompt_value( + "API Base URL (e.g., https://api.groq.com/openai/v1)", "" + ) + if not base_url: + self.console.print("[yellow][WARNING][/yellow] No base URL provided - skipping custom LLM setup") + else: + # Prompt for API key + api_key = self.prompt_with_existing_masked( + prompt_text="API Key (leave empty if not required)", + env_key="CUSTOM_LLM_API_KEY", + placeholders=['your_custom_llm_api_key_here'], + is_password=True, + default="" + ) + if api_key: + self.config["CUSTOM_LLM_API_KEY"] = api_key + + # Prompt for model name (required) + model_name = self.prompt_value( + "LLM Model name (e.g., llama-3.1-70b-versatile)", "" + ) + if not model_name: + self.console.print("[yellow][WARNING][/yellow] No model name provided - skipping custom LLM setup") + else: + # Create LLM model entry + llm_model = { + "name": "custom-llm", + "description": "Custom OpenAI-compatible LLM", + "model_type": "llm", + "model_provider": "openai", + "api_family": "openai", + "model_name": model_name, + "model_url": base_url, + "api_key": "${oc.env:CUSTOM_LLM_API_KEY,''}", + "model_params": { + "temperature": 0.2, + "max_tokens": 2000 + }, + "model_output": "json" + } + self.config_manager.add_or_update_model(llm_model) + + # Prompt for optional embedding model + embedding_model_name = self.prompt_value( + "Embedding model name (leave empty to use Ollama local-embed)", "" + ) + + if embedding_model_name: + embed_model = { + "name": "custom-embed", + "description": "Custom OpenAI-compatible embeddings", + "model_type": "embedding", + "model_provider": "openai", + "api_family": "openai", + "model_name": embedding_model_name, + "model_url": base_url, + "api_key": "${oc.env:CUSTOM_LLM_API_KEY,''}", + "embedding_dimensions": 1536, + "model_output": "vector" + } + self.config_manager.add_or_update_model(embed_model) + self.config_manager.update_config_defaults({"llm": "custom-llm", "embedding": "custom-embed"}) + self.console.print("[green][SUCCESS][/green] Custom LLM and embedding configured in config.yml") + self.console.print("[blue][INFO][/blue] Set defaults.llm: custom-llm") + self.console.print("[blue][INFO][/blue] Set defaults.embedding: custom-embed") + else: + self.config_manager.update_config_defaults({"llm": "custom-llm", "embedding": "local-embed"}) + self.console.print("[green][SUCCESS][/green] Custom LLM configured in config.yml") + self.console.print("[blue][INFO][/blue] Set defaults.llm: custom-llm") + self.console.print("[blue][INFO][/blue] Set defaults.embedding: local-embed (Ollama)") + self.console.print("[yellow][WARNING][/yellow] Make sure Ollama is running for embeddings") + + elif choice == "4": self.console.print("[blue][INFO][/blue] Skipping LLM setup - memory extraction disabled") # Disable memory extraction in config.yml self.config_manager.update_memory_config({"extraction": {"enabled": False}}) diff --git a/backends/advanced/tests/test_setup_llm_custom_provider.py b/backends/advanced/tests/test_setup_llm_custom_provider.py new file mode 100644 index 00000000..33014383 --- /dev/null +++ b/backends/advanced/tests/test_setup_llm_custom_provider.py @@ -0,0 +1,286 @@ +""" +Unit tests for OpenAI-Compatible custom LLM provider setup. + +Tests the wizard's choice "3" (OpenAI-Compatible) in setup_llm(), +including model creation in config.yml and defaults updates. +""" + +import os +import sys +import tempfile +import shutil +from pathlib import Path +from unittest.mock import patch, MagicMock + +import pytest +import yaml + +# Add repo root to path for imports +sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent.parent)) +from config_manager import ConfigManager + + +@pytest.fixture +def temp_config_dir(): + """Create a temporary directory with a minimal config.yml.""" + tmpdir = tempfile.mkdtemp() + config_dir = Path(tmpdir) / "config" + config_dir.mkdir() + + config = { + "defaults": { + "llm": "openai-llm", + "embedding": "openai-embed", + "stt": "stt-deepgram", + }, + "models": [ + { + "name": "openai-llm", + "description": "OpenAI GPT-4o-mini", + "model_type": "llm", + "model_provider": "openai", + "api_family": "openai", + "model_name": "gpt-4o-mini", + "model_url": "https://api.openai.com/v1", + "api_key": "${oc.env:OPENAI_API_KEY,''}", + "model_params": {"temperature": 0.2, "max_tokens": 2000}, + "model_output": "json", + }, + { + "name": "local-embed", + "description": "Local embeddings via Ollama", + "model_type": "embedding", + "model_provider": "ollama", + "api_family": "openai", + "model_name": "nomic-embed-text:latest", + "model_url": "http://localhost:11434/v1", + "api_key": "${oc.env:OPENAI_API_KEY,ollama}", + "embedding_dimensions": 768, + "model_output": "vector", + }, + ], + "memory": {"provider": "chronicle"}, + } + + config_path = config_dir / "config.yml" + with open(config_path, "w") as f: + yaml.dump(config, f, default_flow_style=False, sort_keys=False) + + yield tmpdir + + shutil.rmtree(tmpdir) + + +@pytest.fixture +def config_manager(temp_config_dir): + """Create a ConfigManager pointing to the temp config.""" + return ConfigManager(service_path=None, repo_root=Path(temp_config_dir)) + + +class TestAddOrUpdateModel: + """Tests for ConfigManager.add_or_update_model().""" + + def test_add_new_model(self, config_manager): + """add_or_update_model() should append a new model when name doesn't exist.""" + new_model = { + "name": "custom-llm", + "description": "Custom OpenAI-compatible LLM", + "model_type": "llm", + "model_provider": "openai", + "api_family": "openai", + "model_name": "llama-3.1-70b-versatile", + "model_url": "https://api.groq.com/openai/v1", + "api_key": "${oc.env:CUSTOM_LLM_API_KEY,''}", + "model_params": {"temperature": 0.2, "max_tokens": 2000}, + "model_output": "json", + } + + config_manager.add_or_update_model(new_model) + + config = config_manager.get_full_config() + model_names = [m["name"] for m in config["models"]] + assert "custom-llm" in model_names + + added = next(m for m in config["models"] if m["name"] == "custom-llm") + assert added["model_name"] == "llama-3.1-70b-versatile" + assert added["model_url"] == "https://api.groq.com/openai/v1" + assert added["model_type"] == "llm" + + def test_update_existing_model(self, config_manager): + """add_or_update_model() should replace an existing model with the same name.""" + # First add + model_v1 = { + "name": "custom-llm", + "model_type": "llm", + "model_name": "model-v1", + "model_url": "https://example.com/v1", + } + config_manager.add_or_update_model(model_v1) + + # Then update + model_v2 = { + "name": "custom-llm", + "model_type": "llm", + "model_name": "model-v2", + "model_url": "https://example.com/v2", + } + config_manager.add_or_update_model(model_v2) + + config = config_manager.get_full_config() + custom_models = [m for m in config["models"] if m["name"] == "custom-llm"] + assert len(custom_models) == 1 + assert custom_models[0]["model_name"] == "model-v2" + assert custom_models[0]["model_url"] == "https://example.com/v2" + + def test_add_model_to_empty_models_list(self, temp_config_dir): + """add_or_update_model() should create models list if it doesn't exist.""" + config_path = Path(temp_config_dir) / "config" / "config.yml" + with open(config_path, "w") as f: + yaml.dump({"defaults": {"llm": "openai-llm"}}, f) + + cm = ConfigManager(service_path=None, repo_root=Path(temp_config_dir)) + cm.add_or_update_model({"name": "test-model", "model_type": "llm"}) + + config = cm.get_full_config() + assert "models" in config + assert len(config["models"]) == 1 + assert config["models"][0]["name"] == "test-model" + + +class TestSetupLlmCustomProvider: + """Tests for the custom LLM provider flow in setup_llm().""" + + def _make_setup(self, temp_config_dir): + """Create a ChronicleSetup instance pointing at the temp config.""" + # We need to mock the ChronicleSetup constructor's checks + # Instead, we test the logic by calling config_manager directly, + # simulating what setup_llm() choice "3" does. + return ConfigManager(service_path=None, repo_root=Path(temp_config_dir)) + + def test_custom_llm_model_added_to_config(self, config_manager): + """Selecting custom provider should create correct model entry.""" + llm_model = { + "name": "custom-llm", + "description": "Custom OpenAI-compatible LLM", + "model_type": "llm", + "model_provider": "openai", + "api_family": "openai", + "model_name": "llama-3.1-70b-versatile", + "model_url": "https://api.groq.com/openai/v1", + "api_key": "${oc.env:CUSTOM_LLM_API_KEY,''}", + "model_params": {"temperature": 0.2, "max_tokens": 2000}, + "model_output": "json", + } + + config_manager.add_or_update_model(llm_model) + + config = config_manager.get_full_config() + model = next(m for m in config["models"] if m["name"] == "custom-llm") + assert model["model_provider"] == "openai" + assert model["api_family"] == "openai" + assert model["model_name"] == "llama-3.1-70b-versatile" + assert model["model_url"] == "https://api.groq.com/openai/v1" + assert model["api_key"] == "${oc.env:CUSTOM_LLM_API_KEY,''}" + assert model["model_params"]["temperature"] == 0.2 + assert model["model_output"] == "json" + + def test_custom_llm_and_embedding_model_added(self, config_manager): + """Both LLM and embedding models should be created when embedding model is provided.""" + llm_model = { + "name": "custom-llm", + "model_type": "llm", + "model_provider": "openai", + "api_family": "openai", + "model_name": "llama-3.1-70b-versatile", + "model_url": "https://api.groq.com/openai/v1", + "api_key": "${oc.env:CUSTOM_LLM_API_KEY,''}", + "model_params": {"temperature": 0.2, "max_tokens": 2000}, + "model_output": "json", + } + embed_model = { + "name": "custom-embed", + "description": "Custom OpenAI-compatible embeddings", + "model_type": "embedding", + "model_provider": "openai", + "api_family": "openai", + "model_name": "text-embedding-3-small", + "model_url": "https://api.groq.com/openai/v1", + "api_key": "${oc.env:CUSTOM_LLM_API_KEY,''}", + "embedding_dimensions": 1536, + "model_output": "vector", + } + + config_manager.add_or_update_model(llm_model) + config_manager.add_or_update_model(embed_model) + + config = config_manager.get_full_config() + model_names = [m["name"] for m in config["models"]] + assert "custom-llm" in model_names + assert "custom-embed" in model_names + + embed = next(m for m in config["models"] if m["name"] == "custom-embed") + assert embed["model_type"] == "embedding" + assert embed["model_name"] == "text-embedding-3-small" + assert embed["embedding_dimensions"] == 1536 + + def test_custom_llm_without_embedding_falls_back_to_local_embed(self, config_manager): + """defaults.embedding should be local-embed when no custom embedding is provided.""" + llm_model = { + "name": "custom-llm", + "model_type": "llm", + "model_name": "some-model", + "model_url": "https://api.example.com/v1", + } + config_manager.add_or_update_model(llm_model) + config_manager.update_config_defaults({"llm": "custom-llm", "embedding": "local-embed"}) + + defaults = config_manager.get_config_defaults() + assert defaults["llm"] == "custom-llm" + assert defaults["embedding"] == "local-embed" + + def test_custom_llm_updates_defaults_with_embedding(self, config_manager): + """defaults.llm and defaults.embedding should be updated correctly with custom embed.""" + config_manager.update_config_defaults({"llm": "custom-llm", "embedding": "custom-embed"}) + + defaults = config_manager.get_config_defaults() + assert defaults["llm"] == "custom-llm" + assert defaults["embedding"] == "custom-embed" + + def test_custom_llm_api_key_env_reference(self, config_manager): + """API key should use env var reference in config.yml model.""" + llm_model = { + "name": "custom-llm", + "model_type": "llm", + "model_name": "some-model", + "model_url": "https://api.example.com/v1", + "api_key": "${oc.env:CUSTOM_LLM_API_KEY,''}", + } + config_manager.add_or_update_model(llm_model) + + config = config_manager.get_full_config() + model = next(m for m in config["models"] if m["name"] == "custom-llm") + assert model["api_key"] == "${oc.env:CUSTOM_LLM_API_KEY,''}" + + def test_existing_models_preserved_after_adding_custom(self, config_manager): + """Adding a custom model should not remove existing models.""" + config_before = config_manager.get_full_config() + original_count = len(config_before["models"]) + + config_manager.add_or_update_model({ + "name": "custom-llm", + "model_type": "llm", + "model_name": "test-model", + "model_url": "https://example.com/v1", + }) + + config_after = config_manager.get_full_config() + assert len(config_after["models"]) == original_count + 1 + # Original models still present + model_names = [m["name"] for m in config_after["models"]] + assert "openai-llm" in model_names + assert "local-embed" in model_names + assert "custom-llm" in model_names + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/backends/advanced/tests/test_transcription_url_config.py b/backends/advanced/tests/test_transcription_url_config.py new file mode 100644 index 00000000..b9fcf9bc --- /dev/null +++ b/backends/advanced/tests/test_transcription_url_config.py @@ -0,0 +1,183 @@ +""" +Unit tests for transcription service URL configuration. + +Tests the fix for the double http:// prefix issue where environment variables +containing protocol prefixes were incorrectly combined with hardcoded prefixes +in config.yml. +""" + +import os +import pytest +from unittest.mock import patch, MagicMock +from omegaconf import OmegaConf + + +class TestTranscriptionURLConfiguration: + """Test transcription service URL configuration and parsing.""" + + def test_vibevoice_url_without_http_prefix(self): + """Test that VIBEVOICE_ASR_URL without http:// prefix works correctly.""" + # Simulate config.yml template: http://${oc.env:VIBEVOICE_ASR_URL} + config_template = {"model_url": "http://${oc.env:VIBEVOICE_ASR_URL,host.docker.internal:8767}"} + + with patch.dict(os.environ, {"VIBEVOICE_ASR_URL": "host.docker.internal:8767"}): + resolved = OmegaConf.create(config_template) + resolved = OmegaConf.to_container(resolved, resolve=True) + + assert resolved["model_url"] == "http://host.docker.internal:8767" + assert "http://http://" not in resolved["model_url"] + + def test_vibevoice_url_with_http_prefix_causes_double_prefix(self): + """Test that VIBEVOICE_ASR_URL WITH http:// causes double prefix (bug scenario).""" + config_template = {"model_url": "http://${oc.env:VIBEVOICE_ASR_URL,host.docker.internal:8767}"} + + # This is the BUG scenario - env var already has http:// + with patch.dict(os.environ, {"VIBEVOICE_ASR_URL": "http://host.docker.internal:8767"}): + resolved = OmegaConf.create(config_template) + resolved = OmegaConf.to_container(resolved, resolve=True) + + # This demonstrates the bug + assert resolved["model_url"] == "http://http://host.docker.internal:8767" + assert "http://http://" in resolved["model_url"] + + def test_vibevoice_url_default_fallback(self): + """Test that default fallback works when VIBEVOICE_ASR_URL is not set.""" + config_template = {"model_url": "http://${oc.env:VIBEVOICE_ASR_URL,host.docker.internal:8767}"} + + # No VIBEVOICE_ASR_URL set - should use default + with patch.dict(os.environ, {}, clear=True): + resolved = OmegaConf.create(config_template) + resolved = OmegaConf.to_container(resolved, resolve=True) + + assert resolved["model_url"] == "http://host.docker.internal:8767" + + def test_parakeet_url_configuration(self): + """Test that PARAKEET_ASR_URL follows same pattern.""" + config_template = {"model_url": "http://${oc.env:PARAKEET_ASR_URL,172.17.0.1:8767}"} + + # Correct format - without http:// prefix + with patch.dict(os.environ, {"PARAKEET_ASR_URL": "host.docker.internal:8767"}): + resolved = OmegaConf.create(config_template) + resolved = OmegaConf.to_container(resolved, resolve=True) + + assert resolved["model_url"] == "http://host.docker.internal:8767" + assert "http://http://" not in resolved["model_url"] + + def test_url_parsing_removes_double_slashes(self): + """Test that URL with double http:// causes connection failures.""" + from urllib.parse import urlparse + + # Valid URL + valid_url = "http://host.docker.internal:8767/transcribe" + parsed_valid = urlparse(valid_url) + assert parsed_valid.scheme == "http" + assert parsed_valid.netloc == "host.docker.internal:8767" + + # Invalid URL with double prefix + invalid_url = "http://http://host.docker.internal:8767/transcribe" + parsed_invalid = urlparse(invalid_url) + # urlparse treats "http:" as the netloc which causes DNS failures + assert parsed_invalid.scheme == "http" + assert parsed_invalid.netloc == "http:" # Invalid netloc causes "Name or service not known" + assert parsed_invalid.netloc != "host.docker.internal:8767" + + +class TestProviderSegmentsConfiguration: + """Test use_provider_segments configuration for different providers.""" + + def test_use_provider_segments_default_false(self): + """Test that use_provider_segments defaults to false.""" + config = OmegaConf.create({ + "backend": { + "transcription": {} + } + }) + + use_segments = config.backend.transcription.get("use_provider_segments", False) + assert use_segments is False + + def test_use_provider_segments_explicit_true(self): + """Test that use_provider_segments can be enabled.""" + config = OmegaConf.create({ + "backend": { + "transcription": { + "use_provider_segments": True + } + } + }) + + assert config.backend.transcription.use_provider_segments is True + + def test_vibevoice_should_use_provider_segments(self): + """ + Test that VibeVoice provider should have use_provider_segments=true + since it provides diarized segments. + """ + # VibeVoice provides segments with speaker diarization + vibevoice_capabilities = ["segments", "diarization"] + + # When provider has both capabilities, use_provider_segments should be true + has_diarization = "diarization" in vibevoice_capabilities + has_segments = "segments" in vibevoice_capabilities + + should_use_segments = has_diarization and has_segments + assert should_use_segments is True + + +class TestModelRegistryURLResolution: + """Test model registry URL resolution with environment variables.""" + + def test_model_url_resolution_with_env_var(self): + """Test that model URLs resolve correctly from environment.""" + config_template = """ + defaults: + stt: stt-vibevoice + models: + - name: stt-vibevoice + model_type: stt + model_provider: vibevoice + model_url: http://${oc.env:VIBEVOICE_ASR_URL,host.docker.internal:8767} + """ + + with patch.dict(os.environ, {"VIBEVOICE_ASR_URL": "host.docker.internal:8767"}): + config = OmegaConf.create(config_template) + resolved = OmegaConf.to_container(config, resolve=True) + + vibevoice_model = resolved["models"][0] + assert vibevoice_model["model_url"] == "http://host.docker.internal:8767" + + def test_multiple_asr_providers_url_resolution(self): + """Test that multiple ASR providers can use different URL patterns.""" + config_template = { + "models": [ + { + "name": "stt-vibevoice", + "model_url": "http://${oc.env:VIBEVOICE_ASR_URL,host.docker.internal:8767}" + }, + { + "name": "stt-parakeet", + "model_url": "http://${oc.env:PARAKEET_ASR_URL,172.17.0.1:8767}" + }, + { + "name": "stt-deepgram", + "model_url": "https://api.deepgram.com/v1" + } + ] + } + + env_vars = { + "VIBEVOICE_ASR_URL": "host.docker.internal:8767", + "PARAKEET_ASR_URL": "localhost:8080" + } + + with patch.dict(os.environ, env_vars): + config = OmegaConf.create(config_template) + resolved = OmegaConf.to_container(config, resolve=True) + + assert resolved["models"][0]["model_url"] == "http://host.docker.internal:8767" + assert resolved["models"][1]["model_url"] == "http://localhost:8080" + assert resolved["models"][2]["model_url"] == "https://api.deepgram.com/v1" + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/config_manager.py b/config_manager.py index 6f8a85a6..7919c5bc 100644 --- a/config_manager.py +++ b/config_manager.py @@ -325,6 +325,25 @@ def update_config_defaults(self, updates: Dict[str, str]): self._save_config_yml(config) + def add_or_update_model(self, model_def: Dict[str, Any]): + """ + Add or update a model in the models list by name. + + Args: + model_def: Model definition dict with at least a 'name' key. + """ + config = self._load_config_yml() + if "models" not in config: + config["models"] = [] + # Update existing or append + for i, m in enumerate(config["models"]): + if m.get("name") == model_def["name"]: + config["models"][i] = model_def + break + else: + config["models"].append(model_def) + self._save_config_yml(config) + def get_full_config(self) -> Dict[str, Any]: """ Get complete config.yml as dictionary. diff --git a/extras/asr-services/docker-compose.yml b/extras/asr-services/docker-compose.yml index 7e4b0aa5..d31ea7bf 100644 --- a/extras/asr-services/docker-compose.yml +++ b/extras/asr-services/docker-compose.yml @@ -90,6 +90,8 @@ services: build: context: . dockerfile: providers/vibevoice/Dockerfile + args: + PYTORCH_CUDA_VERSION: ${PYTORCH_CUDA_VERSION:-cu126} image: chronicle-asr-vibevoice:latest ports: - "${ASR_PORT:-8767}:8765" @@ -112,6 +114,9 @@ services: - DEVICE=${DEVICE:-cuda} - TORCH_DTYPE=${TORCH_DTYPE:-bfloat16} - MAX_NEW_TOKENS=${MAX_NEW_TOKENS:-8192} + dns: + - 8.8.8.8 + - 8.8.4.4 restart: unless-stopped # ============================================================================ diff --git a/extras/asr-services/providers/vibevoice/Dockerfile b/extras/asr-services/providers/vibevoice/Dockerfile index 218abb0c..a8d110e9 100644 --- a/extras/asr-services/providers/vibevoice/Dockerfile +++ b/extras/asr-services/providers/vibevoice/Dockerfile @@ -8,6 +8,8 @@ ######################### builder ################################# FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim AS builder +ARG PYTORCH_CUDA_VERSION=cu126 + WORKDIR /app # Install system dependencies for building @@ -17,7 +19,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ # Dependency manifest first for cache-friendly installs COPY pyproject.toml uv.lock ./ -RUN uv sync --no-install-project --group vibevoice && \ +RUN uv sync --no-install-project --group vibevoice --extra ${PYTORCH_CUDA_VERSION} && \ uv cache clean ######################### runtime ################################# diff --git a/extras/asr-services/tests/test_cuda_version_config.py b/extras/asr-services/tests/test_cuda_version_config.py new file mode 100644 index 00000000..b4ded1c4 --- /dev/null +++ b/extras/asr-services/tests/test_cuda_version_config.py @@ -0,0 +1,220 @@ +""" +Unit tests for CUDA version configuration in ASR service Dockerfiles. + +Tests the configurable PYTORCH_CUDA_VERSION build arg that allows selecting +different CUDA versions (cu121, cu126, cu128) for different GPU architectures. +""" + +import os +import re +import pytest +from pathlib import Path + + +class TestDockerfileCUDASupport: + """Test that Dockerfiles support configurable CUDA versions.""" + + @pytest.fixture + def vibevoice_dockerfile_path(self): + """Path to VibeVoice Dockerfile.""" + return Path(__file__).parent.parent / "providers" / "vibevoice" / "Dockerfile" + + @pytest.fixture + def nemo_dockerfile_path(self): + """Path to NeMo Dockerfile.""" + return Path(__file__).parent.parent / "providers" / "nemo" / "Dockerfile" + + @pytest.fixture + def docker_compose_path(self): + """Path to docker-compose.yml.""" + return Path(__file__).parent.parent / "docker-compose.yml" + + def test_vibevoice_dockerfile_has_cuda_arg(self, vibevoice_dockerfile_path): + """Test that VibeVoice Dockerfile declares PYTORCH_CUDA_VERSION arg.""" + content = vibevoice_dockerfile_path.read_text() + + # Should have ARG declaration + assert re.search(r"ARG\s+PYTORCH_CUDA_VERSION", content), \ + "Dockerfile must declare PYTORCH_CUDA_VERSION build arg" + + # Should have default value + arg_match = re.search(r"ARG\s+PYTORCH_CUDA_VERSION=(\w+)", content) + assert arg_match, "PYTORCH_CUDA_VERSION should have default value" + default_version = arg_match.group(1) + assert default_version in ["cu121", "cu126", "cu128"], \ + f"Default CUDA version {default_version} should be cu121, cu126, or cu128" + + def test_vibevoice_dockerfile_uses_cuda_arg_in_uv_sync(self, vibevoice_dockerfile_path): + """Test that VibeVoice Dockerfile uses CUDA arg in uv sync command.""" + content = vibevoice_dockerfile_path.read_text() + + # Should use --extra ${PYTORCH_CUDA_VERSION} + assert re.search(r"uv\s+sync.*--extra\s+\$\{PYTORCH_CUDA_VERSION\}", content), \ + "uv sync command must include --extra ${PYTORCH_CUDA_VERSION}" + + def test_nemo_dockerfile_has_cuda_support(self, nemo_dockerfile_path): + """Test that NeMo Dockerfile (reference implementation) has CUDA support.""" + content = nemo_dockerfile_path.read_text() + + assert re.search(r"ARG\s+PYTORCH_CUDA_VERSION", content), \ + "NeMo Dockerfile should have PYTORCH_CUDA_VERSION arg" + + assert re.search(r"uv\s+sync.*--extra\s+\$\{PYTORCH_CUDA_VERSION\}", content), \ + "NeMo Dockerfile should use CUDA version in uv sync" + + def test_docker_compose_passes_cuda_arg_to_vibevoice(self, docker_compose_path): + """Test that docker-compose.yml passes PYTORCH_CUDA_VERSION to vibevoice service.""" + content = docker_compose_path.read_text() + + # Find vibevoice-asr service section + vibevoice_section = re.search( + r"vibevoice-asr:.*?(?=^\S|\Z)", + content, + re.MULTILINE | re.DOTALL + ) + assert vibevoice_section, "docker-compose.yml must have vibevoice-asr service" + + section_text = vibevoice_section.group(0) + + # Should have build args section + assert re.search(r"args:", section_text), \ + "vibevoice-asr service should have build args section" + + # Should pass PYTORCH_CUDA_VERSION + assert re.search( + r"PYTORCH_CUDA_VERSION:\s*\$\{PYTORCH_CUDA_VERSION:-cu126\}", + section_text + ), "vibevoice-asr should pass PYTORCH_CUDA_VERSION build arg with cu126 default" + + def test_docker_compose_cuda_arg_consistency(self, docker_compose_path): + """Test that all GPU-enabled services use consistent CUDA version pattern.""" + content = docker_compose_path.read_text() + + # Services that should have CUDA support + gpu_services = ["vibevoice-asr", "nemo-asr", "parakeet-asr"] + + for service_name in gpu_services: + service_match = re.search( + rf"{service_name}:.*?(?=^\S|\Z)", + content, + re.MULTILINE | re.DOTALL + ) + + if service_match: + service_text = service_match.group(0) + + # Check if service has GPU resources + if "devices:" in service_text and "nvidia" in service_text: + # Should have PYTORCH_CUDA_VERSION arg + assert re.search( + r"PYTORCH_CUDA_VERSION:\s*\$\{PYTORCH_CUDA_VERSION:-cu\d+\}", + service_text + ), f"{service_name} with GPU should have PYTORCH_CUDA_VERSION build arg" + + +class TestCUDAVersionEnvironmentVariable: + """Test CUDA version environment variable handling.""" + + def test_cuda_version_env_var_format(self): + """Test that CUDA version environment variables follow correct format.""" + valid_versions = ["cu121", "cu126", "cu128"] + + for version in valid_versions: + assert re.match(r"^cu\d{3}$", version), \ + f"{version} should match pattern cu### (e.g., cu121, cu126)" + + def test_cuda_version_from_env(self): + """Test reading CUDA version from environment.""" + test_version = "cu128" + + with pytest.MonkeyPatch.context() as mp: + mp.setenv("PYTORCH_CUDA_VERSION", test_version) + cuda_version = os.getenv("PYTORCH_CUDA_VERSION") + + assert cuda_version == test_version + assert cuda_version in ["cu121", "cu126", "cu128"] + + def test_cuda_version_default_fallback(self): + """Test that default CUDA version is used when env var not set.""" + with pytest.MonkeyPatch.context() as mp: + mp.delenv("PYTORCH_CUDA_VERSION", raising=False) + + # Simulate docker-compose default: ${PYTORCH_CUDA_VERSION:-cu126} + cuda_version = os.getenv("PYTORCH_CUDA_VERSION", "cu126") + + assert cuda_version == "cu126" + + +class TestGPUArchitectureCUDAMapping: + """Test that GPU architectures map to correct CUDA versions.""" + + def test_rtx_5090_requires_cu128(self): + """ + Test that RTX 5090 (sm_120) requires CUDA 12.8+. + + RTX 5090 has CUDA capability 12.0 (sm_120) which requires + PyTorch built with CUDA 12.8 or higher. + """ + gpu_arch = "sm_120" # RTX 5090 + required_cuda = "cu128" + + # Map GPU architecture to minimum CUDA version + arch_to_cuda = { + "sm_120": "cu128", # RTX 5090, RTX 50 series + "sm_90": "cu126", # RTX 4090, H100 + "sm_89": "cu121", # RTX 4090 + "sm_86": "cu121", # RTX 3090, A6000 + } + + assert arch_to_cuda.get(gpu_arch) == required_cuda, \ + f"GPU architecture {gpu_arch} requires CUDA version {required_cuda}" + + def test_older_gpus_work_with_cu121(self): + """Test that older GPUs (sm_86, sm_80) work with cu121.""" + older_archs = ["sm_86", "sm_80", "sm_75"] # RTX 3090, A100, RTX 2080 + + for arch in older_archs: + # cu121 supports these architectures + assert arch in ["sm_75", "sm_80", "sm_86"], \ + f"{arch} should be supported by CUDA 12.1" + + +class TestPyProjectCUDAExtras: + """Test that pyproject.toml defines CUDA version extras correctly.""" + + @pytest.fixture + def pyproject_path(self): + """Path to pyproject.toml.""" + return Path(__file__).parent.parent / "pyproject.toml" + + def test_pyproject_has_cuda_extras(self, pyproject_path): + """Test that pyproject.toml defines cu121, cu126, cu128 extras.""" + if not pyproject_path.exists(): + pytest.skip("pyproject.toml not found") + + content = pyproject_path.read_text() + + # Should have [project.optional-dependencies] or [tool.uv] with extras + cuda_versions = ["cu121", "cu126", "cu128"] + + for version in cuda_versions: + # Look for the CUDA version as an extra + assert re.search(rf'["\']?{version}["\']?\s*=', content), \ + f"pyproject.toml should define {version} extra" + + def test_pyproject_cuda_extras_have_pytorch(self, pyproject_path): + """Test that CUDA extras include torch/torchaudio dependencies.""" + if not pyproject_path.exists(): + pytest.skip("pyproject.toml not found") + + content = pyproject_path.read_text() + + # Each CUDA extra should reference torch with the appropriate index + # e.g., { extra = "cu128" } or { index = "pytorch-cu128" } + assert re.search(r'extra\s*=\s*["\']cu\d{3}["\']', content) or \ + re.search(r'index\s*=\s*["\']pytorch-cu\d{3}["\']', content), \ + "CUDA extras should reference PyTorch with CUDA version" + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) From e2b924ceeee8a68bb31562630b9df0c780d13750 Mon Sep 17 00:00:00 2001 From: 0xrushi <6279035+0xrushi@users.noreply.github.com> Date: Sat, 7 Feb 2026 00:02:22 -0500 Subject: [PATCH 02/11] Remove outdated unit tests for LLM custom provider and transcription URL configuration --- .../tests/test_setup_llm_custom_provider.py | 286 ------------------ .../tests/test_transcription_url_config.py | 183 ----------- .../test_llm_custom_provider.robot | 258 ++++++++++++++++ .../test_transcription_url.robot | 126 ++++++++ tests/libs/ConfigTestHelper.py | 73 +++++ tests/test-requirements.txt | 2 + 6 files changed, 459 insertions(+), 469 deletions(-) delete mode 100644 backends/advanced/tests/test_setup_llm_custom_provider.py delete mode 100644 backends/advanced/tests/test_transcription_url_config.py create mode 100644 tests/configuration/test_llm_custom_provider.robot create mode 100644 tests/configuration/test_transcription_url.robot create mode 100644 tests/libs/ConfigTestHelper.py diff --git a/backends/advanced/tests/test_setup_llm_custom_provider.py b/backends/advanced/tests/test_setup_llm_custom_provider.py deleted file mode 100644 index 33014383..00000000 --- a/backends/advanced/tests/test_setup_llm_custom_provider.py +++ /dev/null @@ -1,286 +0,0 @@ -""" -Unit tests for OpenAI-Compatible custom LLM provider setup. - -Tests the wizard's choice "3" (OpenAI-Compatible) in setup_llm(), -including model creation in config.yml and defaults updates. -""" - -import os -import sys -import tempfile -import shutil -from pathlib import Path -from unittest.mock import patch, MagicMock - -import pytest -import yaml - -# Add repo root to path for imports -sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent.parent)) -from config_manager import ConfigManager - - -@pytest.fixture -def temp_config_dir(): - """Create a temporary directory with a minimal config.yml.""" - tmpdir = tempfile.mkdtemp() - config_dir = Path(tmpdir) / "config" - config_dir.mkdir() - - config = { - "defaults": { - "llm": "openai-llm", - "embedding": "openai-embed", - "stt": "stt-deepgram", - }, - "models": [ - { - "name": "openai-llm", - "description": "OpenAI GPT-4o-mini", - "model_type": "llm", - "model_provider": "openai", - "api_family": "openai", - "model_name": "gpt-4o-mini", - "model_url": "https://api.openai.com/v1", - "api_key": "${oc.env:OPENAI_API_KEY,''}", - "model_params": {"temperature": 0.2, "max_tokens": 2000}, - "model_output": "json", - }, - { - "name": "local-embed", - "description": "Local embeddings via Ollama", - "model_type": "embedding", - "model_provider": "ollama", - "api_family": "openai", - "model_name": "nomic-embed-text:latest", - "model_url": "http://localhost:11434/v1", - "api_key": "${oc.env:OPENAI_API_KEY,ollama}", - "embedding_dimensions": 768, - "model_output": "vector", - }, - ], - "memory": {"provider": "chronicle"}, - } - - config_path = config_dir / "config.yml" - with open(config_path, "w") as f: - yaml.dump(config, f, default_flow_style=False, sort_keys=False) - - yield tmpdir - - shutil.rmtree(tmpdir) - - -@pytest.fixture -def config_manager(temp_config_dir): - """Create a ConfigManager pointing to the temp config.""" - return ConfigManager(service_path=None, repo_root=Path(temp_config_dir)) - - -class TestAddOrUpdateModel: - """Tests for ConfigManager.add_or_update_model().""" - - def test_add_new_model(self, config_manager): - """add_or_update_model() should append a new model when name doesn't exist.""" - new_model = { - "name": "custom-llm", - "description": "Custom OpenAI-compatible LLM", - "model_type": "llm", - "model_provider": "openai", - "api_family": "openai", - "model_name": "llama-3.1-70b-versatile", - "model_url": "https://api.groq.com/openai/v1", - "api_key": "${oc.env:CUSTOM_LLM_API_KEY,''}", - "model_params": {"temperature": 0.2, "max_tokens": 2000}, - "model_output": "json", - } - - config_manager.add_or_update_model(new_model) - - config = config_manager.get_full_config() - model_names = [m["name"] for m in config["models"]] - assert "custom-llm" in model_names - - added = next(m for m in config["models"] if m["name"] == "custom-llm") - assert added["model_name"] == "llama-3.1-70b-versatile" - assert added["model_url"] == "https://api.groq.com/openai/v1" - assert added["model_type"] == "llm" - - def test_update_existing_model(self, config_manager): - """add_or_update_model() should replace an existing model with the same name.""" - # First add - model_v1 = { - "name": "custom-llm", - "model_type": "llm", - "model_name": "model-v1", - "model_url": "https://example.com/v1", - } - config_manager.add_or_update_model(model_v1) - - # Then update - model_v2 = { - "name": "custom-llm", - "model_type": "llm", - "model_name": "model-v2", - "model_url": "https://example.com/v2", - } - config_manager.add_or_update_model(model_v2) - - config = config_manager.get_full_config() - custom_models = [m for m in config["models"] if m["name"] == "custom-llm"] - assert len(custom_models) == 1 - assert custom_models[0]["model_name"] == "model-v2" - assert custom_models[0]["model_url"] == "https://example.com/v2" - - def test_add_model_to_empty_models_list(self, temp_config_dir): - """add_or_update_model() should create models list if it doesn't exist.""" - config_path = Path(temp_config_dir) / "config" / "config.yml" - with open(config_path, "w") as f: - yaml.dump({"defaults": {"llm": "openai-llm"}}, f) - - cm = ConfigManager(service_path=None, repo_root=Path(temp_config_dir)) - cm.add_or_update_model({"name": "test-model", "model_type": "llm"}) - - config = cm.get_full_config() - assert "models" in config - assert len(config["models"]) == 1 - assert config["models"][0]["name"] == "test-model" - - -class TestSetupLlmCustomProvider: - """Tests for the custom LLM provider flow in setup_llm().""" - - def _make_setup(self, temp_config_dir): - """Create a ChronicleSetup instance pointing at the temp config.""" - # We need to mock the ChronicleSetup constructor's checks - # Instead, we test the logic by calling config_manager directly, - # simulating what setup_llm() choice "3" does. - return ConfigManager(service_path=None, repo_root=Path(temp_config_dir)) - - def test_custom_llm_model_added_to_config(self, config_manager): - """Selecting custom provider should create correct model entry.""" - llm_model = { - "name": "custom-llm", - "description": "Custom OpenAI-compatible LLM", - "model_type": "llm", - "model_provider": "openai", - "api_family": "openai", - "model_name": "llama-3.1-70b-versatile", - "model_url": "https://api.groq.com/openai/v1", - "api_key": "${oc.env:CUSTOM_LLM_API_KEY,''}", - "model_params": {"temperature": 0.2, "max_tokens": 2000}, - "model_output": "json", - } - - config_manager.add_or_update_model(llm_model) - - config = config_manager.get_full_config() - model = next(m for m in config["models"] if m["name"] == "custom-llm") - assert model["model_provider"] == "openai" - assert model["api_family"] == "openai" - assert model["model_name"] == "llama-3.1-70b-versatile" - assert model["model_url"] == "https://api.groq.com/openai/v1" - assert model["api_key"] == "${oc.env:CUSTOM_LLM_API_KEY,''}" - assert model["model_params"]["temperature"] == 0.2 - assert model["model_output"] == "json" - - def test_custom_llm_and_embedding_model_added(self, config_manager): - """Both LLM and embedding models should be created when embedding model is provided.""" - llm_model = { - "name": "custom-llm", - "model_type": "llm", - "model_provider": "openai", - "api_family": "openai", - "model_name": "llama-3.1-70b-versatile", - "model_url": "https://api.groq.com/openai/v1", - "api_key": "${oc.env:CUSTOM_LLM_API_KEY,''}", - "model_params": {"temperature": 0.2, "max_tokens": 2000}, - "model_output": "json", - } - embed_model = { - "name": "custom-embed", - "description": "Custom OpenAI-compatible embeddings", - "model_type": "embedding", - "model_provider": "openai", - "api_family": "openai", - "model_name": "text-embedding-3-small", - "model_url": "https://api.groq.com/openai/v1", - "api_key": "${oc.env:CUSTOM_LLM_API_KEY,''}", - "embedding_dimensions": 1536, - "model_output": "vector", - } - - config_manager.add_or_update_model(llm_model) - config_manager.add_or_update_model(embed_model) - - config = config_manager.get_full_config() - model_names = [m["name"] for m in config["models"]] - assert "custom-llm" in model_names - assert "custom-embed" in model_names - - embed = next(m for m in config["models"] if m["name"] == "custom-embed") - assert embed["model_type"] == "embedding" - assert embed["model_name"] == "text-embedding-3-small" - assert embed["embedding_dimensions"] == 1536 - - def test_custom_llm_without_embedding_falls_back_to_local_embed(self, config_manager): - """defaults.embedding should be local-embed when no custom embedding is provided.""" - llm_model = { - "name": "custom-llm", - "model_type": "llm", - "model_name": "some-model", - "model_url": "https://api.example.com/v1", - } - config_manager.add_or_update_model(llm_model) - config_manager.update_config_defaults({"llm": "custom-llm", "embedding": "local-embed"}) - - defaults = config_manager.get_config_defaults() - assert defaults["llm"] == "custom-llm" - assert defaults["embedding"] == "local-embed" - - def test_custom_llm_updates_defaults_with_embedding(self, config_manager): - """defaults.llm and defaults.embedding should be updated correctly with custom embed.""" - config_manager.update_config_defaults({"llm": "custom-llm", "embedding": "custom-embed"}) - - defaults = config_manager.get_config_defaults() - assert defaults["llm"] == "custom-llm" - assert defaults["embedding"] == "custom-embed" - - def test_custom_llm_api_key_env_reference(self, config_manager): - """API key should use env var reference in config.yml model.""" - llm_model = { - "name": "custom-llm", - "model_type": "llm", - "model_name": "some-model", - "model_url": "https://api.example.com/v1", - "api_key": "${oc.env:CUSTOM_LLM_API_KEY,''}", - } - config_manager.add_or_update_model(llm_model) - - config = config_manager.get_full_config() - model = next(m for m in config["models"] if m["name"] == "custom-llm") - assert model["api_key"] == "${oc.env:CUSTOM_LLM_API_KEY,''}" - - def test_existing_models_preserved_after_adding_custom(self, config_manager): - """Adding a custom model should not remove existing models.""" - config_before = config_manager.get_full_config() - original_count = len(config_before["models"]) - - config_manager.add_or_update_model({ - "name": "custom-llm", - "model_type": "llm", - "model_name": "test-model", - "model_url": "https://example.com/v1", - }) - - config_after = config_manager.get_full_config() - assert len(config_after["models"]) == original_count + 1 - # Original models still present - model_names = [m["name"] for m in config_after["models"]] - assert "openai-llm" in model_names - assert "local-embed" in model_names - assert "custom-llm" in model_names - - -if __name__ == "__main__": - pytest.main([__file__, "-v"]) diff --git a/backends/advanced/tests/test_transcription_url_config.py b/backends/advanced/tests/test_transcription_url_config.py deleted file mode 100644 index b9fcf9bc..00000000 --- a/backends/advanced/tests/test_transcription_url_config.py +++ /dev/null @@ -1,183 +0,0 @@ -""" -Unit tests for transcription service URL configuration. - -Tests the fix for the double http:// prefix issue where environment variables -containing protocol prefixes were incorrectly combined with hardcoded prefixes -in config.yml. -""" - -import os -import pytest -from unittest.mock import patch, MagicMock -from omegaconf import OmegaConf - - -class TestTranscriptionURLConfiguration: - """Test transcription service URL configuration and parsing.""" - - def test_vibevoice_url_without_http_prefix(self): - """Test that VIBEVOICE_ASR_URL without http:// prefix works correctly.""" - # Simulate config.yml template: http://${oc.env:VIBEVOICE_ASR_URL} - config_template = {"model_url": "http://${oc.env:VIBEVOICE_ASR_URL,host.docker.internal:8767}"} - - with patch.dict(os.environ, {"VIBEVOICE_ASR_URL": "host.docker.internal:8767"}): - resolved = OmegaConf.create(config_template) - resolved = OmegaConf.to_container(resolved, resolve=True) - - assert resolved["model_url"] == "http://host.docker.internal:8767" - assert "http://http://" not in resolved["model_url"] - - def test_vibevoice_url_with_http_prefix_causes_double_prefix(self): - """Test that VIBEVOICE_ASR_URL WITH http:// causes double prefix (bug scenario).""" - config_template = {"model_url": "http://${oc.env:VIBEVOICE_ASR_URL,host.docker.internal:8767}"} - - # This is the BUG scenario - env var already has http:// - with patch.dict(os.environ, {"VIBEVOICE_ASR_URL": "http://host.docker.internal:8767"}): - resolved = OmegaConf.create(config_template) - resolved = OmegaConf.to_container(resolved, resolve=True) - - # This demonstrates the bug - assert resolved["model_url"] == "http://http://host.docker.internal:8767" - assert "http://http://" in resolved["model_url"] - - def test_vibevoice_url_default_fallback(self): - """Test that default fallback works when VIBEVOICE_ASR_URL is not set.""" - config_template = {"model_url": "http://${oc.env:VIBEVOICE_ASR_URL,host.docker.internal:8767}"} - - # No VIBEVOICE_ASR_URL set - should use default - with patch.dict(os.environ, {}, clear=True): - resolved = OmegaConf.create(config_template) - resolved = OmegaConf.to_container(resolved, resolve=True) - - assert resolved["model_url"] == "http://host.docker.internal:8767" - - def test_parakeet_url_configuration(self): - """Test that PARAKEET_ASR_URL follows same pattern.""" - config_template = {"model_url": "http://${oc.env:PARAKEET_ASR_URL,172.17.0.1:8767}"} - - # Correct format - without http:// prefix - with patch.dict(os.environ, {"PARAKEET_ASR_URL": "host.docker.internal:8767"}): - resolved = OmegaConf.create(config_template) - resolved = OmegaConf.to_container(resolved, resolve=True) - - assert resolved["model_url"] == "http://host.docker.internal:8767" - assert "http://http://" not in resolved["model_url"] - - def test_url_parsing_removes_double_slashes(self): - """Test that URL with double http:// causes connection failures.""" - from urllib.parse import urlparse - - # Valid URL - valid_url = "http://host.docker.internal:8767/transcribe" - parsed_valid = urlparse(valid_url) - assert parsed_valid.scheme == "http" - assert parsed_valid.netloc == "host.docker.internal:8767" - - # Invalid URL with double prefix - invalid_url = "http://http://host.docker.internal:8767/transcribe" - parsed_invalid = urlparse(invalid_url) - # urlparse treats "http:" as the netloc which causes DNS failures - assert parsed_invalid.scheme == "http" - assert parsed_invalid.netloc == "http:" # Invalid netloc causes "Name or service not known" - assert parsed_invalid.netloc != "host.docker.internal:8767" - - -class TestProviderSegmentsConfiguration: - """Test use_provider_segments configuration for different providers.""" - - def test_use_provider_segments_default_false(self): - """Test that use_provider_segments defaults to false.""" - config = OmegaConf.create({ - "backend": { - "transcription": {} - } - }) - - use_segments = config.backend.transcription.get("use_provider_segments", False) - assert use_segments is False - - def test_use_provider_segments_explicit_true(self): - """Test that use_provider_segments can be enabled.""" - config = OmegaConf.create({ - "backend": { - "transcription": { - "use_provider_segments": True - } - } - }) - - assert config.backend.transcription.use_provider_segments is True - - def test_vibevoice_should_use_provider_segments(self): - """ - Test that VibeVoice provider should have use_provider_segments=true - since it provides diarized segments. - """ - # VibeVoice provides segments with speaker diarization - vibevoice_capabilities = ["segments", "diarization"] - - # When provider has both capabilities, use_provider_segments should be true - has_diarization = "diarization" in vibevoice_capabilities - has_segments = "segments" in vibevoice_capabilities - - should_use_segments = has_diarization and has_segments - assert should_use_segments is True - - -class TestModelRegistryURLResolution: - """Test model registry URL resolution with environment variables.""" - - def test_model_url_resolution_with_env_var(self): - """Test that model URLs resolve correctly from environment.""" - config_template = """ - defaults: - stt: stt-vibevoice - models: - - name: stt-vibevoice - model_type: stt - model_provider: vibevoice - model_url: http://${oc.env:VIBEVOICE_ASR_URL,host.docker.internal:8767} - """ - - with patch.dict(os.environ, {"VIBEVOICE_ASR_URL": "host.docker.internal:8767"}): - config = OmegaConf.create(config_template) - resolved = OmegaConf.to_container(config, resolve=True) - - vibevoice_model = resolved["models"][0] - assert vibevoice_model["model_url"] == "http://host.docker.internal:8767" - - def test_multiple_asr_providers_url_resolution(self): - """Test that multiple ASR providers can use different URL patterns.""" - config_template = { - "models": [ - { - "name": "stt-vibevoice", - "model_url": "http://${oc.env:VIBEVOICE_ASR_URL,host.docker.internal:8767}" - }, - { - "name": "stt-parakeet", - "model_url": "http://${oc.env:PARAKEET_ASR_URL,172.17.0.1:8767}" - }, - { - "name": "stt-deepgram", - "model_url": "https://api.deepgram.com/v1" - } - ] - } - - env_vars = { - "VIBEVOICE_ASR_URL": "host.docker.internal:8767", - "PARAKEET_ASR_URL": "localhost:8080" - } - - with patch.dict(os.environ, env_vars): - config = OmegaConf.create(config_template) - resolved = OmegaConf.to_container(config, resolve=True) - - assert resolved["models"][0]["model_url"] == "http://host.docker.internal:8767" - assert resolved["models"][1]["model_url"] == "http://localhost:8080" - assert resolved["models"][2]["model_url"] == "https://api.deepgram.com/v1" - - -if __name__ == "__main__": - pytest.main([__file__, "-v"]) diff --git a/tests/configuration/test_llm_custom_provider.robot b/tests/configuration/test_llm_custom_provider.robot new file mode 100644 index 00000000..fa9a09c3 --- /dev/null +++ b/tests/configuration/test_llm_custom_provider.robot @@ -0,0 +1,258 @@ +*** Settings *** +Documentation Tests for LLM Custom Provider Setup (ConfigManager) +Library OperatingSystem +Library Collections +Library String +Library ../libs/ConfigTestHelper.py + +*** Keywords *** +Setup Temp Config + [Documentation] Creates a temporary configuration environment + ${random_suffix}= Generate Random String 8 [NUMBERS] + ${temp_path}= Join Path ${OUTPUT DIR} temp_config_${random_suffix} + Create Directory ${temp_path} + + # Create initial default config content + ${defaults}= Create Dictionary llm=openai-llm embedding=openai-embed stt=stt-deepgram + ${model1_params}= Create Dictionary temperature=${0.2} max_tokens=${2000} + ${model1}= Create Dictionary + ... name=openai-llm + ... description=OpenAI GPT-4o-mini + ... model_type=llm + ... model_provider=openai + ... api_family=openai + ... model_name=gpt-4o-mini + ... model_url=https://api.openai.com/v1 + ... api_key=\${oc.env:OPENAI_API_KEY,''} + ... model_params=${model1_params} + ... model_output=json + + ${model2}= Create Dictionary + ... name=local-embed + ... description=Local embeddings via Ollama + ... model_type=embedding + ... model_provider=ollama + ... api_family=openai + ... model_name=nomic-embed-text:latest + ... model_url=http://localhost:11434/v1 + ... api_key=\${oc.env:OPENAI_API_KEY,ollama} + ... embedding_dimensions=${768} + ... model_output=vector + + ${models}= Create List ${model1} ${model2} + ${memory}= Create Dictionary provider=chronicle + ${config}= Create Dictionary defaults=${defaults} models=${models} memory=${memory} + + Create Temp Config Structure ${temp_path} ${config} + Set Test Variable ${TEMP_PATH} ${temp_path} + +Cleanup Temp Config + Remove Directory ${TEMP_PATH} recursive=True + +*** Test Cases *** +Add New Model To Config + [Documentation] add_or_update_model() should append a new model when name doesn't exist. + [Setup] Setup Temp Config + [Teardown] Cleanup Temp Config + + ${params}= Create Dictionary temperature=${0.2} max_tokens=${2000} + ${new_model}= Create Dictionary + ... name=custom-llm + ... description=Custom OpenAI-compatible LLM + ... model_type=llm + ... model_provider=openai + ... api_family=openai + ... model_name=llama-3.1-70b-versatile + ... model_url=https://api.groq.com/openai/v1 + ... api_key=\${oc.env:CUSTOM_LLM_API_KEY,''} + ... model_params=${params} + ... model_output=json + + ${cm}= Get Config Manager Instance ${TEMP_PATH} + Add Model To Config Manager ${cm} ${new_model} + + ${config}= Call Method ${cm} get_full_config + ${models}= Get From Dictionary ${config} models + + ${target_model}= Set Variable ${None} + FOR ${m} IN @{models} + Run Keyword If '${m["name"]}' == 'custom-llm' Set Test Variable ${target_model} ${m} + END + + Should Not Be Equal ${target_model} ${None} + Should Be Equal ${target_model["model_name"]} llama-3.1-70b-versatile + Should Be Equal ${target_model["model_url"]} https://api.groq.com/openai/v1 + Should Be Equal ${target_model["model_type"]} llm + +Update Existing Model + [Documentation] add_or_update_model() should replace an existing model with the same name. + [Setup] Setup Temp Config + [Teardown] Cleanup Temp Config + + ${cm}= Get Config Manager Instance ${TEMP_PATH} + + # First add + ${model_v1}= Create Dictionary name=custom-llm model_type=llm model_name=model-v1 model_url=https://example.com/v1 + Add Model To Config Manager ${cm} ${model_v1} + + # Then update + ${model_v2}= Create Dictionary name=custom-llm model_type=llm model_name=model-v2 model_url=https://example.com/v2 + Add Model To Config Manager ${cm} ${model_v2} + + ${config}= Call Method ${cm} get_full_config + ${models}= Get From Dictionary ${config} models + + ${count}= Set Variable 0 + ${target_model}= Set Variable ${None} + FOR ${m} IN @{models} + IF '${m["name"]}' == 'custom-llm' + Set Test Variable ${target_model} ${m} + ${count}= Evaluate ${count} + 1 + END + END + + Should Be Equal As Integers ${count} 1 + Should Be Equal ${target_model["model_name"]} model-v2 + Should Be Equal ${target_model["model_url"]} https://example.com/v2 + +Add Model To Empty Models List + [Documentation] add_or_update_model() should create models list if it doesn't exist. + [Setup] Setup Temp Config + [Teardown] Cleanup Temp Config + + # Overwrite config with empty models + ${defaults}= Create Dictionary llm=openai-llm + ${empty_config}= Create Dictionary defaults=${defaults} + Create Temp Config Structure ${TEMP_PATH} ${empty_config} + + ${cm}= Get Config Manager Instance ${TEMP_PATH} + ${test_model}= Create Dictionary name=test-model model_type=llm + Add Model To Config Manager ${cm} ${test_model} + + ${config}= Call Method ${cm} get_full_config + Dictionary Should Contain Key ${config} models + ${models}= Get From Dictionary ${config} models + Length Should Be ${models} 1 + Should Be Equal ${models[0]["name"]} test-model + +Custom LLM And Embedding Model Added + [Documentation] Both LLM and embedding models should be created when embedding model is provided. + [Setup] Setup Temp Config + [Teardown] Cleanup Temp Config + + ${cm}= Get Config Manager Instance ${TEMP_PATH} + + ${params}= Create Dictionary temperature=${0.2} max_tokens=${2000} + ${llm_model}= Create Dictionary + ... name=custom-llm + ... model_type=llm + ... model_provider=openai + ... api_family=openai + ... model_name=llama-3.1-70b-versatile + ... model_url=https://api.groq.com/openai/v1 + ... api_key=\${oc.env:CUSTOM_LLM_API_KEY,''} + ... model_params=${params} + ... model_output=json + + ${embed_model}= Create Dictionary + ... name=custom-embed + ... description=Custom OpenAI-compatible embeddings + ... model_type=embedding + ... model_provider=openai + ... api_family=openai + ... model_name=text-embedding-3-small + ... model_url=https://api.groq.com/openai/v1 + ... api_key=\${oc.env:CUSTOM_LLM_API_KEY,''} + ... embedding_dimensions=${1536} + ... model_output=vector + + Add Model To Config Manager ${cm} ${llm_model} + Add Model To Config Manager ${cm} ${embed_model} + + ${config}= Call Method ${cm} get_full_config + ${models}= Get From Dictionary ${config} models + ${model_names}= Create List + FOR ${m} IN @{models} + Append To List ${model_names} ${m["name"]} + END + + List Should Contain Value ${model_names} custom-llm + List Should Contain Value ${model_names} custom-embed + + ${target_embed}= Set Variable ${None} + FOR ${m} IN @{models} + Run Keyword If '${m["name"]}' == 'custom-embed' Set Test Variable ${target_embed} ${m} + END + + Should Be Equal ${target_embed["model_type"]} embedding + Should Be Equal ${target_embed["model_name"]} text-embedding-3-small + Should Be Equal As Integers ${target_embed["embedding_dimensions"]} 1536 + +Custom LLM Without Embedding Falls Back To Local + [Documentation] defaults.embedding should be local-embed when no custom embedding is provided. + [Setup] Setup Temp Config + [Teardown] Cleanup Temp Config + + ${cm}= Get Config Manager Instance ${TEMP_PATH} + + ${llm_model}= Create Dictionary + ... name=custom-llm + ... model_type=llm + ... model_name=some-model + ... model_url=https://api.example.com/v1 + + Add Model To Config Manager ${cm} ${llm_model} + ${defaults_update}= Create Dictionary llm=custom-llm embedding=local-embed + Update Defaults In Config Manager ${cm} ${defaults_update} + + ${defaults}= Call Method ${cm} get_config_defaults + Should Be Equal ${defaults["llm"]} custom-llm + Should Be Equal ${defaults["embedding"]} local-embed + +Custom LLM Updates Defaults With Embedding + [Documentation] defaults.llm and defaults.embedding should be updated correctly with custom embed. + [Setup] Setup Temp Config + [Teardown] Cleanup Temp Config + + ${cm}= Get Config Manager Instance ${TEMP_PATH} + + ${defaults_update}= Create Dictionary llm=custom-llm embedding=custom-embed + Update Defaults In Config Manager ${cm} ${defaults_update} + + ${defaults}= Call Method ${cm} get_config_defaults + Should Be Equal ${defaults["llm"]} custom-llm + Should Be Equal ${defaults["embedding"]} custom-embed + +Existing Models Preserved After Adding Custom + [Documentation] Adding a custom model should not remove existing models. + [Setup] Setup Temp Config + [Teardown] Cleanup Temp Config + + ${cm}= Get Config Manager Instance ${TEMP_PATH} + ${config_before}= Call Method ${cm} get_full_config + ${models_before}= Get From Dictionary ${config_before} models + ${original_count}= Get Length ${models_before} + + ${new_model}= Create Dictionary + ... name=custom-llm + ... model_type=llm + ... model_name=test-model + ... model_url=https://example.com/v1 + + Add Model To Config Manager ${cm} ${new_model} + + ${config_after}= Call Method ${cm} get_full_config + ${models_after}= Get From Dictionary ${config_after} models + ${new_count}= Get Length ${models_after} + ${expected_count}= Evaluate ${original_count} + 1 + + Should Be Equal As Integers ${new_count} ${expected_count} + + ${model_names}= Create List + FOR ${m} IN @{models_after} + Append To List ${model_names} ${m["name"]} + END + + List Should Contain Value ${model_names} openai-llm + List Should Contain Value ${model_names} local-embed + List Should Contain Value ${model_names} custom-llm \ No newline at end of file diff --git a/tests/configuration/test_transcription_url.robot b/tests/configuration/test_transcription_url.robot new file mode 100644 index 00000000..e0ba40e8 --- /dev/null +++ b/tests/configuration/test_transcription_url.robot @@ -0,0 +1,126 @@ +*** Settings *** +Documentation Tests for Transcription Service URL Configuration +Library Collections +Library ../libs/ConfigTestHelper.py + +*** Test Cases *** +Vibevoice Url Without Http Prefix + [Documentation] Test that VIBEVOICE_ASR_URL without http:// prefix works correctly. + ${config_template}= Create Dictionary model_url=http://\${oc.env:VIBEVOICE_ASR_URL,host.docker.internal:8767} + ${env_vars}= Create Dictionary VIBEVOICE_ASR_URL=host.docker.internal:8767 + + ${resolved}= Resolve Omega Config ${config_template} ${env_vars} + Should Be Equal ${resolved["model_url"]} http://host.docker.internal:8767 + Should Not Contain ${resolved["model_url"]} http://http:// + +Vibevoice Url With Http Prefix Causes Double Prefix + [Documentation] Test that VIBEVOICE_ASR_URL WITH http:// causes double prefix (bug scenario). + ${config_template}= Create Dictionary model_url=http://\${oc.env:VIBEVOICE_ASR_URL,host.docker.internal:8767} + ${env_vars}= Create Dictionary VIBEVOICE_ASR_URL=http://host.docker.internal:8767 + + ${resolved}= Resolve Omega Config ${config_template} ${env_vars} + Should Be Equal ${resolved["model_url"]} http://http://host.docker.internal:8767 + Should Contain ${resolved["model_url"]} http://http:// + +Vibevoice Url Default Fallback + [Documentation] Test that default fallback works when VIBEVOICE_ASR_URL is not set. + ${config_template}= Create Dictionary model_url=http://\${oc.env:VIBEVOICE_ASR_URL,host.docker.internal:8767} + ${env_vars}= Create Dictionary + + ${resolved}= Resolve Omega Config ${config_template} ${env_vars} + Should Be Equal ${resolved["model_url"]} http://host.docker.internal:8767 + +Parakeet Url Configuration + [Documentation] Test that PARAKEET_ASR_URL follows same pattern. + ${config_template}= Create Dictionary model_url=http://\${oc.env:PARAKEET_ASR_URL,172.17.0.1:8767} + ${env_vars}= Create Dictionary PARAKEET_ASR_URL=host.docker.internal:8767 + + ${resolved}= Resolve Omega Config ${config_template} ${env_vars} + Should Be Equal ${resolved["model_url"]} http://host.docker.internal:8767 + Should Not Contain ${resolved["model_url"]} http://http:// + +Url Parsing Removes Double Slashes + [Documentation] Test that URL with double http:// causes connection failures (simulated by parsing check). + + # Valid URL + ${valid_url}= Set Variable http://host.docker.internal:8767/transcribe + ${parsed_valid}= Check Url Parsing ${valid_url} + Should Be Equal ${parsed_valid["scheme"]} http + Should Be Equal ${parsed_valid["netloc"]} host.docker.internal:8767 + + # Invalid URL + ${invalid_url}= Set Variable http://http://host.docker.internal:8767/transcribe + ${parsed_invalid}= Check Url Parsing ${invalid_url} + Should Be Equal ${parsed_invalid["scheme"]} http + # In python urlparse, 'http:' becomes the netloc for 'http://http://...' + Should Be Equal ${parsed_invalid["netloc"]} http: + Should Not Be Equal ${parsed_invalid["netloc"]} host.docker.internal:8767 + +Use Provider Segments Default False + [Documentation] Test that use_provider_segments defaults to false. + ${transcription}= Create Dictionary + ${backend}= Create Dictionary transcription=${transcription} + ${config_template}= Create Dictionary backend=${backend} + ${env_vars}= Create Dictionary + + ${resolved}= Resolve Omega Config ${config_template} ${env_vars} + ${val}= Evaluate $resolved.get('backend', {}).get('transcription', {}).get('use_provider_segments', False) + Should Be Equal ${val} ${FALSE} + +Use Provider Segments Explicit True + [Documentation] Test that use_provider_segments can be enabled. + ${transcription}= Create Dictionary use_provider_segments=${TRUE} + ${backend}= Create Dictionary transcription=${transcription} + ${config_template}= Create Dictionary backend=${backend} + ${env_vars}= Create Dictionary + + ${resolved}= Resolve Omega Config ${config_template} ${env_vars} + ${val}= Evaluate $resolved['backend']['transcription']['use_provider_segments'] + Should Be Equal ${val} ${TRUE} + +Vibevoice Should Use Provider Segments + [Documentation] Test that VibeVoice provider should have use_provider_segments=true since it provides diarized segments. + # Logic simulation + ${vibevoice_capabilities}= Create List segments diarization + ${has_diarization}= Evaluate "diarization" in $vibevoice_capabilities + ${has_segments}= Evaluate "segments" in $vibevoice_capabilities + ${should_use_segments}= Evaluate $has_diarization and $has_segments + Should Be Equal ${should_use_segments} ${TRUE} + +Model Registry Url Resolution With Env Var + [Documentation] Test that model URLs resolve correctly from environment. + ${model_def}= Create Dictionary + ... name=stt-vibevoice + ... model_type=stt + ... model_provider=vibevoice + ... model_url=http://\${oc.env:VIBEVOICE_ASR_URL,host.docker.internal:8767} + + ${models}= Create List ${model_def} + ${defaults}= Create Dictionary stt=stt-vibevoice + ${config_template}= Create Dictionary defaults=${defaults} models=${models} + + ${env_vars}= Create Dictionary VIBEVOICE_ASR_URL=host.docker.internal:8767 + + ${resolved}= Resolve Omega Config ${config_template} ${env_vars} + ${resolved_models}= Get From Dictionary ${resolved} models + Should Be Equal ${resolved_models[0]["model_url"]} http://host.docker.internal:8767 + +Multiple Asr Providers Url Resolution + [Documentation] Test that multiple ASR providers can use different URL patterns. + ${m1}= Create Dictionary name=stt-vibevoice model_url=http://\${oc.env:VIBEVOICE_ASR_URL,host.docker.internal:8767} + ${m2}= Create Dictionary name=stt-parakeet model_url=http://\${oc.env:PARAKEET_ASR_URL,172.17.0.1:8767} + ${m3}= Create Dictionary name=stt-deepgram model_url=https://api.deepgram.com/v1 + + ${models}= Create List ${m1} ${m2} ${m3} + ${config_template}= Create Dictionary models=${models} + + ${env_vars}= Create Dictionary + ... VIBEVOICE_ASR_URL=host.docker.internal:8767 + ... PARAKEET_ASR_URL=localhost:8080 + + ${resolved}= Resolve Omega Config ${config_template} ${env_vars} + ${resolved_models}= Get From Dictionary ${resolved} models + + Should Be Equal ${resolved_models[0]["model_url"]} http://host.docker.internal:8767 + Should Be Equal ${resolved_models[1]["model_url"]} http://localhost:8080 + Should Be Equal ${resolved_models[2]["model_url"]} https://api.deepgram.com/v1 diff --git a/tests/libs/ConfigTestHelper.py b/tests/libs/ConfigTestHelper.py new file mode 100644 index 00000000..6fbdcab4 --- /dev/null +++ b/tests/libs/ConfigTestHelper.py @@ -0,0 +1,73 @@ +import os +import sys +import yaml +from pathlib import Path +from typing import Dict, Any, Optional, List +from omegaconf import OmegaConf +from unittest.mock import patch + +# Add repo root to path to import config_manager +sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent)) +from config_manager import ConfigManager + +class ConfigTestHelper: + """Helper library for testing configuration logic.""" + + def _to_dict(self, obj: Any) -> Any: + """Recursively converts Robot Framework DotDict to standard dict.""" + if isinstance(obj, dict): + return {k: self._to_dict(v) for k, v in obj.items()} + if isinstance(obj, list): + return [self._to_dict(v) for v in obj] + return obj + + def resolve_omega_config(self, config_template: Dict[str, Any], env_vars: Dict[str, str]) -> Dict[str, Any]: + """ + Resolves an OmegaConf configuration template with provided environment variables. + """ + config_template = self._to_dict(config_template) + # We need to ensure values are strings for os.environ + str_env_vars = {k: str(v) for k, v in env_vars.items()} + + with patch.dict(os.environ, str_env_vars): + conf = OmegaConf.create(config_template) + resolved = OmegaConf.to_container(conf, resolve=True) + return resolved + + def check_url_parsing(self, url: str) -> Dict[str, Any]: + """ + Parses a URL and returns its components to verify correct parsing. + """ + from urllib.parse import urlparse + parsed = urlparse(url) + return { + "scheme": parsed.scheme, + "netloc": parsed.netloc, + "path": parsed.path + } + + def create_temp_config_structure(self, base_path: str, content: Dict[str, Any]) -> str: + """ + Creates the config folder structure and config.yml within the given base path. + """ + content = self._to_dict(content) + path = Path(base_path) / "config" + path.mkdir(parents=True, exist_ok=True) + config_file = path / "config.yml" + with open(config_file, "w") as f: + yaml.dump(content, f, default_flow_style=False, sort_keys=False) + return str(base_path) + + def get_config_manager_instance(self, repo_root: str) -> ConfigManager: + """Returns a ConfigManager instance configured with the given repo_root.""" + return ConfigManager(service_path=None, repo_root=Path(repo_root)) + + def add_model_to_config_manager(self, cm: ConfigManager, model_def: Dict[str, Any]): + """Wrapper for add_or_update_model that converts arguments.""" + model_def = self._to_dict(model_def) + cm.add_or_update_model(model_def) + + def update_defaults_in_config_manager(self, cm: ConfigManager, updates: Dict[str, str]): + """Wrapper for update_config_defaults that converts arguments.""" + updates = self._to_dict(updates) + cm.update_config_defaults(updates) \ No newline at end of file diff --git a/tests/test-requirements.txt b/tests/test-requirements.txt index f32614e0..5cd8f020 100644 --- a/tests/test-requirements.txt +++ b/tests/test-requirements.txt @@ -6,4 +6,6 @@ robotframework-databaselibrary python-dotenv websockets pymongo +omegaconf +pyyaml \ No newline at end of file From 6b91a1e993b95eb5a867ffae488660380379ba06 Mon Sep 17 00:00:00 2001 From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com> Date: Thu, 19 Feb 2026 18:46:14 +0000 Subject: [PATCH 03/11] Enhance Docker Compose and service management for LangFuse integration - Updated `services.py` to include new options for service management, allowing for forced recreation of containers during startup. - Added LangFuse configuration options in the setup wizard, improving user experience for observability setup. - Introduced new API endpoints for retrieving observability configuration, enhancing integration with the frontend. - Enhanced error handling and logging for service startup processes, ensuring better visibility of configuration issues. - Updated documentation to reflect changes in service management and LangFuse integration. --- backends/advanced/init.py | 26 +++++++++++++ .../controllers/system_controller.py | 27 +++++++++++++ .../src/advanced_omi_backend/llm_client.py | 36 +++++++++++++++--- .../routers/modules/system_routes.py | 10 +++++ .../services/memory/base.py | 4 ++ .../services/memory/providers/chronicle.py | 7 +++- .../memory/providers/llm_providers.py | 38 ++++++++++++++----- .../utils/conversation_utils.py | 6 ++- .../workers/conversation_jobs.py | 6 ++- .../src/components/SpeakerNameDropdown.tsx | 22 ++++++++++- .../webui/src/pages/ConversationDetail.tsx | 27 ++++++++++++- backends/advanced/webui/src/services/api.ts | 3 ++ config/defaults.yml | 10 +++++ services.py | 29 ++++++++------ 14 files changed, 216 insertions(+), 35 deletions(-) diff --git a/backends/advanced/init.py b/backends/advanced/init.py index a1448876..4ea037b2 100644 --- a/backends/advanced/init.py +++ b/backends/advanced/init.py @@ -662,9 +662,15 @@ def setup_langfuse(self): self.config["LANGFUSE_PUBLIC_KEY"] = langfuse_pub self.config["LANGFUSE_SECRET_KEY"] = langfuse_sec self.config["LANGFUSE_BASE_URL"] = langfuse_host + + # Derive browser-accessible URL for deep-links + public_url = getattr(self.args, 'langfuse_public_url', None) or "http://localhost:3002" + self._save_langfuse_public_url(public_url) + source = "external" if "langfuse-web" not in langfuse_host else "local" self.console.print(f"[green][SUCCESS][/green] LangFuse auto-configured ({source})") self.console.print(f"[blue][INFO][/blue] Host: {langfuse_host}") + self.console.print(f"[blue][INFO][/blue] Public URL: {public_url}") self.console.print(f"[blue][INFO][/blue] Public key: {self.mask_api_key(langfuse_pub)}") return @@ -710,10 +716,28 @@ def setup_langfuse(self): if secret_key: self.config["LANGFUSE_SECRET_KEY"] = secret_key + # Browser-accessible URL for deep-links (stored in config.yml, not .env) + public_url = Prompt.ask( + "LangFuse browser URL (for dashboard links)", + default="http://localhost:3002", + ) + if public_url: + self._save_langfuse_public_url(public_url) + self.console.print("[green][SUCCESS][/green] LangFuse configured") else: self.console.print("[blue][INFO][/blue] LangFuse disabled") + def _save_langfuse_public_url(self, public_url: str): + """Save the Langfuse browser-accessible URL to config.yml.""" + full_config = self.config_manager.get_full_config() + if "observability" not in full_config: + full_config["observability"] = {} + if "langfuse" not in full_config["observability"]: + full_config["observability"]["langfuse"] = {} + full_config["observability"]["langfuse"]["public_url"] = public_url + self.config_manager.save_full_config(full_config) + def setup_network(self): """Configure network settings""" self.print_section("Network Configuration") @@ -1038,6 +1062,8 @@ def main(): help="LangFuse project secret key (from langfuse init or external)") parser.add_argument("--langfuse-host", help="LangFuse host URL (default: http://langfuse-web:3000 for local)") + parser.add_argument("--langfuse-public-url", + help="LangFuse browser-accessible URL for deep-links (default: http://localhost:3002)") parser.add_argument("--streaming-provider", choices=["deepgram", "smallest", "qwen3-asr"], help="Streaming provider when different from batch (enables batch re-transcription)") diff --git a/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py index 263b806f..bf3ce1b1 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py @@ -283,6 +283,33 @@ async def get_auth_config(): } +async def get_observability_config(): + """Get observability configuration for frontend (Langfuse deep-links). + + Returns non-secret data only (enabled status and browser URL). + """ + from advanced_omi_backend.openai_factory import is_langfuse_enabled + + enabled = is_langfuse_enabled() + session_base_url = None + + if enabled: + from advanced_omi_backend.config_loader import load_config + + cfg = load_config() + public_url = cfg.get("observability", {}).get("langfuse", {}).get("public_url", "") + if public_url: + # Strip trailing slash and build session URL + session_base_url = f"{public_url.rstrip('/')}/project/chronicle/sessions" + + return { + "langfuse": { + "enabled": enabled, + "session_base_url": session_base_url, + } + } + + # Audio file processing functions moved to audio_controller.py diff --git a/backends/advanced/src/advanced_omi_backend/llm_client.py b/backends/advanced/src/advanced_omi_backend/llm_client.py index ca640640..96ccc77b 100644 --- a/backends/advanced/src/advanced_omi_backend/llm_client.py +++ b/backends/advanced/src/advanced_omi_backend/llm_client.py @@ -11,7 +11,7 @@ from typing import Any, Dict, Optional from advanced_omi_backend.model_registry import get_models_registry -from advanced_omi_backend.openai_factory import create_openai_client +from advanced_omi_backend.openai_factory import create_openai_client, is_langfuse_enabled from advanced_omi_backend.services.memory.config import ( load_config_yml as _load_root_config, ) @@ -78,7 +78,8 @@ def __init__( raise def generate( - self, prompt: str, model: str | None = None, temperature: float | None = None + self, prompt: str, model: str | None = None, temperature: float | None = None, + **langfuse_kwargs, ) -> str: """Generate text completion using OpenAI-compatible API.""" try: @@ -90,6 +91,8 @@ def generate( "messages": [{"role": "user", "content": prompt}], "temperature": temp, } + if is_langfuse_enabled(): + params.update(langfuse_kwargs) response = self.client.chat.completions.create(**params) return response.choices[0].message.content.strip() @@ -98,7 +101,8 @@ def generate( raise def chat_with_tools( - self, messages: list, tools: list | None = None, model: str | None = None, temperature: float | None = None + self, messages: list, tools: list | None = None, model: str | None = None, + temperature: float | None = None, **langfuse_kwargs, ): """Chat completion with tool/function calling support. Returns raw response object.""" model_name = model or self.model @@ -109,6 +113,8 @@ def chat_with_tools( } if tools: params["tools"] = tools + if is_langfuse_enabled(): + params.update(langfuse_kwargs) return self.client.chat.completions.create(**params) def health_check(self) -> Dict: @@ -190,12 +196,20 @@ def reset_llm_client(): _llm_client = None +def _langfuse_metadata(session_id: str | None) -> dict: + """Return metadata dict with langfuse_session_id if Langfuse is enabled.""" + if session_id and is_langfuse_enabled(): + return {"langfuse_session_id": session_id} + return {} + + # Async wrapper for blocking LLM operations async def async_generate( prompt: str, model: str | None = None, temperature: float | None = None, operation: str | None = None, + langfuse_session_id: str | None = None, ) -> str: """Async wrapper for LLM text generation. @@ -203,6 +217,10 @@ async def async_generate( ``llm_operations`` config section via ``get_llm_operation()``. The resolved config determines model, temperature, max_tokens, etc. Explicit ``model``/``temperature`` kwargs still override the resolved values. + + When ``langfuse_session_id`` is provided and Langfuse is enabled, + the session ID is set on the current Langfuse trace to group all + LLM calls for a conversation. """ if operation: registry = get_models_registry() @@ -210,19 +228,21 @@ async def async_generate( op = registry.get_llm_operation(operation) client = op.get_client(is_async=True) api_params = op.to_api_params() - # Allow explicit overrides if temperature is not None: api_params["temperature"] = temperature if model is not None: api_params["model"] = model api_params["messages"] = [{"role": "user", "content": prompt}] + api_params["metadata"] = _langfuse_metadata(langfuse_session_id) response = await client.chat.completions.create(**api_params) return response.choices[0].message.content.strip() # Fallback: use singleton client client = get_llm_client() loop = asyncio.get_running_loop() - return await loop.run_in_executor(None, client.generate, prompt, model, temperature) + return await loop.run_in_executor( + None, lambda: client.generate(prompt, model, temperature) + ) async def async_chat_with_tools( @@ -231,6 +251,7 @@ async def async_chat_with_tools( model: str | None = None, temperature: float | None = None, operation: str | None = None, + langfuse_session_id: str | None = None, ): """Async wrapper for chat completion with tool calling. @@ -249,12 +270,15 @@ async def async_chat_with_tools( api_params["messages"] = messages if tools: api_params["tools"] = tools + api_params["metadata"] = _langfuse_metadata(langfuse_session_id) return await client.chat.completions.create(**api_params) # Fallback: use singleton client client = get_llm_client() loop = asyncio.get_running_loop() - return await loop.run_in_executor(None, client.chat_with_tools, messages, tools, model, temperature) + return await loop.run_in_executor( + None, lambda: client.chat_with_tools(messages, tools, model, temperature) + ) async def async_health_check() -> Dict: diff --git a/backends/advanced/src/advanced_omi_backend/routers/modules/system_routes.py b/backends/advanced/src/advanced_omi_backend/routers/modules/system_routes.py index 44d44c1a..277d7dc1 100644 --- a/backends/advanced/src/advanced_omi_backend/routers/modules/system_routes.py +++ b/backends/advanced/src/advanced_omi_backend/routers/modules/system_routes.py @@ -50,6 +50,16 @@ async def get_auth_config(): return await system_controller.get_auth_config() +@router.get("/observability") +async def get_observability_config(): + """Get observability configuration for frontend (Langfuse deep-links). + + Returns non-secret data: enabled status and browser-accessible session URL. + No authentication required. + """ + return await system_controller.get_observability_config() + + @router.get("/diarization-settings") async def get_diarization_settings(current_user: User = Depends(current_superuser)): """Get current diarization settings. Admin only.""" diff --git a/backends/advanced/src/advanced_omi_backend/services/memory/base.py b/backends/advanced/src/advanced_omi_backend/services/memory/base.py index ce0fe22b..bae18e56 100644 --- a/backends/advanced/src/advanced_omi_backend/services/memory/base.py +++ b/backends/advanced/src/advanced_omi_backend/services/memory/base.py @@ -342,6 +342,7 @@ class LLMProviderBase(ABC): @abstractmethod async def extract_memories( self, text: str, prompt: str, user_id: Optional[str] = None, + langfuse_session_id: Optional[str] = None, ) -> List[str]: """Extract meaningful fact memories from text using an LLM. @@ -349,6 +350,7 @@ async def extract_memories( text: Input text to extract memories from prompt: System prompt to guide the extraction process user_id: Optional user ID for per-user prompt override resolution + langfuse_session_id: Optional session ID for Langfuse trace grouping Returns: List of extracted fact memory strings @@ -373,6 +375,7 @@ async def propose_memory_actions( retrieved_old_memory: List[Dict[str, str]], new_facts: List[str], custom_prompt: Optional[str] = None, + langfuse_session_id: Optional[str] = None, ) -> Dict[str, Any]: """Propose memory management actions based on existing and new information. @@ -398,6 +401,7 @@ async def propose_reprocess_actions( diff_context: str, new_transcript: str, custom_prompt: Optional[str] = None, + langfuse_session_id: Optional[str] = None, ) -> Dict[str, Any]: """Propose memory updates after transcript reprocessing (e.g., speaker changes). diff --git a/backends/advanced/src/advanced_omi_backend/services/memory/providers/chronicle.py b/backends/advanced/src/advanced_omi_backend/services/memory/providers/chronicle.py index abfb7bb5..d1f51775 100644 --- a/backends/advanced/src/advanced_omi_backend/services/memory/providers/chronicle.py +++ b/backends/advanced/src/advanced_omi_backend/services/memory/providers/chronicle.py @@ -156,6 +156,7 @@ async def add_memory( fact_memories_text = await asyncio.wait_for( self.llm_provider.extract_memories( transcript, self.config.extraction_prompt, user_id=user_id, + langfuse_session_id=source_id, ), timeout=self.config.timeout_seconds, ) @@ -193,7 +194,8 @@ async def add_memory( if allow_update and fact_memories_text: memory_logger.info(f"š Allowing update for {source_id}") created_ids = await self._process_memory_updates( - fact_memories_text, embeddings, user_id, client_id, source_id, user_email + fact_memories_text, embeddings, user_id, client_id, source_id, user_email, + langfuse_session_id=source_id, ) else: memory_logger.info(f"š Not allowing update for {source_id}") @@ -578,6 +580,7 @@ async def reprocess_memory( existing_memories=existing_memory_dicts, diff_context=diff_text, new_transcript=transcript, + langfuse_session_id=source_id, ) memory_logger.info( f"š Reprocess LLM returned actions: {actions_obj}" @@ -786,6 +789,7 @@ async def _process_memory_updates( client_id: str, source_id: str, user_email: str, + langfuse_session_id: Optional[str] = None, ) -> List[str]: """Process memory updates using LLM-driven action proposals. @@ -848,6 +852,7 @@ async def _process_memory_updates( retrieved_old_memory=retrieved_old_memory, new_facts=memories_text, custom_prompt=None, + langfuse_session_id=langfuse_session_id, ) memory_logger.info(f"š UpdateMemory LLM returned: {type(actions_obj)} - {actions_obj}") except Exception as e_actions: diff --git a/backends/advanced/src/advanced_omi_backend/services/memory/providers/llm_providers.py b/backends/advanced/src/advanced_omi_backend/services/memory/providers/llm_providers.py index 25afc38a..2d83d24c 100644 --- a/backends/advanced/src/advanced_omi_backend/services/memory/providers/llm_providers.py +++ b/backends/advanced/src/advanced_omi_backend/services/memory/providers/llm_providers.py @@ -15,7 +15,7 @@ from typing import Any, Dict, List, Optional from advanced_omi_backend.model_registry import ModelDef, get_models_registry -from advanced_omi_backend.openai_factory import create_openai_client +from advanced_omi_backend.openai_factory import create_openai_client, is_langfuse_enabled from advanced_omi_backend.prompt_registry import get_prompt_registry from ..base import LLMProviderBase @@ -38,6 +38,13 @@ memory_logger = logging.getLogger("memory_service") +def _langfuse_metadata(session_id: str | None) -> dict: + """Return metadata dict with langfuse_session_id if Langfuse is enabled.""" + if session_id and is_langfuse_enabled(): + return {"langfuse_session_id": session_id} + return {} + + def _get_openai_client(api_key: str, base_url: str, is_async: bool = False): """Get OpenAI client with optional Langfuse tracing. @@ -186,6 +193,7 @@ def __init__(self, config: Dict[str, Any]): async def extract_memories( self, text: str, prompt: str, user_id: Optional[str] = None, + langfuse_session_id: Optional[str] = None, ) -> List[str]: """Extract memories using OpenAI API with the enhanced fact retrieval prompt. @@ -193,6 +201,7 @@ async def extract_memories( text: Input text to extract memories from prompt: System prompt to guide extraction (uses default if empty) user_id: Optional user ID for per-user prompt override resolution + langfuse_session_id: Optional session ID for Langfuse trace grouping Returns: List of extracted memory strings @@ -209,12 +218,12 @@ async def extract_memories( user_id, current_date=datetime.now().strftime("%Y-%m-%d"), ) - + # local models can only handle small chunks of input text text_chunks = chunk_text_with_spacy(text) - + # Process all chunks in sequence, not concurrently - results = [await self._process_chunk(system_prompt, chunk, i) for i, chunk in enumerate(text_chunks)] + results = [await self._process_chunk(system_prompt, chunk, i, langfuse_session_id=langfuse_session_id) for i, chunk in enumerate(text_chunks)] # Spread list of list of facts into a single list of facts cleaned_facts = [] @@ -228,23 +237,26 @@ async def extract_memories( memory_logger.error(f"OpenAI memory extraction failed: {e}") return [] - async def _process_chunk(self, system_prompt: str, chunk: str, index: int) -> List[str]: + async def _process_chunk( + self, system_prompt: str, chunk: str, index: int, + langfuse_session_id: Optional[str] = None, + ) -> List[str]: """Process a single text chunk to extract memories using OpenAI API. - + This private method handles the LLM interaction for a single chunk of text, sending it to OpenAI's chat completion API with the specified system prompt to extract structured memory facts. - + Args: - client: OpenAI async client instance for API communication system_prompt: System prompt that guides the memory extraction behavior chunk: Individual text chunk to process for memory extraction index: Index of the chunk for logging and error tracking purposes - + langfuse_session_id: Optional session ID for Langfuse trace grouping + Returns: List of extracted memory fact strings from the chunk. Returns empty list if no facts are found or if an error occurs during processing. - + Note: Errors are logged but don't propagate to avoid failing the entire memory extraction process. @@ -258,6 +270,7 @@ async def _process_chunk(self, system_prompt: str, chunk: str, index: int) -> Li {"role": "system", "content": system_prompt}, {"role": "user", "content": chunk}, ], + metadata=_langfuse_metadata(langfuse_session_id), ) facts = (response.choices[0].message.content or "").strip() if not facts: @@ -314,6 +327,7 @@ async def propose_memory_actions( retrieved_old_memory: List[Dict[str, str]] | List[str], new_facts: List[str], custom_prompt: Optional[str] = None, + langfuse_session_id: Optional[str] = None, ) -> Dict[str, Any]: """Use OpenAI chat completion with enhanced prompt to propose memory actions. @@ -321,6 +335,7 @@ async def propose_memory_actions( retrieved_old_memory: List of existing memories for context new_facts: List of new facts to process custom_prompt: Optional custom prompt to override default + langfuse_session_id: Optional session ID for Langfuse trace grouping Returns: Dictionary containing proposed memory actions @@ -340,6 +355,7 @@ async def propose_memory_actions( response = await client.chat.completions.create( **op.to_api_params(), messages=update_memory_messages, + metadata=_langfuse_metadata(langfuse_session_id), ) content = (response.choices[0].message.content or "").strip() if not content: @@ -365,6 +381,7 @@ async def propose_reprocess_actions( diff_context: str, new_transcript: str, custom_prompt: Optional[str] = None, + langfuse_session_id: Optional[str] = None, ) -> Dict[str, Any]: """Propose memory updates after speaker re-identification. @@ -427,6 +444,7 @@ async def propose_reprocess_actions( response = await client.chat.completions.create( **op.to_api_params(), messages=messages, + metadata=_langfuse_metadata(langfuse_session_id), ) content = (response.choices[0].message.content or "").strip() diff --git a/backends/advanced/src/advanced_omi_backend/utils/conversation_utils.py b/backends/advanced/src/advanced_omi_backend/utils/conversation_utils.py index a199d5fa..63036ce1 100644 --- a/backends/advanced/src/advanced_omi_backend/utils/conversation_utils.py +++ b/backends/advanced/src/advanced_omi_backend/utils/conversation_utils.py @@ -164,6 +164,7 @@ async def generate_title_and_summary( text: str, segments: Optional[list] = None, user_id: Optional[str] = None, + langfuse_session_id: Optional[str] = None, ) -> tuple[str, str]: """ Generate title and short summary in a single LLM call using full conversation context. @@ -221,7 +222,7 @@ async def generate_title_and_summary( "{conversation_text}" """ - response = await async_generate(prompt, operation="title_summary") + response = await async_generate(prompt, operation="title_summary", langfuse_session_id=langfuse_session_id) # Parse response for Title: and Summary: lines title = None @@ -253,6 +254,7 @@ async def generate_detailed_summary( text: str, segments: Optional[list] = None, memory_context: Optional[str] = None, + langfuse_session_id: Optional[str] = None, ) -> str: """ Generate a comprehensive, detailed summary of the conversation. @@ -328,7 +330,7 @@ async def generate_detailed_summary( "{conversation_text}" """ - summary = await async_generate(prompt, operation="detailed_summary") + summary = await async_generate(prompt, operation="detailed_summary", langfuse_session_id=langfuse_session_id) return summary.strip().strip('"').strip("'") or "No meaningful content to summarize" except Exception as e: diff --git a/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py index 021bc9da..b1bbb8cd 100644 --- a/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py +++ b/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py @@ -1025,10 +1025,12 @@ async def generate_title_summary_job(conversation_id: str, *, redis_client=None) (title, short_summary), detailed_summary = await asyncio.gather( generate_title_and_summary( - transcript_text, segments=segments, user_id=conversation.user_id + transcript_text, segments=segments, user_id=conversation.user_id, + langfuse_session_id=conversation_id, ), generate_detailed_summary( - transcript_text, segments=segments, memory_context=memory_context + transcript_text, segments=segments, memory_context=memory_context, + langfuse_session_id=conversation_id, ), ) diff --git a/backends/advanced/webui/src/components/SpeakerNameDropdown.tsx b/backends/advanced/webui/src/components/SpeakerNameDropdown.tsx index 45b0ff07..b0553fc7 100644 --- a/backends/advanced/webui/src/components/SpeakerNameDropdown.tsx +++ b/backends/advanced/webui/src/components/SpeakerNameDropdown.tsx @@ -1,7 +1,9 @@ import { useState, useRef, useEffect } from 'react' -import { Check, Plus } from 'lucide-react' +import { Check, Plus, UserX } from 'lucide-react' import { useSortedSpeakers } from '../hooks/useSortedSpeakers' +const UNKNOWN_SPEAKER = 'Unknown Speaker' + interface SpeakerNameDropdownProps { currentSpeaker: string enrolledSpeakers: Array<{ speaker_id: string; name: string }> @@ -112,6 +114,24 @@ export default function SpeakerNameDropdown({ /> + {/* Unknown Speaker option */} + {(!searchQuery || UNKNOWN_SPEAKER.toLowerCase().includes(searchQuery.toLowerCase())) && ( +