diff --git a/backends/advanced/init.py b/backends/advanced/init.py index 4ea037b2..f26958eb 100644 --- a/backends/advanced/init.py +++ b/backends/advanced/init.py @@ -34,21 +34,31 @@ def __init__(self, args=None): self.console = Console() self.config: Dict[str, Any] = {} self.args = args or argparse.Namespace() - self.config_yml_path = Path("../../config/config.yml") # Main config at config/config.yml + self.config_yml_path = Path( + "../../config/config.yml" + ) # Main config at config/config.yml # Check if we're in the right directory if not Path("pyproject.toml").exists() or not Path("src").exists(): - self.console.print("[red][ERROR][/red] Please run this script from the backends/advanced directory") + self.console.print( + "[red][ERROR][/red] Please run this script from the backends/advanced directory" + ) sys.exit(1) # Initialize ConfigManager (single source of truth for config.yml) self.config_manager = ConfigManager(service_path="backends/advanced") - self.console.print(f"[blue][INFO][/blue] Using config.yml at: {self.config_manager.config_yml_path}") + self.console.print( + f"[blue][INFO][/blue] Using config.yml at: {self.config_manager.config_yml_path}" + ) # Verify config.yml exists - fail fast if missing if not self.config_manager.config_yml_path.exists(): - self.console.print(f"[red][ERROR][/red] config.yml not found at {self.config_manager.config_yml_path}") - self.console.print("[red][ERROR][/red] Run wizard.py from project root to create config.yml") + self.console.print( + f"[red][ERROR][/red] config.yml not found at {self.config_manager.config_yml_path}" + ) + self.console.print( + "[red][ERROR][/red] Run wizard.py from project root to create config.yml" + ) sys.exit(1) # Ensure plugins.yml exists (copy from template if missing) @@ -57,11 +67,7 @@ def __init__(self, args=None): def print_header(self, title: str): """Print a colorful header""" self.console.print() - panel = Panel( - Text(title, style="cyan bold"), - style="cyan", - expand=False - ) 
+ panel = Panel(Text(title, style="cyan bold"), style="cyan", expand=False) self.console.print(panel) self.console.print() @@ -84,19 +90,23 @@ def prompt_password(self, prompt: str) -> str: """Prompt for password (delegates to shared utility)""" return util_prompt_password(prompt, min_length=8, allow_generated=True) - def prompt_choice(self, prompt: str, choices: Dict[str, str], default: str = "1") -> str: + def prompt_choice( + self, prompt: str, choices: Dict[str, str], default: str = "1" + ) -> str: """Prompt for a choice from options""" self.console.print(prompt) for key, desc in choices.items(): self.console.print(f" {key}) {desc}") self.console.print() - + while True: try: choice = Prompt.ask("Enter choice", default=default) if choice in choices: return choice - self.console.print(f"[red]Invalid choice. Please select from {list(choices.keys())}[/red]") + self.console.print( + f"[red]Invalid choice. Please select from {list(choices.keys())}[/red]" + ) except EOFError: self.console.print(f"Using default choice: {default}") return default @@ -108,11 +118,19 @@ def _ensure_plugins_yml_exists(self): if not plugins_yml.exists(): if plugins_template.exists(): - self.console.print("[blue][INFO][/blue] plugins.yml not found, creating from template...") + self.console.print( + "[blue][INFO][/blue] plugins.yml not found, creating from template..." 
+ ) shutil.copy2(plugins_template, plugins_yml) - self.console.print(f"[green]✅[/green] Created {plugins_yml} from template") - self.console.print("[yellow][NOTE][/yellow] Edit config/plugins.yml to configure plugins") - self.console.print("[yellow][NOTE][/yellow] Set HA_TOKEN in .env for Home Assistant integration") + self.console.print( + f"[green]✅[/green] Created {plugins_yml} from template" + ) + self.console.print( + "[yellow][NOTE][/yellow] Edit config/plugins.yml to configure plugins" + ) + self.console.print( + "[yellow][NOTE][/yellow] Set HA_TOKEN in .env for Home Assistant integration" + ) else: raise RuntimeError( f"Template file not found: {plugins_template}\n" @@ -128,7 +146,9 @@ def backup_existing_env(self): timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") backup_path = f".env.backup.{timestamp}" shutil.copy2(env_path, backup_path) - self.console.print(f"[blue][INFO][/blue] Backed up existing .env file to {backup_path}") + self.console.print( + f"[blue][INFO][/blue] Backed up existing .env file to {backup_path}" + ) def read_existing_env_value(self, key: str) -> str: """Read a value from existing .env file (delegates to shared utility)""" @@ -138,8 +158,14 @@ def mask_api_key(self, key: str, show_chars: int = 5) -> str: """Mask API key (delegates to shared utility)""" return mask_value(key, show_chars) - def prompt_with_existing_masked(self, prompt_text: str, env_key: str, placeholders: list, - is_password: bool = False, default: str = "") -> str: + def prompt_with_existing_masked( + self, + prompt_text: str, + env_key: str, + placeholders: list, + is_password: bool = False, + default: str = "", + ) -> str: """ Prompt for a value, showing masked existing value from .env if present. Delegates to shared utility from setup_utils. 
@@ -161,10 +187,9 @@ def prompt_with_existing_masked(self, prompt_text: str, env_key: str, placeholde env_key=env_key, placeholders=placeholders, is_password=is_password, - default=default + default=default, ) - def setup_authentication(self): """Configure authentication settings""" self.print_section("Authentication Setup") @@ -186,13 +211,17 @@ def setup_authentication(self): ) self.config["ADMIN_PASSWORD"] = password else: - self.config["ADMIN_PASSWORD"] = self.prompt_password("Admin password (min 8 chars)") + self.config["ADMIN_PASSWORD"] = self.prompt_password( + "Admin password (min 8 chars)" + ) # Preserve existing AUTH_SECRET_KEY to avoid invalidating JWTs existing_secret = self.read_existing_env_value("AUTH_SECRET_KEY") if existing_secret: self.config["AUTH_SECRET_KEY"] = existing_secret - self.console.print("[blue][INFO][/blue] Reusing existing AUTH_SECRET_KEY (existing JWT tokens remain valid)") + self.console.print( + "[blue][INFO][/blue] Reusing existing AUTH_SECRET_KEY (existing JWT tokens remain valid)" + ) else: self.config["AUTH_SECRET_KEY"] = secrets.token_hex(32) @@ -201,9 +230,14 @@ def setup_authentication(self): def setup_transcription(self): """Configure transcription provider - updates config.yml and .env""" # Check if transcription provider was provided via command line - if hasattr(self.args, 'transcription_provider') and self.args.transcription_provider: + if ( + hasattr(self.args, "transcription_provider") + and self.args.transcription_provider + ): provider = self.args.transcription_provider - self.console.print(f"[green]✅[/green] Transcription: {provider} (configured via wizard)") + self.console.print( + f"[green]✅[/green] Transcription: {provider} (configured via wizard)" + ) # Map provider to choice if provider == "deepgram": @@ -223,21 +257,27 @@ def setup_transcription(self): else: self.print_section("Speech-to-Text Configuration") - self.console.print("[blue][INFO][/blue] Provider selection is configured in config.yml 
(defaults.stt)") + self.console.print( + "[blue][INFO][/blue] Provider selection is configured in config.yml (defaults.stt)" + ) self.console.print("[blue][INFO][/blue] API keys are stored in .env") self.console.print() # Interactive prompt - is_macos = platform.system() == 'Darwin' + is_macos = platform.system() == "Darwin" if is_macos: parakeet_desc = "Offline (Parakeet ASR - CPU-based, runs locally)" vibevoice_desc = "Offline (VibeVoice - CPU-based, built-in diarization)" else: parakeet_desc = "Offline (Parakeet ASR - GPU recommended, runs locally)" - vibevoice_desc = "Offline (VibeVoice - GPU recommended, built-in diarization)" + vibevoice_desc = ( + "Offline (VibeVoice - GPU recommended, built-in diarization)" + ) - qwen3_desc = "Offline (Qwen3-ASR - GPU required, 52 languages, streaming + batch)" + qwen3_desc = ( + "Offline (Qwen3-ASR - GPU required, 52 languages, streaming + batch)" + ) smallest_desc = "Smallest.ai Pulse (cloud-based, fast, requires API key)" @@ -247,10 +287,12 @@ def setup_transcription(self): "3": vibevoice_desc, "4": qwen3_desc, "5": smallest_desc, - "6": "None (skip transcription setup)" + "6": "None (skip transcription setup)", } - choice = self.prompt_choice("Choose your transcription provider:", choices, "1") + choice = self.prompt_choice( + "Choose your transcription provider:", choices, "1" + ) if choice == "1": self.console.print("[blue][INFO][/blue] Deepgram selected") @@ -260,9 +302,9 @@ def setup_transcription(self): api_key = self.prompt_with_existing_masked( prompt_text="Deepgram API key (leave empty to skip)", env_key="DEEPGRAM_API_KEY", - placeholders=['your_deepgram_api_key_here', 'your-deepgram-key-here'], + placeholders=["your_deepgram_api_key_here", "your-deepgram-key-here"], is_password=True, - default="" + default="", ) if api_key: @@ -272,14 +314,20 @@ def setup_transcription(self): # Update config.yml to use Deepgram self.config_manager.update_config_defaults({"stt": "stt-deepgram"}) - 
self.console.print("[green][SUCCESS][/green] Deepgram configured in config.yml and .env") + self.console.print( + "[green][SUCCESS][/green] Deepgram configured in config.yml and .env" + ) self.console.print("[blue][INFO][/blue] Set defaults.stt: stt-deepgram") else: - self.console.print("[yellow][WARNING][/yellow] No API key provided - transcription will not work") + self.console.print( + "[yellow][WARNING][/yellow] No API key provided - transcription will not work" + ) elif choice == "2": self.console.print("[blue][INFO][/blue] Offline Parakeet ASR selected") - parakeet_url = self.prompt_value("Parakeet ASR URL", "http://host.docker.internal:8767") + parakeet_url = self.prompt_value( + "Parakeet ASR URL", "http://host.docker.internal:8767" + ) # Write URL to .env for ${PARAKEET_ASR_URL} placeholder in config.yml self.config["PARAKEET_ASR_URL"] = parakeet_url @@ -287,13 +335,23 @@ def setup_transcription(self): # Update config.yml to use Parakeet self.config_manager.update_config_defaults({"stt": "stt-parakeet-batch"}) - self.console.print("[green][SUCCESS][/green] Parakeet configured in config.yml and .env") - self.console.print("[blue][INFO][/blue] Set defaults.stt: stt-parakeet-batch") - self.console.print("[yellow][WARNING][/yellow] Remember to start Parakeet service: cd ../../extras/asr-services && docker compose up nemo-asr") + self.console.print( + "[green][SUCCESS][/green] Parakeet configured in config.yml and .env" + ) + self.console.print( + "[blue][INFO][/blue] Set defaults.stt: stt-parakeet-batch" + ) + self.console.print( + "[yellow][WARNING][/yellow] Remember to start Parakeet service: cd ../../extras/asr-services && docker compose up nemo-asr" + ) elif choice == "3": - self.console.print("[blue][INFO][/blue] Offline VibeVoice ASR selected (built-in speaker diarization)") - vibevoice_url = self.prompt_value("VibeVoice ASR URL", "http://host.docker.internal:8767") + self.console.print( + "[blue][INFO][/blue] Offline VibeVoice ASR selected (built-in 
speaker diarization)" + ) + vibevoice_url = self.prompt_value( + "VibeVoice ASR URL", "http://host.docker.internal:8767" + ) # Write URL to .env for ${VIBEVOICE_ASR_URL} placeholder in config.yml self.config["VIBEVOICE_ASR_URL"] = vibevoice_url @@ -301,14 +359,24 @@ def setup_transcription(self): # Update config.yml to use VibeVoice self.config_manager.update_config_defaults({"stt": "stt-vibevoice"}) - self.console.print("[green][SUCCESS][/green] VibeVoice configured in config.yml and .env") + self.console.print( + "[green][SUCCESS][/green] VibeVoice configured in config.yml and .env" + ) self.console.print("[blue][INFO][/blue] Set defaults.stt: stt-vibevoice") - self.console.print("[blue][INFO][/blue] VibeVoice provides built-in speaker diarization - pyannote will be skipped") - self.console.print("[yellow][WARNING][/yellow] Remember to start VibeVoice service: cd ../../extras/asr-services && docker compose up vibevoice-asr") + self.console.print( + "[blue][INFO][/blue] VibeVoice provides built-in speaker diarization - pyannote will be skipped" + ) + self.console.print( + "[yellow][WARNING][/yellow] Remember to start VibeVoice service: cd ../../extras/asr-services && docker compose up vibevoice-asr" + ) elif choice == "4": - self.console.print("[blue][INFO][/blue] Qwen3-ASR selected (52 languages, streaming + batch via vLLM)") - qwen3_url = self.prompt_value("Qwen3-ASR URL", "http://host.docker.internal:8767") + self.console.print( + "[blue][INFO][/blue] Qwen3-ASR selected (52 languages, streaming + batch via vLLM)" + ) + qwen3_url = self.prompt_value( + "Qwen3-ASR URL", "http://host.docker.internal:8767" + ) # Write URL to .env for ${QWEN3_ASR_URL} placeholder in config.yml self.config["QWEN3_ASR_URL"] = qwen3_url.replace("http://", "").rstrip("/") @@ -320,9 +388,13 @@ def setup_transcription(self): # Update config.yml to use Qwen3-ASR self.config_manager.update_config_defaults({"stt": "stt-qwen3-asr"}) - self.console.print("[green][SUCCESS][/green] Qwen3-ASR 
configured in config.yml and .env") + self.console.print( + "[green][SUCCESS][/green] Qwen3-ASR configured in config.yml and .env" + ) self.console.print("[blue][INFO][/blue] Set defaults.stt: stt-qwen3-asr") - self.console.print("[yellow][WARNING][/yellow] Remember to start Qwen3-ASR: cd ../../extras/asr-services && docker compose up qwen3-asr-wrapper qwen3-asr-bridge -d") + self.console.print( + "[yellow][WARNING][/yellow] Remember to start Qwen3-ASR: cd ../../extras/asr-services && docker compose up qwen3-asr-wrapper qwen3-asr-bridge -d" + ) elif choice == "5": self.console.print("[blue][INFO][/blue] Smallest.ai Pulse selected") @@ -332,9 +404,9 @@ def setup_transcription(self): api_key = self.prompt_with_existing_masked( prompt_text="Smallest.ai API key (leave empty to skip)", env_key="SMALLEST_API_KEY", - placeholders=['your_smallest_api_key_here', 'your-smallest-key-here'], + placeholders=["your_smallest_api_key_here", "your-smallest-key-here"], is_password=True, - default="" + default="", ) if api_key: @@ -342,16 +414,21 @@ def setup_transcription(self): self.config["SMALLEST_API_KEY"] = api_key # Update config.yml to use Smallest.ai (batch + streaming) - self.config_manager.update_config_defaults({ - "stt": "stt-smallest", - "stt_stream": "stt-smallest-stream" - }) + self.config_manager.update_config_defaults( + {"stt": "stt-smallest", "stt_stream": "stt-smallest-stream"} + ) - self.console.print("[green][SUCCESS][/green] Smallest.ai configured in config.yml and .env") + self.console.print( + "[green][SUCCESS][/green] Smallest.ai configured in config.yml and .env" + ) self.console.print("[blue][INFO][/blue] Set defaults.stt: stt-smallest") - self.console.print("[blue][INFO][/blue] Set defaults.stt_stream: stt-smallest-stream") + self.console.print( + "[blue][INFO][/blue] Set defaults.stt_stream: stt-smallest-stream" + ) else: - self.console.print("[yellow][WARNING][/yellow] No API key provided - transcription will not work") + self.console.print( + 
"[yellow][WARNING][/yellow] No API key provided - transcription will not work" + ) elif choice == "6": self.console.print("[blue][INFO][/blue] Skipping transcription setup") @@ -362,11 +439,16 @@ def setup_streaming_provider(self): When a different streaming provider is specified, sets defaults.stt_stream and enables always_batch_retranscribe (batch provider was set by setup_transcription). """ - if not hasattr(self.args, 'streaming_provider') or not self.args.streaming_provider: + if ( + not hasattr(self.args, "streaming_provider") + or not self.args.streaming_provider + ): return streaming_provider = self.args.streaming_provider - self.console.print(f"\n[green]✅[/green] Streaming provider: {streaming_provider} (configured via wizard)") + self.console.print( + f"\n[green]✅[/green] Streaming provider: {streaming_provider} (configured via wizard)" + ) # Map streaming provider to stt_stream config value provider_to_stt_stream = { @@ -377,7 +459,9 @@ def setup_streaming_provider(self): stream_stt = provider_to_stt_stream.get(streaming_provider) if not stream_stt: - self.console.print(f"[yellow][WARNING][/yellow] Unknown streaming provider: {streaming_provider}") + self.console.print( + f"[yellow][WARNING][/yellow] Unknown streaming provider: {streaming_provider}" + ) return # Set stt_stream (batch stt was already set by setup_transcription) @@ -385,11 +469,11 @@ def setup_streaming_provider(self): # Enable always_batch_retranscribe full_config = self.config_manager.get_full_config() - if 'backend' not in full_config: - full_config['backend'] = {} - if 'transcription' not in full_config['backend']: - full_config['backend']['transcription'] = {} - full_config['backend']['transcription']['always_batch_retranscribe'] = True + if "backend" not in full_config: + full_config["backend"] = {} + if "transcription" not in full_config["backend"]: + full_config["backend"]["transcription"] = {} + full_config["backend"]["transcription"]["always_batch_retranscribe"] = True 
self.config_manager.save_full_config(full_config) self.console.print(f"[blue][INFO][/blue] Set defaults.stt_stream: {stream_stt}") @@ -397,33 +481,47 @@ def setup_streaming_provider(self): # Prompt for streaming provider env vars if not already set if streaming_provider == "deepgram": - existing_key = read_env_value('.env', 'DEEPGRAM_API_KEY') - if not existing_key or existing_key in ('your_deepgram_api_key_here', 'your-deepgram-key-here'): + existing_key = read_env_value(".env", "DEEPGRAM_API_KEY") + if not existing_key or existing_key in ( + "your_deepgram_api_key_here", + "your-deepgram-key-here", + ): api_key = self.prompt_with_existing_masked( prompt_text="Deepgram API key for streaming", env_key="DEEPGRAM_API_KEY", - placeholders=['your_deepgram_api_key_here', 'your-deepgram-key-here'], + placeholders=[ + "your_deepgram_api_key_here", + "your-deepgram-key-here", + ], is_password=True, - default="" + default="", ) if api_key: self.config["DEEPGRAM_API_KEY"] = api_key elif streaming_provider == "smallest": - existing_key = read_env_value('.env', 'SMALLEST_API_KEY') - if not existing_key or existing_key in ('your_smallest_api_key_here', 'your-smallest-key-here'): + existing_key = read_env_value(".env", "SMALLEST_API_KEY") + if not existing_key or existing_key in ( + "your_smallest_api_key_here", + "your-smallest-key-here", + ): api_key = self.prompt_with_existing_masked( prompt_text="Smallest.ai API key for streaming", env_key="SMALLEST_API_KEY", - placeholders=['your_smallest_api_key_here', 'your-smallest-key-here'], + placeholders=[ + "your_smallest_api_key_here", + "your-smallest-key-here", + ], is_password=True, - default="" + default="", ) if api_key: self.config["SMALLEST_API_KEY"] = api_key elif streaming_provider == "qwen3-asr": - existing_url = read_env_value('.env', 'QWEN3_ASR_STREAM_URL') + existing_url = read_env_value(".env", "QWEN3_ASR_STREAM_URL") if not existing_url: - qwen3_url = self.prompt_value("Qwen3-ASR streaming URL", 
"http://host.docker.internal:8769") + qwen3_url = self.prompt_value( + "Qwen3-ASR streaming URL", "http://host.docker.internal:8769" + ) stream_host = qwen3_url.replace("http://", "").rstrip("/") self.config["QWEN3_ASR_STREAM_URL"] = stream_host @@ -431,51 +529,177 @@ def setup_llm(self): """Configure LLM provider - updates config.yml and .env""" self.print_section("LLM Provider Configuration") - self.console.print("[blue][INFO][/blue] LLM configuration will be saved to config.yml") + self.console.print( + "[blue][INFO][/blue] LLM configuration will be saved to config.yml" + ) self.console.print() choices = { "1": "OpenAI (GPT-4, GPT-3.5 - requires API key)", "2": "Ollama (local models - runs locally)", - "3": "Skip (no memory extraction)" + "3": "OpenAI-Compatible (custom endpoint - Groq, Together AI, LM Studio, etc.)", + "4": "Skip (no memory extraction)", } choice = self.prompt_choice("Which LLM provider will you use?", choices, "1") if choice == "1": self.console.print("[blue][INFO][/blue] OpenAI selected") - self.console.print("Get your API key from: https://platform.openai.com/api-keys") + self.console.print( + "Get your API key from: https://platform.openai.com/api-keys" + ) # Use the new masked prompt function api_key = self.prompt_with_existing_masked( prompt_text="OpenAI API key (leave empty to skip)", env_key="OPENAI_API_KEY", - placeholders=['your_openai_api_key_here', 'your-openai-key-here'], + placeholders=["your_openai_api_key_here", "your-openai-key-here"], is_password=True, - default="" + default="", ) if api_key: self.config["OPENAI_API_KEY"] = api_key # Update config.yml to use OpenAI models - self.config_manager.update_config_defaults({"llm": "openai-llm", "embedding": "openai-embed"}) - self.console.print("[green][SUCCESS][/green] OpenAI configured in config.yml") + self.config_manager.update_config_defaults( + {"llm": "openai-llm", "embedding": "openai-embed"} + ) + self.console.print( + "[green][SUCCESS][/green] OpenAI configured in 
config.yml" + ) self.console.print("[blue][INFO][/blue] Set defaults.llm: openai-llm") - self.console.print("[blue][INFO][/blue] Set defaults.embedding: openai-embed") + self.console.print( + "[blue][INFO][/blue] Set defaults.embedding: openai-embed" + ) else: - self.console.print("[yellow][WARNING][/yellow] No API key provided - memory extraction will not work") + self.console.print( + "[yellow][WARNING][/yellow] No API key provided - memory extraction will not work" + ) elif choice == "2": self.console.print("[blue][INFO][/blue] Ollama selected") # Update config.yml to use Ollama models - self.config_manager.update_config_defaults({"llm": "local-llm", "embedding": "local-embed"}) - self.console.print("[green][SUCCESS][/green] Ollama configured in config.yml") + self.config_manager.update_config_defaults( + {"llm": "local-llm", "embedding": "local-embed"} + ) + self.console.print( + "[green][SUCCESS][/green] Ollama configured in config.yml" + ) self.console.print("[blue][INFO][/blue] Set defaults.llm: local-llm") - self.console.print("[blue][INFO][/blue] Set defaults.embedding: local-embed") - self.console.print("[yellow][WARNING][/yellow] Make sure Ollama is running and models are pulled") + self.console.print( + "[blue][INFO][/blue] Set defaults.embedding: local-embed" + ) + self.console.print( + "[yellow][WARNING][/yellow] Make sure Ollama is running and models are pulled" + ) elif choice == "3": - self.console.print("[blue][INFO][/blue] Skipping LLM setup - memory extraction disabled") + self.console.print( + "[blue][INFO][/blue] OpenAI-Compatible custom endpoint selected" + ) + self.console.print( + "This works with any provider that exposes an OpenAI-compatible API" + ) + self.console.print("(e.g., Groq, Together AI, LM Studio, vLLM, etc.)") + self.console.print() + + # Prompt for base URL (required) + base_url = self.prompt_value( + "API Base URL (e.g., https://api.groq.com/openai/v1)", "" + ) + if not base_url: + self.console.print( + 
"[yellow][WARNING][/yellow] No base URL provided - skipping custom LLM setup" + ) + else: + # Prompt for API key + api_key = self.prompt_with_existing_masked( + prompt_text="API Key (leave empty if not required)", + env_key="CUSTOM_LLM_API_KEY", + placeholders=["your_custom_llm_api_key_here"], + is_password=True, + default="", + ) + if api_key: + self.config["CUSTOM_LLM_API_KEY"] = api_key + + # Prompt for model name (required) + model_name = self.prompt_value( + "LLM Model name (e.g., llama-3.1-70b-versatile)", "" + ) + if not model_name: + self.console.print( + "[yellow][WARNING][/yellow] No model name provided - skipping custom LLM setup" + ) + else: + # Create LLM model entry + llm_model = { + "name": "custom-llm", + "description": "Custom OpenAI-compatible LLM", + "model_type": "llm", + "model_provider": "openai", + "api_family": "openai", + "model_name": model_name, + "model_url": base_url, + "api_key": "${oc.env:CUSTOM_LLM_API_KEY,''}", + "model_params": {"temperature": 0.2, "max_tokens": 2000}, + "model_output": "json", + } + self.config_manager.add_or_update_model(llm_model) + + # Prompt for optional embedding model + embedding_model_name = self.prompt_value( + "Embedding model name (leave empty to use Ollama local-embed)", + "", + ) + + if embedding_model_name: + embed_model = { + "name": "custom-embed", + "description": "Custom OpenAI-compatible embeddings", + "model_type": "embedding", + "model_provider": "openai", + "api_family": "openai", + "model_name": embedding_model_name, + "model_url": base_url, + "api_key": "${oc.env:CUSTOM_LLM_API_KEY,''}", + "embedding_dimensions": 1536, + "model_output": "vector", + } + self.config_manager.add_or_update_model(embed_model) + self.config_manager.update_config_defaults( + {"llm": "custom-llm", "embedding": "custom-embed"} + ) + self.console.print( + "[green][SUCCESS][/green] Custom LLM and embedding configured in config.yml" + ) + self.console.print( + "[blue][INFO][/blue] Set defaults.llm: custom-llm" + ) + 
self.console.print( + "[blue][INFO][/blue] Set defaults.embedding: custom-embed" + ) + else: + self.config_manager.update_config_defaults( + {"llm": "custom-llm", "embedding": "local-embed"} + ) + self.console.print( + "[green][SUCCESS][/green] Custom LLM configured in config.yml" + ) + self.console.print( + "[blue][INFO][/blue] Set defaults.llm: custom-llm" + ) + self.console.print( + "[blue][INFO][/blue] Set defaults.embedding: local-embed (Ollama)" + ) + self.console.print( + "[yellow][WARNING][/yellow] Make sure Ollama is running for embeddings" + ) + + elif choice == "4": + self.console.print( + "[blue][INFO][/blue] Skipping LLM setup - memory extraction disabled" + ) # Disable memory extraction in config.yml self.config_manager.update_memory_config({"extraction": {"enabled": False}}) @@ -491,80 +715,115 @@ def setup_memory(self): choice = self.prompt_choice("Choose your memory storage backend:", choices, "1") if choice == "1": - self.console.print("[blue][INFO][/blue] Chronicle Native memory provider selected") + self.console.print( + "[blue][INFO][/blue] Chronicle Native memory provider selected" + ) qdrant_url = self.prompt_value("Qdrant URL", "qdrant") self.config["QDRANT_BASE_URL"] = qdrant_url # Update config.yml (also updates .env automatically) self.config_manager.update_memory_config({"provider": "chronicle"}) - self.console.print("[green][SUCCESS][/green] Chronicle memory provider configured in config.yml and .env") + self.console.print( + "[green][SUCCESS][/green] Chronicle memory provider configured in config.yml and .env" + ) elif choice == "2": self.console.print("[blue][INFO][/blue] OpenMemory MCP selected") - mcp_url = self.prompt_value("OpenMemory MCP server URL", "http://host.docker.internal:8765") + mcp_url = self.prompt_value( + "OpenMemory MCP server URL", "http://host.docker.internal:8765" + ) client_name = self.prompt_value("OpenMemory client name", "chronicle") user_id = self.prompt_value("OpenMemory user ID", "openmemory") timeout = 
self.prompt_value("OpenMemory timeout (seconds)", "30") # Update config.yml with OpenMemory MCP settings (also updates .env automatically) - self.config_manager.update_memory_config({ - "provider": "openmemory_mcp", - "openmemory_mcp": { - "server_url": mcp_url, - "client_name": client_name, - "user_id": user_id, - "timeout": int(timeout) + self.config_manager.update_memory_config( + { + "provider": "openmemory_mcp", + "openmemory_mcp": { + "server_url": mcp_url, + "client_name": client_name, + "user_id": user_id, + "timeout": int(timeout), + }, } - }) - self.console.print("[green][SUCCESS][/green] OpenMemory MCP configured in config.yml and .env") - self.console.print("[yellow][WARNING][/yellow] Remember to start OpenMemory: cd ../../extras/openmemory-mcp && docker compose up -d") + ) + self.console.print( + "[green][SUCCESS][/green] OpenMemory MCP configured in config.yml and .env" + ) + self.console.print( + "[yellow][WARNING][/yellow] Remember to start OpenMemory: cd ../../extras/openmemory-mcp && docker compose up -d" + ) def setup_optional_services(self): """Configure optional services""" # Check if speaker service URL provided via args - has_speaker_arg = hasattr(self.args, 'speaker_service_url') and self.args.speaker_service_url - has_asr_arg = hasattr(self.args, 'parakeet_asr_url') and self.args.parakeet_asr_url + has_speaker_arg = ( + hasattr(self.args, "speaker_service_url") and self.args.speaker_service_url + ) + has_asr_arg = ( + hasattr(self.args, "parakeet_asr_url") and self.args.parakeet_asr_url + ) if has_speaker_arg: self.config["SPEAKER_SERVICE_URL"] = self.args.speaker_service_url - self.console.print(f"[green]✅[/green] Speaker Recognition: {self.args.speaker_service_url} (configured via wizard)") + self.console.print( + f"[green]✅[/green] Speaker Recognition: {self.args.speaker_service_url} (configured via wizard)" + ) if has_asr_arg: self.config["PARAKEET_ASR_URL"] = self.args.parakeet_asr_url - self.console.print(f"[green]✅[/green] Parakeet 
ASR: {self.args.parakeet_asr_url} (configured via wizard)") + self.console.print( + f"[green]✅[/green] Parakeet ASR: {self.args.parakeet_asr_url} (configured via wizard)" + ) # Only show interactive section if not all configured via args if not has_speaker_arg: try: - enable_speaker = Confirm.ask("Enable Speaker Recognition?", default=False) + enable_speaker = Confirm.ask( + "Enable Speaker Recognition?", default=False + ) except EOFError: self.console.print("Using default: No") enable_speaker = False - + if enable_speaker: - speaker_url = self.prompt_value("Speaker Recognition service URL", "http://host.docker.internal:8001") + speaker_url = self.prompt_value( + "Speaker Recognition service URL", + "http://host.docker.internal:8001", + ) self.config["SPEAKER_SERVICE_URL"] = speaker_url - self.console.print("[green][SUCCESS][/green] Speaker Recognition configured") - self.console.print("[blue][INFO][/blue] Start with: cd ../../extras/speaker-recognition && docker compose up -d") - + self.console.print( + "[green][SUCCESS][/green] Speaker Recognition configured" + ) + self.console.print( + "[blue][INFO][/blue] Start with: cd ../../extras/speaker-recognition && docker compose up -d" + ) + # Check if Tailscale auth key provided via args - if hasattr(self.args, 'ts_authkey') and self.args.ts_authkey: + if hasattr(self.args, "ts_authkey") and self.args.ts_authkey: self.config["TS_AUTHKEY"] = self.args.ts_authkey - self.console.print(f"[green][SUCCESS][/green] Tailscale auth key configured (Docker integration enabled)") + self.console.print( + f"[green][SUCCESS][/green] Tailscale auth key configured (Docker integration enabled)" + ) def setup_neo4j(self): """Configure Neo4j credentials (always required - used by Knowledge Graph)""" - neo4j_password = getattr(self.args, 'neo4j_password', None) + neo4j_password = getattr(self.args, "neo4j_password", None) if neo4j_password: - self.console.print(f"[green]✅[/green] Neo4j: password configured via wizard") + 
self.console.print( + f"[green]✅[/green] Neo4j: password configured via wizard" + ) else: # Interactive prompt (standalone init.py run) self.console.print() self.console.print("[bold cyan]Neo4j Configuration[/bold cyan]") - self.console.print("Neo4j is used for Knowledge Graph (entity/relationship extraction)") + self.console.print( + "Neo4j is used for Knowledge Graph (entity/relationship extraction)" + ) self.console.print() neo4j_password = self.prompt_password("Neo4j password (min 8 chars)") @@ -575,49 +834,54 @@ def setup_neo4j(self): def setup_obsidian(self): """Configure Obsidian integration (optional feature flag only - Neo4j credentials handled by setup_neo4j)""" - if hasattr(self.args, 'enable_obsidian') and self.args.enable_obsidian: + if hasattr(self.args, "enable_obsidian") and self.args.enable_obsidian: enable_obsidian = True - self.console.print(f"[green]✅[/green] Obsidian: enabled (configured via wizard)") + self.console.print( + f"[green]✅[/green] Obsidian: enabled (configured via wizard)" + ) else: # Interactive prompt (fallback) self.console.print() self.console.print("[bold cyan]Obsidian Integration (Optional)[/bold cyan]") - self.console.print("Enable graph-based knowledge management for Obsidian vault notes") + self.console.print( + "Enable graph-based knowledge management for Obsidian vault notes" + ) self.console.print() try: - enable_obsidian = Confirm.ask("Enable Obsidian integration?", default=False) + enable_obsidian = Confirm.ask( + "Enable Obsidian integration?", default=False + ) except EOFError: self.console.print("Using default: No") enable_obsidian = False if enable_obsidian: - self.config_manager.update_memory_config({ - "obsidian": { - "enabled": True, - "neo4j_host": "neo4j", - "timeout": 30 - } - }) + self.config_manager.update_memory_config( + {"obsidian": {"enabled": True, "neo4j_host": "neo4j", "timeout": 30}} + ) self.console.print("[green][SUCCESS][/green] Obsidian integration enabled") else: - 
self.config_manager.update_memory_config({ - "obsidian": { - "enabled": False, - "neo4j_host": "neo4j", - "timeout": 30 - } - }) + self.config_manager.update_memory_config( + {"obsidian": {"enabled": False, "neo4j_host": "neo4j", "timeout": 30}} + ) self.console.print("[blue][INFO][/blue] Obsidian integration disabled") def setup_knowledge_graph(self): """Configure Knowledge Graph (Neo4j-based entity/relationship extraction - enabled by default)""" - if hasattr(self.args, 'enable_knowledge_graph') and self.args.enable_knowledge_graph: + if ( + hasattr(self.args, "enable_knowledge_graph") + and self.args.enable_knowledge_graph + ): enable_kg = True else: self.console.print() - self.console.print("[bold cyan]Knowledge Graph (Entity Extraction)[/bold cyan]") - self.console.print("Extract people, places, organizations, events, and tasks from conversations") + self.console.print( + "[bold cyan]Knowledge Graph (Entity Extraction)[/bold cyan]" + ) + self.console.print( + "Extract people, places, organizations, events, and tasks from conversations" + ) self.console.print() try: @@ -627,56 +891,77 @@ def setup_knowledge_graph(self): enable_kg = True if enable_kg: - self.config_manager.update_memory_config({ - "knowledge_graph": { - "enabled": True, - "neo4j_host": "neo4j", - "timeout": 30 + self.config_manager.update_memory_config( + { + "knowledge_graph": { + "enabled": True, + "neo4j_host": "neo4j", + "timeout": 30, + } } - }) + ) self.console.print("[green][SUCCESS][/green] Knowledge Graph enabled") - self.console.print("[blue][INFO][/blue] Entities and relationships will be extracted from conversations") + self.console.print( + "[blue][INFO][/blue] Entities and relationships will be extracted from conversations" + ) else: - self.config_manager.update_memory_config({ - "knowledge_graph": { - "enabled": False, - "neo4j_host": "neo4j", - "timeout": 30 + self.config_manager.update_memory_config( + { + "knowledge_graph": { + "enabled": False, + "neo4j_host": "neo4j", + 
"timeout": 30, + } } - }) + ) self.console.print("[blue][INFO][/blue] Knowledge Graph disabled") def setup_langfuse(self): """Configure LangFuse observability and prompt management""" self.console.print() - self.console.print("[bold cyan]LangFuse Observability & Prompt Management[/bold cyan]") + self.console.print( + "[bold cyan]LangFuse Observability & Prompt Management[/bold cyan]" + ) # Check if keys were passed from wizard (langfuse init already ran) - langfuse_pub = getattr(self.args, 'langfuse_public_key', None) - langfuse_sec = getattr(self.args, 'langfuse_secret_key', None) + langfuse_pub = getattr(self.args, "langfuse_public_key", None) + langfuse_sec = getattr(self.args, "langfuse_secret_key", None) if langfuse_pub and langfuse_sec: # Auto-configure from wizard — no prompts needed - langfuse_host = getattr(self.args, 'langfuse_host', None) or "http://langfuse-web:3000" + langfuse_host = ( + getattr(self.args, "langfuse_host", None) or "http://langfuse-web:3000" + ) self.config["LANGFUSE_HOST"] = langfuse_host self.config["LANGFUSE_PUBLIC_KEY"] = langfuse_pub self.config["LANGFUSE_SECRET_KEY"] = langfuse_sec self.config["LANGFUSE_BASE_URL"] = langfuse_host # Derive browser-accessible URL for deep-links - public_url = getattr(self.args, 'langfuse_public_url', None) or "http://localhost:3002" + public_url = ( + getattr(self.args, "langfuse_public_url", None) + or "http://localhost:3002" + ) self._save_langfuse_public_url(public_url) source = "external" if "langfuse-web" not in langfuse_host else "local" - self.console.print(f"[green][SUCCESS][/green] LangFuse auto-configured ({source})") + self.console.print( + f"[green][SUCCESS][/green] LangFuse auto-configured ({source})" + ) self.console.print(f"[blue][INFO][/blue] Host: {langfuse_host}") self.console.print(f"[blue][INFO][/blue] Public URL: {public_url}") - self.console.print(f"[blue][INFO][/blue] Public key: {self.mask_api_key(langfuse_pub)}") + self.console.print( + f"[blue][INFO][/blue] Public key: 
{self.mask_api_key(langfuse_pub)}" + ) return # Manual configuration (standalone init.py run) - self.console.print("Enable LLM tracing, observability, and prompt management with LangFuse") - self.console.print("Self-host: cd ../../extras/langfuse && docker compose up -d") + self.console.print( + "Enable LLM tracing, observability, and prompt management with LangFuse" + ) + self.console.print( + "Self-host: cd ../../extras/langfuse && docker compose up -d" + ) self.console.print() try: @@ -748,52 +1033,68 @@ def setup_network(self): def setup_https(self): """Configure HTTPS settings for microphone access""" # Check if HTTPS configuration provided via command line - if hasattr(self.args, 'enable_https') and self.args.enable_https: + if hasattr(self.args, "enable_https") and self.args.enable_https: enable_https = True - server_ip = getattr(self.args, 'server_ip', 'localhost') - self.console.print(f"[green]✅[/green] HTTPS: {server_ip} (configured via wizard)") + server_ip = getattr(self.args, "server_ip", "localhost") + self.console.print( + f"[green]✅[/green] HTTPS: {server_ip} (configured via wizard)" + ) else: # Interactive configuration self.print_section("HTTPS Configuration (Optional)") try: - enable_https = Confirm.ask("Enable HTTPS for microphone access?", default=False) + enable_https = Confirm.ask( + "Enable HTTPS for microphone access?", default=False + ) except EOFError: self.console.print("Using default: No") enable_https = False if enable_https: - self.console.print("[blue][INFO][/blue] HTTPS enables microphone access in browsers") + self.console.print( + "[blue][INFO][/blue] HTTPS enables microphone access in browsers" + ) # Try to auto-detect Tailscale address ts_dns, ts_ip = detect_tailscale_info() if ts_dns: - self.console.print(f"[green][AUTO-DETECTED][/green] Tailscale DNS: {ts_dns}") + self.console.print( + f"[green][AUTO-DETECTED][/green] Tailscale DNS: {ts_dns}" + ) if ts_ip: - self.console.print(f"[green][AUTO-DETECTED][/green] Tailscale IP: 
{ts_ip}") + self.console.print( + f"[green][AUTO-DETECTED][/green] Tailscale IP: {ts_ip}" + ) default_address = ts_dns elif ts_ip: - self.console.print(f"[green][AUTO-DETECTED][/green] Tailscale IP: {ts_ip}") + self.console.print( + f"[green][AUTO-DETECTED][/green] Tailscale IP: {ts_ip}" + ) default_address = ts_ip else: self.console.print("[blue][INFO][/blue] Tailscale not detected") - self.console.print("[blue][INFO][/blue] To find your Tailscale address: tailscale status --json | jq -r '.Self.DNSName'") + self.console.print( + "[blue][INFO][/blue] To find your Tailscale address: tailscale status --json | jq -r '.Self.DNSName'" + ) default_address = "localhost" - self.console.print("[blue][INFO][/blue] For local-only access, use 'localhost'") + self.console.print( + "[blue][INFO][/blue] For local-only access, use 'localhost'" + ) # Use the new masked prompt function (not masked for IP, but shows existing) server_ip = self.prompt_with_existing_masked( prompt_text="Server IP/Domain for SSL certificate", env_key="SERVER_IP", - placeholders=['localhost', 'your-server-ip-here'], + placeholders=["localhost", "your-server-ip-here"], is_password=False, - default=default_address + default=default_address, ) - + if enable_https: - + # Generate SSL certificates self.console.print("[blue][INFO][/blue] Generating SSL certificates...") # Use path relative to this script's directory @@ -802,17 +1103,32 @@ def setup_https(self): if ssl_script.exists(): try: # Run from the backend directory so paths work correctly - subprocess.run([str(ssl_script), server_ip], check=True, cwd=str(script_dir), timeout=180) - self.console.print("[green][SUCCESS][/green] SSL certificates generated") + subprocess.run( + [str(ssl_script), server_ip], + check=True, + cwd=str(script_dir), + timeout=180, + ) + self.console.print( + "[green][SUCCESS][/green] SSL certificates generated" + ) except subprocess.TimeoutExpired: - self.console.print("[yellow][WARNING][/yellow] SSL certificate generation timed 
out after 3 minutes") + self.console.print( + "[yellow][WARNING][/yellow] SSL certificate generation timed out after 3 minutes" + ) except subprocess.CalledProcessError: - self.console.print("[yellow][WARNING][/yellow] SSL certificate generation failed") + self.console.print( + "[yellow][WARNING][/yellow] SSL certificate generation failed" + ) else: - self.console.print(f"[yellow][WARNING][/warning] SSL script not found at {ssl_script}") + self.console.print( + f"[yellow][WARNING][/yellow] SSL script not found at {ssl_script}" + ) # Generate Caddyfile from template - self.console.print("[blue][INFO][/blue] Creating Caddyfile configuration...") + self.console.print( + "[blue][INFO][/blue] Creating Caddyfile configuration..." + ) caddyfile_template = script_dir / "Caddyfile.template" caddyfile_path = script_dir / "Caddyfile" @@ -820,32 +1136,50 @@ try: # Check if Caddyfile exists as a directory (common issue) if caddyfile_path.exists() and caddyfile_path.is_dir(): - self.console.print("[red]❌ ERROR: 'Caddyfile' exists as a directory![/red]") - self.console.print("[yellow] Please remove it manually:[/yellow]") - self.console.print(f"[yellow] rm -rf {caddyfile_path}[/yellow]") - self.console.print("[red] HTTPS will NOT work without a proper Caddyfile![/red]") + self.console.print( + "[red]❌ ERROR: 'Caddyfile' exists as a directory![/red]" + ) + self.console.print( + "[yellow] Please remove it manually:[/yellow]" + ) + self.console.print( + f"[yellow] rm -rf {caddyfile_path}[/yellow]" + ) + self.console.print( + "[red] HTTPS will NOT work without a proper Caddyfile![/red]" + ) self.config["HTTPS_ENABLED"] = "false" else: - with open(caddyfile_template, 'r') as f: + with open(caddyfile_template, "r") as f: caddyfile_content = f.read() # Replace TAILSCALE_IP with server_ip - caddyfile_content = caddyfile_content.replace('TAILSCALE_IP', server_ip) + caddyfile_content = caddyfile_content.replace( + "TAILSCALE_IP", server_ip + ) - with 
open(caddyfile_path, 'w') as f: + with open(caddyfile_path, "w") as f: f.write(caddyfile_content) - self.console.print(f"[green][SUCCESS][/green] Caddyfile created for: {server_ip}") + self.console.print( + f"[green][SUCCESS][/green] Caddyfile created for: {server_ip}" + ) self.config["HTTPS_ENABLED"] = "true" self.config["SERVER_IP"] = server_ip except Exception as e: - self.console.print(f"[red]❌ ERROR: Caddyfile generation failed: {e}[/red]") - self.console.print("[red] HTTPS will NOT work without a proper Caddyfile![/red]") + self.console.print( + f"[red]❌ ERROR: Caddyfile generation failed: {e}[/red]" + ) + self.console.print( + "[red] HTTPS will NOT work without a proper Caddyfile![/red]" + ) self.config["HTTPS_ENABLED"] = "false" else: self.console.print("[red]❌ ERROR: Caddyfile.template not found[/red]") - self.console.print("[red] HTTPS will NOT work without a proper Caddyfile![/red]") + self.console.print( + "[red] HTTPS will NOT work without a proper Caddyfile![/red]" + ) self.config["HTTPS_ENABLED"] = "false" else: self.config["HTTPS_ENABLED"] = "false" @@ -863,7 +1197,9 @@ def generate_env_file(self): shutil.copy2(env_template, env_path) self.console.print("[blue][INFO][/blue] Copied .env.template to .env") else: - self.console.print("[yellow][WARNING][/yellow] .env.template not found, creating new .env") + self.console.print( + "[yellow][WARNING][/yellow] .env.template not found, creating new .env" + ) env_path.touch(mode=0o600) # Update configured values using set_key @@ -875,24 +1211,35 @@ def generate_env_file(self): # Ensure secure permissions os.chmod(env_path, 0o600) - self.console.print("[green][SUCCESS][/green] .env file configured successfully with secure permissions") + self.console.print( + "[green][SUCCESS][/green] .env file configured successfully with secure permissions" + ) # Note: config.yml is automatically saved by ConfigManager when updates are made - self.console.print("[blue][INFO][/blue] Configuration saved to config.yml and .env 
(via ConfigManager)") + self.console.print( + "[blue][INFO][/blue] Configuration saved to config.yml and .env (via ConfigManager)" + ) def copy_config_templates(self): """Copy other configuration files""" - if not Path("diarization_config.json").exists() and Path("diarization_config.json.template").exists(): + if ( + not Path("diarization_config.json").exists() + and Path("diarization_config.json.template").exists() + ): shutil.copy2("diarization_config.json.template", "diarization_config.json") - self.console.print("[green][SUCCESS][/green] diarization_config.json created") + self.console.print( + "[green][SUCCESS][/green] diarization_config.json created" + ) def show_summary(self): """Show configuration summary""" self.print_section("Configuration Summary") self.console.print() - self.console.print(f"✅ Admin Account: {self.config.get('ADMIN_EMAIL', 'Not configured')}") + self.console.print( + f"✅ Admin Account: {self.config.get('ADMIN_EMAIL', 'Not configured')}" + ) # Get current config from ConfigManager (single source of truth) config_yml = self.config_manager.get_full_config() @@ -901,10 +1248,16 @@ def show_summary(self): stt_default = config_yml.get("defaults", {}).get("stt", "not set") stt_model = next( (m for m in config_yml.get("models", []) if m.get("name") == stt_default), - None + None, + ) + stt_provider = ( + stt_model.get("model_provider", "unknown") + if stt_model + else "not configured" + ) + self.console.print( + f"✅ Transcription: {stt_provider} ({stt_default}) - config.yml" ) - stt_provider = stt_model.get("model_provider", "unknown") if stt_model else "not configured" - self.console.print(f"✅ Transcription: {stt_provider} ({stt_default}) - config.yml") # Show LLM config from config.yml llm_default = config_yml.get("defaults", {}).get("llm", "not set") @@ -929,13 +1282,13 @@ def show_summary(self): self.console.print(f"✅ Knowledge Graph: Enabled ({neo4j_host})") # Auto-determine URLs based on HTTPS configuration - if 
self.config.get('HTTPS_ENABLED') == 'true': - server_ip = self.config.get('SERVER_IP', 'localhost') + if self.config.get("HTTPS_ENABLED") == "true": + server_ip = self.config.get("SERVER_IP", "localhost") self.console.print(f"✅ Backend URL: https://{server_ip}/") self.console.print(f"✅ Dashboard URL: https://{server_ip}/") else: - backend_port = self.config.get('BACKEND_PUBLIC_PORT', '8000') - webui_port = self.config.get('WEBUI_PORT', '5173') + backend_port = self.config.get("BACKEND_PUBLIC_PORT", "8000") + webui_port = self.config.get("WEBUI_PORT", "5173") self.console.print(f"✅ Backend URL: http://localhost:{backend_port}") self.console.print(f"✅ Dashboard URL: http://localhost:{webui_port}") @@ -950,40 +1303,52 @@ def show_next_steps(self): self.console.print("1. Start the main services:") self.console.print(" [cyan]docker compose up --build -d[/cyan]") self.console.print() - + # Auto-determine URLs for next steps - if self.config.get('HTTPS_ENABLED') == 'true': - server_ip = self.config.get('SERVER_IP', 'localhost') + if self.config.get("HTTPS_ENABLED") == "true": + server_ip = self.config.get("SERVER_IP", "localhost") self.console.print("2. Access the dashboard:") self.console.print(f" [cyan]https://{server_ip}/[/cyan]") self.console.print() self.console.print("3. Check service health:") self.console.print(f" [cyan]curl -k https://{server_ip}/health[/cyan]") else: - webui_port = self.config.get('WEBUI_PORT', '5173') - backend_port = self.config.get('BACKEND_PUBLIC_PORT', '8000') + webui_port = self.config.get("WEBUI_PORT", "5173") + backend_port = self.config.get("BACKEND_PUBLIC_PORT", "8000") self.console.print("2. Access the dashboard:") self.console.print(f" [cyan]http://localhost:{webui_port}[/cyan]") self.console.print() self.console.print("3. 
Check service health:") - self.console.print(f" [cyan]curl http://localhost:{backend_port}/health[/cyan]") + self.console.print( + f" [cyan]curl http://localhost:{backend_port}/health[/cyan]" + ) if self.config.get("MEMORY_PROVIDER") == "openmemory_mcp": self.console.print() self.console.print("4. Start OpenMemory MCP:") - self.console.print(" [cyan]cd ../../extras/openmemory-mcp && docker compose up -d[/cyan]") + self.console.print( + " [cyan]cd ../../extras/openmemory-mcp && docker compose up -d[/cyan]" + ) if self.config.get("TRANSCRIPTION_PROVIDER") == "offline": self.console.print() self.console.print("5. Start Parakeet ASR:") - self.console.print(" [cyan]cd ../../extras/asr-services && docker compose up parakeet -d[/cyan]") + self.console.print( + " [cyan]cd ../../extras/asr-services && docker compose up parakeet -d[/cyan]" + ) def run(self): """Run the complete setup process""" self.print_header("🚀 Chronicle Interactive Setup") - self.console.print("This wizard will help you configure Chronicle with all necessary services.") - self.console.print("[dim]Safe to run again — it backs up your config and preserves previous values.[/dim]") - self.console.print("[dim]When unsure, just press Enter — the defaults will work.[/dim]") + self.console.print( + "This wizard will help you configure Chronicle with all necessary services." 
+ ) + self.console.print( + "[dim]Safe to run again — it backs up your config and preserves previous values.[/dim]" + ) + self.console.print( + "[dim]When unsure, just press Enter — the defaults will work.[/dim]" + ) self.console.print() try: @@ -1018,7 +1383,9 @@ def run(self): self.console.print() self.console.print("📝 [bold]Configuration files updated:[/bold]") self.console.print(f" • .env - API keys and environment variables") - self.console.print(f" • ../../config/config.yml - Model and memory provider configuration") + self.console.print( + f" • ../../config/config.yml - Model and memory provider configuration" + ) self.console.print() self.console.print("For detailed documentation, see:") self.console.print(" • Docs/quickstart.md") @@ -1037,39 +1404,68 @@ def run(self): def main(): """Main entry point""" parser = argparse.ArgumentParser(description="Chronicle Advanced Backend Setup") - parser.add_argument("--speaker-service-url", - help="Speaker Recognition service URL (default: prompt user)") - parser.add_argument("--parakeet-asr-url", - help="Parakeet ASR service URL (default: prompt user)") - parser.add_argument("--transcription-provider", - choices=["deepgram", "parakeet", "vibevoice", "qwen3-asr", "smallest", "none"], - help="Transcription provider (default: prompt user)") - parser.add_argument("--enable-https", action="store_true", - help="Enable HTTPS configuration (default: prompt user)") - parser.add_argument("--server-ip", - help="Server IP/domain for SSL certificate (default: prompt user)") - parser.add_argument("--enable-obsidian", action="store_true", - help="Enable Obsidian/Neo4j integration (default: prompt user)") - parser.add_argument("--enable-knowledge-graph", action="store_true", - help="Enable Knowledge Graph entity extraction (default: prompt user)") - parser.add_argument("--neo4j-password", - help="Neo4j password (default: prompt user)") - parser.add_argument("--ts-authkey", - help="Tailscale auth key for Docker integration (default: 
prompt user)") - parser.add_argument("--langfuse-public-key", - help="LangFuse project public key (from langfuse init or external)") - parser.add_argument("--langfuse-secret-key", - help="LangFuse project secret key (from langfuse init or external)") - parser.add_argument("--langfuse-host", - help="LangFuse host URL (default: http://langfuse-web:3000 for local)") - parser.add_argument("--langfuse-public-url", - help="LangFuse browser-accessible URL for deep-links (default: http://localhost:3002)") - parser.add_argument("--streaming-provider", - choices=["deepgram", "smallest", "qwen3-asr"], - help="Streaming provider when different from batch (enables batch re-transcription)") + parser.add_argument( + "--speaker-service-url", + help="Speaker Recognition service URL (default: prompt user)", + ) + parser.add_argument( + "--parakeet-asr-url", help="Parakeet ASR service URL (default: prompt user)" + ) + parser.add_argument( + "--transcription-provider", + choices=["deepgram", "parakeet", "vibevoice", "qwen3-asr", "smallest", "none"], + help="Transcription provider (default: prompt user)", + ) + parser.add_argument( + "--enable-https", + action="store_true", + help="Enable HTTPS configuration (default: prompt user)", + ) + parser.add_argument( + "--server-ip", + help="Server IP/domain for SSL certificate (default: prompt user)", + ) + parser.add_argument( + "--enable-obsidian", + action="store_true", + help="Enable Obsidian/Neo4j integration (default: prompt user)", + ) + parser.add_argument( + "--enable-knowledge-graph", + action="store_true", + help="Enable Knowledge Graph entity extraction (default: prompt user)", + ) + parser.add_argument( + "--neo4j-password", help="Neo4j password (default: prompt user)" + ) + parser.add_argument( + "--ts-authkey", + help="Tailscale auth key for Docker integration (default: prompt user)", + ) + parser.add_argument( + "--langfuse-public-key", + help="LangFuse project public key (from langfuse init or external)", + ) + 
parser.add_argument( + "--langfuse-secret-key", + help="LangFuse project secret key (from langfuse init or external)", + ) + parser.add_argument( + "--langfuse-host", + help="LangFuse host URL (default: http://langfuse-web:3000 for local)", + ) + parser.add_argument( + "--langfuse-public-url", + help="LangFuse browser-accessible URL for deep-links (default: http://localhost:3002)", + ) + parser.add_argument( + "--streaming-provider", + choices=["deepgram", "smallest", "qwen3-asr"], + help="Streaming provider when different from batch (enables batch re-transcription)", + ) args = parser.parse_args() - + setup = ChronicleSetup(args) setup.run() diff --git a/backends/advanced/src/advanced_omi_backend/app_factory.py b/backends/advanced/src/advanced_omi_backend/app_factory.py index 6083de97..c1d56fed 100644 --- a/backends/advanced/src/advanced_omi_backend/app_factory.py +++ b/backends/advanced/src/advanced_omi_backend/app_factory.py @@ -7,6 +7,7 @@ import asyncio import logging +import time from contextlib import asynccontextmanager from pathlib import Path @@ -122,10 +123,14 @@ async def initialize_openmemory_user() -> None: async def lifespan(app: FastAPI): """Manage application lifespan events.""" config = get_app_config() + startup_start = time.monotonic() # Startup application_logger.info("Starting application...") + # ── Phase 1 (sequential — dependencies) ────────────────────────── + phase_start = time.monotonic() + # Initialize Beanie for all document models try: from beanie import init_beanie @@ -151,200 +156,258 @@ async def lifespan(app: FastAPI): application_logger.error(f"Failed to initialize Beanie: {e}") raise - # Create admin user if needed + # Create admin user if needed (requires Beanie) try: await create_admin_user_if_needed() except Exception as e: application_logger.error(f"Failed to create admin user: {e}") - # Don't raise here as this is not critical for startup - # Initialize Redis connection for RQ - try: - from 
advanced_omi_backend.controllers.queue_controller import redis_conn + application_logger.info( + f"Phase 1 (Beanie + admin) completed in {time.monotonic() - phase_start:.2f}s" + ) - redis_conn.ping() - application_logger.info("Redis connection established for RQ") - application_logger.info( - "RQ workers can be started with: rq worker transcription memory default" - ) - except Exception as e: - application_logger.error(f"Failed to connect to Redis for RQ: {e}") - application_logger.warning( - "RQ queue system will not be available - check Redis connection" - ) + # ── Phase 2 (parallel — all independent) ───────────────────────── + phase_start = time.monotonic() - # Initialize BackgroundTaskManager (must happen before any code path uses it) - try: - task_manager = init_task_manager() - await task_manager.start() - application_logger.info("BackgroundTaskManager initialized and started") - except Exception as e: - application_logger.error(f"Failed to initialize task manager: {e}") - raise # Task manager is essential + async def _init_redis_rq(): + try: + from advanced_omi_backend.controllers.queue_controller import redis_conn - # Initialize ClientManager eagerly (prevents lazy race on first WebSocket connect) - get_client_manager() - application_logger.info("ClientManager initialized") + redis_conn.ping() + application_logger.info("Redis connection established for RQ") + except Exception as e: + application_logger.error(f"Failed to connect to Redis for RQ: {e}") + application_logger.warning( + "RQ queue system will not be available - check Redis connection" + ) - # Initialize OTEL/Galileo if configured (before LLM client so instrumentor patches OpenAI first) - try: - from advanced_omi_backend.observability.otel_setup import init_otel + async def _init_task_manager(): + try: + tm = init_task_manager() + await tm.start() + application_logger.info("BackgroundTaskManager initialized and started") + except Exception as e: + application_logger.error(f"Failed to initialize 
task manager: {e}") + raise # Task manager is essential - init_otel() - except Exception as e: - application_logger.warning(f"OTEL initialization skipped: {e}") + async def _init_client_manager(): + get_client_manager() + application_logger.info("ClientManager initialized") - # Initialize prompt registry with defaults; seed into LangFuse in background - try: - from advanced_omi_backend.prompt_defaults import register_all_defaults - from advanced_omi_backend.prompt_registry import get_prompt_registry + async def _init_otel(): + try: + from advanced_omi_backend.observability.otel_setup import init_otel - prompt_registry = get_prompt_registry() - register_all_defaults(prompt_registry) - application_logger.info( - f"Prompt registry initialized with {len(prompt_registry._defaults)} defaults" - ) + init_otel() + except Exception as e: + application_logger.warning(f"OTEL initialization skipped: {e}") - # Seed prompts in background — Langfuse may not be ready at startup - async def _deferred_seed(): - await asyncio.sleep(10) - await prompt_registry.seed_prompts() + async def _init_prompt_registry(): + try: + from advanced_omi_backend.prompt_defaults import register_all_defaults + from advanced_omi_backend.prompt_registry import get_prompt_registry - asyncio.create_task(_deferred_seed()) - except Exception as e: - application_logger.warning(f"Prompt registry initialization failed: {e}") + registry = get_prompt_registry() + register_all_defaults(registry) + application_logger.info( + f"Prompt registry initialized with {len(registry._defaults)} defaults" + ) + except Exception as e: + application_logger.warning(f"Prompt registry initialization failed: {e}") + + await asyncio.gather( + _init_redis_rq(), + _init_task_manager(), + _init_client_manager(), + _init_otel(), + _init_prompt_registry(), + ) - # Initialize LLM client eagerly (catch config errors at startup, not on first request) - try: - from advanced_omi_backend.llm_client import get_llm_client + 
application_logger.info( + f"Phase 2 (Redis/TaskMgr/ClientMgr/OTEL/Prompts) completed in {time.monotonic() - phase_start:.2f}s" + ) - get_llm_client() - application_logger.info("LLM client initialized from config.yml") - except Exception as e: - application_logger.warning(f"LLM client initialization deferred: {e}") + # ── Phase 3 (parallel — OTEL done, safe for LLM patching) ──────── + phase_start = time.monotonic() - # Initialize audio stream service for Redis Streams - try: - audio_service = get_audio_stream_service() - await audio_service.connect() - application_logger.info("Audio stream service connected to Redis Streams") - application_logger.info( - "Audio stream workers can be started with: python -m advanced_omi_backend.workers.audio_stream_worker" - ) - except Exception as e: - application_logger.error(f"Failed to connect audio stream service: {e}") - application_logger.warning( - "Redis Streams audio processing will not be available" - ) + async def _init_llm_client(): + try: + from advanced_omi_backend.llm_client import get_llm_client - # Initialize Redis client for audio streaming producer (used by WebSocket handlers) - try: - app.state.redis_audio_stream = await redis.from_url( - config.redis_url, encoding="utf-8", decode_responses=False - ) - from advanced_omi_backend.services.audio_stream import AudioStreamProducer + get_llm_client() + application_logger.info("LLM client initialized from config.yml") + except Exception as e: + application_logger.warning(f"LLM client initialization deferred: {e}") - app.state.audio_stream_producer = AudioStreamProducer( - app.state.redis_audio_stream - ) - application_logger.info( - "✅ Redis client for audio streaming producer initialized" - ) + async def _init_audio_stream_service(): + try: + audio_service = get_audio_stream_service() + await audio_service.connect() + application_logger.info("Audio stream service connected to Redis Streams") + except Exception as e: + application_logger.error(f"Failed to connect 
audio stream service: {e}") + application_logger.warning( + "Redis Streams audio processing will not be available" + ) - # Initialize ClientManager Redis for cross-container client→user mapping - from advanced_omi_backend.client_manager import ( - initialize_redis_for_client_manager, - ) + async def _init_redis_audio_producer(): + try: + app.state.redis_audio_stream = await redis.from_url( + config.redis_url, encoding="utf-8", decode_responses=False + ) + from advanced_omi_backend.services.audio_stream import AudioStreamProducer - initialize_redis_for_client_manager(config.redis_url) + app.state.audio_stream_producer = AudioStreamProducer( + app.state.redis_audio_stream + ) + application_logger.info( + "Redis client for audio streaming producer initialized" + ) - except Exception as e: - application_logger.error( - f"Failed to initialize Redis client for audio streaming: {e}", exc_info=True + from advanced_omi_backend.client_manager import ( + initialize_redis_for_client_manager, + ) + + initialize_redis_for_client_manager(config.redis_url) + except Exception as e: + application_logger.error( + f"Failed to initialize Redis client for audio streaming: {e}", + exc_info=True, + ) + application_logger.warning("Audio streaming producer will not be available") + + async def _deferred_prompt_seed(): + """Seed prompts into Langfuse with retry backoff.""" + try: + from advanced_omi_backend.prompt_registry import get_prompt_registry + + registry = get_prompt_registry() + except Exception: + return + + backoff_delays = [0, 2, 4, 8, 16, 32] + for delay in backoff_delays: + if delay: + await asyncio.sleep(delay) + try: + await registry.seed_prompts() + application_logger.info("Prompt seeding to Langfuse completed") + return + except Exception as e: + application_logger.debug( + f"Prompt seeding attempt failed (next retry in {delay}s): {e}" + ) + application_logger.warning( + "Prompt seeding to Langfuse failed after all retries" ) - application_logger.warning("Audio streaming 
producer will not be available") - # Skip memory service pre-initialization to avoid blocking FastAPI startup - # Memory service will be lazily initialized when first used + await asyncio.gather( + _init_llm_client(), + _init_audio_stream_service(), + _init_redis_audio_producer(), + ) + + # Launch deferred prompt seeding as a fire-and-forget background task + asyncio.create_task(_deferred_prompt_seed()) + application_logger.info( - "Memory service will be initialized on first use (lazy loading)" + f"Phase 3 (LLM/AudioStream/RedisProducer) completed in {time.monotonic() - phase_start:.2f}s" ) - # Register OpenMemory user if using openmemory_mcp provider - await initialize_openmemory_user() + # ── Phase 4 (parallel — all independent) ───────────────────────── + phase_start = time.monotonic() - # Start cron scheduler (requires Redis to be available) - try: - from advanced_omi_backend.cron_scheduler import get_scheduler, register_cron_job - from advanced_omi_backend.workers.finetuning_jobs import ( - run_asr_finetuning_job, - run_asr_jargon_extraction_job, - run_speaker_finetuning_job, - ) - from advanced_omi_backend.workers.prompt_optimization_jobs import ( - run_prompt_optimization_job, - ) + application_logger.info( + "Memory service will be initialized on first use (lazy loading)" + ) - register_cron_job("speaker_finetuning", run_speaker_finetuning_job) - register_cron_job("asr_finetuning", run_asr_finetuning_job) - register_cron_job("asr_jargon_extraction", run_asr_jargon_extraction_job) - register_cron_job("prompt_optimization", run_prompt_optimization_job) + async def _init_openmemory(): + await initialize_openmemory_user() - scheduler = get_scheduler() - await scheduler.start() - application_logger.info("Cron scheduler started") - except Exception as e: - application_logger.warning(f"Cron scheduler failed to start: {e}") + async def _init_cron_scheduler(): + try: + from advanced_omi_backend.cron_scheduler import ( + get_scheduler, + register_cron_job, + ) + 
from advanced_omi_backend.workers.annotation_jobs import ( + surface_error_suggestions, + ) + from advanced_omi_backend.workers.finetuning_jobs import ( + run_asr_finetuning_job, + run_asr_jargon_extraction_job, + run_speaker_finetuning_job, + ) + from advanced_omi_backend.workers.prompt_optimization_jobs import ( + run_prompt_optimization_job, + ) - # SystemTracker is used for monitoring and debugging - application_logger.info("Using SystemTracker for monitoring and debugging") + register_cron_job("speaker_finetuning", run_speaker_finetuning_job) + register_cron_job("asr_finetuning", run_asr_finetuning_job) + register_cron_job("asr_jargon_extraction", run_asr_jargon_extraction_job) + register_cron_job("prompt_optimization", run_prompt_optimization_job) + register_cron_job("annotation_suggestions", surface_error_suggestions) - # Initialize plugins using plugin service - try: - from advanced_omi_backend.services.plugin_service import ( - init_plugin_router, - set_plugin_router, - ) + scheduler = get_scheduler() + await scheduler.start() + application_logger.info("Cron scheduler started") + except Exception as e: + application_logger.warning(f"Cron scheduler failed to start: {e}") - plugin_router = init_plugin_router() - - if plugin_router: - # Initialize async resources for each enabled plugin - for plugin_id, plugin in plugin_router.plugins.items(): - if plugin.enabled: - try: - await plugin.initialize() - plugin_router.mark_plugin_initialized(plugin_id) - application_logger.info(f"✅ Plugin '{plugin_id}' initialized") - except Exception as e: - plugin_router.mark_plugin_failed(plugin_id, str(e)) - application_logger.error( - f"Failed to initialize plugin '{plugin_id}': {e}", - exc_info=True, - ) - - health = plugin_router.get_health_summary() - application_logger.info( - f"Plugins initialized: {health['initialized']}/{health['total']} active" - + (f", {health['failed']} failed" if health["failed"] else "") + async def _init_plugins(): + try: + from 
advanced_omi_backend.services.plugin_service import ( + init_plugin_router, + set_plugin_router, ) - # Store in app state for API access - app.state.plugin_router = plugin_router - # Register with plugin service for worker access - set_plugin_router(plugin_router) - else: - application_logger.info("No plugins configured") + plugin_router = init_plugin_router() + + if plugin_router: + for plugin_id, plugin in plugin_router.plugins.items(): + if plugin.enabled: + try: + await plugin.initialize() + plugin_router.mark_plugin_initialized(plugin_id) + application_logger.info(f"Plugin '{plugin_id}' initialized") + except Exception as e: + plugin_router.mark_plugin_failed(plugin_id, str(e)) + application_logger.error( + f"Failed to initialize plugin '{plugin_id}': {e}", + exc_info=True, + ) + + health = plugin_router.get_health_summary() + application_logger.info( + f"Plugins initialized: {health['initialized']}/{health['total']} active" + + (f", {health['failed']} failed" if health["failed"] else "") + ) + + app.state.plugin_router = plugin_router + set_plugin_router(plugin_router) + else: + application_logger.info("No plugins configured") + app.state.plugin_router = None + + except Exception as e: + application_logger.error( + f"Failed to initialize plugin system: {e}", exc_info=True + ) app.state.plugin_router = None - except Exception as e: - application_logger.error( - f"Failed to initialize plugin system: {e}", exc_info=True - ) - app.state.plugin_router = None + await asyncio.gather( + _init_openmemory(), + _init_cron_scheduler(), + _init_plugins(), + ) + + application_logger.info( + f"Phase 4 (OpenMemory/Cron/Plugins) completed in {time.monotonic() - phase_start:.2f}s" + ) + total_startup = time.monotonic() - startup_start application_logger.info( - "Application ready - using application-level processing architecture." + f"Application ready in {total_startup:.2f}s - using application-level processing architecture." 
) logger.info("App ready") diff --git a/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py index 1bf41dfc..c2f1ad5d 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py @@ -46,6 +46,25 @@ audio_logger = logging.getLogger("audio_processing") +async def _get_conversation_or_error(conversation_id: str, user: User): + """Fetch a conversation and validate user access. + + Returns (conversation, None) on success, or (None, error_response) on failure. + """ + conversation = await Conversation.find_one( + Conversation.conversation_id == conversation_id + ) + if not conversation: + return None, JSONResponse( + status_code=404, content={"error": "Conversation not found"} + ) + if not user.is_superuser and conversation.user_id != str(user.user_id): + return None, JSONResponse( + status_code=403, content={"error": "Access forbidden"} + ) + return conversation, None + + async def close_current_conversation(client_id: str, user: User): """Close the current conversation for a specific client. 
@@ -112,18 +131,9 @@ async def close_current_conversation(client_id: str, user: User): async def get_conversation(conversation_id: str, user: User): """Get a single conversation with full transcript details.""" try: - # Find the conversation using Beanie - conversation = await Conversation.find_one( - Conversation.conversation_id == conversation_id - ) - if not conversation: - return JSONResponse( - status_code=404, content={"error": "Conversation not found"} - ) - - # Check ownership for non-admin users - if not user.is_superuser and conversation.user_id != str(user.user_id): - return JSONResponse(status_code=403, content={"error": "Access forbidden"}) + conversation, error = await _get_conversation_or_error(conversation_id, user) + if error: + return error # Build response with explicit curated fields response = { @@ -184,16 +194,9 @@ async def get_conversation(conversation_id: str, user: User): async def get_conversation_memories(conversation_id: str, user: User, limit: int = 100): """Get memories extracted from a specific conversation.""" try: - conversation = await Conversation.find_one( - Conversation.conversation_id == conversation_id - ) - if not conversation: - return JSONResponse( - status_code=404, content={"error": "Conversation not found"} - ) - - if not user.is_superuser and conversation.user_id != str(user.user_id): - return JSONResponse(status_code=403, content={"error": "Access forbidden"}) + conversation, error = await _get_conversation_or_error(conversation_id, user) + if error: + return error memory_service = get_memory_service() memories = await memory_service.get_memories_by_source( @@ -671,29 +674,13 @@ async def delete_conversation( f"Attempting to {'permanently ' if permanent else ''}delete conversation: {masked_id}" ) - # Find the conversation using Beanie - conversation = await Conversation.find_one( - Conversation.conversation_id == conversation_id - ) - - if not conversation: - return JSONResponse( - status_code=404, - content={"error": 
f"Conversation '{conversation_id}' not found"}, - ) - - # Check ownership for non-admin users - if not user.is_superuser and conversation.user_id != str(user.user_id): - logger.warning( - f"User {user.user_id} attempted to delete conversation {conversation_id} without permission" - ) - return JSONResponse( - status_code=403, - content={ - "error": "Access forbidden. You can only delete your own conversations.", - "details": f"Conversation '{conversation_id}' does not belong to your account.", - }, - ) + conversation, error = await _get_conversation_or_error(conversation_id, user) + if error: + if error.status_code == 403: + logger.warning( + f"User {user.user_id} attempted to delete conversation {conversation_id} without permission" + ) + return error # Hard delete (admin only, permanent flag) if permanent and user.is_superuser: @@ -719,18 +706,9 @@ async def restore_conversation(conversation_id: str, user: User) -> JSONResponse user: Requesting user """ try: - conversation = await Conversation.find_one( - Conversation.conversation_id == conversation_id - ) - - if not conversation: - return JSONResponse( - status_code=404, content={"error": "Conversation not found"} - ) - - # Permission check - if not user.is_superuser and conversation.user_id != str(user.user_id): - return JSONResponse(status_code=403, content={"error": "Access denied"}) + conversation, error = await _get_conversation_or_error(conversation_id, user) + if error: + return error if not conversation.deleted: return JSONResponse( @@ -933,16 +911,9 @@ def _enqueue_speaker_reprocessing_chain( async def toggle_star(conversation_id: str, user: User): """Toggle the starred/favorite status of a conversation.""" try: - conversation = await Conversation.find_one( - Conversation.conversation_id == conversation_id - ) - if not conversation: - return JSONResponse( - status_code=404, content={"error": "Conversation not found"} - ) - - if not user.is_superuser and conversation.user_id != str(user.user_id): - return 
JSONResponse(status_code=403, content={"error": "Access forbidden"}) + conversation, error = await _get_conversation_or_error(conversation_id, user) + if error: + return error # Toggle conversation.starred = not conversation.starred @@ -993,17 +964,9 @@ async def toggle_star(conversation_id: str, user: User): async def reprocess_orphan(conversation_id: str, user: User): """Reprocess an orphan audio session - restore if deleted and enqueue full processing chain.""" try: - conversation = await Conversation.find_one( - Conversation.conversation_id == conversation_id - ) - if not conversation: - return JSONResponse( - status_code=404, content={"error": "Conversation not found"} - ) - - # Check ownership - if not user.is_superuser and conversation.user_id != str(user.user_id): - return JSONResponse(status_code=403, content={"error": "Access forbidden"}) + conversation, error = await _get_conversation_or_error(conversation_id, user) + if error: + return error # Verify audio chunks exist (check both deleted and non-deleted) total_chunks = await AudioChunkDocument.find( @@ -1068,23 +1031,11 @@ async def reprocess_orphan(conversation_id: str, user: User): async def reprocess_transcript(conversation_id: str, user: User): """Reprocess transcript for a conversation. Users can only reprocess their own conversations.""" try: - # Find the conversation using Beanie - conversation_model = await Conversation.find_one( - Conversation.conversation_id == conversation_id + conversation_model, error = await _get_conversation_or_error( + conversation_id, user ) - if not conversation_model: - return JSONResponse( - status_code=404, content={"error": "Conversation not found"} - ) - - # Check ownership for non-admin users - if not user.is_superuser and conversation_model.user_id != str(user.user_id): - return JSONResponse( - status_code=403, - content={ - "error": "Access forbidden. You can only reprocess your own conversations." 
- }, - ) + if error: + return error # Get audio_uuid from conversation # Validate audio chunks exist in MongoDB @@ -1137,24 +1088,11 @@ async def reprocess_memory( ): """Reprocess memory extraction for a specific transcript version. Users can only reprocess their own conversations.""" try: - # Find the conversation using Beanie - conversation_model = await Conversation.find_one( - Conversation.conversation_id == conversation_id + conversation_model, error = await _get_conversation_or_error( + conversation_id, user ) - if not conversation_model: - return JSONResponse( - status_code=404, content={"error": "Conversation not found"} - ) - - # Check ownership for non-admin users - if not user.is_superuser and conversation_model.user_id != str(user.user_id): - return JSONResponse( - status_code=403, - content={ - "error": "Access forbidden. You can only reprocess your own conversations." - }, - ) - + if error: + return error # Resolve transcript version ID (handle "active" special case) error, transcript_version_id, transcript_version = _resolve_transcript_version( conversation_model, transcript_version_id @@ -1205,23 +1143,11 @@ async def reprocess_speakers( """ try: # 1. Find conversation and validate ownership - conversation_model = await Conversation.find_one( - Conversation.conversation_id == conversation_id + conversation_model, error = await _get_conversation_or_error( + conversation_id, user ) - if not conversation_model: - return JSONResponse( - status_code=404, content={"error": "Conversation not found"} - ) - - # Check ownership for non-admin users - if not user.is_superuser and conversation_model.user_id != str(user.user_id): - return JSONResponse( - status_code=403, - content={ - "error": "Access forbidden. You can only reprocess your own conversations." - }, - ) - + if error: + return error # 2-3. 
Resolve source transcript version ID and find version object error, source_version_id, source_version = _resolve_transcript_version( conversation_model, transcript_version_id @@ -1349,23 +1275,11 @@ async def activate_transcript_version( ): """Activate a specific transcript version. Users can only modify their own conversations.""" try: - # Find the conversation using Beanie - conversation_model = await Conversation.find_one( - Conversation.conversation_id == conversation_id + conversation_model, error = await _get_conversation_or_error( + conversation_id, user ) - if not conversation_model: - return JSONResponse( - status_code=404, content={"error": "Conversation not found"} - ) - - # Check ownership for non-admin users - if not user.is_superuser and conversation_model.user_id != str(user.user_id): - return JSONResponse( - status_code=403, - content={ - "error": "Access forbidden. You can only modify your own conversations." - }, - ) + if error: + return error # Activate the transcript version using Beanie model method success = conversation_model.set_active_transcript_version(version_id) @@ -1401,23 +1315,11 @@ async def activate_transcript_version( async def activate_memory_version(conversation_id: str, version_id: str, user: User): """Activate a specific memory version. Users can only modify their own conversations.""" try: - # Find the conversation using Beanie - conversation_model = await Conversation.find_one( - Conversation.conversation_id == conversation_id + conversation_model, error = await _get_conversation_or_error( + conversation_id, user ) - if not conversation_model: - return JSONResponse( - status_code=404, content={"error": "Conversation not found"} - ) - - # Check ownership for non-admin users - if not user.is_superuser and conversation_model.user_id != str(user.user_id): - return JSONResponse( - status_code=403, - content={ - "error": "Access forbidden. You can only modify your own conversations." 
- }, - ) + if error: + return error # Activate the memory version using Beanie model method success = conversation_model.set_active_memory_version(version_id) @@ -1449,23 +1351,11 @@ async def activate_memory_version(conversation_id: str, version_id: str, user: U async def get_conversation_version_history(conversation_id: str, user: User): """Get version history for a conversation. Users can only access their own conversations.""" try: - # Find the conversation using Beanie to check ownership - conversation_model = await Conversation.find_one( - Conversation.conversation_id == conversation_id + conversation_model, error = await _get_conversation_or_error( + conversation_id, user ) - if not conversation_model: - return JSONResponse( - status_code=404, content={"error": "Conversation not found"} - ) - - # Check ownership for non-admin users - if not user.is_superuser and conversation_model.user_id != str(user.user_id): - return JSONResponse( - status_code=403, - content={ - "error": "Access forbidden. You can only access your own conversations." - }, - ) + if error: + return error # Get version history from model # Convert datetime objects to ISO strings for JSON serialization diff --git a/backends/advanced/src/advanced_omi_backend/controllers/memory_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/memory_controller.py index fe4fca88..40c1ac51 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/memory_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/memory_controller.py @@ -17,15 +17,19 @@ audio_logger = logging.getLogger("audio_processing") +def _resolve_target_user(user: User, user_id: Optional[str] = None) -> str: + """Return the effective user ID: admins may override with user_id param.""" + if user.is_superuser and user_id: + return user_id + return user.user_id + + async def get_memories(user: User, limit: int, user_id: Optional[str] = None): """Get memories. 
Users see only their own memories, admins can see all or filter by user.""" try: memory_service = get_memory_service() - # Determine which user's memories to fetch - target_user_id = user.user_id - if user.is_superuser and user_id: - target_user_id = user_id + target_user_id = _resolve_target_user(user, user_id) # Execute memory retrieval directly (now async) memories = await memory_service.get_all_memories(target_user_id, limit) @@ -40,7 +44,7 @@ async def get_memories(user: User, limit: int, user_id: Optional[str] = None): "memories": memories_dicts, "count": len(memories), "total_count": total_count, - "user_id": target_user_id + "user_id": target_user_id, } except Exception as e: @@ -50,15 +54,14 @@ async def get_memories(user: User, limit: int, user_id: Optional[str] = None): ) -async def get_memories_with_transcripts(user: User, limit: int, user_id: Optional[str] = None): +async def get_memories_with_transcripts( + user: User, limit: int, user_id: Optional[str] = None +): """Get memories with their source transcripts. 
Users see only their own memories, admins can see all or filter by user.""" try: memory_service = get_memory_service() - # Determine which user's memories to fetch - target_user_id = user.user_id - if user.is_superuser and user_id: - target_user_id = user_id + target_user_id = _resolve_target_user(user, user_id) # Execute memory retrieval directly (now async) memories_with_transcripts = await memory_service.get_memories_with_transcripts( @@ -72,25 +75,32 @@ async def get_memories_with_transcripts(user: User, limit: int, user_id: Optiona } except Exception as e: - audio_logger.error(f"Error fetching memories with transcripts: {e}", exc_info=True) + audio_logger.error( + f"Error fetching memories with transcripts: {e}", exc_info=True + ) return JSONResponse( status_code=500, content={"message": f"Error fetching memories with transcripts: {str(e)}"}, ) -async def search_memories(query: str, user: User, limit: int, score_threshold: float = 0.0, user_id: Optional[str] = None): +async def search_memories( + query: str, + user: User, + limit: int, + score_threshold: float = 0.0, + user_id: Optional[str] = None, +): """Search memories by text query. 
Users can only search their own memories, admins can search all or filter by user.""" try: memory_service = get_memory_service() - # Determine which user's memories to search - target_user_id = user.user_id - if user.is_superuser and user_id: - target_user_id = user_id + target_user_id = _resolve_target_user(user, user_id) # Execute search directly (now async) - search_results = await memory_service.search_memories(query, target_user_id, limit, score_threshold) + search_results = await memory_service.search_memories( + query, target_user_id, limit, score_threshold + ) # Convert MemoryEntry objects to dicts for JSON serialization results_dicts = [result.to_dict() for result in search_results] @@ -122,16 +132,26 @@ async def delete_memory(memory_id: str, user: User): # MemoryEntry is a dataclass, access id attribute directly memory_ids = [str(mem.id) for mem in user_memories] if memory_id not in memory_ids: - return JSONResponse(status_code=404, content={"message": "Memory not found"}) + return JSONResponse( + status_code=404, content={"message": "Memory not found"} + ) # Delete the memory - audio_logger.info(f"Deleting memory {memory_id} for user_id={user.user_id}, email={user.email}") - success = await memory_service.delete_memory(memory_id, user_id=user.user_id, user_email=user.email) + audio_logger.info( + f"Deleting memory {memory_id} for user_id={user.user_id}, email={user.email}" + ) + success = await memory_service.delete_memory( + memory_id, user_id=user.user_id, user_email=user.email + ) if success: - return JSONResponse(content={"message": f"Memory {memory_id} deleted successfully"}) + return JSONResponse( + content={"message": f"Memory {memory_id} deleted successfully"} + ) else: - return JSONResponse(status_code=404, content={"message": "Memory not found"}) + return JSONResponse( + status_code=404, content={"message": "Memory not found"} + ) except Exception as e: audio_logger.error(f"Error deleting memory: {e}", exc_info=True) @@ -146,7 +166,9 @@ async 
def add_memory(content: str, user: User, source_id: Optional[str] = None): memory_service = get_memory_service() # Use source_id or generate a unique one - memory_source_id = source_id or f"manual_{user.user_id}_{int(asyncio.get_event_loop().time())}" + memory_source_id = ( + source_id or f"manual_{user.user_id}_{int(asyncio.get_event_loop().time())}" + ) # Extract memories from content success, memory_ids = await memory_service.add_memory( @@ -156,7 +178,7 @@ async def add_memory(content: str, user: User, source_id: Optional[str] = None): user_id=user.user_id, user_email=user.email, allow_update=False, - db_helper=None + db_helper=None, ) if success: @@ -165,18 +187,19 @@ async def add_memory(content: str, user: User, source_id: Optional[str] = None): "memory_ids": memory_ids, "count": len(memory_ids), "source_id": memory_source_id, - "message": f"Successfully created {len(memory_ids)} memory/memories" + "message": f"Successfully created {len(memory_ids)} memory/memories", } else: return JSONResponse( status_code=500, - content={"success": False, "message": "Failed to create memories"} + content={"success": False, "message": "Failed to create memories"}, ) except Exception as e: audio_logger.error(f"Error adding memory: {e}", exc_info=True) return JSONResponse( - status_code=500, content={"success": False, "message": f"Error adding memory: {str(e)}"} + status_code=500, + content={"success": False, "message": f"Error adding memory: {str(e)}"}, ) @@ -225,7 +248,8 @@ async def get_all_memories_admin(user: User, limit: int): except Exception as e: audio_logger.error(f"Error fetching admin memories: {e}", exc_info=True) return JSONResponse( - status_code=500, content={"message": f"Error fetching admin memories: {str(e)}"} + status_code=500, + content={"message": f"Error fetching admin memories: {str(e)}"}, ) @@ -234,10 +258,7 @@ async def get_memory_by_id(memory_id: str, user: User, user_id: Optional[str] = try: memory_service = get_memory_service() - # Determine which 
user's memory to fetch - target_user_id = user.user_id - if user.is_superuser and user_id: - target_user_id = user_id + target_user_id = _resolve_target_user(user, user_id) # Get the specific memory memory = await memory_service.get_memory(memory_id, target_user_id) @@ -265,11 +286,15 @@ async def get_memory_by_id(memory_id: str, user: User, user_id: Optional[str] = ), } except Exception as e: - logger.warning(f"Failed to fetch source conversation {source_id}: {e}") + logger.warning( + f"Failed to fetch source conversation {source_id}: {e}" + ) return {"memory": memory_dict} else: - return JSONResponse(status_code=404, content={"message": "Memory not found"}) + return JSONResponse( + status_code=404, content={"message": "Memory not found"} + ) except Exception as e: audio_logger.error(f"Error fetching memory {memory_id}: {e}", exc_info=True) diff --git a/backends/advanced/src/advanced_omi_backend/models/annotation.py b/backends/advanced/src/advanced_omi_backend/models/annotation.py index 451d84d1..99974532 100644 --- a/backends/advanced/src/advanced_omi_backend/models/annotation.py +++ b/backends/advanced/src/advanced_omi_backend/models/annotation.py @@ -16,22 +16,26 @@ class AnnotationType(str, Enum): """Type of content being annotated.""" + MEMORY = "memory" TRANSCRIPT = "transcript" DIARIZATION = "diarization" # Speaker identification corrections ENTITY = "entity" # Knowledge graph entity corrections (name/details edits) TITLE = "title" # Conversation title corrections INSERT = "insert" # Insert new segment between existing segments + SPEECH_SUGGESTION_CORRECTION = "speech_suggestion_correction" # User-refined model suggestion (training signal triple) class AnnotationSource(str, Enum): """Origin of the annotation.""" + USER = "user" # User-created edit MODEL_SUGGESTION = "model_suggestion" # AI-generated suggestion class AnnotationStatus(str, Enum): """Lifecycle status of annotation.""" + PENDING = "pending" # Waiting for user review (suggestions) ACCEPTED = 
"accepted" # Applied to content REJECTED = "rejected" # User dismissed suggestion @@ -79,6 +83,11 @@ class Annotation(Document): entity_id: Optional[str] = None # Neo4j entity ID entity_field: Optional[str] = None # Which field was changed ("name" or "details") + # For SPEECH_SUGGESTION_CORRECTION annotations: + model_suggested_text: Optional[str] = ( + None # What AI originally suggested before user edited + ) + # For INSERT annotations: insert_after_index: Optional[int] = None # -1 = before first segment insert_text: Optional[str] = None # e.g., "[laughter]" or "wife laughed" @@ -86,17 +95,17 @@ class Annotation(Document): insert_speaker: Optional[str] = None # Speaker label for "speech" type inserts # Processed tracking (applies to ALL annotation types) - processed: bool = Field(default=False) # Whether annotation has been applied/sent to training + processed: bool = Field( + default=False + ) # Whether annotation has been applied/sent to training processed_at: Optional[datetime] = None # When annotation was processed - processed_by: Optional[str] = None # What processed it (manual, cron, apply, training, etc.) + processed_by: Optional[str] = ( + None # What processed it (manual, cron, apply, training, etc.) 
+ ) # Timestamps (Python 3.12+ compatible) - created_at: datetime = Field( - default_factory=lambda: datetime.now(timezone.utc) - ) - updated_at: datetime = Field( - default_factory=lambda: datetime.now(timezone.utc) - ) + created_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc)) + updated_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc)) class Settings: name = "annotations" @@ -132,6 +141,10 @@ def is_title_annotation(self) -> bool: """Check if this is a title annotation.""" return self.annotation_type == AnnotationType.TITLE + def is_speech_suggestion_correction(self) -> bool: + """Check if this is a user-refined model suggestion.""" + return self.annotation_type == AnnotationType.SPEECH_SUGGESTION_CORRECTION + def is_pending_suggestion(self) -> bool: """Check if this is a pending AI suggestion.""" return ( @@ -145,6 +158,7 @@ def is_pending_suggestion(self) -> bool: class AnnotationCreateBase(BaseModel): """Base model for annotation creation.""" + original_text: str = "" # Optional for diarization corrected_text: str = "" # Optional for diarization status: AnnotationStatus = AnnotationStatus.ACCEPTED @@ -152,6 +166,7 @@ class AnnotationCreateBase(BaseModel): class MemoryAnnotationCreate(AnnotationCreateBase): """Create memory annotation request.""" + memory_id: str original_text: str # Required for memory annotations corrected_text: str # Required for memory annotations @@ -159,6 +174,7 @@ class MemoryAnnotationCreate(AnnotationCreateBase): class TranscriptAnnotationCreate(AnnotationCreateBase): """Create transcript annotation request.""" + conversation_id: str segment_index: int original_text: str # Required for transcript annotations @@ -167,6 +183,7 @@ class TranscriptAnnotationCreate(AnnotationCreateBase): class DiarizationAnnotationCreate(BaseModel): """Create diarization annotation request.""" + conversation_id: str segment_index: int original_speaker: str @@ -181,6 +198,7 @@ class 
EntityAnnotationCreate(BaseModel): Dual purpose: feeds both the jargon pipeline (entity name corrections = domain vocabulary the ASR should know) and the entity extraction pipeline (corrections improve future accuracy). """ + entity_id: str entity_field: str # "name" or "details" original_text: str @@ -189,6 +207,7 @@ class EntityAnnotationCreate(BaseModel): class TitleAnnotationCreate(AnnotationCreateBase): """Create title annotation request.""" + conversation_id: str original_text: str corrected_text: str @@ -196,6 +215,7 @@ class TitleAnnotationCreate(AnnotationCreateBase): class InsertAnnotationCreate(BaseModel): """Create insert annotation request (new segment between existing segments).""" + conversation_id: str insert_after_index: int # -1 = before first segment insert_text: str @@ -205,8 +225,10 @@ class InsertAnnotationCreate(BaseModel): class AnnotationUpdate(BaseModel): """Update an existing unprocessed annotation.""" + corrected_text: Optional[str] = None corrected_speaker: Optional[str] = None + model_suggested_text: Optional[str] = None insert_text: Optional[str] = None insert_segment_type: Optional[str] = None insert_speaker: Optional[str] = None @@ -214,6 +236,7 @@ class AnnotationUpdate(BaseModel): class AnnotationResponse(BaseModel): """Annotation response for API.""" + id: str annotation_type: AnnotationType user_id: str @@ -227,6 +250,7 @@ class AnnotationResponse(BaseModel): segment_start_time: Optional[float] = None entity_id: Optional[str] = None entity_field: Optional[str] = None + model_suggested_text: Optional[str] = None insert_after_index: Optional[int] = None insert_text: Optional[str] = None insert_segment_type: Optional[str] = None diff --git a/backends/advanced/src/advanced_omi_backend/prompt_defaults.py b/backends/advanced/src/advanced_omi_backend/prompt_defaults.py index 94b03d31..7e847b41 100644 --- a/backends/advanced/src/advanced_omi_backend/prompt_defaults.py +++ b/backends/advanced/src/advanced_omi_backend/prompt_defaults.py 
@@ -607,6 +607,42 @@ def register_all_defaults(registry: PromptRegistry) -> None: is_dynamic=True, ) + # ------------------------------------------------------------------ + # annotation.transcript_error_detection + # ------------------------------------------------------------------ + registry.register_default( + "annotation.transcript_error_detection", + template="""\ +You are a transcript quality reviewer. Analyze the following transcript segments \ +from a conversation and identify potential transcription errors. + +Look for: +- Misheard words (homophones, phonetically similar substitutions) +- Nonsensical phrases that are likely ASR mistakes +- Obvious hallucinations or repeated/garbled text +- Missing or extra words that break sentence meaning + +Conversation title: {{title}} + +Segments (index: speaker - text): +{{segments_text}} + +Return a JSON array of issues found. Each issue should have: +- "segment_index": the index number of the problematic segment +- "original_text": the exact text from that segment +- "corrected_text": your suggested correction +- "reason": brief explanation (e.g. 
"misheard word", "garbled text", "hallucination") + +If no issues are found, return an empty array: [] + +Return ONLY the JSON array, no other text.""", + name="Transcript Error Detection", + description="Analyzes transcript segments for ASR errors, hallucinations, and misheard words.", + category="annotation", + variables=["title", "segments_text"], + is_dynamic=True, + ) + # ------------------------------------------------------------------ # prompt_optimization.title_optimizer # ------------------------------------------------------------------ diff --git a/backends/advanced/src/advanced_omi_backend/routers/modules/annotation_routes.py b/backends/advanced/src/advanced_omi_backend/routers/modules/annotation_routes.py index e04e6c76..43ffa212 100644 --- a/backends/advanced/src/advanced_omi_backend/routers/modules/annotation_routes.py +++ b/backends/advanced/src/advanced_omi_backend/routers/modules/annotation_routes.py @@ -9,13 +9,14 @@ from datetime import datetime, timezone from typing import List -from fastapi import APIRouter, Depends, HTTPException +from fastapi import APIRouter, Depends, HTTPException, Query from fastapi.responses import JSONResponse from advanced_omi_backend.auth import current_active_user from advanced_omi_backend.models.annotation import ( Annotation, AnnotationResponse, + AnnotationSource, AnnotationStatus, AnnotationType, AnnotationUpdate, @@ -36,6 +37,103 @@ router = APIRouter(prefix="/annotations", tags=["annotations"]) +@router.get("/suggestions") +async def get_pending_suggestions( + current_user: User = Depends(current_active_user), + limit: int = Query(20, ge=1, le=100), +): + """ + Get pending AI-generated suggestions for the current user. + + Returns MODEL_SUGGESTION annotations with PENDING status, + enriched with conversation context (title, transcript snippet, + audio path) for the swipe review UI. 
+ """ + try: + annotations = ( + await Annotation.find( + Annotation.user_id == current_user.user_id, + Annotation.source == AnnotationSource.MODEL_SUGGESTION, + Annotation.status == AnnotationStatus.PENDING, + ) + .sort("-created_at") + .limit(limit) + .to_list() + ) + + if not annotations: + return [] + + # Batch-fetch conversations for context + conversation_ids = list( + {a.conversation_id for a in annotations if a.conversation_id} + ) + conversations = await Conversation.find( + {"conversation_id": {"$in": conversation_ids}}, + ).to_list() + conv_map = {c.conversation_id: c for c in conversations} + + results = [] + for a in annotations: + conv = conv_map.get(a.conversation_id) + + segment_start = None + segment_end = None + if conv and a.segment_index is not None: + transcript = conv.active_transcript + if ( + transcript + and transcript.segments + and a.segment_index < len(transcript.segments) + ): + seg = transcript.segments[a.segment_index] + segment_start = seg.start + segment_end = seg.end + + results.append( + { + "id": a.id, + "annotation_type": a.annotation_type, + "conversation_id": a.conversation_id, + "segment_index": a.segment_index, + "original_text": a.original_text, + "corrected_text": a.corrected_text, + "created_at": a.created_at.isoformat(), + "conversation_title": conv.title if conv else None, + "transcript_snippet": _get_segment_context(conv, a.segment_index), + "segment_start": segment_start, + "segment_end": segment_end, + } + ) + + return results + + except Exception as e: + logger.error(f"Error fetching suggestions: {e}", exc_info=True) + raise HTTPException( + status_code=500, detail=f"Failed to fetch suggestions: {str(e)}" + ) + + +def _get_segment_context( + conversation, segment_index: int | None, context_size: int = 1 +) -> str | None: + """Get a snippet of transcript around the flagged segment for context.""" + if not conversation or segment_index is None: + return None + transcript = conversation.active_transcript + if not 
transcript or not transcript.segments: + return None + start = max(0, segment_index - context_size) + end = min(len(transcript.segments), segment_index + context_size + 1) + lines = [] + for i in range(start, end): + seg = transcript.segments[i] + prefix = ">>> " if i == segment_index else " " + lines.append(f"{prefix}{seg.speaker}: {seg.text}") + return "\n".join(lines) + + @router.post("/memory", response_model=AnnotationResponse) async def create_memory_annotation( annotation_data: MemoryAnnotationCreate, @@ -85,11 +183,15 @@ async def create_memory_annotation( content=annotation_data.corrected_text, user_id=current_user.user_id, ) - logger.info(f"Updated memory {annotation_data.memory_id} with corrected text") + logger.info( + f"Updated memory {annotation_data.memory_id} with corrected text" + ) except Exception as e: logger.error(f"Error updating memory: {e}") # Annotation is saved, but memory update failed - log but don't fail the request - logger.warning(f"Memory annotation {annotation.id} saved but memory update failed") + logger.warning( + f"Memory annotation {annotation.id} saved but memory update failed" + ) return AnnotationResponse.model_validate(annotation) @@ -237,7 +339,22 @@ async def update_annotation_status( annotation.updated_at = datetime.now(timezone.utc) # If accepting a pending suggestion, apply the correction - if status == AnnotationStatus.ACCEPTED and old_status == AnnotationStatus.PENDING: + if ( + status == AnnotationStatus.ACCEPTED + and old_status == AnnotationStatus.PENDING + ): + # Promote to SPEECH_SUGGESTION_CORRECTION if user edited the AI suggestion + if ( + annotation.source == AnnotationSource.MODEL_SUGGESTION + and annotation.model_suggested_text is not None + and annotation.is_transcript_annotation() + ): + annotation.annotation_type = AnnotationType.SPEECH_SUGGESTION_CORRECTION + logger.info( + f"Promoted annotation {annotation_id} to SPEECH_SUGGESTION_CORRECTION " + f"(AI suggested: {annotation.model_suggested_text!r}, 
user decided: {annotation.corrected_text!r})" + ) + if annotation.is_memory_annotation(): # Update memory try: @@ -251,8 +368,11 @@ async def update_annotation_status( except Exception as e: logger.error(f"Error applying memory suggestion: {e}") # Don't fail the status update if memory update fails - elif annotation.is_transcript_annotation(): - # Update transcript segment + elif ( + annotation.is_transcript_annotation() + or annotation.is_speech_suggestion_correction() + ): + # Update transcript segment (same logic for both TRANSCRIPT and SPEECH_SUGGESTION_CORRECTION) try: conversation = await Conversation.find_one( Conversation.conversation_id == annotation.conversation_id, @@ -260,7 +380,9 @@ async def update_annotation_status( ) if conversation: transcript = conversation.active_transcript - if transcript and annotation.segment_index < len(transcript.segments): + if transcript and annotation.segment_index < len( + transcript.segments + ): transcript.segments[annotation.segment_index].text = ( annotation.corrected_text ) @@ -286,7 +408,9 @@ async def update_annotation_status( user_id=annotation.user_id, **update_kwargs, ) - logger.info(f"Applied entity suggestion to entity {annotation.entity_id}") + logger.info( + f"Applied entity suggestion to entity {annotation.entity_id}" + ) except Exception as e: logger.error(f"Error applying entity suggestion: {e}") # Don't fail the status update if entity update fails @@ -310,7 +434,11 @@ async def update_annotation_status( await annotation.save() logger.info(f"Updated annotation {annotation_id} status to {status}") - return {"status": "updated", "annotation_id": annotation_id, "new_status": status} + return { + "status": "updated", + "annotation_id": annotation_id, + "new_status": status, + } except HTTPException: raise @@ -345,7 +473,9 @@ async def delete_annotation( raise HTTPException(status_code=404, detail="Annotation not found") if annotation.processed: - raise HTTPException(status_code=400, detail="Cannot delete a 
processed annotation") + raise HTTPException( + status_code=400, detail="Cannot delete a processed annotation" + ) await annotation.delete() logger.info(f"Deleted annotation {annotation_id}") @@ -384,10 +514,22 @@ async def update_annotation( raise HTTPException(status_code=404, detail="Annotation not found") if annotation.processed: - raise HTTPException(status_code=400, detail="Cannot update a processed annotation") + raise HTTPException( + status_code=400, detail="Cannot update a processed annotation" + ) if update_data.corrected_text is not None: + # Auto-capture AI's original suggestion before user overwrites it + if ( + annotation.source == AnnotationSource.MODEL_SUGGESTION + and annotation.model_suggested_text is None + and annotation.corrected_text + and update_data.corrected_text != annotation.corrected_text + ): + annotation.model_suggested_text = annotation.corrected_text annotation.corrected_text = update_data.corrected_text + if update_data.model_suggested_text is not None: + annotation.model_suggested_text = update_data.model_suggested_text if update_data.corrected_speaker is not None: annotation.corrected_speaker = update_data.corrected_speaker if update_data.insert_text is not None: @@ -441,7 +583,10 @@ async def create_insert_annotation( raise HTTPException(status_code=400, detail="No active transcript found") segment_count = len(active_transcript.segments) - if annotation_data.insert_after_index < -1 or annotation_data.insert_after_index >= segment_count: + if ( + annotation_data.insert_after_index < -1 + or annotation_data.insert_after_index >= segment_count + ): raise HTTPException( status_code=400, detail=f"insert_after_index must be between -1 and {segment_count - 1}", @@ -572,7 +717,9 @@ async def create_entity_annotation( user_id=current_user.user_id, **update_kwargs, ) - logger.info(f"Applied entity correction to Neo4j for entity {annotation_data.entity_id}") + logger.info( + f"Applied entity correction to Neo4j for entity 
{annotation_data.entity_id}" + ) except Exception as e: logger.error(f"Error applying entity correction to Neo4j: {e}") # Annotation is saved but Neo4j update failed — log but don't fail the request @@ -657,7 +804,9 @@ async def create_title_annotation( try: conversation.title = annotation_data.corrected_text await conversation.save() - logger.info(f"Updated title for conversation {annotation_data.conversation_id}") + logger.info( + f"Updated title for conversation {annotation_data.conversation_id}" + ) except Exception as e: logger.error(f"Error updating conversation title: {e}") # Annotation is saved but title update failed — log but don't fail the request @@ -697,7 +846,6 @@ async def get_title_annotations( ) - # === Diarization Annotation Routes === @@ -817,7 +965,10 @@ async def apply_diarization_annotations( if not annotations: return JSONResponse( - content={"message": "No pending annotations to apply", "applied_count": 0} + content={ + "message": "No pending annotations to apply", + "applied_count": 0, + } ) # Get active transcript version @@ -839,7 +990,9 @@ async def apply_diarization_annotations( key=lambda a: a.updated_at, reverse=True, ) - annotation_for_segment = annotations_for_segment[0] if annotations_for_segment else None + annotation_for_segment = ( + annotations_for_segment[0] if annotations_for_segment else None + ) if annotation_for_segment: # Apply correction @@ -951,7 +1104,10 @@ async def apply_all_annotations( a for a in annotations if a.annotation_type == AnnotationType.DIARIZATION ] transcript_annotations = [ - a for a in annotations if a.annotation_type == AnnotationType.TRANSCRIPT + a + for a in annotations + if a.annotation_type + in (AnnotationType.TRANSCRIPT, AnnotationType.SPEECH_SUGGESTION_CORRECTION) ] insert_annotations = [ a for a in annotations if a.annotation_type == AnnotationType.INSERT diff --git a/backends/advanced/src/advanced_omi_backend/routers/modules/finetuning_routes.py 
b/backends/advanced/src/advanced_omi_backend/routers/modules/finetuning_routes.py index 7abb8bbd..2e338232 100644 --- a/backends/advanced/src/advanced_omi_backend/routers/modules/finetuning_routes.py +++ b/backends/advanced/src/advanced_omi_backend/routers/modules/finetuning_routes.py @@ -25,7 +25,9 @@ @router.post("/process-annotations") async def process_annotations_for_training( current_user: User = Depends(current_active_user), - annotation_type: Optional[str] = Query("diarization", description="Type of annotations to process"), + annotation_type: Optional[str] = Query( + "diarization", description="Type of annotations to process" + ), ): """ Send processed annotations to speaker recognition service for training. @@ -44,8 +46,7 @@ async def process_annotations_for_training( # Only admins can trigger training for now (can expand to per-user later) if not current_user.is_superuser: raise HTTPException( - status_code=403, - detail="Only administrators can trigger model training" + status_code=403, detail="Only administrators can trigger model training" ) # Find annotations ready for training @@ -57,15 +58,18 @@ async def process_annotations_for_training( # Filter out already trained annotations (processed_by contains "training") ready_for_training = [ - a for a in annotations + a + for a in annotations if not a.processed_by or "training" not in a.processed_by ] if not ready_for_training: - return JSONResponse(content={ - "message": "No annotations ready for training", - "processed_count": 0 - }) + return JSONResponse( + content={ + "message": "No annotations ready for training", + "processed_count": 0, + } + ) # Import required modules from advanced_omi_backend.models.conversation import Conversation @@ -78,13 +82,16 @@ async def process_annotations_for_training( # Initialize speaker client speaker_client = SpeakerRecognitionClient() - + if not speaker_client.enabled: - return JSONResponse(content={ - "message": "Speaker recognition service is not enabled", - 
"processed_count": 0, - "status": "error" - }, status_code=503) + return JSONResponse( + content={ + "message": "Speaker recognition service is not enabled", + "processed_count": 0, + "status": "error", + }, + status_code=503, + ) # Track processing statistics enrolled_count = 0 @@ -101,27 +108,33 @@ async def process_annotations_for_training( if not conversation or not conversation.active_transcript: failed_count += 1 - errors.append(f"Conversation {annotation.conversation_id[:8]} not found") + errors.append( + f"Conversation {annotation.conversation_id[:8]} not found" + ) continue # Validate segment index - if annotation.segment_index >= len(conversation.active_transcript.segments): + if annotation.segment_index >= len( + conversation.active_transcript.segments + ): failed_count += 1 errors.append(f"Invalid segment index {annotation.segment_index}") continue - segment = conversation.active_transcript.segments[annotation.segment_index] + segment = conversation.active_transcript.segments[ + annotation.segment_index + ] # 2. Extract audio segment from MongoDB logger.info( f"Extracting audio for conversation {annotation.conversation_id[:8]}... " f"segment {annotation.segment_index} ({segment.start:.2f}s - {segment.end:.2f}s)" ) - + wav_bytes = await reconstruct_audio_segment( conversation_id=annotation.conversation_id, start_time=segment.start, - end_time=segment.end + end_time=segment.end, ) if not wav_bytes: @@ -135,42 +148,49 @@ async def process_annotations_for_training( # 3. 
Check if speaker exists existing_speaker = await speaker_client.get_speaker_by_name( speaker_name=annotation.corrected_speaker, - user_id=1 # TODO: Map Chronicle user_id to speaker service user_id + user_id=1, # TODO: Map Chronicle user_id to speaker service user_id ) if existing_speaker: # APPEND to existing speaker - logger.info(f"Appending to existing speaker: {annotation.corrected_speaker}") + logger.info( + f"Appending to existing speaker: {annotation.corrected_speaker}" + ) result = await speaker_client.append_to_speaker( - speaker_id=existing_speaker["id"], - audio_data=wav_bytes + speaker_id=existing_speaker["id"], audio_data=wav_bytes ) - + if "error" in result: logger.error(f"Failed to append to speaker: {result}") failed_count += 1 errors.append(f"Append failed: {result.get('error')}") continue - + appended_count += 1 - logger.info(f"✅ Successfully appended to speaker '{annotation.corrected_speaker}'") + logger.info( + f"✅ Successfully appended to speaker '{annotation.corrected_speaker}'" + ) else: # ENROLL new speaker - logger.info(f"Enrolling new speaker: {annotation.corrected_speaker}") + logger.info( + f"Enrolling new speaker: {annotation.corrected_speaker}" + ) result = await speaker_client.enroll_new_speaker( speaker_name=annotation.corrected_speaker, audio_data=wav_bytes, - user_id=1 # TODO: Map Chronicle user_id to speaker service user_id + user_id=1, # TODO: Map Chronicle user_id to speaker service user_id ) - + if "error" in result: logger.error(f"Failed to enroll speaker: {result}") failed_count += 1 errors.append(f"Enroll failed: {result.get('error')}") continue - + enrolled_count += 1 - logger.info(f"✅ Successfully enrolled new speaker '{annotation.corrected_speaker}'") + logger.info( + f"✅ Successfully enrolled new speaker '{annotation.corrected_speaker}'" + ) # 4. 
Mark annotation as trained if annotation.processed_by: @@ -181,7 +201,9 @@ async def process_annotations_for_training( await annotation.save() except Exception as e: - logger.error(f"Error processing annotation {annotation.id}: {e}", exc_info=True) + logger.error( + f"Error processing annotation {annotation.id}: {e}", exc_info=True + ) failed_count += 1 errors.append(f"Exception: {str(e)[:50]}") continue @@ -192,15 +214,17 @@ async def process_annotations_for_training( f"({enrolled_count} new, {appended_count} appended, {failed_count} failed)" ) - return JSONResponse(content={ - "message": "Training complete", - "enrolled_new_speakers": enrolled_count, - "appended_to_existing": appended_count, - "total_processed": total_processed, - "failed_count": failed_count, - "errors": errors[:10] if errors else [], - "status": "success" if total_processed > 0 else "partial_failure" - }) + return JSONResponse( + content={ + "message": "Training complete", + "enrolled_new_speakers": enrolled_count, + "appended_to_existing": appended_count, + "total_processed": total_processed, + "failed_count": failed_count, + "errors": errors[:10] if errors else [], + "status": "success" if total_processed > 0 else "partial_failure", + } + ) except HTTPException: raise @@ -226,7 +250,9 @@ async def export_asr_dataset( Export job results with counts of conversations exported and annotations consumed. 
""" if not current_user.is_superuser: - raise HTTPException(status_code=403, detail="Only administrators can trigger ASR dataset export") + raise HTTPException( + status_code=403, detail="Only administrators can trigger ASR dataset export" + ) try: from advanced_omi_backend.workers.finetuning_jobs import run_asr_finetuning_job @@ -235,7 +261,9 @@ async def export_asr_dataset( return JSONResponse(content=result) except Exception as e: logger.error(f"ASR dataset export failed: {e}", exc_info=True) - raise HTTPException(status_code=500, detail=f"ASR dataset export failed: {str(e)}") + raise HTTPException( + status_code=500, detail=f"ASR dataset export failed: {str(e)}" + ) @router.get("/status") @@ -269,7 +297,11 @@ async def get_finetuning_status( ).to_list() # Batch-check which conversation_ids still exist - conv_annotation_types = {AnnotationType.DIARIZATION, AnnotationType.TRANSCRIPT} + conv_annotation_types = { + AnnotationType.DIARIZATION, + AnnotationType.TRANSCRIPT, + AnnotationType.SPEECH_SUGGESTION_CORRECTION, + } all_conv_ids: set[str] = set() for ann_type in conv_annotation_types: for a in all_annotations_by_type.get(ann_type, []): @@ -291,8 +323,12 @@ async def get_finetuning_status( # Identify orphaned annotations for conversation-based types if ann_type in conv_annotation_types: - orphaned = [a for a in annotations if a.conversation_id in orphaned_conv_ids] - non_orphaned = [a for a in annotations if a.conversation_id not in orphaned_conv_ids] + orphaned = [ + a for a in annotations if a.conversation_id in orphaned_conv_ids + ] + non_orphaned = [ + a for a in annotations if a.conversation_id not in orphaned_conv_ids + ] else: # Memory/entity orphan detection is placeholder for now orphaned = [] @@ -300,9 +336,12 @@ async def get_finetuning_status( pending = [a for a in non_orphaned if not a.processed] processed = [a for a in non_orphaned if a.processed] - trained = [a for a in processed if a.processed_by and "training" in a.processed_by] + trained = [ + 
a for a in processed if a.processed_by and "training" in a.processed_by + ] applied_not_trained = [ - a for a in processed + a + for a in processed if not a.processed_by or "training" not in a.processed_by ] @@ -333,9 +372,17 @@ async def get_finetuning_status( if trained_diarization_list: latest_trained = max( trained_diarization_list, - key=lambda a: a.updated_at if a.updated_at else datetime.min.replace(tzinfo=timezone.utc) + key=lambda a: ( + a.updated_at + if a.updated_at + else datetime.min.replace(tzinfo=timezone.utc) + ), + ) + last_training_run = ( + latest_trained.updated_at.isoformat() + if latest_trained.updated_at + else None ) - last_training_run = latest_trained.updated_at.isoformat() if latest_trained.updated_at else None # Get cron job status from scheduler try: @@ -344,7 +391,9 @@ async def get_finetuning_status( scheduler = get_scheduler() all_jobs = await scheduler.get_all_jobs_status() # Find speaker finetuning job for backward compat - speaker_job = next((j for j in all_jobs if j["job_id"] == "speaker_finetuning"), None) + speaker_job = next( + (j for j in all_jobs if j["job_id"] == "speaker_finetuning"), None + ) cron_status = { "enabled": speaker_job["enabled"] if speaker_job else False, "schedule": speaker_job["schedule"] if speaker_job else "0 2 * * *", @@ -359,15 +408,17 @@ async def get_finetuning_status( "next_run": None, } - return JSONResponse(content={ - "pending_annotation_count": pending_count, - "applied_annotation_count": applied_count, - "trained_annotation_count": trained_count, - "last_training_run": last_training_run, - "cron_status": cron_status, - "annotation_counts": annotation_counts, - "orphaned_annotation_count": total_orphaned, - }) + return JSONResponse( + content={ + "pending_annotation_count": pending_count, + "applied_annotation_count": applied_count, + "trained_annotation_count": trained_count, + "last_training_run": last_training_run, + "cron_status": cron_status, + "annotation_counts": annotation_counts, + 
"orphaned_annotation_count": total_orphaned, + } + ) except Exception as e: logger.error(f"Error fetching fine-tuning status: {e}", exc_info=True) @@ -385,7 +436,9 @@ async def get_finetuning_status( @router.delete("/orphaned-annotations") async def delete_orphaned_annotations( current_user: User = Depends(current_active_user), - annotation_type: Optional[str] = Query(None, description="Filter by annotation type (e.g. 'diarization')"), + annotation_type: Optional[str] = Query( + None, description="Filter by annotation type (e.g. 'diarization')" + ), ): """ Find and delete orphaned annotations whose referenced conversation no longer exists. @@ -404,9 +457,17 @@ async def delete_orphaned_annotations( try: requested_type = AnnotationType(annotation_type) except ValueError: - raise HTTPException(status_code=400, detail=f"Unknown annotation type: {annotation_type}") + raise HTTPException( + status_code=400, detail=f"Unknown annotation type: {annotation_type}" + ) if requested_type not in conv_annotation_types: - return JSONResponse(content={"deleted_count": 0, "by_type": {}, "message": "Orphan detection not supported for this type"}) + return JSONResponse( + content={ + "deleted_count": 0, + "by_type": {}, + "message": "Orphan detection not supported for this type", + } + ) types_to_check = {requested_type} else: types_to_check = conv_annotation_types @@ -448,10 +509,12 @@ async def delete_orphaned_annotations( total_deleted += len(orphaned) logger.info(f"Deleted {total_deleted} orphaned annotations: {deleted_by_type}") - return JSONResponse(content={ - "deleted_count": total_deleted, - "by_type": deleted_by_type, - }) + return JSONResponse( + content={ + "deleted_count": total_deleted, + "by_type": deleted_by_type, + } + ) @router.post("/orphaned-annotations/reattach") diff --git a/backends/advanced/src/advanced_omi_backend/services/plugin_service.py b/backends/advanced/src/advanced_omi_backend/services/plugin_service.py index 9a8cc205..a2061c5d 100644 --- 
a/backends/advanced/src/advanced_omi_backend/services/plugin_service.py +++ b/backends/advanced/src/advanced_omi_backend/services/plugin_service.py @@ -18,6 +18,7 @@ from advanced_omi_backend.config_loader import get_plugins_yml_path from advanced_omi_backend.plugins import BasePlugin, PluginRouter +from advanced_omi_backend.plugins.events import PluginEvent from advanced_omi_backend.plugins.services import PluginServices logger = logging.getLogger(__name__) @@ -176,7 +177,9 @@ def replacer(match): return value -def load_plugin_config(plugin_id: str, orchestration_config: Dict[str, Any]) -> Dict[str, Any]: +def load_plugin_config( + plugin_id: str, orchestration_config: Dict[str, Any] +) -> Dict[str, Any]: """ Load complete plugin configuration from multiple sources. @@ -215,9 +218,13 @@ def load_plugin_config(plugin_id: str, orchestration_config: Dict[str, Any]) -> with open(plugin_config_path, "r") as f: plugin_config = yaml.safe_load(f) or {} config.update(plugin_config) - logger.debug(f"Loaded {len(plugin_config)} config keys for '{plugin_id}'") + logger.debug( + f"Loaded {len(plugin_config)} config keys for '{plugin_id}'" + ) else: - logger.debug(f"No config.yml found for plugin '{plugin_id}' at {plugin_config_path}") + logger.debug( + f"No config.yml found for plugin '{plugin_id}' at {plugin_config_path}" + ) except Exception as e: logger.warning(f"Failed to load config.yml for plugin '{plugin_id}': {e}") @@ -398,7 +405,9 @@ def load_schema_yml(plugin_id: str) -> Optional[Dict[str, Any]]: return None -def infer_schema_from_config(plugin_id: str, config_dict: Dict[str, Any]) -> Dict[str, Any]: +def infer_schema_from_config( + plugin_id: str, config_dict: Dict[str, Any] +) -> Dict[str, Any]: """Infer configuration schema from plugin config.yml. 
This function analyzes the config.yml file to generate a JSON schema @@ -480,8 +489,7 @@ def mask_secrets_in_config( if env_var and env_var in secret_env_vars: # Check if env var is set in per-plugin .env or os.environ is_set = bool( - (plugin_env and plugin_env.get(env_var)) - or os.environ.get(env_var) + (plugin_env and plugin_env.get(env_var)) or os.environ.get(env_var) ) masked_config[key] = "••••••••••••" if is_set else "" @@ -527,7 +535,9 @@ def get_plugin_metadata( # Mask secrets in current config current_config = load_plugin_config(plugin_id, orchestration_config) - masked_config = mask_secrets_in_config(current_config, config_schema, plugin_env=plugin_env) + masked_config = mask_secrets_in_config( + current_config, config_schema, plugin_env=plugin_env + ) # Mark which env vars are set (check per-plugin .env first, then os.environ) for env_var_name, env_var_schema in config_schema.get("env_vars", {}).items(): @@ -727,17 +737,24 @@ def _build_plugin_router() -> Optional[PluginRouter]: # Let plugin register its prompts with the prompt registry try: - from advanced_omi_backend.prompt_registry import get_prompt_registry + from advanced_omi_backend.prompt_registry import ( + get_prompt_registry, + ) + plugin.register_prompts(get_prompt_registry()) except Exception as e: - logger.debug(f"Plugin '{plugin_id}' prompt registration skipped: {e}") + logger.debug( + f"Plugin '{plugin_id}' prompt registration skipped: {e}" + ) # Note: async initialization happens in app_factory lifespan or reload_plugins router.register_plugin(plugin_id, plugin) logger.info(f"Plugin '{plugin_id}' registered successfully") except Exception as e: - logger.error(f"Failed to register plugin '{plugin_id}': {e}", exc_info=True) + logger.error( + f"Failed to register plugin '{plugin_id}': {e}", exc_info=True + ) logger.info( f"Plugin registration complete: {len(router.plugins)} plugin(s) registered" @@ -806,6 +823,62 @@ async def ensure_plugin_router() -> Optional[PluginRouter]: return 
plugin_router +async def dispatch_plugin_event( + event: PluginEvent, + user_id: str, + data: dict, + metadata: dict = None, + description: str = "", + require_router: bool = False, +) -> Optional[list]: + """Dispatch an event to the plugin system with standard logging. + + Handles the common pattern of: ensure router -> dispatch event -> log results. + + Args: + event: Plugin event to dispatch + user_id: User ID for the event + data: Event-specific data dict + metadata: Optional metadata dict + description: Log context (e.g., "conversation=abc123, memories=5") + require_router: If True and no router, raise RuntimeError instead of returning None + + Returns: + List of plugin results, or None if no router available + + Raises: + RuntimeError: If require_router=True and no plugin router is available + """ + plugin_router = await ensure_plugin_router() + + if not plugin_router: + if require_router: + raise RuntimeError( + f"Plugin router could not be initialized in worker process. " + f"{event.value} event will NOT be dispatched!" 
+ ) + return None + + logger.info(f"🔌 DISPATCH: {event.value} event ({description})") + + plugin_results = await plugin_router.dispatch_event( + event=event, + user_id=user_id, + data=data, + metadata=metadata or {}, + ) + + result_count = len(plugin_results) if plugin_results else 0 + logger.info(f"🔌 RESULT: {event.value} dispatched to {result_count} plugins") + + if plugin_results: + for result in plugin_results: + if result.message: + logger.info(f" Plugin result: {result.message}") + + return plugin_results + + async def cleanup_plugin_router() -> None: """Clean up the plugin router and all registered plugins.""" global _plugin_router @@ -934,7 +1007,9 @@ def signal_worker_restart() -> None: try: timestamp = time.strftime("%Y-%m-%dT%H:%M:%S") client.set(WORKER_RESTART_KEY, timestamp) - logger.info(f"Worker restart signal sent via Redis key '{WORKER_RESTART_KEY}'") + logger.info( + f"Worker restart signal sent via Redis key '{WORKER_RESTART_KEY}'" + ) finally: client.close() except Exception as e: diff --git a/backends/advanced/src/advanced_omi_backend/utils/job_utils.py b/backends/advanced/src/advanced_omi_backend/utils/job_utils.py index c9028909..695906c4 100644 --- a/backends/advanced/src/advanced_omi_backend/utils/job_utils.py +++ b/backends/advanced/src/advanced_omi_backend/utils/job_utils.py @@ -10,7 +10,31 @@ logger = logging.getLogger(__name__) -async def check_job_alive(redis_client, current_job, session_id: Optional[str] = None) -> bool: +def update_job_meta(**kwargs) -> None: + """Update the current RQ job's metadata with the given key-value pairs. + + Handles the common boilerplate of: get_current_job() -> null check -> + meta init -> update -> save_meta. 
+ + Args: + **kwargs: Key-value pairs to merge into job.meta + + Example: + update_job_meta(conversation_id="abc", processing_time=1.5) + """ + from rq import get_current_job + + current_job = get_current_job() + if current_job: + if not current_job.meta: + current_job.meta = {} + current_job.meta.update(kwargs) + current_job.save_meta() + + +async def check_job_alive( + redis_client, current_job, session_id: Optional[str] = None +) -> bool: """ Check if current RQ job still exists in Redis. @@ -44,12 +68,19 @@ async def check_job_alive(redis_client, current_job, session_id: Optional[str] = if session_id: session_key = f"audio:session:{session_id}" session_status = await redis_client.hget(session_key, "status") - if session_status and session_status.decode() in ["finalizing", "finished"]: + if session_status and session_status.decode() in [ + "finalizing", + "finished", + ]: # Session ended naturally - not a zombie, just natural cleanup - logger.debug(f"📋 Job {current_job.id} ending naturally (session closed)") + logger.debug( + f"📋 Job {current_job.id} ending naturally (session closed)" + ) return False # True zombie - job deleted while session still active - logger.error(f"🧟 Zombie job detected - job {current_job.id} deleted from Redis while session still active, exiting") + logger.error( + f"🧟 Zombie job detected - job {current_job.id} deleted from Redis while session still active, exiting" + ) return False return True diff --git a/backends/advanced/src/advanced_omi_backend/workers/annotation_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/annotation_jobs.py index 3681ab5f..10d8f65e 100644 --- a/backends/advanced/src/advanced_omi_backend/workers/annotation_jobs.py +++ b/backends/advanced/src/advanced_omi_backend/workers/annotation_jobs.py @@ -4,14 +4,14 @@ These jobs run periodically via the cron scheduler to: 1. Surface potential errors in transcripts and memories for user review 2. 
Fine-tune error detection models using accepted/rejected annotations - -TODO: Implement actual LLM-based error detection and model training logic. """ +import json import logging from datetime import datetime, timedelta, timezone from typing import List +from advanced_omi_backend.llm_client import async_generate from advanced_omi_backend.models.annotation import ( Annotation, AnnotationSource, @@ -20,100 +20,182 @@ ) from advanced_omi_backend.models.conversation import Conversation from advanced_omi_backend.models.user import User +from advanced_omi_backend.prompt_registry import get_prompt_registry logger = logging.getLogger(__name__) +LOOKBACK_DAYS = 7 +MAX_SEGMENTS_PER_PROMPT = 30 +MAX_SUGGESTIONS_PER_RUN = 50 + +PROMPT_ID = "annotation.transcript_error_detection" + async def surface_error_suggestions(): """ - Generate AI suggestions for potential transcript/memory errors. - Runs daily, creates PENDING annotations for user review. + Generate AI suggestions for potential transcript errors. - This is a PLACEHOLDER implementation. To fully implement: - 1. Query recent transcripts and memories (last N days) - 2. Use LLM to analyze content for potential errors: - - Hallucinations (made-up facts) - - Misheard words (audio transcription errors) - - Grammar/spelling issues - - Inconsistencies with other memories - 3. For each potential error: - - Create PENDING annotation with MODEL_SUGGESTION source - - Store original_text and suggested corrected_text - 4. Users can review suggestions in UI (accept/reject) - 5. Accepted suggestions improve future model accuracy - - TODO: Implement LLM-based error detection logic. + Runs daily via cron. For each user, queries recent conversations + and uses the LLM to identify potential transcription errors. + Creates PENDING annotations with MODEL_SUGGESTION source for + user review in the swipe UI. 
""" - logger.info("📝 Checking for annotation suggestions (placeholder)...") + logger.info("Checking for annotation suggestions...") + total_created = 0 try: - # Get all users users = await User.find_all().to_list() - logger.info(f" Found {len(users)} users to analyze") + logger.info(f"Found {len(users)} users to analyze") for user in users: - # TODO: Query recent conversations for this user (last 7 days) - # recent_conversations = await Conversation.find( - # Conversation.user_id == str(user.id), - # Conversation.created_at >= datetime.now(timezone.utc) - timedelta(days=7) - # ).to_list() - - # TODO: For each conversation, analyze transcripts - # for conversation in recent_conversations: - # active_transcript = conversation.get_active_transcript() - # if not active_transcript: - # continue - # - # # TODO: Use LLM to identify potential errors - # # suggestions = await llm_provider.analyze_transcript_for_errors( - # # segments=active_transcript.segments, - # # context=conversation.summary - # # ) - # - # # TODO: Create PENDING annotations for each suggestion - # # for suggestion in suggestions: - # # annotation = Annotation( - # # annotation_type=AnnotationType.TRANSCRIPT, - # # user_id=str(user.id), - # # conversation_id=conversation.conversation_id, - # # segment_index=suggestion.segment_index, - # # original_text=suggestion.original_text, - # # corrected_text=suggestion.suggested_text, - # # source=AnnotationSource.MODEL_SUGGESTION, - # # status=AnnotationStatus.PENDING - # # ) - # # await annotation.save() - - # TODO: Query recent memories for this user - # recent_memories = await memory_service.get_recent_memories( - # user_id=str(user.id), - # days=7 - # ) - - # TODO: Use LLM to identify potential errors in memories - # for memory in recent_memories: - # # TODO: Analyze memory content for hallucinations/errors - # # suggestions = await llm_provider.analyze_memory_for_errors( - # # content=memory.content, - # # metadata=memory.metadata - # # ) - # - # # TODO: 
Create PENDING annotations - # # ... - - # Placeholder logging - logger.debug(f" Analyzed user {user.id} (placeholder)") - - logger.info("✅ Suggestion check complete (placeholder implementation)") - logger.info( - " ℹ️ TODO: Implement LLM-based error detection to create actual suggestions" - ) + user_id = str(user.id) + cutoff = datetime.now(timezone.utc) - timedelta(days=LOOKBACK_DAYS) + + recent_conversations = await Conversation.find( + Conversation.user_id == user_id, + Conversation.created_at >= cutoff, + Conversation.deleted != True, + ).to_list() + + if not recent_conversations: + logger.info( + f"User {user.email or user_id}: no recent conversations, skipping" + ) + continue + + logger.info( + f"User {user.email or user_id}: {len(recent_conversations)} conversations in last {LOOKBACK_DAYS} days" + ) + + # Get conversation IDs that already have pending model suggestions + existing = await Annotation.find( + Annotation.user_id == user_id, + Annotation.source == AnnotationSource.MODEL_SUGGESTION, + Annotation.status == AnnotationStatus.PENDING, + ).to_list() + skip_conversation_ids = { + a.conversation_id for a in existing if a.conversation_id + } + if skip_conversation_ids: + logger.info( + f" Skipping {len(skip_conversation_ids)} conversations with existing pending suggestions" + ) + + created_for_user = 0 + for conversation in recent_conversations: + if total_created >= MAX_SUGGESTIONS_PER_RUN: + logger.info( + f" Reached max suggestions per run ({MAX_SUGGESTIONS_PER_RUN}), stopping" + ) + break + if conversation.conversation_id in skip_conversation_ids: + continue + + active_transcript = conversation.active_transcript + if not active_transcript or not active_transcript.segments: + logger.debug( + f" Conversation '{conversation.title or conversation.conversation_id}': no transcript/segments, skipping" + ) + continue + + seg_count = len(active_transcript.segments) + logger.info( + f" Analyzing '{conversation.title or 'Untitled'}' " + f"({seg_count} segments, 
id={conversation.conversation_id[:8]}...)" + ) + + suggestions = await _analyze_transcript(conversation, active_transcript) + + if not suggestions: + logger.info(f" No issues found") + else: + logger.info(f" LLM found {len(suggestions)} potential issues") + + for suggestion in suggestions: + if total_created >= MAX_SUGGESTIONS_PER_RUN: + break + + seg_idx = suggestion.get("segment_index") + if seg_idx is None or seg_idx >= len(active_transcript.segments): + logger.debug(f" Skipping invalid segment_index={seg_idx}") + continue + + annotation = Annotation( + annotation_type=AnnotationType.TRANSCRIPT, + user_id=user_id, + conversation_id=conversation.conversation_id, + segment_index=seg_idx, + original_text=suggestion.get("original_text", ""), + corrected_text=suggestion.get("corrected_text", ""), + source=AnnotationSource.MODEL_SUGGESTION, + status=AnnotationStatus.PENDING, + ) + await annotation.save() + total_created += 1 + created_for_user += 1 + logger.info( + f" Created suggestion: segment {seg_idx} - " + f"'{suggestion.get('reason', 'unknown')}'" + ) + + logger.info( + f"User {user.email or user_id}: {created_for_user} suggestions created" + ) + + logger.info(f"Suggestion check complete: {total_created} annotations created") except Exception as e: - logger.error(f"❌ Error in surface_error_suggestions: {e}", exc_info=True) + logger.error(f"Error in surface_error_suggestions: {e}", exc_info=True) raise +async def _analyze_transcript(conversation, transcript) -> list[dict]: + """Use LLM to analyze a transcript for potential errors.""" + segments = transcript.segments[:MAX_SEGMENTS_PER_PROMPT] + segments_text = "\n".join( + f"{i}: {seg.speaker} - {seg.text}" + for i, seg in enumerate(segments) + if seg.text.strip() + ) + + if not segments_text: + logger.debug(f" No non-empty segments to analyze") + return [] + + registry = get_prompt_registry() + prompt = await registry.get_prompt( + PROMPT_ID, + title=conversation.title or "Untitled", + 
segments_text=segments_text, + ) + + try: + logger.debug(f" Sending {len(segments)} segments to LLM for analysis...") + response = await async_generate(prompt) + logger.debug(f" LLM response length: {len(response)} chars") + # Parse JSON from response, handling markdown code blocks + text = response.strip() + if text.startswith("```"): + text = text.split("\n", 1)[1] if "\n" in text else text[3:] + text = text.rsplit("```", 1)[0] + suggestions = json.loads(text) + if not isinstance(suggestions, list): + logger.warning(f" LLM returned non-list response, ignoring") + return [] + return suggestions + except json.JSONDecodeError as e: + logger.warning( + f" Failed to parse LLM JSON for '{conversation.title or conversation.conversation_id}': {e}" + ) + logger.debug(f" Raw LLM response: {response[:500]}") + return [] + except Exception as e: + logger.warning( + f" LLM call failed for '{conversation.title or conversation.conversation_id}': {e}" + ) + return [] + + async def finetune_hallucination_model(): """ Fine-tune error detection model using accepted/rejected annotations. 
@@ -199,15 +281,11 @@ async def finetune_hallucination_model(): # Calculate acceptance rate if accepted_count + rejected_count > 0: - acceptance_rate = ( - accepted_count / (accepted_count + rejected_count) - ) * 100 + acceptance_rate = (accepted_count / (accepted_count + rejected_count)) * 100 logger.info(f" Suggestion acceptance rate: {acceptance_rate:.1f}%") logger.info("✅ Training check complete (placeholder implementation)") - logger.info( - " ℹ️ TODO: Implement model fine-tuning using user feedback data" - ) + logger.info(" ℹ️ TODO: Implement model fine-tuning using user feedback data") except Exception as e: logger.error(f"❌ Error in finetune_hallucination_model: {e}", exc_info=True) @@ -216,6 +294,7 @@ async def finetune_hallucination_model(): # Additional helper functions for future implementation + async def analyze_common_error_patterns() -> List[dict]: """ Analyze accepted annotations to identify common error patterns. diff --git a/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py index 2142ce07..5f7487e5 100644 --- a/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py +++ b/backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py @@ -24,7 +24,7 @@ from advanced_omi_backend.observability.otel_setup import set_otel_session from advanced_omi_backend.plugins.events import PluginEvent from advanced_omi_backend.services.plugin_service import ( - ensure_plugin_router, + dispatch_plugin_event, get_plugin_router, ) from advanced_omi_backend.utils.conversation_utils import ( @@ -35,6 +35,7 @@ track_speech_activity, update_job_progress_metadata, ) +from advanced_omi_backend.utils.job_utils import update_job_meta logger = logging.getLogger(__name__) @@ -1161,27 +1162,16 @@ async def generate_title_summary_job( processing_time = time.time() - start_time # Update job metadata - from rq import get_current_job - - current_job = 
get_current_job() - if current_job: - if not current_job.meta: - current_job.meta = {} - current_job.meta.update( - { - "conversation_id": conversation_id, - "title": conversation.title, - "summary": conversation.summary, - "detailed_summary_length": ( - len(conversation.detailed_summary) - if conversation.detailed_summary - else 0 - ), - "segment_count": len(segments), - "processing_time": processing_time, - } - ) - current_job.save_meta() + update_job_meta( + conversation_id=conversation_id, + title=conversation.title, + summary=conversation.summary, + detailed_summary_length=( + len(conversation.detailed_summary) if conversation.detailed_summary else 0 + ), + segment_count=len(segments), + processing_time=processing_time, + ) logger.info( f"✅ Title/summary generation completed for {conversation_id} in {processing_time:.2f}s" @@ -1264,64 +1254,25 @@ async def dispatch_conversation_complete_event_job( user_email = user.email if user else "" # Prepare plugin event data (same format as open_conversation_job) + actual_end_reason = end_reason or "file_upload" try: - plugin_router = await ensure_plugin_router() - - # CRITICAL CHECK: Fail loudly if no router - if not plugin_router: - error_msg = ( - f"❌ Plugin router could not be initialized in worker process. " - f"conversation.complete event for {conversation_id[:12]} will NOT be dispatched!" 
- ) - logger.error(error_msg) - - return { - "success": False, - "skipped": True, - "reason": "No plugin router", - "conversation_id": conversation_id, - "error": error_msg, - } - - plugin_data = { - "conversation": { - "client_id": client_id, - "user_id": user_id, - }, - "transcript": conversation.transcript if conversation else "", - "duration": 0, # Duration not tracked for file uploads - "conversation_id": conversation_id, - } - - # Use provided end_reason or default to 'file_upload' for backward compatibility - actual_end_reason = end_reason or "file_upload" - - logger.info( - f"🔌 DISPATCH: conversation.complete event for {conversation_id[:12]} " - f"(end_reason={actual_end_reason}, user={user_id}, client={client_id})" - ) - - plugin_results = await plugin_router.dispatch_event( + plugin_results = await dispatch_plugin_event( event=PluginEvent.CONVERSATION_COMPLETE, user_id=user_id, - data=plugin_data, + data={ + "conversation": { + "client_id": client_id, + "user_id": user_id, + }, + "transcript": conversation.transcript if conversation else "", + "duration": 0, # Duration not tracked for file uploads + "conversation_id": conversation_id, + }, metadata={"end_reason": actual_end_reason}, + description=f"conversation={conversation_id[:12]}, end_reason={actual_end_reason}", + require_router=True, ) - logger.info( - f"🔌 RESULT: conversation.complete dispatched to {len(plugin_results) if plugin_results else 0} plugins" - ) - if plugin_results: - logger.info( - f"📌 Triggered {len(plugin_results)} conversation-level plugins" - ) - for result in plugin_results: - logger.info( - f" Plugin result: success={result.success}, message={result.message}" - ) - if result.message: - logger.info(f" Plugin result: {result.message}") - processing_time = time.time() - start_time logger.info( f"✅ Conversation complete event dispatched for {conversation_id} in {processing_time:.2f}s" @@ -1334,6 +1285,15 @@ async def dispatch_conversation_complete_event_job( 
"processing_time_seconds": processing_time, } + except RuntimeError as e: + logger.error(f"❌ {e}") + return { + "success": False, + "skipped": True, + "reason": "No plugin router", + "conversation_id": conversation_id, + "error": str(e), + } except Exception as e: logger.warning(f"⚠️ Error dispatching conversation complete event: {e}") return { diff --git a/backends/advanced/src/advanced_omi_backend/workers/memory_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/memory_jobs.py index 492dc650..1fcfe510 100644 --- a/backends/advanced/src/advanced_omi_backend/workers/memory_jobs.py +++ b/backends/advanced/src/advanced_omi_backend/workers/memory_jobs.py @@ -27,7 +27,7 @@ set_otel_session, ) from advanced_omi_backend.plugins.events import PluginEvent -from advanced_omi_backend.services.plugin_service import ensure_plugin_router +from advanced_omi_backend.services.plugin_service import dispatch_plugin_event logger = logging.getLogger(__name__) @@ -391,11 +391,12 @@ async def process_memory_job( logger.warning(f"⚠️ Knowledge graph extraction failed (non-fatal): {e}") # Trigger memory-level plugins (ALWAYS dispatch when success, even with 0 new memories) + memory_count = len(created_memory_ids) if created_memory_ids else 0 try: - plugin_router = await ensure_plugin_router() - - if plugin_router: - plugin_data = { + await dispatch_plugin_event( + event=PluginEvent.MEMORY_PROCESSED, + user_id=user_id, + data={ "memories": created_memory_ids or [], "conversation": { "conversation_id": conversation_id, @@ -403,39 +404,15 @@ async def process_memory_job( "user_id": user_id, "user_email": user_email, }, - "memory_count": ( - len(created_memory_ids) if created_memory_ids else 0 - ), + "memory_count": memory_count, "conversation_id": conversation_id, - } - - logger.info( - f"🔌 DISPATCH: memory.processed event " - f"(conversation={conversation_id[:12]}, memories={len(created_memory_ids) if created_memory_ids else 0})" - ) - - plugin_results = await 
plugin_router.dispatch_event( - event=PluginEvent.MEMORY_PROCESSED, - user_id=user_id, - data=plugin_data, - metadata={ - "processing_time": processing_time, - "memory_provider": memory_provider, - }, - ) - - logger.info( - f"🔌 RESULT: memory.processed dispatched to {len(plugin_results) if plugin_results else 0} plugins" - ) - - if plugin_results: - logger.info( - f"📌 Triggered {len(plugin_results)} memory-level plugins" - ) - for result in plugin_results: - if result.message: - logger.info(f" Plugin result: {result.message}") - + }, + metadata={ + "processing_time": processing_time, + "memory_provider": memory_provider, + }, + description=f"conversation={conversation_id[:12]}, memories={memory_count}", + ) except Exception as e: logger.warning(f"⚠️ Error triggering memory-level plugins: {e}") diff --git a/backends/advanced/src/advanced_omi_backend/workers/speaker_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/speaker_jobs.py index 0a4192fb..1576d9eb 100644 --- a/backends/advanced/src/advanced_omi_backend/workers/speaker_jobs.py +++ b/backends/advanced/src/advanced_omi_backend/workers/speaker_jobs.py @@ -12,22 +12,17 @@ from advanced_omi_backend.auth import generate_jwt_for_user from advanced_omi_backend.models.conversation import Conversation from advanced_omi_backend.models.job import async_job -from advanced_omi_backend.services.audio_stream import ( - TranscriptionResultsAggregator, -) +from advanced_omi_backend.services.audio_stream import TranscriptionResultsAggregator from advanced_omi_backend.speaker_recognition_client import SpeakerRecognitionClient from advanced_omi_backend.users import get_user_by_id +from advanced_omi_backend.utils.job_utils import update_job_meta logger = logging.getLogger(__name__) @async_job(redis=True, beanie=True) async def check_enrolled_speakers_job( - session_id: str, - user_id: str, - client_id: str, - *, - redis_client=None + session_id: str, user_id: str, client_id: str, *, redis_client=None ) -> Dict[str, Any]: 
""" Check if any enrolled speakers are present in the current audio stream. @@ -54,19 +49,23 @@ async def check_enrolled_speakers_job( # Check for enrolled speakers speaker_client = SpeakerRecognitionClient() - enrolled_present, speaker_result = await speaker_client.check_if_enrolled_speaker_present( - redis_client=redis_client, - client_id=client_id, - session_id=session_id, - user_id=user_id, - transcription_results=raw_results + enrolled_present, speaker_result = ( + await speaker_client.check_if_enrolled_speaker_present( + redis_client=redis_client, + client_id=client_id, + session_id=session_id, + user_id=user_id, + transcription_results=raw_results, + ) ) # Check for errors from speaker service if speaker_result and speaker_result.get("error"): error_type = speaker_result.get("error") error_message = speaker_result.get("message", "Unknown error") - logger.error(f"🎤 [SPEAKER CHECK] Speaker service error: {error_type} - {error_message}") + logger.error( + f"🎤 [SPEAKER CHECK] Speaker service error: {error_type} - {error_message}" + ) # For connection failures, assume no enrolled speakers but allow conversation to proceed # Speaker filtering is optional - if service is down, conversation should still be created @@ -82,7 +81,7 @@ async def check_enrolled_speakers_job( "enrolled_present": False, "identified_speakers": [], "skip_reason": f"Speaker service unavailable: {error_type}", - "processing_time_seconds": time.time() - start_time + "processing_time_seconds": time.time() - start_time, } # For other processing errors, also assume no enrolled speakers @@ -93,7 +92,7 @@ async def check_enrolled_speakers_job( "error_details": error_message, "enrolled_present": False, "identified_speakers": [], - "processing_time_seconds": time.time() - start_time + "processing_time_seconds": time.time() - start_time, } # Extract identified speakers @@ -101,31 +100,31 @@ async def check_enrolled_speakers_job( if speaker_result and "segments" in speaker_result: for seg in 
speaker_result["segments"]: identified_as = seg.get("identified_as") - if identified_as and identified_as != "Unknown" and identified_as not in identified_speakers: + if ( + identified_as + and identified_as != "Unknown" + and identified_as not in identified_speakers + ): identified_speakers.append(identified_as) processing_time = time.time() - start_time if enrolled_present: - logger.info(f"✅ Enrolled speaker(s) found: {', '.join(identified_speakers)} ({processing_time:.2f}s)") + logger.info( + f"✅ Enrolled speaker(s) found: {', '.join(identified_speakers)} ({processing_time:.2f}s)" + ) else: logger.info(f"⏭️ No enrolled speakers found ({processing_time:.2f}s)") # Update job metadata for timeline tracking - from rq import get_current_job - current_job = get_current_job() - if current_job: - if not current_job.meta: - current_job.meta = {} - current_job.meta.update({ - "session_id": session_id, - "client_id": client_id, - "enrolled_present": enrolled_present, - "identified_speakers": identified_speakers, - "speaker_count": len(identified_speakers), - "processing_time": processing_time - }) - current_job.save_meta() + update_job_meta( + session_id=session_id, + client_id=client_id, + enrolled_present=enrolled_present, + identified_speakers=identified_speakers, + speaker_count=len(identified_speakers), + processing_time=processing_time, + ) return { "success": True, @@ -133,7 +132,7 @@ async def check_enrolled_speakers_job( "enrolled_present": enrolled_present, "identified_speakers": identified_speakers, "speaker_result": speaker_result, - "processing_time_seconds": processing_time + "processing_time_seconds": processing_time, } @@ -144,7 +143,7 @@ async def recognise_speakers_job( transcript_text: str = "", words: list = None, *, - redis_client=None + redis_client=None, ) -> Dict[str, Any]: """ RQ job function for identifying speakers in a transcribed conversation. 
@@ -168,12 +167,16 @@ async def recognise_speakers_job( Dict with processing results """ - logger.info(f"🎤 RQ: Starting speaker recognition for conversation {conversation_id}") + logger.info( + f"🎤 RQ: Starting speaker recognition for conversation {conversation_id}" + ) start_time = time.time() # Get the conversation - conversation = await Conversation.find_one(Conversation.conversation_id == conversation_id) + conversation = await Conversation.find_one( + Conversation.conversation_id == conversation_id + ) if not conversation: logger.error(f"Conversation {conversation_id} not found") return {"success": False, "error": "Conversation not found"} @@ -201,7 +204,7 @@ async def recognise_speakers_job( "conversation_id": conversation_id, "version_id": version_id, "speaker_recognition_enabled": False, - "processing_time_seconds": 0 + "processing_time_seconds": 0, } # Get provider capabilities from metadata @@ -222,7 +225,9 @@ async def recognise_speakers_job( # If we have existing segments from provider, proceed to identification if transcript_version.segments: - logger.info(f"🎤 Using {len(transcript_version.segments)} segments from provider") + logger.info( + f"🎤 Using {len(transcript_version.segments)} segments from provider" + ) # Continue to speaker identification below (after this block) else: logger.warning(f"🎤 Provider claimed diarization but no segments found") @@ -237,32 +242,35 @@ async def recognise_speakers_job( if not actual_words and transcript_version.words: # Convert Word objects to dicts for speaker service API actual_words = [ - { - "word": w.word, - "start": w.start, - "end": w.end, - "confidence": w.confidence - } + {"word": w.word, "start": w.start, "end": w.end, "confidence": w.confidence} for w in transcript_version.words ] - logger.info(f"🔤 Loaded {len(actual_words)} words from transcript version.words field") + logger.info( + f"🔤 Loaded {len(actual_words)} words from transcript version.words field" + ) # Backward compatibility: Fall back to 
metadata if words field is empty (old data) elif not actual_words and transcript_version.metadata.get("words"): actual_words = transcript_version.metadata.get("words", []) - logger.info(f"🔤 Loaded {len(actual_words)} words from transcript version metadata (legacy)") + logger.info( + f"🔤 Loaded {len(actual_words)} words from transcript version metadata (legacy)" + ) # Backward compatibility: Extract from segments if that's all we have (old streaming data) elif not actual_words and transcript_version.segments: for segment in transcript_version.segments: if segment.words: for w in segment.words: - actual_words.append({ - "word": w.word, - "start": w.start, - "end": w.end, - "confidence": w.confidence - }) + actual_words.append( + { + "word": w.word, + "start": w.start, + "end": w.end, + "confidence": w.confidence, + } + ) if actual_words: - logger.info(f"🔤 Extracted {len(actual_words)} words from segments (legacy)") + logger.info( + f"🔤 Extracted {len(actual_words)} words from segments (legacy)" + ) if not actual_transcript_text: logger.warning(f"🎤 No transcript text found in version {version_id}") @@ -271,7 +279,7 @@ async def recognise_speakers_job( "conversation_id": conversation_id, "version_id": version_id, "error": "No transcript text available", - "processing_time_seconds": 0 + "processing_time_seconds": 0, } # Check if we can run pyannote diarization @@ -290,7 +298,7 @@ async def recognise_speakers_job( "conversation_id": conversation_id, "version_id": version_id, "error": "No word timestamps and no segments available", - "processing_time_seconds": time.time() - start_time + "processing_time_seconds": time.time() - start_time, } # Has existing segments - fall through to run identification on them logger.info( @@ -303,6 +311,7 @@ async def recognise_speakers_job( # 1. Config toggle (per_segment_speaker_id) enables per-segment globally # 2. 
Manual reprocess trigger also enables per-segment for that run from advanced_omi_backend.config import get_misc_settings + misc_config = get_misc_settings() per_segment_config = misc_config.get("per_segment_speaker_id", False) @@ -323,7 +332,11 @@ async def recognise_speakers_job( # Have existing segments and can't/shouldn't run pyannote - do identification only # Covers: provider already diarized, no word timestamps but segments exist, etc. # Only send speech segments for identification; skip event/note segments - speech_segments = [s for s in transcript_version.segments if getattr(s, 'segment_type', 'speech') == 'speech'] + speech_segments = [ + s + for s in transcript_version.segments + if getattr(s, "segment_type", "speech") == "speech" + ] logger.info( f"🎤 Using segment-level speaker identification on {len(speech_segments)} speech segments " f"(skipped {len(transcript_version.segments) - len(speech_segments)} non-speech)" @@ -341,10 +354,7 @@ async def recognise_speakers_job( ) else: # Standard path: full diarization + identification via speaker service - transcript_data = { - "text": actual_transcript_text, - "words": actual_words - } + transcript_data = {"text": actual_transcript_text, "words": actual_words} # Generate backend token for speaker service to fetch audio try: @@ -356,35 +366,41 @@ async def recognise_speakers_job( "conversation_id": conversation_id, "version_id": version_id, "error": "User not found", - "processing_time_seconds": time.time() - start_time + "processing_time_seconds": time.time() - start_time, } backend_token = generate_jwt_for_user(user_id, user.email) logger.info(f"🔐 Generated backend token for speaker service") except Exception as token_error: - logger.error(f"Failed to generate backend token: {token_error}", exc_info=True) + logger.error( + f"Failed to generate backend token: {token_error}", exc_info=True + ) return { "success": False, "conversation_id": conversation_id, "version_id": version_id, "error": f"Token generation 
failed: {token_error}", - "processing_time_seconds": time.time() - start_time + "processing_time_seconds": time.time() - start_time, } - logger.info(f"🎤 Calling speaker recognition service with conversation_id...") + logger.info( + f"🎤 Calling speaker recognition service with conversation_id..." + ) speaker_result = await speaker_client.diarize_identify_match( conversation_id=conversation_id, backend_token=backend_token, transcript_data=transcript_data, - user_id=user_id + user_id=user_id, ) # Check for errors from speaker service if speaker_result.get("error"): error_type = speaker_result.get("error") error_message = speaker_result.get("message", "Unknown error") - logger.error(f"🎤 Speaker recognition service error: {error_type} - {error_message}") + logger.error( + f"🎤 Speaker recognition service error: {error_type} - {error_message}" + ) # Connection/timeout errors → skip gracefully (existing behavior) if error_type in ("connection_failed", "timeout", "client_error"): @@ -401,7 +417,7 @@ async def recognise_speakers_job( "identified_speakers": [], "skip_reason": f"Speaker service unavailable: {error_type}", "error_type": error_type, - "processing_time_seconds": time.time() - start_time + "processing_time_seconds": time.time() - start_time, } # Validation errors → fail job, don't retry @@ -414,7 +430,7 @@ async def recognise_speakers_job( "error": f"Validation error: {error_message}", "error_type": error_type, "retryable": False, # Don't retry validation errors - "processing_time_seconds": time.time() - start_time + "processing_time_seconds": time.time() - start_time, } # Resource errors → fail job, can retry later @@ -427,7 +443,7 @@ async def recognise_speakers_job( "error": f"Resource error: {error_message}", "error_type": error_type, "retryable": True, # Can retry later when resources available - "processing_time_seconds": time.time() - start_time + "processing_time_seconds": time.time() - start_time, } # Unknown errors → fail job @@ -439,11 +455,15 @@ async 
def recognise_speakers_job( "error": f"Speaker recognition failed: {error_type}", "error_details": error_message, "error_type": error_type, - "processing_time_seconds": time.time() - start_time + "processing_time_seconds": time.time() - start_time, } # Service worked but found no segments (legitimate empty result) - if not speaker_result or "segments" not in speaker_result or not speaker_result["segments"]: + if ( + not speaker_result + or "segments" not in speaker_result + or not speaker_result["segments"] + ): logger.warning(f"🎤 Speaker recognition returned no segments") return { "success": True, @@ -451,7 +471,7 @@ async def recognise_speakers_job( "version_id": version_id, "speaker_recognition_enabled": True, "identified_speakers": [], - "processing_time_seconds": time.time() - start_time + "processing_time_seconds": time.time() - start_time, } speaker_segments = speaker_result["segments"] @@ -486,12 +506,16 @@ async def recognise_speakers_job( continue # Skip segments with invalid structure - if not isinstance(seg.get("start"), (int, float)) or not isinstance(seg.get("end"), (int, float)): + if not isinstance(seg.get("start"), (int, float)) or not isinstance( + seg.get("end"), (int, float) + ): empty_segment_count += 1 logger.debug(f"Filtered segment with invalid timing: {seg}") continue - speaker_name = seg.get("identified_as") or unknown_label_map.get(seg.get("speaker", "Unknown"), "Unknown Speaker") + speaker_name = seg.get("identified_as") or unknown_label_map.get( + seg.get("speaker", "Unknown"), "Unknown Speaker" + ) # Extract words from speaker service response (already matched to this segment) words_data = seg.get("words", []) @@ -500,13 +524,14 @@ async def recognise_speakers_job( word=w.get("word", ""), start=w.get("start", 0.0), end=w.get("end", 0.0), - confidence=w.get("confidence") + confidence=w.get("confidence"), ) for w in words_data ] # Classify segment type from content from advanced_omi_backend.utils.segment_utils import 
classify_segment_text + seg_classification = classify_segment_text(text) seg_type = "event" if seg_classification == "event" else "speech" @@ -519,18 +544,21 @@ async def recognise_speakers_job( segment_type=seg_type, identified_as=seg.get("identified_as"), confidence=seg.get("confidence"), - words=segment_words # Use words from speaker service + words=segment_words, # Use words from speaker service ) ) if empty_segment_count > 0: - logger.info(f"🔇 Filtered out {empty_segment_count} empty segments from speaker recognition") + logger.info( + f"🔇 Filtered out {empty_segment_count} empty segments from speaker recognition" + ) # Re-insert non-speech segments (event/note) that were skipped during identification # They need to be merged back into position based on timestamps non_speech_segments = [ - s for s in transcript_version.segments - if getattr(s, 'segment_type', 'speech') != 'speech' + s + for s in transcript_version.segments + if getattr(s, "segment_type", "speech") != "speech" ] if non_speech_segments: for ns_seg in non_speech_segments: @@ -541,7 +569,9 @@ async def recognise_speakers_job( insert_pos = i break updated_segments.insert(insert_pos, ns_seg) - logger.info(f"🎤 Re-inserted {len(non_speech_segments)} non-speech segments") + logger.info( + f"🎤 Re-inserted {len(non_speech_segments)} non-speech segments" + ) # Update the transcript version transcript_version.segments = updated_segments @@ -559,24 +589,31 @@ async def recognise_speakers_job( sr_metadata = { "enabled": True, - "identification_mode": "per_segment" if use_per_segment else "majority_vote", + "identification_mode": ( + "per_segment" if use_per_segment else "majority_vote" + ), "identified_speakers": list(identified_speakers), "speaker_count": len(identified_speakers), "total_segments": len(speaker_segments), - "processing_time_seconds": time.time() - start_time + "processing_time_seconds": time.time() - start_time, } if speaker_result.get("partial_errors"): sr_metadata["partial_errors"] = 
speaker_result["partial_errors"] transcript_version.metadata["speaker_recognition"] = sr_metadata # Set diarization source if pyannote ran (provider didn't do diarization) - if not provider_has_diarization and transcript_version.diarization_source != "provider": + if ( + not provider_has_diarization + and transcript_version.diarization_source != "provider" + ): transcript_version.diarization_source = "pyannote" await conversation.save() processing_time = time.time() - start_time - logger.info(f"✅ Speaker recognition completed for {conversation_id} in {processing_time:.2f}s") + logger.info( + f"✅ Speaker recognition completed for {conversation_id} in {processing_time:.2f}s" + ) return { "success": True, @@ -585,22 +622,18 @@ async def recognise_speakers_job( "speaker_recognition_enabled": True, "identified_speakers": list(identified_speakers), "segment_count": len(updated_segments), - "processing_time_seconds": processing_time + "processing_time_seconds": processing_time, } except asyncio.TimeoutError as e: logger.error(f"❌ Speaker recognition timeout: {e}") # Add timeout metadata to job - from rq import get_current_job - current_job = get_current_job() - if current_job: - current_job.meta.update({ - "error_type": "timeout", - "audio_duration": conversation.audio_total_duration if conversation else None, - "timeout_occurred_at": time.time() - }) - current_job.save_meta() + update_job_meta( + error_type="timeout", + audio_duration=conversation.audio_total_duration if conversation else None, + timeout_occurred_at=time.time(), + ) return { "success": False, @@ -608,13 +641,16 @@ async def recognise_speakers_job( "version_id": version_id, "error": "Speaker recognition timeout", "error_type": "timeout", - "audio_duration": conversation.audio_total_duration if conversation else None, - "processing_time_seconds": time.time() - start_time + "audio_duration": ( + conversation.audio_total_duration if conversation else None + ), + "processing_time_seconds": time.time() - 
start_time, } except Exception as speaker_error: logger.error(f"❌ Speaker recognition failed: {speaker_error}") import traceback + logger.debug(traceback.format_exc()) return { @@ -622,5 +658,5 @@ async def recognise_speakers_job( "conversation_id": conversation_id, "version_id": version_id, "error": str(speaker_error), - "processing_time_seconds": time.time() - start_time + "processing_time_seconds": time.time() - start_time, } diff --git a/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py b/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py index fb644ec1..a9e98c5f 100644 --- a/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py +++ b/backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py @@ -10,10 +10,8 @@ import logging import os import time -import uuid import wave from datetime import datetime -from pathlib import Path from typing import Any, Dict from beanie.operators import In @@ -21,23 +19,18 @@ from rq.exceptions import NoSuchJobError from rq.job import Job -from advanced_omi_backend.config import ( - get_backend_config, - get_transcription_job_timeout, -) +from advanced_omi_backend.config import get_transcription_job_timeout from advanced_omi_backend.controllers.queue_controller import ( JOB_RESULT_TTL, - REDIS_URL, - redis_conn, start_post_conversation_jobs, transcription_queue, ) from advanced_omi_backend.models.audio_chunk import AudioChunkDocument from advanced_omi_backend.models.conversation import Conversation -from advanced_omi_backend.models.job import BaseRQJob, JobPriority, async_job +from advanced_omi_backend.models.job import async_job from advanced_omi_backend.plugins.events import PluginEvent from advanced_omi_backend.services.audio_stream import TranscriptionResultsAggregator -from advanced_omi_backend.services.plugin_service import ensure_plugin_router +from advanced_omi_backend.services.plugin_service import dispatch_plugin_event from 
advanced_omi_backend.services.transcription import ( get_transcription_provider, is_transcription_available, @@ -50,6 +43,7 @@ analyze_speech, mark_conversation_deleted, ) +from advanced_omi_backend.utils.job_utils import update_job_meta logger = logging.getLogger(__name__) @@ -295,48 +289,21 @@ def _on_batch_progress(event: dict) -> None: # Trigger transcript-level plugins BEFORE speech validation # This ensures wake-word commands execute even if conversation gets deleted - logger.info( - f"🔍 DEBUG: About to trigger plugins - transcript_text exists: {bool(transcript_text)}" - ) if transcript_text: try: - plugin_router = await ensure_plugin_router() - - if plugin_router: - logger.info( - f"🔍 DEBUG: Preparing to trigger transcript plugins for conversation {conversation_id}" - ) - plugin_data = { + await dispatch_plugin_event( + event=PluginEvent.TRANSCRIPT_BATCH, + user_id=user_id, + data={ "transcript": transcript_text, "segment_id": f"{conversation_id}_batch", "conversation_id": conversation_id, "segments": segments, "word_count": len(words), - } - - logger.info( - f"🔌 DISPATCH: transcript.batch event " - f"(conversation={conversation_id[:12]}, words={len(words)})" - ) - - plugin_results = await plugin_router.dispatch_event( - event=PluginEvent.TRANSCRIPT_BATCH, - user_id=user_id, - data=plugin_data, - metadata={"client_id": client_id}, - ) - - logger.info( - f"🔌 RESULT: transcript.batch dispatched to {len(plugin_results) if plugin_results else 0} plugins" - ) - - if plugin_results: - logger.info( - f"✅ Triggered {len(plugin_results)} transcript plugins in batch mode" - ) - for result in plugin_results: - if result.message: - logger.info(f" Plugin: {result.message}") + }, + metadata={"client_id": client_id}, + description=f"conversation={conversation_id[:12]}, words={len(words)}", + ) except Exception as e: logger.exception( f"⚠️ Error triggering transcript plugins in batch mode: {e}" @@ -573,21 +540,14 @@ def _on_batch_progress(event: dict) -> None: ) # Update 
job metadata with title and summary for UI display - current_job = get_current_job() - if current_job: - if not current_job.meta: - current_job.meta = {} - current_job.meta.update( - { - "conversation_id": conversation_id, - "title": conversation.title, - "summary": conversation.summary, - "transcript_length": len(transcript_text), - "word_count": len(words), - "processing_time": processing_time, - } - ) - current_job.save_meta() + update_job_meta( + conversation_id=conversation_id, + title=conversation.title, + summary=conversation.summary, + transcript_length=len(transcript_text), + word_count=len(words), + processing_time=processing_time, + ) return { "success": True, @@ -972,18 +932,12 @@ async def stream_speech_detection_job( ) # Update job metadata to show status - if current_job: - if not current_job.meta: - current_job.meta = {} - current_job.meta.update( - { - "status": "listening_for_speech", - "session_id": session_id, - "client_id": client_id, - "session_level": True, # Mark as session-level job - } - ) - current_job.save_meta() + update_job_meta( + status="listening_for_speech", + session_id=session_id, + client_id=client_id, + session_level=True, # Mark as session-level job + ) # Track when session closes for graceful shutdown session_closed_at = None diff --git a/backends/advanced/webui/package-lock.json b/backends/advanced/webui/package-lock.json index 54ca06ae..c3bd503e 100644 --- a/backends/advanced/webui/package-lock.json +++ b/backends/advanced/webui/package-lock.json @@ -18,6 +18,7 @@ "d3-selection": "^3.0.0", "d3-time-format": "^4.1.0", "d3-zoom": "^3.0.0", + "framer-motion": "^11.0.0", "lucide-react": "^0.294.0", "react": "^18.2.0", "react-dom": "^18.2.0", @@ -3355,6 +3356,33 @@ "url": "https://github.com/sponsors/rawify" } }, + "node_modules/framer-motion": { + "version": "11.18.2", + "resolved": "https://registry.npmjs.org/framer-motion/-/framer-motion-11.18.2.tgz", + "integrity": 
"sha512-5F5Och7wrvtLVElIpclDT0CBzMVg3dL22B64aZwHtsIY8RB4mXICLrkajK4G9R+ieSAGcgrLeae2SeUTg2pr6w==", + "license": "MIT", + "dependencies": { + "motion-dom": "^11.18.1", + "motion-utils": "^11.18.1", + "tslib": "^2.4.0" + }, + "peerDependencies": { + "@emotion/is-prop-valid": "*", + "react": "^18.0.0 || ^19.0.0", + "react-dom": "^18.0.0 || ^19.0.0" + }, + "peerDependenciesMeta": { + "@emotion/is-prop-valid": { + "optional": true + }, + "react": { + "optional": true + }, + "react-dom": { + "optional": true + } + } + }, "node_modules/fs.realpath": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/fs.realpath/-/fs.realpath-1.0.0.tgz", @@ -4028,6 +4056,21 @@ "node": ">=16 || 14 >=14.17" } }, + "node_modules/motion-dom": { + "version": "11.18.1", + "resolved": "https://registry.npmjs.org/motion-dom/-/motion-dom-11.18.1.tgz", + "integrity": "sha512-g76KvA001z+atjfxczdRtw/RXOM3OMSdd1f4DL77qCTF/+avrRJiawSG4yDibEQ215sr9kpinSlX2pCTJ9zbhw==", + "license": "MIT", + "dependencies": { + "motion-utils": "^11.18.1" + } + }, + "node_modules/motion-utils": { + "version": "11.18.1", + "resolved": "https://registry.npmjs.org/motion-utils/-/motion-utils-11.18.1.tgz", + "integrity": "sha512-49Kt+HKjtbJKLtgO/LKj9Ld+6vw9BjH5d9sc40R/kVyH8GLAXgT42M2NnuPcJNuA3s9ZfZBUcwIgpmZWGEE+hA==", + "license": "MIT" + }, "node_modules/ms": { "version": "2.1.3", "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", @@ -5560,7 +5603,6 @@ "version": "2.8.1", "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", - "dev": true, "license": "0BSD" }, "node_modules/type-check": { diff --git a/backends/advanced/webui/package.json b/backends/advanced/webui/package.json index ca2c77a5..7c497790 100644 --- a/backends/advanced/webui/package.json +++ b/backends/advanced/webui/package.json @@ -13,6 +13,7 @@ "@tanstack/react-query": "^5.90.20", "axios": "^1.6.2", "clsx": "^2.0.0", + 
"framer-motion": "^11.0.0", "cronstrue": "^2.50.0", "d3-array": "^3.2.4", "d3-axis": "^3.0.0", diff --git a/backends/advanced/webui/src/components/UserLoopModal.tsx b/backends/advanced/webui/src/components/UserLoopModal.tsx new file mode 100644 index 00000000..6e671432 --- /dev/null +++ b/backends/advanced/webui/src/components/UserLoopModal.tsx @@ -0,0 +1,533 @@ +import { useState, useEffect, useCallback, useMemo, useRef } from 'react' +import { motion, AnimatePresence, PanInfo } from 'framer-motion' +import { X, Check, Heart, HeartCrack, Pencil, Play, Pause } from 'lucide-react' +import { api, BACKEND_URL } from '../services/api' +import { getStorageKey } from '../utils/storage' + +type DiffToken = { text: string; type: 'equal' | 'added' | 'removed' } + +/** Simple word-level diff using LCS to highlight changes. */ +function computeWordDiff(original: string, corrected: string): { originalTokens: DiffToken[]; correctedTokens: DiffToken[] } { + const a = original.split(/(\s+)/) + const b = corrected.split(/(\s+)/) + + // Build LCS table + const m = a.length, n = b.length + const dp: number[][] = Array.from({ length: m + 1 }, () => Array(n + 1).fill(0)) + for (let i = 1; i <= m; i++) { + for (let j = 1; j <= n; j++) { + dp[i][j] = a[i - 1] === b[j - 1] ? 
dp[i - 1][j - 1] + 1 : Math.max(dp[i - 1][j], dp[i][j - 1]) + } + } + + // Backtrack to get diff + const originalTokens: DiffToken[] = [] + const correctedTokens: DiffToken[] = [] + let i = m, j = n + const origReverse: DiffToken[] = [] + const corrReverse: DiffToken[] = [] + + while (i > 0 || j > 0) { + if (i > 0 && j > 0 && a[i - 1] === b[j - 1]) { + origReverse.push({ text: a[i - 1], type: 'equal' }) + corrReverse.push({ text: b[j - 1], type: 'equal' }) + i--; j-- + } else if (j > 0 && (i === 0 || dp[i][j - 1] >= dp[i - 1][j])) { + corrReverse.push({ text: b[j - 1], type: 'added' }) + j-- + } else { + origReverse.push({ text: a[i - 1], type: 'removed' }) + i-- + } + } + + originalTokens.push(...origReverse.reverse()) + correctedTokens.push(...corrReverse.reverse()) + return { originalTokens, correctedTokens } +} + +const AUTO_SHOW_KEY = 'userloop-auto-show' + +interface Suggestion { + id: string + annotation_type: string + conversation_id: string + segment_index: number | null + original_text: string + corrected_text: string + created_at: string + conversation_title: string | null + transcript_snippet: string | null + segment_start: number | null + segment_end: number | null +} + +/** Read auto-show preference from localStorage (default: false). 
*/ +function getAutoShow(): boolean { + try { + return localStorage.getItem(AUTO_SHOW_KEY) === 'true' + } catch { + return false + } +} + +export default function UserLoopModal() { + const [isOpen, setIsOpen] = useState(false) + const [suggestions, setSuggestions] = useState([]) + const [currentIndex, setCurrentIndex] = useState(0) + const [direction, setDirection] = useState(0) + const [isAnimating, setIsAnimating] = useState(false) + const [particles, setParticles] = useState<{ id: number; x: number; y: number; type: 'heart' | 'heart-break' }[]>([]) + const [isEditing, setIsEditing] = useState(false) + const [editText, setEditText] = useState('') + const [isPlaying, setIsPlaying] = useState(false) + const audioRef = useRef(null) + + const stopAudio = useCallback(() => { + if (audioRef.current) { + audioRef.current.pause() + audioRef.current = null + } + setIsPlaying(false) + }, []) + + const fetchSuggestions = useCallback(async (): Promise => { + try { + const response = await api.get('/api/annotations/suggestions', { params: { limit: 20 } }) + const data = response.data + if (Array.isArray(data) && data.length > 0) { + setSuggestions(data) + setCurrentIndex(0) + return data + } + return [] + } catch { + return [] + } + }, []) + + // Auto-show: only poll & auto-open when the user has opted in via localStorage + useEffect(() => { + if (!getAutoShow()) return + + const check = async () => { + const data = await fetchSuggestions() + if (data.length > 0) setIsOpen(true) + } + check() + const interval = setInterval(check, 60000) + return () => clearInterval(interval) + }, [fetchSuggestions]) + + // Explicit trigger from Fine-tuning page (always works regardless of auto-show) + useEffect(() => { + const handler = () => { + fetchSuggestions().then(data => { + if (data.length > 0) setIsOpen(true) + }) + } + window.addEventListener('open-swipe-ui', handler) + return () => window.removeEventListener('open-swipe-ui', handler) + }, [fetchSuggestions]) + + // Stop audio on 
unmount + useEffect(() => { + return () => { stopAudio() } + }, [stopAudio]) + + // Stop audio when card changes + useEffect(() => { + stopAudio() + }, [currentIndex, stopAudio]) + + // Clean up particles + useEffect(() => { + const timer = setTimeout(() => setParticles([]), 1000) + return () => clearTimeout(timer) + }, [particles]) + + // Close modal when no suggestions left + useEffect(() => { + if (suggestions.length === 0 && isOpen) { + stopAudio() + setIsOpen(false) + } + }, [suggestions.length, isOpen, stopAudio]) + + // Keyboard shortcuts + useEffect(() => { + if (!isOpen || suggestions.length === 0) return + + const handleKeyDown = (e: KeyboardEvent) => { + // Don't capture keys when editing (textarea handles its own keys) + if (isEditing) return + + switch (e.key) { + case 'ArrowDown': + e.preventDefault() + handleSkip() + break + case 'ArrowUp': + e.preventDefault() + setEditText(suggestions[currentIndex]?.corrected_text || '') + setIsEditing(true) + break + case 'ArrowLeft': + e.preventDefault() + handleAction('reject', -1) + break + case 'ArrowRight': + e.preventDefault() + handleAction('accept', 1) + break + } + } + + window.addEventListener('keydown', handleKeyDown) + return () => window.removeEventListener('keydown', handleKeyDown) + }, [isOpen, suggestions, currentIndex, isEditing, isAnimating]) + + const createParticles = (type: 'heart' | 'heart-break') => { + setParticles( + Array.from({ length: 8 }, (_, i) => ({ + id: Date.now() + i, + x: Math.random() * 400 - 200, + y: Math.random() * 200 - 100, + type, + })) + ) + } + + const handleSkip = () => { + if (isAnimating) return + setIsEditing(false) + stopAudio() + if (currentIndex < suggestions.length - 1) { + setCurrentIndex(prev => prev + 1) + } else { + setIsOpen(false) + setSuggestions([]) + } + } + + const handleEditSave = async () => { + const suggestion = suggestions[currentIndex] + if (!suggestion) return + try { + await api.patch(`/api/annotations/${suggestion.id}`, { corrected_text: 
editText }) + // Update local state so diff re-renders with new text + setSuggestions(prev => prev.map((s, i) => i === currentIndex ? { ...s, corrected_text: editText } : s)) + } catch (error) { + console.error('Failed to save edit:', error) + } + setIsEditing(false) + } + + const togglePlay = () => { + const s = suggestions[currentIndex] + if (!s || s.segment_start == null || s.segment_end == null) return + + if (isPlaying && audioRef.current) { + stopAudio() + return + } + + const token = localStorage.getItem(getStorageKey('token')) || '' + const url = `${BACKEND_URL}/api/audio/chunks/${s.conversation_id}?start_time=${s.segment_start}&end_time=${s.segment_end}&token=${token}` + const audio = new Audio(url) + audioRef.current = audio + audio.addEventListener('ended', () => setIsPlaying(false)) + audio.play().then(() => setIsPlaying(true)).catch(() => setIsPlaying(false)) + } + + const handleAction = async (action: 'accept' | 'reject', swipeDirection: number) => { + const suggestion = suggestions[currentIndex] + if (!suggestion || isAnimating) return + + setIsAnimating(true) + setDirection(swipeDirection) + createParticles(action === 'accept' ? 'heart' : 'heart-break') + + try { + const status = action === 'accept' ? 
'accepted' : 'rejected' + await api.patch(`/api/annotations/${suggestion.id}/status`, null, { + params: { status }, + }) + } catch (error) { + console.error(`Failed to ${action} suggestion:`, error) + } + + setTimeout(() => { + if (currentIndex < suggestions.length - 1) { + setCurrentIndex(prev => prev + 1) + } else { + setIsOpen(false) + setSuggestions([]) + } + setIsAnimating(false) + setDirection(0) + }, 400) + } + + const onPanEnd = (_event: MouseEvent | TouchEvent | PointerEvent, info: PanInfo) => { + if (isAnimating) return + const threshold = 100 + if (info.offset.x > threshold) { + handleAction('accept', 1) + } else if (info.offset.x < -threshold) { + handleAction('reject', -1) + } + } + + const diff = useMemo(() => { + if (!isOpen || suggestions.length === 0) return null + const current = suggestions[currentIndex] + return computeWordDiff(current.original_text, current.corrected_text) + }, [isOpen, suggestions, currentIndex]) + + if (!isOpen || suggestions.length === 0) return null + + const current = suggestions[currentIndex] + + const cardVariants = { + enter: (dir: number) => ({ x: dir > 0 ? 1000 : -1000, opacity: 0, scale: 0.8 }), + center: { zIndex: 1, x: 0, opacity: 1, scale: 1 }, + exit: (dir: number) => ({ zIndex: 0, x: dir > 0 ? 1000 : -1000, opacity: 0, scale: 0.8 }), + } + + return ( + + {isOpen && ( + +
+ {/* Particles */} + + {particles.map(p => ( + + {p.type === 'heart' ? ( + + ) : ( + + )} + + ))} + + + {/* Card */} + + {/* Status Overlays */} + + {direction > 0 && ( + + GOOD + + )} + {direction < 0 && ( + + NOPE + + )} + + + {/* Content */} + +
+ Review Suggestion +
+ + {current.conversation_title && ( +
+ {current.conversation_title} +
+ )} + + {/* Original vs corrected with diff highlighting */} +
+
+
Original
+
+ {diff?.originalTokens.map((t, i) => + t.type === 'removed' ? ( + {t.text} + ) : ( + {t.text} + ) + )} +
+
+
+
+ {isEditing ? ( + <> + + Editing... + + ) : ( + 'Suggested' + )} +
+ {isEditing ? ( +
+