diff --git a/.env.template b/.env.template index c2a4d8a2..388edbf5 100644 --- a/.env.template +++ b/.env.template @@ -90,16 +90,12 @@ CHAT_TEMPERATURE=0.7 # SPEECH-TO-TEXT CONFIGURATION # ======================================== -# Primary transcription provider: deepgram, mistral, or parakeet +# Primary transcription provider: deepgram or parakeet TRANSCRIPTION_PROVIDER=deepgram # Deepgram configuration DEEPGRAM_API_KEY=your-deepgram-key-here -# Mistral configuration (when TRANSCRIPTION_PROVIDER=mistral) -MISTRAL_API_KEY=your-mistral-key-here -MISTRAL_MODEL=voxtral-mini-2507 - # Parakeet ASR configuration (when TRANSCRIPTION_PROVIDER=parakeet) PARAKEET_ASR_URL=http://host.docker.internal:8767 diff --git a/.github/workflows/README.md b/.github/workflows/README.md index 5e98cd18..0b8987c5 100644 --- a/.github/workflows/README.md +++ b/.github/workflows/README.md @@ -1,91 +1,408 @@ -# GitHub Actions CI/CD Setup for Friend Lite +# Chronicle GitHub Workflows -This sets up **automatic GitHub releases** with APK/IPA files whenever you push code. +Documentation for CI/CD workflows and test automation. -## πŸš€ How This Works +## Test Workflows Overview -1. You push code to GitHub -2. GitHub automatically builds **both Android APK and iOS IPA** -3. **Creates GitHub Releases** with both files attached -4. You download directly from the **Releases** tab! +Chronicle uses **three separate test workflows** to balance fast PR feedback with comprehensive testing: -## 🎯 Quick Setup (2 Steps) +| Workflow | Trigger | Test Coverage | API Keys | Purpose | +|----------|---------|---------------|----------|---------| +| `robot-tests.yml` | All PRs | ~70% (no-API tests) | ❌ Not required | Fast PR validation | +| `full-tests-with-api.yml` | Push to dev/main | 100% (full suite) | βœ… Required | Comprehensive validation | +| `pr-tests-with-api.yml` | PR label trigger | 100% (full suite) | βœ… Required | Pre-merge API testing | -### Step 1: Get Expo Token -1. 
Go to [expo.dev](https://expo.dev) and sign in/create account -2. Go to [Access Tokens](https://expo.dev/accounts/[account]/settings/access-tokens) -3. Create a new token and copy it +## Workflow Details -### Step 2: Add GitHub Secret -1. In your GitHub repo: **Settings** β†’ **Secrets and variables** β†’ **Actions** -2. Click **New repository secret** -3. Name: `EXPO_TOKEN` -4. Value: Paste your token from Step 1 -5. Click **Add secret** +### 1. `robot-tests.yml` - PR Tests (No API Keys) -## ⚑ That's It! -# GitHub Actions Workflows +**File**: `.github/workflows/robot-tests.yml` -## Integration Tests +**Trigger**: +```yaml +on: + pull_request: + paths: + - 'tests/**/*.robot' + - 'tests/**/*.py' + - 'backends/advanced/src/**' +``` + +**Characteristics**: +- **No secrets required** - Works for external contributors +- **Excludes**: Tests tagged with `requires-api-keys` +- **Config**: `tests/configs/mock-services.yml` +- **Test Script**: `./run-no-api-tests.sh` +- **Results**: `results-no-api/` +- **Time**: ~10-15 minutes +- **Coverage**: ~70% of test suite + +**Benefits**: +- Fast feedback on PRs +- No API costs for every PR +- External contributors can run full CI +- Most development workflows covered + +**What's Tested**: +- API endpoints (auth, CRUD, permissions) +- Infrastructure (workers, queues, health) +- Basic integration (non-transcription) + +**What's Skipped**: +- Audio upload with transcription +- Memory operations requiring LLM +- Audio streaming with STT +- Full E2E pipeline tests + +### 2. 
`full-tests-with-api.yml` - Dev/Main Tests (Full Suite) + +**File**: `.github/workflows/full-tests-with-api.yml` + +**Trigger**: +```yaml +on: + push: + branches: [dev, main] + paths: + - 'tests/**' + - 'backends/advanced/src/**' + workflow_dispatch: # Manual trigger available +``` + +**Characteristics**: +- **Requires secrets**: `DEEPGRAM_API_KEY`, `OPENAI_API_KEY`, `HF_TOKEN` +- **Includes**: All tests (including `requires-api-keys`) +- **Config**: `tests/configs/deepgram-openai.yml` +- **Test Script**: `./run-robot-tests.sh` +- **Results**: `results/` +- **Time**: ~20-30 minutes +- **Coverage**: 100% of test suite + +**Benefits**: +- Full validation before deployment +- Catches API integration issues +- Validates real transcription and memory processing +- Comprehensive E2E coverage + +**What's Tested**: +- Everything from `robot-tests.yml` PLUS: +- Audio upload with real transcription +- Memory extraction with LLM +- Audio streaming with STT +- Full E2E pipeline validation + +### 3. 
`pr-tests-with-api.yml` - Label-Triggered PR Tests + +**File**: `.github/workflows/pr-tests-with-api.yml` + +**Trigger**: +```yaml +on: + pull_request: + types: [labeled, synchronize] +``` + +**Condition**: +```yaml +if: contains(github.event.pull_request.labels.*.name, 'test-with-api-keys') +``` + +**Characteristics**: +- **Requires**: PR labeled with `test-with-api-keys` +- **Requires secrets**: `DEEPGRAM_API_KEY`, `OPENAI_API_KEY`, `HF_TOKEN` +- **Includes**: All tests (same as full-tests-with-api.yml) +- **Config**: `tests/configs/deepgram-openai.yml` +- **Time**: ~20-30 minutes +- **Re-runs**: On new commits while label present -### Automatic Integration Tests (`integration-tests.yml`) -- **Triggers**: Push/PR to `main` or `develop` branches affecting backend code -- **Timeout**: 15 minutes -- **Mode**: Cached mode (better for CI environment) -- **Dependencies**: Requires `DEEPGRAM_API_KEY` and `OPENAI_API_KEY` secrets +**Benefits**: +- Test API integrations before merging +- Useful for PRs modifying transcription/LLM code +- Maintainers can trigger on trusted PRs +- Catches issues before they reach dev/main + +**Use Cases**: +- PRs that modify transcription logic +- PRs that change memory extraction +- PRs that affect audio processing pipeline +- Before merging large feature branches + +## Usage Guide + +### For Contributors + +**Normal PR Workflow**: +1. Push your branch +2. Create PR +3. `robot-tests.yml` runs automatically (~70% coverage) +4. Fix any failures +5. Merge when tests pass + +**Testing API Integrations**: +1. Push your branch +2. Create PR +3. Ask maintainer to add `test-with-api-keys` label +4. `pr-tests-with-api.yml` runs (100% coverage) +5. Fix any failures +6. Merge when tests pass + +### For Maintainers + +**Adding the Label**: +```bash +# Via GitHub UI +1. Go to PR +2. Click "Labels" on right sidebar +3. 
Select "test-with-api-keys" + +# Via GitHub CLI +gh pr edit --add-label "test-with-api-keys" +``` + +**When to Use Label**: +- PR modifies audio processing or transcription +- PR changes memory extraction logic +- PR affects LLM integration +- Before merging large features +- When in doubt about API changes + +**Removing the Label**: +- Label is automatically retained on new commits +- Remove manually if no longer needed +- Saves API costs if changes don't affect APIs + +## Test Results + +### PR Comments + +All workflows post results as PR comments: + +```markdown +## πŸŽ‰ Robot Framework Test Results (No API Keys) + +**Status**: βœ… All tests passed! + +| Metric | Count | +|--------|-------| +| βœ… Passed | 76 | +| ❌ Failed | 0 | +| πŸ“Š Total | 76 | + +### πŸ“Š View Reports +- [Test Report](https://pages-url/report.html) +- [Detailed Log](https://pages-url/log.html) +``` + +### GitHub Pages + +Test reports are automatically deployed to GitHub Pages: +- **Live Reports**: Clickable links in PR comments +- **Persistence**: 30 days retention +- **Format**: HTML reports from Robot Framework + +### Artifacts + +Downloadable artifacts for deeper analysis: +- **HTML Reports**: `robot-test-reports-html-*` +- **XML Results**: `robot-test-results-xml-*` +- **Logs**: `robot-test-logs-*` (on failure only) +- **Retention**: 30 days for reports, 7 days for logs ## Required Secrets -Add these secrets in your GitHub repository settings: +### Repository Secrets +Must be configured in GitHub repository settings: + +```bash +DEEPGRAM_API_KEY # Required for full-tests-with-api.yml +OPENAI_API_KEY # Required for full-tests-with-api.yml +HF_TOKEN # Optional (speaker recognition) ``` -DEEPGRAM_API_KEY=your-deepgram-api-key -OPENAI_API_KEY=your-openai-api-key + +**Setting Secrets**: +1. Go to repository Settings +2. Navigate to Secrets and variables β†’ Actions +3. Click "New repository secret" +4. 
Add each secret + +### Secret Validation + +Workflows validate secrets before running tests: +```yaml +- name: Verify required secrets + env: + DEEPGRAM_API_KEY: ${{ secrets.DEEPGRAM_API_KEY }} + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + run: | + if [ -z "$DEEPGRAM_API_KEY" ]; then + echo "❌ ERROR: DEEPGRAM_API_KEY secret is not set" + exit 1 + fi ``` -## Test Environment +## Cost Management + +### API Cost Breakdown + +**No-API Tests** (`robot-tests.yml`): +- **Cost**: $0 per run +- **Frequency**: Every PR commit +- **Monthly**: Potentially hundreds of runs +- **Savings**: Significant with external contributors + +**Full Tests** (`full-tests-with-api.yml`, `pr-tests-with-api.yml`): +- **Transcription**: ~$0.10-0.30 per run (Deepgram) +- **LLM**: ~$0.05-0.15 per run (OpenAI) +- **Total**: ~$0.15-0.45 per run +- **Frequency**: dev/main pushes + labeled PRs +- **Monthly**: Typically 10-50 runs + +### Cost Optimization + +**Strategies**: +1. Most PRs use no-API tests (free) +2. Full tests only on protected branches +3. Label-triggered for selective full testing +4. 
No redundant API calls on every commit + +**Before This System**: +- Every PR: ~$0.45 cost +- 100 PRs/month: ~$45 + +**After This System**: +- Most PRs: $0 cost +- 10 dev/main pushes: ~$4.50 +- 5 labeled PRs: ~$2.25 +- Total: ~$6.75/month (85% savings) + +## Workflow Configuration + +### Common Settings -- **Runtime**: Ubuntu latest with Docker support -- **Python**: 3.12 with uv package manager -- **Services**: MongoDB (port 27018), Qdrant (ports 6335/6336), Backend (port 8001) -- **Test Data**: Isolated test directories and databases -- **Audio**: 4-minute glass blowing tutorial for end-to-end validation +All test workflows share: -## Modes +```yaml +# Performance +timeout-minutes: 30 +runs-on: ubuntu-latest -### Cached Mode (Recommended for CI) -- Reuses containers and data between test runs -- Faster startup time -- Better for containerized CI environments -- Used by default in automatic workflows +# Caching +- uses: actions/cache@v4 + with: + path: /tmp/.buildx-cache + key: ${{ runner.os }}-buildx-${{ hashFiles(...) }} -### Fresh Mode (Recommended for Local Development) -- Completely clean environment each run -- Removes all test data and containers -- Slower but more reliable for debugging -- Can be selected in manual workflow +# Python setup +- uses: actions/setup-python@v5 + with: + python-version: "3.12" + +# UV package manager +- uses: astral-sh/setup-uv@v4 + with: + version: "latest" +``` + +### Test Execution Pattern + +```yaml +- name: Run tests + env: + CLEANUP_CONTAINERS: "false" # Handled by workflow + # API keys if needed + run: | + ./run-{no-api|robot}-tests.sh + TEST_EXIT_CODE=$? 
+ echo "test_exit_code=$TEST_EXIT_CODE" >> $GITHUB_ENV + exit 0 # Don't fail yet + +- name: Fail workflow if tests failed + if: always() + run: | + if [ "${{ env.test_exit_code }}" != "0" ]; then + echo "❌ Tests failed" + exit 1 + fi +``` + +**Benefits**: +- Artifacts uploaded even on test failure +- Clean container teardown guaranteed +- Clear separation of test execution and reporting ## Troubleshooting -1. **Test Timeout**: Increase `timeout_minutes` in manual workflow -2. **Memory Issues**: Check container logs in failed run artifacts -3. **API Key Issues**: Verify secrets are set correctly in repository settings -4. **Fresh Mode Fails**: Try cached mode for comparison +### Workflow Not Triggering -## Local Testing +**Problem**: Workflow doesn't run on PR +**Solutions**: +- Check file paths in workflow trigger +- Verify workflow file syntax (YAML) +- Check repository permissions +- Look for disabled workflows in Settings -To run the same tests locally: +### Secret Errors -```bash -cd backends/advanced-backend +**Problem**: "ERROR: DEEPGRAM_API_KEY secret is not set" +**Solutions**: +- Verify secret is set in repository settings +- Check secret name matches exactly (case-sensitive) +- Ensure workflow has access to secrets +- Fork PRs cannot access secrets (expected) + +### Test Failures + +**Problem**: Tests fail in CI but pass locally +**Solutions**: +- Check environment differences (.env.test) +- Verify test isolation (database cleanup) +- Look for timing issues (increase timeouts) +- Check Docker resource limits in CI + +### Label Workflow Not Running + +**Problem**: Added label but workflow doesn't trigger +**Solutions**: +- Verify label name is exactly `test-with-api-keys` +- Check workflow trigger includes `types: [labeled]` +- Try removing and re-adding label +- Push new commit to trigger synchronize event + +## Maintenance + +### Updating Workflows + +**When to Update**: +- Adding new test categories +- Changing test execution scripts +- Modifying 
timeout values +- Updating artifact retention + +**Testing Changes**: +1. Create test branch +2. Modify workflow file +3. Push to trigger workflow +4. Verify execution +5. Merge if successful + +### Monitoring + +**Key Metrics**: +- Test pass rate (target: >95%) +- Workflow execution time (target: <30min) +- API costs (target: <$10/month) +- Artifact storage usage -# Install dependencies -uv sync --dev +**Tools**: +- GitHub Actions dashboard +- Workflow run history +- Cost tracking (GitHub billing) +- Test result trends -# Set up environment (copy from .env.template) -cp .env.template .env.test -# Add your API keys to .env.test +## Reference Links -# Run Robot Framework integration tests -uv run robot --outputdir test-results --loglevel INFO tests/integration/integration_test.robot -``` \ No newline at end of file +- **Test Suite README**: `tests/README.md` +- **Testing Guidelines**: `tests/TESTING_GUIDELINES.md` +- **Tag Documentation**: `tests/tags.md` +- **GitHub Actions Docs**: https://docs.github.com/en/actions diff --git a/.github/workflows/full-tests-with-api.yml b/.github/workflows/full-tests-with-api.yml new file mode 100644 index 00000000..b5881fcd --- /dev/null +++ b/.github/workflows/full-tests-with-api.yml @@ -0,0 +1,264 @@ +name: Robot Framework Tests (Full - With API Keys) + +on: + push: + branches: + - dev + - main + paths: + - 'tests/**/*.robot' + - 'tests/**/*.py' + - 'backends/advanced/src/**' + - '.github/workflows/full-tests-with-api.yml' + workflow_dispatch: # Allow manual triggering + +permissions: + contents: read + pull-requests: write + issues: write + pages: write + id-token: write + +jobs: + full-robot-tests: + runs-on: ubuntu-latest + timeout-minutes: 30 + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Verify required secrets + env: + DEEPGRAM_API_KEY: ${{ secrets.DEEPGRAM_API_KEY }} + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + HF_TOKEN: ${{ secrets.HF_TOKEN }} + run: | + echo "Verifying required 
secrets..." + if [ -z "$DEEPGRAM_API_KEY" ]; then + echo "❌ ERROR: DEEPGRAM_API_KEY secret is not set" + exit 1 + fi + if [ -z "$OPENAI_API_KEY" ]; then + echo "❌ ERROR: OPENAI_API_KEY secret is not set" + exit 1 + fi + if [ -z "$HF_TOKEN" ]; then + echo "⚠️ WARNING: HF_TOKEN secret is not set (speaker recognition will be disabled)" + else + echo "βœ“ HF_TOKEN is set (length: ${#HF_TOKEN})" + fi + echo "βœ“ DEEPGRAM_API_KEY is set (length: ${#DEEPGRAM_API_KEY})" + echo "βœ“ OPENAI_API_KEY is set (length: ${#OPENAI_API_KEY})" + echo "βœ“ Required secrets verified" + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + with: + driver-opts: | + image=moby/buildkit:latest + network=host + + - name: Cache Docker layers + uses: actions/cache@v4 + with: + path: /tmp/.buildx-cache + key: ${{ runner.os }}-buildx-${{ hashFiles('backends/advanced/Dockerfile', 'backends/advanced/pyproject.toml') }} + restore-keys: | + ${{ runner.os }}-buildx- + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install uv + uses: astral-sh/setup-uv@v4 + with: + version: "latest" + + - name: Install Robot Framework and dependencies + run: | + uv pip install --system robotframework robotframework-requests python-dotenv websockets + + - name: Create test config.yml + run: | + echo "Copying test configuration file..." + mkdir -p config + cp tests/configs/deepgram-openai.yml config/config.yml + echo "βœ“ Test config.yml created from tests/configs/deepgram-openai.yml" + ls -lh config/config.yml + + - name: Create plugins.yml from template + run: | + echo "Creating plugins.yml from template..." 
+ if [ -f "config/plugins.yml.template" ]; then + cp config/plugins.yml.template config/plugins.yml + echo "βœ“ plugins.yml created from template" + ls -lh config/plugins.yml + else + echo "❌ ERROR: config/plugins.yml.template not found" + exit 1 + fi + + - name: Run Full Robot Framework tests + working-directory: tests + env: + # Required for test runner script + DEEPGRAM_API_KEY: ${{ secrets.DEEPGRAM_API_KEY }} + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + HF_TOKEN: ${{ secrets.HF_TOKEN }} + CLEANUP_CONTAINERS: "false" # Don't cleanup in CI - handled by workflow + run: | + # Use the full test script (includes all tests with API keys) + ./run-robot-tests.sh + TEST_EXIT_CODE=$? + echo "test_exit_code=$TEST_EXIT_CODE" >> $GITHUB_ENV + exit 0 # Don't fail here, we'll fail at the end after uploading artifacts + + - name: Save service logs to files + if: always() + working-directory: backends/advanced + run: | + echo "Checking running containers..." + docker compose -f docker-compose-test.yml ps -a + echo "" + echo "Saving service logs to files..." 
+ mkdir -p logs + docker compose -f docker-compose-test.yml logs chronicle-backend-test > logs/backend.log 2>&1 || true + docker compose -f docker-compose-test.yml logs workers-test > logs/workers.log 2>&1 || true + docker compose -f docker-compose-test.yml logs mongo-test > logs/mongo.log 2>&1 || true + docker compose -f docker-compose-test.yml logs redis-test > logs/redis.log 2>&1 || true + docker compose -f docker-compose-test.yml logs qdrant-test > logs/qdrant.log 2>&1 || true + docker compose -f docker-compose-test.yml logs speaker-service-test > logs/speaker.log 2>&1 || true + echo "βœ“ Logs saved to backends/advanced/logs/" + ls -lh logs/ + + - name: Check if test results exist + if: always() + id: check_results + run: | + if [ -f tests/results/output.xml ]; then + echo "results_exist=true" >> $GITHUB_OUTPUT + else + echo "results_exist=false" >> $GITHUB_OUTPUT + echo "⚠️ No test results found in tests/results/" + ls -la tests/results/ || echo "Results directory doesn't exist" + fi + + - name: Upload Robot Framework HTML reports + if: always() && steps.check_results.outputs.results_exist == 'true' + uses: actions/upload-artifact@v4 + with: + name: robot-test-reports-html-full + path: | + tests/results/report.html + tests/results/log.html + retention-days: 30 + + - name: Publish HTML Report as GitHub Pages artifact + if: always() && steps.check_results.outputs.results_exist == 'true' + uses: actions/upload-pages-artifact@v3 + with: + path: tests/results + + - name: Deploy to GitHub Pages + if: always() && steps.check_results.outputs.results_exist == 'true' + uses: actions/deploy-pages@v4 + id: deployment + + - name: Generate test summary + if: always() && steps.check_results.outputs.results_exist == 'true' + id: test_summary + run: | + # Parse test results + python3 << 'PYTHON_SCRIPT' > test_summary.txt + import xml.etree.ElementTree as ET + tree = ET.parse('tests/results/output.xml') + root = tree.getroot() + stats = root.find('.//total/stat') + if stats is 
not None: + passed = stats.get("pass", "0") + failed = stats.get("fail", "0") + total = int(passed) + int(failed) + print(f"PASSED={passed}") + print(f"FAILED={failed}") + print(f"TOTAL={total}") + PYTHON_SCRIPT + + # Source the variables + source test_summary.txt + + # Set outputs + echo "passed=$PASSED" >> $GITHUB_OUTPUT + echo "failed=$FAILED" >> $GITHUB_OUTPUT + echo "total=$TOTAL" >> $GITHUB_OUTPUT + + - name: Upload Robot Framework XML output + if: always() && steps.check_results.outputs.results_exist == 'true' + uses: actions/upload-artifact@v4 + with: + name: robot-test-results-xml-full + path: tests/results/output.xml + retention-days: 30 + + - name: Upload logs on failure + if: failure() + uses: actions/upload-artifact@v4 + with: + name: robot-test-logs-full + path: | + backends/advanced/logs/*.log + backends/advanced/.env + tests/setup/.env.test + retention-days: 7 + + - name: Display test results summary + if: always() + run: | + if [ -f tests/results/output.xml ]; then + echo "Full test results generated successfully (With API Keys)" + echo "========================================" + python3 << 'PYTHON_SCRIPT' + import xml.etree.ElementTree as ET + tree = ET.parse('tests/results/output.xml') + root = tree.getroot() + stats = root.find('.//total/stat') + if stats is not None: + passed = stats.get("pass", "0") + failed = stats.get("fail", "0") + print(f'βœ… Passed: {passed}') + print(f'❌ Failed: {failed}') + print(f'πŸ“Š Total: {int(passed) + int(failed)}') + PYTHON_SCRIPT + echo "========================================" + echo "" + echo "ℹ️ Full test suite including API-dependent tests" + echo "" + echo "πŸ“Š FULL TEST REPORTS AVAILABLE:" + echo " 1. Go to the 'Summary' tab at the top of this page" + echo " 2. Scroll down to 'Artifacts' section" + echo " 3. Download 'robot-test-reports-html-full'" + echo " 4. 
Extract and open report.html or log.html in your browser" + echo "" + echo "The HTML reports provide:" + echo " - report.html: Executive summary with statistics" + echo " - log.html: Detailed step-by-step execution log" + echo "" + fi + + - name: Cleanup + if: always() + working-directory: backends/advanced + run: | + docker compose -f docker-compose-test.yml down -v + + - name: Fail workflow if tests failed + if: always() + run: | + if [ "${{ env.test_exit_code }}" != "0" ]; then + echo "❌ Tests failed with exit code ${{ env.test_exit_code }}" + exit 1 + else + echo "βœ… All tests passed" + fi diff --git a/.github/workflows/pr-tests-with-api.yml b/.github/workflows/pr-tests-with-api.yml new file mode 100644 index 00000000..aeb45b1c --- /dev/null +++ b/.github/workflows/pr-tests-with-api.yml @@ -0,0 +1,303 @@ +name: Robot Framework Tests (PR - Label Triggered) + +on: + pull_request: + types: [labeled, synchronize] + +permissions: + contents: read + pull-requests: write + issues: write + pages: write + id-token: write + +jobs: + pr-full-tests: + # Only run if PR has the 'test-with-api-keys' label + if: contains(github.event.pull_request.labels.*.name, 'test-with-api-keys') + runs-on: ubuntu-latest + timeout-minutes: 30 + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Verify required secrets + env: + DEEPGRAM_API_KEY: ${{ secrets.DEEPGRAM_API_KEY }} + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + HF_TOKEN: ${{ secrets.HF_TOKEN }} + run: | + echo "Verifying required secrets for label-triggered full test run..." 
+ if [ -z "$DEEPGRAM_API_KEY" ]; then + echo "❌ ERROR: DEEPGRAM_API_KEY secret is not set" + exit 1 + fi + if [ -z "$OPENAI_API_KEY" ]; then + echo "❌ ERROR: OPENAI_API_KEY secret is not set" + exit 1 + fi + if [ -z "$HF_TOKEN" ]; then + echo "⚠️ WARNING: HF_TOKEN secret is not set (speaker recognition will be disabled)" + else + echo "βœ“ HF_TOKEN is set (length: ${#HF_TOKEN})" + fi + echo "βœ“ DEEPGRAM_API_KEY is set (length: ${#DEEPGRAM_API_KEY})" + echo "βœ“ OPENAI_API_KEY is set (length: ${#OPENAI_API_KEY})" + echo "βœ“ Required secrets verified" + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + with: + driver-opts: | + image=moby/buildkit:latest + network=host + + - name: Cache Docker layers + uses: actions/cache@v4 + with: + path: /tmp/.buildx-cache + key: ${{ runner.os }}-buildx-${{ hashFiles('backends/advanced/Dockerfile', 'backends/advanced/pyproject.toml') }} + restore-keys: | + ${{ runner.os }}-buildx- + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install uv + uses: astral-sh/setup-uv@v4 + with: + version: "latest" + + - name: Install Robot Framework and dependencies + run: | + uv pip install --system robotframework robotframework-requests python-dotenv websockets + + - name: Create test config.yml + run: | + echo "Copying test configuration file..." + mkdir -p config + cp tests/configs/deepgram-openai.yml config/config.yml + echo "βœ“ Test config.yml created from tests/configs/deepgram-openai.yml" + ls -lh config/config.yml + + - name: Create plugins.yml from template + run: | + echo "Creating plugins.yml from template..." 
+ if [ -f "config/plugins.yml.template" ]; then + cp config/plugins.yml.template config/plugins.yml + echo "βœ“ plugins.yml created from template" + ls -lh config/plugins.yml + else + echo "❌ ERROR: config/plugins.yml.template not found" + exit 1 + fi + + - name: Run Full Robot Framework tests + working-directory: tests + env: + # Required for test runner script + DEEPGRAM_API_KEY: ${{ secrets.DEEPGRAM_API_KEY }} + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + HF_TOKEN: ${{ secrets.HF_TOKEN }} + CLEANUP_CONTAINERS: "false" # Don't cleanup in CI - handled by workflow + run: | + # Use the full test script (includes all tests with API keys) + ./run-robot-tests.sh + TEST_EXIT_CODE=$? + echo "test_exit_code=$TEST_EXIT_CODE" >> $GITHUB_ENV + exit 0 # Don't fail here, we'll fail at the end after uploading artifacts + + - name: Save service logs to files + if: always() + working-directory: backends/advanced + run: | + echo "Checking running containers..." + docker compose -f docker-compose-test.yml ps -a + echo "" + echo "Saving service logs to files..." 
+ mkdir -p logs + docker compose -f docker-compose-test.yml logs chronicle-backend-test > logs/backend.log 2>&1 || true + docker compose -f docker-compose-test.yml logs workers-test > logs/workers.log 2>&1 || true + docker compose -f docker-compose-test.yml logs mongo-test > logs/mongo.log 2>&1 || true + docker compose -f docker-compose-test.yml logs redis-test > logs/redis.log 2>&1 || true + docker compose -f docker-compose-test.yml logs qdrant-test > logs/qdrant.log 2>&1 || true + docker compose -f docker-compose-test.yml logs speaker-service-test > logs/speaker.log 2>&1 || true + echo "βœ“ Logs saved to backends/advanced/logs/" + ls -lh logs/ + + - name: Check if test results exist + if: always() + id: check_results + run: | + if [ -f tests/results/output.xml ]; then + echo "results_exist=true" >> $GITHUB_OUTPUT + else + echo "results_exist=false" >> $GITHUB_OUTPUT + echo "⚠️ No test results found in tests/results/" + ls -la tests/results/ || echo "Results directory doesn't exist" + fi + + - name: Upload Robot Framework HTML reports + if: always() && steps.check_results.outputs.results_exist == 'true' + uses: actions/upload-artifact@v4 + with: + name: robot-test-reports-html-pr-labeled + path: | + tests/results/report.html + tests/results/log.html + retention-days: 30 + + - name: Publish HTML Report as GitHub Pages artifact + if: always() && steps.check_results.outputs.results_exist == 'true' + uses: actions/upload-pages-artifact@v3 + with: + path: tests/results + + - name: Deploy to GitHub Pages + if: always() && steps.check_results.outputs.results_exist == 'true' + uses: actions/deploy-pages@v4 + id: deployment + + - name: Generate test summary + if: always() && steps.check_results.outputs.results_exist == 'true' + id: test_summary + run: | + # Parse test results + python3 << 'PYTHON_SCRIPT' > test_summary.txt + import xml.etree.ElementTree as ET + tree = ET.parse('tests/results/output.xml') + root = tree.getroot() + stats = root.find('.//total/stat') + if 
stats is not None: + passed = stats.get("pass", "0") + failed = stats.get("fail", "0") + total = int(passed) + int(failed) + print(f"PASSED={passed}") + print(f"FAILED={failed}") + print(f"TOTAL={total}") + PYTHON_SCRIPT + + # Source the variables + source test_summary.txt + + # Set outputs + echo "passed=$PASSED" >> $GITHUB_OUTPUT + echo "failed=$FAILED" >> $GITHUB_OUTPUT + echo "total=$TOTAL" >> $GITHUB_OUTPUT + + - name: Post PR comment with test results + if: always() && steps.check_results.outputs.results_exist == 'true' + uses: actions/github-script@v7 + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + script: | + const passed = '${{ steps.test_summary.outputs.passed }}'; + const failed = '${{ steps.test_summary.outputs.failed }}'; + const total = '${{ steps.test_summary.outputs.total }}'; + const runUrl = `https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}`; + const pagesUrl = '${{ steps.deployment.outputs.page_url }}'; + + const status = failed === '0' ? 'βœ… All tests passed!' : '❌ Some tests failed'; + const emoji = failed === '0' ? 'πŸŽ‰' : '⚠️'; + + const comment = `## ${emoji} Robot Framework Test Results (Label-Triggered Full Suite) + + **Status**: ${status} + + 🏷️ **Note**: This run was triggered by the \`test-with-api-keys\` label. + All tests including API-dependent tests have been executed. 
+ + | Metric | Count | + |--------|-------| + | βœ… Passed | ${passed} | + | ❌ Failed | ${failed} | + | πŸ“Š Total | ${total} | + + ### πŸ“Š View Reports + + **GitHub Pages (Live Reports):** + - [πŸ“‹ Test Report](${pagesUrl}report.html) + - [πŸ“ Detailed Log](${pagesUrl}log.html) + + **Download Artifacts:** + - [robot-test-reports-html-pr-labeled](${runUrl}) - HTML reports + - [robot-test-results-xml-pr-labeled](${runUrl}) - XML output + + --- + *[View full workflow run](${runUrl})*`; + + github.rest.issues.createComment({ + issue_number: context.issue.number, + owner: context.repo.owner, + repo: context.repo.repo, + body: comment + }); + + - name: Upload Robot Framework XML output + if: always() && steps.check_results.outputs.results_exist == 'true' + uses: actions/upload-artifact@v4 + with: + name: robot-test-results-xml-pr-labeled + path: tests/results/output.xml + retention-days: 30 + + - name: Upload logs on failure + if: failure() + uses: actions/upload-artifact@v4 + with: + name: robot-test-logs-pr-labeled + path: | + backends/advanced/logs/*.log + backends/advanced/.env + tests/setup/.env.test + retention-days: 7 + + - name: Display test results summary + if: always() + run: | + if [ -f tests/results/output.xml ]; then + echo "Label-triggered full test results generated successfully" + echo "========================================" + python3 << 'PYTHON_SCRIPT' + import xml.etree.ElementTree as ET + tree = ET.parse('tests/results/output.xml') + root = tree.getroot() + stats = root.find('.//total/stat') + if stats is not None: + passed = stats.get("pass", "0") + failed = stats.get("fail", "0") + print(f'βœ… Passed: {passed}') + print(f'❌ Failed: {failed}') + print(f'πŸ“Š Total: {int(passed) + int(failed)}') + PYTHON_SCRIPT + echo "========================================" + echo "" + echo "🏷️ This run was triggered by the 'test-with-api-keys' label" + echo "ℹ️ Full test suite including API-dependent tests" + echo "" + echo "πŸ“Š FULL TEST REPORTS 
AVAILABLE:" + echo " 1. Go to the 'Summary' tab at the top of this page" + echo " 2. Scroll down to 'Artifacts' section" + echo " 3. Download 'robot-test-reports-html-pr-labeled'" + echo " 4. Extract and open report.html or log.html in your browser" + echo "" + fi + + - name: Cleanup + if: always() + working-directory: backends/advanced + run: | + docker compose -f docker-compose-test.yml down -v + + - name: Fail workflow if tests failed + if: always() + run: | + if [ "${{ env.test_exit_code }}" != "0" ]; then + echo "❌ Tests failed with exit code ${{ env.test_exit_code }}" + exit 1 + else + echo "βœ… All tests passed" + fi diff --git a/.github/workflows/robot-tests.yml b/.github/workflows/robot-tests.yml index 3333266d..35e4dffa 100644 --- a/.github/workflows/robot-tests.yml +++ b/.github/workflows/robot-tests.yml @@ -1,4 +1,4 @@ -name: Robot Framework Tests +name: Robot Framework Tests (No API Keys) on: pull_request: @@ -24,30 +24,6 @@ jobs: - name: Checkout code uses: actions/checkout@v4 - - name: Verify required secrets - env: - DEEPGRAM_API_KEY: ${{ secrets.DEEPGRAM_API_KEY }} - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - HF_TOKEN: ${{ secrets.HF_TOKEN }} - run: | - echo "Verifying required secrets..." - if [ -z "$DEEPGRAM_API_KEY" ]; then - echo "❌ ERROR: DEEPGRAM_API_KEY secret is not set" - exit 1 - fi - if [ -z "$OPENAI_API_KEY" ]; then - echo "❌ ERROR: OPENAI_API_KEY secret is not set" - exit 1 - fi - if [ -z "$HF_TOKEN" ]; then - echo "❌ ERROR: HF_TOKEN secret is not set" - exit 1 - fi - echo "βœ“ DEEPGRAM_API_KEY is set (length: ${#DEEPGRAM_API_KEY})" - echo "βœ“ OPENAI_API_KEY is set (length: ${#OPENAI_API_KEY})" - echo "βœ“ HF_TOKEN is set (length: ${#HF_TOKEN})" - echo "βœ“ All required secrets verified" - - name: Set up Docker Buildx uses: docker/setup-buildx-action@v3 with: @@ -79,64 +55,81 @@ jobs: - name: Create test config.yml run: | - echo "Copying test configuration file..." + echo "Copying mock services configuration file..." 
mkdir -p config - cp tests/configs/deepgram-openai.yml config/config.yml - echo "βœ“ Test config.yml created from tests/configs/deepgram-openai.yml" + cp tests/configs/mock-services.yml config/config.yml + echo "βœ“ Test config.yml created from tests/configs/mock-services.yml" + echo "ℹ️ This config disables external API dependencies (transcription, LLM)" ls -lh config/config.yml - - name: Run Robot Framework tests + - name: Create plugins.yml from template + run: | + echo "Creating plugins.yml from template..." + if [ -f "config/plugins.yml.template" ]; then + cp config/plugins.yml.template config/plugins.yml + echo "βœ“ plugins.yml created from template" + ls -lh config/plugins.yml + else + echo "❌ ERROR: config/plugins.yml.template not found" + exit 1 + fi + + - name: Run Robot Framework tests (No API Keys) working-directory: tests env: - # Required for test runner script - DEEPGRAM_API_KEY: ${{ secrets.DEEPGRAM_API_KEY }} - OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} - HF_TOKEN: ${{ secrets.HF_TOKEN }} CLEANUP_CONTAINERS: "false" # Don't cleanup in CI - handled by workflow run: | - # Use the unified test script that mirrors local development - ./run-robot-tests.sh + # Use the no-API test script (excludes tests tagged with requires-api-keys) + ./run-no-api-tests.sh TEST_EXIT_CODE=$? echo "test_exit_code=$TEST_EXIT_CODE" >> $GITHUB_ENV exit 0 # Don't fail here, we'll fail at the end after uploading artifacts - - name: Show service logs + - name: Save service logs to files if: always() working-directory: backends/advanced run: | - echo "=== Backend Logs (last 50 lines) ===" - docker compose -f docker-compose-test.yml logs --tail=50 chronicle-backend-test + echo "Checking running containers..." + docker compose -f docker-compose-test.yml ps -a echo "" - echo "=== Worker Logs (last 50 lines) ===" - docker compose -f docker-compose-test.yml logs --tail=50 workers-test + echo "Saving service logs to files..." 
+ mkdir -p logs + docker compose -f docker-compose-test.yml logs chronicle-backend-test > logs/backend.log 2>&1 || true + docker compose -f docker-compose-test.yml logs workers-test > logs/workers.log 2>&1 || true + docker compose -f docker-compose-test.yml logs mongo-test > logs/mongo.log 2>&1 || true + docker compose -f docker-compose-test.yml logs redis-test > logs/redis.log 2>&1 || true + docker compose -f docker-compose-test.yml logs qdrant-test > logs/qdrant.log 2>&1 || true + docker compose -f docker-compose-test.yml logs speaker-service-test > logs/speaker.log 2>&1 || true + echo "βœ“ Logs saved to backends/advanced/logs/" + ls -lh logs/ - name: Check if test results exist if: always() id: check_results run: | - if [ -f tests/results/output.xml ]; then + if [ -f tests/results-no-api/output.xml ]; then echo "results_exist=true" >> $GITHUB_OUTPUT else echo "results_exist=false" >> $GITHUB_OUTPUT - echo "⚠️ No test results found in tests/results/" - ls -la tests/results/ || echo "Results directory doesn't exist" + echo "⚠️ No test results found in tests/results-no-api/" + ls -la tests/results-no-api/ || echo "Results directory doesn't exist" fi - name: Upload Robot Framework HTML reports if: always() && steps.check_results.outputs.results_exist == 'true' uses: actions/upload-artifact@v4 with: - name: robot-test-reports-html + name: robot-test-reports-html-no-api path: | - tests/results/report.html - tests/results/log.html + tests/results-no-api/report.html + tests/results-no-api/log.html retention-days: 30 - name: Publish HTML Report as GitHub Pages artifact if: always() && steps.check_results.outputs.results_exist == 'true' uses: actions/upload-pages-artifact@v3 with: - path: tests/results + path: tests/results-no-api - name: Deploy to GitHub Pages if: always() && steps.check_results.outputs.results_exist == 'true' @@ -150,7 +143,7 @@ jobs: # Parse test results python3 << 'PYTHON_SCRIPT' > test_summary.txt import xml.etree.ElementTree as ET - tree = 
ET.parse('tests/results/output.xml') + tree = ET.parse('tests/results-no-api/output.xml') root = tree.getroot() stats = root.find('.//total/stat') if stats is not None: @@ -185,10 +178,13 @@ jobs: const status = failed === '0' ? 'βœ… All tests passed!' : '❌ Some tests failed'; const emoji = failed === '0' ? 'πŸŽ‰' : '⚠️'; - const comment = `## ${emoji} Robot Framework Test Results + const comment = `## ${emoji} Robot Framework Test Results (No API Keys) **Status**: ${status} + ℹ️ **Note**: This run excludes tests requiring external API keys (Deepgram, OpenAI). + Tests tagged with \`requires-api-keys\` will run on dev/main branches. + | Metric | Count | |--------|-------| | βœ… Passed | ${passed} | @@ -202,8 +198,8 @@ jobs: - [πŸ“ Detailed Log](${pagesUrl}log.html) **Download Artifacts:** - - [robot-test-reports-html](${runUrl}) - HTML reports - - [robot-test-results-xml](${runUrl}) - XML output + - [robot-test-reports-html-no-api](${runUrl}) - HTML reports + - [robot-test-results-xml-no-api](${runUrl}) - XML output --- *[View full workflow run](${runUrl})*`; @@ -219,16 +215,17 @@ jobs: if: always() && steps.check_results.outputs.results_exist == 'true' uses: actions/upload-artifact@v4 with: - name: robot-test-results-xml - path: tests/results/output.xml + name: robot-test-results-xml-no-api + path: tests/results-no-api/output.xml retention-days: 30 - name: Upload logs on failure if: failure() uses: actions/upload-artifact@v4 with: - name: robot-test-logs + name: robot-test-logs-no-api path: | + backends/advanced/logs/*.log backends/advanced/.env tests/setup/.env.test retention-days: 7 @@ -236,12 +233,12 @@ jobs: - name: Display test results summary if: always() run: | - if [ -f tests/results/output.xml ]; then - echo "Test results generated successfully" + if [ -f tests/results-no-api/output.xml ]; then + echo "Test results generated successfully (No API Keys mode)" echo "========================================" python3 << 'PYTHON_SCRIPT' import 
xml.etree.ElementTree as ET - tree = ET.parse('tests/results/output.xml') + tree = ET.parse('tests/results-no-api/output.xml') root = tree.getroot() stats = root.find('.//total/stat') if stats is not None: @@ -253,10 +250,12 @@ jobs: PYTHON_SCRIPT echo "========================================" echo "" + echo "ℹ️ Tests excluded: requires-api-keys (run on dev/main branches)" + echo "" echo "πŸ“Š FULL TEST REPORTS AVAILABLE:" echo " 1. Go to the 'Summary' tab at the top of this page" echo " 2. Scroll down to 'Artifacts' section" - echo " 3. Download 'robot-test-reports-html'" + echo " 3. Download 'robot-test-reports-html-no-api'" echo " 4. Extract and open report.html or log.html in your browser" echo "" echo "The HTML reports provide:" diff --git a/.gitignore b/.gitignore index 23141c6b..6fa02d7f 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,10 @@ tests/setup/.env.test config/config.yml !config/config.yml.template +# Plugins config (contains secrets) +config/plugins.yml +!config/plugins.yml.template + # Config backups config/*.backup.* config/*.backup* diff --git a/CLAUDE.md b/CLAUDE.md index 7f5f5507..faed99c2 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -18,7 +18,7 @@ This supports a comprehensive web dashboard for management. Chronicle includes an **interactive setup wizard** for easy configuration. 
The wizard guides you through: - Service selection (backend + optional services) - Authentication setup (admin account, JWT secrets) -- Transcription provider configuration (Deepgram, Mistral, or offline ASR) +- Transcription provider configuration (Deepgram or offline ASR) - LLM provider setup (OpenAI or Ollama) - Memory provider selection (Chronicle Native with Qdrant or OpenMemory MCP) - Network configuration and HTTPS setup @@ -86,72 +86,54 @@ cp .env.template .env # Configure environment variables sudo rm -rf backends/advanced/data/ ``` -### Testing Infrastructure +### Running Tests -#### Local Test Scripts -The project includes simplified test scripts that mirror CI workflows: +#### Quick Commands +All test operations are managed through a simple Makefile interface: ```bash -# Run all tests from project root -./run-test.sh [advanced-backend|speaker-recognition|all] +cd tests -# Advanced backend tests only -./run-test.sh advanced-backend +# Full test workflow (recommended) +make test # Start containers + run all tests -# Speaker recognition tests only -./run-test.sh speaker-recognition +# Or step by step +make start # Start test containers (with health checks) +make test-all # Run all test suites +make stop # Stop containers (preserves volumes) -# Run all test suites (default) -./run-test.sh all -``` +# Run specific test suites +make test-endpoints # API endpoint tests (~40 tests, fast) +make test-integration # End-to-end workflows (~15 tests, slower) +make test-infra # Infrastructure resilience (~5 tests) -#### Advanced Backend Integration Tests -```bash -cd backends/advanced +# Quick iteration (reuse existing containers) +make test-quick # Run tests without restarting containers +``` -# Requires .env file with DEEPGRAM_API_KEY and OPENAI_API_KEY -cp .env.template .env # Configure API keys +#### Container Management +All container operations automatically preserve logs before cleanup: -# Run full integration test suite -./run-test.sh +```bash +make start # 
Start test containers +make stop # Stop containers (keep volumes) +make restart # Restart without rebuild +make rebuild # Rebuild images + restart (for code changes) +make containers-clean # SAVES LOGS β†’ removes everything +make status # Show container health +make logs SERVICE= # View specific service logs +``` -# Manual test execution (for debugging) -source .env && export DEEPGRAM_API_KEY && export OPENAI_API_KEY -uv run robot --outputdir test-results --loglevel INFO ../../tests/integration/integration_test.robot +**Log Preservation:** All cleanup operations save container logs to `tests/logs/YYYY-MM-DD_HH-MM-SS/` -# Leave test containers running for debugging (don't auto-cleanup) -CLEANUP_CONTAINERS=false source .env && export DEEPGRAM_API_KEY && export OPENAI_API_KEY -uv run robot --outputdir test-results --loglevel INFO ../../tests/integration/integration_test.robot +#### Test Environment -# Manual cleanup when needed -docker compose -f docker-compose-test.yml down -v -``` +Test services use isolated ports and database: +- **Ports:** Backend (8001), MongoDB (27018), Redis (6380), Qdrant (6337/6338) +- **Database:** `test_db` (separate from production) +- **Credentials:** `test-admin@example.com` / `test-admin-password-123` -#### Test Configuration Flags -- **CLEANUP_CONTAINERS** (default: true): Automatically stop and remove test containers after test completion - - Set to `false` for debugging: `CLEANUP_CONTAINERS=false ./run-test.sh` -- **REBUILD** (default: true): Force rebuild containers with latest code changes -- **FRESH_RUN** (default: true): Start with clean database and fresh containers -- **TRANSCRIPTION_PROVIDER** (default: deepgram): Choose transcription provider (deepgram or parakeet) - -#### Test Environment Variables -Tests use isolated test environment with overridden credentials: -- **Test Database**: `test_db` (MongoDB on port 27018, separate from production) -- **Test Ports**: Backend (8001), Qdrant (6337/6338), WebUI (3001) -- **Test 
Credentials**: - - `AUTH_SECRET_KEY`: test-jwt-signing-key-for-integration-tests - - `ADMIN_EMAIL`: test-admin@example.com - - `ADMIN_PASSWORD`: test-admin-password-123 -- **API Keys**: Loaded from `.env` file (DEEPGRAM_API_KEY, OPENAI_API_KEY) -- **Test Settings**: `DISABLE_SPEAKER_RECOGNITION=true` to prevent segment duplication - -#### Test Script Features -- **Environment Compatibility**: Works with both local .env files and CI environment variables -- **Isolated Test Environment**: Separate ports and database prevent conflicts with running services -- **Automatic Cleanup**: Configurable via CLEANUP_CONTAINERS flag (default: true) -- **Colored Output**: Clear progress indicators and error reporting -- **Timeout Protection**: 15-minute timeout for advanced backend, 30-minute for speaker recognition -- **Fresh Testing**: Clean database and containers for each test run +**For complete test documentation, see `tests/README.md`** ### Mobile App Development ```bash @@ -185,12 +167,12 @@ docker compose up --build ## Architecture Overview ### Key Components -- **Audio Pipeline**: Real-time Opus/PCM β†’ Application-level processing β†’ Deepgram/Mistral transcription β†’ memory extraction +- **Audio Pipeline**: Real-time Opus/PCM β†’ Application-level processing β†’ Deepgram transcription β†’ memory extraction - **Wyoming Protocol**: WebSocket communication uses Wyoming protocol (JSONL + binary) for structured audio sessions - **Unified Pipeline**: Job-based tracking system for all audio processing (WebSocket and file uploads) - **Job Tracker**: Tracks pipeline jobs with stage events (audio β†’ transcription β†’ memory) and completion status - **Task Management**: BackgroundTaskManager tracks all async tasks to prevent orphaned processes -- **Unified Transcription**: Deepgram/Mistral transcription with fallback to offline ASR services +- **Unified Transcription**: Deepgram transcription with fallback to offline ASR services - **Memory System**: Pluggable providers 
(Chronicle native or OpenMemory MCP) - **Authentication**: Email-based login with MongoDB ObjectId user system - **Client Management**: Auto-generated client IDs as `{user_id_suffix}-{device_name}`, centralized ClientManager @@ -206,7 +188,7 @@ Required: Recommended: - Vector Storage: Qdrant (Chronicle provider) or OpenMemory MCP server - - Transcription: Deepgram, Mistral, or offline ASR services + - Transcription: Deepgram or offline ASR services Optional: - Parakeet ASR: Offline transcription service @@ -330,12 +312,7 @@ Chronicle supports multiple transcription services: TRANSCRIPTION_PROVIDER=deepgram DEEPGRAM_API_KEY=your-deepgram-key-here -# Option 2: Mistral (Voxtral models) -TRANSCRIPTION_PROVIDER=mistral -MISTRAL_API_KEY=your-mistral-key-here -MISTRAL_MODEL=voxtral-mini-2507 - -# Option 3: Local ASR (Parakeet) +# Option 2: Local ASR (Parakeet) PARAKEET_ASR_URL=http://host.docker.internal:8767 ``` @@ -348,12 +325,37 @@ OLLAMA_BASE_URL=http://ollama:11434 SPEAKER_SERVICE_URL=http://speaker-recognition:8085 ``` +### Plugin Security Architecture + +**Three-File Separation**: + +1. **backends/advanced/.env** - Secrets (gitignored) + ```bash + SMTP_PASSWORD=abcdefghijklmnop + OPENAI_API_KEY=sk-proj-... + ``` + +2. **config/plugins.yml** - Orchestration (uses env var references) + ```yaml + plugins: + email_summarizer: + enabled: true + smtp_password: ${SMTP_PASSWORD} # Reference, not actual value! + ``` + +3. **plugins/{plugin_id}/config.yml** - Non-secret defaults + ```yaml + subject_prefix: "Conversation Summary" + ``` + +**CRITICAL**: Never hardcode secrets in `config/plugins.yml`. Always use `${ENV_VAR}` syntax. 
+ ## Quick API Reference ### Common Endpoints - **GET /health**: Basic application health check - **GET /readiness**: Service dependency validation -- **WS /ws_pcm**: Primary audio streaming endpoint (Wyoming protocol + raw PCM fallback) +- **WS /ws**: Audio streaming endpoint with codec parameter (Wyoming protocol, supports pcm and opus codecs) - **GET /api/conversations**: User's conversations with transcripts - **GET /api/memories/search**: Semantic memory search with relevance scoring - **POST /auth/jwt/login**: Email-based login (returns JWT token) @@ -518,12 +520,11 @@ tailscale ip -4 - **Docker**: Primary deployment method with docker-compose ### Testing Strategy -- **Local Test Scripts**: Simplified scripts (`./run-test.sh`) mirror CI workflows for local development -- **End-to-End Integration**: Robot Framework tests (`tests/integration/integration_test.robot`) validate complete audio processing pipeline -- **Speaker Recognition Tests**: `test_speaker_service_integration.py` validates speaker identification +- **Makefile-Based**: All test operations through simple `make` commands (`make test`, `make start`, `make stop`) +- **Log Preservation**: Container logs always saved before cleanup (never lose debugging info) +- **End-to-End Integration**: Robot Framework validates complete audio processing pipeline - **Environment Flexibility**: Tests work with both local .env files and CI environment variables -- **Automated Cleanup**: Test containers are automatically removed after execution -- **CI/CD Integration**: GitHub Actions use the same local test scripts for consistency +- **CI/CD Integration**: Same test logic locally and in GitHub Actions ### Code Style - **Python**: Black formatter with 100-character line length, isort for imports @@ -550,14 +551,10 @@ The system includes comprehensive health checks: - Memory debug system for transcript processing monitoring ### Integration Test Infrastructure -- **Unified Test Scripts**: Local `./run-test.sh` scripts 
mirror GitHub Actions workflows -- **Test Environment**: `docker-compose-test.yml` provides isolated services on separate ports -- **Test Database**: Uses `test_db` database with isolated collections -- **Service Ports**: Backend (8001), MongoDB (27018), Qdrant (6335/6336), WebUI (5174) -- **Test Credentials**: Auto-generated `.env.test` files with secure test configurations -- **Ground Truth**: Expected transcript established via `scripts/test_deepgram_direct.py` -- **AI Validation**: OpenAI-powered transcript similarity comparison -- **Test Audio**: 4-minute glass blowing tutorial (`extras/test-audios/DIY*mono*.wav`) +- **Makefile Interface**: Simple `make` commands for all operations (see `tests/README.md`) +- **Test Environment**: `docker-compose-test.yml` with isolated services on separate ports +- **Test Database**: Uses `test_db` database (separate from production) +- **Log Preservation**: All cleanup operations save logs to `tests/logs/` automatically - **CI Compatibility**: Same test logic runs locally and in GitHub Actions ### Cursor Rule Integration diff --git a/Docs/audio-pipeline-architecture.md b/Docs/audio-pipeline-architecture.md new file mode 100644 index 00000000..afba52db --- /dev/null +++ b/Docs/audio-pipeline-architecture.md @@ -0,0 +1,1241 @@ +# Audio Pipeline Architecture + +This document explains how audio flows through the Chronicle system from initial capture to final storage, including all intermediate processing stages, Redis streams, and data storage locations. 
+ +## Table of Contents + +- [Overview](#overview) +- [Architecture Diagram](#architecture-diagram) +- [Data Sources](#data-sources) +- [Redis Streams: The Central Pipeline](#redis-streams-the-central-pipeline) +- [Producer: AudioStreamProducer](#producer-audiostreamproducer) +- [Dual-Consumer Architecture](#dual-consumer-architecture) +- [Transcription Results Aggregator](#transcription-results-aggregator) +- [Job Queue Orchestration (RQ)](#job-queue-orchestration-rq) +- [Data Storage](#data-storage) +- [Complete End-to-End Flow](#complete-end-to-end-flow) +- [Key Design Patterns](#key-design-patterns) +- [Failure Handling](#failure-handling) + +## Overview + +Chronicle's audio pipeline is built on three core technologies: + +- **Redis Streams**: Distributed message queues for audio chunks and transcription results +- **Background Tasks**: Async consumers that process streams independently +- **RQ Job Queue**: Orchestrates session-level and conversation-level workflows + +**Key Insight**: Multiple workers can independently consume the **same audio stream** using Redis Consumer Groups, enabling parallel processing paths (transcription + disk persistence) without duplication. 
+ +## Architecture Diagram + +``` +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ AUDIO INPUT β”‚ +β”‚ WebSocket (/ws) β”‚ File Upload (/audio/upload) β”‚ Google Drive β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + ↓ + β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” + β”‚ AudioStreamProducer β”‚ + β”‚ - Chunk audio (0.25s) β”‚ + β”‚ - Session metadata β”‚ + β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + ↓ + β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” + β”‚ Redis Stream (Per Client) β”‚ + β”‚ audio:stream:{client_id} β”‚ + β””β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”˜ + ↓ ↓ + β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” + β”‚ Transcription Consumerβ”‚ β”‚ Audio Persistence β”‚ + β”‚ Group (streaming/batch)β”‚ β”‚ Consumer Group β”‚ + β”‚ β”‚ β”‚ β”‚ + β”‚ β†’ Deepgram WebSocket β”‚ β”‚ β†’ Writes WAV files β”‚ + β”‚ β†’ Batch buffering β”‚ β”‚ β†’ Monitors rotation β”‚ + β”‚ β†’ Publish results β”‚ β”‚ β†’ Stores file paths β”‚ + β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + ↓ ↓ + β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” + β”‚ transcription:results β”‚ β”‚ Disk Storage β”‚ + β”‚ :{session_id} β”‚ β”‚ data/chunks/*.wav β”‚ + 
β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + ↓ + β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” + β”‚ TranscriptionResults β”‚ + β”‚ Aggregator β”‚ + β”‚ - Combines chunks β”‚ + β”‚ - Merges timestamps β”‚ + β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + ↓ + β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” + β”‚ RQ Job Pipeline β”‚ + β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ + β”‚ speech_detection_job β”‚ ← Session-level + β”‚ ↓ β”‚ + β”‚ open_conversation_job β”‚ ← Conversation-level + β”‚ ↓ β”‚ + β”‚ Post-Conversation: β”‚ + β”‚ β€’ transcribe_full β”‚ + β”‚ β€’ speaker_recognition β”‚ + β”‚ β€’ memory_extraction β”‚ + β”‚ β€’ title_generation β”‚ + β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + ↓ + β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” + β”‚ Final Storage β”‚ + β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ + β”‚ MongoDB: conversationsβ”‚ + β”‚ Disk: WAV files β”‚ + β”‚ Qdrant: Memories β”‚ + β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ +``` + +## Data Sources + +### 1. WebSocket Streaming (`/ws`) + +**Endpoint**: `/ws?codec=pcm|opus&token=xxx&device_name=xxx` + +**Handlers**: +- `handle_pcm_websocket()` - Raw PCM audio +- `handle_omi_websocket()` - Opus-encoded audio (compressed, used by OMI devices) + +**Protocol**: Wyoming Protocol (JSON lines + binary frames) + +**Authentication**: JWT token required + +**Location**: `backends/advanced/src/advanced_omi_backend/routers/websocket_routes.py` + +**Container**: `chronicle-backend` + +### 2. 
File Upload (`/audio/upload`) + +**Endpoint**: `POST /api/audio/upload` + +**Accepts**: Multiple WAV files (multipart form data) + +**Authentication**: Admin only + +**Device ID**: Auto-generated as `{user_id_suffix}-upload` or custom `device_name` + +**Location**: `backends/advanced/src/advanced_omi_backend/routers/api_router.py` + +**Container**: `chronicle-backend` + +### 3. Google Drive Upload + +**Endpoint**: `POST /api/audio/upload_audio_from_gdrive` + +**Source**: Google Drive folder ID + +**Processing**: Downloads files and enqueues for processing + +**Container**: `chronicle-backend` + +## Redis Streams: The Central Pipeline + +### Stream Naming Convention + +``` +audio:stream:{client_id} +``` + +**Examples**: +- `audio:stream:user01-phone` +- `audio:stream:user01-omi-device` +- `audio:stream:user01-upload` + +**Characteristics**: +- **Client-specific isolation**: Each device has its own stream +- **Fan-out pattern**: Multiple consumer groups read the same stream +- **MAXLEN constraint**: Keeps last 25,000 entries (auto-trimming) +- **No TTL**: Streams persist until manually deleted +- **Container**: `redis` service + +### Session Metadata Storage + +``` +audio:session:{session_id} +``` + +**Type**: Redis Hash + +**Fields**: +- `user_id`: MongoDB ObjectId +- `client_id`: Device identifier +- `connection_id`: WebSocket connection ID +- `stream_name`: `audio:stream:{client_id}` +- `status`: `"active"` β†’ `"finalizing"` β†’ `"complete"` +- `chunks_published`: Integer count +- `speech_detection_job_id`: RQ job ID +- `audio_persistence_job_id`: RQ job ID +- `websocket_connected`: `true|false` +- `transcription_error`: Error message (if any) + +**TTL**: 1 hour + +**Container**: `redis` + +### Transcription Results Stream + +``` +transcription:results:{session_id} +``` + +**Type**: Redis Stream + +**Written by**: Transcription consumers (streaming or batch) + +**Read by**: `TranscriptionResultsAggregator` + +**Message Fields**: +- `text`: Transcribed text for 
this chunk +- `chunk_id`: Redis message ID from audio stream +- `provider`: `"deepgram"` or `"parakeet"` +- `confidence`: Float (0.0-1.0) +- `words`: JSON array of word-level timestamps +- `segments`: JSON array of speaker segments + +**Lifecycle**: Deleted when conversation completes + +**Container**: `redis` + +### Conversation Tracking + +``` +conversation:current:{session_id} +``` + +**Type**: Redis String + +**Value**: Current `conversation_id` (UUID) + +**Purpose**: Signals audio persistence job to rotate WAV file + +**TTL**: 24 hours + +**Container**: `redis` + +### Audio File Path Mapping + +``` +audio:file:{conversation_id} +``` + +**Type**: Redis String + +**Value**: File path (e.g., `1704067200000_user01-phone_convid.wav`) + +**Purpose**: Links conversation to its audio file on disk + +**TTL**: 24 hours + +**Container**: `redis` + +## Producer: AudioStreamProducer + +**File**: `backends/advanced/src/advanced_omi_backend/services/audio_stream/producer.py` + +**Container**: `chronicle-backend` (in-memory, no persistence) + +### Responsibilities + +#### 1. Session Initialization + +```python +async def init_session( + session_id: str, + user_id: str, + client_id: str, + provider: str, + mode: str +) -> None +``` + +**Actions**: +- Creates `audio:session:{session_id}` hash in Redis +- Initializes in-memory buffer for chunking +- Stores session metadata (user, client, provider) + +#### 2. Audio Chunking + +```python +async def add_audio_chunk( + session_id: str, + audio_data: bytes +) -> list[str] +``` + +**Process**: +1. Buffers incoming audio (arbitrary size from WebSocket) +2. Creates **fixed-size chunks**: 0.25 seconds = 8,000 bytes + - Assumes: 16kHz sample rate, 16-bit mono PCM +3. Prevents cutting audio mid-word (aligned chunks) +4. Publishes each chunk to `audio:stream:{client_id}` via `XADD` +5. Returns Redis message IDs for tracking + +**In-Memory Storage**: Session buffers stored in `AudioStreamProducer._session_buffers` dict + +#### 3. 
Session End Signal + +```python +async def send_session_end_signal(session_id: str) -> None +``` + +**Actions**: +- Publishes special `{"type": "END"}` message to stream +- Signals all consumers to flush buffers and finalize +- Updates session status to `"finalizing"` + +### Data Location + +**Memory**: `chronicle-backend` container (in-memory buffers) + +**Redis**: Published chunks in `audio:stream:{client_id}` (redis container) + +## Dual-Consumer Architecture + +Chronicle uses **Redis Consumer Groups** to enable multiple independent consumers to read the **same audio stream** without message duplication. + +### Consumer Group 1: Transcription + +Two implementations available: + +#### A. Streaming Transcription Consumer + +**File**: `backends/advanced/src/advanced_omi_backend/services/transcription/streaming_consumer.py` + +**Class**: `StreamingTranscriptionConsumer` + +**Consumer Group**: `streaming-transcription` + +**Provider**: Deepgram (WebSocket-based) + +**Process**: +1. Discovers `audio:stream:*` streams dynamically using `SCAN` +2. Opens persistent WebSocket connection to Deepgram per stream +3. Sends audio chunks **immediately** (no buffering) +4. Publishes **interim results** to `transcription:interim:{session_id}` (Redis Pub/Sub) +5. Publishes **final results** to `transcription:results:{session_id}` (Redis Stream) +6. Triggers plugins on final results only +7. ACKs messages with `XACK` to prevent reprocessing +8. Handles END signal: closes WebSocket, cleans up + +**Container**: `chronicle-backend` (Background Task via `BackgroundTaskManager`) + +**Real-time Updates**: Interim results pushed to WebSocket clients via Pub/Sub + +#### B. 
Batch Transcription Consumer + +**File**: `backends/advanced/src/advanced_omi_backend/services/audio_stream/consumer.py` + +**Class**: `BaseAudioStreamConsumer` + +**Consumer Group**: `{provider_name}_workers` (e.g., `deepgram_workers`, `parakeet_workers`) + +**Providers**: Deepgram (batch), Parakeet ASR (offline) + +**Process**: +1. Reads from `audio:stream:{client_id}` using `XREADGROUP` +2. Buffers chunks per session (default: 30 chunks = ~7.5 seconds) +3. When buffer full: + - Combines chunks into single audio buffer + - Transcribes using provider API + - Adjusts word/segment timestamps relative to session start + - Publishes result to `transcription:results:{session_id}` +4. Flushes remaining buffer on END signal +5. ACKs all buffered messages with `XACK` +6. Trims stream to keep only last 1,000 entries (`XTRIM MAXLEN`) + +**Container**: `chronicle-backend` (Background Task) + +**Batching Benefits**: Reduces API calls, improves transcription accuracy (more context) + +### Consumer Group 2: Audio Persistence + +**File**: `backends/advanced/src/advanced_omi_backend/workers/audio_jobs.py` + +**Function**: `audio_streaming_persistence_job()` + +**Consumer Group**: `audio_persistence` + +**Consumer Name**: `persistence-worker-{session_id}` + +**Process**: +1. Reads audio chunks from `audio:stream:{client_id}` using `XREADGROUP` +2. Monitors `conversation:current:{session_id}` for rotation signals +3. On conversation rotation: + - Closes current WAV file + - Opens new WAV file with new conversation ID +4. Writes chunks immediately to disk (real-time persistence) +5. Stores file path in `audio:file:{conversation_id}` (Redis) +6. Handles END signal: closes file, returns statistics +7. 
ACKs messages after writing to disk + +**Container**: `chronicle-backend` (RQ Worker) + +**Output Location**: `backends/advanced/data/chunks/` (volume-mounted) + +**File Format**: `{timestamp_ms}_{client_id}_{conversation_id}.wav` + +### Fan-Out Pattern Visualization + +``` +audio:stream:user01-phone + ↓ + β”œβ”€ Consumer Group: "streaming-transcription" + β”‚ └─ Worker: streaming-worker-12345 + β”‚ β†’ Reads: chunks β†’ Deepgram WS β†’ Results stream + β”‚ + β”œβ”€ Consumer Group: "deepgram_workers" + β”‚ β”œβ”€ Worker: deepgram-worker-67890 + β”‚ β”œβ”€ Worker: deepgram-worker-67891 + β”‚ └─ Reads: chunks β†’ Buffer (30) β†’ Batch API β†’ Results stream + β”‚ + └─ Consumer Group: "audio_persistence" + └─ Worker: persistence-worker-sessionXYZ + β†’ Reads: chunks β†’ WAV file (disk) +``` + +**Key Benefits**: +- **Horizontal scaling**: Multiple workers per group +- **Independent processing**: Each group processes all messages +- **No message loss**: Messages ACKed only after processing +- **Decoupled**: Producer doesn't know about consumers + +## Transcription Results Aggregator + +**File**: `backends/advanced/src/advanced_omi_backend/services/audio_stream/aggregator.py` + +**Class**: `TranscriptionResultsAggregator` + +**Container**: `chronicle-backend` (in-memory, stateless) + +### Methods + +#### Get Combined Results + +```python +async def get_combined_results(session_id: str) -> dict +``` + +**Returns**: +```python +{ + "text": "Full transcript...", + "segments": [SpeakerSegment, ...], + "words": [Word, ...], + "provider": "deepgram", + "chunk_count": 42 +} +``` + +**Process**: +- Reads all entries from `transcription:results:{session_id}` +- For **streaming mode**: Uses latest final result only (supersedes interim) +- For **batch mode**: Combines all chunks sequentially +- Adjusts timestamps across chunks (adds audio offset) +- Merges speaker segments, words + +#### Get Session Results (Raw) + +```python +async def get_session_results(session_id: str) -> 
list[dict] +``` + +**Returns**: Raw list of transcription result messages + +#### Get Real-time Results + +```python +async def get_realtime_results( + session_id: str, + last_id: str = "0-0" +) -> tuple[list[dict], str] +``` + +**Returns**: `(new_results, new_last_id)` + +**Purpose**: Incremental polling for live UI updates + +### Data Location + +**Input**: `transcription:results:{session_id}` stream (redis container) + +**Processing**: In-memory (chronicle-backend container) + +**Output**: Returned to caller (no persistence) + +## Job Queue Orchestration (RQ) + +**Library**: Python RQ (Redis Queue) + +**File**: `backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py` + +**Containers**: +- `chronicle-backend` (enqueues jobs) +- `rq-worker` (executes jobs) + +### Job Pipeline + +``` +Session Starts + ↓ +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ stream_speech_detection_job β”‚ ← Session-level (long-running) +β”‚ - Polls transcription results β”‚ +β”‚ - Analyzes speech content β”‚ +β”‚ - Checks speaker filters β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + ↓ (when speech detected) +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ open_conversation_job β”‚ ← Conversation-level (long-running) +β”‚ - Creates conversation β”‚ +β”‚ - Signals file rotation β”‚ +β”‚ - Monitors activity β”‚ +β”‚ - Detects end conditions β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + ↓ (when conversation ends) +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ Post-Conversation Pipeline β”‚ +β”œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ +β”‚ β€’ recognize_speakers_job β”‚ +β”‚ β€’ 
memory_extraction_job β”‚ +β”‚ β€’ generate_title_summary_job β”‚ +β”‚ β€’ dispatch_conversation_completeβ”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ +``` + +### Session-Level Jobs + +#### Speech Detection Job + +**File**: `backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py` + +**Function**: `stream_speech_detection_job()` + +**Scope**: Entire session (can handle multiple conversations) + +**Max Duration**: 24 hours + +**Process**: +1. Polls `TranscriptionResultsAggregator.get_combined_results()` (1-second intervals) +2. Analyzes speech content: + - Word count > 10 + - Duration > 5 seconds + - Confidence > threshold +3. If speaker filter enabled: checks for enrolled speakers +4. When speech detected: + - Creates conversation in MongoDB + - Enqueues `open_conversation_job` + - **Exits** (restarts when conversation completes) +5. Handles transcription errors (marks session with error flag) + +**RQ Queue**: `speech_detection_queue` (dedicated queue) + +**Container**: `rq-worker` + +### Conversation-Level Jobs + +#### Open Conversation Job + +**File**: `backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py` + +**Function**: `open_conversation_job()` + +**Scope**: Single conversation + +**Max Duration**: 3 hours + +**Process**: +1. Creates conversation document in MongoDB `conversations` collection +2. Sets `conversation:current:{session_id}` = `conversation_id` (Redis) + - **Triggers audio persistence job to rotate WAV file** +3. Polls for transcription updates (1-second intervals) +4. Tracks speech activity (inactivity timeout = 60 seconds default) +5. Detects end conditions: + - WebSocket disconnect + - User manual stop + - Inactivity timeout +6. Waits for audio file path from persistence job +7. Saves `audio_path` to conversation document +8. Triggers conversation-level plugins +9. Enqueues post-conversation jobs +10. 
Calls `handle_end_of_conversation()` for cleanup + restart + +**RQ Queue**: `default` + +**Container**: `rq-worker` + +#### Audio Persistence Job + +**File**: `backends/advanced/src/advanced_omi_backend/workers/audio_jobs.py` + +**Function**: `audio_streaming_persistence_job()` + +**Scope**: Entire session (parallel with open_conversation_job) + +**Max Duration**: 24 hours + +**Process**: +1. Monitors `conversation:current:{session_id}` for rotation signals +2. For each conversation: + - Opens new WAV file: `{timestamp}_{client_id}_{conversation_id}.wav` + - Writes chunks immediately as they arrive from stream + - Stores file path in `audio:file:{conversation_id}` +3. On rotation signal: + - Closes current file + - Opens new file for next conversation +4. On END signal: + - Closes file + - Returns statistics (chunk count, bytes, duration) + +**Output**: WAV files in `backends/advanced/data/chunks/` + +**Container**: `rq-worker` + +### Post-Conversation Pipeline + +**Streaming conversations**: Use streaming transcript saved during conversation. No batch re-transcription. + +**File uploads**: Batch transcription job runs first, then post-conversation jobs depend on it. + +#### 1. Recognize Speakers Job + +**File**: `backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py` + +**Function**: `recognize_speakers_job()` + +**Process**: +- Sends audio + segments to speaker recognition service +- Identifies speakers using voice embeddings +- Updates segment speaker labels in MongoDB + +**Optional**: Only runs if `DISABLE_SPEAKER_RECOGNITION=false` + +**Container**: `rq-worker` + +**External Service**: `speaker-recognition` container (if enabled) + +#### 2. 
Memory Extraction Job + +**File**: `backends/advanced/src/advanced_omi_backend/workers/memory_jobs.py` + +**Function**: `memory_extraction_job()` + +**Prerequisite**: Speaker recognition job + +**Process**: +- Uses LLM (OpenAI/Ollama) to extract semantic facts +- Stores embeddings in vector database: + - **Chronicle provider**: Qdrant + - **OpenMemory MCP provider**: External OpenMemory server + +**Container**: `rq-worker` + +**External Services**: +- `ollama` or OpenAI API (LLM) +- `qdrant` or OpenMemory MCP (vector storage) + +#### 3. Generate Title Summary Job + +**File**: `backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py` + +**Function**: `generate_title_summary_job()` + +**Prerequisite**: Speaker recognition job + +**Process**: +- Uses LLM to generate title, summary, detailed summary +- Updates conversation document in MongoDB + +**Container**: `rq-worker` + +#### 4. Dispatch Conversation Complete Event + +**File**: `backends/advanced/src/advanced_omi_backend/workers/conversation_jobs.py` + +**Function**: `dispatch_conversation_complete_event_job()` + +**Process**: +- Triggers `conversation.complete` plugin event + +**Container**: `rq-worker` + +#### Batch Transcription Job + +**File**: `backends/advanced/src/advanced_omi_backend/workers/transcription_jobs.py` + +**Function**: `transcribe_full_audio_job()` + +**When used**: +- File uploads via `/api/process-audio-files` +- Manual reprocessing via `/api/conversations/{id}/reprocess-transcript` +- NOT used for streaming conversations + +**Process**: +- Reconstructs audio from MongoDB chunks +- Batch transcribes entire audio +- Stores transcript with word-level timestamps + +**Container**: `rq-worker` + +### Session Restart + +**File**: `backends/advanced/src/advanced_omi_backend/utils/conversation_utils.py` + +**Function**: `handle_end_of_conversation()` + +**Process**: +1. Deletes transcription results stream: `transcription:results:{session_id}` +2. 
Increments `session:conversation_count:{session_id}` +3. Checks if session still active (WebSocket connected) +4. If active: Re-enqueues `stream_speech_detection_job` for next conversation +5. Cleans up consumer groups and pending messages + +**Purpose**: Allows continuous recording with multiple conversations per session + +## Data Storage + +### MongoDB Collections + +**Database**: `chronicle` + +**Container**: `mongo` + +**Volume**: `mongodb_data` (persistent) + +#### `conversations` Collection + +**Schema**: +```python +{ + "_id": ObjectId, + "conversation_id": "uuid-string", + "audio_uuid": "session_id", + "user_id": ObjectId, + "client_id": "user01-phone", + + # Content + "title": "Meeting notes", + "summary": "Discussion about...", + "detailed_summary": "Longer summary...", + "transcript": "Full transcript text", + "audio_path": "1704067200000_user01-phone_convid.wav", + + # Versioned Transcripts + "active_transcript_version": "v1", + "transcript_versions": { + "v1": { + "text": "Full transcript", + "segments": [SpeakerSegment], + "words": [Word], + "provider": "deepgram", + "processing_time_seconds": 45.2, + "created_at": "2025-01-11T12:00:00Z" + } + }, + "segments": [SpeakerSegment], # From active version + + # Metadata + "created_at": "2025-01-11T12:00:00Z", + "completed_at": "2025-01-11T12:15:00Z", + "end_reason": "user_stopped|inactivity_timeout|websocket_disconnect", + "deleted": false +} +``` + +**Indexes**: +- `user_id` (for user-scoped queries) +- `client_id` (for device filtering) +- `conversation_id` (unique) + +#### `audio_chunks` Collection + +**Purpose**: Stores raw audio session data + +**Schema**: +```python +{ + "_id": ObjectId, + "audio_uuid": "session_id", + "user_id": ObjectId, + "client_id": "user01-phone", + "created_at": "2025-01-11T12:00:00Z", + "metadata": { ... 
} +} +``` + +**Use Case**: Speech-driven architecture (sessions without conversations) + +#### `users` Collection + +**Purpose**: User accounts, authentication, preferences + +**Schema**: +```python +{ + "_id": ObjectId, + "email": "user@example.com", + "hashed_password": "...", + "is_active": true, + "is_superuser": false, + "created_at": "2025-01-11T12:00:00Z" +} +``` + +### Disk Storage + +**Location**: `backends/advanced/data/chunks/` + +**Container**: `chronicle-backend` (volume-mounted) + +**Volume**: `./backends/advanced/data/chunks:/app/data/chunks` + +**File Format**: WAV files + +**Naming Convention**: `{timestamp_ms}_{client_id}_{conversation_id}.wav` + +**Example**: `1704067200000_user01-phone_550e8400-e29b-41d4-a716-446655440000.wav` + +**Created by**: `audio_streaming_persistence_job()` + +**Read by**: Post-conversation transcription jobs + +**Retention**: Manual cleanup (no automatic deletion) + +### Redis Storage + +**Container**: `redis` + +**Volume**: `redis_data` (persistent) + +| Key Pattern | Type | Purpose | TTL | Created By | +|-------------|------|---------|-----|------------| +| `audio:stream:{client_id}` | Stream | Audio chunks for transcription | None (MAXLEN=25k) | AudioStreamProducer | +| `audio:session:{session_id}` | Hash | Session metadata | 1 hour | AudioStreamProducer | +| `transcription:results:{session_id}` | Stream | Transcription results | Manual delete | Transcription consumers | +| `transcription:interim:{session_id}` | Pub/Sub | Real-time interim results | N/A (ephemeral) | Streaming consumer | +| `conversation:current:{session_id}` | String | Current conversation ID | 24 hours | open_conversation_job | +| `audio:file:{conversation_id}` | String | Audio file path | 24 hours | audio_persistence_job | +| `session:conversation_count:{session_id}` | Counter | Conversation count | 1 hour | handle_end_of_conversation | +| `speech_detection_job:{client_id}` | String | Job ID for cleanup | 1 hour | speech_detection_job | +| 
`rq:job:{job_id}` | Hash | RQ job metadata | 24 hours (default) | RQ | + +### Vector Storage (Memory) + +#### Option A: Qdrant (Chronicle Native Provider) + +**Container**: `qdrant` + +**Volume**: `qdrant_data` (persistent) + +**Ports**: 6333 (HTTP), 6334 (gRPC) + +**Collections**: User-specific collections for semantic embeddings + +**Written by**: `memory_extraction_job()` + +**Read by**: Memory search API (`/api/memories/search`) + +#### Option B: OpenMemory MCP + +**Container**: `openmemory-mcp` (external service) + +**Port**: 8765 + +**Protocol**: MCP (Model Context Protocol) + +**Collections**: Cross-client memory storage + +**Written by**: `memory_extraction_job()` (via MCP provider) + +**Read by**: Memory search API (via MCP provider) + +## Complete End-to-End Flow + +### Step-by-Step Data Journey + +``` +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ 1. AUDIO INPUT β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + WebSocket (/ws) or File Upload (/audio/upload) + ↓ + Container: chronicle-backend + ↓ + AudioStreamProducer.init_session() + - Creates: audio:session:{session_id} (Redis) + - Initializes: In-memory buffer (chronicle-backend container) + ↓ + AudioStreamProducer.add_audio_chunk() + - Buffers: In-memory (chronicle-backend) + - Chunks: Fixed 0.25s chunks (8,000 bytes) + - Publishes: audio:stream:{client_id} (Redis) + - Returns: Redis message IDs + +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ 2. 
SESSION-LEVEL JOB (RQ) β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + stream_speech_detection_job + Container: rq-worker + ↓ + Polls: TranscriptionResultsAggregator.get_combined_results() + Reads: transcription:results:{session_id} (Redis) + ↓ + Analyzes: Word count, duration, confidence + ↓ + When speech detected: + - Creates: Conversation document (MongoDB) + - Enqueues: open_conversation_job (RQ) + - Exits (restarts when conversation ends) + +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ 3a. TRANSCRIPTION CONSUMER (Background Task) β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + StreamingTranscriptionConsumer (or BaseAudioStreamConsumer) + Container: chronicle-backend (Background Task) + ↓ + Reads: audio:stream:{client_id} (Redis, via XREADGROUP) + Consumer Group: streaming-transcription (or batch provider) + ↓ + STREAMING PATH: + β€’ Opens: WebSocket to Deepgram + β€’ Sends: Chunks immediately (no buffering) + β€’ Publishes Interim: transcription:interim:{session_id} (Redis Pub/Sub) + β€’ Publishes Final: transcription:results:{session_id} (Redis Stream) + β€’ Triggers: Plugins on final results + + BATCH PATH: + β€’ Buffers: 30 chunks (~7.5s) in memory (chronicle-backend) + β€’ Combines: All buffered chunks + β€’ Transcribes: Via provider API (Deepgram/Parakeet) + β€’ Adjusts: Timestamps relative to session start + β€’ Publishes: transcription:results:{session_id} (Redis Stream) + 
+β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ 3b. AUDIO PERSISTENCE CONSUMER (RQ Job) β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + audio_streaming_persistence_job + Container: rq-worker + ↓ + Reads: audio:stream:{client_id} (Redis, via XREADGROUP) + Consumer Group: audio_persistence + ↓ + Monitors: conversation:current:{session_id} (Redis) + ↓ + For each conversation: + β€’ Opens: New WAV file (data/chunks/, chronicle-backend volume) + β€’ Writes: Chunks immediately (real-time) + β€’ Stores: audio:file:{conversation_id} = path (Redis) + ↓ + On rotation signal: + β€’ Closes: Current file + β€’ Opens: New file for next conversation + ↓ + On END signal: + β€’ Closes: File + β€’ Returns: Statistics (chunks, bytes, duration) + +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ 4. 
CONVERSATION-LEVEL JOB (RQ) β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + open_conversation_job + Container: rq-worker + ↓ + Creates: Conversation document (MongoDB conversations collection) + ↓ + Sets: conversation:current:{session_id} = conversation_id (Redis) + β†’ Triggers audio persistence job to rotate WAV file + ↓ + Polls: TranscriptionResultsAggregator for updates (1s intervals) + Reads: transcription:results:{session_id} (Redis) + ↓ + Tracks: Speech activity (inactivity timeout = 60s) + ↓ + Detects End: + - Inactivity (60s) + - User manual stop + - WebSocket disconnect + ↓ + Waits: For audio file path from persistence job + Reads: audio:file:{conversation_id} (Redis) + ↓ + Saves: audio_path to conversation document (MongoDB) + ↓ + Enqueues: POST-CONVERSATION PIPELINE (RQ) + +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ 5. 
POST-CONVERSATION PIPELINE (RQ)                                     │
└────────────────────────────────────────────────────────────────────┘
  Jobs run after the conversation ends; indented jobs wait for their parent, sibling branches run in parallel
  Container: rq-worker
       ↓
  Reads: Audio file from disk (data/chunks/*.wav)

  ┌─ transcribe_full_audio_job
  │   - Batch transcribes: Complete audio file
  │   - Validates: Meaningful speech
  │   - Marks deleted: If no speech
  │   - Stores: MongoDB (transcript, segments, words)
  │
  │   └─ recognize_speakers_job (if enabled)
  │       - Sends: Audio + segments to speaker-recognition service
  │       - Identifies: Speakers via voice embeddings
  │       - Updates: MongoDB (segment speaker labels)
  │
  │   └─ memory_extraction_job
  │       - Uses: LLM (OpenAI/Ollama) to extract facts
  │       - Stores: Qdrant (Chronicle) or OpenMemory MCP (vector DB)
  │
  └─ generate_title_summary_job
      - Uses: LLM (OpenAI/Ollama)
      - Generates: Title, summary, detailed_summary
      - Stores: MongoDB (conversation document)

  └─ dispatch_conversation_complete_event_job
      - Triggers: conversation.complete plugins
      - Only for: File uploads (not streaming)

  All results stored: MongoDB conversations collection

┌──────────────────────────────────────────────────────────────────────┐
│ 6. SESSION RESTART                                                   │
└──────────────────────────────────────────────────────────────────────┘
  handle_end_of_conversation()
  Container: chronicle-backend
       ↓
  Deletes: transcription:results:{session_id} (Redis)
       ↓
  Increments: session:conversation_count:{session_id} (Redis)
       ↓
  Checks: Session still active? 
(WebSocket connected) + ↓ + If active: + - Re-enqueues: stream_speech_detection_job (RQ) + - Session remains: "active" for next conversation +``` + +### Data Locations Summary + +| Stage | Data Type | Location | Container | +|-------|-----------|----------|-----------| +| Input | Audio bytes | In-memory buffers | chronicle-backend | +| Producer | Fixed chunks | `audio:stream:{client_id}` | redis | +| Session metadata | Hash | `audio:session:{session_id}` | redis | +| Transcription consumer | Interim results | `transcription:interim:{session_id}` (Pub/Sub) | redis | +| Transcription consumer | Final results | `transcription:results:{session_id}` (Stream) | redis | +| Audio persistence | WAV files | `data/chunks/*.wav` (disk volume) | chronicle-backend (volume) | +| Audio persistence | File paths | `audio:file:{conversation_id}` | redis | +| Conversation job | Conversation doc | MongoDB `conversations` | mongo | +| Post-processing | Transcript | MongoDB `conversations` | mongo | +| Post-processing | Memories | Qdrant or OpenMemory MCP | qdrant / openmemory-mcp | +| Post-processing | Title/summary | MongoDB `conversations` | mongo | + +## Key Design Patterns + +### 1. Speech-Driven Architecture + +**Principle**: Conversations only created when speech is detected + +**Benefits**: +- Clean user experience (no noise-only sessions in UI) +- Reduced memory processing load +- Automatic quality filtering + +**Implementation**: +- `audio_chunks` collection: Always stores sessions +- `conversations` collection: Only created with speech +- Speech detection: Analyzes word count, duration, confidence + +### 2. Versioned Processing + +**Principle**: Store multiple versions of transcripts/memories + +**Benefits**: +- Reprocess without losing originals +- A/B testing different providers +- Rollback to previous versions + +**Implementation**: +- `transcript_versions` dict with version IDs (v1, v2, ...) 
+- `active_transcript_version` pointer +- `segments` field mirrors active version (quick access) + +### 3. Session-Level vs Conversation-Level + +**Session**: WebSocket connection lifetime (multiple conversations) +- Duration: Up to 24 hours +- Job: `stream_speech_detection_job` +- Purpose: Continuous monitoring for speech + +**Conversation**: Speech burst between silence periods +- Duration: Typically minutes +- Job: `open_conversation_job` +- Purpose: Process single meaningful exchange + +**Benefits**: +- Continuous recording without manual start/stop +- Automatic conversation segmentation +- Efficient resource usage (one session, many conversations) + +### 4. Job Metadata Cascading + +**Pattern**: Parent jobs link to child jobs + +**Example**: +``` +speech_detection_job + ↓ job_id stored in +audio:session:{session_id} + ↓ creates +open_conversation_job + ↓ job_id stored in +conversation document + ↓ creates +post-conversation jobs (parallel) +``` + +**Benefits**: +- Job grouping and cleanup +- Dependency tracking +- Debugging (trace job lineage) + +### 5. Real-Time + Batch Hybrid + +**Real-Time Path** (Streaming Consumer): +- Low latency (interim results in <1 second) +- WebSocket to Deepgram +- Publishes to Pub/Sub for live UI updates + +**Batch Path** (Batch Consumer): +- High accuracy (more context) +- Buffers 7.5 seconds +- API-based transcription + +**Both paths** write to same `transcription:results:{session_id}` stream + +**Benefits**: +- Live UI updates (interim results) +- Accurate final results (batch processing) +- Provider flexibility (switch between streaming/batch) + +### 6. 
Fan-Out via Redis Consumer Groups + +**Pattern**: Multiple consumer groups read same stream + +**Example**: `audio:stream:{client_id}` consumed by: +- Transcription consumer group +- Audio persistence consumer group + +**Benefits**: +- Parallel processing paths +- Horizontal scaling (multiple workers per group) +- No message duplication (each group processes independently) + +### 7. File Rotation via Redis Signals + +**Pattern**: Conversation job signals persistence job via Redis key + +**Implementation**: +```python +# Conversation job +redis.set(f"conversation:current:{session_id}", conversation_id) + +# Persistence job (monitors key) +current_conv = redis.get(f"conversation:current:{session_id}") +if current_conv != last_conv: + close_current_file() + open_new_file(current_conv) +``` + +**Benefits**: +- Decoupled jobs (no direct communication) +- Real-time file rotation +- Multiple files per session (one per conversation) + +## Failure Handling + +### Transcription Errors + +**Detection**: `stream_speech_detection_job` polls results + +**Action**: +- Sets `transcription_error` field in `audio:session:{session_id}` +- Logs error for debugging +- Session remains active (can recover) + +### No Meaningful Speech + +**Detection**: `transcribe_full_audio_job` validates transcript + +**Criteria**: +- Word count < 10 +- Duration < 5 seconds +- All words low confidence + +**Action**: +- Marks conversation `deleted=True` +- Sets `end_reason="no_meaningful_speech"` +- Conversation hidden from UI + +### Audio File Not Ready + +**Detection**: `open_conversation_job` waits for file path + +**Timeout**: 30 seconds (configurable) + +**Action**: +- Marks conversation `deleted=True` +- Sets `end_reason="audio_file_not_ready"` +- Logs error for debugging + +### Job Zombies (Stuck Jobs) + +**Detection**: `check_job_alive()` utility + +**Method**: Checks Redis for job existence + +**Action**: +- Returns `False` if job missing +- Caller can retry or fail gracefully + +### Dead 
Consumers + +**Detection**: Consumer group lag monitoring + +**Cleanup**: +- Removes idle consumers (>30 seconds) +- Claims pending messages from dead consumers +- Redistributes to active workers + +### Stream Trimming + +**Prevention**: Streams don't grow unbounded + +**Implementation**: +- `XTRIM MAXLEN 25000` on `audio:stream:{client_id}` +- Keeps last 25k messages (~104 minutes @ 0.25s chunks) +- Deletes `transcription:results:{session_id}` after conversation ends + +### Session Timeout + +**Max Duration**: 24 hours + +**Action**: +- Jobs exit gracefully +- Session marked `"complete"` +- Resources cleaned up (streams deleted, consumer groups removed) + +--- + +## Conclusion + +Chronicle's audio pipeline is designed for: +- **Real-time processing**: Low-latency transcription and live UI updates +- **Horizontal scalability**: Redis Consumer Groups enable multiple workers +- **Fault tolerance**: Decoupled components, job retries, graceful error handling +- **Resource efficiency**: Speech-driven architecture filters noise automatically +- **Flexibility**: Pluggable providers (Deepgram/Parakeet, OpenAI/Ollama, Qdrant/OpenMemory) + +All coordinated through **Redis Streams** for data flow and **RQ** for orchestration, with **MongoDB** for final storage and **disk** for audio archives. diff --git a/app/README.md b/app/README.md index d73dd748..e85e83e5 100644 --- a/app/README.md +++ b/app/README.md @@ -120,14 +120,14 @@ The app connects to any backend that accepts OPUS audio streams: 2. 
**Advanced Backend** (`backends/advanced/`) - Full transcription and memory features - Real-time processing with speaker recognition - - WebSocket endpoint: `/ws_pcm` + - WebSocket endpoint: `/ws?codec=pcm` ### Connection Setup #### Local Development ``` -Backend URL: ws://[machine-ip]:8000/ws_pcm -Example: ws://192.168.1.100:8000/ws_pcm +Backend URL: ws://[machine-ip]:8000/ws?codec=pcm +Example: ws://192.168.1.100:8000/ws?codec=pcm ``` #### Public Access (Production) @@ -138,7 +138,7 @@ Use ngrok or similar tunneling service: ngrok http 8000 # Use provided URL in app -Backend URL: wss://[ngrok-subdomain].ngrok.io/ws_pcm +Backend URL: wss://[ngrok-subdomain].ngrok.io/ws?codec=pcm ``` ### Configuration Steps @@ -147,8 +147,8 @@ Backend URL: wss://[ngrok-subdomain].ngrok.io/ws_pcm 2. **Open the mobile app** 3. **Navigate to Settings** 4. **Enter Backend URL**: - - Local: `ws://[your-ip]:8000/ws_pcm` - - Public: `wss://[your-domain]/ws_pcm` + - Local: `ws://[your-ip]:8000/ws?codec=pcm` + - Public: `wss://[your-domain]/ws?codec=pcm` 5. 
**Save configuration** ## Phone Audio Streaming (NEW) @@ -176,7 +176,7 @@ Stream audio directly from your phone's microphone to Chronicle backend, bypassi - **iOS**: iOS 13+ with microphone permissions - **Android**: Android API 21+ with microphone permissions - **Network**: Stable connection to Chronicle backend -- **Backend**: Advanced backend running with `/ws_pcm` endpoint +- **Backend**: Advanced backend running with `/ws?codec=pcm` endpoint #### Switching Audio Sources - **Mutual Exclusion**: Cannot use Bluetooth and phone audio simultaneously @@ -187,7 +187,7 @@ Stream audio directly from your phone's microphone to Chronicle backend, bypassi #### Audio Not Streaming - **Check Permissions**: Ensure microphone access granted -- **Verify Backend URL**: Confirm `ws://[ip]:8000/ws_pcm` format +- **Verify Backend URL**: Confirm `ws://[ip]:8000/ws?codec=pcm` format - **Network Connection**: Test backend connectivity - **Authentication**: Verify JWT token is valid @@ -292,7 +292,7 @@ curl -i -N -H "Connection: Upgrade" \ -H "Upgrade: websocket" \ -H "Sec-WebSocket-Key: test" \ -H "Sec-WebSocket-Version: 13" \ - http://[backend-ip]:8000/ws_pcm + http://[backend-ip]:8000/ws?codec=pcm ``` ## Development @@ -338,7 +338,7 @@ npx expo build:android ### WebSocket Communication ```javascript // Connect to backend -const ws = new WebSocket('ws://backend-url:8000/ws_pcm'); +const ws = new WebSocket('ws://backend-url:8000/ws?codec=pcm'); // Send audio data ws.send(audioBuffer); diff --git a/app/app/components/BackendStatus.tsx b/app/app/components/BackendStatus.tsx index 75fdd7a8..4f55d37f 100644 --- a/app/app/components/BackendStatus.tsx +++ b/app/app/components/BackendStatus.tsx @@ -208,9 +208,9 @@ export const BackendStatus: React.FC = ({ - Enter the WebSocket URL of your backend server. Simple backend: http://localhost:8000/ (no auth). + Enter the WebSocket URL of your backend server. Simple backend: http://localhost:8000/ (no auth). 
Advanced backend: http://localhost:8080/ (requires login). Status is automatically checked. - The websocket URL can be different or the same as the HTTP URL, with /ws_omi suffix + The websocket URL can be different or the same as the HTTP URL, with /ws endpoint and codec parameter (e.g., /ws?codec=pcm) ); diff --git a/app/app/index.tsx b/app/app/index.tsx index fc924d92..649a2e2b 100644 --- a/app/app/index.tsx +++ b/app/app/index.tsx @@ -322,10 +322,16 @@ export default function App() { // Convert HTTP/HTTPS to WS/WSS protocol finalWebSocketUrl = finalWebSocketUrl.replace(/^http:/, 'ws:').replace(/^https:/, 'wss:'); - // Ensure /ws_pcm endpoint is included - if (!finalWebSocketUrl.includes('/ws_pcm')) { - // Remove trailing slash if present, then add /ws_pcm - finalWebSocketUrl = finalWebSocketUrl.replace(/\/$/, '') + '/ws_pcm'; + // Ensure /ws endpoint is included + if (!finalWebSocketUrl.includes('/ws')) { + // Remove trailing slash if present, then add /ws + finalWebSocketUrl = finalWebSocketUrl.replace(/\/$/, '') + '/ws'; + } + + // Add codec parameter if not present + if (!finalWebSocketUrl.includes('codec=')) { + const separator = finalWebSocketUrl.includes('?') ? 
'&' : '?'; + finalWebSocketUrl = finalWebSocketUrl + separator + 'codec=pcm'; } // Check if this is the advanced backend (requires authentication) or simple backend diff --git a/backends/advanced/.dockerignore b/backends/advanced/.dockerignore index 2dd9b44f..f0f7f05c 100644 --- a/backends/advanced/.dockerignore +++ b/backends/advanced/.dockerignore @@ -17,5 +17,5 @@ !nginx.conf.template !start.sh !start-k8s.sh -!start-workers.sh +!worker_orchestrator.py !Caddyfile \ No newline at end of file diff --git a/backends/advanced/.env.template b/backends/advanced/.env.template index a63ab6f5..818b47b6 100644 --- a/backends/advanced/.env.template +++ b/backends/advanced/.env.template @@ -1,219 +1,106 @@ # ======================================== -# GETTING STARTED +# Chronicle Backend - Secrets Only # ======================================== +# This file contains ONLY secret values (API keys, passwords, tokens). +# All other configuration is in config/config.yml. +# +# Setup: # 1. Copy this file to .env: cp .env.template .env -# 2. Fill in your API keys below (at minimum: DEEPGRAM_API_KEY, OPENAI_API_KEY) -# 3. Run: docker compose up --build -d -# 4. For testing: ./run-test.sh (requires API keys to be set) - -# This key is used to sign your JWT token, just make it random and long -AUTH_SECRET_KEY= - -# This is the password for the admin user -ADMIN_PASSWORD= - -# Admin email (defaults to admin@example.com if not set) -ADMIN_EMAIL=admin@example.com +# 2. Fill in your API keys and secrets below +# 3. Configure non-secret settings in config/config.yml +# 4. 
Run: docker compose up --build -d # ======================================== -# LLM CONFIGURATION (Standard) +# Authentication Secrets # ======================================== -# LLM Provider: "openai" or "ollama" (default: openai) -LLM_PROVIDER=openai +# JWT signing key (generate a long random string) +AUTH_SECRET_KEY= -# OpenAI or OpenAI-compatible API configuration -OPENAI_API_KEY=your-openai-key-here -OPENAI_BASE_URL=https://api.openai.com/v1 -OPENAI_MODEL=gpt-4o-mini +# Admin account password +ADMIN_PASSWORD= -# For Ollama (OpenAI-compatible mode): -# LLM_PROVIDER=ollama -# OLLAMA_BASE_URL=dummy -# OLLAMA_BASE_URL=http://ollama:11434/v1 -# OLLAMA_MODEL=llama3.1:latest -# OLLAMA_EMBEDDER_MODEL=nomic-embed-text:latest +# Admin email address +ADMIN_EMAIL=admin@example.com # ======================================== -# CHAT INTERFACE CONFIGURATION (Optional) +# LLM API Keys # ======================================== -# Chat-specific LLM model (defaults to OPENAI_MODEL if not set) -# CHAT_LLM_MODEL=gpt-4o-mini - -# Chat temperature for more conversational responses (defaults to 0.7) -# CHAT_TEMPERATURE=0.7 +# OpenAI API key (or OpenAI-compatible provider) +OPENAI_API_KEY= # ======================================== -# SPEECH-TO-TEXT CONFIGURATION (API Keys Only) +# Transcription API Keys # ======================================== -# Provider selection is in config.yml (defaults.stt) -# Deepgram (cloud-based, recommended) +# Deepgram API key (for cloud-based transcription) DEEPGRAM_API_KEY= -# Note: Parakeet ASR URL configured in config.yml - # ======================================== -# SPEECH DETECTION CONFIGURATION +# Speaker Recognition # ======================================== -# Speech detection settings for conversation creation (speech-driven architecture) -# Only meaningful speech creates conversations - silence/noise is filtered out - -# Minimum words required to create a conversation (default: 5) -SPEECH_DETECTION_MIN_WORDS=5 - -# Minimum word confidence 
threshold (0.0-1.0, default: 0.5) -# Used for both conversation creation and speech gap analysis -SPEECH_DETECTION_MIN_CONFIDENCE=0.5 - -# Batch transcription monitoring (for batch providers like Parakeet) -TRANSCRIPTION_BUFFER_SECONDS=120 # Trigger transcription every N seconds - -# Auto-stop thresholds -SPEECH_INACTIVITY_THRESHOLD_SECONDS=60 # Close conversation after N seconds of no speech - -# Speaker enrollment filter (default: false) -# When enabled, only creates conversations when enrolled speakers are detected -# Requires speaker recognition service to be running and speakers to be enrolled -# Set to "true" to enable, "false" or omit to disable -RECORD_ONLY_ENROLLED_SPEAKERS=false +# Hugging Face token (for PyAnnote speaker recognition models) +HF_TOKEN= # ======================================== -# DATABASE CONFIGURATION +# Optional Services # ======================================== -# MongoDB for conversations and user data (defaults to mongodb://mongo:27017) -MONGODB_URI=mongodb://mongo:27017 - -# MongoDB database name (new installations use 'chronicle', legacy installations use 'friend-lite') -MONGODB_DATABASE=chronicle +# Neo4j password (if using Neo4j for graph memory) +NEO4J_PASSWORD= -# Qdrant for vector memory storage (defaults to qdrant) -QDRANT_BASE_URL=qdrant +# Langfuse API keys (for LLM observability) +LANGFUSE_PUBLIC_KEY= +LANGFUSE_SECRET_KEY= +# Tailscale auth key (for remote service access) +TS_AUTHKEY= # ======================================== -# MEMORY PROVIDER CONFIGURATION +# Plugin Configuration # ======================================== +# Plugin-specific configuration is in: backends/advanced/src/advanced_omi_backend/plugins/{plugin_id}/config.yml +# Plugin orchestration (enabled, events) is in: config/plugins.yml +# This section contains ONLY plugin secrets -# Memory Provider: "chronicle" (default), "openmemory_mcp", or "mycelia" -# -# Chronicle (default): In-house memory system with full control -# - Custom LLM-powered extraction 
with individual fact storage -# - Smart deduplication and memory updates (ADD/UPDATE/DELETE) -# - Direct Qdrant vector storage -# - No external dependencies -# -# OpenMemory MCP: Delegates to external OpenMemory MCP server -# - Professional memory processing with cross-client compatibility -# - Works with Claude Desktop, Cursor, Windsurf, etc. -# - Web UI at http://localhost:8765 -# - Requires external server setup -# -# Mycelia: Full-featured personal memory timeline -# - Voice, screenshots, and text capture -# - Timeline UI with waveform playback -# - Conversation extraction and semantic search -# - OAuth federation for cross-instance sharing -# - Requires Mycelia server setup (extras/mycelia) -# -# See MEMORY_PROVIDERS.md for detailed comparison -MEMORY_PROVIDER=chronicle - -# ---------------------------------------- -# OpenMemory MCP Configuration -# (Only needed if MEMORY_PROVIDER=openmemory_mcp) -# ---------------------------------------- -# First start the external server: -# cd extras/openmemory-mcp && docker compose up -d -# -# OPENMEMORY_MCP_URL=http://host.docker.internal:8765 -# OPENMEMORY_CLIENT_NAME=chronicle -# OPENMEMORY_USER_ID=openmemory -# OPENMEMORY_TIMEOUT=30 - -# ---------------------------------------- -# Mycelia Configuration -# (Only needed if MEMORY_PROVIDER=mycelia) -# ---------------------------------------- -# First start Mycelia: -# cd extras/mycelia && docker compose up -d redis mongo mongo-search -# cd extras/mycelia/backend && deno task dev -# -# IMPORTANT: JWT_SECRET in Mycelia backend/.env must match AUTH_SECRET_KEY above -# MYCELIA_URL=http://host.docker.internal:5173 -# MYCELIA_DB=mycelia # Database name (use mycelia_test for test environment) -# MYCELIA_TIMEOUT=30 +# --------------------------------------- +# Home Assistant Plugin +# --------------------------------------- +# Enable in config/plugins.yml +# Configure in backends/advanced/src/advanced_omi_backend/plugins/homeassistant/config.yml -# 
======================================== -# OPTIONAL FEATURES -# ======================================== +# Home Assistant server URL +HA_URL=http://homeassistant.local:8123 -NEO4J_HOST=neo4j-mem0 -NEO4J_USER=neo4j -NEO4J_PASSWORD= - -# Debug directory for troubleshooting -DEBUG_DIR=./data/debug_dir - -# Ngrok for external access (if using ngrok from docker-compose) -# NGROK_AUTHTOKEN= - -# Speaker recognition service -# HF_TOKEN= -# SPEAKER_SERVICE_URL=http://speaker-recognition:8001 +# Home Assistant long-lived access token +# Get from: Profile β†’ Security β†’ Long-Lived Access Tokens +HA_TOKEN= -# Audio processing settings -# NEW_CONVERSATION_TIMEOUT_MINUTES=1.5 -# AUDIO_CROPPING_ENABLED=true -# MIN_SPEECH_SEGMENT_DURATION=1.0 -# CROPPING_CONTEXT_PADDING=0.1 +# Wake word for voice commands (optional, default: vivi) +HA_WAKE_WORD=vivi -# ======================================== -# SPEECH-DRIVEN CONVERSATIONS CONFIGURATION -# ======================================== +# Request timeout in seconds (optional, default: 30) +HA_TIMEOUT=30 -# Note: File rotation for long sessions is not yet implemented -# Audio sessions currently create single files that grow until the session ends +# --------------------------------------- +# Email Summarizer Plugin +# --------------------------------------- +# Enable in config/plugins.yml +# Configure in backends/advanced/src/advanced_omi_backend/plugins/email_summarizer/config.yml +# SMTP server configuration +# For Gmail: Use App Password (requires 2FA enabled) +# 1. Go to Google Account β†’ Security β†’ 2-Step Verification +# 2. Scroll to "App passwords" β†’ Generate password for "Mail" +# 3. 
Use the 16-character password below (no spaces) +SMTP_HOST=smtp.gmail.com +SMTP_PORT=587 +SMTP_USERNAME=your-email@gmail.com +SMTP_PASSWORD=your-app-password-here +SMTP_USE_TLS=true -# ======================================== -# PUBLIC ACCESS CONFIGURATION -# ======================================== -# These settings control how the browser accesses the backend for audio playback - -# The IP address or hostname where your backend is publicly accessible from the browser -# Examples: -# - For local development: localhost or 127.0.0.1 -# - For LAN access: your machine's IP (e.g., 192.168.1.100) -# - For VPN/Tailscale access: your VPN IP (e.g., 100.64.x.x for Tailscale) -# - For internet access: your domain or public IP (e.g., friend.example.com) -# Note: This must be accessible from your browser, not from the Docker container -HOST_IP=localhost - -# Backend API port (where audio files are served) -BACKEND_PUBLIC_PORT=8000 - -# WebUI port (defaults to 5173 for Vite dev server) -WEBUI_PORT=5173 - -# CORS origins (comma-separated list of allowed origins for browser requests) -# Note: Tailscale IPs (100.x.x.x) are automatically supported via regex -# For HTTPS access, add HTTPS origins after running ./init.sh -# Examples: -# - Local HTTP: http://localhost:5173,http://127.0.0.1:5173 -# - Local HTTPS: https://localhost,https://127.0.0.1 -# - Tailscale HTTPS: https://100.x.x.x -# - Custom: http://192.168.1.100:5173,https://192.168.1.100 -CORS_ORIGINS=http://localhost:5173,http://localhost:3000,http://127.0.0.1:5173,http://127.0.0.1:3000 - -# Memory settings -# MEM0_TELEMETRY=False - -# Langfuse settings -LANGFUSE_PUBLIC_KEY="" -LANGFUSE_SECRET_KEY="" -LANGFUSE_HOST="http://x.x.x.x:3002" -LANGFUSE_ENABLE_TELEMETRY=False \ No newline at end of file +# Email sender information +FROM_EMAIL=noreply@chronicle.ai +FROM_NAME=Chronicle AI diff --git a/backends/advanced/Dockerfile b/backends/advanced/Dockerfile index 352bcfe9..886c1f32 100644 --- a/backends/advanced/Dockerfile +++ 
b/backends/advanced/Dockerfile @@ -1,6 +1,9 @@ -FROM python:3.12-slim-bookworm AS builder +# ============================================ +# Base stage - common setup +# ============================================ +FROM python:3.12-slim-bookworm AS base -# Install system dependencies for building +# Install system dependencies RUN apt-get update && \ apt-get install -y --no-install-recommends \ build-essential \ @@ -9,40 +12,59 @@ RUN apt-get update && \ curl \ ffmpeg \ && rm -rf /var/lib/apt/lists/* - # portaudio19-dev \ # Install uv COPY --from=ghcr.io/astral-sh/uv:0.6.10 /uv /uvx /bin/ -# Set up the working directory +# Set up working directory WORKDIR /app -# Copy package structure and dependency files first +# Copy package structure and dependency files COPY pyproject.toml README.md ./ COPY uv.lock . RUN mkdir -p src/advanced_omi_backend COPY src/advanced_omi_backend/__init__.py src/advanced_omi_backend/ -# Install dependencies using uv with deepgram extra -# Use cache mount for BuildKit, fallback for legacy builds -# RUN --mount=type=cache,target=/root/.cache/uv \ -# uv sync --extra deepgram -# Fallback for legacy Docker builds (CI compatibility) + +# ============================================ +# Production stage - production dependencies only +# ============================================ +FROM base AS prod + +# Install production dependencies only RUN uv sync --extra deepgram # Copy all application code COPY . . 
-# Copy configuration files if they exist, otherwise they will be created from templates at runtime -# The files are expected to exist, but we handle the case where they don't gracefully - +# Copy configuration files if they exist COPY diarization_config.json* ./ +# Copy and make startup script executable +COPY start.sh ./ +RUN chmod +x start.sh + +# Run the application +CMD ["./start.sh"] + + +# ============================================ +# Dev/Test stage - includes test dependencies +# ============================================ +FROM base AS dev + +# Install production + test dependencies +RUN uv sync --extra deepgram --group test + +# Copy all application code +COPY . . + +# Copy configuration files if they exist +COPY diarization_config.json* ./ -# Copy and make startup scripts executable +# Copy and make startup script executable COPY start.sh ./ -COPY start-workers.sh ./ -RUN chmod +x start.sh start-workers.sh +RUN chmod +x start.sh -# Run the application with workers +# Run the application CMD ["./start.sh"] diff --git a/backends/advanced/Dockerfile.k8s b/backends/advanced/Dockerfile.k8s index b746752a..6500ccf5 100644 --- a/backends/advanced/Dockerfile.k8s +++ b/backends/advanced/Dockerfile.k8s @@ -36,9 +36,9 @@ COPY . . 
# Copy memory config (created by init.sh from template) -# Copy and make K8s startup scripts executable -COPY start-k8s.sh start-workers.sh ./ -RUN chmod +x start-k8s.sh start-workers.sh +# Copy and make K8s startup script executable +COPY start-k8s.sh ./ +RUN chmod +x start-k8s.sh # Activate virtual environment in PATH ENV PATH="/app/.venv/bin:$PATH" diff --git a/backends/advanced/Docs/architecture.md b/backends/advanced/Docs/architecture.md index 7c6427bb..739f0ed7 100644 --- a/backends/advanced/Docs/architecture.md +++ b/backends/advanced/Docs/architecture.md @@ -22,7 +22,7 @@ graph TB %% Main WebSocket Server subgraph "WebSocket Server" - WS["/ws_pcm endpoint"] + WS["/ws?codec=pcm endpoint"] AUTH[JWT Auth] end @@ -237,13 +237,13 @@ Wyoming is a peer-to-peer protocol for voice assistants that combines JSONL (JSO #### Backend Implementation -**Advanced Backend (`/ws_pcm`)**: +**Advanced Backend (`/ws?codec=pcm`)**: - **Full Wyoming Protocol Support**: Parses all Wyoming events for comprehensive session management - **Session State Tracking**: Only processes audio chunks when session is active (after receiving audio-start) - **Conversation Boundaries**: Uses Wyoming audio-start/stop events to define precise conversation segments - **PCM Audio Processing**: Direct processing of PCM audio data from all apps -**Advanced Backend (`/ws_omi`)**: +**Advanced Backend (`/ws?codec=opus`)**: - **Wyoming Protocol + Opus Decoding**: Combines Wyoming session management with OMI Opus decoding - **Continuous Streaming**: OMI devices stream continuously, audio-start/stop events are optional - **Timestamp Preservation**: Uses timestamps from Wyoming headers when provided @@ -1006,8 +1006,8 @@ src/advanced_omi_backend/ - `POST /api/conversations/{conversation_id}/activate-transcript` - Switch transcript version - `POST /api/conversations/{conversation_id}/activate-memory` - Switch memory version - `POST /api/audio/upload` - Batch audio file upload and processing -- WebSocket 
`/ws_omi` - Real-time Opus audio streaming with Wyoming protocol (OMI devices) -- WebSocket `/ws_pcm` - Real-time PCM audio streaming with Wyoming protocol (all apps) +- WebSocket `/ws?codec=opus` - Real-time Opus audio streaming with Wyoming protocol (OMI devices) +- WebSocket `/ws?codec=pcm` - Real-time PCM audio streaming with Wyoming protocol (all apps) ### Authentication & Authorization - **JWT Tokens**: All API endpoints require valid JWT authentication diff --git a/backends/advanced/Docs/auth.md b/backends/advanced/Docs/auth.md index acbf8df4..7998750e 100644 --- a/backends/advanced/Docs/auth.md +++ b/backends/advanced/Docs/auth.md @@ -100,13 +100,13 @@ curl -X POST "http://localhost:8000/auth/jwt/login" \ #### Token-based (Recommended) ```javascript -const ws = new WebSocket('ws://localhost:8000/ws_pcm?token=JWT_TOKEN&device_name=phone'); +const ws = new WebSocket('ws://localhost:8000/ws?codec=pcm&token=JWT_TOKEN&device_name=phone'); ``` #### Cookie-based ```javascript // Requires existing cookie from web login -const ws = new WebSocket('ws://localhost:8000/ws_pcm?device_name=phone'); +const ws = new WebSocket('ws://localhost:8000/ws?codec=pcm&device_name=phone'); ``` ## Client ID Management @@ -183,8 +183,8 @@ COOKIE_SECURE=false - `PATCH /api/users/me` - Update user profile ### WebSocket Endpoints -- `ws://host/ws` - Opus audio stream with auth -- `ws://host/ws_pcm` - PCM audio stream with auth +- `ws://host/ws?codec=opus` - Opus audio stream with auth +- `ws://host/ws?codec=pcm` - PCM audio stream with auth (default) ## Error Handling diff --git a/backends/advanced/Docs/memories.md b/backends/advanced/Docs/memories.md index cae98383..08ae393e 100644 --- a/backends/advanced/Docs/memories.md +++ b/backends/advanced/Docs/memories.md @@ -98,7 +98,7 @@ MEM0_CONFIG = { "vector_store": { "provider": "qdrant", "config": { - "collection_name": "omi_memories", + "collection_name": "chronicle_memories", "embedding_model_dims": 768, "host": QDRANT_BASE_URL, "port": 
6333, @@ -499,7 +499,7 @@ This will: 3. **Search Not Working** - Ensure embedding model is available in Ollama - Check vector dimensions match between embedder and Qdrant - - Verify collection has vectors: `curl http://localhost:6333/collections/omi_memories` + - Verify collection has vectors: `curl http://localhost:6333/collections/chronicle_memories` ### Required Ollama Models diff --git a/backends/advanced/Docs/memory-configuration-guide.md b/backends/advanced/Docs/memory-configuration-guide.md index 12796e13..66244003 100644 --- a/backends/advanced/Docs/memory-configuration-guide.md +++ b/backends/advanced/Docs/memory-configuration-guide.md @@ -65,7 +65,7 @@ memory: - **Embeddings**: `text-embedding-3-small`, `text-embedding-3-large` #### Ollama Models (Local) -- **LLM**: `llama3`, `mistral`, `qwen2.5` +- **LLM**: `llama3`, `qwen2.5` - **Embeddings**: `nomic-embed-text`, `all-minilm` ## Hot Reload diff --git a/backends/advanced/Docs/plugin-configuration.md b/backends/advanced/Docs/plugin-configuration.md new file mode 100644 index 00000000..a4c7b222 --- /dev/null +++ b/backends/advanced/Docs/plugin-configuration.md @@ -0,0 +1,399 @@ +# Plugin Configuration Architecture + +Chronicle uses a clean separation of concerns for plugin configuration, dividing settings across three locations based on their purpose. + +## Configuration Files + +### 1. `config/plugins.yml` - Orchestration Only + +**Purpose**: Controls which plugins are enabled and what events they listen to + +**Contains**: +- Plugin enable/disable flags +- Event subscriptions +- Trigger conditions (wake words, etc.) + +**Example**: +```yaml +plugins: + email_summarizer: + enabled: true + events: + - conversation.complete + condition: + type: always + + homeassistant: + enabled: false + events: + - transcript.streaming + condition: + type: wake_word + wake_words: + - hey vivi +``` + +### 2. 
`backends/advanced/src/advanced_omi_backend/plugins/{plugin_id}/config.yml` - Plugin Settings + +**Purpose**: Plugin-specific non-secret configuration + +**Contains**: +- Feature flags +- Timeouts and limits +- Display preferences +- References to environment variables using `${VAR_NAME}` syntax + +**Example** (`plugins/email_summarizer/config.yml`): +```yaml +# Email content settings +subject_prefix: "Conversation Summary" +summary_max_sentences: 3 +include_conversation_id: true + +# SMTP config (reads from .env) +smtp_host: ${SMTP_HOST} +smtp_port: ${SMTP_PORT:-587} +smtp_username: ${SMTP_USERNAME} +smtp_password: ${SMTP_PASSWORD} +``` + +### 3. `backends/advanced/.env` - Secrets Only + +**Purpose**: All secret values (API keys, passwords, tokens) + +**Contains**: +- API keys +- Authentication tokens +- SMTP credentials +- Database passwords + +**Example**: +```bash +# Email Summarizer Plugin +SMTP_HOST=smtp.gmail.com +SMTP_PORT=587 +SMTP_USERNAME=your-email@gmail.com +SMTP_PASSWORD=your-app-password-here + +# Home Assistant Plugin +HA_URL=http://homeassistant.local:8123 +HA_TOKEN=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9... +``` + +## Configuration Loading Process + +When a plugin is initialized, Chronicle merges configuration from all three sources: + +``` +1. Load plugins/{plugin_id}/config.yml + ↓ +2. Expand ${ENV_VAR} references from .env + ↓ +3. Merge orchestration settings from config/plugins.yml + ↓ +4. Pass complete config to plugin constructor +``` + +### Example Configuration Flow + +**Email Summarizer Plugin**: + +1. **Load** `plugins/email_summarizer/config.yml`: + ```yaml + subject_prefix: "Conversation Summary" + smtp_host: ${SMTP_HOST} + smtp_password: ${SMTP_PASSWORD} + ``` + +2. **Expand env vars** from `.env`: + ```yaml + subject_prefix: "Conversation Summary" + smtp_host: "smtp.gmail.com" # ← Expanded + smtp_password: "app-password-123" # ← Expanded + ``` + +3. 
**Merge orchestration** from `config/plugins.yml`: + ```yaml + enabled: true # ← Added + events: ["conversation.complete"] # ← Added + condition: {type: "always"} # ← Added + subject_prefix: "Conversation Summary" + smtp_host: "smtp.gmail.com" + smtp_password: "app-password-123" + ``` + +4. **Pass to plugin** constructor with complete config + +## Environment Variable Expansion + +Plugin config files use `${VAR_NAME}` syntax for environment variable references: + +- **Simple reference**: `${SMTP_HOST}` β†’ expands to env value +- **With default**: `${SMTP_PORT:-587}` β†’ uses 587 if SMTP_PORT not set +- **Missing vars**: Logs warning and keeps placeholder + +**Example**: +```yaml +# In plugin config.yml +smtp_host: ${SMTP_HOST} +smtp_port: ${SMTP_PORT:-587} +timeout: ${HA_TIMEOUT:-30} + +# With .env: +# SMTP_HOST=smtp.gmail.com +# (SMTP_PORT not set) +# HA_TIMEOUT=60 + +# Results in: +# smtp_host: "smtp.gmail.com" +# smtp_port: "587" # ← Used default +# timeout: "60" # ← From .env +``` + +## Creating a New Plugin + +To add a new plugin with proper configuration: + +### 1. Create plugin directory structure + +```bash +backends/advanced/src/advanced_omi_backend/plugins/my_plugin/ +β”œβ”€β”€ __init__.py # Export plugin class +β”œβ”€β”€ plugin.py # Plugin implementation +└── config.yml # Plugin-specific config +``` + +### 2. Add plugin config file + +**`plugins/my_plugin/config.yml`**: +```yaml +# My Plugin Configuration +# Non-secret settings only + +# Feature settings +feature_enabled: true +timeout: ${MY_PLUGIN_TIMEOUT:-30} + +# API configuration (secrets from .env) +api_url: ${MY_PLUGIN_API_URL} +api_key: ${MY_PLUGIN_API_KEY} +``` + +### 3. Add secrets to `.env.template` + +**`backends/advanced/.env.template`**: +```bash +# My Plugin +MY_PLUGIN_API_URL=https://api.example.com +MY_PLUGIN_API_KEY= +MY_PLUGIN_TIMEOUT=30 +``` + +### 4. 
Add orchestration settings + +**`config/plugins.yml`**: +```yaml +plugins: + my_plugin: + enabled: false + events: + - conversation.complete + condition: + type: always +``` + +### 5. Implement plugin class + +**`plugins/my_plugin/plugin.py`**: +```python +from ..base import BasePlugin, PluginContext, PluginResult + +class MyPlugin(BasePlugin): + def __init__(self, config: Dict[str, Any]): + super().__init__(config) + # Config automatically merged from all sources + self.api_url = config.get('api_url') + self.api_key = config.get('api_key') + self.timeout = config.get('timeout', 30) + + async def initialize(self): + # Plugin initialization + pass + + async def on_conversation_complete(self, context: PluginContext): + # Event handler + pass +``` + +## Benefits of This Architecture + +βœ… **Clean separation**: Secrets (.env) vs Config (yml) vs Orchestration (plugins.yml) + +βœ… **Plugin portability**: Each plugin has self-contained config.yml + +βœ… **No secret duplication**: Secrets only in .env, referenced via ${VAR} + +βœ… **Easy discovery**: Want to configure a plugin? 
β†’ `plugins/{plugin_id}/config.yml` + +βœ… **Main config.yml stays clean**: No plugin pollution in main backend config + +βœ… **Unified interface**: All plugins loaded with same pattern via `load_plugin_config()` + +## Troubleshooting + +### Plugin not loading + +**Check logs** for: +- "Plugin 'X' not found" β†’ Directory/file structure issue +- "Environment variable 'X' not found" β†’ Missing .env entry +- "Failed to load config.yml" β†’ YAML syntax error + +**Verify**: +```bash +# Check plugin directory exists +ls backends/advanced/src/advanced_omi_backend/plugins/my_plugin/ + +# Validate config.yml syntax +python -c "import yaml; yaml.safe_load(open('plugins/my_plugin/config.yml'))" + +# Check .env has required vars +grep MY_PLUGIN .env +``` + +### Environment variables not expanding + +**Problem**: `${SMTP_HOST}` stays as literal text + +**Solution**: +- Ensure `.env` file exists in `backends/advanced/.env` +- Check variable name matches exactly (case-sensitive) +- Restart backend after .env changes +- Check logs for "Environment variable 'X' not found" warnings + +### Plugin enabled but not running + +**Check**: +1. `config/plugins.yml` has `enabled: true` +2. Plugin subscribed to correct events +3. Conditions are met (wake words, etc.) +4. Plugin initialized without errors (check logs) + +## Using Shared Setup Utilities in Plugin Setup Scripts + +Chronicle provides shared utilities (`setup_utils.py`) for creating interactive plugin setup wizards with password masking and existing value detection. 
+ +### Quick Reference + +```python +#!/usr/bin/env python3 +import sys +from pathlib import Path + +# Import shared utilities +project_root = Path(__file__).resolve().parents[6] +sys.path.insert(0, str(project_root)) + +from setup_utils import ( + prompt_with_existing_masked, # Main function for masked prompts + prompt_value, # Simple value prompts + prompt_password, # Password with validation + mask_value, # Mask a value manually + read_env_value # Read from .env +) +from dotenv import set_key + +# Path to backend .env +env_path = str(project_root / "backends" / "advanced" / ".env") + +# Prompt for password/token with masking +api_key = prompt_with_existing_masked( + prompt_text="API Key", + env_file_path=env_path, + env_key="MY_PLUGIN_API_KEY", + placeholders=['your-key-here'], + is_password=True # ← Shows masked existing value +) + +# Save to .env +set_key(env_path, "MY_PLUGIN_API_KEY", api_key) +``` + +### Function Details + +**`prompt_with_existing_masked()`** - Primary function for secrets + +Shows masked existing values and allows users to reuse them: +```python +smtp_password = prompt_with_existing_masked( + prompt_text="SMTP Password", + env_file_path="../../.env", # Path to .env file + env_key="SMTP_PASSWORD", # Environment variable name + placeholders=['your-password-here'], # Values to treat as "not set" + is_password=True, # Use masking and hidden input + default="" # Fallback if no existing value +) +# Output: SMTP Password (smtp_***********word) [press Enter to reuse, or enter new]: +``` + +**Benefits:** +- βœ… Shows previously configured values as masked (e.g., `sk-pr***********xyz`) +- βœ… Lets users press Enter to keep existing value (no re-entry needed) +- βœ… Automatically reads from .env if path/key provided +- βœ… Works with placeholders - treats them as "not configured" + +**`prompt_password()`** - Password with validation + +```python +admin_pass = prompt_password( + prompt_text="Admin Password", + min_length=8, # Minimum length requirement 
+ allow_generated=True # Auto-generate in non-interactive mode +) +``` + +**`prompt_value()`** - Simple value prompts + +```python +port = prompt_value("SMTP Port", default="587") +``` + +### Complete Plugin Setup Example + +See `backends/advanced/src/advanced_omi_backend/plugins/email_summarizer/setup.py` for a complete working example showing: +- Masked password/token prompts with existing value reuse +- Saving credentials to backend .env +- Clean user-facing instructions +- Error handling + +### Best Practices + +1. **Always show masked values for secrets** - Use `is_password=True` +2. **Auto-read from .env** - Provide `env_file_path` and `env_key` parameters +3. **Use placeholders** - Define common placeholder values to detect "not configured" +4. **Save to backend .env** - All plugin secrets go in `backends/advanced/.env` +5. **Clear instructions** - Tell users what to do next (enable in plugins.yml, restart) + +### Convenience Functions + +For common patterns, use the convenience wrappers: + +```python +from setup_utils import prompt_api_key, prompt_token + +# API keys +openai_key = prompt_api_key("OpenAI", env_file_path="../../.env") +# Prompts: "OpenAI API Key" +# Env var: OPENAI_API_KEY + +# Auth tokens +ha_token = prompt_token("Home Assistant", env_file_path="../../.env") +# Prompts: "Home Assistant Token" +# Env var: HOME_ASSISTANT_TOKEN +``` + +## See Also + +- [CLAUDE.md](../../../CLAUDE.md) - Main documentation +- [Plugin Development Guide](plugin-development.md) - Creating custom plugins +- [Environment Variables](environment-variables.md) - Complete .env reference +- [setup_utils.py](../../../setup_utils.py) - Shared setup utility reference diff --git a/backends/advanced/README.md b/backends/advanced/README.md index 0f5a4490..7f3d5a24 100644 --- a/backends/advanced/README.md +++ b/backends/advanced/README.md @@ -31,7 +31,7 @@ Modern React-based web dashboard located in `./webui/` with: **The setup wizard guides you through:** - **Authentication**: 
Admin email/password setup with secure keys -- **Transcription Provider**: Choose between Deepgram, Mistral, or Offline (Parakeet) +- **Transcription Provider**: Choose between Deepgram or Offline (Parakeet) - **LLM Provider**: Choose between OpenAI (recommended) or Ollama for memory extraction - **Memory Provider**: Choose between Friend-Lite Native or OpenMemory MCP - **HTTPS Configuration**: Optional SSL setup for microphone access (uses Caddy) diff --git a/backends/advanced/cleanup.sh b/backends/advanced/cleanup.sh new file mode 100755 index 00000000..041e6364 --- /dev/null +++ b/backends/advanced/cleanup.sh @@ -0,0 +1,15 @@ +#!/bin/bash +# Wrapper script for cleanup_state.py +# Usage: ./cleanup.sh --backup --export-audio +# +# This script runs the cleanup_state.py script inside the chronicle-backend container +# to handle data ownership and permissions correctly. +# +# Examples: +# ./cleanup.sh --dry-run # Preview what would be deleted +# ./cleanup.sh --backup # Cleanup with metadata backup +# ./cleanup.sh --backup --export-audio # Full backup including audio +# ./cleanup.sh --backup --force # Skip confirmation prompts + +cd "$(dirname "$0")" +docker compose exec chronicle-backend uv run python src/scripts/cleanup_state.py "$@" diff --git a/backends/advanced/diarization_config.json.template b/backends/advanced/diarization_config.json.template deleted file mode 100644 index d760df85..00000000 --- a/backends/advanced/diarization_config.json.template +++ /dev/null @@ -1,9 +0,0 @@ -{ - "diarization_source": "pyannote", - "similarity_threshold": 0.15, - "min_duration": 0.5, - "collar": 2.0, - "min_duration_off": 1.5, - "min_speakers": 2, - "max_speakers": 6 -} \ No newline at end of file diff --git a/backends/advanced/docker-compose-test.yml b/backends/advanced/docker-compose-test.yml index e4203f91..43aa1a83 100644 --- a/backends/advanced/docker-compose-test.yml +++ b/backends/advanced/docker-compose-test.yml @@ -2,26 +2,34 @@ # Isolated test environment for 
integration tests # Uses different ports to avoid conflicts with development environment +name: backend-test + services: chronicle-backend-test: build: context: . dockerfile: Dockerfile + target: dev # Use dev stage with test dependencies + command: ["./start.sh", "--test"] ports: - "8001:8000" # Avoid conflict with dev on 8000 volumes: - ./src:/app/src # Mount source code for easier development - ./data/test_audio_chunks:/app/audio_chunks - - ./data/test_debug_dir:/app/debug_dir + - ./data/test_debug_dir:/app/debug # Fixed: mount to /app/debug for plugin database - ./data/test_data:/app/data - - ${CONFIG_FILE:-../../config/config.yml}:/app/config.yml # Mount config.yml for model registry and memory settings (writable for admin config updates) + - ../../config:/app/config # Mount config directory with defaults.yml + - ../../tests/configs:/app/test-configs:ro # Mount test-specific configs + - ${PLUGINS_CONFIG:-../../tests/config/plugins.test.yml}:/app/config/plugins.yml # Mount test plugins config to correct location environment: # Override with test-specific settings - MONGODB_URI=mongodb://mongo-test:27017/test_db - QDRANT_BASE_URL=qdrant-test - QDRANT_PORT=6333 - REDIS_URL=redis://redis-test:6379/0 - - DEBUG_DIR=/app/debug_dir + - DEBUG_DIR=/app/debug # Fixed: match plugin database mount path + # Test configuration file + - CONFIG_FILE=${TEST_CONFIG_FILE:-/app/test-configs/deepgram-openai.yml} # Import API keys from environment - DEEPGRAM_API_KEY=${DEEPGRAM_API_KEY} - OPENAI_API_KEY=${OPENAI_API_KEY} @@ -42,10 +50,17 @@ services: # Speaker recognition controlled by config.yml (disabled in test config for CI performance) - SPEAKER_SERVICE_URL=http://speaker-service-test:8085 - CORS_ORIGINS=http://localhost:3001,http://localhost:8001,https://localhost:3001,https://localhost:8001 - # Set low inactivity timeout for tests (2 seconds instead of 60) - - SPEECH_INACTIVITY_THRESHOLD_SECONDS=2 + # Set inactivity timeout for tests (20 seconds of audio time) + # This is 
audio duration, not wall-clock time + - SPEECH_INACTIVITY_THRESHOLD_SECONDS=20 + # Set low speech detection thresholds for tests + - SPEECH_DETECTION_MIN_DURATION=2.0 # 2 seconds instead of 10 + - SPEECH_DETECTION_MIN_WORDS=5 # 5 words instead of 10 # Wait for audio queue to drain before timing out (test mode) - WAIT_FOR_AUDIO_QUEUE_DRAIN=true + # Mock speaker recognition for tests (avoids resource-intensive ML service) + # To test with REAL speaker recognition: set to 'false' and start extras/speaker-recognition service + - USE_MOCK_SPEAKER_CLIENT=true depends_on: qdrant-test: condition: service_started @@ -53,8 +68,6 @@ services: condition: service_healthy redis-test: condition: service_started - speaker-service-test: - condition: service_healthy healthcheck: test: ["CMD", "curl", "-f", "http://localhost:8000/readiness"] interval: 10s @@ -125,7 +138,7 @@ services: context: ../../extras/speaker-recognition dockerfile: Dockerfile args: - PYTORCH_CUDA_VERSION: cpu + PYTORCH_CUDA_VERSION: cu12.6 image: speaker-recognition-test:latest ports: - "8086:8085" # Avoid conflict with dev speaker service on 8085 @@ -149,25 +162,59 @@ services: retries: 5 start_period: 60s restart: unless-stopped + profiles: + - speaker # Optional service - only start when explicitly enabled + + mock-streaming-stt: + build: + context: ../.. + dockerfile: tests/Dockerfile.mock-streaming-stt + ports: + - "9999:9999" + healthcheck: + test: ["CMD", "python", "-c", "import socket; s=socket.socket(); s.connect(('localhost',9999)); s.close()"] + interval: 10s + timeout: 5s + retries: 3 + restart: unless-stopped + + mock-llm: + build: + context: ../.. + dockerfile: tests/Dockerfile.mock-llm + ports: + - "11435:11435" + healthcheck: + test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:11435/health').read()"] + interval: 10s + timeout: 5s + retries: 3 + restart: unless-stopped workers-test: build: context: . 
dockerfile: Dockerfile - command: ./start-workers.sh + target: dev # Use dev stage with test dependencies + command: ["uv", "run", "--group", "test", "python", "worker_orchestrator.py"] volumes: - ./src:/app/src + - ./worker_orchestrator.py:/app/worker_orchestrator.py - ./data/test_audio_chunks:/app/audio_chunks - - ./data/test_debug_dir:/app/debug_dir + - ./data/test_debug_dir:/app/debug # Fixed: mount to /app/debug for plugin database - ./data/test_data:/app/data - - ${CONFIG_FILE:-../../config/config.yml}:/app/config.yml # Mount config.yml for model registry and memory settings (writable for admin config updates) + - ../../config:/app/config # Mount config directory with defaults.yml + - ../../tests/configs:/app/test-configs:ro # Mount test-specific configs + - ${PLUGINS_CONFIG:-../../tests/config/plugins.test.yml}:/app/config/plugins.yml # Mount test plugins config to correct location environment: # Same environment as backend - MONGODB_URI=mongodb://mongo-test:27017/test_db - QDRANT_BASE_URL=qdrant-test - QDRANT_PORT=6333 - REDIS_URL=redis://redis-test:6379/0 - - DEBUG_DIR=/app/debug_dir + - DEBUG_DIR=/app/debug # Fixed: match plugin database mount path + # Test configuration file + - CONFIG_FILE=${TEST_CONFIG_FILE:-/app/test-configs/deepgram-openai.yml} - DEEPGRAM_API_KEY=${DEEPGRAM_API_KEY} - OPENAI_API_KEY=${OPENAI_API_KEY} - GROQ_API_KEY=${GROQ_API_KEY} @@ -183,10 +230,17 @@ services: - MYCELIA_DB=mycelia_test # Speaker recognition controlled by config.yml (disabled in test config for CI performance) - SPEAKER_SERVICE_URL=http://speaker-service-test:8085 - # Set low inactivity timeout for tests (2 seconds instead of 60) - - SPEECH_INACTIVITY_THRESHOLD_SECONDS=2 + # Set inactivity timeout for tests (20 seconds of audio time) + # This is audio duration, not wall-clock time + - SPEECH_INACTIVITY_THRESHOLD_SECONDS=20 + # Set low speech detection thresholds for tests + - SPEECH_DETECTION_MIN_DURATION=2.0 # 2 seconds instead of 10 + - 
SPEECH_DETECTION_MIN_WORDS=5 # 5 words instead of 10 # Wait for audio queue to drain before timing out (test mode) - WAIT_FOR_AUDIO_QUEUE_DRAIN=true + # Mock speaker recognition for tests (avoids resource-intensive ML service) + # To test with REAL speaker recognition: set to 'false' and start extras/speaker-recognition service + - USE_MOCK_SPEAKER_CLIENT=true depends_on: chronicle-backend-test: condition: service_healthy @@ -196,8 +250,6 @@ services: condition: service_started qdrant-test: condition: service_started - speaker-service-test: - condition: service_healthy restart: unless-stopped # Mycelia - AI memory and timeline service (test environment) diff --git a/backends/advanced/docker-compose.yml b/backends/advanced/docker-compose.yml index f46a23fa..230f40c9 100644 --- a/backends/advanced/docker-compose.yml +++ b/backends/advanced/docker-compose.yml @@ -1,8 +1,35 @@ services: + tailscale: + image: tailscale/tailscale:latest + container_name: advanced-tailscale + hostname: chronicle-tailscale + environment: + - TS_AUTHKEY=${TS_AUTHKEY} + - TS_STATE_DIR=/var/lib/tailscale + - TS_USERSPACE=false + - TS_ACCEPT_DNS=true + volumes: + - tailscale-state:/var/lib/tailscale + devices: + - /dev/net/tun:/dev/net/tun + cap_add: + - NET_ADMIN + restart: unless-stopped + profiles: + - tailscale # Optional profile + ports: + - "18123:18123" # HA proxy port + command: > + sh -c "tailscaled & + tailscale up --authkey=$${TS_AUTHKEY} --accept-dns=true && + apk add --no-cache socat 2>/dev/null || true && + socat TCP-LISTEN:18123,fork,reuseaddr TCP:100.99.62.5:8123" + chronicle-backend: build: context: . 
dockerfile: Dockerfile + target: prod # Use prod stage without test dependencies ports: - "8000:8000" env_file: @@ -12,7 +39,7 @@ services: - ./data/audio_chunks:/app/audio_chunks - ./data/debug_dir:/app/debug_dir - ./data:/app/data - - ../../config/config.yml:/app/config.yml # Removed :ro to allow UI config saving + - ../../config:/app/config # Mount entire config directory (includes config.yml, defaults.yml, plugins.yml) environment: - DEEPGRAM_API_KEY=${DEEPGRAM_API_KEY} - PARAKEET_ASR_URL=${PARAKEET_ASR_URL} @@ -26,8 +53,10 @@ services: - NEO4J_HOST=${NEO4J_HOST} - NEO4J_USER=${NEO4J_USER} - NEO4J_PASSWORD=${NEO4J_PASSWORD} + - HA_TOKEN=${HA_TOKEN} - CORS_ORIGINS=http://localhost:3010,http://localhost:8000,http://192.168.1.153:3010,http://192.168.1.153:8000,https://localhost:3010,https://localhost:8000,https://100.105.225.45,https://localhost - REDIS_URL=redis://redis:6379/0 + - MONGODB_URI=mongodb://mongo:27017 depends_on: qdrant: condition: service_started @@ -35,6 +64,8 @@ services: condition: service_healthy redis: condition: service_healthy + extra_hosts: + - "host.docker.internal:host-gateway" # Access host's Tailscale network healthcheck: test: ["CMD", "curl", "-f", "http://localhost:8000/readiness"] interval: 30s @@ -46,27 +77,37 @@ services: # Unified Worker Container # No CUDA needed for chronicle-backend and workers, workers only orchestrate jobs and call external services # Runs all workers in a single container for efficiency: - # - 3 RQ workers (transcription, memory, default queues) - # - 1 Audio stream worker (Redis Streams consumer - must be single to maintain sequential chunks) + # - 6 RQ workers (transcription, memory, default queues) + # - 1 Audio persistence worker (audio queue) + # - 1+ Stream workers (conditional based on config.yml - Deepgram/Parakeet) + # Uses Python orchestrator for process management, health monitoring, and self-healing workers: build: context: . 
dockerfile: Dockerfile - command: ["./start-workers.sh"] + target: prod # Use prod stage without test dependencies + command: ["uv", "run", "python", "worker_orchestrator.py"] env_file: - .env volumes: - ./src:/app/src - - ./start-workers.sh:/app/start-workers.sh + - ./worker_orchestrator.py:/app/worker_orchestrator.py - ./data/audio_chunks:/app/audio_chunks - ./data:/app/data - - ../../config/config.yml:/app/config.yml # Removed :ro for consistency + - ../../config:/app/config # Mount entire config directory (includes config.yml, defaults.yml, plugins.yml) environment: - DEEPGRAM_API_KEY=${DEEPGRAM_API_KEY} - PARAKEET_ASR_URL=${PARAKEET_ASR_URL} - OPENAI_API_KEY=${OPENAI_API_KEY} - GROQ_API_KEY=${GROQ_API_KEY} + - HA_TOKEN=${HA_TOKEN} - REDIS_URL=redis://redis:6379/0 + - MONGODB_URI=mongodb://mongo:27017 + # Worker orchestrator configuration (optional - defaults shown) + - WORKER_CHECK_INTERVAL=${WORKER_CHECK_INTERVAL:-10} + - MIN_RQ_WORKERS=${MIN_RQ_WORKERS:-6} + - WORKER_STARTUP_GRACE_PERIOD=${WORKER_STARTUP_GRACE_PERIOD:-30} + - WORKER_SHUTDOWN_TIMEOUT=${WORKER_SHUTDOWN_TIMEOUT:-30} depends_on: redis: condition: service_healthy @@ -76,6 +117,33 @@ services: condition: service_started restart: unless-stopped + # Annotation Cron Scheduler + # Runs periodic jobs for AI-powered annotation suggestions: + # - Daily: Surface potential errors in transcripts/memories + # - Weekly: Fine-tune error detection models using user feedback + # Set DEV_MODE=true in .env for 1-minute intervals (testing) + annotation-cron: + build: + context: . 
+ dockerfile: Dockerfile + target: prod + command: ["uv", "run", "python", "-m", "advanced_omi_backend.cron"] + container_name: chronicle-annotation-cron + env_file: + - .env + environment: + - MONGODB_URI=mongodb://mongo:27017 + - DEV_MODE=${DEV_MODE:-false} + - OPENAI_API_KEY=${OPENAI_API_KEY} + - LLM_PROVIDER=${LLM_PROVIDER:-openai} + - OLLAMA_BASE_URL=${OLLAMA_BASE_URL} + depends_on: + mongo: + condition: service_healthy + restart: unless-stopped + profiles: + - annotation # Optional profile - enable with: docker compose --profile annotation up + webui: build: context: ./webui @@ -226,3 +294,5 @@ volumes: driver: local neo4j_logs: driver: local + tailscale-state: + driver: local diff --git a/backends/advanced/docs/plugin-development-guide.md b/backends/advanced/docs/plugin-development-guide.md new file mode 100644 index 00000000..17c53b4a --- /dev/null +++ b/backends/advanced/docs/plugin-development-guide.md @@ -0,0 +1,776 @@ +# Chronicle Plugin Development Guide + +A comprehensive guide to creating custom plugins for Chronicle. + +## Table of Contents + +1. [Introduction](#introduction) +2. [Quick Start](#quick-start) +3. [Plugin Architecture](#plugin-architecture) +4. [Event Types](#event-types) +5. [Creating Your First Plugin](#creating-your-first-plugin) +6. [Configuration](#configuration) +7. [Testing Plugins](#testing-plugins) +8. [Best Practices](#best-practices) +9. [Examples](#examples) +10. [Troubleshooting](#troubleshooting) + +## Introduction + +Chronicle's plugin system allows you to extend functionality by subscribing to events and executing custom logic. 
Plugins are: + +- **Event-driven**: React to transcripts, conversations, or memory processing +- **Auto-discovered**: Drop plugins into the `plugins/` directory +- **Configurable**: YAML-based configuration with environment variable support +- **Isolated**: Each plugin runs independently with proper error handling + +### Plugin Types + +- **Core Plugins**: Built-in plugins (`homeassistant`, `test_event`) +- **Community Plugins**: Auto-discovered plugins in `plugins/` directory + +## Quick Start + +### 1. Generate Plugin Boilerplate + +```bash +cd backends/advanced +uv run python scripts/create_plugin.py my_awesome_plugin +``` + +This creates: +``` +plugins/my_awesome_plugin/ +β”œβ”€β”€ __init__.py # Plugin exports +β”œβ”€β”€ plugin.py # Main plugin logic +└── README.md # Plugin documentation +``` + +### 2. Implement Plugin Logic + +Edit `plugins/my_awesome_plugin/plugin.py`: + +```python +async def on_conversation_complete(self, context: PluginContext) -> Optional[PluginResult]: + """Handle conversation completion.""" + transcript = context.data.get('transcript', '') + + # Your custom logic here + print(f"Processing: {transcript}") + + return PluginResult(success=True, message="Processing complete") +``` + +### 3. Configure Plugin + +Add to `config/plugins.yml`: + +```yaml +plugins: + my_awesome_plugin: + enabled: true + events: + - conversation.complete + condition: + type: always +``` + +### 4. Restart Backend + +```bash +cd backends/advanced +docker compose restart +``` + +Your plugin will be auto-discovered and loaded! 
+ +## Plugin Architecture + +### Base Plugin Class + +All plugins inherit from `BasePlugin`: + +```python +from advanced_omi_backend.plugins.base import BasePlugin, PluginContext, PluginResult + +class MyPlugin(BasePlugin): + SUPPORTED_ACCESS_LEVELS = ['conversation'] # Which events you support + + async def initialize(self): + """Initialize resources (called on app startup)""" + pass + + async def cleanup(self): + """Clean up resources (called on app shutdown)""" + pass + + async def on_conversation_complete(self, context: PluginContext): + """Handle conversation.complete events""" + pass +``` + +### Plugin Context + +Context passed to plugin methods: + +```python +@dataclass +class PluginContext: + user_id: str # User identifier + event: str # Event name (e.g., "conversation.complete") + data: Dict[str, Any] # Event-specific data + metadata: Dict[str, Any] # Additional metadata +``` + +### Plugin Result + +Return value from plugin methods: + +```python +@dataclass +class PluginResult: + success: bool # Whether operation succeeded + data: Optional[Dict[str, Any]] # Optional result data + message: Optional[str] # Optional status message + should_continue: bool # Whether to continue normal processing (default: True) +``` + +## Event Types + +### 1. Transcript Events (`transcript.streaming`) + +**When**: Real-time transcript segments arrive from WebSocket +**Context Data**: +- `transcript` (str): The transcript text +- `segment_id` (str): Unique segment identifier +- `conversation_id` (str): Current conversation ID + +**Use Cases**: +- Wake word detection +- Real-time command processing +- Live transcript analysis + +**Example**: +```python +async def on_transcript(self, context: PluginContext): + transcript = context.data.get('transcript', '') + if 'urgent' in transcript.lower(): + await self.send_notification(transcript) +``` + +### 2. 
Conversation Events (`conversation.complete`) + +**When**: Conversation processing finishes +**Context Data**: +- `conversation` (dict): Full conversation data +- `transcript` (str): Complete transcript +- `duration` (float): Conversation duration in seconds +- `conversation_id` (str): Conversation identifier + +**Use Cases**: +- Email summaries +- Analytics tracking +- External integrations +- Conversation archiving + +**Example**: +```python +async def on_conversation_complete(self, context: PluginContext): + conversation = context.data.get('conversation', {}) + duration = context.data.get('duration', 0) + + if duration > 300: # 5 minutes + await self.archive_long_conversation(conversation) +``` + +### 3. Memory Events (`memory.processed`) + +**When**: Memory extraction finishes +**Context Data**: +- `memories` (list): Extracted memories +- `conversation` (dict): Source conversation +- `memory_count` (int): Number of memories created +- `conversation_id` (str): Conversation identifier + +**Use Cases**: +- Memory indexing +- Knowledge graph updates +- Memory notifications +- Analytics + +**Example**: +```python +async def on_memory_processed(self, context: PluginContext): + memories = context.data.get('memories', []) + + for memory in memories: + await self.index_memory(memory) +``` + +## Creating Your First Plugin + +### Step 1: Generate Boilerplate + +```bash +uv run python scripts/create_plugin.py todo_extractor +``` + +### Step 2: Define Plugin Logic + +```python +""" +Todo Extractor Plugin - Extracts action items from conversations. 
+""" +import logging +import re +from typing import Any, Dict, List, Optional + +from ..base import BasePlugin, PluginContext, PluginResult + +logger = logging.getLogger(__name__) + + +class TodoExtractorPlugin(BasePlugin): + """Extract and save action items from conversations.""" + + SUPPORTED_ACCESS_LEVELS = ['conversation'] + + def __init__(self, config: Dict[str, Any]): + super().__init__(config) + self.todo_patterns = [ + r'I need to (.+)', + r'I should (.+)', + r'TODO: (.+)', + r'reminder to (.+)', + ] + + async def initialize(self): + if not self.enabled: + return + + logger.info("TodoExtractor plugin initialized") + + async def on_conversation_complete(self, context: PluginContext): + try: + transcript = context.data.get('transcript', '') + todos = self._extract_todos(transcript) + + if todos: + await self._save_todos(context.user_id, todos) + + return PluginResult( + success=True, + message=f"Extracted {len(todos)} action items", + data={'todos': todos} + ) + + return PluginResult(success=True, message="No action items found") + + except Exception as e: + logger.error(f"Error extracting todos: {e}") + return PluginResult(success=False, message=str(e)) + + def _extract_todos(self, transcript: str) -> List[str]: + """Extract todo items from transcript.""" + todos = [] + + for pattern in self.todo_patterns: + matches = re.findall(pattern, transcript, re.IGNORECASE) + todos.extend(matches) + + return list(set(todos)) # Remove duplicates + + async def _save_todos(self, user_id: str, todos: List[str]): + """Save todos to database or external service.""" + from advanced_omi_backend.database import get_database + + db = get_database() + for todo in todos: + await db['todos'].insert_one({ + 'user_id': user_id, + 'task': todo, + 'completed': False, + 'created_at': datetime.utcnow() + }) +``` + +### Step 3: Configure Plugin + +`config/plugins.yml`: + +```yaml +plugins: + todo_extractor: + enabled: true + events: + - conversation.complete + condition: + type: always 
+``` + +### Step 4: Test Plugin + +1. Restart backend: `docker compose restart` +2. Create a conversation with phrases like "I need to buy milk" +3. Check logs: `docker compose logs -f chronicle-backend | grep TodoExtractor` +4. Verify todos in database + +## Configuration + +### YAML Configuration + +`config/plugins.yml`: + +```yaml +plugins: + my_plugin: + # Basic Configuration + enabled: true # Enable/disable plugin + + # Event Subscriptions + events: + - conversation.complete + - memory.processed + + # Execution Conditions + condition: + type: always # always, wake_word, regex + # wake_words: ["hey assistant"] # For wake_word type + # pattern: "urgent" # For regex type + + # Custom Configuration + api_url: ${MY_API_URL} # Environment variable + timeout: 30 + max_retries: 3 +``` + +### Environment Variables + +Use `${VAR_NAME}` syntax: + +```yaml +api_key: ${MY_API_KEY} +base_url: ${BASE_URL:-http://localhost:8000} # With default +``` + +Add to `.env`: + +```bash +MY_API_KEY=your-key-here +BASE_URL=https://api.example.com +``` + +### Condition Types + +**Always Execute**: +```yaml +condition: + type: always +``` + +**Wake Word** (transcript events only): +```yaml +condition: + type: wake_word + wake_words: + - hey assistant + - computer +``` + +**Regex Pattern**: +```yaml +condition: + type: regex + pattern: "urgent|important" +``` + +## Testing Plugins + +### Unit Tests + +`tests/test_my_plugin.py`: + +```python +import pytest +from plugins.my_plugin import MyPlugin +from plugins.base import PluginContext + +class TestMyPlugin: + def test_plugin_initialization(self): + config = {'enabled': True, 'events': ['conversation.complete']} + plugin = MyPlugin(config) + assert plugin.enabled is True + + @pytest.mark.asyncio + async def test_conversation_processing(self): + plugin = MyPlugin({'enabled': True}) + await plugin.initialize() + + context = PluginContext( + user_id='test-user', + event='conversation.complete', + data={'transcript': 'Test transcript'} + ) + + 
result = await plugin.on_conversation_complete(context) + assert result.success is True +``` + +### Integration Testing + +1. **Enable Test Plugin**: +```yaml +test_event: + enabled: true + events: + - conversation.complete +``` + +2. **Check Logs**: +```bash +docker compose logs -f | grep "test_event" +``` + +3. **Upload Test Audio**: +```bash +curl -X POST http://localhost:8000/api/process-audio-files \ + -H "Authorization: Bearer $TOKEN" \ + -F "files=@test.wav" +``` + +### Manual Testing Checklist + +- [ ] Plugin loads without errors +- [ ] Configuration validates correctly +- [ ] Events trigger plugin execution +- [ ] Plugin logic executes successfully +- [ ] Errors are handled gracefully +- [ ] Logs provide useful information + +## Best Practices + +### 1. Error Handling + +Always wrap logic in try-except: + +```python +async def on_conversation_complete(self, context): + try: + # Your logic + result = await self.process(context) + return PluginResult(success=True, data=result) + except Exception as e: + logger.error(f"Error: {e}", exc_info=True) + return PluginResult(success=False, message=str(e)) +``` + +### 2. Logging + +Use appropriate log levels: + +```python +logger.debug("Detailed debug information") +logger.info("Important milestones") +logger.warning("Non-critical issues") +logger.error("Errors that need attention") +``` + +### 3. Resource Management + +Clean up in `cleanup()`: + +```python +async def initialize(self): + self.client = ExternalClient() + await self.client.connect() + +async def cleanup(self): + if self.client: + await self.client.disconnect() +``` + +### 4. Configuration Validation + +Validate in `initialize()`: + +```python +async def initialize(self): + if not self.config.get('api_key'): + raise ValueError("API key is required") + + if self.config.get('timeout', 0) <= 0: + raise ValueError("Timeout must be positive") +``` + +### 5. 
Async Best Practices + +Use `asyncio.to_thread()` for blocking operations: + +```python +import asyncio + +async def my_method(self): + # Run blocking operation in thread pool + result = await asyncio.to_thread(blocking_function, arg1, arg2) + return result +``` + +### 6. Database Access + +Use the global database handle: + +```python +from advanced_omi_backend.database import get_database + +async def save_data(self, data): + db = get_database() + await db['my_collection'].insert_one(data) +``` + +### 7. LLM Access + +Use the global LLM client: + +```python +from advanced_omi_backend.llm_client import async_generate + +async def generate_summary(self, text): + prompt = f"Summarize: {text}" + summary = await async_generate(prompt) + return summary +``` + +## Examples + +### Example 1: Slack Notifier + +```python +class SlackNotifierPlugin(BasePlugin): + SUPPORTED_ACCESS_LEVELS = ['conversation'] + + async def initialize(self): + self.webhook_url = self.config.get('slack_webhook_url') + if not self.webhook_url: + raise ValueError("Slack webhook URL required") + + async def on_conversation_complete(self, context): + transcript = context.data.get('transcript', '') + duration = context.data.get('duration', 0) + + message = { + "text": f"New conversation ({duration:.1f}s)", + "blocks": [{ + "type": "section", + "text": {"type": "mrkdwn", "text": f"```{transcript[:500]}```"} + }] + } + + async with aiohttp.ClientSession() as session: + await session.post(self.webhook_url, json=message) + + return PluginResult(success=True, message="Notification sent") +``` + +### Example 2: Keyword Alerter + +```python +class KeywordAlerterPlugin(BasePlugin): + SUPPORTED_ACCESS_LEVELS = ['transcript'] + + async def on_transcript(self, context): + transcript = context.data.get('transcript', '') + keywords = self.config.get('keywords', []) + + for keyword in keywords: + if keyword.lower() in transcript.lower(): + await self.send_alert(keyword, transcript) + return PluginResult( + 
success=True, + message=f"Alert sent for keyword: {keyword}" + ) + + return PluginResult(success=True) +``` + +### Example 3: Analytics Tracker + +```python +class AnalyticsTrackerPlugin(BasePlugin): + SUPPORTED_ACCESS_LEVELS = ['conversation', 'memory'] + + async def on_conversation_complete(self, context): + duration = context.data.get('duration', 0) + word_count = len(context.data.get('transcript', '').split()) + + await self.track_event('conversation_complete', { + 'user_id': context.user_id, + 'duration': duration, + 'word_count': word_count, + }) + + return PluginResult(success=True) + + async def on_memory_processed(self, context): + memory_count = context.data.get('memory_count', 0) + + await self.track_event('memory_processed', { + 'user_id': context.user_id, + 'memory_count': memory_count, + }) + + return PluginResult(success=True) +``` + +## Troubleshooting + +### Plugin Not Loading + +**Check logs**: +```bash +docker compose logs chronicle-backend | grep "plugin" +``` + +**Common issues**: +- Plugin directory name doesn't match class name convention +- Missing `__init__.py` or incorrect exports +- Syntax errors in plugin.py +- Not inheriting from `BasePlugin` + +**Solution**: +1. Verify directory structure matches: `plugins/my_plugin/` +2. Class name should be: `MyPluginPlugin` +3. Export in `__init__.py`: `from .plugin import MyPluginPlugin` + +### Plugin Enabled But Not Executing + +**Check**: +- Plugin enabled in `plugins.yml` +- Correct events subscribed +- Condition matches (wake_word, regex, etc.) + +**Debug**: +```python +async def on_conversation_complete(self, context): + logger.info(f"Plugin executed! 
Context: {context}") + # Your logic +``` + +### Configuration Errors + +**Error**: `Environment variable not found` + +**Solution**: +- Add variable to `.env` file +- Use default values: `${VAR:-default}` +- Check variable name spelling + +### Import Errors + +**Error**: `ModuleNotFoundError` + +**Solution**: +- Restart backend after adding dependencies +- Verify imports are from correct modules +- Check relative imports use `..base` for base classes + +### Database Connection Issues + +**Error**: `Database connection failed` + +**Solution**: +```python +from advanced_omi_backend.database import get_database + +async def my_method(self): + db = get_database() # Global database handle + # Use db... +``` + +## Advanced Topics + +### Custom Conditions + +Implement custom condition checking: + +```python +async def on_conversation_complete(self, context): + # Custom condition check + if not self._should_execute(context): + return PluginResult(success=True, message="Skipped") + + # Your logic + ... + +def _should_execute(self, context): + # Custom logic + duration = context.data.get('duration', 0) + return duration > 60 # Only process long conversations +``` + +### Plugin Dependencies + +Share data between plugins using context metadata: + +```python +# Plugin A +async def on_conversation_complete(self, context): + context.metadata['extracted_keywords'] = ['important', 'urgent'] + return PluginResult(success=True) + +# Plugin B (executes after Plugin A) +async def on_conversation_complete(self, context): + keywords = context.metadata.get('extracted_keywords', []) + # Use keywords... 
+``` + +### External Service Integration + +```python +import aiohttp + +class ExternalServicePlugin(BasePlugin): + async def initialize(self): + self.session = aiohttp.ClientSession() + self.api_url = self.config.get('api_url') + self.api_key = self.config.get('api_key') + + async def cleanup(self): + await self.session.close() + + async def on_conversation_complete(self, context): + async with self.session.post( + self.api_url, + headers={'Authorization': f'Bearer {self.api_key}'}, + json={'transcript': context.data.get('transcript')} + ) as response: + result = await response.json() + return PluginResult(success=True, data=result) +``` + +## Resources + +- **Base Plugin Class**: `backends/advanced/src/advanced_omi_backend/plugins/base.py` +- **Example Plugins**: + - Email Summarizer: `plugins/email_summarizer/` + - Home Assistant: `plugins/homeassistant/` + - Test Event: `plugins/test_event/` +- **Plugin Generator**: `scripts/create_plugin.py` +- **Configuration**: `config/plugins.yml.template` + +## Contributing Plugins + +Want to share your plugin with the community? + +1. Create a well-documented plugin +2. Add comprehensive README +3. Include configuration examples +4. Test thoroughly +5. Submit PR to Chronicle repository + +## Support + +- **GitHub Issues**: [chronicle-ai/chronicle/issues](https://github.com/chronicle-ai/chronicle/issues) +- **Discussions**: [chronicle-ai/chronicle/discussions](https://github.com/chronicle-ai/chronicle/discussions) +- **Documentation**: [Chronicle Docs](https://github.com/chronicle-ai/chronicle) + +Happy plugin development! 
πŸš€ diff --git a/backends/advanced/init.py b/backends/advanced/init.py index dddbfdcb..7aa4f6aa 100644 --- a/backends/advanced/init.py +++ b/backends/advanced/init.py @@ -5,7 +5,6 @@ """ import argparse -import getpass import os import platform import secrets @@ -22,9 +21,15 @@ from rich.prompt import Confirm, Prompt from rich.text import Text -# Add repo root to path for config_manager import +# Add repo root to path for imports sys.path.insert(0, str(Path(__file__).resolve().parent.parent.parent)) from config_manager import ConfigManager +from setup_utils import ( + prompt_password as util_prompt_password, + prompt_with_existing_masked, + mask_value, + read_env_value +) class ChronicleSetup: @@ -49,6 +54,9 @@ def __init__(self, args=None): self.console.print("[red][ERROR][/red] Run wizard.py from project root to create config.yml") sys.exit(1) + # Ensure plugins.yml exists (copy from template if missing) + self._ensure_plugins_yml_exists() + def print_header(self, title: str): """Print a colorful header""" self.console.print() @@ -76,19 +84,8 @@ def prompt_value(self, prompt: str, default: str = "") -> str: return default def prompt_password(self, prompt: str) -> str: - """Prompt for password (hidden input)""" - while True: - try: - password = getpass.getpass(f"{prompt}: ") - if len(password) >= 8: - return password - self.console.print("[yellow][WARNING][/yellow] Password must be at least 8 characters") - except (EOFError, KeyboardInterrupt): - # For non-interactive environments, generate a secure password - self.console.print("[yellow][WARNING][/yellow] Non-interactive environment detected") - password = f"admin-{secrets.token_hex(8)}" - self.console.print(f"Generated secure password: {password}") - return password + """Prompt for password (delegates to shared utility)""" + return util_prompt_password(prompt, min_length=8, allow_generated=True) def prompt_choice(self, prompt: str, choices: Dict[str, str], default: str = "1") -> str: """Prompt for a choice from 
options""" @@ -107,6 +104,26 @@ def prompt_choice(self, prompt: str, choices: Dict[str, str], default: str = "1" self.console.print(f"Using default choice: {default}") return default + def _ensure_plugins_yml_exists(self): + """Ensure plugins.yml exists by copying from template if missing.""" + plugins_yml = Path("../../config/plugins.yml") + plugins_template = Path("../../config/plugins.yml.template") + + if not plugins_yml.exists(): + if plugins_template.exists(): + self.console.print("[blue][INFO][/blue] plugins.yml not found, creating from template...") + shutil.copy2(plugins_template, plugins_yml) + self.console.print(f"[green]βœ…[/green] Created {plugins_yml} from template") + self.console.print("[yellow][NOTE][/yellow] Edit config/plugins.yml to configure plugins") + self.console.print("[yellow][NOTE][/yellow] Set HA_TOKEN in .env for Home Assistant integration") + else: + raise RuntimeError( + f"Template file not found: {plugins_template}\n" + f"The repository structure is incomplete. Please ensure config/plugins.yml.template exists." 
+ ) + else: + self.console.print(f"[blue][INFO][/blue] Found existing {plugins_yml}") + def backup_existing_env(self): """Backup existing .env file""" env_path = Path(".env") @@ -117,24 +134,38 @@ def backup_existing_env(self): self.console.print(f"[blue][INFO][/blue] Backed up existing .env file to {backup_path}") def read_existing_env_value(self, key: str) -> str: - """Read a value from existing .env file""" - env_path = Path(".env") - if not env_path.exists(): - return None - - value = get_key(str(env_path), key) - # get_key returns None if key doesn't exist or value is empty - return value if value else None + """Read a value from existing .env file (delegates to shared utility)""" + return read_env_value(".env", key) def mask_api_key(self, key: str, show_chars: int = 5) -> str: - """Mask API key showing only first and last few characters""" - if not key or len(key) <= show_chars * 2: - return key - - # Remove quotes if present - key_clean = key.strip("'\"") - - return f"{key_clean[:show_chars]}{'*' * min(15, len(key_clean) - show_chars * 2)}{key_clean[-show_chars:]}" + """Mask API key (delegates to shared utility)""" + return mask_value(key, show_chars) + + def prompt_with_existing_masked(self, prompt_text: str, env_key: str, placeholders: list, + is_password: bool = False, default: str = "") -> str: + """ + Prompt for a value, showing masked existing value from .env if present. + Delegates to shared utility from setup_utils. 
+ + Args: + prompt_text: The prompt to display + env_key: The .env key to check for existing value + placeholders: List of placeholder values to treat as "not set" + is_password: Whether to mask the value (for passwords/tokens) + default: Default value if no existing value + + Returns: + User input value, existing value if reused, or default + """ + # Use shared utility with auto-read from .env + return prompt_with_existing_masked( + prompt_text=prompt_text, + env_file_path=".env", + env_key=env_key, + placeholders=placeholders, + is_password=is_password, + default=default + ) def setup_authentication(self): @@ -192,15 +223,14 @@ def setup_transcription(self): self.console.print("[blue][INFO][/blue] Deepgram selected") self.console.print("Get your API key from: https://console.deepgram.com/") - # Check for existing API key - existing_key = self.read_existing_env_value("DEEPGRAM_API_KEY") - if existing_key and existing_key not in ['your_deepgram_api_key_here', 'your-deepgram-key-here']: - masked_key = self.mask_api_key(existing_key) - prompt_text = f"Deepgram API key ({masked_key}) [press Enter to reuse, or enter new]" - api_key_input = self.prompt_value(prompt_text, "") - api_key = api_key_input if api_key_input else existing_key - else: - api_key = self.prompt_value("Deepgram API key (leave empty to skip)", "") + # Use the new masked prompt function + api_key = self.prompt_with_existing_masked( + prompt_text="Deepgram API key (leave empty to skip)", + env_key="DEEPGRAM_API_KEY", + placeholders=['your_deepgram_api_key_here', 'your-deepgram-key-here'], + is_password=True, + default="" + ) if api_key: # Write API key to .env @@ -250,15 +280,14 @@ def setup_llm(self): self.console.print("[blue][INFO][/blue] OpenAI selected") self.console.print("Get your API key from: https://platform.openai.com/api-keys") - # Check for existing API key - existing_key = self.read_existing_env_value("OPENAI_API_KEY") - if existing_key and existing_key not in ['your_openai_api_key_here', 
'your-openai-key-here']: - masked_key = self.mask_api_key(existing_key) - prompt_text = f"OpenAI API key ({masked_key}) [press Enter to reuse, or enter new]" - api_key_input = self.prompt_value(prompt_text, "") - api_key = api_key_input if api_key_input else existing_key - else: - api_key = self.prompt_value("OpenAI API key (leave empty to skip)", "") + # Use the new masked prompt function + api_key = self.prompt_with_existing_masked( + prompt_text="OpenAI API key (leave empty to skip)", + env_key="OPENAI_API_KEY", + placeholders=['your_openai_api_key_here', 'your-openai-key-here'], + is_password=True, + default="" + ) if api_key: self.config["OPENAI_API_KEY"] = api_key @@ -370,6 +399,11 @@ def setup_optional_services(self): self.config["PARAKEET_ASR_URL"] = self.args.parakeet_asr_url self.console.print(f"[green][SUCCESS][/green] Parakeet ASR configured via args: {self.args.parakeet_asr_url}") + # Check if Tailscale auth key provided via args + if hasattr(self.args, 'ts_authkey') and self.args.ts_authkey: + self.config["TS_AUTHKEY"] = self.args.ts_authkey + self.console.print(f"[green][SUCCESS][/green] Tailscale auth key configured (Docker integration enabled)") + def setup_obsidian(self): """Configure Obsidian/Neo4j integration""" # Check if enabled via command line @@ -413,6 +447,16 @@ def setup_obsidian(self): self.console.print("[green][SUCCESS][/green] Obsidian/Neo4j configured") self.console.print("[blue][INFO][/blue] Neo4j will start automatically with --profile obsidian") + else: + # Explicitly disable Obsidian in config.yml when not enabled + self.config_manager.update_memory_config({ + "obsidian": { + "enabled": False, + "neo4j_host": "neo4j-mem0", + "timeout": 30 + } + }) + self.console.print("[blue][INFO][/blue] Obsidian/Neo4j integration disabled") def setup_network(self): """Configure network settings""" @@ -443,14 +487,14 @@ def setup_https(self): self.console.print("[blue][INFO][/blue] For distributed deployments, use your Tailscale IP (e.g., 
100.64.1.2)") self.console.print("[blue][INFO][/blue] For local-only access, use 'localhost'") - # Check for existing SERVER_IP - existing_ip = self.read_existing_env_value("SERVER_IP") - if existing_ip and existing_ip not in ['localhost', 'your-server-ip-here']: - prompt_text = f"Server IP/Domain for SSL certificate ({existing_ip}) [press Enter to reuse, or enter new]" - server_ip_input = self.prompt_value(prompt_text, "") - server_ip = server_ip_input if server_ip_input else existing_ip - else: - server_ip = self.prompt_value("Server IP/Domain for SSL certificate (Tailscale IP or localhost)", "localhost") + # Use the new masked prompt function (not masked for IP, but shows existing) + server_ip = self.prompt_with_existing_masked( + prompt_text="Server IP/Domain for SSL certificate (Tailscale IP or localhost)", + env_key="SERVER_IP", + placeholders=['localhost', 'your-server-ip-here'], + is_password=False, + default="localhost" + ) if enable_https: @@ -707,6 +751,8 @@ def main(): help="Enable Obsidian/Neo4j integration (default: prompt user)") parser.add_argument("--neo4j-password", help="Neo4j password (default: prompt user)") + parser.add_argument("--ts-authkey", + help="Tailscale auth key for Docker integration (default: prompt user)") args = parser.parse_args() diff --git a/backends/advanced/pyproject.toml b/backends/advanced/pyproject.toml index e7bcb50a..c5d17b00 100644 --- a/backends/advanced/pyproject.toml +++ b/backends/advanced/pyproject.toml @@ -21,6 +21,7 @@ dependencies = [ "httpx>=0.28.0,<1.0.0", "fastapi-users[beanie]>=14.0.1", "PyYAML>=6.0.1", + "omegaconf>=2.3.0", "langfuse>=3.3.0", "spacy>=3.8.2", "en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl", @@ -114,4 +115,5 @@ test = [ "requests-mock>=1.12.1", "pytest-json-report>=1.5.0", "pytest-html>=4.0.0", + "aiosqlite>=0.20.0", # For test plugin event storage ] diff --git a/backends/advanced/run-test.sh 
b/backends/advanced/run-test.sh index 01204be6..61fd7d55 100755 --- a/backends/advanced/run-test.sh +++ b/backends/advanced/run-test.sh @@ -91,6 +91,29 @@ if [ -n "$_CONFIG_FILE_OVERRIDE" ]; then print_info "Using command-line override: CONFIG_FILE=$CONFIG_FILE" fi +# Load HF_TOKEN from speaker-recognition/.env (proper location for this credential) +SPEAKER_ENV="../../extras/speaker-recognition/.env" +if [ -f "$SPEAKER_ENV" ] && [ -z "$HF_TOKEN" ]; then + print_info "Loading HF_TOKEN from speaker-recognition service..." + set -a + source "$SPEAKER_ENV" + set +a +fi + +# Display HF_TOKEN status with masking +if [ -n "$HF_TOKEN" ]; then + if [ ${#HF_TOKEN} -gt 15 ]; then + MASKED_TOKEN="${HF_TOKEN:0:5}***************${HF_TOKEN: -5}" + else + MASKED_TOKEN="***************" + fi + print_info "HF_TOKEN configured: $MASKED_TOKEN" + export HF_TOKEN +else + print_warning "HF_TOKEN not found - speaker recognition tests may fail" + print_info "Configure via wizard: uv run --with-requirements ../../setup-requirements.txt python ../../wizard.py" +fi + # Set default CONFIG_FILE if not provided # This allows testing with different provider combinations # Usage: CONFIG_FILE=../../tests/configs/parakeet-ollama.yml ./run-test.sh @@ -166,6 +189,18 @@ if [ ! -f "diarization_config.json" ] && [ -f "diarization_config.json.template" print_success "diarization_config.json created" fi +# Ensure plugins.yml exists (required for Docker volume mount) +if [ ! -f "../../config/plugins.yml" ]; then + if [ -f "../../config/plugins.yml.template" ]; then + print_info "Creating config/plugins.yml from template..." 
+ cp ../../config/plugins.yml.template ../../config/plugins.yml + print_success "config/plugins.yml created" + else + print_error "config/plugins.yml.template not found - repository structure incomplete" + exit 1 + fi +fi + # Note: Robot Framework dependencies are managed via tests/test-requirements.txt # The integration tests use Docker containers for service dependencies @@ -176,10 +211,16 @@ print_info "Using environment variables from .env file for test configuration" # Clean test environment print_info "Cleaning test environment..." -sudo rm -rf ./test_audio_chunks/ ./test_data/ ./test_debug_dir/ ./mongo_data_test/ ./qdrant_data_test/ ./test_neo4j/ || true +rm -rf ./test_audio_chunks/ ./test_data/ ./test_debug_dir/ ./mongo_data_test/ ./qdrant_data_test/ ./test_neo4j/ 2>/dev/null || true + +# If cleanup fails due to permissions, try with docker +if [ -d "./data/test_audio_chunks/" ] || [ -d "./data/test_data/" ] || [ -d "./data/test_debug_dir/" ]; then + print_warning "Permission denied, using docker to clean test directories..." + docker run --rm -v "$(pwd)/data:/data" alpine sh -c 'rm -rf /data/test_*' 2>/dev/null || true +fi -# Use unique project name to avoid conflicts with development environment -export COMPOSE_PROJECT_NAME="advanced-backend-test" +# Note: Project name 'backend-test' is set in docker-compose-test.yml +# No need to export COMPOSE_PROJECT_NAME - it's handled by the compose file # Stop any existing test containers print_info "Stopping existing test containers..." @@ -211,8 +252,9 @@ export TEST_MODE=dev # Run the Robot Framework integration tests with extended timeout (mem0 needs time for comprehensive extraction) # IMPORTANT: Robot tests must be run from the repository root where backends/ and tests/ are siblings +# Run full test suite from tests/integration/ directory (includes all test files) print_info "Starting Robot Framework integration tests (timeout: 15 minutes)..." -if (cd ../.. 
&& timeout 900 robot --outputdir test-results --loglevel INFO tests/integration/integration_test.robot); then +if (cd ../.. && timeout 900 uv run --with-requirements tests/test-requirements.txt robot --outputdir test-results --loglevel INFO tests/integration/); then print_success "Integration tests completed successfully!" else TEST_EXIT_CODE=$? diff --git a/backends/advanced/scripts/create_plugin.py b/backends/advanced/scripts/create_plugin.py new file mode 100755 index 00000000..a38a3570 --- /dev/null +++ b/backends/advanced/scripts/create_plugin.py @@ -0,0 +1,437 @@ +#!/usr/bin/env python3 +""" +Plugin Generator Script for Chronicle. + +Creates boilerplate plugin structure with templates and examples. + +Usage: + uv run python scripts/create_plugin.py my_awesome_plugin +""" +import argparse +import os +import shutil +import sys +from pathlib import Path + + +def snake_to_pascal(snake_str: str) -> str: + """Convert snake_case to PascalCase.""" + return ''.join(word.capitalize() for word in snake_str.split('_')) + + +def create_plugin(plugin_name: str, force: bool = False): + """ + Create a new plugin with boilerplate structure. 
+ + Args: + plugin_name: Plugin name in snake_case (e.g., my_awesome_plugin) + force: Overwrite existing plugin if True + """ + # Validate plugin name + if not plugin_name.replace('_', '').isalnum(): + print(f"❌ Error: Plugin name must be alphanumeric with underscores") + print(f" Got: {plugin_name}") + print(f" Example: my_awesome_plugin") + sys.exit(1) + + # Convert to class name + class_name = snake_to_pascal(plugin_name) + 'Plugin' + + # Get plugins directory + script_dir = Path(__file__).parent + backend_dir = script_dir.parent + plugins_dir = backend_dir / 'src' / 'advanced_omi_backend' / 'plugins' + plugin_dir = plugins_dir / plugin_name + + # Check if plugin already exists + if plugin_dir.exists(): + if not force: + print(f"❌ Error: Plugin '{plugin_name}' already exists at {plugin_dir}") + print(f" Use --force to overwrite") + sys.exit(1) + else: + # Remove existing directory when using --force + print(f"πŸ—‘οΈ Removing existing plugin directory: {plugin_dir}") + shutil.rmtree(plugin_dir) + + # Create plugin directory + print(f"πŸ“ Creating plugin directory: {plugin_dir}") + plugin_dir.mkdir(parents=True, exist_ok=True) + + # Create __init__.py + init_content = f'''""" +{class_name} for Chronicle. + +[Brief description of what your plugin does] +""" + +from .plugin import {class_name} + +__all__ = ['{class_name}'] +''' + + init_file = plugin_dir / '__init__.py' + print(f"πŸ“ Creating {init_file}") + init_file.write_text(init_content) + + # Create plugin.py with template + plugin_content = f'''""" +{class_name} implementation. + +This plugin [describe what it does]. 
+""" +import logging +from typing import Any, Dict, List, Optional + +from ..base import BasePlugin, PluginContext, PluginResult + +logger = logging.getLogger(__name__) + + +class {class_name}(BasePlugin): + """ + [Plugin description] + + Subscribes to: [list events you want to subscribe to] + - transcript.streaming: Real-time transcript segments + - conversation.complete: When conversation finishes + - memory.processed: After memory extraction + + Configuration (config/plugins.yml): + {plugin_name}: + enabled: true + events: + - conversation.complete # Change to your event + condition: + type: always # or wake_word, regex, etc. + # Your custom config here: + my_setting: ${{MY_ENV_VAR}} + """ + + # Declare which access levels this plugin supports + # Options: 'transcript', 'conversation', 'memory' + SUPPORTED_ACCESS_LEVELS: List[str] = ['conversation'] + + def __init__(self, config: Dict[str, Any]): + """ + Initialize plugin with configuration. + + Args: + config: Plugin configuration from config/plugins.yml + """ + super().__init__(config) + + # Load your custom configuration + self.my_setting = config.get('my_setting', 'default_value') + + logger.info(f"{class_name} configuration loaded") + + async def initialize(self): + """ + Initialize plugin resources. + + Called during application startup. + Use this to: + - Connect to external services + - Initialize clients + - Validate configuration + - Set up resources + + Raises: + Exception: If initialization fails + """ + if not self.enabled: + logger.info(f"{class_name} is disabled, skipping initialization") + return + + logger.info(f"Initializing {class_name}...") + + # TODO: Add your initialization code here + # Example: + # self.client = SomeClient(self.my_setting) + # await self.client.connect() + + logger.info(f"βœ… {class_name} initialized successfully") + + async def cleanup(self): + """ + Clean up plugin resources. + + Called during application shutdown. 
+ Use this to: + - Close connections + - Save state + - Release resources + """ + logger.info(f"{class_name} cleanup complete") + + # Implement the methods for events you subscribed to: + + async def on_transcript(self, context: PluginContext) -> Optional[PluginResult]: + """ + Handle transcript.streaming events. + + Context data contains: + - transcript: str - The transcript text + - segment_id: str - Unique segment identifier + - conversation_id: str - Current conversation ID + + For wake_word conditions, router adds: + - command: str - Command with wake word stripped + - original_transcript: str - Full transcript + + Args: + context: Plugin context with transcript data + + Returns: + PluginResult with success status and optional message + """ + # TODO: Implement if you subscribed to transcript.streaming + pass + + async def on_conversation_complete(self, context: PluginContext) -> Optional[PluginResult]: + """ + Handle conversation.complete events. + + Context data contains: + - conversation: dict - Full conversation data + - transcript: str - Complete transcript + - duration: float - Conversation duration + - conversation_id: str - Conversation identifier + + Args: + context: Plugin context with conversation data + + Returns: + PluginResult with success status and optional message + """ + try: + logger.info(f"Processing conversation complete event for user: {{context.user_id}}") + + # Extract data from context + conversation = context.data.get('conversation', {{}}) + transcript = context.data.get('transcript', '') + duration = context.data.get('duration', 0) + conversation_id = context.data.get('conversation_id', 'unknown') + + # TODO: Add your plugin logic here + # Example: + # - Process the transcript + # - Call external APIs + # - Store data + # - Trigger actions + + logger.info(f"Processed conversation {{conversation_id}}") + + return PluginResult( + success=True, + message="Processing complete", + data={{'conversation_id': conversation_id}} + ) + + except 
Exception as e: + logger.error(f"Error in {class_name}: {{e}}", exc_info=True) + return PluginResult( + success=False, + message=f"Error: {{str(e)}}" + ) + + async def on_memory_processed(self, context: PluginContext) -> Optional[PluginResult]: + """ + Handle memory.processed events. + + Context data contains: + - memories: list - Extracted memories + - conversation: dict - Source conversation + - memory_count: int - Number of memories created + - conversation_id: str - Conversation identifier + + Args: + context: Plugin context with memory data + + Returns: + PluginResult with success status and optional message + """ + # TODO: Implement if you subscribed to memory.processed + pass + + # Add your custom helper methods here: + + async def _my_helper_method(self, data: Any) -> Any: + """ + Example helper method. + + Args: + data: Input data + + Returns: + Processed data + """ + # TODO: Implement your helper logic + pass +''' + + plugin_file = plugin_dir / 'plugin.py' + print(f"πŸ“ Creating {plugin_file}") + plugin_file.write_text(plugin_content) + + # Create README.md + readme_content = f'''# {class_name} + +[Brief description of what your plugin does] + +## Features + +- Feature 1 +- Feature 2 +- Feature 3 + +## Configuration + +### Step 1: Environment Variables + +Add to `backends/advanced/.env`: + +```bash +# {class_name} Configuration +MY_ENV_VAR=your-value-here +``` + +### Step 2: Plugin Configuration + +Add to `config/plugins.yml`: + +```yaml +plugins: + {plugin_name}: + enabled: true + events: + - conversation.complete # Change to your event + condition: + type: always + + # Your custom configuration + my_setting: ${{MY_ENV_VAR}} +``` + +### Step 3: Restart Backend + +```bash +cd backends/advanced +docker compose restart +``` + +## How It Works + +1. [Step 1 description] +2. [Step 2 description] +3. 
[Step 3 description] + +## Configuration Options + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `my_setting` | string | `default` | Description of setting | + +## Testing + +```bash +# Add testing instructions here +``` + +## Troubleshooting + +### Issue 1 + +Solution 1 + +### Issue 2 + +Solution 2 + +## Development + +### File Structure + +``` +plugins/{plugin_name}/ +β”œβ”€β”€ __init__.py # Plugin exports +β”œβ”€β”€ plugin.py # Main plugin logic +└── README.md # This file +``` + +## License + +MIT License - see project LICENSE file for details. +''' + + readme_file = plugin_dir / 'README.md' + print(f"πŸ“ Creating {readme_file}") + readme_file.write_text(readme_content) + + # Print success message and next steps + print(f"\nβœ… Plugin '{plugin_name}' created successfully!\n") + print(f"πŸ“ Location: {plugin_dir}\n") + print(f"πŸ“‹ Next steps:") + print(f" 1. Edit {plugin_file}") + print(f" - Implement your plugin logic") + print(f" - Choose which events to subscribe to") + print(f" - Add your configuration options") + print(f"") + print(f" 2. Update config/plugins.yml:") + print(f" ```yaml") + print(f" plugins:") + print(f" {plugin_name}:") + print(f" enabled: true") + print(f" events:") + print(f" - conversation.complete") + print(f" condition:") + print(f" type: always") + print(f" ```") + print(f"") + print(f" 3. Add environment variables to .env (if needed)") + print(f"") + print(f" 4. 
Restart backend:") + print(f" cd backends/advanced && docker compose restart") + print(f"") + print(f"πŸ“– Resources:") + print(f" - Plugin development guide: docs/plugin-development-guide.md") + print(f" - Example plugin: plugins/email_summarizer/") + print(f" - Base plugin class: plugins/base.py") + + +def main(): + parser = argparse.ArgumentParser( + description='Create a new Chronicle plugin with boilerplate structure', + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=''' +Examples: + uv run python scripts/create_plugin.py my_awesome_plugin + uv run python scripts/create_plugin.py slack_notifier + uv run python scripts/create_plugin.py todo_extractor --force + ''' + ) + parser.add_argument( + 'plugin_name', + help='Plugin name in snake_case (e.g., my_awesome_plugin)' + ) + parser.add_argument( + '--force', '-f', + action='store_true', + help='Overwrite existing plugin if it exists' + ) + + args = parser.parse_args() + + try: + create_plugin(args.plugin_name, force=args.force) + except KeyboardInterrupt: + print("\n\n❌ Plugin creation cancelled") + sys.exit(1) + except Exception as e: + print(f"\n❌ Error creating plugin: {e}") + sys.exit(1) + + +if __name__ == '__main__': + main() diff --git a/backends/advanced/scripts/laptop_client.py b/backends/advanced/scripts/laptop_client.py index 385a4a1b..a0047f3b 100644 --- a/backends/advanced/scripts/laptop_client.py +++ b/backends/advanced/scripts/laptop_client.py @@ -15,7 +15,7 @@ # Default WebSocket settings DEFAULT_HOST = "localhost" DEFAULT_PORT = 8000 -DEFAULT_ENDPOINT = "/ws_pcm" +DEFAULT_ENDPOINT = "/ws?codec=pcm" # Audio format will be determined from the InputMicStream instance diff --git a/backends/advanced/src/advanced_omi_backend/app_config.py b/backends/advanced/src/advanced_omi_backend/app_config.py index 1e24fb54..c87398f3 100644 --- a/backends/advanced/src/advanced_omi_backend/app_config.py +++ b/backends/advanced/src/advanced_omi_backend/app_config.py @@ -29,8 +29,7 @@ class AppConfig: 
def __init__(self): # MongoDB Configuration self.mongodb_uri = os.getenv("MONGODB_URI", "mongodb://mongo:27017") - # default to legacy value to avoid breaking peoples .env - self.mongodb_database = os.getenv("MONGODB_DATABASE", "friend-lite") + self.mongodb_database = os.getenv("MONGODB_DATABASE", "chronicle") self.mongo_client = AsyncIOMotorClient(self.mongodb_uri) self.db = self.mongo_client.get_default_database(self.mongodb_database) self.users_col = self.db["users"] @@ -47,11 +46,6 @@ def __init__(self): os.getenv("NEW_CONVERSATION_TIMEOUT_MINUTES", "1.5") ) - # Audio cropping configuration - self.audio_cropping_enabled = os.getenv("AUDIO_CROPPING_ENABLED", "true").lower() == "true" - self.min_speech_segment_duration = float(os.getenv("MIN_SPEECH_SEGMENT_DURATION", "1.0")) - self.cropping_context_padding = float(os.getenv("CROPPING_CONTEXT_PADDING", "0.1")) - # Transcription Configuration (registry-based) self.transcription_provider = get_transcription_provider(None) if self.transcription_provider: diff --git a/backends/advanced/src/advanced_omi_backend/app_factory.py b/backends/advanced/src/advanced_omi_backend/app_factory.py index 7ccda184..4458ed9e 100644 --- a/backends/advanced/src/advanced_omi_backend/app_factory.py +++ b/backends/advanced/src/advanced_omi_backend/app_factory.py @@ -42,6 +42,52 @@ application_logger = logging.getLogger("audio_processing") +async def initialize_openmemory_user() -> None: + """Initialize and register OpenMemory user if using OpenMemory MCP provider. 
+ + This function: + - Checks if OpenMemory MCP is configured as the memory provider + - Registers the configured user with OpenMemory server + - Creates a test memory and deletes it to trigger user creation + - Logs success or warning if OpenMemory is not reachable + """ + from advanced_omi_backend.services.memory.config import build_memory_config_from_env, MemoryProvider + + memory_provider_config = build_memory_config_from_env() + + if memory_provider_config.memory_provider != MemoryProvider.OPENMEMORY_MCP: + return + + try: + from advanced_omi_backend.services.memory.providers.mcp_client import MCPClient + + # Get configured user_id and server_url + openmemory_config = memory_provider_config.openmemory_config + user_id = openmemory_config.get("user_id", "openmemory") if openmemory_config else "openmemory" + server_url = openmemory_config.get("server_url", "http://host.docker.internal:8765") if openmemory_config else "http://host.docker.internal:8765" + client_name = openmemory_config.get("client_name", "chronicle") if openmemory_config else "chronicle" + + application_logger.info(f"Registering OpenMemory user: {user_id} at {server_url}") + + # Make a lightweight registration call (create and delete dummy memory) + async with MCPClient(server_url=server_url, client_name=client_name, user_id=user_id) as client: + # Test connection first + is_connected = await client.test_connection() + if is_connected: + # Create and immediately delete a dummy memory to trigger user creation + memory_ids = await client.add_memories("Chronicle initialization - user registration test") + if memory_ids: + # Delete the test memory + await client.delete_memory(memory_ids[0]) + application_logger.info(f"βœ… Registered OpenMemory user: {user_id}") + else: + application_logger.warning(f"⚠️ OpenMemory MCP not reachable at {server_url}") + application_logger.info("User will be auto-created on first memory operation") + except Exception as e: + application_logger.warning(f"⚠️ Could not 
register OpenMemory user: {e}") + application_logger.info("User will be auto-created on first memory operation") + + @asynccontextmanager async def lifespan(app: FastAPI): """Manage application lifespan events.""" @@ -54,12 +100,14 @@ async def lifespan(app: FastAPI): try: from beanie import init_beanie from advanced_omi_backend.models.conversation import Conversation - from advanced_omi_backend.models.audio_file import AudioFile + from advanced_omi_backend.models.audio_chunk import AudioChunkDocument from advanced_omi_backend.models.user import User + from advanced_omi_backend.models.waveform import WaveformData + from advanced_omi_backend.models.annotation import Annotation await init_beanie( database=config.db, - document_models=[User, Conversation, AudioFile], + document_models=[User, Conversation, AudioChunkDocument, WaveformData, Annotation], ) application_logger.info("Beanie initialized for all document models") except Exception as e: @@ -111,6 +159,11 @@ async def lifespan(app: FastAPI): from advanced_omi_backend.services.audio_stream import AudioStreamProducer app.state.audio_stream_producer = AudioStreamProducer(app.state.redis_audio_stream) application_logger.info("βœ… Redis client for audio streaming producer initialized") + + # Initialize ClientManager Redis for cross-container clientβ†’user mapping + from advanced_omi_backend.client_manager import initialize_redis_for_client_manager + initialize_redis_for_client_manager(config.redis_url) + except Exception as e: application_logger.error(f"Failed to initialize Redis client for audio streaming: {e}", exc_info=True) application_logger.warning("Audio streaming producer will not be available") @@ -119,9 +172,42 @@ async def lifespan(app: FastAPI): # Memory service will be lazily initialized when first used application_logger.info("Memory service will be initialized on first use (lazy loading)") + # Register OpenMemory user if using openmemory_mcp provider + await initialize_openmemory_user() + # 
SystemTracker is used for monitoring and debugging application_logger.info("Using SystemTracker for monitoring and debugging") + # Initialize plugins using plugin service + try: + from advanced_omi_backend.services.plugin_service import init_plugin_router, set_plugin_router + + plugin_router = init_plugin_router() + + if plugin_router: + # Initialize async resources for each enabled plugin + for plugin_id, plugin in plugin_router.plugins.items(): + if plugin.enabled: + try: + await plugin.initialize() + application_logger.info(f"βœ… Plugin '{plugin_id}' initialized") + except Exception as e: + application_logger.error(f"Failed to initialize plugin '{plugin_id}': {e}", exc_info=True) + + application_logger.info(f"Plugins initialized: {len(plugin_router.plugins)} active") + + # Store in app state for API access + app.state.plugin_router = plugin_router + # Register with plugin service for worker access + set_plugin_router(plugin_router) + else: + application_logger.info("No plugins configured") + app.state.plugin_router = None + + except Exception as e: + application_logger.error(f"Failed to initialize plugin system: {e}", exc_info=True) + app.state.plugin_router = None + application_logger.info("Application ready - using application-level processing architecture.") logger.info("App ready") @@ -162,6 +248,14 @@ async def lifespan(app: FastAPI): # Stop metrics collection and save final report application_logger.info("Metrics collection stopped") + # Shutdown plugins + try: + from advanced_omi_backend.services.plugin_service import cleanup_plugin_router + await cleanup_plugin_router() + application_logger.info("Plugins shut down") + except Exception as e: + application_logger.error(f"Error shutting down plugins: {e}") + # Shutdown memory service and speaker service shutdown_memory_service() application_logger.info("Memory and speaker services shut down.") diff --git a/backends/advanced/src/advanced_omi_backend/auth.py 
b/backends/advanced/src/advanced_omi_backend/auth.py index f1b7909a..2e14b8b0 100644 --- a/backends/advanced/src/advanced_omi_backend/auth.py +++ b/backends/advanced/src/advanced_omi_backend/auth.py @@ -224,6 +224,9 @@ async def create_admin_user_if_needed(): existing_admin = await user_db.get_by_email(ADMIN_EMAIL) if existing_admin: + logger.debug(f"existing_admin.id = {existing_admin.id}, type = {type(existing_admin.id)}") + logger.debug(f"str(existing_admin.id) = {str(existing_admin.id)}") + logger.debug(f"existing_admin.user_id = {existing_admin.user_id}") logger.info( f"βœ… Admin user already exists: {existing_admin.user_id} ({existing_admin.email})" ) diff --git a/backends/advanced/src/advanced_omi_backend/client_manager.py b/backends/advanced/src/advanced_omi_backend/client_manager.py index 5a3131b5..e55b3502 100644 --- a/backends/advanced/src/advanced_omi_backend/client_manager.py +++ b/backends/advanced/src/advanced_omi_backend/client_manager.py @@ -9,6 +9,7 @@ import logging import uuid from typing import TYPE_CHECKING, Dict, Optional +import redis.asyncio as redis if TYPE_CHECKING: from advanced_omi_backend.client import ClientState @@ -21,6 +22,9 @@ _client_to_user_mapping: Dict[str, str] = {} # Active clients only _all_client_user_mappings: Dict[str, str] = {} # All clients including disconnected +# Redis client for cross-container clientβ†’user mapping +_redis_client: Optional[redis.Redis] = None + class ClientManager: """ @@ -372,9 +376,33 @@ def unregister_client_user_mapping(client_id: str): logger.warning(f"⚠️ Attempted to unregister non-existent client {client_id}") +async def track_client_user_relationship_async(client_id: str, user_id: str, ttl: int = 86400): + """ + Track that a client belongs to a user (async, writes to Redis for cross-container support). 
+ + Args: + client_id: The client ID + user_id: The user ID that owns this client + ttl: Time-to-live in seconds (default 24 hours) + """ + _all_client_user_mappings[client_id] = user_id # In-memory fallback + + if _redis_client: + try: + await _redis_client.setex(f"client:owner:{client_id}", ttl, user_id) + logger.debug(f"βœ… Tracked client {client_id} β†’ user {user_id} in Redis (TTL: {ttl}s)") + except Exception as e: + logger.warning(f"Failed to track client in Redis: {e}") + else: + logger.debug(f"Tracked client {client_id} relationship to user {user_id} (in-memory only)") + + def track_client_user_relationship(client_id: str, user_id: str): """ - Track that a client belongs to a user (persists after disconnection for database queries). + Track that a client belongs to a user (sync version for backward compatibility). + + WARNING: This is synchronous and cannot use Redis. Use track_client_user_relationship_async() + instead in async contexts for cross-container support. Args: client_id: The client ID @@ -444,9 +472,45 @@ def get_user_clients_active(user_id: str) -> list[str]: return user_clients +def initialize_redis_for_client_manager(redis_url: str): + """ + Initialize Redis client for cross-container clientβ†’user mapping. + + Args: + redis_url: Redis connection URL + """ + global _redis_client + _redis_client = redis.from_url(redis_url, decode_responses=True) + logger.info(f"βœ… ClientManager Redis initialized: {redis_url}") + + +async def get_client_owner_async(client_id: str) -> Optional[str]: + """ + Get the user ID that owns a specific client (async Redis lookup). 
+ + Args: + client_id: The client ID to look up + + Returns: + User ID if found, None otherwise + """ + if _redis_client: + try: + user_id = await _redis_client.get(f"client:owner:{client_id}") + return user_id + except Exception as e: + logger.warning(f"Redis lookup failed for client {client_id}: {e}") + + # Fallback to in-memory mapping + return _all_client_user_mappings.get(client_id) + + def get_client_owner(client_id: str) -> Optional[str]: """ - Get the user ID that owns a specific client. + Get the user ID that owns a specific client (sync version for backward compatibility). + + WARNING: This is synchronous and cannot use Redis. Use get_client_owner_async() instead + in async contexts for cross-container support. Args: client_id: The client ID to look up diff --git a/backends/advanced/src/advanced_omi_backend/clients/audio_stream_client.py b/backends/advanced/src/advanced_omi_backend/clients/audio_stream_client.py index af89fd51..1f3c695a 100644 --- a/backends/advanced/src/advanced_omi_backend/clients/audio_stream_client.py +++ b/backends/advanced/src/advanced_omi_backend/clients/audio_stream_client.py @@ -65,7 +65,7 @@ def __init__( base_url: str, token: str, device_name: str = "python-client", - endpoint: str = "ws_pcm", + endpoint: str = "ws?codec=pcm", ): """Initialize the audio stream client. 
@@ -73,7 +73,7 @@ def __init__( base_url: Base URL of the backend (e.g., "http://localhost:8000") token: JWT authentication token device_name: Device name for client identification - endpoint: WebSocket endpoint ("ws_pcm" or "ws_omi") + endpoint: WebSocket endpoint ("ws?codec=pcm" or "ws?codec=opus") """ self.base_url = base_url self.token = token @@ -87,7 +87,9 @@ def __init__( def ws_url(self) -> str: """Build WebSocket URL from base URL.""" url = self.base_url.replace("http://", "ws://").replace("https://", "wss://") - return f"{url}/{self.endpoint}?token={self.token}&device_name={self.device_name}" + # Check if endpoint already has query params + separator = "&" if "?" in self.endpoint else "?" + return f"{url}/{self.endpoint}{separator}token={self.token}&device_name={self.device_name}" async def connect(self, wait_for_ready: bool = True) -> WebSocketClientProtocol: """Connect to the WebSocket endpoint. @@ -105,8 +107,8 @@ async def connect(self, wait_for_ready: bool = True) -> WebSocketClientProtocol: self.ws = await websockets.connect(self.ws_url) logger.info("WebSocket connected") - if wait_for_ready and self.endpoint == "ws_pcm": - # PCM endpoint sends "ready" message after auth (line 261-268 in websocket_controller.py) + if wait_for_ready and "codec=pcm" in self.endpoint: + # PCM codec sends "ready" message after auth (line 261-268 in websocket_controller.py) ready_msg = await self.ws.recv() ready = json.loads(ready_msg.strip() if isinstance(ready_msg, str) else ready_msg.decode().strip()) if ready.get("type") != "ready": @@ -121,6 +123,7 @@ async def send_audio_start( sample_rate: int = OMI_SAMPLE_RATE, sample_width: int = OMI_SAMPLE_WIDTH, channels: int = OMI_CHANNELS, + always_persist: bool = False, ) -> None: """Send Wyoming audio-start event. 
@@ -129,6 +132,7 @@ async def send_audio_start( sample_rate: Audio sample rate in Hz (default: 16000) sample_width: Bytes per sample (default: 2 for 16-bit) channels: Number of audio channels (default: 1) + always_persist: Save audio even if transcription fails (default: False) Note: The mode is inside the "data" dict, matching _handle_audio_session_start @@ -144,11 +148,15 @@ async def send_audio_start( "width": sample_width, "channels": channels, "mode": recording_mode, + "always_persist": always_persist, }, "payload_length": None, } + print(f"πŸ”΅ CLIENT: Sending audio-start message: {header}") + logger.info(f"πŸ”΅ CLIENT: Sending audio-start message: {header}") await self.ws.send(json.dumps(header) + "\n") - logger.info(f"Sent audio-start with mode={recording_mode}") + print(f"βœ… CLIENT: Sent audio-start with mode={recording_mode}, always_persist={always_persist}") + logger.info(f"βœ… CLIENT: Sent audio-start with mode={recording_mode}, always_persist={always_persist}") async def send_audio_chunk_wyoming( self, @@ -230,6 +238,7 @@ async def stream_wav_file( use_wyoming: bool = True, recording_mode: str = "streaming", realtime_factor: float = 0.1, + always_persist: bool = False, ) -> int: """Stream a WAV file in chunks, simulating real-time audio. 
@@ -239,6 +248,7 @@ async def stream_wav_file( use_wyoming: If True, use Wyoming protocol; if False, send raw binary recording_mode: "streaming" or "batch" realtime_factor: Fraction of real-time to simulate (0.1 = 10x speed) + always_persist: Save audio even if transcription fails (default: False) Returns: Number of chunks sent @@ -266,6 +276,7 @@ async def stream_wav_file( sample_rate=sample_rate, sample_width=sample_width, channels=channels, + always_persist=always_persist, ) # Reset counters @@ -301,9 +312,19 @@ async def stream_wav_file( async def close(self) -> None: """Close the WebSocket connection.""" if self.ws: - await self.ws.close() - self.ws = None - logger.info("WebSocket connection closed") + try: + # Add timeout to WebSocket close to prevent hanging + await asyncio.wait_for(self.ws.close(), timeout=2.0) + logger.info("WebSocket connection closed cleanly") + except asyncio.TimeoutError: + logger.warning("WebSocket close timed out after 2s, forcing close") + # Force close without waiting for handshake + if hasattr(self.ws, 'transport') and self.ws.transport: + self.ws.transport.close() + except Exception as e: + logger.error(f"Error during WebSocket close: {e}") + finally: + self.ws = None async def __aenter__(self) -> "AudioStreamClient": """Async context manager entry.""" @@ -323,6 +344,7 @@ def stream_audio_file( device_name: str = "robot-test", recording_mode: str = "streaming", use_wyoming: bool = True, + always_persist: bool = False, ) -> int: """Synchronous wrapper for streaming audio file. 
@@ -336,6 +358,7 @@ def stream_audio_file( device_name: Device name for client identification recording_mode: "streaming" or "batch" use_wyoming: If True, use Wyoming protocol + always_persist: Save audio even if transcription fails (default: False) Returns: Number of chunks sent @@ -347,6 +370,7 @@ async def _run() -> int: wav_path, use_wyoming=use_wyoming, recording_mode=recording_mode, + always_persist=always_persist, ) return asyncio.run(_run()) @@ -395,6 +419,7 @@ def start_stream( token: str, device_name: str = "robot-test", recording_mode: str = "streaming", + always_persist: bool = False, ) -> str: """Start a new audio stream (non-blocking). @@ -403,6 +428,7 @@ def start_stream( token: JWT token device_name: Device name for client ID recording_mode: "streaming" or "batch" + always_persist: Save audio even if transcription fails (default: False) Returns: stream_id: Unique ID for this stream session @@ -428,14 +454,16 @@ def run_loop(): # Connect and send audio-start async def _connect_and_start(): try: + logger.info(f"πŸ”΅ CLIENT: Stream {stream_id} connecting for {device_name}...") await client.connect() session.connected = True - await client.send_audio_start(recording_mode=recording_mode) + logger.info(f"βœ… CLIENT: Stream {stream_id} connected, sending audio-start...") + await client.send_audio_start(recording_mode=recording_mode, always_persist=always_persist) session.audio_started = True - logger.info(f"Stream {stream_id} started for {device_name}") + logger.info(f"βœ… CLIENT: Stream {stream_id} started for {device_name}") except Exception as e: session.error = str(e) - logger.error(f"Stream {stream_id} failed to start: {e}") + logger.error(f"❌ CLIENT: Stream {stream_id} failed to start: {e}") future = asyncio.run_coroutine_threadsafe(_connect_and_start(), loop) future.result(timeout=10) # Wait for connection @@ -543,6 +571,39 @@ async def _stop(): logger.info(f"Stream {stream_id} stopped, sent {total_chunks} chunks") return total_chunks + def 
close_stream_without_stop(self, stream_id: str) -> int: + """Close WebSocket connection without sending audio-stop event. + + This simulates abrupt disconnection (network failure, client crash) + and should trigger websocket_disconnect end_reason. + + Args: + stream_id: Stream session ID + + Returns: + Total chunks sent during this session + """ + session = self._sessions.get(stream_id) + if not session: + raise ValueError(f"Unknown stream_id: {stream_id}") + + async def _close_abruptly(): + # Just close the connection without audio-stop + await session.client.close() + + future = asyncio.run_coroutine_threadsafe(_close_abruptly(), session.loop) + future.result(timeout=10) + + # Stop the event loop + session.loop.call_soon_threadsafe(session.loop.stop) + session.thread.join(timeout=5) + + total_chunks = session.chunk_count + del self._sessions[stream_id] + + logger.info(f"Stream {stream_id} closed abruptly (no audio-stop), sent {total_chunks} chunks") + return total_chunks + def get_session(self, stream_id: str) -> Optional[StreamSession]: """Get session info for a stream.""" return self._sessions.get(stream_id) diff --git a/backends/advanced/src/advanced_omi_backend/config.py b/backends/advanced/src/advanced_omi_backend/config.py index 2b07a8d4..77a842ce 100644 --- a/backends/advanced/src/advanced_omi_backend/config.py +++ b/backends/advanced/src/advanced_omi_backend/config.py @@ -1,15 +1,27 @@ """ Configuration management for Chronicle backend. -Currently contains diarization settings because they were used in multiple places -causing circular imports. Other configurations can be moved here as needed. +Uses OmegaConf for unified YAML configuration with environment variable interpolation. +Secrets are stored in .env files, all other config in config/config.yml. 
""" -import json import logging import os -import shutil +from dataclasses import dataclass from pathlib import Path +from typing import Optional + +from omegaconf import OmegaConf + +from advanced_omi_backend.config_loader import ( + get_backend_config, + get_config_dir, + load_config, +) +from advanced_omi_backend.config_loader import reload_config as reload_omegaconf_config +from advanced_omi_backend.config_loader import ( + save_config_section, +) logger = logging.getLogger(__name__) @@ -17,152 +29,203 @@ DATA_DIR = Path(os.getenv("DATA_DIR", "/app/data")) CHUNK_DIR = Path("./audio_chunks") # Mounted to ./data/audio_chunks by Docker -# Default diarization settings -DEFAULT_DIARIZATION_SETTINGS = { - "diarization_source": "pyannote", - "similarity_threshold": 0.15, - "min_duration": 0.5, - "collar": 2.0, - "min_duration_off": 1.5, - "min_speakers": 2, - "max_speakers": 6 -} - -# Default speech detection settings -DEFAULT_SPEECH_DETECTION_SETTINGS = { - "min_words": 10, # Minimum words to create conversation (increased from 5) - "min_confidence": 0.7, # Word confidence threshold (increased from 0.5) - "min_duration": 10.0, # Minimum speech duration in seconds (increased from 2.0) -} - -# Default conversation stop settings -DEFAULT_CONVERSATION_STOP_SETTINGS = { - "transcription_buffer_seconds": 120, # Periodic transcription interval (2 minutes) - "speech_inactivity_threshold": 60, # Speech gap threshold for closure (1 minute) -} - -# Default audio storage settings -DEFAULT_AUDIO_STORAGE_SETTINGS = { - "audio_base_path": "/app/data", # Main audio directory (where volume is mounted) - "audio_chunks_path": "/app/audio_chunks", # Full path to audio chunks subfolder -} - -# Global cache for diarization settings -_diarization_settings = None - - -def get_diarization_config_path(): - """Get the path to the diarization config file.""" - # Try different locations in order of preference - # 1. 
Data directory (for persistence across container restarts) - data_path = Path("/app/data/diarization_config.json") - if data_path.parent.exists(): - return data_path - - # 2. App root directory - app_path = Path("/app/diarization_config.json") - if app_path.parent.exists(): - return app_path - - # 3. Local development path - local_path = Path("diarization_config.json") - return local_path - - -def load_diarization_settings_from_file(): - """Load diarization settings from file or create from template.""" - global _diarization_settings - - config_path = get_diarization_config_path() - template_path = Path("/app/diarization_config.json.template") - - # If no template, try local development path - if not template_path.exists(): - template_path = Path("diarization_config.json.template") - - # If config doesn't exist, try to copy from template - if not config_path.exists(): - if template_path.exists(): - try: - # Ensure parent directory exists - config_path.parent.mkdir(parents=True, exist_ok=True) - shutil.copy(template_path, config_path) - logger.info(f"Created diarization config from template at {config_path}") - except Exception as e: - logger.warning(f"Could not copy template to {config_path}: {e}") - - # Load from file if it exists - if config_path.exists(): - try: - with open(config_path, 'r') as f: - _diarization_settings = json.load(f) - logger.info(f"Loaded diarization settings from {config_path}") - return _diarization_settings - except Exception as e: - logger.error(f"Error loading diarization settings from {config_path}: {e}") - - # Fall back to defaults - _diarization_settings = DEFAULT_DIARIZATION_SETTINGS.copy() - logger.info("Using default diarization settings") - return _diarization_settings - - -def save_diarization_settings_to_file(settings): - """Save diarization settings to file.""" - global _diarization_settings - - config_path = get_diarization_config_path() - - try: - # Ensure parent directory exists - config_path.parent.mkdir(parents=True, 
exist_ok=True) - - # Write settings to file - with open(config_path, 'w') as f: - json.dump(settings, f, indent=2) - - # Update cache - _diarization_settings = settings - - logger.info(f"Saved diarization settings to {config_path}") - return True - except Exception as e: - logger.error(f"Error saving diarization settings to {config_path}: {e}") - return False - - -def get_speech_detection_settings(): - """Get speech detection settings from environment or defaults.""" - return { - "min_words": int(os.getenv("SPEECH_DETECTION_MIN_WORDS", DEFAULT_SPEECH_DETECTION_SETTINGS["min_words"])), - "min_confidence": float(os.getenv("SPEECH_DETECTION_MIN_CONFIDENCE", DEFAULT_SPEECH_DETECTION_SETTINGS["min_confidence"])), - "min_duration": float(os.getenv("SPEECH_DETECTION_MIN_DURATION", DEFAULT_SPEECH_DETECTION_SETTINGS["min_duration"])), - } +# ============================================================================ +# Configuration Functions (OmegaConf-based) +# ============================================================================ +def get_config_yml_path() -> Path: + """ + Get path to config.yml file. -def get_conversation_stop_settings(): - """Get conversation stop settings from environment or defaults.""" + Returns: + Path to config.yml + """ + return get_config_dir() / "config.yml" + +def get_config(force_reload: bool = False) -> dict: + """ + Get merged configuration using OmegaConf. + + Wrapper around load_config() from config_loader for backward compatibility. 
+ + Args: + force_reload: If True, reload from disk even if cached + + Returns: + Merged configuration dictionary with all settings + """ + cfg = load_config(force_reload=force_reload) + return OmegaConf.to_container(cfg, resolve=True) + + +def reload_config(): + """Reload configuration from disk (invalidate cache).""" + return reload_omegaconf_config() + + +# ============================================================================ +# Diarization Settings (OmegaConf-based) +# ============================================================================ + +def get_diarization_settings() -> dict: + """ + Get diarization settings using OmegaConf. + + Returns: + Dict with diarization configuration (resolved from YAML + env vars) + """ + cfg = get_backend_config('diarization') + return OmegaConf.to_container(cfg, resolve=True) + + +def save_diarization_settings(settings: dict) -> bool: + """ + Save diarization settings to config.yml using OmegaConf. + + Args: + settings: Dict with diarization settings to save + + Returns: + True if saved successfully, False otherwise + """ + return save_config_section('backend.diarization', settings) + + +# ============================================================================ +# Cleanup Settings (OmegaConf-based) +# ============================================================================ + +@dataclass +class CleanupSettings: + """Cleanup configuration for soft-deleted conversations.""" + auto_cleanup_enabled: bool = False + retention_days: int = 30 + + +def get_cleanup_settings() -> dict: + """ + Get cleanup settings using OmegaConf. 
+ + Returns: + Dict with auto_cleanup_enabled and retention_days + """ + cfg = get_backend_config('cleanup') + return OmegaConf.to_container(cfg, resolve=True) - return { - "transcription_buffer_seconds": float(os.getenv("TRANSCRIPTION_BUFFER_SECONDS", DEFAULT_CONVERSATION_STOP_SETTINGS["transcription_buffer_seconds"])), - "speech_inactivity_threshold": float(os.getenv("SPEECH_INACTIVITY_THRESHOLD_SECONDS", DEFAULT_CONVERSATION_STOP_SETTINGS["speech_inactivity_threshold"])), - "min_word_confidence": float(os.getenv("SPEECH_DETECTION_MIN_CONFIDENCE", DEFAULT_SPEECH_DETECTION_SETTINGS["min_confidence"])), - } +def save_cleanup_settings(settings: CleanupSettings) -> bool: + """ + Save cleanup settings to config.yml using OmegaConf. + + Args: + settings: CleanupSettings dataclass instance + + Returns: + True if saved successfully, False otherwise + """ + from dataclasses import asdict + return save_config_section('backend.cleanup', asdict(settings)) + + +# ============================================================================ +# Speech Detection Settings (OmegaConf-based) +# ============================================================================ + +def get_speech_detection_settings() -> dict: + """ + Get speech detection settings using OmegaConf. + + Returns: + Dict with min_words, min_confidence, min_duration + """ + cfg = get_backend_config('speech_detection') + return OmegaConf.to_container(cfg, resolve=True) + + +# ============================================================================ +# Conversation Stop Settings (OmegaConf-based) +# ============================================================================ + +def get_conversation_stop_settings() -> dict: + """ + Get conversation stop settings using OmegaConf. 
+ + Returns: + Dict with transcription_buffer_seconds, speech_inactivity_threshold + """ + cfg = get_backend_config('conversation_stop') + settings = OmegaConf.to_container(cfg, resolve=True) + + # Add min_word_confidence from speech_detection for backward compatibility + speech_cfg = get_backend_config('speech_detection') + settings['min_word_confidence'] = OmegaConf.to_container(speech_cfg, resolve=True).get('min_confidence', 0.7) + + return settings + + +# ============================================================================ +# Audio Storage Settings (OmegaConf-based) +# ============================================================================ + +def get_audio_storage_settings() -> dict: + """ + Get audio storage settings using OmegaConf. + + Returns: + Dict with audio_base_path, audio_chunks_path + """ + cfg = get_backend_config('audio_storage') + return OmegaConf.to_container(cfg, resolve=True) + + +# ============================================================================ +# Miscellaneous Settings (OmegaConf-based) +# ============================================================================ + +def get_misc_settings() -> dict: + """ + Get miscellaneous configuration settings using OmegaConf. 
+ + Returns: + Dict with always_persist_enabled and use_provider_segments + """ + # Get audio settings for always_persist_enabled + audio_cfg = get_backend_config('audio') + audio_settings = OmegaConf.to_container(audio_cfg, resolve=True) if audio_cfg else {} + + # Get transcription settings for use_provider_segments + transcription_cfg = get_backend_config('transcription') + transcription_settings = OmegaConf.to_container(transcription_cfg, resolve=True) if transcription_cfg else {} -def get_audio_storage_settings(): - """Get audio storage settings from environment or defaults.""" - - # Get base path and derive chunks path - audio_base_path = os.getenv("AUDIO_BASE_PATH", DEFAULT_AUDIO_STORAGE_SETTINGS["audio_base_path"]) - audio_chunks_path = os.getenv("AUDIO_CHUNKS_PATH", f"{audio_base_path}/audio_chunks") - return { - "audio_base_path": audio_base_path, - "audio_chunks_path": audio_chunks_path, + 'always_persist_enabled': audio_settings.get('always_persist_enabled', False), + 'use_provider_segments': transcription_settings.get('use_provider_segments', False) } -# Initialize settings on module load -_diarization_settings = load_diarization_settings_from_file() \ No newline at end of file +def save_misc_settings(settings: dict) -> bool: + """ + Save miscellaneous settings to config.yml using OmegaConf. 
+ + Args: + settings: Dict with always_persist_enabled and/or use_provider_segments + + Returns: + True if saved successfully, False otherwise + """ + success = True + + # Save audio settings if always_persist_enabled is provided + if 'always_persist_enabled' in settings: + audio_settings = {'always_persist_enabled': settings['always_persist_enabled']} + if not save_config_section('backend.audio', audio_settings): + success = False + + # Save transcription settings if use_provider_segments is provided + if 'use_provider_segments' in settings: + transcription_settings = {'use_provider_segments': settings['use_provider_segments']} + if not save_config_section('backend.transcription', transcription_settings): + success = False + + return success \ No newline at end of file diff --git a/backends/advanced/src/advanced_omi_backend/config_loader.py b/backends/advanced/src/advanced_omi_backend/config_loader.py new file mode 100644 index 00000000..5d25debd --- /dev/null +++ b/backends/advanced/src/advanced_omi_backend/config_loader.py @@ -0,0 +1,169 @@ +""" +OmegaConf-based configuration management for Chronicle. + +Provides unified config loading with environment variable interpolation. +""" + +import logging +import os +from pathlib import Path +from typing import Optional + +from omegaconf import DictConfig, OmegaConf + +logger = logging.getLogger(__name__) + +# Global config cache +_config_cache: Optional[DictConfig] = None + + +def get_config_dir() -> Path: + """Get config directory path (single source of truth).""" + config_dir = os.getenv("CONFIG_DIR", "/app/config") + return Path(config_dir) + + +def get_plugins_yml_path() -> Path: + """ + Get path to plugins.yml file (single source of truth). + + Returns: + Path to plugins.yml + """ + return get_config_dir() / "plugins.yml" + + +def load_config(force_reload: bool = False) -> DictConfig: + """ + Load and merge configuration using OmegaConf. + + Merge priority (later overrides earlier): + 1. 
config/defaults.yml (shipped defaults) + 2. config/config.yml (user overrides) + 3. Environment variables (via ${oc.env:VAR,default} syntax) + + Args: + force_reload: If True, reload from disk even if cached + + Returns: + Merged DictConfig with all settings + """ + global _config_cache + + if _config_cache is not None and not force_reload: + return _config_cache + + config_dir = get_config_dir() + defaults_path = config_dir / "defaults.yml" + + # Support CONFIG_FILE env var for test configurations + config_file = os.getenv("CONFIG_FILE", "config.yml") + # Handle both absolute paths and relative filenames + if os.path.isabs(config_file): + config_path = Path(config_file) + else: + config_path = config_dir / config_file + + # Load defaults + defaults = {} + if defaults_path.exists(): + try: + defaults = OmegaConf.load(defaults_path) + logger.info(f"Loaded defaults from {defaults_path}") + except Exception as e: + logger.warning(f"Could not load defaults from {defaults_path}: {e}") + + # Load user config + user_config = {} + if config_path.exists(): + try: + user_config = OmegaConf.load(config_path) + logger.info(f"Loaded config from {config_path}") + except Exception as e: + logger.error(f"Error loading config from {config_path}: {e}") + + # Merge configurations (user config overrides defaults) + merged = OmegaConf.merge(defaults, user_config) + + # Cache result + _config_cache = merged + + logger.info("Configuration loaded successfully with OmegaConf") + return merged + + +def reload_config() -> DictConfig: + """Reload configuration from disk (invalidate cache).""" + global _config_cache + _config_cache = None + return load_config(force_reload=True) + + +def get_backend_config(section: Optional[str] = None) -> DictConfig: + """ + Get backend configuration section. 
+ + Args: + section: Optional subsection (e.g., 'diarization', 'cleanup') + + Returns: + DictConfig for backend section or subsection + """ + cfg = load_config() + if 'backend' not in cfg: + return OmegaConf.create({}) + + backend_cfg = cfg.backend + if section: + return backend_cfg.get(section, OmegaConf.create({})) + return backend_cfg + + +def get_service_config(service_name: str) -> DictConfig: + """ + Get service configuration section. + + Args: + service_name: Service name (e.g., 'speaker_recognition', 'asr_services') + + Returns: + DictConfig for service section + """ + cfg = load_config() + return cfg.get(service_name, OmegaConf.create({})) + + +def save_config_section(section_path: str, values: dict) -> bool: + """ + Update a config section and save to config.yml. + + Args: + section_path: Dot-separated path (e.g., 'backend.diarization') + values: Dict with new values + + Returns: + True if saved successfully + """ + try: + config_path = get_config_dir() / "config.yml" + + # Load existing config + existing_config = {} + if config_path.exists(): + existing_config = OmegaConf.load(config_path) + + # Update section using dot notation + OmegaConf.update(existing_config, section_path, values, merge=True) + + # Save back to file + OmegaConf.save(existing_config, config_path) + + # Invalidate cache + reload_config() + + logger.info(f"Saved config section '{section_path}' to {config_path}") + return True + + except Exception as e: + logger.error(f"Error saving config section '{section_path}': {e}") + return False diff --git a/backends/advanced/src/advanced_omi_backend/controllers/audio_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/audio_controller.py index 4810810d..734df6ed 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/audio_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/audio_controller.py @@ -10,19 +10,26 @@ import logging import time import uuid -from pathlib import Path from fastapi 
import UploadFile from fastapi.responses import JSONResponse +from advanced_omi_backend.controllers.queue_controller import ( + JOB_RESULT_TTL, + start_post_conversation_jobs, + transcription_queue, +) +from advanced_omi_backend.models.conversation import create_conversation +from advanced_omi_backend.models.user import User +from advanced_omi_backend.services.transcription import is_transcription_available +from advanced_omi_backend.utils.audio_chunk_utils import convert_audio_to_chunks from advanced_omi_backend.utils.audio_utils import ( AudioValidationError, - write_audio_file, + validate_and_prepare_audio, +) +from advanced_omi_backend.workers.transcription_jobs import ( + transcribe_full_audio_job, ) -from advanced_omi_backend.models.job import JobPriority -from advanced_omi_backend.models.user import User -from advanced_omi_backend.models.conversation import create_conversation -from advanced_omi_backend.models.conversation import Conversation logger = logging.getLogger(__name__) audio_logger = logging.getLogger("audio_processing") @@ -30,6 +37,7 @@ def generate_client_id(user: User, device_name: str) -> str: """Generate client ID for uploaded files.""" + logger.debug(f"Generating client ID - user.id={user.id}, type={type(user.id)}") user_id_suffix = str(user.id)[-6:] return f"{user_id_suffix}-{device_name}" @@ -38,8 +46,6 @@ async def upload_and_process_audio_files( user: User, files: list[UploadFile], device_name: str = "upload", - auto_generate_client: bool = True, - folder: str = None, source: str = "upload" ) -> dict: """ @@ -54,8 +60,7 @@ async def upload_and_process_audio_files( user: Authenticated user files: List of uploaded audio files device_name: Device identifier - auto_generate_client: Whether to auto-generate client ID - folder: Optional subfolder for audio storage (e.g., 'fixtures') + source: Source of the upload (e.g., 'upload', 'gdrive') """ try: if not files: @@ -83,36 +88,23 @@ async def upload_and_process_audio_files( content = await 
file.read() - # Generate audio UUID and timestamp + # Track external source for deduplication (Google Drive, etc.) + external_source_id = None + external_source_type = None if source == "gdrive": - audio_uuid = getattr(file, "audio_uuid", None) - if not audio_uuid: - audio_logger.error(f"Missing audio_uuid for gdrive file: {file.filename}") - audio_uuid = str(uuid.uuid4()) - else: - audio_uuid = str(uuid.uuid4()) + external_source_id = getattr(file, "file_id", None) or getattr(file, "audio_uuid", None) + external_source_type = "gdrive" + if not external_source_id: + audio_logger.warning(f"Missing file_id for gdrive file: {file.filename}") timestamp = int(time.time() * 1000) - # Determine output directory (with optional subfolder) - from advanced_omi_backend.config import CHUNK_DIR - if folder: - chunk_dir = CHUNK_DIR / folder - chunk_dir.mkdir(parents=True, exist_ok=True) - else: - chunk_dir = CHUNK_DIR - - # Validate, write audio file and create AudioSession (all in one) + # Validate and prepare audio (read format from WAV file) try: - relative_audio_path, file_path, duration = await write_audio_file( - raw_audio_data=content, - audio_uuid=audio_uuid, - source=source, - client_id=client_id, - user_id=user.user_id, - user_email=user.email, - timestamp=timestamp, - chunk_dir=chunk_dir, - validate=True, # Validate WAV format, convert stereoβ†’mono + audio_data, sample_rate, sample_width, channels, duration = await validate_and_prepare_audio( + audio_data=content, + expected_sample_rate=16000, # Expecting 16kHz + convert_to_mono=True, # Convert stereo to mono + auto_resample=True # Auto-resample if sample rate doesn't match ) except AudioValidationError as e: processed_files.append({ @@ -123,7 +115,7 @@ async def upload_and_process_audio_files( continue audio_logger.info( - f"πŸ“Š {file.filename}: {duration:.1f}s β†’ {relative_audio_path}" + f"πŸ“Š {file.filename}: {duration:.1f}s ({sample_rate}Hz, {channels}ch, {sample_width} bytes/sample)" ) # Create conversation 
immediately for uploaded files (conversation_id auto-generated) @@ -133,45 +125,112 @@ async def upload_and_process_audio_files( title = file.filename.rsplit('.', 1)[0][:50] if file.filename else "Uploaded Audio" conversation = create_conversation( - audio_uuid=audio_uuid, user_id=user.user_id, client_id=client_id, title=title, - summary="Processing uploaded audio file..." + summary="Processing uploaded audio file...", + external_source_id=external_source_id, + external_source_type=external_source_type, ) - # Use the relative path returned by write_audio_file (already includes folder prefix if applicable) - conversation.audio_path = relative_audio_path await conversation.insert() conversation_id = conversation.conversation_id # Get the auto-generated ID audio_logger.info(f"πŸ“ Created conversation {conversation_id} for uploaded file") - # Enqueue post-conversation processing job chain - from advanced_omi_backend.controllers.queue_controller import start_post_conversation_jobs + # Convert audio directly to MongoDB chunks + try: + num_chunks = await convert_audio_to_chunks( + conversation_id=conversation_id, + audio_data=audio_data, + sample_rate=sample_rate, + channels=channels, + sample_width=sample_width, + ) + audio_logger.info( + f"πŸ“¦ Converted uploaded file to {num_chunks} MongoDB chunks " + f"(conversation {conversation_id[:12]})" + ) + except ValueError as val_error: + # Handle validation errors (e.g., file too long) + audio_logger.error(f"Audio validation failed: {val_error}") + processed_files.append({ + "filename": file.filename, + "status": "error", + "error": str(val_error), + }) + # Delete the conversation since it won't have audio chunks + await conversation.delete() + continue + except Exception as chunk_error: + audio_logger.error( + f"Failed to convert uploaded file to chunks: {chunk_error}", + exc_info=True + ) + processed_files.append({ + "filename": file.filename, + "status": "error", + "error": f"Audio conversion failed: {str(chunk_error)}", 
+ }) + # Delete the conversation since it won't have audio chunks + await conversation.delete() + continue + + # Enqueue batch transcription job first (file uploads need transcription) + version_id = str(uuid.uuid4()) + transcribe_job_id = f"transcribe_{conversation_id[:12]}" + + # Check if transcription provider is available before enqueueing + transcription_job = None + if is_transcription_available(mode="batch"): + transcription_job = transcription_queue.enqueue( + transcribe_full_audio_job, + conversation_id, + version_id, + "batch", # trigger + job_timeout=1800, # 30 minutes + result_ttl=JOB_RESULT_TTL, + job_id=transcribe_job_id, + description=f"Transcribe uploaded file {conversation_id[:8]}", + meta={'conversation_id': conversation_id, 'client_id': client_id} + ) + audio_logger.info(f"πŸ“₯ Enqueued transcription job {transcription_job.id} for uploaded file") + else: + audio_logger.warning( + f"⚠️ Skipping transcription for conversation {conversation_id}: " + "No transcription provider configured" + ) + # Enqueue post-conversation processing job chain (depends on transcription) job_ids = start_post_conversation_jobs( conversation_id=conversation_id, - audio_uuid=audio_uuid, - audio_file_path=file_path, user_id=user.user_id, - post_transcription=True, # Run batch transcription for uploads + transcript_version_id=version_id, # Pass the version_id from transcription job + depends_on_job=transcription_job, # Wait for transcription to complete (or None) client_id=client_id # Pass client_id for UI tracking ) processed_files.append({ "filename": file.filename, - "status": "processing", - "audio_uuid": audio_uuid, + "status": "started", # RQ standard: job has been enqueued "conversation_id": conversation_id, - "transcript_job_id": job_ids['transcription'], + "transcript_job_id": transcription_job.id if transcription_job else None, "speaker_job_id": job_ids['speaker_recognition'], "memory_job_id": job_ids['memory'], "duration_seconds": round(duration, 2), }) + # Build 
job chain description + job_chain = [] + if transcription_job: + job_chain.append(transcription_job.id) + if job_ids['speaker_recognition']: + job_chain.append(job_ids['speaker_recognition']) + if job_ids['memory']: + job_chain.append(job_ids['memory']) + audio_logger.info( f"βœ… Processed {file.filename} β†’ conversation {conversation_id}, " - f"jobs: {job_ids['transcription']} β†’ {job_ids['speaker_recognition']} β†’ {job_ids['memory']}" + f"jobs: {' β†’ '.join(job_chain) if job_chain else 'none'}" ) except (OSError, IOError) as e: @@ -191,20 +250,33 @@ async def upload_and_process_audio_files( "error": str(e), }) - successful_files = [f for f in processed_files if f.get("status") == "processing"] + successful_files = [f for f in processed_files if f.get("status") == "started"] failed_files = [f for f in processed_files if f.get("status") == "error"] - return { + response_body = { "message": f"Uploaded and processing {len(successful_files)} file(s)", "client_id": client_id, "files": processed_files, "summary": { "total": len(files), - "processing": len(successful_files), + "started": len(successful_files), # RQ standard "failed": len(failed_files), }, } + # Return appropriate HTTP status code based on results + if len(failed_files) == len(files): + # ALL files failed - return 400 Bad Request + audio_logger.error(f"All {len(files)} file(s) failed to upload") + return JSONResponse(status_code=400, content=response_body) + elif len(failed_files) > 0: + # SOME files failed (partial success) - return 207 Multi-Status + audio_logger.warning(f"Partial upload: {len(successful_files)} succeeded, {len(failed_files)} failed") + return JSONResponse(status_code=207, content=response_body) + else: + # All files succeeded - return 200 OK + return response_body + except (OSError, IOError) as e: # File system errors during upload handling audio_logger.exception("File I/O error in upload_and_process_audio_files") @@ -217,83 +289,3 @@ async def upload_and_process_audio_files( 
return JSONResponse( status_code=500, content={"error": f"File upload failed: {str(e)}"} ) - - -async def get_conversation_audio_path(conversation_id: str, user: User, cropped: bool = False) -> Path: - """ - Get the file path for a conversation's audio file. - - Args: - conversation_id: The conversation ID - user: The authenticated user - cropped: If True, return cropped audio path; if False, return original audio path - - Returns: - Path object for the audio file - - Raises: - ValueError: If conversation not found, access denied, or audio file not available - """ - # Get conversation by conversation_id (UUID field, not _id) - conversation = await Conversation.find_one(Conversation.conversation_id == conversation_id) - - if not conversation: - raise ValueError("Conversation not found") - - # Check ownership (admins can access all files) - if not user.is_superuser and conversation.user_id != str(user.user_id): - raise ValueError("Access denied") - - # Get the appropriate audio path - audio_path = conversation.cropped_audio_path if cropped else conversation.audio_path - - if not audio_path: - audio_type = "cropped" if cropped else "original" - raise ValueError(f"No {audio_type} audio file available for this conversation") - - # Build full file path - from advanced_omi_backend.app_config import get_audio_chunk_dir - audio_dir = get_audio_chunk_dir() - file_path = audio_dir / audio_path - - # Check if file exists - if not file_path.exists() or not file_path.is_file(): - raise ValueError("Audio file not found on disk") - - return file_path - - -async def get_cropped_audio_info(audio_uuid: str, user: User): - """ - Get audio cropping metadata from the conversation. - - This is an audio service operation that retrieves cropping-related metadata - such as speech segments, cropped audio path, and cropping timestamps. - - Used for: Checking cropping status and retrieving audio processing details. - Works with: Conversation model. 
- """ - try: - # Find the conversation - conversation = await Conversation.find_one(Conversation.audio_uuid == audio_uuid) - if not conversation: - return JSONResponse(status_code=404, content={"error": "Conversation not found"}) - - # Check ownership for non-admin users - if not user.is_superuser: - if conversation.user_id != str(user.user_id): - return JSONResponse(status_code=404, content={"error": "Conversation not found"}) - - return { - "audio_uuid": audio_uuid, - "cropped_audio_path": conversation.cropped_audio_path, - "speech_segments": conversation.speech_segments if hasattr(conversation, 'speech_segments') else [], - "cropped_duration": conversation.cropped_duration if hasattr(conversation, 'cropped_duration') else None, - "cropped_at": conversation.cropped_at if hasattr(conversation, 'cropped_at') else None, - "original_audio_path": conversation.audio_path, - } - - except Exception as e: - # Database or unexpected errors when fetching audio metadata - audio_logger.exception("Error fetching cropped audio info") - return JSONResponse(status_code=500, content={"error": "Error fetching cropped audio info"}) diff --git a/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py index b9533391..c142aeee 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/conversation_controller.py @@ -4,26 +4,36 @@ import logging import time +import uuid +from datetime import datetime from pathlib import Path -from typing import Optional + +from fastapi.responses import JSONResponse from advanced_omi_backend.client_manager import ( ClientManager, client_belongs_to_user, ) -from advanced_omi_backend.models.audio_file import AudioFile +from advanced_omi_backend.config_loader import get_service_config +from advanced_omi_backend.controllers.queue_controller import ( + 
JOB_RESULT_TTL, + default_queue, + memory_queue, + transcription_queue, +) +from advanced_omi_backend.models.audio_chunk import AudioChunkDocument from advanced_omi_backend.models.conversation import Conversation +from advanced_omi_backend.models.job import JobPriority from advanced_omi_backend.users import User -from fastapi.responses import JSONResponse +from advanced_omi_backend.workers.memory_jobs import ( + enqueue_memory_processing, + process_memory_job, +) +from advanced_omi_backend.workers.speaker_jobs import recognise_speakers_job logger = logging.getLogger(__name__) audio_logger = logging.getLogger("audio_processing") -# Legacy audio_chunks collection is still used by some endpoints (speaker assignment, segment updates) -# But conversation queries now use the Conversation model directly -# Audio cropping operations are handled in audio_controller.py - - async def close_current_conversation(client_id: str, user: User, client_manager: ClientManager): """Close the current conversation for a specific client. 
Users can only close their own conversations.""" # Validate client ownership @@ -99,15 +109,17 @@ async def get_conversation(conversation_id: str, user: User): # Build response with explicit curated fields response = { "conversation_id": conversation.conversation_id, - "audio_uuid": conversation.audio_uuid, "user_id": conversation.user_id, "client_id": conversation.client_id, - "audio_path": conversation.audio_path, - "cropped_audio_path": conversation.cropped_audio_path, + "audio_chunks_count": conversation.audio_chunks_count, + "audio_total_duration": conversation.audio_total_duration, + "audio_compression_ratio": conversation.audio_compression_ratio, "created_at": conversation.created_at.isoformat() if conversation.created_at else None, "deleted": conversation.deleted, "deletion_reason": conversation.deletion_reason, "deleted_at": conversation.deleted_at.isoformat() if conversation.deleted_at else None, + "processing_status": conversation.processing_status, + "always_persist": conversation.always_persist, "end_reason": conversation.end_reason.value if conversation.end_reason else None, "completed_at": conversation.completed_at.isoformat() if conversation.completed_at else None, "title": conversation.title, @@ -123,6 +135,8 @@ async def get_conversation(conversation_id: str, user: User): "active_memory_version": conversation.active_memory_version, "transcript_version_count": conversation.transcript_version_count, "memory_version_count": conversation.memory_version_count, + "active_transcript_version_number": conversation.active_transcript_version_number, + "active_memory_version_number": conversation.active_memory_version_number, } return {"conversation": response} @@ -132,33 +146,48 @@ async def get_conversation(conversation_id: str, user: User): return JSONResponse(status_code=500, content={"error": "Error fetching conversation"}) -async def get_conversations(user: User): +async def get_conversations(user: User, include_deleted: bool = False): """Get 
conversations with speech only (speech-driven architecture).""" try: # Build query based on user permissions using Beanie if not user.is_superuser: # Regular users can only see their own conversations - user_conversations = await Conversation.find( - Conversation.user_id == str(user.user_id) - ).sort(-Conversation.created_at).to_list() + # Filter by deleted status + if not include_deleted: + user_conversations = await Conversation.find( + Conversation.user_id == str(user.user_id), + Conversation.deleted == False + ).sort(-Conversation.created_at).to_list() + else: + user_conversations = await Conversation.find( + Conversation.user_id == str(user.user_id) + ).sort(-Conversation.created_at).to_list() else: # Admins see all conversations - user_conversations = await Conversation.find_all().sort(-Conversation.created_at).to_list() + # Filter by deleted status + if not include_deleted: + user_conversations = await Conversation.find( + Conversation.deleted == False + ).sort(-Conversation.created_at).to_list() + else: + user_conversations = await Conversation.find_all().sort(-Conversation.created_at).to_list() # Build response with explicit curated fields - minimal for list view conversations = [] for conv in user_conversations: conversations.append({ "conversation_id": conv.conversation_id, - "audio_uuid": conv.audio_uuid, "user_id": conv.user_id, "client_id": conv.client_id, - "audio_path": conv.audio_path, - "cropped_audio_path": conv.cropped_audio_path, + "audio_chunks_count": conv.audio_chunks_count, + "audio_total_duration": conv.audio_total_duration, + "audio_compression_ratio": conv.audio_compression_ratio, "created_at": conv.created_at.isoformat() if conv.created_at else None, "deleted": conv.deleted, "deletion_reason": conv.deletion_reason, "deleted_at": conv.deleted_at.isoformat() if conv.deleted_at else None, + "processing_status": conv.processing_status, + "always_persist": conv.always_persist, "title": conv.title, "summary": conv.summary, "detailed_summary": 
conv.detailed_summary, @@ -170,6 +199,8 @@ async def get_conversations(user: User): "memory_count": conv.memory_count, "transcript_version_count": conv.transcript_version_count, "memory_version_count": conv.memory_version_count, + "active_transcript_version_number": conv.active_transcript_version_number, + "active_memory_version_number": conv.active_memory_version_number, }) return {"conversations": conversations} @@ -179,12 +210,85 @@ async def get_conversations(user: User): return JSONResponse(status_code=500, content={"error": "Error fetching conversations"}) -async def delete_conversation(conversation_id: str, user: User): - """Delete a conversation and its associated audio files. Users can only delete their own conversations.""" +async def _soft_delete_conversation(conversation: Conversation, user: User) -> JSONResponse: + """Mark conversation and chunks as deleted (soft delete).""" + conversation_id = conversation.conversation_id + + # Mark conversation as deleted + conversation.deleted = True + conversation.deletion_reason = "user_deleted" + conversation.deleted_at = datetime.utcnow() + await conversation.save() + + logger.info(f"Soft deleted conversation {conversation_id} for user {user.user_id}") + + # Soft delete all associated audio chunks + result = await AudioChunkDocument.find( + AudioChunkDocument.conversation_id == conversation_id, + AudioChunkDocument.deleted == False # Only update non-deleted chunks + ).update_many({ + "$set": { + "deleted": True, + "deleted_at": datetime.utcnow() + } + }) + + deleted_chunks = result.modified_count + logger.info(f"Soft deleted {deleted_chunks} audio chunks for conversation {conversation_id}") + + return JSONResponse( + status_code=200, + content={ + "message": f"Successfully soft deleted conversation '{conversation_id}'", + "deleted_chunks": deleted_chunks, + "conversation_id": conversation_id, + "client_id": conversation.client_id, + "deleted_at": conversation.deleted_at.isoformat() if conversation.deleted_at 
else None + } + ) + + +async def _hard_delete_conversation(conversation: Conversation) -> JSONResponse: + """Permanently delete conversation and chunks (admin only).""" + conversation_id = conversation.conversation_id + client_id = conversation.client_id + + # Delete conversation document + await conversation.delete() + logger.info(f"Hard deleted conversation {conversation_id}") + + # Delete all audio chunks + result = await AudioChunkDocument.find( + AudioChunkDocument.conversation_id == conversation_id + ).delete() + + deleted_chunks = result.deleted_count + logger.info(f"Hard deleted {deleted_chunks} audio chunks for conversation {conversation_id}") + + return JSONResponse( + status_code=200, + content={ + "message": f"Successfully permanently deleted conversation '{conversation_id}'", + "deleted_chunks": deleted_chunks, + "conversation_id": conversation_id, + "client_id": client_id + } + ) + + +async def delete_conversation(conversation_id: str, user: User, permanent: bool = False): + """ + Soft delete a conversation (mark as deleted but keep data). 
+ + Args: + conversation_id: Conversation to delete + user: Requesting user + permanent: If True, permanently delete (admin only) + """ try: # Create masked identifier for logging masked_id = f"{conversation_id[:8]}...{conversation_id[-4:]}" if len(conversation_id) > 12 else "***" - logger.info(f"Attempting to delete conversation: {masked_id}") + logger.info(f"Attempting to {'permanently ' if permanent else ''}delete conversation: {masked_id}") # Find the conversation using Beanie conversation = await Conversation.find_one(Conversation.conversation_id == conversation_id) @@ -208,69 +312,91 @@ async def delete_conversation(conversation_id: str, user: User): } ) - # Get file paths before deletion - audio_path = conversation.audio_path - cropped_audio_path = conversation.cropped_audio_path - audio_uuid = conversation.audio_uuid - client_id = conversation.client_id - - # Delete the conversation from database - await conversation.delete() - logger.info(f"Deleted conversation {conversation_id}") - - # Also delete from legacy AudioFile collection if it exists (backward compatibility) - audio_file = await AudioFile.find_one(AudioFile.audio_uuid == audio_uuid) - if audio_file: - await audio_file.delete() - logger.info(f"Deleted legacy audio file record for {audio_uuid}") - - # Delete associated audio files from disk - deleted_files = [] - if audio_path: - try: - # Construct full path to audio file - full_audio_path = Path("/app/audio_chunks") / audio_path - if full_audio_path.exists(): - full_audio_path.unlink() - deleted_files.append(str(full_audio_path)) - logger.info(f"Deleted audio file: {full_audio_path}") - except Exception as e: - logger.warning(f"Failed to delete audio file {audio_path}: {e}") - - if cropped_audio_path: - try: - # Construct full path to cropped audio file - full_cropped_path = Path("/app/audio_chunks") / cropped_audio_path - if full_cropped_path.exists(): - full_cropped_path.unlink() - deleted_files.append(str(full_cropped_path)) - 
logger.info(f"Deleted cropped audio file: {full_cropped_path}") - except Exception as e: - logger.warning(f"Failed to delete cropped audio file {cropped_audio_path}: {e}") - - logger.info(f"Successfully deleted conversation {conversation_id} for user {user.user_id}") - - # Prepare response message - delete_summary = ["conversation"] - if deleted_files: - delete_summary.append(f"{len(deleted_files)} audio file(s)") + # Hard delete (admin only, permanent flag) + if permanent and user.is_superuser: + return await _hard_delete_conversation(conversation) + + # Soft delete (default) + return await _soft_delete_conversation(conversation, user) + + except Exception as e: + logger.error(f"Error deleting conversation {conversation_id}: {e}") + return JSONResponse( + status_code=500, + content={"error": f"Failed to delete conversation: {str(e)}"} + ) + + +async def restore_conversation(conversation_id: str, user: User) -> JSONResponse: + """ + Restore a soft-deleted conversation. + + Args: + conversation_id: Conversation to restore + user: Requesting user + """ + try: + conversation = await Conversation.find_one( + Conversation.conversation_id == conversation_id + ) + + if not conversation: + return JSONResponse( + status_code=404, + content={"error": "Conversation not found"} + ) + + # Permission check + if not user.is_superuser and conversation.user_id != str(user.user_id): + return JSONResponse( + status_code=403, + content={"error": "Access denied"} + ) + + if not conversation.deleted: + return JSONResponse( + status_code=400, + content={"error": "Conversation is not deleted"} + ) + + # Restore conversation + conversation.deleted = False + conversation.deletion_reason = None + conversation.deleted_at = None + await conversation.save() + + # Restore audio chunks + result = await AudioChunkDocument.find( + AudioChunkDocument.conversation_id == conversation_id, + AudioChunkDocument.deleted == True + ).update_many({ + "$set": { + "deleted": False, + "deleted_at": None + } + 
}) + + restored_chunks = result.modified_count + + logger.info( + f"Restored conversation {conversation_id} " + f"({restored_chunks} chunks) for user {user.user_id}" + ) return JSONResponse( status_code=200, content={ - "message": f"Successfully deleted {', '.join(delete_summary)} '{conversation_id}'", - "deleted_files": deleted_files, - "client_id": client_id, + "message": f"Successfully restored conversation '{conversation_id}'", + "restored_chunks": restored_chunks, "conversation_id": conversation_id, - "audio_uuid": audio_uuid } ) except Exception as e: - logger.error(f"Error deleting conversation {conversation_id}: {e}") + logger.error(f"Error restoring conversation {conversation_id}: {e}") return JSONResponse( status_code=500, - content={"error": f"Failed to delete conversation: {str(e)}"} + content={"error": f"Failed to restore conversation: {str(e)}"} ) @@ -286,108 +412,85 @@ async def reprocess_transcript(conversation_id: str, user: User): if not user.is_superuser and conversation_model.user_id != str(user.user_id): return JSONResponse(status_code=403, content={"error": "Access forbidden. 
You can only reprocess your own conversations."}) - # Get audio_uuid and file path from conversation - audio_uuid = conversation_model.audio_uuid - audio_path = conversation_model.audio_path - - if not audio_path: - return JSONResponse( - status_code=400, content={"error": "No audio file found for this conversation"} - ) - - # Check if file exists - try multiple possible locations - possible_paths = [ - Path("/app/audio_chunks") / audio_path, - Path(audio_path), # fallback to relative path - ] - - full_audio_path = None - for path in possible_paths: - if path.exists(): - full_audio_path = path - break + # Get audio_uuid from conversation + # Validate audio chunks exist in MongoDB + chunks = await AudioChunkDocument.find( + AudioChunkDocument.conversation_id == conversation_id + ).to_list() - if not full_audio_path: + if not chunks: return JSONResponse( - status_code=422, + status_code=404, content={ - "error": "Audio file not found on disk", - "details": f"Conversation exists but audio file '{audio_path}' is missing from expected locations", - "searched_paths": [str(p) for p in possible_paths] + "error": "No audio data found for this conversation", + "details": f"Conversation '{conversation_id}' exists but has no audio chunks in MongoDB" } ) # Create new transcript version ID - import uuid version_id = str(uuid.uuid4()) - # Enqueue job chain with RQ (transcription -> speaker recognition -> cropping -> memory) - from advanced_omi_backend.workers.transcription_jobs import transcribe_full_audio_job - from advanced_omi_backend.workers.speaker_jobs import recognise_speakers_job - from advanced_omi_backend.workers.audio_jobs import process_cropping_job - from advanced_omi_backend.workers.memory_jobs import process_memory_job - from advanced_omi_backend.controllers.queue_controller import transcription_queue, memory_queue, default_queue, JOB_RESULT_TTL + # Enqueue job chain with RQ (transcription -> speaker recognition -> memory) + from 
advanced_omi_backend.workers.transcription_jobs import ( + transcribe_full_audio_job, + ) - # Job 1: Transcribe audio to text + # Job 1: Transcribe audio to text (reconstructs from MongoDB chunks) transcript_job = transcription_queue.enqueue( transcribe_full_audio_job, conversation_id, - audio_uuid, - str(full_audio_path), version_id, "reprocess", job_timeout=600, result_ttl=JOB_RESULT_TTL, job_id=f"reprocess_{conversation_id[:8]}", description=f"Transcribe audio for {conversation_id[:8]}", - meta={'audio_uuid': audio_uuid, 'conversation_id': conversation_id} + meta={'conversation_id': conversation_id} ) logger.info(f"πŸ“₯ RQ: Enqueued transcription job {transcript_job.id}") - # Job 2: Recognize speakers (depends on transcription) - speaker_job = transcription_queue.enqueue( - recognise_speakers_job, - conversation_id, - version_id, - str(full_audio_path), - "", # transcript_text - will be read from DB - [], # words - will be read from DB - depends_on=transcript_job, - job_timeout=600, - result_ttl=JOB_RESULT_TTL, - job_id=f"speaker_{conversation_id[:8]}", - description=f"Recognize speakers for {conversation_id[:8]}", - meta={'audio_uuid': audio_uuid, 'conversation_id': conversation_id} - ) - logger.info(f"πŸ“₯ RQ: Enqueued speaker recognition job {speaker_job.id} (depends on {transcript_job.id})") - - # Job 3: Audio cropping (depends on speaker recognition) - cropping_job = default_queue.enqueue( - process_cropping_job, - conversation_id, - str(full_audio_path), - depends_on=speaker_job, - job_timeout=300, - result_ttl=JOB_RESULT_TTL, - job_id=f"crop_{conversation_id[:8]}", - description=f"Crop audio for {conversation_id[:8]}", - meta={'audio_uuid': audio_uuid, 'conversation_id': conversation_id} - ) - logger.info(f"πŸ“₯ RQ: Enqueued audio cropping job {cropping_job.id} (depends on {speaker_job.id})") + # Check if speaker recognition is enabled + speaker_config = get_service_config('speaker_recognition') + speaker_enabled = speaker_config.get('enabled', True) # 
Default to True for backward compatibility + + # Job 2: Recognize speakers (conditional - only if enabled) + speaker_dependency = transcript_job # Start with transcription job + speaker_job = None + + if speaker_enabled: + speaker_job = transcription_queue.enqueue( + recognise_speakers_job, + conversation_id, + version_id, + depends_on=transcript_job, + job_timeout=600, + result_ttl=JOB_RESULT_TTL, + job_id=f"speaker_{conversation_id[:8]}", + description=f"Recognize speakers for {conversation_id[:8]}", + meta={'conversation_id': conversation_id} + ) + speaker_dependency = speaker_job # Chain for next job + logger.info(f"πŸ“₯ RQ: Enqueued speaker recognition job {speaker_job.id} (depends on {transcript_job.id})") + else: + logger.info(f"⏭️ Speaker recognition disabled, skipping speaker job for conversation {conversation_id[:8]}") - # Job 4: Extract memories (depends on cropping) + # Job 3: Extract memories + # Depends on speaker job if it was created, otherwise depends on transcription # Note: redis_client is injected by @async_job decorator, don't pass it directly memory_job = memory_queue.enqueue( process_memory_job, conversation_id, - depends_on=cropping_job, + depends_on=speaker_dependency, # Either speaker_job or transcript_job job_timeout=1800, result_ttl=JOB_RESULT_TTL, job_id=f"memory_{conversation_id[:8]}", description=f"Extract memories for {conversation_id[:8]}", - meta={'audio_uuid': audio_uuid, 'conversation_id': conversation_id} + meta={'conversation_id': conversation_id} ) - logger.info(f"πŸ“₯ RQ: Enqueued memory job {memory_job.id} (depends on {cropping_job.id})") + if speaker_job: + logger.info(f"πŸ“₯ RQ: Enqueued memory job {memory_job.id} (depends on speaker job {speaker_job.id})") + else: + logger.info(f"πŸ“₯ RQ: Enqueued memory job {memory_job.id} (depends on transcript job {transcript_job.id})") job = transcript_job # For backward compatibility with return value logger.info(f"Created transcript reprocessing job {job.id} (version: {version_id}) 
for conversation {conversation_id}") @@ -439,12 +542,9 @@ async def reprocess_memory(conversation_id: str, transcript_version_id: str, use ) # Create new memory version ID - import uuid version_id = str(uuid.uuid4()) # Enqueue memory processing job with RQ (RQ handles job tracking) - from advanced_omi_backend.workers.memory_jobs import enqueue_memory_processing - from advanced_omi_backend.models.job import JobPriority job = enqueue_memory_processing( client_id=conversation_model.client_id, @@ -469,6 +569,172 @@ async def reprocess_memory(conversation_id: str, transcript_version_id: str, use return JSONResponse(status_code=500, content={"error": "Error starting memory reprocessing"}) +async def reprocess_speakers( + conversation_id: str, + transcript_version_id: str, + user: User +): + """ + Reprocess speaker identification for a specific transcript version. + Users can only reprocess their own conversations. + + Creates NEW transcript version with same text/words but re-identified speakers. + Automatically chains memory reprocessing since speaker attribution affects meaning. + """ + try: + # 1. Find conversation and validate ownership + conversation_model = await Conversation.find_one( + Conversation.conversation_id == conversation_id + ) + if not conversation_model: + return JSONResponse( + status_code=404, + content={"error": "Conversation not found"} + ) + + # Check ownership for non-admin users + if not user.is_superuser and conversation_model.user_id != str(user.user_id): + return JSONResponse( + status_code=403, + content={"error": "Access forbidden. You can only reprocess your own conversations."} + ) + + # 2. 
Resolve source transcript version ID (handle "active" special case) + source_version_id = transcript_version_id + if source_version_id == "active": + active_version_id = conversation_model.active_transcript_version + if not active_version_id: + return JSONResponse( + status_code=404, + content={"error": "No active transcript version found"} + ) + source_version_id = active_version_id + + # 3. Find and validate the source transcript version + source_version = None + for version in conversation_model.transcript_versions: + if version.version_id == source_version_id: + source_version = version + break + + if not source_version: + return JSONResponse( + status_code=404, + content={"error": f"Transcript version '{source_version_id}' not found"} + ) + + # 4. Validate transcript has content and words + if not source_version.transcript: + return JSONResponse( + status_code=400, + content={"error": "Cannot re-diarize empty transcript. Transcript version has no text."} + ) + + if not source_version.words: + return JSONResponse( + status_code=400, + content={"error": "Cannot re-diarize transcript without word timings. Words are required for diarization."} + ) + + # 5. Check if speaker recognition is enabled + speaker_config = get_service_config('speaker_recognition') + if not speaker_config.get('enabled', True): + return JSONResponse( + status_code=400, + content={ + "error": "Speaker recognition is disabled", + "details": "Enable speaker service in config to use this feature" + } + ) + + # 6. 
Create NEW transcript version (copy text/words, empty segments) + new_version_id = str(uuid.uuid4()) + + # Add new version with copied text/words but empty segments + # Speaker job will populate segments with re-identified speakers + conversation_model.add_transcript_version( + version_id=new_version_id, + transcript=source_version.transcript, # COPY transcript text + words=source_version.words, # COPY word timings + segments=[], # Empty - will be populated by speaker job + provider=source_version.provider, + model=source_version.model, + processing_time_seconds=None, # Will be updated by job + metadata={ + "reprocessing_type": "speaker_diarization", + "source_version_id": source_version_id, + "trigger": "manual_reprocess" + }, + set_as_active=True # Set new version as active + ) + + # Save conversation with new version + await conversation_model.save() + + logger.info( + f"Created new transcript version {new_version_id} from source {source_version_id} " + f"for conversation {conversation_id}" + ) + + # 7. Enqueue speaker recognition job with NEW version_id + speaker_job = transcription_queue.enqueue( + recognise_speakers_job, + conversation_id, + new_version_id, # NEW version (not source) + job_timeout=1200, # 20 minutes + result_ttl=JOB_RESULT_TTL, + job_id=f"reprocess_speaker_{conversation_id[:12]}", + description=f"Re-diarize speakers for {conversation_id[:8]}", + meta={ + 'conversation_id': conversation_id, + 'version_id': new_version_id, + 'source_version_id': source_version_id, + 'trigger': 'reprocess' + } + ) + + logger.info( + f"Enqueued speaker reprocessing job {speaker_job.id} " + f"for new version {new_version_id}" + ) + + # 8. 
Chain memory reprocessing (speaker changes affect memory context) + memory_job = memory_queue.enqueue( + process_memory_job, + conversation_id, + depends_on=speaker_job, + job_timeout=1800, # 30 minutes + result_ttl=JOB_RESULT_TTL, + job_id=f"memory_{conversation_id[:12]}", + description=f"Extract memories for {conversation_id[:8]}", + meta={ + 'conversation_id': conversation_id, + 'trigger': 'reprocess_after_speaker' + } + ) + + logger.info( + f"Chained memory reprocessing job {memory_job.id} " + f"after speaker job {speaker_job.id}" + ) + + # 9. Return job information + return JSONResponse(content={ + "message": "Speaker reprocessing started", + "job_id": speaker_job.id, + "version_id": new_version_id, # NEW version ID + "source_version_id": source_version_id, # Original version used as source + "status": "queued" + }) + + except Exception as e: + logger.error(f"Error starting speaker reprocessing: {e}") + return JSONResponse( + status_code=500, + content={"error": "Error starting speaker reprocessing"} + ) + + async def activate_transcript_version(conversation_id: str, version_id: str, user: User): """Activate a specific transcript version. Users can only modify their own conversations.""" try: diff --git a/backends/advanced/src/advanced_omi_backend/controllers/memory_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/memory_controller.py index f52167de..5abf4b36 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/memory_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/memory_controller.py @@ -139,33 +139,6 @@ async def delete_memory(memory_id: str, user: User): ) -async def get_memories_unfiltered(user: User, limit: int, user_id: Optional[str] = None): - """Get all memories including fallback transcript memories (for debugging). 
Users see only their own memories, admins can see all or filter by user.""" - try: - memory_service = get_memory_service() - - # Determine which user's memories to fetch - target_user_id = user.user_id - if user.is_superuser and user_id: - target_user_id = user_id - - # Execute memory retrieval directly (now async) - memories = await memory_service.get_all_memories_unfiltered(target_user_id, limit) - - return { - "memories": memories, - "count": len(memories), - "user_id": target_user_id, - "includes_fallback": True, - } - - except Exception as e: - audio_logger.error(f"Error fetching unfiltered memories: {e}", exc_info=True) - return JSONResponse( - status_code=500, content={"message": f"Error fetching unfiltered memories: {str(e)}"} - ) - - async def add_memory(content: str, user: User, source_id: Optional[str] = None): """Add a memory directly from content text. Extracts structured memories from the provided content.""" try: diff --git a/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py index 91773756..2d0577e7 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/queue_controller.py @@ -17,11 +17,12 @@ import redis from rq import Queue, Worker -from rq.job import Job +from rq.job import Job, JobStatus from rq.registry import ScheduledJobRegistry, DeferredJobRegistry from advanced_omi_backend.models.job import JobPriority from advanced_omi_backend.models.conversation import Conversation +from advanced_omi_backend.config_loader import get_service_config logger = logging.getLogger(__name__) @@ -29,6 +30,52 @@ REDIS_URL = os.getenv("REDIS_URL", "redis://localhost:6379/0") redis_conn = redis.from_url(REDIS_URL) + +def get_job_status_from_rq(job: Job) -> str: + """ + Get job status using RQ's native method. 
+ + Uses job.get_status() which is the Redis Queue standard approach. + Returns RQ's standard status names. + + Returns one of: queued, started, finished, failed, deferred, scheduled, canceled, stopped + + Raises: + RuntimeError: If job status is unexpected (should never happen with RQ's method) + """ + rq_status = job.get_status() + + # RQ returns status as JobStatus enum or string + # Convert to string if it's an enum + if isinstance(rq_status, JobStatus): + status_str = rq_status.value + else: + status_str = str(rq_status) + + # Validate it's a known RQ status + valid_statuses = { + JobStatus.QUEUED.value, + JobStatus.STARTED.value, + JobStatus.FINISHED.value, + JobStatus.FAILED.value, + JobStatus.DEFERRED.value, + JobStatus.SCHEDULED.value, + JobStatus.CANCELED.value, + JobStatus.STOPPED.value, + } + + if status_str not in valid_statuses: + logger.error( + f"Job {job.id} has unexpected RQ status: {status_str}. " + f"This indicates RQ library added a new status we don't know about." + ) + raise RuntimeError( + f"Job {job.id} has unknown RQ status: {status_str}. " + f"Please update get_job_status_from_rq() to handle this new status." 
+ ) + + return status_str + # Queue name constants TRANSCRIPTION_QUEUE = "transcription" MEMORY_QUEUE = "memory" @@ -60,34 +107,34 @@ def get_queue(queue_name: str = DEFAULT_QUEUE) -> Queue: def get_job_stats() -> Dict[str, Any]: - """Get statistics about jobs in all queues matching frontend expectations.""" + """Get statistics about jobs in all queues using RQ standard status names.""" total_jobs = 0 queued_jobs = 0 - processing_jobs = 0 - completed_jobs = 0 + started_jobs = 0 # RQ standard: "started" not "processing" + finished_jobs = 0 # RQ standard: "finished" not "completed" failed_jobs = 0 - cancelled_jobs = 0 + canceled_jobs = 0 # RQ standard: "canceled" not "cancelled" deferred_jobs = 0 # Jobs waiting for dependencies (depends_on) for queue_name in QUEUE_NAMES: queue = get_queue(queue_name) queued_jobs += len(queue) - processing_jobs += len(queue.started_job_registry) - completed_jobs += len(queue.finished_job_registry) + started_jobs += len(queue.started_job_registry) + finished_jobs += len(queue.finished_job_registry) failed_jobs += len(queue.failed_job_registry) - cancelled_jobs += len(queue.canceled_job_registry) + canceled_jobs += len(queue.canceled_job_registry) deferred_jobs += len(queue.deferred_job_registry) - total_jobs = queued_jobs + processing_jobs + completed_jobs + failed_jobs + cancelled_jobs + deferred_jobs + total_jobs = queued_jobs + started_jobs + finished_jobs + failed_jobs + canceled_jobs + deferred_jobs return { "total_jobs": total_jobs, "queued_jobs": queued_jobs, - "processing_jobs": processing_jobs, - "completed_jobs": completed_jobs, + "started_jobs": started_jobs, + "finished_jobs": finished_jobs, "failed_jobs": failed_jobs, - "cancelled_jobs": cancelled_jobs, + "canceled_jobs": canceled_jobs, "deferred_jobs": deferred_jobs, "timestamp": datetime.utcnow().isoformat() } @@ -113,24 +160,32 @@ def get_jobs( Returns: Dict with jobs list and pagination metadata matching frontend expectations """ + logger.info(f"πŸ” DEBUG get_jobs: 
Filtering - queue_name={queue_name}, job_type={job_type}, client_id={client_id}") all_jobs = [] + seen_job_ids = set() # Track which job IDs we've already processed to avoid duplicates queues_to_check = [queue_name] if queue_name else QUEUE_NAMES + logger.info(f"πŸ” DEBUG get_jobs: Checking queues: {queues_to_check}") for qname in queues_to_check: queue = get_queue(qname) - # Collect jobs from all registries + # Collect jobs from all registries (using RQ standard status names) registries = [ (queue.job_ids, "queued"), - (queue.started_job_registry.get_job_ids(), "processing"), - (queue.finished_job_registry.get_job_ids(), "completed"), + (queue.started_job_registry.get_job_ids(), "started"), # RQ standard, not "processing" + (queue.finished_job_registry.get_job_ids(), "finished"), # RQ standard, not "completed" (queue.failed_job_registry.get_job_ids(), "failed"), (queue.deferred_job_registry.get_job_ids(), "deferred"), # Jobs waiting for dependencies ] for job_ids, status in registries: for job_id in job_ids: + # Skip if we've already processed this job_id (prevents duplicates across registries) + if job_id in seen_job_ids: + continue + seen_job_ids.add(job_id) + try: job = Job.fetch(job_id, connection=redis_conn) @@ -140,16 +195,23 @@ def get_jobs( # Extract just the function name (e.g., "listen_for_speech_job" from "module.listen_for_speech_job") func_name = job.func_name.split('.')[-1] if job.func_name else "unknown" + # Debug: Log job details before filtering + logger.debug(f"πŸ” DEBUG get_jobs: Job {job_id} - func_name={func_name}, full_func_name={job.func_name}, meta_client_id={job.meta.get('client_id', '') if job.meta else ''}, status={status}") + # Apply job_type filter if job_type and job_type not in func_name: + logger.debug(f"πŸ” DEBUG get_jobs: Filtered out {job_id} - job_type '{job_type}' not in func_name '{func_name}'") continue # Apply client_id filter (partial match in meta) if client_id: job_client_id = job.meta.get("client_id", "") if job.meta 
else "" if client_id not in job_client_id: + logger.debug(f"πŸ” DEBUG get_jobs: Filtered out {job_id} - client_id '{client_id}' not in job_client_id '{job_client_id}'") continue + logger.debug(f"πŸ” DEBUG get_jobs: Including job {job_id} in results") + all_jobs.append({ "job_id": job.id, "job_type": func_name, @@ -182,6 +244,8 @@ def get_jobs( paginated_jobs = all_jobs[offset:offset + limit] has_more = (offset + limit) < total_jobs + logger.info(f"πŸ” DEBUG get_jobs: Found {total_jobs} matching jobs (returning {len(paginated_jobs)} after pagination)") + return { "jobs": paginated_jobs, "pagination": { @@ -193,15 +257,15 @@ def get_jobs( } -def all_jobs_complete_for_session(session_id: str) -> bool: +def all_jobs_complete_for_client(client_id: str) -> bool: """ - Check if all jobs associated with a session are in terminal states. + Check if all jobs associated with a client are in terminal states. - Only checks jobs with audio_uuid in job.meta (no backward compatibility). + Checks jobs with client_id in job.meta. Traverses dependency chains to include dependent jobs. Args: - session_id: The audio_uuid (session ID) to check jobs for + client_id: The client device identifier to check jobs for Returns: True if all jobs are complete (or no jobs found), False if any job is still processing @@ -230,7 +294,7 @@ def is_job_complete(job): return True - # Find all jobs for this session + # Find all jobs for this client all_queues = [transcription_queue, memory_queue, audio_queue, default_queue] for queue in all_queues: registries = [ @@ -248,8 +312,8 @@ def is_job_complete(job): try: job = Job.fetch(job_id, connection=redis_conn) - # Only check jobs with audio_uuid in meta - if job.meta and job.meta.get('audio_uuid') == session_id: + # Only check jobs with client_id in meta + if job.meta and job.meta.get('client_id') == client_id: if not is_job_complete(job): return False except Exception as e: @@ -271,14 +335,16 @@ def start_streaming_jobs( 2. 
Audio persistence job - writes audio chunks to WAV file (file rotation per conversation) Args: - session_id: Stream session ID (audio_uuid) + session_id: Stream session ID (equals client_id for streaming) user_id: User identifier client_id: Client identifier Returns: Dict with job IDs: {'speech_detection': job_id, 'audio_persistence': job_id} - Note: user_email is fetched from the database when needed. + Note: + - user_email is fetched from the database when needed. + - always_persist setting is read from global config by the audio persistence job. """ from advanced_omi_backend.workers.transcription_jobs import stream_speech_detection_job from advanced_omi_backend.workers.audio_jobs import audio_streaming_persistence_job @@ -290,12 +356,22 @@ def start_streaming_jobs( user_id, client_id, job_timeout=86400, # 24 hours for all-day sessions - result_ttl=JOB_RESULT_TTL, + ttl=None, # No pre-run expiry (job can wait indefinitely in queue) + result_ttl=JOB_RESULT_TTL, # Cleanup AFTER completion + failure_ttl=86400, # Cleanup failed jobs after 24h job_id=f"speech-detect_{session_id[:12]}", description=f"Listening for speech...", - meta={'audio_uuid': session_id, 'client_id': client_id, 'session_level': True} + meta={'client_id': client_id, 'session_level': True} ) + # Log job enqueue with TTL information for debugging + actual_ttl = redis_conn.ttl(f"rq:job:{speech_job.id}") logger.info(f"πŸ“₯ RQ: Enqueued speech detection job {speech_job.id}") + logger.info( + f"πŸ” Job enqueue details: ID={speech_job.id}, " + f"job_timeout={speech_job.timeout}, result_ttl={speech_job.result_ttl}, " + f"failure_ttl={speech_job.failure_ttl}, redis_key_ttl={actual_ttl}, " + f"queue_length={transcription_queue.count}, client_id={client_id}" + ) # Store job ID for cleanup (keyed by client_id for easy WebSocket cleanup) try: @@ -307,18 +383,29 @@ def start_streaming_jobs( # Enqueue audio persistence job on dedicated audio queue # NOTE: This job handles file rotation for multiple conversations 
automatically # Runs for entire session, not tied to individual conversations + # The job reads always_persist_enabled from global config internally audio_job = audio_queue.enqueue( audio_streaming_persistence_job, session_id, user_id, client_id, job_timeout=86400, # 24 hours for all-day sessions - result_ttl=JOB_RESULT_TTL, + ttl=None, # No pre-run expiry (job can wait indefinitely in queue) + result_ttl=JOB_RESULT_TTL, # Cleanup AFTER completion + failure_ttl=86400, # Cleanup failed jobs after 24h job_id=f"audio-persist_{session_id[:12]}", description=f"Audio persistence for session {session_id[:12]}", - meta={'audio_uuid': session_id, 'session_level': True} # Mark as session-level job + meta={'client_id': client_id, 'session_level': True} # Mark as session-level job ) + # Log job enqueue with TTL information for debugging + actual_ttl = redis_conn.ttl(f"rq:job:{audio_job.id}") logger.info(f"πŸ“₯ RQ: Enqueued audio persistence job {audio_job.id} on audio queue") + logger.info( + f"πŸ” Job enqueue details: ID={audio_job.id}, " + f"job_timeout={audio_job.timeout}, result_ttl={audio_job.result_ttl}, " + f"failure_ttl={audio_job.failure_ttl}, redis_key_ttl={actual_ttl}, " + f"queue_length={audio_queue.count}, client_id={client_id}" + ) return { 'speech_detection': speech_job.id, @@ -328,151 +415,169 @@ def start_streaming_jobs( def start_post_conversation_jobs( conversation_id: str, - audio_uuid: str, - audio_file_path: str, user_id: str, - post_transcription: bool = True, transcript_version_id: Optional[str] = None, depends_on_job = None, - client_id: Optional[str] = None + client_id: Optional[str] = None, + end_reason: str = "file_upload" ) -> Dict[str, str]: """ Start post-conversation processing jobs after conversation is created. This creates the standard processing chain after a conversation is created: - 1. [Optional] Transcription job - Batch transcription (if post_transcription=True) - 2. Audio cropping job - Removes silence from audio - 3. 
Speaker recognition job - Identifies speakers in audio - 4. Memory extraction job - Extracts memories from conversation (parallel) - 5. Title/summary generation job - Generates title and summary (parallel) + 1. Speaker recognition job - Identifies speakers in audio segments + 2. Memory extraction job - Extracts memories from conversation + 3. Title/summary generation job - Generates title and summary + 4. Event dispatch job - Triggers conversation.complete plugins + + Note: Batch transcription removed - streaming conversations use streaming transcript. + For file uploads, batch transcription must be enqueued separately before calling this function. Args: conversation_id: Conversation identifier - audio_uuid: Audio UUID for job tracking - audio_file_path: Path to audio file user_id: User identifier - post_transcription: If True, run batch transcription step (for uploads) - If False, skip transcription (streaming already has it) transcript_version_id: Transcript version ID (auto-generated if None) - depends_on_job: Optional job dependency for cropping job + depends_on_job: Optional job dependency for first job (e.g., transcription for file uploads) + client_id: Client ID for UI tracking + end_reason: Reason conversation ended (e.g., 'file_upload', 'websocket_disconnect', 'user_stopped') Returns: - Dict with job IDs (transcription will be None if post_transcription=False) + Dict with job IDs for speaker_recognition, memory, title_summary, event_dispatch """ - from advanced_omi_backend.workers.transcription_jobs import transcribe_full_audio_job from advanced_omi_backend.workers.speaker_jobs import recognise_speakers_job - from advanced_omi_backend.workers.audio_jobs import process_cropping_job from advanced_omi_backend.workers.memory_jobs import process_memory_job - from advanced_omi_backend.workers.conversation_jobs import generate_title_summary_job + from advanced_omi_backend.workers.conversation_jobs import generate_title_summary_job, 
dispatch_conversation_complete_event_job version_id = transcript_version_id or str(uuid.uuid4()) # Build job metadata (include client_id if provided for UI tracking) - job_meta = {'audio_uuid': audio_uuid, 'conversation_id': conversation_id} + job_meta = {'conversation_id': conversation_id} if client_id: job_meta['client_id'] = client_id - # Step 1: Batch transcription job (ALWAYS run to get correct conversation-relative timestamps) - # Even for streaming, we need batch transcription before cropping to fix cumulative timestamps - transcribe_job_id = f"transcribe_{conversation_id[:12]}" - logger.info(f"πŸ” DEBUG: Creating transcribe job with job_id={transcribe_job_id}, conversation_id={conversation_id[:12]}, audio_uuid={audio_uuid[:12]}") - - transcription_job = transcription_queue.enqueue( - transcribe_full_audio_job, - conversation_id, - audio_uuid, - audio_file_path, - version_id, - "batch", # trigger - job_timeout=1800, # 30 minutes - result_ttl=JOB_RESULT_TTL, - depends_on=depends_on_job, - job_id=transcribe_job_id, - description=f"Transcribe conversation {conversation_id[:8]}", - meta=job_meta - ) - logger.info(f"πŸ“₯ RQ: Enqueued transcription job {transcription_job.id}, meta={transcription_job.meta}") - crop_depends_on = transcription_job - - # Step 2: Audio cropping job (depends on transcription if it ran, otherwise depends_on_job) - crop_job_id = f"crop_{conversation_id[:12]}" - logger.info(f"πŸ” DEBUG: Creating crop job with job_id={crop_job_id}, conversation_id={conversation_id[:12]}, audio_uuid={audio_uuid[:12]}") - - cropping_job = default_queue.enqueue( - process_cropping_job, - conversation_id, - audio_file_path, - job_timeout=300, # 5 minutes - result_ttl=JOB_RESULT_TTL, - depends_on=crop_depends_on, - job_id=crop_job_id, - description=f"Crop audio for conversation {conversation_id[:8]}", - meta=job_meta - ) - logger.info(f"πŸ“₯ RQ: Enqueued cropping job {cropping_job.id}, meta={cropping_job.meta}") - - # Speaker recognition depends on cropping - 
speaker_depends_on = cropping_job - - # Step 3: Speaker recognition job - speaker_job_id = f"speaker_{conversation_id[:12]}" - logger.info(f"πŸ” DEBUG: Creating speaker job with job_id={speaker_job_id}, conversation_id={conversation_id[:12]}, audio_uuid={audio_uuid[:12]}") - - speaker_job = transcription_queue.enqueue( - recognise_speakers_job, - conversation_id, - version_id, - audio_file_path, - "", # transcript_text - will be read from DB - [], # words - will be read from DB - job_timeout=1200, # 20 minutes - result_ttl=JOB_RESULT_TTL, - depends_on=speaker_depends_on, - job_id=speaker_job_id, - description=f"Speaker recognition for conversation {conversation_id[:8]}", - meta=job_meta - ) - logger.info(f"πŸ“₯ RQ: Enqueued speaker recognition job {speaker_job.id}, meta={speaker_job.meta} (depends on {speaker_depends_on.id})") - - # Step 4: Memory extraction job (parallel with title/summary) - memory_job_id = f"memory_{conversation_id[:12]}" - logger.info(f"πŸ” DEBUG: Creating memory job with job_id={memory_job_id}, conversation_id={conversation_id[:12]}, audio_uuid={audio_uuid[:12]}") - - memory_job = memory_queue.enqueue( - process_memory_job, - conversation_id, - job_timeout=900, # 15 minutes - result_ttl=JOB_RESULT_TTL, - depends_on=speaker_job, - job_id=memory_job_id, - description=f"Memory extraction for conversation {conversation_id[:8]}", - meta=job_meta - ) - logger.info(f"πŸ“₯ RQ: Enqueued memory extraction job {memory_job.id}, meta={memory_job.meta} (depends on {speaker_job.id})") + # Check if speaker recognition is enabled + speaker_config = get_service_config('speaker_recognition') + speaker_enabled = speaker_config.get('enabled', True) # Default to True for backward compatibility + + # Step 1: Speaker recognition job (conditional - only if enabled) + speaker_dependency = depends_on_job # Start with upstream dependency (transcription if file upload) + speaker_job = None + + if speaker_enabled: + speaker_job_id = f"speaker_{conversation_id[:12]}" + 
logger.info(f"πŸ” DEBUG: Creating speaker job with job_id={speaker_job_id}, conversation_id={conversation_id[:12]}") + + speaker_job = transcription_queue.enqueue( + recognise_speakers_job, + conversation_id, + version_id, + job_timeout=1200, # 20 minutes + result_ttl=JOB_RESULT_TTL, + depends_on=speaker_dependency, + job_id=speaker_job_id, + description=f"Speaker recognition for conversation {conversation_id[:8]}", + meta=job_meta + ) + speaker_dependency = speaker_job # Chain for next jobs + if depends_on_job: + logger.info(f"πŸ“₯ RQ: Enqueued speaker recognition job {speaker_job.id}, meta={speaker_job.meta} (depends on {depends_on_job.id})") + else: + logger.info(f"πŸ“₯ RQ: Enqueued speaker recognition job {speaker_job.id}, meta={speaker_job.meta} (no dependencies, starts immediately)") + else: + logger.info(f"⏭️ Speaker recognition disabled, skipping speaker job for conversation {conversation_id[:8]}") + + # Step 2: Memory extraction job (conditional - only if enabled) + # Check if memory extraction is enabled + memory_config = get_service_config('memory.extraction') + memory_enabled = memory_config.get('enabled', True) # Default to True for backward compatibility + + memory_job = None + if memory_enabled: + # Depends on speaker job if it was created, otherwise depends on upstream (transcription or nothing) + memory_job_id = f"memory_{conversation_id[:12]}" + logger.info(f"πŸ” DEBUG: Creating memory job with job_id={memory_job_id}, conversation_id={conversation_id[:12]}") + + memory_job = memory_queue.enqueue( + process_memory_job, + conversation_id, + job_timeout=900, # 15 minutes + result_ttl=JOB_RESULT_TTL, + depends_on=speaker_dependency, # Either speaker_job or upstream dependency + job_id=memory_job_id, + description=f"Memory extraction for conversation {conversation_id[:8]}", + meta=job_meta + ) + if speaker_job: + logger.info(f"πŸ“₯ RQ: Enqueued memory extraction job {memory_job.id}, meta={memory_job.meta} (depends on speaker job {speaker_job.id})") + 
elif depends_on_job: + logger.info(f"πŸ“₯ RQ: Enqueued memory extraction job {memory_job.id}, meta={memory_job.meta} (depends on {depends_on_job.id})") + else: + logger.info(f"πŸ“₯ RQ: Enqueued memory extraction job {memory_job.id}, meta={memory_job.meta} (no dependencies, starts immediately)") + else: + logger.info(f"⏭️ Memory extraction disabled, skipping memory job for conversation {conversation_id[:8]}") - # Step 5: Title/summary generation job (parallel with memory, independent) - # This ensures conversations always get titles/summaries even if memory job fails + # Step 3: Title/summary generation job + # Depends on speaker job if enabled, otherwise on upstream dependency title_job_id = f"title_summary_{conversation_id[:12]}" - logger.info(f"πŸ” DEBUG: Creating title/summary job with job_id={title_job_id}, conversation_id={conversation_id[:12]}, audio_uuid={audio_uuid[:12]}") + logger.info(f"πŸ” DEBUG: Creating title/summary job with job_id={title_job_id}, conversation_id={conversation_id[:12]}") title_summary_job = default_queue.enqueue( generate_title_summary_job, conversation_id, job_timeout=300, # 5 minutes result_ttl=JOB_RESULT_TTL, - depends_on=speaker_job, # Depends on speaker job, NOT memory job + depends_on=speaker_dependency, # Depends on speaker job if enabled, NOT memory job job_id=title_job_id, description=f"Generate title and summary for conversation {conversation_id[:8]}", meta=job_meta ) - logger.info(f"πŸ“₯ RQ: Enqueued title/summary job {title_summary_job.id}, meta={title_summary_job.meta} (depends on {speaker_job.id})") + if speaker_job: + logger.info(f"πŸ“₯ RQ: Enqueued title/summary job {title_summary_job.id}, meta={title_summary_job.meta} (depends on speaker job {speaker_job.id})") + elif depends_on_job: + logger.info(f"πŸ“₯ RQ: Enqueued title/summary job {title_summary_job.id}, meta={title_summary_job.meta} (depends on {depends_on_job.id})") + else: + logger.info(f"πŸ“₯ RQ: Enqueued title/summary job {title_summary_job.id}, 
meta={title_summary_job.meta} (no dependencies, starts immediately)") + + # Step 5: Dispatch conversation.complete event (runs after both memory and title/summary complete) + # This ensures plugins receive the event after all processing is done + event_job_id = f"event_complete_{conversation_id[:12]}" + logger.info(f"πŸ” DEBUG: Creating conversation complete event job with job_id={event_job_id}, conversation_id={conversation_id[:12]}") + + # Event job depends on memory and title/summary jobs that were actually enqueued + # Build dependency list excluding None values + event_dependencies = [] + if memory_job: + event_dependencies.append(memory_job) + if title_summary_job: + event_dependencies.append(title_summary_job) + + # Enqueue event dispatch job (may have no dependencies if all jobs were skipped) + event_dispatch_job = default_queue.enqueue( + dispatch_conversation_complete_event_job, + conversation_id, + client_id or "", + user_id, + end_reason, # Use the end_reason parameter (defaults to 'file_upload' for backward compatibility) + job_timeout=120, # 2 minutes + result_ttl=JOB_RESULT_TTL, + depends_on=event_dependencies if event_dependencies else None, # Wait for jobs that were enqueued + job_id=event_job_id, + description=f"Dispatch conversation complete event ({end_reason}) for {conversation_id[:8]}", + meta=job_meta + ) + + # Log event dispatch dependencies + if event_dependencies: + dep_ids = [job.id for job in event_dependencies] + logger.info(f"πŸ“₯ RQ: Enqueued conversation complete event job {event_dispatch_job.id}, meta={event_dispatch_job.meta} (depends on {', '.join(dep_ids)})") + else: + logger.info(f"πŸ“₯ RQ: Enqueued conversation complete event job {event_dispatch_job.id}, meta={event_dispatch_job.meta} (no dependencies, starts immediately)") return { - 'cropping': cropping_job.id, - 'transcription': transcription_job.id if transcription_job else None, - 'speaker_recognition': speaker_job.id, - 'memory': memory_job.id, - 'title_summary': 
title_summary_job.id + 'speaker_recognition': speaker_job.id if speaker_job else None, + 'memory': memory_job.id if memory_job else None, + 'title_summary': title_summary_job.id, + 'event_dispatch': event_dispatch_job.id } diff --git a/backends/advanced/src/advanced_omi_backend/controllers/session_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/session_controller.py index a3836898..fe9b87cd 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/session_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/session_controller.py @@ -9,13 +9,62 @@ import logging import time -from typing import Dict, List, Optional +from typing import Dict, List, Optional, Literal from fastapi.responses import JSONResponse logger = logging.getLogger(__name__) +async def mark_session_complete( + redis_client, + session_id: str, + reason: Literal[ + "websocket_disconnect", + "user_stopped", + "inactivity_timeout", + "max_duration", + "all_jobs_complete" + ], +) -> None: + """ + Single source of truth for marking sessions as complete. + + This function ensures that both 'status' and 'completion_reason' are ALWAYS + set together atomically, preventing race conditions where workers check status + before completion_reason is set. 
+ + Args: + redis_client: Redis async client + session_id: Session UUID + reason: Why the session is completing (enforced by type system) + + Usage: + # WebSocket disconnect + await mark_session_complete(redis, session_id, "websocket_disconnect") + + # User manually stopped + await mark_session_complete(redis, session_id, "user_stopped") + + # Inactivity timeout + await mark_session_complete(redis, session_id, "inactivity_timeout") + + # Max duration reached + await mark_session_complete(redis, session_id, "max_duration") + + # All jobs finished + await mark_session_complete(redis, session_id, "all_jobs_complete") + """ + session_key = f"audio:session:{session_id}" + mark_time = time.time() + await redis_client.hset(session_key, mapping={ + "status": "finished", + "completed_at": str(mark_time), + "completion_reason": reason + }) + logger.info(f"βœ… Session {session_id[:12]} marked finished: {reason} [TIME: {mark_time:.3f}]") + + async def get_session_info(redis_client, session_id: str) -> Optional[Dict]: """ Get detailed information about a specific session. @@ -151,7 +200,7 @@ async def get_streaming_status(request): transcription_queue, memory_queue, default_queue, - all_jobs_complete_for_session + all_jobs_complete_for_client ) try: @@ -181,19 +230,19 @@ async def get_streaming_status(request): # Separate active and completed sessions # Check if all jobs are complete (including failed jobs) - all_jobs_done = all_jobs_complete_for_session(session_id) - - # Session is completed if: - # 1. Redis status says complete/finalized AND all jobs done, OR - # 2. All jobs are done (even if status isn't complete yet) - # This ensures sessions with failed jobs move to completed - if status in ["complete", "completed", "finalized"] or all_jobs_done: + # Note: session_id == client_id in streaming context, but using client_id explicitly + all_jobs_done = all_jobs_complete_for_client(session_obj.get("client_id")) + + # Session is finished if: + # 1. 
Redis status says finished AND all jobs done, OR + # 2. All jobs are done (even if status isn't finished yet) + # This ensures sessions with failed jobs move to finished + if status == "finished" or all_jobs_done: if all_jobs_done: - # All jobs complete - this is truly a completed session - # Update Redis status if it wasn't already marked complete - if status not in ["complete", "completed", "finalized"]: - await redis_client.hset(key, "status", "complete") - logger.info(f"βœ… Marked session {session_id} as complete (all jobs terminal)") + # All jobs finished - this is truly a finished session + # Update Redis status if it wasn't already marked finished + if status != "finished": + await mark_session_complete(redis_client, session_id, "all_jobs_complete") # Get additional session data for completed sessions session_key = f"audio:session:{session_id}" @@ -204,7 +253,7 @@ async def get_streaming_status(request): "client_id": session_obj.get("client_id", ""), "conversation_id": session_data.get(b"conversation_id", b"").decode() if session_data and b"conversation_id" in session_data else None, "has_conversation": bool(session_data and session_data.get(b"conversation_id", b"")), - "action": session_data.get(b"action", b"complete").decode() if session_data and b"action" in session_data else "complete", + "action": session_data.get(b"action", b"finished").decode() if session_data and b"action" in session_data else "finished", "reason": session_data.get(b"reason", b"").decode() if session_data and b"reason" in session_data else "", "completed_at": session_obj.get("last_chunk_at", 0), "audio_file": session_data.get(b"audio_file", b"").decode() if session_data and b"audio_file" in session_data else "", @@ -403,26 +452,26 @@ async def get_streaming_status(request): rq_stats = { "transcription_queue": { "queued": transcription_queue.count, - "processing": len(transcription_queue.started_job_registry), - "completed": len(transcription_queue.finished_job_registry), + "started": 
len(transcription_queue.started_job_registry), + "finished": len(transcription_queue.finished_job_registry), "failed": len(transcription_queue.failed_job_registry), - "cancelled": len(transcription_queue.canceled_job_registry), + "canceled": len(transcription_queue.canceled_job_registry), "deferred": len(transcription_queue.deferred_job_registry) }, "memory_queue": { "queued": memory_queue.count, - "processing": len(memory_queue.started_job_registry), - "completed": len(memory_queue.finished_job_registry), + "started": len(memory_queue.started_job_registry), + "finished": len(memory_queue.finished_job_registry), "failed": len(memory_queue.failed_job_registry), - "cancelled": len(memory_queue.canceled_job_registry), + "canceled": len(memory_queue.canceled_job_registry), "deferred": len(memory_queue.deferred_job_registry) }, "default_queue": { "queued": default_queue.count, - "processing": len(default_queue.started_job_registry), - "completed": len(default_queue.finished_job_registry), + "started": len(default_queue.started_job_registry), + "finished": len(default_queue.finished_job_registry), "failed": len(default_queue.failed_job_registry), - "cancelled": len(default_queue.canceled_job_registry), + "canceled": len(default_queue.canceled_job_registry), "deferred": len(default_queue.deferred_job_registry) } } diff --git a/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py index aced763f..55a4b43e 100644 --- a/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/system_controller.py @@ -6,15 +6,22 @@ import os import shutil import time +import warnings from datetime import UTC, datetime +from pathlib import Path import yaml from fastapi import HTTPException from advanced_omi_backend.config import ( - load_diarization_settings_from_file, - save_diarization_settings_to_file, + 
get_diarization_settings as load_diarization_settings, + get_misc_settings as load_misc_settings, + save_misc_settings, ) +from advanced_omi_backend.config import ( + save_diarization_settings, +) +from advanced_omi_backend.config_loader import get_plugins_yml_path from advanced_omi_backend.model_registry import _find_config_path, load_models_config from advanced_omi_backend.models.user import User @@ -22,6 +29,201 @@ audio_logger = logging.getLogger("audio_processing") +async def get_config_diagnostics(): + """ + Get comprehensive configuration diagnostics. + + Returns warnings, errors, and status for all configuration components. + """ + diagnostics = { + "timestamp": datetime.now(UTC).isoformat(), + "overall_status": "healthy", + "issues": [], + "warnings": [], + "info": [], + "components": {} + } + + # Test OmegaConf configuration loading + try: + from advanced_omi_backend.config_loader import load_config + + # Capture warnings during config load + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + config = load_config(force_reload=True) + + # Check for OmegaConf warnings + for warning in w: + warning_msg = str(warning.message) + if "some elements are missing" in warning_msg.lower(): + # Extract the variable name from warning + if "variable '" in warning_msg.lower(): + var_name = warning_msg.split("'")[1] + diagnostics["warnings"].append({ + "component": "OmegaConf", + "severity": "warning", + "message": f"Environment variable '{var_name}' not set (using empty default)", + "resolution": f"Set {var_name} in .env file if needed" + }) + + diagnostics["components"]["omegaconf"] = { + "status": "healthy", + "message": "Configuration loaded successfully" + } + except Exception as e: + diagnostics["overall_status"] = "unhealthy" + diagnostics["issues"].append({ + "component": "OmegaConf", + "severity": "error", + "message": f"Failed to load configuration: {str(e)}", + "resolution": "Check config/defaults.yml and config/config.yml 
syntax" + }) + diagnostics["components"]["omegaconf"] = { + "status": "unhealthy", + "message": str(e) + } + + # Test model registry + try: + from advanced_omi_backend.model_registry import get_models_registry + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + registry = get_models_registry() + + # Capture model loading warnings + for warning in w: + warning_msg = str(warning.message) + diagnostics["warnings"].append({ + "component": "Model Registry", + "severity": "warning", + "message": warning_msg, + "resolution": "Check model definitions in config/defaults.yml" + }) + + if registry: + diagnostics["components"]["model_registry"] = { + "status": "healthy", + "message": f"Loaded {len(registry.models)} models", + "details": { + "total_models": len(registry.models), + "defaults": dict(registry.defaults) if registry.defaults else {} + } + } + + # Check critical models + stt = registry.get_default("stt") + stt_stream = registry.get_default("stt_stream") + llm = registry.get_default("llm") + + # STT check + if stt: + if stt.api_key: + diagnostics["info"].append({ + "component": "STT (Batch)", + "message": f"Configured: {stt.name} ({stt.model_provider}) - API key present" + }) + else: + diagnostics["warnings"].append({ + "component": "STT (Batch)", + "severity": "warning", + "message": f"{stt.name} ({stt.model_provider}) - No API key configured", + "resolution": "Transcription can fail without API key" + }) + else: + diagnostics["issues"].append({ + "component": "STT (Batch)", + "severity": "error", + "message": "No batch STT model configured", + "resolution": "Set defaults.stt in config.yml" + }) + diagnostics["overall_status"] = "partial" + + # Streaming STT check + if stt_stream: + if stt_stream.api_key: + diagnostics["info"].append({ + "component": "STT (Streaming)", + "message": f"Configured: {stt_stream.name} ({stt_stream.model_provider}) - API key present" + }) + else: + diagnostics["warnings"].append({ + "component": "STT 
(Streaming)", + "severity": "warning", + "message": f"{stt_stream.name} ({stt_stream.model_provider}) - No API key configured", + "resolution": "Real-time transcription can fail without API key" + }) + else: + diagnostics["warnings"].append({ + "component": "STT (Streaming)", + "severity": "warning", + "message": "No streaming STT model configured - streaming worker disabled", + "resolution": "Set defaults.stt_stream in config.yml for WebSocket transcription" + }) + + # LLM check + if llm: + if llm.api_key: + diagnostics["info"].append({ + "component": "LLM", + "message": f"Configured: {llm.name} ({llm.model_provider}) - API key present" + }) + else: + diagnostics["warnings"].append({ + "component": "LLM", + "severity": "warning", + "message": f"{llm.name} ({llm.model_provider}) - No API key configured", + "resolution": "Memory extraction can fail without API key" + }) + + else: + diagnostics["overall_status"] = "unhealthy" + diagnostics["issues"].append({ + "component": "Model Registry", + "severity": "error", + "message": "Failed to load model registry", + "resolution": "Check config/defaults.yml for syntax errors" + }) + diagnostics["components"]["model_registry"] = { + "status": "unhealthy", + "message": "Registry failed to load" + } + except Exception as e: + diagnostics["overall_status"] = "partial" + diagnostics["issues"].append({ + "component": "Model Registry", + "severity": "error", + "message": f"Error loading registry: {str(e)}", + "resolution": "Check logs for detailed error information" + }) + diagnostics["components"]["model_registry"] = { + "status": "unhealthy", + "message": str(e) + } + + # Check environment variables + env_checks = [ + ("DEEPGRAM_API_KEY", "Required for Deepgram transcription"), + ("OPENAI_API_KEY", "Required for OpenAI LLM and embeddings"), + ("AUTH_SECRET_KEY", "Required for authentication"), + ("ADMIN_EMAIL", "Required for admin user login"), + ("ADMIN_PASSWORD", "Required for admin user login"), + ] + + for env_var, 
description in env_checks: + value = os.getenv(env_var) + if not value or value == "": + diagnostics["warnings"].append({ + "component": "Environment Variables", + "severity": "warning", + "message": f"{env_var} not set - {description}", + "resolution": f"Set {env_var} in .env file" + }) + + return diagnostics + + async def get_current_metrics(): """Get current system metrics.""" try: @@ -64,8 +266,8 @@ async def get_auth_config(): async def get_diarization_settings(): """Get current diarization settings.""" try: - # Reload from file to get latest settings - settings = load_diarization_settings_from_file() + # Get settings using OmegaConf + settings = load_diarization_settings() return { "settings": settings, "status": "success" @@ -75,7 +277,7 @@ async def get_diarization_settings(): raise e -async def save_diarization_settings(settings: dict): +async def save_diarization_settings_controller(settings: dict): """Save diarization settings.""" try: # Validate settings @@ -84,11 +286,13 @@ async def save_diarization_settings(settings: dict): "min_duration_off", "min_speakers", "max_speakers" } + # Filter to only valid keys (allow round-trip GETβ†’POST) + filtered_settings = {} for key, value in settings.items(): if key not in valid_keys: - raise HTTPException(status_code=400, detail=f"Invalid setting key: {key}") + continue # Skip unknown keys instead of rejecting - # Type validation + # Type validation for known keys only if key in ["min_speakers", "max_speakers"]: if not isinstance(value, int) or value < 1 or value > 20: raise HTTPException(status_code=400, detail=f"Invalid value for {key}: must be integer 1-20") @@ -98,34 +302,165 @@ async def save_diarization_settings(settings: dict): else: if not isinstance(value, (int, float)) or value < 0: raise HTTPException(status_code=400, detail=f"Invalid value for {key}: must be positive number") - + + filtered_settings[key] = value + + # Reject if NO valid keys provided (completely invalid request) + if not 
filtered_settings: + raise HTTPException(status_code=400, detail="No valid diarization settings provided") + # Get current settings and merge with new values - current_settings = load_diarization_settings_from_file() - current_settings.update(settings) - - # Save to file - if save_diarization_settings_to_file(current_settings): - logger.info(f"Updated and saved diarization settings: {settings}") - + current_settings = load_diarization_settings() + current_settings.update(filtered_settings) + + # Save using OmegaConf + if save_diarization_settings(current_settings): + logger.info(f"Updated and saved diarization settings: {filtered_settings}") + return { "message": "Diarization settings saved successfully", "settings": current_settings, "status": "success" } else: - # Even if file save fails, we've updated the in-memory settings - logger.warning("Settings updated in memory but file save failed") + logger.warning("Settings save failed") return { - "message": "Settings updated (file save failed)", + "message": "Settings save failed", "settings": current_settings, - "status": "partial" + "status": "error" } - + except Exception as e: logger.exception("Error saving diarization settings") raise e +async def get_misc_settings(): + """Get current miscellaneous settings.""" + try: + # Get settings using OmegaConf + settings = load_misc_settings() + return { + "settings": settings, + "status": "success" + } + except Exception as e: + logger.exception("Error getting misc settings") + raise e + + +async def save_misc_settings_controller(settings: dict): + """Save miscellaneous settings.""" + try: + # Validate settings + valid_keys = {"always_persist_enabled", "use_provider_segments"} + + # Filter to only valid keys + filtered_settings = {} + for key, value in settings.items(): + if key not in valid_keys: + continue # Skip unknown keys + + # Type validation + if not isinstance(value, bool): + raise HTTPException(status_code=400, detail=f"Invalid value for {key}: must be 
boolean") + + filtered_settings[key] = value + + # Reject if NO valid keys provided + if not filtered_settings: + raise HTTPException(status_code=400, detail="No valid misc settings provided") + + # Save using OmegaConf + if save_misc_settings(filtered_settings): + # Get updated settings + updated_settings = load_misc_settings() + logger.info(f"Updated and saved misc settings: {filtered_settings}") + + return { + "message": "Miscellaneous settings saved successfully", + "settings": updated_settings, + "status": "success" + } + else: + logger.warning("Settings save failed") + return { + "message": "Settings save failed", + "settings": load_misc_settings(), + "status": "error" + } + + except HTTPException: + raise + except Exception as e: + logger.exception("Error saving misc settings") + raise e + + +async def get_cleanup_settings_controller(user: User) -> dict: + """ + Get current cleanup settings (admin only). + + Args: + user: Authenticated admin user + + Returns: + Dict with cleanup settings + """ + from advanced_omi_backend.config import get_cleanup_settings + + return get_cleanup_settings() + + +async def save_cleanup_settings_controller( + auto_cleanup_enabled: bool, + retention_days: int, + user: User +) -> dict: + """ + Save cleanup settings (admin only). 
+ + Args: + auto_cleanup_enabled: Enable/disable automatic cleanup + retention_days: Number of days to retain soft-deleted conversations + user: Authenticated admin user + + Returns: + Updated cleanup settings + + Raises: + ValueError: If validation fails + """ + from advanced_omi_backend.config import CleanupSettings, save_cleanup_settings + + # Validation + if not isinstance(auto_cleanup_enabled, bool): + raise ValueError("auto_cleanup_enabled must be a boolean") + + if not isinstance(retention_days, int): + raise ValueError("retention_days must be an integer") + + if retention_days < 1 or retention_days > 365: + raise ValueError("retention_days must be between 1 and 365") + + # Create settings object + settings = CleanupSettings( + auto_cleanup_enabled=auto_cleanup_enabled, + retention_days=retention_days + ) + + # Save using OmegaConf + save_cleanup_settings(settings) + + logger.info(f"Admin {user.email} updated cleanup settings: auto_cleanup={auto_cleanup_enabled}, retention={retention_days}d") + + return { + "auto_cleanup_enabled": settings.auto_cleanup_enabled, + "retention_days": settings.retention_days, + "message": "Cleanup settings saved successfully" + } + + async def get_speaker_configuration(user: User): """Get current user's primary speakers configuration.""" try: @@ -555,3 +890,417 @@ async def validate_chat_config_yaml(prompt_text: str) -> dict: except Exception as e: logger.error(f"Error validating chat config: {e}") return {"valid": False, "error": f"Validation error: {str(e)}"} + + +# Plugin Configuration Management Functions + +async def get_plugins_config_yaml() -> str: + """Get plugins configuration as YAML text.""" + try: + plugins_yml_path = get_plugins_yml_path() + + # Default empty plugins config + default_config = """plugins: + # No plugins configured yet + # Example plugin configuration: + # homeassistant: + # enabled: true + # access_level: transcript + # trigger: + # type: wake_word + # wake_word: vivi + # ha_url: 
http://localhost:8123 + # ha_token: YOUR_TOKEN_HERE +""" + + if not plugins_yml_path.exists(): + return default_config + + with open(plugins_yml_path, 'r') as f: + yaml_content = f.read() + + return yaml_content + + except Exception as e: + logger.error(f"Error loading plugins config: {e}") + raise + + +async def save_plugins_config_yaml(yaml_content: str) -> dict: + """Save plugins configuration from YAML text.""" + try: + plugins_yml_path = get_plugins_yml_path() + + # Validate YAML can be parsed + try: + parsed_config = yaml.safe_load(yaml_content) + if not isinstance(parsed_config, dict): + raise ValueError("Configuration must be a YAML dictionary") + + # Validate has 'plugins' key + if 'plugins' not in parsed_config: + raise ValueError("Configuration must contain 'plugins' key") + + except yaml.YAMLError as e: + raise ValueError(f"Invalid YAML syntax: {e}") + + # Create config directory if it doesn't exist + plugins_yml_path.parent.mkdir(parents=True, exist_ok=True) + + # Backup existing config + if plugins_yml_path.exists(): + backup_path = str(plugins_yml_path) + '.backup' + shutil.copy2(plugins_yml_path, backup_path) + logger.info(f"Created plugins config backup at {backup_path}") + + # Save new config + with open(plugins_yml_path, 'w') as f: + f.write(yaml_content) + + # Hot-reload plugins (optional - may require restart) + try: + from advanced_omi_backend.services.plugin_service import get_plugin_router + plugin_router = get_plugin_router() + if plugin_router: + logger.info("Plugin configuration updated - restart backend for changes to take effect") + except Exception as reload_err: + logger.warning(f"Could not reload plugins: {reload_err}") + + logger.info("Plugins configuration updated successfully") + + return { + "success": True, + "message": "Plugins configuration updated successfully. Restart backend for changes to take effect." 
+ } + + except Exception as e: + logger.error(f"Error saving plugins config: {e}") + raise + + +async def validate_plugins_config_yaml(yaml_content: str) -> dict: + """Validate plugins configuration YAML.""" + try: + # Parse YAML + try: + parsed_config = yaml.safe_load(yaml_content) + except yaml.YAMLError as e: + return {"valid": False, "error": f"Invalid YAML syntax: {e}"} + + # Check structure + if not isinstance(parsed_config, dict): + return {"valid": False, "error": "Configuration must be a YAML dictionary"} + + if 'plugins' not in parsed_config: + return {"valid": False, "error": "Configuration must contain 'plugins' key"} + + plugins = parsed_config['plugins'] + if not isinstance(plugins, dict): + return {"valid": False, "error": "'plugins' must be a dictionary"} + + # Validate each plugin + valid_access_levels = ['transcript', 'conversation', 'memory'] + valid_trigger_types = ['wake_word', 'always', 'conditional'] + + for plugin_id, plugin_config in plugins.items(): + if not isinstance(plugin_config, dict): + return {"valid": False, "error": f"Plugin '{plugin_id}' config must be a dictionary"} + + # Check required fields + if 'enabled' in plugin_config and not isinstance(plugin_config['enabled'], bool): + return {"valid": False, "error": f"Plugin '{plugin_id}': 'enabled' must be boolean"} + + if 'access_level' in plugin_config and plugin_config['access_level'] not in valid_access_levels: + return {"valid": False, "error": f"Plugin '{plugin_id}': invalid access_level (must be one of {valid_access_levels})"} + + if 'trigger' in plugin_config: + trigger = plugin_config['trigger'] + if not isinstance(trigger, dict): + return {"valid": False, "error": f"Plugin '{plugin_id}': 'trigger' must be a dictionary"} + + if 'type' in trigger and trigger['type'] not in valid_trigger_types: + return {"valid": False, "error": f"Plugin '{plugin_id}': invalid trigger type (must be one of {valid_trigger_types})"} + + return {"valid": True, "message": "Configuration is valid"} 
+ + except Exception as e: + logger.error(f"Error validating plugins config: {e}") + return {"valid": False, "error": f"Validation error: {str(e)}"} + + +# Structured Plugin Configuration Management Functions (Form-based UI) + +async def get_plugins_metadata() -> dict: + """Get plugin metadata for form-based configuration UI. + + Returns complete metadata for all discovered plugins including: + - Plugin information (name, description, enabled status) + - Auto-generated schemas from config.yml (or explicit schema.yml) + - Current configuration with masked secrets + - Orchestration settings (events, conditions) + + Returns: + Dict with plugins list containing metadata for each plugin + """ + try: + from advanced_omi_backend.services.plugin_service import ( + discover_plugins, + get_plugin_metadata, + ) + + # Discover all available plugins + discovered_plugins = discover_plugins() + + # Load orchestration config from plugins.yml + plugins_yml_path = get_plugins_yml_path() + orchestration_configs = {} + + if plugins_yml_path.exists(): + with open(plugins_yml_path, 'r') as f: + plugins_data = yaml.safe_load(f) or {} + orchestration_configs = plugins_data.get('plugins', {}) + + # Build metadata for each plugin + plugins_metadata = [] + for plugin_id, plugin_class in discovered_plugins.items(): + # Get orchestration config (or empty dict if not configured) + orchestration_config = orchestration_configs.get(plugin_id, { + 'enabled': False, + 'events': [], + 'condition': {'type': 'always'} + }) + + # Get complete metadata including schema + metadata = get_plugin_metadata(plugin_id, plugin_class, orchestration_config) + plugins_metadata.append(metadata) + + logger.info(f"Retrieved metadata for {len(plugins_metadata)} plugins") + + return { + "plugins": plugins_metadata, + "status": "success" + } + + except Exception as e: + logger.exception("Error getting plugins metadata") + raise e + + +async def update_plugin_config_structured(plugin_id: str, config: dict) -> dict: + 
"""Update plugin configuration from structured JSON (form data). + + Updates the three-file plugin architecture: + 1. config/plugins.yml - Orchestration (enabled, events, condition) + 2. plugins/{plugin_id}/config.yml - Settings with ${ENV_VAR} references + 3. backends/advanced/.env - Actual secret values + + Args: + plugin_id: Plugin identifier + config: Structured configuration with 'orchestration', 'settings', 'env_vars' sections + + Returns: + Success message with list of updated files + """ + try: + from advanced_omi_backend.services.plugin_service import discover_plugins + import advanced_omi_backend.plugins + + # Validate plugin exists + discovered_plugins = discover_plugins() + if plugin_id not in discovered_plugins: + raise ValueError(f"Plugin '{plugin_id}' not found") + + updated_files = [] + + # 1. Update config/plugins.yml (orchestration) + if 'orchestration' in config: + plugins_yml_path = get_plugins_yml_path() + + # Load current plugins.yml + if plugins_yml_path.exists(): + with open(plugins_yml_path, 'r') as f: + plugins_data = yaml.safe_load(f) or {} + else: + plugins_data = {} + + if 'plugins' not in plugins_data: + plugins_data['plugins'] = {} + + # Update orchestration config + orchestration = config['orchestration'] + plugins_data['plugins'][plugin_id] = { + 'enabled': orchestration.get('enabled', False), + 'events': orchestration.get('events', []), + 'condition': orchestration.get('condition', {'type': 'always'}) + } + + # Create backup + if plugins_yml_path.exists(): + backup_path = str(plugins_yml_path) + '.backup' + shutil.copy2(plugins_yml_path, backup_path) + + # Create config directory if needed + plugins_yml_path.parent.mkdir(parents=True, exist_ok=True) + + # Write updated plugins.yml + with open(plugins_yml_path, 'w') as f: + yaml.dump(plugins_data, f, default_flow_style=False, sort_keys=False) + + updated_files.append(str(plugins_yml_path)) + logger.info(f"Updated orchestration config for '{plugin_id}' in {plugins_yml_path}") + + # 
2. Update plugins/{plugin_id}/config.yml (settings with env var references) + if 'settings' in config: + plugins_dir = Path(advanced_omi_backend.plugins.__file__).parent + plugin_config_path = plugins_dir / plugin_id / "config.yml" + + # Load current config.yml + if plugin_config_path.exists(): + with open(plugin_config_path, 'r') as f: + plugin_config_data = yaml.safe_load(f) or {} + else: + plugin_config_data = {} + + # Update settings (preserve ${ENV_VAR} references) + settings = config['settings'] + plugin_config_data.update(settings) + + # Create backup + if plugin_config_path.exists(): + backup_path = str(plugin_config_path) + '.backup' + shutil.copy2(plugin_config_path, backup_path) + + # Write updated config.yml + with open(plugin_config_path, 'w') as f: + yaml.dump(plugin_config_data, f, default_flow_style=False, sort_keys=False) + + updated_files.append(str(plugin_config_path)) + logger.info(f"Updated settings for '{plugin_id}' in {plugin_config_path}") + + # 3. Update .env (only changed env vars) + if 'env_vars' in config and config['env_vars']: + env_path = os.path.join(os.getcwd(), ".env") + + if not os.path.exists(env_path): + raise FileNotFoundError(f".env file not found at {env_path}") + + # Read current .env + with open(env_path, 'r') as f: + env_lines = f.readlines() + + # Create backup + backup_path = f"{env_path}.backup" + shutil.copy2(env_path, backup_path) + + # Update env vars (only if not masked) + env_vars = config['env_vars'] + updated_env_lines = [] + updated_vars = set() + + for line in env_lines: + line_updated = False + for env_var, value in env_vars.items(): + # Skip if value is masked (not actually changed) + if value == 'β€’β€’β€’β€’β€’β€’β€’β€’β€’β€’β€’β€’': + continue + + if line.strip().startswith(f"{env_var}="): + updated_env_lines.append(f"{env_var}={value}\n") + updated_vars.add(env_var) + line_updated = True + break + + if not line_updated: + updated_env_lines.append(line) + + # Add new env vars that weren't found in file + 
for env_var, value in env_vars.items(): + if value != 'β€’β€’β€’β€’β€’β€’β€’β€’β€’β€’β€’β€’' and env_var not in updated_vars: + updated_env_lines.append(f"{env_var}={value}\n") + updated_vars.add(env_var) + + # Write updated .env + if updated_vars: + with open(env_path, 'w') as f: + f.writelines(updated_env_lines) + + updated_files.append(env_path) + logger.info(f"Updated {len(updated_vars)} environment variables in {env_path}") + + return { + "success": True, + "message": f"Plugin '{plugin_id}' configuration updated successfully. Restart backend for changes to take effect.", + "updated_files": updated_files, + "requires_restart": True, + "status": "success" + } + + except Exception as e: + logger.exception(f"Error updating structured config for plugin '{plugin_id}'") + raise e + + +async def test_plugin_connection(plugin_id: str, config: dict) -> dict: + """Test plugin connection/configuration without saving. + + Calls the plugin's test_connection method if available to validate + configuration (e.g., SMTP connection, Home Assistant API). 
+ + Args: + plugin_id: Plugin identifier + config: Configuration to test (same structure as update_plugin_config_structured) + + Returns: + Test result with success status and details + """ + try: + from advanced_omi_backend.services.plugin_service import discover_plugins, expand_env_vars + + # Validate plugin exists + discovered_plugins = discover_plugins() + if plugin_id not in discovered_plugins: + raise ValueError(f"Plugin '{plugin_id}' not found") + + plugin_class = discovered_plugins[plugin_id] + + # Check if plugin supports testing + if not hasattr(plugin_class, 'test_connection'): + return { + "success": False, + "message": f"Plugin '{plugin_id}' does not support connection testing", + "status": "unsupported" + } + + # Build complete config from provided data + test_config = {} + + # Merge settings + if 'settings' in config: + test_config.update(config['settings']) + + # Add env vars (expand any ${ENV_VAR} references with test values) + if 'env_vars' in config: + for key, value in config['env_vars'].items(): + # Skip masked values + if value == 'β€’β€’β€’β€’β€’β€’β€’β€’β€’β€’β€’β€’': + # Use actual env var value + value = os.getenv(key, '') + test_config[key.lower()] = value + + # Expand any remaining env var references + test_config = expand_env_vars(test_config) + + # Call plugin's test_connection static method + result = await plugin_class.test_connection(test_config) + + logger.info(f"Test connection for '{plugin_id}': {result.get('message', 'No message')}") + + return result + + except Exception as e: + logger.exception(f"Error testing connection for plugin '{plugin_id}'") + return { + "success": False, + "message": f"Connection test failed: {str(e)}", + "status": "error" + } diff --git a/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py b/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py index 50ffc77f..89e5b46f 100644 --- 
a/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py +++ b/backends/advanced/src/advanced_omi_backend/controllers/websocket_controller.py @@ -16,11 +16,14 @@ from typing import Optional from fastapi import WebSocket, WebSocketDisconnect, Query +from starlette.websockets import WebSocketState from friend_lite.decoder import OmiOpusDecoder +import redis.asyncio as redis from advanced_omi_backend.auth import websocket_auth from advanced_omi_backend.client_manager import generate_client_id, get_client_manager from advanced_omi_backend.constants import OMI_CHANNELS, OMI_SAMPLE_RATE, OMI_SAMPLE_WIDTH +from advanced_omi_backend.controllers.session_controller import mark_session_complete from advanced_omi_backend.utils.audio_utils import process_audio_chunk from advanced_omi_backend.services.audio_stream import AudioStreamProducer from advanced_omi_backend.services.audio_stream.producer import get_audio_stream_producer @@ -39,6 +42,89 @@ pending_connections: set[str] = set() +async def subscribe_to_interim_results(websocket: WebSocket, session_id: str) -> None: + """ + Subscribe to interim transcription results from Redis Pub/Sub and forward to client WebSocket. + + Runs as background task during WebSocket connection. Listens for interim and final + transcription results published by the Deepgram streaming consumer and forwards them + to the connected client for real-time transcript display. + + Args: + websocket: Connected WebSocket client + session_id: Session ID (client_id) to subscribe to + + Note: + This task runs continuously until the WebSocket disconnects or the task is cancelled. 
+ Results are published to Redis Pub/Sub channel: transcription:interim:{session_id} + """ + redis_url = os.getenv("REDIS_URL", "redis://localhost:6379/0") + + try: + # Create Redis client for Pub/Sub + redis_client = await redis.from_url(redis_url, decode_responses=True) + + # Create Pub/Sub instance + pubsub = redis_client.pubsub() + + # Subscribe to interim results channel for this session + channel = f"transcription:interim:{session_id}" + await pubsub.subscribe(channel) + + logger.info(f"πŸ“’ Subscribed to interim results channel: {channel}") + + # Listen for messages + while True: + try: + message = await pubsub.get_message(ignore_subscribe_messages=True, timeout=1.0) + + if message and message['type'] == 'message': + # Parse result data + try: + result_data = json.loads(message['data']) + + # Forward to client WebSocket + await websocket.send_json({ + "type": "interim_transcript", + "data": result_data + }) + + # Log for debugging + is_final = result_data.get("is_final", False) + text_preview = result_data.get("text", "")[:50] + result_type = "FINAL" if is_final else "interim" + logger.debug(f"βœ‰οΈ Forwarded {result_type} result to client {session_id}: {text_preview}...") + + except json.JSONDecodeError as e: + logger.error(f"Failed to parse interim result JSON: {e}") + except Exception as send_error: + logger.error(f"Failed to send interim result to client {session_id}: {send_error}") + # WebSocket might be closed, exit loop + break + + except asyncio.TimeoutError: + # No message received, continue waiting + continue + except asyncio.CancelledError: + logger.info(f"Interim results subscriber cancelled for session {session_id}") + break + except Exception as e: + logger.error(f"Error in interim results subscriber for {session_id}: {e}", exc_info=True) + break + + except Exception as e: + logger.error(f"Failed to initialize interim results subscriber for {session_id}: {e}", exc_info=True) + finally: + try: + # Unsubscribe and close connections + await 
pubsub.unsubscribe(channel) + await pubsub.close() + await redis_client.aclose() + logger.info(f"πŸ”• Unsubscribed from interim results channel: {channel}") + except Exception as cleanup_error: + logger.error(f"Error cleaning up interim results subscriber: {cleanup_error}") + + async def parse_wyoming_protocol(ws: WebSocket) -> tuple[dict, Optional[bytes]]: """Parse Wyoming protocol: JSON header line followed by optional binary payload. @@ -105,9 +191,9 @@ async def create_client_state(client_id: str, user, device_name: Optional[str] = client_id, CHUNK_DIR, user.user_id, user.email ) - # Also track in persistent mapping (for database queries) - from advanced_omi_backend.client_manager import track_client_user_relationship - track_client_user_relationship(client_id, user.user_id) + # Also track in persistent mapping (for database queries + cross-container Redis) + from advanced_omi_backend.client_manager import track_client_user_relationship_async + await track_client_user_relationship_async(client_id, user.user_id) # Register client in user model (persistent) from advanced_omi_backend.users import register_client_to_user @@ -117,35 +203,22 @@ async def create_client_state(client_id: str, user, device_name: Optional[str] = async def cleanup_client_state(client_id: str): - """Clean up and remove client state, including cancelling speech detection job and marking session complete.""" - # Cancel the speech detection job for this client - from advanced_omi_backend.controllers.queue_controller import redis_conn - from rq.job import Job + """ + Clean up and remove client state, marking session complete. + + Note: We do NOT cancel the speech detection job here because: + 1. The job needs to process all audio data that was already sent + 2. If speech was detected, it should create a conversation + 3. The job will complete naturally when it sees session status = "finalizing" + 4. The job has a grace period (15s) to wait for final transcription + 5. 
RQ's job_timeout (24h) prevents jobs from hanging forever + """ + # Note: Previously we cancelled the speech detection job here, but this prevented + # conversations from being created when WebSocket disconnects mid-recording. + # The speech detection job now monitors session status and completes naturally. import redis.asyncio as redis - try: - job_id_key = f"speech_detection_job:{client_id}" - job_id_bytes = redis_conn.get(job_id_key) - - if job_id_bytes: - job_id = job_id_bytes.decode() - logger.info(f"πŸ›‘ Cancelling speech detection job {job_id} for client {client_id}") - - try: - # Fetch and cancel the job - job = Job.fetch(job_id, connection=redis_conn) - job.cancel() - logger.info(f"βœ… Successfully cancelled speech detection job {job_id}") - except Exception as job_error: - logger.warning(f"⚠️ Failed to cancel job {job_id}: {job_error}") - - # Clean up the tracking key - redis_conn.delete(job_id_key) - logger.info(f"🧹 Cleaned up job tracking key for client {client_id}") - else: - logger.debug(f"No speech detection job found for client {client_id}") - except Exception as e: - logger.warning(f"⚠️ Error during job cancellation for client {client_id}: {e}") + logger.info(f"πŸ”„ Letting speech detection job complete naturally for client {client_id} (if running)") # Mark all active sessions for this client as complete AND delete Redis streams try: @@ -153,6 +226,10 @@ async def cleanup_client_state(client_id: str): redis_url = os.getenv("REDIS_URL", "redis://localhost:6379/0") async_redis = redis.from_url(redis_url, decode_responses=False) + # Get audio stream producer for finalization + from advanced_omi_backend.services.audio_stream.producer import get_audio_stream_producer + audio_stream_producer = get_audio_stream_producer() + # Find all session keys for this client and mark them complete pattern = f"audio:session:*" cursor = 0 @@ -165,14 +242,19 @@ async def cleanup_client_state(client_id: str): # Check if this session belongs to this client client_id_bytes 
= await async_redis.hget(key, "client_id") if client_id_bytes and client_id_bytes.decode() == client_id: - # Mark session as complete (WebSocket disconnected) - await async_redis.hset(key, mapping={ - "status": "complete", - "completed_at": str(time.time()), - "completion_reason": "websocket_disconnect" - }) session_id = key.decode().replace("audio:session:", "") - logger.info(f"πŸ“Š Marked session {session_id[:12]} as complete (WebSocket disconnect)") + + # Check session status + status_bytes = await async_redis.hget(key, "status") + status = status_bytes.decode() if status_bytes else None + + # If session is still active, finalize it first (sets status + completion_reason atomically) + if status in ["active", None]: + logger.info(f"πŸ“Š Finalizing active session {session_id[:12]} due to WebSocket disconnect") + await audio_stream_producer.finalize_session(session_id, completion_reason="websocket_disconnect") + + # Mark session as complete (WebSocket disconnected) + await mark_session_complete(async_redis, session_id, "websocket_disconnect") sessions_closed += 1 if cursor == 0: @@ -181,12 +263,32 @@ async def cleanup_client_state(client_id: str): if sessions_closed > 0: logger.info(f"βœ… Closed {sessions_closed} active session(s) for client {client_id}") - # Delete Redis Streams for this client + # Set TTL on Redis Streams for this client (allows consumer groups to finish processing) stream_pattern = f"audio:stream:{client_id}" stream_key = await async_redis.exists(stream_pattern) if stream_key: - await async_redis.delete(stream_pattern) - logger.info(f"🧹 Deleted Redis stream: {stream_pattern}") + # Check how many messages are in the stream + stream_length = await async_redis.xlen(stream_pattern) + + # Check for pending messages in consumer groups + pending_count = 0 + try: + # Check streaming-transcription consumer group for pending messages + pending_info = await async_redis.xpending(stream_pattern, "streaming-transcription") + if pending_info: + pending_count = 
pending_info.get('pending', 0) + except Exception as e: + # Consumer group might not exist yet - that's ok + logger.debug(f"No consumer group for {stream_pattern}: {e}") + + if stream_length > 0 or pending_count > 0: + logger.warning( + f"⚠️ Closing {stream_pattern} with unprocessed data: " + f"{stream_length} messages in stream, {pending_count} pending in consumer group" + ) + + await async_redis.expire(stream_pattern, 60) # 60 second TTL for consumer group fan-out + logger.info(f"⏰ Set 60s TTL on Redis stream: {stream_pattern}") else: logger.debug(f"No Redis stream found for client {client_id}") @@ -279,8 +381,9 @@ async def _initialize_streaming_session( user_id: str, user_email: str, client_id: str, - audio_format: dict -) -> None: + audio_format: dict, + websocket: Optional[WebSocket] = None +) -> Optional[asyncio.Task]: """ Initialize streaming session with Redis and enqueue processing jobs. @@ -291,15 +394,22 @@ async def _initialize_streaming_session( user_email: User email client_id: Client ID audio_format: Audio format dict from audio-start event + websocket: Optional WebSocket connection to launch interim results subscriber + + Returns: + Interim results subscriber task if websocket provided and session initialized, None otherwise """ + application_logger.info( + f"πŸ”΄ BACKEND: _initialize_streaming_session called for {client_id}" + ) + if hasattr(client_state, 'stream_session_id'): application_logger.debug(f"Session already initialized for {client_id}") - return + return None - # Initialize stream session - client_state.stream_session_id = str(uuid.uuid4()) - client_state.stream_chunk_count = 0 - client_state.stream_audio_format = audio_format + # Initialize stream session - use client_id as session_id for predictable lookup + # All other session metadata goes to Redis (single source of truth) + client_state.stream_session_id = client_state.client_id application_logger.info(f"πŸ†” Created stream session: {client_state.stream_session_id}") # Determine 
transcription provider from config.yml @@ -313,21 +423,31 @@ async def _initialize_streaming_session( if not stt_model: raise ValueError("No default STT model configured in config.yml (defaults.stt)") - provider = stt_model.model_provider.lower() - if provider not in ["deepgram", "parakeet"]: - raise ValueError(f"Unsupported STT provider: {provider}. Expected: deepgram or parakeet") + # Use model_provider for session tracking (generic, not validated against hardcoded list) + provider = stt_model.model_provider.lower() if stt_model.model_provider else stt_model.name application_logger.info(f"πŸ“‹ Using STT provider: {provider} (model: {stt_model.name})") - - # Initialize session tracking in Redis + + # Initialize session tracking in Redis (SINGLE SOURCE OF TRUTH for session metadata) + # This includes user_email, connection info, audio format, chunk counters, job IDs, etc. + connection_id = f"ws_{client_id}_{int(time.time())}" await audio_stream_producer.init_session( session_id=client_state.stream_session_id, user_id=user_id, client_id=client_id, + user_email=user_email, + connection_id=connection_id, mode="streaming", provider=provider ) + # Store audio format in Redis session (not in ClientState) + from advanced_omi_backend.services.audio_stream.producer import get_audio_stream_producer + import json + session_key = f"audio:session:{client_state.stream_session_id}" + redis_client = audio_stream_producer.redis_client + await redis_client.hset(session_key, "audio_format", json.dumps(audio_format)) + # Enqueue streaming jobs (speech detection + audio persistence) from advanced_omi_backend.controllers.queue_controller import start_streaming_jobs @@ -337,8 +457,25 @@ async def _initialize_streaming_session( client_id=client_id ) - client_state.speech_detection_job_id = job_ids['speech_detection'] - client_state.audio_persistence_job_id = job_ids['audio_persistence'] + # Store job IDs in Redis session (not in ClientState) + await 
audio_stream_producer.update_session_job_ids( + session_id=client_state.stream_session_id, + speech_detection_job_id=job_ids['speech_detection'], + audio_persistence_job_id=job_ids['audio_persistence'] + ) + + # Note: Placeholder conversation creation is handled by the audio persistence job, + # which reads the always_persist_enabled setting from global config. + + # Launch interim results subscriber if WebSocket provided + subscriber_task = None + if websocket: + subscriber_task = asyncio.create_task( + subscribe_to_interim_results(websocket, client_state.stream_session_id) + ) + application_logger.info(f"πŸ“‘ Launched interim results subscriber for session {client_state.stream_session_id}") + + return subscriber_task async def _finalize_streaming_session( @@ -377,8 +514,8 @@ async def _finalize_streaming_session( # Send end-of-session signal to workers await audio_stream_producer.send_session_end_signal(session_id) - # Mark session as finalizing - await audio_stream_producer.finalize_session(session_id) + # Mark session as finalizing with user_stopped reason (audio-stop event) + await audio_stream_producer.finalize_session(session_id, completion_reason="user_stopped") # NOTE: Finalize job disabled - open_conversation_job now handles everything # The open_conversation_job will: @@ -399,11 +536,10 @@ async def _finalize_streaming_session( f"βœ… Session {session_id[:12]} marked as finalizing - open_conversation_job will handle cleanup" ) - # Clear session state - for attr in ['stream_session_id', 'stream_chunk_count', 'stream_audio_format', - 'speech_detection_job_id', 'audio_persistence_job_id']: - if hasattr(client_state, attr): - delattr(client_state, attr) + # Clear session state from ClientState (only stream_session_id is stored there now) + # All other session metadata lives in Redis (single source of truth) + if hasattr(client_state, 'stream_session_id'): + delattr(client_state, 'stream_session_id') except Exception as finalize_error: 
application_logger.error( @@ -439,14 +575,18 @@ async def _publish_audio_to_stream( application_logger.warning(f"⚠️ Received audio chunk before session initialized for {client_id}") return - # Increment chunk count and format chunk ID - client_state.stream_chunk_count += 1 - chunk_id = f"{client_state.stream_chunk_count:05d}" + session_id = client_state.stream_session_id + + # Increment chunk count in Redis (single source of truth) and format chunk ID + session_key = f"audio:session:{session_id}" + redis_client = audio_stream_producer.redis_client + chunk_count = await redis_client.hincrby(session_key, "chunks_published", 1) + chunk_id = f"{chunk_count:05d}" # Publish to Redis Stream using producer await audio_stream_producer.add_audio_chunk( audio_data=audio_data, - session_id=client_state.stream_session_id, + session_id=session_id, chunk_id=chunk_id, user_id=user_id, client_id=client_id, @@ -516,8 +656,9 @@ async def _handle_streaming_mode_audio( audio_format: dict, user_id: str, user_email: str, - client_id: str -) -> None: + client_id: str, + websocket: Optional[WebSocket] = None +) -> Optional[asyncio.Task]: """ Handle audio chunk in streaming mode. 
@@ -529,16 +670,22 @@ async def _handle_streaming_mode_audio( user_id: User ID user_email: User email client_id: Client ID + websocket: Optional WebSocket connection to launch interim results subscriber + + Returns: + Interim results subscriber task if websocket provided and session initialized, None otherwise """ # Initialize session if needed + subscriber_task = None if not hasattr(client_state, 'stream_session_id'): - await _initialize_streaming_session( + subscriber_task = await _initialize_streaming_session( client_state, audio_stream_producer, user_id, user_email, client_id, - audio_format + audio_format, + websocket=websocket # Pass WebSocket to launch interim results subscriber ) # Publish to Redis Stream @@ -553,6 +700,8 @@ async def _handle_streaming_mode_audio( audio_format.get("width", 2) ) + return subscriber_task + async def _handle_batch_mode_audio( client_state, @@ -561,7 +710,7 @@ async def _handle_batch_mode_audio( client_id: str ) -> None: """ - Handle audio chunk in batch mode - accumulate in memory. + Handle audio chunk in batch mode with rolling 30-minute limit. 
Args: client_state: Client state object @@ -573,14 +722,53 @@ async def _handle_batch_mode_audio( if not hasattr(client_state, 'batch_audio_chunks'): client_state.batch_audio_chunks = [] client_state.batch_audio_format = audio_format + client_state.batch_audio_bytes = 0 # Track total bytes + client_state.batch_chunks_processed = 0 # Track how many batches processed application_logger.info(f"πŸ“¦ Started batch audio accumulation for {client_id}") # Accumulate audio client_state.batch_audio_chunks.append(audio_data) + client_state.batch_audio_bytes += len(audio_data) application_logger.debug( f"πŸ“¦ Accumulated chunk #{len(client_state.batch_audio_chunks)} ({len(audio_data)} bytes) for {client_id}" ) + # Calculate duration: sample_rate * width * channels = bytes/second + sample_rate = audio_format.get("rate", 16000) + width = audio_format.get("width", 2) + channels = audio_format.get("channels", 1) + bytes_per_second = sample_rate * width * channels + + accumulated_seconds = client_state.batch_audio_bytes / bytes_per_second + MAX_BATCH_SECONDS = 30 * 60 # 30 minutes + + # Check if we've hit the 30-minute limit + if accumulated_seconds >= MAX_BATCH_SECONDS: + application_logger.warning( + f"⚠️ Batch accumulation reached 30-minute limit " + f"({accumulated_seconds:.1f}s, {client_state.batch_audio_bytes / 1024 / 1024:.1f} MB). " + f"Processing batch #{client_state.batch_chunks_processed + 1}..." + ) + + # Process this batch (will create conversation and transcribe) + await _process_rolling_batch( + client_state, + user_id=client_state.user_id, # Need to store these on session start + user_email=client_state.user_email, + client_id=client_state.client_id, + batch_number=client_state.batch_chunks_processed + 1 + ) + + # Clear buffer for next batch + client_state.batch_audio_chunks = [] + client_state.batch_audio_bytes = 0 + client_state.batch_chunks_processed += 1 + + application_logger.info( + f"βœ… Rolled batch #{client_state.batch_chunks_processed}. 
" + f"Starting fresh accumulation for next 30 minutes." + ) + async def _handle_audio_chunk( client_state, @@ -589,8 +777,9 @@ async def _handle_audio_chunk( audio_format: dict, user_id: str, user_email: str, - client_id: str -) -> None: + client_id: str, + websocket: Optional[WebSocket] = None +) -> Optional[asyncio.Task]: """ Route audio chunk to appropriate mode handler (streaming or batch). @@ -602,39 +791,102 @@ async def _handle_audio_chunk( user_id: User ID user_email: User email client_id: Client ID + websocket: Optional WebSocket connection to launch interim results subscriber + + Returns: + Interim results subscriber task if websocket provided and streaming mode, None otherwise """ recording_mode = getattr(client_state, 'recording_mode', 'batch') if recording_mode == "streaming": - await _handle_streaming_mode_audio( + return await _handle_streaming_mode_audio( client_state, audio_stream_producer, audio_data, - audio_format, user_id, user_email, client_id + audio_format, user_id, user_email, client_id, + websocket=websocket ) else: await _handle_batch_mode_audio( client_state, audio_data, audio_format, client_id ) + return None async def _handle_audio_session_start( client_state, audio_format: dict, - client_id: str + client_id: str, + websocket: Optional[WebSocket] = None ) -> tuple[bool, str]: """ - Handle audio-start event - set mode and switch to audio streaming. + Handle audio-start event - validate mode and set recording mode. 
Args: client_state: Client state object audio_format: Audio format dict with mode client_id: Client ID + websocket: Optional WebSocket connection (for WebUI error messages) Returns: (audio_streaming_flag, recording_mode) """ + from advanced_omi_backend.services.transcription import is_transcription_available + recording_mode = audio_format.get("mode", "batch") + + application_logger.info( + f"πŸ”΄ BACKEND: Received audio-start for {client_id} - " + f"mode={recording_mode}, full format={audio_format}" + ) + + # Store on client state for later use client_state.recording_mode = recording_mode + # VALIDATION: Check if streaming mode is available + if recording_mode == "streaming": + if not is_transcription_available("streaming"): + error_msg = ( + "Streaming transcription not available. " + "Please use Batch mode or configure a streaming STT provider (defaults.stt_stream in config.yml)." + ) + + application_logger.warning( + f"⚠️ Streaming mode requested but stt_stream not configured for {client_id}" + ) + + # Send error to WebSocket client (for WebUI display) + if websocket and websocket.client_state == WebSocketState.CONNECTED: + try: + error_response = { + "type": "error", + "error": "streaming_not_configured", + "message": error_msg, + "code": 400 + } + await websocket.send_json(error_response) + application_logger.info(f"πŸ“€ Sent streaming error to WebUI client {client_id}") + + # Close the websocket connection after sending error + await websocket.close(code=1008, reason="Streaming transcription not configured") + application_logger.info(f"πŸ”Œ Closed WebSocket connection for {client_id} due to streaming config error") + + # Raise ValueError to exit the handler completely + raise ValueError(error_msg) + except ValueError: + # Re-raise ValueError to exit handler + raise + except Exception as e: + application_logger.error(f"Failed to send error to client: {e}") + # Still raise ValueError to exit handler + raise ValueError(error_msg) + + # For OMI devices (no 
websocket), fall back to batch mode silently + if not websocket: + application_logger.warning( + f"πŸ”„ OMI device {client_id} requested streaming but falling back to batch mode" + ) + recording_mode = "batch" + client_state.recording_mode = recording_mode + application_logger.info( f"πŸŽ™οΈ Audio session started for {client_id} - " f"Format: {audio_format.get('rate')}Hz, " @@ -682,6 +934,99 @@ async def _handle_audio_session_stop( return False # Switch back to control mode +async def _process_rolling_batch( + client_state, + user_id: str, + user_email: str, + client_id: str, + batch_number: int +) -> None: + """ + Process accumulated batch audio as a rolling segment. + + Creates conversation titled "Recording Part {batch_number}" and enqueues transcription. + + Args: + client_state: Client state with batch_audio_chunks + user_id: User ID + user_email: User email + client_id: Client ID + batch_number: Sequential batch number (1, 2, 3...) + """ + if not hasattr(client_state, 'batch_audio_chunks') or not client_state.batch_audio_chunks: + application_logger.warning(f"⚠️ No audio chunks to process for rolling batch") + return + + try: + from advanced_omi_backend.models.conversation import create_conversation + from advanced_omi_backend.utils.audio_chunk_utils import convert_audio_to_chunks + + # Combine chunks + complete_audio = b''.join(client_state.batch_audio_chunks) + application_logger.info( + f"πŸ“¦ Rolling batch #{batch_number}: Combined {len(client_state.batch_audio_chunks)} chunks " + f"into {len(complete_audio)} bytes" + ) + + # Get audio format + audio_format = getattr(client_state, 'batch_audio_format', {}) + sample_rate = audio_format.get("rate", 16000) + width = audio_format.get("width", 2) + channels = audio_format.get("channels", 1) + + # Create conversation with batch number in title + conversation = create_conversation( + user_id=user_id, + client_id=client_id, + title=f"Recording Part {batch_number}", + summary="Rolling batch processing..." 
+ ) + await conversation.insert() + conversation_id = conversation.conversation_id # Get the auto-generated ID + + # Convert to MongoDB chunks + num_chunks = await convert_audio_to_chunks( + conversation_id=conversation_id, + audio_data=complete_audio, + sample_rate=sample_rate, + channels=channels, + sample_width=width + ) + + # Enqueue transcription job + from advanced_omi_backend.controllers.queue_controller import ( + transcription_queue, + JOB_RESULT_TTL + ) + from advanced_omi_backend.workers.transcription_jobs import transcribe_full_audio_job + + version_id = str(uuid.uuid4()) + transcribe_job_id = f"transcribe_rolling_{conversation_id[:12]}_{batch_number}" + + transcription_job = transcription_queue.enqueue( + transcribe_full_audio_job, + conversation_id, + version_id, + f"rolling_batch_{batch_number}", # trigger + job_timeout=1800, # 30 minutes + result_ttl=JOB_RESULT_TTL, + job_id=transcribe_job_id, + description=f"Transcribe rolling batch #{batch_number} {conversation_id[:8]}", + meta={'conversation_id': conversation_id, 'client_id': client_id, 'batch_number': batch_number} + ) + + application_logger.info( + f"βœ… Rolling batch #{batch_number} created conversation {conversation_id}, " + f"enqueued transcription job {transcription_job.id}" + ) + + except Exception as e: + application_logger.error( + f"❌ Failed to process rolling batch #{batch_number}: {e}", + exc_info=True + ) + + async def _process_batch_audio_complete( client_state, user_id: str, @@ -702,8 +1047,8 @@ async def _process_batch_audio_complete( return try: - from advanced_omi_backend.utils.audio_utils import write_audio_file from advanced_omi_backend.models.conversation import create_conversation + from advanced_omi_backend.utils.audio_chunk_utils import convert_audio_to_chunks # Combine all chunks complete_audio = b''.join(client_state.batch_audio_chunks) @@ -711,57 +1056,92 @@ async def _process_batch_audio_complete( f"πŸ“¦ Batch mode: Combined {len(client_state.batch_audio_chunks)} 
chunks into {len(complete_audio)} bytes" ) - # Generate audio UUID and timestamp - audio_uuid = str(uuid.uuid4()) + # Timestamp for logging timestamp = int(time.time() * 1000) - # Write audio file and create AudioFile entry - relative_audio_path, file_path, duration = await write_audio_file( - raw_audio_data=complete_audio, - audio_uuid=audio_uuid, - source="websocket", - client_id=client_id, - user_id=user_id, - user_email=user_email, - timestamp=timestamp, - validate=False # PCM data, not WAV - ) + # Get audio format from batch metadata (set during audio-start) + audio_format = getattr(client_state, 'batch_audio_format', {}) + sample_rate = audio_format.get('rate', OMI_SAMPLE_RATE) + sample_width = audio_format.get('width', OMI_SAMPLE_WIDTH) + channels = audio_format.get('channels', OMI_CHANNELS) + + # Calculate audio duration + duration = len(complete_audio) / (sample_rate * sample_width * channels) application_logger.info( - f"βœ… Batch mode: Wrote audio file {relative_audio_path} ({duration:.1f}s)" + f"βœ… Batch mode: Processing audio ({duration:.1f}s)" ) # Create conversation immediately for batch audio (conversation_id auto-generated) version_id = str(uuid.uuid4()) conversation = create_conversation( - audio_uuid=audio_uuid, user_id=user_id, client_id=client_id, title="Batch Recording", summary="Processing batch audio..." 
) - conversation.audio_path = relative_audio_path await conversation.insert() conversation_id = conversation.conversation_id # Get the auto-generated ID application_logger.info(f"πŸ“ Batch mode: Created conversation {conversation_id}") - # Enqueue post-conversation processing job chain - from advanced_omi_backend.controllers.queue_controller import start_post_conversation_jobs + # Convert audio directly to MongoDB chunks (no disk intermediary) + try: + num_chunks = await convert_audio_to_chunks( + conversation_id=conversation_id, + audio_data=complete_audio, + sample_rate=sample_rate, + channels=channels, + sample_width=sample_width, + ) + application_logger.info( + f"πŸ“¦ Batch mode: Converted to {num_chunks} MongoDB chunks " + f"(conversation {conversation_id[:12]})" + ) + except Exception as chunk_error: + application_logger.error( + f"Failed to convert batch audio to chunks: {chunk_error}", + exc_info=True + ) + # Continue anyway - transcription job will handle it + + # Enqueue batch transcription job first (file uploads need transcription) + from advanced_omi_backend.controllers.queue_controller import ( + start_post_conversation_jobs, + transcription_queue, + JOB_RESULT_TTL + ) + from advanced_omi_backend.workers.transcription_jobs import transcribe_full_audio_job + + version_id = str(uuid.uuid4()) + transcribe_job_id = f"transcribe_{conversation_id[:12]}" + + transcription_job = transcription_queue.enqueue( + transcribe_full_audio_job, + conversation_id, + version_id, + "batch", # trigger + job_timeout=1800, # 30 minutes + result_ttl=JOB_RESULT_TTL, + job_id=transcribe_job_id, + description=f"Transcribe batch audio {conversation_id[:8]}", + meta={'conversation_id': conversation_id, 'client_id': client_id} + ) + application_logger.info(f"πŸ“₯ Batch mode: Enqueued transcription job {transcription_job.id}") + + # Enqueue post-conversation processing job chain (depends on transcription) job_ids = start_post_conversation_jobs( conversation_id=conversation_id, - 
audio_uuid=audio_uuid, - audio_file_path=file_path, user_id=None, # Will be read from conversation in DB by jobs - post_transcription=True, # Run batch transcription for uploads + depends_on_job=transcription_job, # Wait for transcription to complete client_id=client_id # Pass client_id for UI tracking ) application_logger.info( f"βœ… Batch mode: Enqueued job chain for {conversation_id} - " - f"transcription ({job_ids['transcription']}) β†’ " + f"transcription ({transcription_job.id}) β†’ " f"speaker ({job_ids['speaker_recognition']}) β†’ " f"memory ({job_ids['memory']})" ) @@ -788,6 +1168,7 @@ async def handle_omi_websocket( client_id = None client_state = None + interim_subscriber_task = None try: # Setup connection (accept, auth, create client state) @@ -813,14 +1194,22 @@ async def handle_omi_websocket( if header["type"] == "audio-start": # Handle audio session start + application_logger.info(f"πŸ”΄ BACKEND: Received audio-start in OMI MODE for {client_id} (header={header})") application_logger.info(f"πŸŽ™οΈ OMI audio session started for {client_id}") - await _initialize_streaming_session( + + # Store user context on client state + client_state.user_id = user.user_id + client_state.user_email = user.email + client_state.client_id = client_id + + interim_subscriber_task = await _initialize_streaming_session( client_state, audio_stream_producer, user.user_id, user.email, client_id, - header.get("data", {"rate": OMI_SAMPLE_RATE, "width": OMI_SAMPLE_WIDTH, "channels": OMI_CHANNELS}) + header.get("data", {"rate": OMI_SAMPLE_RATE, "width": OMI_SAMPLE_WIDTH, "channels": OMI_CHANNELS}), + websocket=ws # Pass WebSocket to launch interim results subscriber ) elif header["type"] == "audio-chunk" and payload: @@ -883,6 +1272,16 @@ async def handle_omi_websocket( except Exception as e: application_logger.error(f"❌ WebSocket error for client {client_id}: {e}", exc_info=True) finally: + # Cancel interim results subscriber task if running + if interim_subscriber_task and not 
interim_subscriber_task.done(): + interim_subscriber_task.cancel() + try: + await interim_subscriber_task + except asyncio.CancelledError: + application_logger.info(f"Interim subscriber task cancelled for {client_id}") + except Exception as task_error: + application_logger.error(f"Error cancelling interim subscriber task: {task_error}") + # Clean up pending connection tracking pending_connections.discard(pending_client_id) @@ -909,6 +1308,7 @@ async def handle_pcm_websocket( client_id = None client_state = None + interim_subscriber_task = None try: # Setup connection (accept, auth, create client state) @@ -935,13 +1335,35 @@ async def handle_pcm_websocket( application_logger.debug(f"βœ… Received message type: {header.get('type')} for {client_id}") if header["type"] == "audio-start": + application_logger.info(f"πŸ”΄ BACKEND: Received audio-start in CONTROL MODE for {client_id}") application_logger.debug(f"πŸŽ™οΈ Processing audio-start for {client_id}") - # Handle audio session start using helper function + + # Store user context on client state for rolling batch processing + client_state.user_id = user.user_id + client_state.user_email = user.email + client_state.client_id = client_id + + # Handle audio session start using helper function (pass websocket for error handling) audio_streaming, recording_mode = await _handle_audio_session_start( client_state, header.get("data", {}), - client_id + client_id, + websocket=ws # Pass websocket for WebUI error display ) + + # Initialize streaming session + if recording_mode == "streaming": + application_logger.info(f"πŸ”΄ BACKEND: Initializing streaming session for {client_id}") + interim_subscriber_task = await _initialize_streaming_session( + client_state, + audio_stream_producer, + user.user_id, + user.email, + client_id, + header.get("data", {}), + websocket=ws + ) + continue # Continue to audio streaming mode elif header["type"] == "ping": @@ -1011,15 +1433,19 @@ async def handle_pcm_websocket( # Route to appropriate 
mode handler audio_format = control_header.get("data", {}) - await _handle_audio_chunk( + task = await _handle_audio_chunk( client_state, audio_stream_producer, audio_data, audio_format, user.user_id, user.email, - client_id + client_id, + websocket=ws ) + # Store subscriber task if it was created (first streaming chunk) + if task and not interim_subscriber_task: + interim_subscriber_task = task else: application_logger.warning(f"Expected binary payload for audio-chunk, got: {payload_msg.keys()}") else: @@ -1044,15 +1470,19 @@ async def handle_pcm_websocket( # Route to appropriate mode handler with default format default_format = {"rate": 16000, "width": 2, "channels": 1} - await _handle_audio_chunk( + task = await _handle_audio_chunk( client_state, audio_stream_producer, audio_data, default_format, user.user_id, user.email, - client_id + client_id, + websocket=ws ) + # Store subscriber task if it was created (first streaming chunk) + if task and not interim_subscriber_task: + interim_subscriber_task = task else: application_logger.warning(f"Unexpected message format in streaming mode: {message.keys()}") @@ -1115,6 +1545,16 @@ async def handle_pcm_websocket( f"❌ PCM WebSocket error for client {client_id}: {e}", exc_info=True ) finally: + # Cancel interim results subscriber task if running + if interim_subscriber_task and not interim_subscriber_task.done(): + interim_subscriber_task.cancel() + try: + await interim_subscriber_task + except asyncio.CancelledError: + application_logger.info(f"Interim subscriber task cancelled for {client_id}") + except Exception as task_error: + application_logger.error(f"Error cancelling interim subscriber task: {task_error}") + # Clean up pending connection tracking pending_connections.discard(pending_client_id) diff --git a/backends/advanced/src/advanced_omi_backend/cron.py b/backends/advanced/src/advanced_omi_backend/cron.py new file mode 100644 index 00000000..161ceb31 --- /dev/null +++ 
b/backends/advanced/src/advanced_omi_backend/cron.py @@ -0,0 +1,121 @@ +""" +Annotation cron scheduler for AI-powered suggestion surfacing. + +This scheduler runs background jobs to: +1. Surface AI suggestions for potential transcript/memory errors (daily) +2. Fine-tune error detection models using user feedback (weekly) + +Configuration via environment variables: +- MONGODB_URI: MongoDB connection string +- DEV_MODE: When true, uses 1-minute intervals for testing + +Usage: + uv run python -m advanced_omi_backend.cron +""" + +import asyncio +import logging +import os +from datetime import datetime, timezone + +from beanie import init_beanie +from motor.motor_asyncio import AsyncIOMotorClient + +from advanced_omi_backend.models.annotation import Annotation +from advanced_omi_backend.models.conversation import Conversation +from advanced_omi_backend.models.user import User +from advanced_omi_backend.workers.annotation_jobs import ( + finetune_hallucination_model, + surface_error_suggestions, +) + +# Configure logging +logging.basicConfig( + level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s" +) +logger = logging.getLogger(__name__) + +# Configuration +MONGODB_URI = os.getenv("MONGODB_URI", "mongodb://mongo:27017") +DEV_MODE = os.getenv("DEV_MODE", "false").lower() == "true" + +# Intervals (1 minute in dev, normal in production) +if DEV_MODE: + SUGGESTION_INTERVAL = 60 # 1 minute for dev testing + TRAINING_INTERVAL = 60 # 1 minute for dev testing + logger.info("πŸ”§ DEV_MODE enabled - using 1-minute intervals for testing") +else: + SUGGESTION_INTERVAL = 24 * 60 * 60 # Daily + TRAINING_INTERVAL = 7 * 24 * 60 * 60 # Weekly + logger.info("πŸ“… Production mode - using daily/weekly intervals") + + +async def init_db(): + """Initialize database connection""" + try: + client = AsyncIOMotorClient(MONGODB_URI) + await init_beanie( + database=client.chronicle, + document_models=[Annotation, Conversation, User], + ) + logger.info("βœ… Database 
connection initialized") + except Exception as e: + logger.error(f"❌ Failed to initialize database: {e}") + raise + + +async def run_scheduler(): + """Main scheduler loop""" + await init_db() + logger.info("πŸ• Annotation cron scheduler started") + logger.info(f" - Suggestion interval: {SUGGESTION_INTERVAL}s") + logger.info(f" - Training interval: {TRAINING_INTERVAL}s") + + last_suggestion_run = datetime.now(timezone.utc) + last_training_run = datetime.now(timezone.utc) + + while True: + try: + now = datetime.now(timezone.utc) + + # Daily: Surface AI suggestions + if (now - last_suggestion_run).total_seconds() >= SUGGESTION_INTERVAL: + logger.info(f"πŸ€– Running suggestion surfacing at {now}") + try: + await surface_error_suggestions() + last_suggestion_run = now + logger.info("βœ… Suggestion surfacing completed") + except Exception as e: + logger.error(f"❌ Suggestion job failed: {e}", exc_info=True) + + # Weekly: Fine-tune model + if (now - last_training_run).total_seconds() >= TRAINING_INTERVAL: + logger.info(f"πŸŽ“ Running model fine-tuning at {now}") + try: + await finetune_hallucination_model() + last_training_run = now + logger.info("βœ… Model fine-tuning completed") + except Exception as e: + logger.error(f"❌ Training job failed: {e}", exc_info=True) + + # Sleep for check interval + await asyncio.sleep(60) # Check every minute + + except KeyboardInterrupt: + logger.info("β›” Scheduler stopped by user") + break + except Exception as e: + logger.error(f"❌ Unexpected error in scheduler loop: {e}", exc_info=True) + # Continue running despite errors + await asyncio.sleep(60) + + +if __name__ == "__main__": + logger.info("πŸš€ Starting annotation cron scheduler...") + try: + asyncio.run(run_scheduler()) + except KeyboardInterrupt: + logger.info("πŸ‘‹ Annotation cron scheduler stopped") + except Exception as e: + logger.error(f"πŸ’₯ Fatal error: {e}", exc_info=True) + exit(1) diff --git a/backends/advanced/src/advanced_omi_backend/database.py 
b/backends/advanced/src/advanced_omi_backend/database.py index ae7650b0..1b214b6d 100644 --- a/backends/advanced/src/advanced_omi_backend/database.py +++ b/backends/advanced/src/advanced_omi_backend/database.py @@ -14,7 +14,7 @@ # MongoDB Configuration MONGODB_URI = os.getenv("MONGODB_URI", "mongodb://mongo:27017") -MONGODB_DATABASE = os.getenv("MONGODB_DATABASE", "friend-lite") +MONGODB_DATABASE = os.getenv("MONGODB_DATABASE", "chronicle") mongo_client = AsyncIOMotorClient( MONGODB_URI, diff --git a/backends/advanced/src/advanced_omi_backend/main.py b/backends/advanced/src/advanced_omi_backend/main.py index df51e1cc..ee60696f 100644 --- a/backends/advanced/src/advanced_omi_backend/main.py +++ b/backends/advanced/src/advanced_omi_backend/main.py @@ -2,7 +2,7 @@ """ Unified Omi-audio service - * Accepts Opus packets over a WebSocket (`/ws`) or PCM over a WebSocket (`/ws_pcm`). + * Accepts audio over a unified WebSocket endpoint (`/ws`) with codec parameter (pcm or opus). * Uses a central queue to decouple audio ingestion from processing. * A saver consumer buffers PCM and writes 30-second WAV chunks to `./data/audio_chunks/`. * A transcription consumer sends each chunk to a Wyoming ASR service. 
@@ -16,6 +16,7 @@ """ import logging + import uvicorn from advanced_omi_backend.app_factory import create_app diff --git a/backends/advanced/src/advanced_omi_backend/middleware/app_middleware.py b/backends/advanced/src/advanced_omi_backend/middleware/app_middleware.py index eafeffec..069d5239 100644 --- a/backends/advanced/src/advanced_omi_backend/middleware/app_middleware.py +++ b/backends/advanced/src/advanced_omi_backend/middleware/app_middleware.py @@ -56,12 +56,11 @@ class RequestLoggingMiddleware(BaseHTTPMiddleware): "/auth/jwt/logout", "/auth/cookie/logout", "/ws", - "/ws_omi", - "/ws_pcm", "/mcp", "/health", "/auth/health", "/readiness", + "/api/queue/dashboard", # Auto-refresh endpoint, too noisy } # Binary content types to exclude diff --git a/backends/advanced/src/advanced_omi_backend/model_registry.py b/backends/advanced/src/advanced_omi_backend/model_registry.py index 18f464ae..382674da 100644 --- a/backends/advanced/src/advanced_omi_backend/model_registry.py +++ b/backends/advanced/src/advanced_omi_backend/model_registry.py @@ -4,12 +4,11 @@ definitions (LLM, embeddings, etc.) in a provider-agnostic way. Now using Pydantic for robust validation and type safety. +Environment variable resolution is handled by OmegaConf in the config module. """ from __future__ import annotations -import os -import re import yaml from pathlib import Path from typing import Any, Dict, List, Optional @@ -17,74 +16,9 @@ import logging from pydantic import BaseModel, Field, field_validator, model_validator, ConfigDict, ValidationError -def _resolve_env(value: Any) -> Any: - """Resolve ``${VAR:-default}`` patterns inside a single value. - - This helper is intentionally minimal: it only operates on strings and leaves - all other types unchanged. Patterns of the form ``${VAR}`` or - ``${VAR:-default}`` are expanded using ``os.getenv``: - - - If the environment variable **VAR** is set, its value is used. - - Otherwise the optional ``default`` is used (or ``\"\"`` if omitted). 
- - Examples: - >>> os.environ.get("OLLAMA_MODEL") - >>> _resolve_env("${OLLAMA_MODEL:-llama3.1:latest}") - 'llama3.1:latest' - - >>> os.environ["OLLAMA_MODEL"] = "llama3.2:latest" - >>> _resolve_env("${OLLAMA_MODEL:-llama3.1:latest}") - 'llama3.2:latest' - - >>> _resolve_env("Bearer ${OPENAI_API_KEY:-}") - 'Bearer ' # when OPENAI_API_KEY is not set - - Note: - Use :func:`_deep_resolve_env` to apply this logic to an entire - nested config structure (dicts/lists) loaded from YAML. - """ - if not isinstance(value, str): - return value - - pattern = re.compile(r"\$\{([^}:]+)(?::-(.*?))?\}") - - def repl(match: re.Match[str]) -> str: - var, default = match.group(1), match.group(2) - return os.getenv(var, default or "") - - return pattern.sub(repl, value) - - -def _deep_resolve_env(data: Any) -> Any: - """Recursively resolve environment variables in nested structures. - - This walks arbitrary Python structures produced by ``yaml.safe_load`` and - applies :func:`_resolve_env` to every string it finds. Dictionaries and - lists are traversed deeply; scalars are passed through unchanged. - - Examples: - >>> os.environ["OPENAI_MODEL"] = "gpt-4o-mini" - >>> cfg = { - ... "models": [ - ... {"model_name": "${OPENAI_MODEL:-gpt-4o-mini}"}, - ... {"model_url": "${OPENAI_BASE_URL:-https://api.openai.com/v1}"} - ... ] - ... } - >>> resolved = _deep_resolve_env(cfg) - >>> resolved["models"][0]["model_name"] - 'gpt-4o-mini' - >>> resolved["models"][1]["model_url"] - 'https://api.openai.com/v1' - - This is what :func:`load_models_config` uses immediately after loading - ``config.yml`` so that all ``${VAR:-default}`` placeholders are resolved - before Pydantic validation and model registry construction. 
- """ - if isinstance(data, dict): - return {k: _deep_resolve_env(v) for k, v in data.items()} - if isinstance(data, list): - return [_deep_resolve_env(v) for v in data] - return _resolve_env(data) +# Import config merging for defaults.yml + config.yml integration +# OmegaConf handles environment variable resolution (${VAR:-default} syntax) +from advanced_omi_backend.config import get_config class ModelDef(BaseModel): @@ -250,73 +184,47 @@ def list_model_types(self) -> List[str]: def _find_config_path() -> Path: - """Find config.yml in expected locations. - - Search order: - 1. CONFIG_FILE environment variable - 2. Current working directory - 3. /app/config.yml (Docker container) - 4. Walk up from module directory - - Returns: - Path to config.yml (may not exist) """ - # ENV override - cfg_env = os.getenv("CONFIG_FILE") - if cfg_env and Path(cfg_env).exists(): - return Path(cfg_env) - - # Common locations (container vs repo root) - candidates = [Path("config.yml"), Path("/app/config.yml")] + Find config.yml using canonical path from config module. - # Also walk up from current file's parents defensively - try: - for parent in Path(__file__).resolve().parents: - c = parent / "config.yml" - if c.exists(): - return c - except Exception: - pass + DEPRECATED: Use advanced_omi_backend.config.get_config_yml_path() directly. + Kept for backward compatibility. - for c in candidates: - if c.exists(): - return c - - # Last resort: return /app/config.yml path (may not exist yet) - return Path("/app/config.yml") + Returns: + Path to config.yml + """ + from advanced_omi_backend.config import get_config_yml_path + return get_config_yml_path() def load_models_config(force_reload: bool = False) -> Optional[AppModels]: - """Load model configuration from config.yml. - - This function loads and parses the config.yml file, resolves environment - variables, validates model definitions using Pydantic, and caches the result. 
- + """Load model configuration from merged defaults.yml + config.yml. + + This function loads defaults.yml and config.yml, merges them with user overrides, + validates model definitions using Pydantic, and caches the result. + Environment variables are resolved by OmegaConf during config loading. + Args: force_reload: If True, reload from disk even if already cached - + Returns: AppModels instance with validated configuration, or None if config not found - + Raises: ValidationError: If config.yml has invalid model definitions - yaml.YAMLError: If config.yml has invalid YAML syntax """ global _REGISTRY if _REGISTRY is not None and not force_reload: return _REGISTRY - cfg_path = _find_config_path() - if not cfg_path.exists(): + # Get merged configuration (defaults + user config) + # OmegaConf resolves environment variables automatically + try: + raw = get_config(force_reload=force_reload) + except Exception as e: + logging.error(f"Failed to load merged configuration: {e}") return None - # Load and parse YAML - with cfg_path.open("r") as f: - raw = yaml.safe_load(f) or {} - - # Resolve environment variables - raw = _deep_resolve_env(raw) - # Extract sections defaults = raw.get("defaults", {}) or {} model_list = raw.get("models", []) or [] diff --git a/backends/advanced/src/advanced_omi_backend/models/annotation.py b/backends/advanced/src/advanced_omi_backend/models/annotation.py new file mode 100644 index 00000000..b2a986a5 --- /dev/null +++ b/backends/advanced/src/advanced_omi_backend/models/annotation.py @@ -0,0 +1,175 @@ +""" +Unified annotation system for Chronicle. + +Supports annotations for memories, transcripts, and future content types. +Enables both user edits and AI-powered suggestions. 
+""" + +from enum import Enum +from typing import Optional +from datetime import datetime, timezone +import uuid + +from beanie import Document, Indexed +from pydantic import BaseModel, Field + + +class AnnotationType(str, Enum): + """Type of content being annotated.""" + MEMORY = "memory" + TRANSCRIPT = "transcript" + DIARIZATION = "diarization" # Speaker identification corrections + + +class AnnotationSource(str, Enum): + """Origin of the annotation.""" + USER = "user" # User-created edit + MODEL_SUGGESTION = "model_suggestion" # AI-generated suggestion + + +class AnnotationStatus(str, Enum): + """Lifecycle status of annotation.""" + PENDING = "pending" # Waiting for user review (suggestions) + ACCEPTED = "accepted" # Applied to content + REJECTED = "rejected" # User dismissed suggestion + + +class Annotation(Document): + """ + Unified annotation model for all content types. + + Supports both user edits and AI-powered suggestions across + memories, transcripts, and future content types (chat, action items, etc.). + + Design: Polymorphic model with type-specific fields based on annotation_type. 
+ """ + + # Identity + id: str = Field(default_factory=lambda: str(uuid.uuid4())) + + # Classification + annotation_type: AnnotationType + user_id: Indexed(str) + source: AnnotationSource = Field(default=AnnotationSource.USER) + status: AnnotationStatus = Field(default=AnnotationStatus.ACCEPTED) + + # Content + original_text: str = "" # Text before correction (not used for diarization) + corrected_text: str = "" # Text after correction (not used for diarization) + + # Polymorphic References (based on annotation_type) + # For MEMORY annotations: + memory_id: Optional[str] = None + + # For TRANSCRIPT annotations: + conversation_id: Optional[str] = None + segment_index: Optional[int] = None + + # For DIARIZATION annotations: + original_speaker: Optional[str] = None # Speaker label before correction + corrected_speaker: Optional[str] = None # Speaker label after correction + segment_start_time: Optional[float] = None # Time offset for reference + + # Processed tracking (applies to ALL annotation types) + processed: bool = Field(default=False) # Whether annotation has been applied/sent to training + processed_at: Optional[datetime] = None # When annotation was processed + processed_by: Optional[str] = None # What processed it (manual, cron, apply, training, etc.) 
+ + # Timestamps (Python 3.12+ compatible) + created_at: datetime = Field( + default_factory=lambda: datetime.now(timezone.utc) + ) + updated_at: datetime = Field( + default_factory=lambda: datetime.now(timezone.utc) + ) + + class Settings: + name = "annotations" + # Create indexes on commonly queried fields + # Note: Enum fields and Optional fields don't use Indexed() wrapper + indexes = [ + "annotation_type", # Query by type (memory vs transcript vs diarization) + "user_id", # User-scoped queries + "status", # Filter by status (pending/accepted/rejected) + "memory_id", # Lookup annotations for specific memory + "conversation_id", # Lookup annotations for specific conversation + "processed", # Query unprocessed annotations + ] + + def is_memory_annotation(self) -> bool: + """Check if this is a memory annotation.""" + return self.annotation_type == AnnotationType.MEMORY + + def is_transcript_annotation(self) -> bool: + """Check if this is a transcript annotation.""" + return self.annotation_type == AnnotationType.TRANSCRIPT + + def is_diarization_annotation(self) -> bool: + """Check if this is a diarization annotation.""" + return self.annotation_type == AnnotationType.DIARIZATION + + def is_pending_suggestion(self) -> bool: + """Check if this is a pending AI suggestion.""" + return ( + self.source == AnnotationSource.MODEL_SUGGESTION + and self.status == AnnotationStatus.PENDING + ) + + +# Pydantic Request/Response Models + + +class AnnotationCreateBase(BaseModel): + """Base model for annotation creation.""" + original_text: str = "" # Optional for diarization + corrected_text: str = "" # Optional for diarization + status: AnnotationStatus = AnnotationStatus.ACCEPTED + + +class MemoryAnnotationCreate(AnnotationCreateBase): + """Create memory annotation request.""" + memory_id: str + original_text: str # Required for memory annotations + corrected_text: str # Required for memory annotations + + +class TranscriptAnnotationCreate(AnnotationCreateBase): + """Create 
transcript annotation request.""" + conversation_id: str + segment_index: int + original_text: str # Required for transcript annotations + corrected_text: str # Required for transcript annotations + + +class DiarizationAnnotationCreate(BaseModel): + """Create diarization annotation request.""" + conversation_id: str + segment_index: int + original_speaker: str + corrected_speaker: str + segment_start_time: Optional[float] = None + status: AnnotationStatus = AnnotationStatus.ACCEPTED + + +class AnnotationResponse(BaseModel): + """Annotation response for API.""" + id: str + annotation_type: AnnotationType + user_id: str + memory_id: Optional[str] = None + conversation_id: Optional[str] = None + segment_index: Optional[int] = None + original_text: str = "" + corrected_text: str = "" + original_speaker: Optional[str] = None + corrected_speaker: Optional[str] = None + segment_start_time: Optional[float] = None + processed: bool = False + processed_at: Optional[datetime] = None + processed_by: Optional[str] = None + status: AnnotationStatus + source: AnnotationSource + created_at: datetime + + class Config: + from_attributes = True # Pydantic v2 compatibility diff --git a/backends/advanced/src/advanced_omi_backend/models/audio_chunk.py b/backends/advanced/src/advanced_omi_backend/models/audio_chunk.py new file mode 100644 index 00000000..cea20ef7 --- /dev/null +++ b/backends/advanced/src/advanced_omi_backend/models/audio_chunk.py @@ -0,0 +1,158 @@ +""" +Audio chunk models for MongoDB-based audio storage. + +This module contains the AudioChunkDocument model for storing Opus-compressed +audio chunks in MongoDB. Each chunk represents a 10-second segment of audio +from a conversation. +""" + +from datetime import datetime +from typing import Optional +from pydantic import ConfigDict, Field, field_serializer +from beanie import Document, Indexed +from bson import Binary + + +class AudioChunkDocument(Document): + """ + MongoDB document representing a 10-second audio chunk. 
+ + Audio chunks are stored in Opus-compressed format for ~94% storage reduction + compared to raw PCM. Chunks are sequentially numbered and can be reconstructed + into complete WAV files for playback or batch processing. + + Storage Format: + - Encoding: Opus (24kbps VBR, optimized for speech) + - Chunk Duration: 10 seconds (configurable) + - Original Format: 16kHz, 16-bit, mono PCM + - Compression Ratio: ~0.047 (94% reduction) + + Indexes: + - (conversation_id, chunk_index): Primary query pattern for reconstruction + - conversation_id: Conversation lookup and counting + - created_at: Maintenance and cleanup operations + """ + + # Pydantic v2 configuration + model_config = ConfigDict(arbitrary_types_allowed=True) + + # Primary identifiers + conversation_id: Indexed(str) = Field( + description="Parent conversation ID (UUID format)" + ) + chunk_index: int = Field( + description="Sequential chunk number (0-based)", + ge=0 + ) + + # Audio data + audio_data: bytes = Field( + description="Opus-encoded audio bytes (stored as BSON Binary in MongoDB)" + ) + + # Size tracking + original_size: int = Field( + description="Original PCM size in bytes (before compression)", + gt=0 + ) + compressed_size: int = Field( + description="Opus-encoded size in bytes (after compression)", + gt=0 + ) + + # Time boundaries + start_time: float = Field( + description="Start time in seconds from conversation start", + ge=0.0 + ) + end_time: float = Field( + description="End time in seconds from conversation start", + gt=0.0 + ) + duration: float = Field( + description="Chunk duration in seconds (typically 10.0)", + gt=0.0 + ) + + # Audio format + sample_rate: int = Field( + default=16000, + description="Original PCM sample rate (Hz)" + ) + channels: int = Field( + default=1, + description="Number of audio channels (1=mono, 2=stereo)" + ) + + # Optional analysis + has_speech: Optional[bool] = Field( + default=None, + description="Voice Activity Detection result (if available)" + ) + + # 
Metadata + created_at: datetime = Field( + default_factory=datetime.utcnow, + description="Chunk creation timestamp" + ) + + # Soft delete fields + deleted: bool = Field( + default=False, + description="Whether this chunk was soft-deleted" + ) + deleted_at: Optional[datetime] = Field( + default=None, + description="When the chunk was marked as deleted" + ) + + @field_serializer('audio_data') + def serialize_audio_data(self, v: bytes) -> Binary: + """ + Convert bytes to BSON Binary for MongoDB storage. + + MongoDB returns BSON Binary as plain bytes during deserialization, + but expects Binary type for serialization to ensure proper binary data handling. + """ + if isinstance(v, bytes): + return Binary(v) + return v + + class Settings: + """Beanie document settings.""" + name = "audio_chunks" + + indexes = [ + # Primary query: Retrieve chunks in order for a conversation + [("conversation_id", 1), ("chunk_index", 1)], + + # Conversation lookup and counting + "conversation_id", + + # Maintenance queries (cleanup, monitoring) + "created_at", + + # Soft delete filtering + "deleted" + ] + + @property + def compression_ratio(self) -> float: + """Calculate compression ratio (compressed/original).""" + if self.original_size == 0: + return 0.0 + return self.compressed_size / self.original_size + + @property + def storage_savings_percent(self) -> float: + """Calculate storage savings as percentage.""" + return (1 - self.compression_ratio) * 100 + + def __repr__(self) -> str: + """Human-readable representation.""" + return ( + f"AudioChunk(conversation={self.conversation_id[:8]}..., " + f"index={self.chunk_index}, " + f"duration={self.duration:.1f}s, " + f"compression={self.compression_ratio:.3f})" + ) diff --git a/backends/advanced/src/advanced_omi_backend/models/audio_file.py b/backends/advanced/src/advanced_omi_backend/models/audio_file.py deleted file mode 100644 index e1e2c09a..00000000 --- a/backends/advanced/src/advanced_omi_backend/models/audio_file.py +++ /dev/null @@ 
-1,67 +0,0 @@ -""" -AudioFile models for Chronicle backend. - -This module contains the Beanie Document model for audio_chunks collection, -which stores ALL audio files (both with and without speech). This is the -storage layer - all audio gets stored here with its metadata. - -Note: Named AudioFile (not AudioChunk) to avoid confusion with wyoming.audio.AudioChunk -which is the in-memory streaming audio data structure. -""" - -from datetime import datetime -from typing import Dict, List, Optional, Any -from pydantic import BaseModel, Field - -from beanie import Document, Indexed - - -class AudioFile(Document): - """ - Audio file model representing persisted audio files in MongoDB. - - The audio_chunks collection stores ALL raw audio files (both with and without speech). - This is just for audio file storage and metadata. If speech is detected, a - Conversation document is created which contains transcripts and memories. - - This is different from wyoming.audio.AudioChunk which is for streaming audio data. 
- """ - - # Core identifiers - audio_uuid: Indexed(str, unique=True) = Field(description="Unique audio identifier") - source: Indexed(str) = Field( - default="upload", - description="Source of the audio (upload, gdrive, etc.)" - ) - audio_path: str = Field(description="Path to raw audio file") - client_id: Indexed(str) = Field(description="Client device identifier") - timestamp: Indexed(int) = Field(description="Unix timestamp in milliseconds") - - # User information - user_id: Indexed(str) = Field(description="User who owns this audio") - user_email: Optional[str] = Field(None, description="User email") - - # Audio processing - cropped_audio_path: Optional[str] = Field(None, description="Path to cropped audio (speech only)") - - # Speech-driven conversation linking - conversation_id: Optional[str] = Field( - None, - description="Link to Conversation if speech was detected" - ) - has_speech: bool = Field(default=False, description="Whether speech was detected") - speech_analysis: Dict[str, Any] = Field( - default_factory=dict, - description="Speech detection results" - ) - - - - class Settings: - name = "audio_chunks" - indexes = [ - "audio_uuid", - "client_id", - "user_id", - "timestamp", - ] \ No newline at end of file diff --git a/backends/advanced/src/advanced_omi_backend/models/conversation.py b/backends/advanced/src/advanced_omi_backend/models/conversation.py index 01dd5d96..e4446f0f 100644 --- a/backends/advanced/src/advanced_omi_backend/models/conversation.py +++ b/backends/advanced/src/advanced_omi_backend/models/conversation.py @@ -7,24 +7,18 @@ from datetime import datetime from typing import Dict, List, Optional, Any, Union -from pydantic import BaseModel, Field, model_validator, computed_field +from pydantic import BaseModel, Field, model_validator, computed_field, field_validator from enum import Enum import uuid from beanie import Document, Indexed +from pymongo import IndexModel class Conversation(Document): """Complete conversation model with 
versioned processing.""" - # Nested Enums - class TranscriptProvider(str, Enum): - """Supported transcription providers.""" - DEEPGRAM = "deepgram" - MISTRAL = "mistral" - PARAKEET = "parakeet" - SPEECH_DETECTION = "speech_detection" # Legacy value - UNKNOWN = "unknown" # Fallback value + # Nested Enums - Note: TranscriptProvider accepts any string value for flexibility class MemoryProvider(str, Enum): """Supported memory providers.""" @@ -49,6 +43,13 @@ class EndReason(str, Enum): UNKNOWN = "unknown" # Unknown or legacy reason # Nested Models + class Word(BaseModel): + """Individual word with timestamp in a transcript.""" + word: str = Field(description="Word text") + start: float = Field(description="Start time in seconds") + end: float = Field(description="End time in seconds") + confidence: Optional[float] = Field(None, description="Confidence score (0-1)") + class SpeakerSegment(BaseModel): """Individual speaker segment in a transcript.""" start: float = Field(description="Start time in seconds") @@ -56,14 +57,22 @@ class SpeakerSegment(BaseModel): text: str = Field(description="Transcript text for this segment") speaker: str = Field(description="Speaker identifier") confidence: Optional[float] = Field(None, description="Confidence score (0-1)") + words: List["Conversation.Word"] = Field(default_factory=list, description="Word-level timestamps for this segment") class TranscriptVersion(BaseModel): """Version of a transcript with processing metadata.""" version_id: str = Field(description="Unique version identifier") transcript: Optional[str] = Field(None, description="Full transcript text") - segments: List["Conversation.SpeakerSegment"] = Field(default_factory=list, description="Speaker segments") - provider: Optional["Conversation.TranscriptProvider"] = Field(None, description="Transcription provider used") - model: Optional[str] = Field(None, description="Model used (e.g., nova-3, voxtral-mini-2507)") + words: List["Conversation.Word"] = Field( + 
default_factory=list, + description="Word-level timestamps for entire transcript" + ) + segments: List["Conversation.SpeakerSegment"] = Field( + default_factory=list, + description="Speaker segments (filled by speaker recognition)" + ) + provider: Optional[str] = Field(None, description="Transcription provider used (deepgram, parakeet, etc.)") + model: Optional[str] = Field(None, description="Model used (e.g., nova-3, parakeet)") created_at: datetime = Field(description="When this version was created") processing_time_seconds: Optional[float] = Field(None, description="Time taken to process") metadata: Dict[str, Any] = Field(default_factory=dict, description="Additional provider-specific metadata") @@ -81,13 +90,32 @@ class MemoryVersion(BaseModel): # Core identifiers conversation_id: Indexed(str, unique=True) = Field(default_factory=lambda: str(uuid.uuid4()), description="Unique conversation identifier") - audio_uuid: Indexed(str) = Field(description="Session/audio identifier (for tracking audio files)") user_id: Indexed(str) = Field(description="User who owns this conversation") client_id: Indexed(str) = Field(description="Client device identifier") - # Audio file reference - audio_path: Optional[str] = Field(None, description="Path to audio file (relative to CHUNK_DIR)") - cropped_audio_path: Optional[str] = Field(None, description="Path to cropped audio file (relative to CHUNK_DIR)") + # External file tracking (for deduplication of imported files) + external_source_id: Optional[str] = Field( + None, + description="External file identifier (e.g., Google Drive file_id) for deduplication" + ) + external_source_type: Optional[str] = Field( + None, + description="Type of external source (gdrive, dropbox, s3, etc.)" + ) + + # MongoDB chunk-based audio storage (new system) + audio_chunks_count: Optional[int] = Field( + None, + description="Total number of 10-second audio chunks stored in MongoDB" + ) + audio_total_duration: Optional[float] = Field( + None, + 
description="Total audio duration in seconds (sum of all chunks)" + ) + audio_compression_ratio: Optional[float] = Field( + None, + description="Compression ratio (compressed_size / original_size), typically ~0.047 for Opus" + ) # Creation metadata created_at: Indexed(datetime) = Field(default_factory=datetime.utcnow, description="When the conversation was created") @@ -97,6 +125,16 @@ class MemoryVersion(BaseModel): deletion_reason: Optional[str] = Field(None, description="Reason for deletion (no_meaningful_speech, audio_file_not_ready, etc.)") deleted_at: Optional[datetime] = Field(None, description="When the conversation was marked as deleted") + # Always persist audio flag and processing status + processing_status: Optional[str] = Field( + None, + description="Processing status: pending_transcription, transcription_failed, completed" + ) + always_persist: bool = Field( + default=False, + description="Flag indicating conversation was created for audio persistence" + ) + # Conversation completion tracking end_reason: Optional["Conversation.EndReason"] = Field(None, description="Reason why the conversation ended") completed_at: Optional[datetime] = Field(None, description="When the conversation was completed/closed") @@ -228,12 +266,35 @@ def memory_version_count(self) -> int: """Get count of memory versions.""" return len(self.memory_versions) + @computed_field + @property + def active_transcript_version_number(self) -> Optional[int]: + """Get 1-based version number of the active transcript version.""" + if not self.active_transcript_version: + return None + for i, version in enumerate(self.transcript_versions): + if version.version_id == self.active_transcript_version: + return i + 1 + return None + + @computed_field + @property + def active_memory_version_number(self) -> Optional[int]: + """Get 1-based version number of the active memory version.""" + if not self.active_memory_version: + return None + for i, version in enumerate(self.memory_versions): + if 
version.version_id == self.active_memory_version: + return i + 1 + return None + def add_transcript_version( self, version_id: str, transcript: str, - segments: List["Conversation.SpeakerSegment"], - provider: "Conversation.TranscriptProvider", + words: Optional[List["Conversation.Word"]] = None, + segments: Optional[List["Conversation.SpeakerSegment"]] = None, + provider: str = None, # Provider name from config.yml (deepgram, parakeet, etc.) model: Optional[str] = None, processing_time_seconds: Optional[float] = None, metadata: Optional[Dict[str, Any]] = None, @@ -243,7 +304,8 @@ def add_transcript_version( new_version = Conversation.TranscriptVersion( version_id=version_id, transcript=transcript, - segments=segments, + words=words or [], + segments=segments or [], provider=provider, model=model, created_at=datetime.now(), @@ -310,13 +372,13 @@ class Settings: "conversation_id", "user_id", "created_at", - [("user_id", 1), ("created_at", -1)] # Compound index for user queries + [("user_id", 1), ("created_at", -1)], # Compound index for user queries + IndexModel([("external_source_id", 1)], sparse=True) # Sparse index for deduplication ] # Factory function for creating conversations def create_conversation( - audio_uuid: str, user_id: str, client_id: str, conversation_id: Optional[str] = None, @@ -324,12 +386,13 @@ def create_conversation( summary: Optional[str] = None, transcript: Optional[str] = None, segments: Optional[List["Conversation.SpeakerSegment"]] = None, + external_source_id: Optional[str] = None, + external_source_type: Optional[str] = None, ) -> Conversation: """ Factory function to create a new conversation. 
Args: - audio_uuid: Unique identifier for the audio session user_id: User who owns this conversation client_id: Client device identifier conversation_id: Optional unique conversation identifier (auto-generated if not provided) @@ -337,26 +400,25 @@ def create_conversation( summary: Optional conversation summary transcript: Optional transcript text segments: Optional speaker segments + external_source_id: Optional external file ID for deduplication (e.g., Google Drive file_id) + external_source_type: Optional external source type (gdrive, dropbox, etc.) Returns: Conversation instance """ # Build the conversation data conv_data = { - "audio_uuid": audio_uuid, "user_id": user_id, "client_id": client_id, "created_at": datetime.now(), "title": title, "summary": summary, - "transcript": transcript or "", - "segments": segments or [], "transcript_versions": [], "active_transcript_version": None, "memory_versions": [], "active_memory_version": None, - "memories": [], - "memory_count": 0 + "external_source_id": external_source_id, + "external_source_type": external_source_type, } # Only set conversation_id if provided, otherwise let the model auto-generate it diff --git a/backends/advanced/src/advanced_omi_backend/models/job.py b/backends/advanced/src/advanced_omi_backend/models/job.py index b295782c..5d906865 100644 --- a/backends/advanced/src/advanced_omi_backend/models/job.py +++ b/backends/advanced/src/advanced_omi_backend/models/job.py @@ -35,15 +35,16 @@ async def _ensure_beanie_initialized(): from motor.motor_asyncio import AsyncIOMotorClient from beanie import init_beanie from advanced_omi_backend.models.conversation import Conversation - from advanced_omi_backend.models.audio_file import AudioFile - from advanced_omi_backend.models.user import User + from advanced_omi_backend.models.audio_chunk import AudioChunkDocument + from advanced_omi_backend.models.user import User + from advanced_omi_backend.models.waveform import WaveformData from pymongo.errors import 
ConfigurationError - + # Get MongoDB URI from environment mongodb_uri = os.getenv("MONGODB_URI", "mongodb://localhost:27017") # Create MongoDB client - mongodb_database = os.getenv("MONGODB_DATABASE", "friend-lite") + mongodb_database = os.getenv("MONGODB_DATABASE", "chronicle") client = AsyncIOMotorClient(mongodb_uri) try: database = client.get_default_database(mongodb_database) @@ -54,7 +55,7 @@ async def _ensure_beanie_initialized(): # Initialize Beanie await init_beanie( database=database, - document_models=[User, Conversation, AudioFile], + document_models=[User, Conversation, AudioChunkDocument, WaveformData], ) _beanie_initialized = True diff --git a/backends/advanced/src/advanced_omi_backend/models/user.py b/backends/advanced/src/advanced_omi_backend/models/user.py index b0ced195..7291f9bb 100644 --- a/backends/advanced/src/advanced_omi_backend/models/user.py +++ b/backends/advanced/src/advanced_omi_backend/models/user.py @@ -16,6 +16,7 @@ class UserCreate(BaseUserCreate): """Schema for creating new users.""" display_name: Optional[str] = None + notification_email: Optional[EmailStr] = None is_superuser: Optional[bool] = False @@ -23,6 +24,7 @@ class UserRead(BaseUser[PydanticObjectId]): """Schema for reading user data.""" display_name: Optional[str] = None + notification_email: Optional[EmailStr] = None registered_clients: dict[str, dict] = Field(default_factory=dict) primary_speakers: list[dict] = Field(default_factory=list) @@ -31,6 +33,7 @@ class UserUpdate(BaseUserUpdate): """Schema for updating user data.""" display_name: Optional[str] = None + notification_email: Optional[EmailStr] = None is_superuser: Optional[bool] = None def create_update_dict(self): @@ -38,6 +41,8 @@ def create_update_dict(self): update_dict = super().create_update_dict() if self.display_name is not None: update_dict["display_name"] = self.display_name + if self.notification_email is not None: + update_dict["notification_email"] = self.notification_email return update_dict def 
create_update_dict_superuser(self): @@ -45,6 +50,8 @@ def create_update_dict_superuser(self): update_dict = super().create_update_dict_superuser() if self.display_name is not None: update_dict["display_name"] = self.display_name + if self.notification_email is not None: + update_dict["notification_email"] = self.notification_email return update_dict @@ -58,6 +65,7 @@ class User(BeanieBaseUser, Document): ) display_name: Optional[str] = None + notification_email: Optional[EmailStr] = None # Client tracking for audio devices registered_clients: dict[str, dict] = Field(default_factory=dict) # Speaker processing filter configuration diff --git a/backends/advanced/src/advanced_omi_backend/models/waveform.py b/backends/advanced/src/advanced_omi_backend/models/waveform.py new file mode 100644 index 00000000..caf6fd49 --- /dev/null +++ b/backends/advanced/src/advanced_omi_backend/models/waveform.py @@ -0,0 +1,47 @@ +""" +Waveform visualization data model for conversations. + +This module provides the WaveformData model for storing pre-computed +waveform visualization data, enabling UI to display audio waveforms +without real-time decoding. 
+""" + +from datetime import datetime +from typing import List, Optional + +from beanie import Document, Indexed +from pydantic import Field + + +class WaveformData(Document): + """Pre-computed waveform visualization for conversations.""" + + # Link to parent conversation + conversation_id: Indexed(str) = Field( + description="Parent conversation ID (unique per conversation)" + ) + + # Waveform amplitude data + samples: List[float] = Field( + description="Amplitude samples normalized to [-1.0, 1.0] range" + ) + sample_rate: int = Field( + description="Samples per second (e.g., 10 = 1 sample per 100ms)" + ) + + # Metadata + duration_seconds: float = Field(description="Total audio duration in seconds") + created_at: datetime = Field( + default_factory=datetime.utcnow, + description="When this waveform was generated" + ) + processing_time_seconds: Optional[float] = Field( + None, + description="Time taken to generate waveform" + ) + + class Settings: + name = "waveforms" + indexes = [ + "conversation_id", # Unique lookup by conversation + ] diff --git a/backends/advanced/src/advanced_omi_backend/plugins/__init__.py b/backends/advanced/src/advanced_omi_backend/plugins/__init__.py new file mode 100644 index 00000000..3ccea7dc --- /dev/null +++ b/backends/advanced/src/advanced_omi_backend/plugins/__init__.py @@ -0,0 +1,18 @@ +""" +Chronicle plugin system for multi-level pipeline extension. 
+ +Plugins can hook into different stages of the processing pipeline: +- transcript: When new transcript segment arrives +- conversation: When conversation processing completes +- memory: After memory extraction finishes + +Trigger types control when plugins execute: +- wake_word: Only when transcript starts with specified wake word +- always: Execute on every invocation at access level +- conditional: Execute based on custom condition (future) +""" + +from .base import BasePlugin, PluginContext, PluginResult +from .router import PluginRouter + +__all__ = ['BasePlugin', 'PluginContext', 'PluginResult', 'PluginRouter'] diff --git a/backends/advanced/src/advanced_omi_backend/plugins/base.py b/backends/advanced/src/advanced_omi_backend/plugins/base.py new file mode 100644 index 00000000..dbd13301 --- /dev/null +++ b/backends/advanced/src/advanced_omi_backend/plugins/base.py @@ -0,0 +1,145 @@ +""" +Base plugin classes for Chronicle multi-level plugin architecture. + +Provides: +- PluginContext: Context passed to plugin execution +- PluginResult: Result from plugin execution +- BasePlugin: Abstract base class for all plugins +""" +from abc import ABC, abstractmethod +from typing import Optional, Dict, Any, List +from dataclasses import dataclass, field + + +@dataclass +class PluginContext: + """Context passed to plugin execution""" + user_id: str + event: str # Event name (e.g., "transcript.streaming", "conversation.complete") + data: Dict[str, Any] # Event-specific data + metadata: Dict[str, Any] = field(default_factory=dict) + + +@dataclass +class PluginResult: + """Result from plugin execution""" + success: bool + data: Optional[Dict[str, Any]] = None + message: Optional[str] = None + should_continue: bool = True # Whether to continue normal processing + + +class BasePlugin(ABC): + """ + Base class for all Chronicle plugins. 
+ + Plugins can hook into different stages of the processing pipeline: + - transcript: When new transcript segment arrives + - conversation: When conversation processing completes + - memory: When memory extraction finishes + + Subclasses should: + 1. Set SUPPORTED_ACCESS_LEVELS to list which levels they support + 2. Implement initialize() for plugin initialization + 3. Implement the appropriate callback methods (on_transcript, on_conversation_complete, on_memory_processed) + 4. Optionally implement cleanup() for resource cleanup + """ + + # Subclasses declare which access levels they support + SUPPORTED_ACCESS_LEVELS: List[str] = [] + + def __init__(self, config: Dict[str, Any]): + """ + Initialize plugin with configuration. + + Args: + config: Plugin configuration from config/plugins.yml + Contains: enabled, events, condition, and plugin-specific config + """ + import logging + logger = logging.getLogger(__name__) + + self.config = config + self.enabled = config.get('enabled', False) + + # NEW terminology with backward compatibility + self.events = config.get('events') or config.get('subscriptions', []) + self.condition = config.get('condition') or config.get('trigger', {'type': 'always'}) + + # Deprecation warnings + plugin_name = config.get('name', 'unknown') + if 'subscriptions' in config: + logger.warning(f"Plugin '{plugin_name}': 'subscriptions' is deprecated, use 'events' instead") + if 'trigger' in config: + logger.warning(f"Plugin '{plugin_name}': 'condition' is deprecated, use 'condition' instead") + if 'access_level' in config: + logger.warning(f"Plugin '{plugin_name}': 'access_level' is deprecated and ignored") + + @abstractmethod + async def initialize(self): + """ + Initialize plugin resources (connect to services, etc.) + + Called during application startup after plugin registration. + Raise an exception if initialization fails. + """ + pass + + async def cleanup(self): + """ + Clean up plugin resources. + + Called during application shutdown. 
+ Override if your plugin needs cleanup (closing connections, etc.) + """ + pass + + # Access-level specific methods (implement only what you need) + + async def on_transcript(self, context: PluginContext) -> Optional[PluginResult]: + """ + Called when new transcript segment arrives. + + Context data contains: + - transcript: str - The transcript text + - segment_id: str - Unique segment identifier + - conversation_id: str - Current conversation ID + + For wake_word conditions, router adds: + - command: str - Command with wake word stripped + - original_transcript: str - Full transcript + + Returns: + PluginResult with success status, optional message, and should_continue flag + """ + pass + + async def on_conversation_complete(self, context: PluginContext) -> Optional[PluginResult]: + """ + Called when conversation processing completes. + + Context data contains: + - conversation: dict - Full conversation data + - transcript: str - Complete transcript + - duration: float - Conversation duration + - conversation_id: str - Conversation identifier + + Returns: + PluginResult with success status, optional message, and should_continue flag + """ + pass + + async def on_memory_processed(self, context: PluginContext) -> Optional[PluginResult]: + """ + Called after memory extraction finishes. 
+ + Context data contains: + - memories: list - Extracted memories + - conversation: dict - Source conversation + - memory_count: int - Number of memories created + - conversation_id: str - Conversation identifier + + Returns: + PluginResult with success status, optional message, and should_continue flag + """ + pass diff --git a/backends/advanced/src/advanced_omi_backend/plugins/email_summarizer/README.md b/backends/advanced/src/advanced_omi_backend/plugins/email_summarizer/README.md new file mode 100644 index 00000000..f1a21a52 --- /dev/null +++ b/backends/advanced/src/advanced_omi_backend/plugins/email_summarizer/README.md @@ -0,0 +1,276 @@ +# Email Summarizer Plugin + +Automatically sends email summaries when conversations complete. + +## Features + +- πŸ“§ **Automatic Email Delivery**: Sends emails when conversations finish +- πŸ€– **LLM-Powered Summaries**: Uses your configured LLM to generate intelligent summaries +- 🎨 **Beautiful HTML Emails**: Professional-looking emails with proper formatting +- πŸ“± **Plain Text Fallback**: Ensures compatibility with all email clients +- ⚑ **Async Processing**: Non-blocking email sending +- πŸ”’ **Secure SMTP**: TLS/SSL encryption support + +## How It Works + +1. User completes a conversation (via OMI device or file upload) +2. Plugin receives `conversation.complete` event +3. Retrieves user email from database +4. Generates LLM summary (2-3 sentences) +5. Formats beautiful HTML and plain text emails +6. Sends email via configured SMTP server + +## Configuration Architecture + +Chronicle uses a clean three-file separation for plugin configuration: + +1. **`backends/advanced/.env`** - Secrets only (SMTP credentials, API keys) + - Gitignored for security + - Never commit to version control + +2. **`plugins/email_summarizer/config.yml`** - Plugin-specific settings + - Email content options (subject prefix, max sentences, etc.) 
+ - References environment variables using `${VAR_NAME}` syntax + - Defaults work for most users - typically no editing needed + +3. **`config/plugins.yml`** - Orchestration only + - `enabled` flag + - Event subscriptions + - Trigger conditions + +This separation keeps secrets secure and configuration organized. See [`plugin-configuration.md`](../../../Docs/plugin-configuration.md) for details. + +## Configuration + +### Step 1: Get SMTP Credentials + +#### For Gmail (Recommended for Testing): + +1. **Enable 2-Factor Authentication** on your Google account +2. Go to Google Account β†’ Security β†’ 2-Step Verification +3. Scroll down to **App passwords** +4. Generate an app password for "Mail" +5. Copy the 16-character password (no spaces) + +#### For Other Providers: + +- **Outlook/Hotmail**: smtp.office365.com:587 +- **Yahoo**: smtp.mail.yahoo.com:587 +- **Custom SMTP**: Use your provider's settings + +### Step 2: Configure Environment Variables + +Add to `backends/advanced/.env`: + +```bash +# Email Summarizer Plugin +SMTP_HOST=smtp.gmail.com +SMTP_PORT=587 +SMTP_USERNAME=your-email@gmail.com +SMTP_PASSWORD=your-app-password-here # Gmail App Password (16 chars, no spaces) +SMTP_USE_TLS=true +FROM_EMAIL=noreply@chronicle.ai +FROM_NAME=Chronicle AI +``` + +### Step 3: Enable Plugin + +Add to `config/plugins.yml` (orchestration only): + +```yaml +plugins: + email_summarizer: + enabled: true + events: + - conversation.complete + condition: + type: always +``` + +**That's it!** Plugin-specific settings are already configured in: +- **`plugins/email_summarizer/config.yml`** - Email content options (subject prefix, max sentences, etc.) +- **SMTP credentials** are automatically read from `.env` via environment variable references + +You typically don't need to edit `config.yml` - the defaults work for most users. If you want to customize email content settings, see the Configuration Options section below. 
+ +### Step 4: Restart Backend + +```bash +cd backends/advanced +docker compose restart +``` + +## Configuration Options + +All configuration options below are in **`plugins/email_summarizer/config.yml`** and have sensible defaults. You typically don't need to modify these unless you want to customize email content. + +| Option | Type | Default | Description | +|--------|------|---------|-------------| +| `smtp_host` | string | `smtp.gmail.com` | SMTP server hostname | +| `smtp_port` | integer | `587` | SMTP server port (587 for TLS, 465 for SSL) | +| `smtp_username` | string | **Required** | SMTP authentication username | +| `smtp_password` | string | **Required** | SMTP authentication password | +| `smtp_use_tls` | boolean | `true` | Use STARTTLS encryption | +| `from_email` | string | **Required** | Sender email address | +| `from_name` | string | `Chronicle AI` | Sender display name | +| `subject_prefix` | string | `Conversation Summary` | Email subject prefix | +| `summary_max_sentences` | integer | `3` | Maximum sentences in LLM summary | +| `include_conversation_id` | boolean | `true` | Show conversation ID in email | +| `include_duration` | boolean | `true` | Show conversation duration | + +## Email Template + +### Subject Line +``` +Conversation Summary - Jan 15, 2025 at 10:30 AM +``` + +### Email Body +``` +πŸ“‹ SUMMARY +[LLM-generated 2-3 sentence summary of key points] + +πŸ“ FULL TRANSCRIPT +[Complete conversation transcript] + +πŸ“Š METADATA +Duration: 5m 30s +Conversation ID: 507f1f77bc... +``` + +## Testing + +### Test SMTP Connection + +```bash +cd backends/advanced +uv run python -m advanced_omi_backend.services.email_service +``` + +This will: +- Test SMTP connectivity +- Send a test email to your SMTP username +- Verify configuration + +### Test Plugin Integration + +1. Start the backend with plugin enabled +2. Upload a test audio file or use OMI device +3. Wait for conversation to complete +4. 
Check your email inbox + +## Troubleshooting + +### "Authentication failed" + +**For Gmail:** +- Make sure you're using an **App Password**, not your regular password +- Enable 2-Factor Authentication first +- App password should be 16 characters (xxxx xxxx xxxx xxxx) + +**For other providers:** +- Verify username and password are correct +- Check if "less secure apps" needs to be enabled + +### "Connection timeout" + +- Check `smtp_host` and `smtp_port` are correct +- Verify firewall allows outbound SMTP connections +- Try port 465 with SSL instead of 587 with TLS + +### "No email received" + +- Check user has email configured in database +- Look for plugin logs: `docker compose logs -f chronicle-backend | grep EmailSummarizer` +- Verify plugin is enabled in `plugins.yml` +- Check spam/junk folder + +### "Empty summary" or "LLM error" + +- Verify LLM service is configured and running +- Check LLM API keys are valid +- Plugin will fall back to truncated transcript if LLM fails + +## πŸ”’ Security Best Practices + +### NEVER Commit Secrets to Version Control + +Always use environment variable references in configuration files: + +```yaml +# plugins/email_summarizer/config.yml +smtp_password: ${SMTP_PASSWORD} # Reference to environment variable +``` + +```bash +# backends/advanced/.env (gitignored) +SMTP_PASSWORD=xnetcqctkkfgzllh # Actual secret stored safely +``` + +### How Configuration Works + +The plugin system automatically: +- βœ… Loads settings from `plugins/email_summarizer/config.yml` +- βœ… Expands `${ENV_VAR}` references from `backends/advanced/.env` +- βœ… Merges orchestration settings (enabled, events) from `config/plugins.yml` +- βœ… Prevents accidental secret commits (only .env has secrets, and it's gitignored) + +**Always use the setup wizard** instead of manual configuration: +```bash +uv run python backends/advanced/src/advanced_omi_backend/plugins/email_summarizer/setup.py +``` + +### Additional Security Tips + +1. 
**Never commit SMTP passwords** to git (use .env only) +2. **Use environment variable references** (`${SMTP_PASSWORD}`) in YAML files +3. **Enable TLS/SSL** for encrypted SMTP connections +4. **Gmail App Passwords** are safer than account passwords +5. **Rotate credentials** periodically +6. **Review commits** before pushing to ensure no hardcoded secrets + +## Development + +### File Structure + +``` +plugins/email_summarizer/ +β”œβ”€β”€ __init__.py # Plugin exports +β”œβ”€β”€ plugin.py # Main plugin logic +β”œβ”€β”€ templates.py # Email HTML/text templates +└── README.md # This file +``` + +### Key Methods + +- `on_conversation_complete()` - Main event handler +- `_get_user_email()` - Fetch user email from database +- `_generate_summary()` - Generate LLM summary with fallback +- `_format_subject()` - Format email subject line + +### Dependencies + +- `advanced_omi_backend.database` - MongoDB access +- `advanced_omi_backend.llm_client` - LLM generation +- `advanced_omi_backend.services.email_service` - SMTP email sending + +## Future Enhancements + +- [ ] Email templates customization +- [ ] User preference for email frequency +- [ ] Unsubscribe link +- [ ] Email digests (daily/weekly summaries) +- [ ] Rich formatting for action items +- [ ] Attachment support (audio files) +- [ ] Multiple recipient support +- [ ] Email open tracking + +## Support + +- **Issues**: [GitHub Issues](https://github.com/chronicle-ai/chronicle/issues) +- **Discussions**: [GitHub Discussions](https://github.com/chronicle-ai/chronicle/discussions) +- **Documentation**: [Chronicle Docs](https://github.com/chronicle-ai/chronicle) + +## License + +MIT License - see project LICENSE file for details. 
diff --git a/backends/advanced/src/advanced_omi_backend/plugins/email_summarizer/__init__.py b/backends/advanced/src/advanced_omi_backend/plugins/email_summarizer/__init__.py new file mode 100644 index 00000000..525acd51 --- /dev/null +++ b/backends/advanced/src/advanced_omi_backend/plugins/email_summarizer/__init__.py @@ -0,0 +1,9 @@ +""" +Email Summarizer Plugin for Chronicle. + +Automatically sends email summaries when conversations complete. +""" + +from .plugin import EmailSummarizerPlugin + +__all__ = ['EmailSummarizerPlugin'] diff --git a/backends/advanced/src/advanced_omi_backend/plugins/email_summarizer/config.yml b/backends/advanced/src/advanced_omi_backend/plugins/email_summarizer/config.yml new file mode 100644 index 00000000..9f4ed8f6 --- /dev/null +++ b/backends/advanced/src/advanced_omi_backend/plugins/email_summarizer/config.yml @@ -0,0 +1,23 @@ +# Email Summarizer Plugin Configuration +# +# This file contains non-secret configuration for the email summarizer plugin. +# Secrets (SMTP credentials) are stored in backends/advanced/.env +# Plugin orchestration (enabled, events) is in config/plugins.yml + +# Email content settings +subject_prefix: "Conversation Summary" +summary_max_sentences: 3 +include_conversation_id: true +include_duration: true + +# SMTP Configuration (reads from .env) +# These use environment variable references ${VAR_NAME} +smtp_host: ${SMTP_HOST} +smtp_port: ${SMTP_PORT:-587} +smtp_username: ${SMTP_USERNAME} +smtp_password: ${SMTP_PASSWORD} +smtp_use_tls: ${SMTP_USE_TLS:-true} + +# Email sender configuration +from_email: ${FROM_EMAIL} +from_name: ${FROM_NAME:-Chronicle AI} diff --git a/backends/advanced/src/advanced_omi_backend/plugins/email_summarizer/email_service.py b/backends/advanced/src/advanced_omi_backend/plugins/email_summarizer/email_service.py new file mode 100644 index 00000000..b51de0b5 --- /dev/null +++ b/backends/advanced/src/advanced_omi_backend/plugins/email_summarizer/email_service.py @@ -0,0 +1,237 @@ +""" 
+SMTP Email Service for Chronicle. + +Provides email sending functionality via SMTP protocol with support for: +- HTML and plain text emails +- TLS/SSL encryption +- Gmail and other SMTP providers +- Async implementation +""" +import asyncio +import logging +import smtplib +from email.mime.multipart import MIMEMultipart +from email.mime.text import MIMEText +from typing import Any, Dict, Optional + +from advanced_omi_backend.utils.logging_utils import mask_dict + +logger = logging.getLogger(__name__) + + +class SMTPEmailService: + """SMTP email service for sending emails via SMTP protocol.""" + + def __init__(self, config: Dict[str, Any]): + """ + Initialize SMTP email service with configuration. + + Args: + config: SMTP configuration containing: + - smtp_host: SMTP server hostname + - smtp_port: SMTP server port (default: 587) + - smtp_username: SMTP username + - smtp_password: SMTP password + - smtp_use_tls: Whether to use TLS (default: True) + - from_email: Sender email address + - from_name: Sender display name (default: 'Chronicle AI') + """ + self.host = config.get('smtp_host') + self.port = config.get('smtp_port', 587) + self.username = config.get('smtp_username') + self.password = config.get('smtp_password') + self.use_tls = config.get('smtp_use_tls', True) + self.from_email = config.get('from_email') + self.from_name = config.get('from_name', 'Chronicle AI') + + # Validate required configuration + if not all([self.host, self.username, self.password, self.from_email]): + raise ValueError( + "SMTP configuration incomplete. 
Required: smtp_host, smtp_username, " + "smtp_password, from_email" + ) + + # Log configuration with masked secrets + masked_config = mask_dict(config) + logger.info( + f"SMTP Email Service initialized: {self.username}@{self.host}:{self.port} " + f"(TLS: {self.use_tls})" + ) + logger.debug(f"SMTP config: {masked_config}") + + async def send_email( + self, + to_email: str, + subject: str, + body_text: str, + body_html: Optional[str] = None + ) -> bool: + """ + Send email via SMTP with HTML/text support. + + Args: + to_email: Recipient email address + subject: Email subject line + body_text: Plain text email body + body_html: Optional HTML email body + + Returns: + True if email sent successfully, False otherwise + """ + try: + # Create message container + msg = MIMEMultipart('alternative') + msg['Subject'] = subject + msg['From'] = f"{self.from_name} <{self.from_email}>" + msg['To'] = to_email + + # Attach plain text version + text_part = MIMEText(body_text, 'plain') + msg.attach(text_part) + + # Attach HTML version if provided + if body_html: + html_part = MIMEText(body_html, 'html') + msg.attach(html_part) + + # Send email asynchronously (run in thread pool to avoid blocking) + await asyncio.to_thread(self._send_smtp, msg, to_email) + + logger.info(f"βœ… Email sent successfully to {to_email}: {subject}") + return True + + except Exception as e: + logger.error(f"Failed to send email to {to_email}: {e}", exc_info=True) + return False + + def _send_smtp(self, msg: MIMEMultipart, to_email: str) -> None: + """ + Internal method to send email via SMTP (blocking). 
+ + Args: + msg: MIME message to send + to_email: Recipient email address + + Raises: + Exception: If SMTP sending fails + """ + # Connect to SMTP server + if self.use_tls: + # Use STARTTLS (most common for port 587) + smtp_server = smtplib.SMTP(self.host, self.port, timeout=30) + smtp_server.ehlo() + smtp_server.starttls() + smtp_server.ehlo() + else: + # Direct connection (for port 465 SSL or no encryption) + smtp_server = smtplib.SMTP(self.host, self.port, timeout=30) + + try: + # Login and send + smtp_server.login(self.username, self.password) + smtp_server.send_message(msg) + logger.debug(f"SMTP send completed for {to_email}") + finally: + smtp_server.quit() + + async def test_connection(self) -> bool: + """ + Test SMTP connectivity and authentication. + + Returns: + True if connection successful, False otherwise + """ + try: + await asyncio.to_thread(self._test_smtp_connection) + logger.info(f"βœ… SMTP connection test successful: {self.username}@{self.host}") + return True + except Exception as e: + logger.error(f"SMTP connection test failed: {e}", exc_info=True) + return False + + def _test_smtp_connection(self) -> None: + """ + Internal method to test SMTP connection (blocking). + + Raises: + Exception: If connection fails + """ + try: + if self.use_tls: + smtp_server = smtplib.SMTP(self.host, self.port, timeout=10) + smtp_server.ehlo() + smtp_server.starttls() + smtp_server.ehlo() + else: + smtp_server = smtplib.SMTP(self.host, self.port, timeout=10) + + try: + smtp_server.login(self.username, self.password) + logger.debug("SMTP authentication successful") + finally: + smtp_server.quit() + except smtplib.SMTPAuthenticationError as e: + # Note: Error message from smtplib should not contain password, but be cautious + raise Exception(f"SMTP Authentication failed for {self.username}. Check credentials. For Gmail, use an App Password instead of your regular password. 
Error: {str(e)}") + except smtplib.SMTPConnectError as e: + raise Exception(f"Failed to connect to SMTP server {self.host}:{self.port}. Check host and port. Error: {str(e)}") + except smtplib.SMTPServerDisconnected as e: + raise Exception(f"SMTP server disconnected unexpectedly. Check TLS settings (port 587 needs TLS, port 465 needs SSL). Error: {str(e)}") + except TimeoutError as e: + raise Exception(f"Connection to {self.host}:{self.port} timed out. Check firewall/network settings. Error: {str(e)}") + except Exception as e: + raise Exception(f"SMTP connection test failed: {type(e).__name__}: {str(e)}") + + +# Test script for development/debugging +async def main(): + """Test the SMTP email service.""" + import os + + from dotenv import load_dotenv + + load_dotenv() + + config = { + 'smtp_host': os.getenv('SMTP_HOST', 'smtp.gmail.com'), + 'smtp_port': int(os.getenv('SMTP_PORT', 587)), + 'smtp_username': os.getenv('SMTP_USERNAME'), + 'smtp_password': os.getenv('SMTP_PASSWORD'), + 'smtp_use_tls': os.getenv('SMTP_USE_TLS', 'true').lower() == 'true', + 'from_email': os.getenv('FROM_EMAIL', 'noreply@chronicle.ai'), + 'from_name': os.getenv('FROM_NAME', 'Chronicle AI'), + } + + try: + service = SMTPEmailService(config) + + # Test connection + print("Testing SMTP connection...") + if await service.test_connection(): + print("βœ… Connection test passed") + else: + print("❌ Connection test failed") + return + + # Send test email + test_email = config['smtp_username'] # Send to self + print(f"\nSending test email to {test_email}...") + + success = await service.send_email( + to_email=test_email, + subject="Chronicle Email Service Test", + body_text="This is a test email from Chronicle Email Service.\n\nIf you received this, the email service is working correctly!", + body_html="
<h2>Chronicle Email Service Test</h2><p>This is a test email from Chronicle Email Service.</p><p>If you received this, the email service is working correctly!</p>
" + ) + + if success: + print("βœ… Test email sent successfully") + else: + print("❌ Failed to send test email") + + except Exception as e: + print(f"❌ Error: {e}") + + +if __name__ == '__main__': + asyncio.run(main()) diff --git a/backends/advanced/src/advanced_omi_backend/plugins/email_summarizer/plugin.py b/backends/advanced/src/advanced_omi_backend/plugins/email_summarizer/plugin.py new file mode 100644 index 00000000..a61a915d --- /dev/null +++ b/backends/advanced/src/advanced_omi_backend/plugins/email_summarizer/plugin.py @@ -0,0 +1,399 @@ +""" +Email Summarizer Plugin for Chronicle. + +Automatically sends email summaries when conversations complete. +""" +import logging +from datetime import datetime +from typing import Any, Dict, List, Optional + +from advanced_omi_backend.database import get_database +from advanced_omi_backend.llm_client import async_generate +from advanced_omi_backend.utils.logging_utils import mask_dict + +from ..base import BasePlugin, PluginContext, PluginResult +from .email_service import SMTPEmailService +from .templates import format_html_email, format_text_email + +logger = logging.getLogger(__name__) + + +class EmailSummarizerPlugin(BasePlugin): + """ + Plugin for sending email summaries when conversations complete. + + Subscribes to conversation.complete events and: + 1. Retrieves user email from database + 2. Generates LLM summary of the conversation + 3. Formats HTML and plain text emails + 4. 
Sends email via SMTP + + Configuration (config/plugins.yml): + enabled: true + events: + - conversation.complete + condition: + type: always + smtp_host: smtp.gmail.com + smtp_port: 587 + smtp_username: ${SMTP_USERNAME} + smtp_password: ${SMTP_PASSWORD} + smtp_use_tls: true + from_email: noreply@chronicle.ai + from_name: Chronicle AI + subject_prefix: "Conversation Summary" + summary_max_sentences: 3 + """ + + SUPPORTED_ACCESS_LEVELS: List[str] = ['conversation'] + + name = "Email Summarizer" + description = "Sends email summaries when conversations complete" + + def __init__(self, config: Dict[str, Any]): + """ + Initialize Email Summarizer plugin. + + Args: + config: Plugin configuration from config/plugins.yml + """ + super().__init__(config) + + self.subject_prefix = config.get('subject_prefix', 'Conversation Summary') + self.summary_max_sentences = config.get('summary_max_sentences', 3) + self.include_conversation_id = config.get('include_conversation_id', True) + self.include_duration = config.get('include_duration', True) + + # Email service will be initialized in initialize() + self.email_service: Optional[SMTPEmailService] = None + + # MongoDB database handle + self.db = None + + async def initialize(self): + """ + Initialize plugin resources. + + Sets up SMTP email service and MongoDB connection. 
+ + Raises: + ValueError: If SMTP configuration is incomplete + Exception: If email service initialization fails + """ + if not self.enabled: + logger.info("Email Summarizer plugin is disabled, skipping initialization") + return + + logger.info("Initializing Email Summarizer plugin...") + + # Initialize SMTP email service + try: + smtp_config = { + 'smtp_host': self.config.get('smtp_host'), + 'smtp_port': self.config.get('smtp_port', 587), + 'smtp_username': self.config.get('smtp_username'), + 'smtp_password': self.config.get('smtp_password'), + 'smtp_use_tls': self.config.get('smtp_use_tls', True), + 'from_email': self.config.get('from_email'), + 'from_name': self.config.get('from_name', 'Chronicle AI'), + } + + self.email_service = SMTPEmailService(smtp_config) + + # Test SMTP connection + logger.info("Testing SMTP connectivity...") + if await self.email_service.test_connection(): + logger.info("βœ… SMTP connection test successful") + else: + raise Exception("SMTP connection test failed") + + except Exception as e: + logger.error(f"Failed to initialize email service: {e}") + raise + + # Get MongoDB database handle + self.db = get_database() + logger.info("βœ… Email Summarizer plugin initialized successfully") + + async def cleanup(self): + """Clean up plugin resources.""" + logger.info("Email Summarizer plugin cleanup complete") + + async def on_conversation_complete(self, context: PluginContext) -> Optional[PluginResult]: + """ + Send email summary when conversation completes. 
+ + Args: + context: Plugin context with conversation data + - conversation: dict - Full conversation data + - transcript: str - Complete transcript + - duration: float - Conversation duration + - conversation_id: str - Conversation identifier + + Returns: + PluginResult with success status and message + """ + try: + logger.info(f"Processing conversation complete event for user: {context.user_id}") + + # Extract conversation data + conversation = context.data.get('conversation', {}) + transcript = context.data.get('transcript', '') + duration = context.data.get('duration', 0) + conversation_id = context.data.get('conversation_id', 'unknown') + created_at = conversation.get('created_at') + + # Validate transcript exists + if not transcript or transcript.strip() == '': + logger.warning(f"Empty transcript for conversation {conversation_id}, skipping email") + return PluginResult( + success=False, + message="Skipped: Empty transcript" + ) + + # Get user email from database + user_email = await self._get_user_email(context.user_id) + if not user_email: + logger.warning(f"No email found for user {context.user_id}, cannot send summary") + return PluginResult( + success=False, + message=f"No email configured for user {context.user_id}" + ) + + # Generate LLM summary + summary = await self._generate_summary(transcript) + + # Format email subject and body + subject = self._format_subject(created_at) + body_html = format_html_email( + summary=summary, + transcript=transcript, + conversation_id=conversation_id, + duration=duration, + created_at=created_at + ) + body_text = format_text_email( + summary=summary, + transcript=transcript, + conversation_id=conversation_id, + duration=duration, + created_at=created_at + ) + + # Send email + success = await self.email_service.send_email( + to_email=user_email, + subject=subject, + body_text=body_text, + body_html=body_html + ) + + if success: + logger.info(f"βœ… Email summary sent to {user_email} for conversation {conversation_id}") 
+ return PluginResult( + success=True, + message=f"Email sent to {user_email}", + data={'recipient': user_email, 'conversation_id': conversation_id} + ) + else: + logger.error(f"Failed to send email to {user_email}") + return PluginResult( + success=False, + message=f"Failed to send email to {user_email}" + ) + + except Exception as e: + logger.error(f"Error in email summarizer plugin: {e}", exc_info=True) + return PluginResult( + success=False, + message=f"Error: {str(e)}" + ) + + async def _get_user_email(self, user_id: str) -> Optional[str]: + """ + Get notification email from user. + + Args: + user_id: User identifier (MongoDB ObjectId) + + Returns: + User's notification_email, or None if not set + """ + try: + from bson import ObjectId + + # Query users collection + user = await self.db['users'].find_one({'_id': ObjectId(user_id)}) + + if not user: + logger.warning(f"User {user_id} not found") + return None + + notification_email = user.get('notification_email') + + if not notification_email: + logger.warning(f"User {user_id} has no notification_email set") + return None + + logger.debug(f"Sending notification to {notification_email} for user {user_id}") + return notification_email + + except Exception as e: + logger.error(f"Error fetching user email: {e}", exc_info=True) + return None + + async def _generate_summary(self, transcript: str) -> str: + """ + Generate LLM summary of the conversation. + + Args: + transcript: Full conversation transcript + + Returns: + Generated summary (2-3 sentences) + """ + try: + prompt = ( + f"Summarize this conversation in {self.summary_max_sentences} sentences or less. " + f"Focus on key points, main topics discussed, and any action items or decisions. 
" + f"Be concise and clear.\n\n" + f"Conversation:\n{transcript}" + ) + + logger.debug("Generating LLM summary...") + summary = await async_generate(prompt) + + if not summary or summary.strip() == '': + raise ValueError("LLM returned empty summary") + + logger.info("βœ… LLM summary generated successfully") + return summary.strip() + + except Exception as e: + logger.error(f"Failed to generate LLM summary: {e}", exc_info=True) + # Fallback: return first 300 characters of transcript + logger.warning("Using fallback: truncated transcript") + return transcript[:300] + "..." if len(transcript) > 300 else transcript + + def _format_subject(self, created_at: Optional[datetime] = None) -> str: + """ + Format email subject line. + + Args: + created_at: Conversation creation timestamp + + Returns: + Formatted subject line + """ + if created_at: + date_str = created_at.strftime("%b %d, %Y at %I:%M %p") + return f"{self.subject_prefix} - {date_str}" + else: + return self.subject_prefix + + @staticmethod + async def test_connection(config: Dict[str, Any]) -> Dict[str, Any]: + """ + Test SMTP connection with provided configuration. + + This static method tests the SMTP connection without fully initializing the plugin. + Used by the form-based configuration UI to validate settings before saving. + + Args: + config: Configuration dictionary with SMTP settings + + Returns: + Dict with success status, message, and optional details + + Example: + >>> result = await EmailSummarizerPlugin.test_connection({ + ... 'smtp_host': 'smtp.gmail.com', + ... 'smtp_port': 587, + ... 'smtp_username': 'user@gmail.com', + ... 'smtp_password': 'password', + ... 'smtp_use_tls': True, + ... 'from_email': 'noreply@example.com', + ... 'from_name': 'Test' + ... 
}) + >>> result['success'] + True + """ + import time + + try: + # Validate required config fields + required_fields = ['smtp_host', 'smtp_username', 'smtp_password', 'from_email'] + missing_fields = [field for field in required_fields if not config.get(field)] + + if missing_fields: + return { + "success": False, + "message": f"Missing required fields: {', '.join(missing_fields)}", + "status": "error" + } + + # Build SMTP config + smtp_config = { + 'smtp_host': config.get('smtp_host'), + 'smtp_port': config.get('smtp_port', 587), + 'smtp_username': config.get('smtp_username'), + 'smtp_password': config.get('smtp_password'), + 'smtp_use_tls': config.get('smtp_use_tls', True), + 'from_email': config.get('from_email'), + 'from_name': config.get('from_name', 'Chronicle AI'), + } + + # Log config with masked secrets for debugging + logger.debug(f"SMTP config for testing: {mask_dict(smtp_config)}") + + # Create temporary email service instance + email_service = SMTPEmailService(smtp_config) + + # Test connection + logger.info(f"Testing SMTP connection to {smtp_config['smtp_host']}...") + start_time = time.time() + + connection_success = await email_service.test_connection() + connection_time_ms = int((time.time() - start_time) * 1000) + + if connection_success: + return { + "success": True, + "message": f"Successfully connected to SMTP server at {smtp_config['smtp_host']}", + "status": "success", + "details": { + "smtp_host": smtp_config['smtp_host'], + "smtp_port": smtp_config['smtp_port'], + "connection_time_ms": connection_time_ms, + "use_tls": smtp_config['smtp_use_tls'] + } + } + else: + return { + "success": False, + "message": "SMTP connection test failed", + "status": "error" + } + + except Exception as e: + logger.error(f"SMTP connection test failed: {e}", exc_info=True) + error_msg = str(e) + + # Provide helpful hints based on error type + hints = [] + if "Authentication" in error_msg or "535" in error_msg: + hints.append("For Gmail: Enable 2FA and create an 
App Password at https://myaccount.google.com/apppasswords") + hints.append("Verify your username and password are correct") + elif "Connection" in error_msg or "timeout" in error_msg.lower(): + hints.append("Check your SMTP host and port settings") + hints.append("Verify firewall/network allows outbound SMTP connections") + elif "TLS" in error_msg or "SSL" in error_msg: + hints.append("For port 587: Enable TLS") + hints.append("For port 465: Disable TLS (uses implicit SSL)") + + return { + "success": False, + "message": f"Connection test failed: {error_msg}", + "status": "error", + "hints": hints + } diff --git a/backends/advanced/src/advanced_omi_backend/plugins/email_summarizer/setup.py b/backends/advanced/src/advanced_omi_backend/plugins/email_summarizer/setup.py new file mode 100755 index 00000000..728ae607 --- /dev/null +++ b/backends/advanced/src/advanced_omi_backend/plugins/email_summarizer/setup.py @@ -0,0 +1,200 @@ +#!/usr/bin/env python3 +""" +Email Summarizer Plugin Setup Wizard + +Configures SMTP credentials and plugin settings. +Follows Chronicle's clean configuration architecture: +- Secrets β†’ backends/advanced/.env +- Non-secret settings β†’ plugins/email_summarizer/config.yml +- Orchestration β†’ config/plugins.yml +""" + +import shutil +import sys +from datetime import datetime +from pathlib import Path + +import yaml +from dotenv import set_key +from rich.console import Console +from rich.prompt import Confirm + +# Add repo root to path for setup_utils import +project_root = Path(__file__).resolve().parents[6] +sys.path.insert(0, str(project_root)) + +from setup_utils import ( + prompt_with_existing_masked, + prompt_value +) + +console = Console() + + +def update_plugins_yml_orchestration(): + """ + Update config/plugins.yml with orchestration settings only. + Plugin-specific settings are in plugins/email_summarizer/config.yml. + This follows Chronicle's three-file configuration architecture. 
+ """ + plugins_yml_path = project_root / "config" / "plugins.yml" + + # Load existing or create from template + if plugins_yml_path.exists(): + with open(plugins_yml_path, 'r') as f: + config = yaml.safe_load(f) or {} + else: + # Copy from template + template_path = project_root / "config" / "plugins.yml.template" + if template_path.exists(): + with open(template_path, 'r') as f: + config = yaml.safe_load(f) or {} + else: + config = {'plugins': {}} + + # Ensure structure exists + if 'plugins' not in config: + config['plugins'] = {} + + # Only orchestration settings in config/plugins.yml + # Plugin-specific settings are in plugins/email_summarizer/config.yml + plugin_config = { + 'enabled': False, # Let user enable manually or prompt + 'events': ['conversation.complete'], + 'condition': {'type': 'always'} + } + + # Update or create plugin entry + config['plugins']['email_summarizer'] = plugin_config + + # Backup existing file + if plugins_yml_path.exists(): + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + backup_path = plugins_yml_path.parent / f"plugins.yml.backup.{timestamp}" + shutil.copy(plugins_yml_path, backup_path) + console.print(f"[dim]Backed up existing plugins.yml to {backup_path.name}[/dim]") + + # Write updated config + plugins_yml_path.parent.mkdir(parents=True, exist_ok=True) + with open(plugins_yml_path, 'w') as f: + yaml.dump(config, f, default_flow_style=False, sort_keys=False) + + console.print("[green]βœ… Updated config/plugins.yml (orchestration only)[/green]") + + return plugins_yml_path + + +def main(): + """Interactive setup for Email Summarizer plugin""" + console.print("\nπŸ“§ [bold cyan]Email Summarizer Plugin Setup[/bold cyan]") + console.print("This plugin sends email summaries when conversations complete.\n") + + # Path to main backend .env file + env_path = str(project_root / "backends" / "advanced" / ".env") + + # SMTP Configuration + console.print("[bold]SMTP Configuration[/bold]") + console.print("[dim]For Gmail: Use App 
Password (Settings > Security > 2FA > App Passwords)[/dim]\n") + + smtp_host = prompt_with_existing_masked( + prompt_text="SMTP Host", + env_file_path=env_path, + env_key="SMTP_HOST", + placeholders=['your-smtp-host-here'], + is_password=False, + default="smtp.gmail.com" + ) + + smtp_port = prompt_value("SMTP Port", default="587") + + smtp_username = prompt_with_existing_masked( + prompt_text="SMTP Username (your email)", + env_file_path=env_path, + env_key="SMTP_USERNAME", + placeholders=['your-email@example.com'], + is_password=False + ) + + smtp_password = prompt_with_existing_masked( + prompt_text="SMTP Password (App Password)", + env_file_path=env_path, + env_key="SMTP_PASSWORD", + placeholders=['your-password-here', 'your-app-password-here'], + is_password=True # Shows masked existing value + ) + + # Remove spaces from app password (Google adds spaces when copying) + smtp_password = smtp_password.replace(" ", "") + + smtp_use_tls = prompt_value("Use TLS? (true/false)", default="true") + + # Email sender configuration + from_email = prompt_with_existing_masked( + prompt_text="From Email", + env_file_path=env_path, + env_key="FROM_EMAIL", + placeholders=['noreply@example.com'], + is_password=False, + default=smtp_username # Default to SMTP username + ) + + from_name = prompt_value("From Name", default="Chronicle AI") + + # Save secrets to .env + console.print("\nπŸ’Ύ [bold]Saving credentials to .env...[/bold]") + + set_key(env_path, "SMTP_HOST", smtp_host) + set_key(env_path, "SMTP_PORT", smtp_port) + set_key(env_path, "SMTP_USERNAME", smtp_username) + set_key(env_path, "SMTP_PASSWORD", smtp_password) + set_key(env_path, "SMTP_USE_TLS", smtp_use_tls) + set_key(env_path, "FROM_EMAIL", from_email) + set_key(env_path, "FROM_NAME", from_name) + + console.print("[green]βœ… SMTP credentials saved to backends/advanced/.env[/green]") + + # Auto-update plugins.yml with orchestration settings only + console.print("\nπŸ“ [bold]Updating plugin configuration...[/bold]") + 
plugins_yml_path = update_plugins_yml_orchestration() + + # Prompt to enable plugin + enable_now = Confirm.ask("\nEnable email_summarizer plugin now?", default=True) + if enable_now: + with open(plugins_yml_path, 'r') as f: + config = yaml.safe_load(f) + config['plugins']['email_summarizer']['enabled'] = True + with open(plugins_yml_path, 'w') as f: + yaml.dump(config, f, default_flow_style=False, sort_keys=False) + console.print("[green]βœ… Plugin enabled in config/plugins.yml[/green]") + + console.print("\n[bold cyan]βœ… Email Summarizer configured successfully![/bold cyan]") + console.print("\n[bold]Configuration saved to:[/bold]") + console.print(" β€’ [green]backends/advanced/.env[/green] - SMTP credentials (secrets)") + console.print(" β€’ [green]config/plugins.yml[/green] - Plugin orchestration (enabled, events)") + console.print(" β€’ [green]plugins/email_summarizer/config.yml[/green] - Plugin settings (already configured)") + console.print() + + if not enable_now: + console.print("[bold]To enable later:[/bold]") + console.print(" Edit config/plugins.yml and set: enabled: true") + console.print() + + console.print("[bold]Restart backend to apply:[/bold]") + console.print(" [dim]cd backends/advanced && docker compose restart[/dim]") + console.print() + console.print("[yellow]⚠️ SECURITY: Never commit secrets to git![/yellow]") + console.print("[yellow] β€’ Secrets go in backends/advanced/.env (gitignored)[/yellow]") + console.print("[yellow] β€’ Config files use ${ENV_VAR} references only[/yellow]") + + +if __name__ == '__main__': + try: + main() + except KeyboardInterrupt: + console.print("\n[yellow]Setup cancelled by user[/yellow]") + sys.exit(1) + except Exception as e: + console.print(f"\n[red]Error during setup: {e}[/red]") + import traceback + traceback.print_exc() + sys.exit(1) diff --git a/backends/advanced/src/advanced_omi_backend/plugins/email_summarizer/templates.py b/backends/advanced/src/advanced_omi_backend/plugins/email_summarizer/templates.py 
new file mode 100644 index 00000000..9f99e5cb --- /dev/null +++ b/backends/advanced/src/advanced_omi_backend/plugins/email_summarizer/templates.py @@ -0,0 +1,258 @@ +""" +Email templates for the Email Summarizer plugin. + +Provides HTML and plain text email templates. +""" +import html +from datetime import datetime +from typing import Optional + + +def format_duration(seconds: float) -> str: + """ + Format duration in seconds to human-readable format. + + Args: + seconds: Duration in seconds + + Returns: + Formatted duration (e.g., "5m 30s", "1h 15m") + """ + hours = int(seconds // 3600) + minutes = int((seconds % 3600) // 60) + secs = int(seconds % 60) + + if hours > 0: + return f"{hours}h {minutes}m" + elif minutes > 0: + return f"{minutes}m {secs}s" + else: + return f"{secs}s" + + +def format_html_email( + summary: str, + transcript: str, + conversation_id: str, + duration: float, + created_at: Optional[datetime] = None +) -> str: + """ + Format HTML email template. + + Args: + summary: LLM-generated summary + transcript: Full conversation transcript + conversation_id: Conversation identifier + duration: Conversation duration in seconds + created_at: Conversation creation timestamp + + Returns: + HTML email body + """ + formatted_duration = format_duration(duration) + date_str = created_at.strftime("%B %d, %Y at %I:%M %p") if created_at else "N/A" + + # Escape HTML to prevent XSS attacks + summary_escaped = html.escape(summary, quote=True) + transcript_escaped = html.escape(transcript, quote=True) + + # Format transcript with line breaks (after escaping) + transcript_html = transcript_escaped.replace('\n', '
<br>')

+    return f"""<!DOCTYPE html>
+<html>
+<head>
+  <meta charset="utf-8">
+</head>
+<body style="font-family: Arial, sans-serif; color: #333333; margin: 0; padding: 0;">
+  <div style="max-width: 600px; margin: 0 auto; padding: 20px;">
+
+    <div style="background-color: #4F46E5; color: #ffffff; padding: 20px; border-radius: 8px 8px 0 0;">
+      <h1 style="margin: 0; font-size: 20px;">πŸŽ™οΈ Conversation Summary</h1>
+      <p style="margin: 4px 0 0; font-size: 14px;">{date_str}</p>
+    </div>
+
+    <div style="border: 1px solid #e5e7eb; border-top: none; padding: 20px; border-radius: 0 0 8px 8px;">
+      <h2 style="font-size: 16px; margin-top: 0;">πŸ“‹ Summary</h2>
+      <p style="font-size: 14px; line-height: 1.6;">{summary_escaped}</p>
+
+      <h2 style="font-size: 16px;">πŸ“ Full Transcript</h2>
+      <p style="font-size: 13px; line-height: 1.6; color: #555555;">{transcript_html}</p>
+    </div>
+
+  </div>
+</body>
+</html>
+ + + + +""" + + +def format_text_email( + summary: str, + transcript: str, + conversation_id: str, + duration: float, + created_at: Optional[datetime] = None +) -> str: + """ + Format plain text email template. + + Args: + summary: LLM-generated summary + transcript: Full conversation transcript + conversation_id: Conversation identifier + duration: Conversation duration in seconds + created_at: Conversation creation timestamp + + Returns: + Plain text email body + """ + formatted_duration = format_duration(duration) + date_str = created_at.strftime("%B %d, %Y at %I:%M %p") if created_at else "N/A" + + return f""" +πŸŽ™οΈ CONVERSATION SUMMARY +{date_str} + +═══════════════════════════════════════════════════════════ + +πŸ“‹ SUMMARY + +{summary} + +─────────────────────────────────────────────────────────── + +πŸ“ FULL TRANSCRIPT + +{transcript} + +═══════════════════════════════════════════════════════════ + +πŸ“Š METADATA + +Duration: {formatted_duration} +Conversation ID: {conversation_id} + +─────────────────────────────────────────────────────────── + +Sent by Chronicle AI +Your personal AI memory system +https://github.com/chronicle-ai/chronicle +""" diff --git a/backends/advanced/src/advanced_omi_backend/plugins/homeassistant/__init__.py b/backends/advanced/src/advanced_omi_backend/plugins/homeassistant/__init__.py new file mode 100644 index 00000000..11b831e9 --- /dev/null +++ b/backends/advanced/src/advanced_omi_backend/plugins/homeassistant/__init__.py @@ -0,0 +1,9 @@ +""" +Home Assistant plugin for Chronicle. + +Allows control of Home Assistant devices via natural language wake word commands. 
+""" + +from .plugin import HomeAssistantPlugin + +__all__ = ['HomeAssistantPlugin'] diff --git a/backends/advanced/src/advanced_omi_backend/plugins/homeassistant/command_parser.py b/backends/advanced/src/advanced_omi_backend/plugins/homeassistant/command_parser.py new file mode 100644 index 00000000..cc73626d --- /dev/null +++ b/backends/advanced/src/advanced_omi_backend/plugins/homeassistant/command_parser.py @@ -0,0 +1,97 @@ +""" +LLM-based command parser for Home Assistant integration. + +This module provides structured command parsing using LLM to extract +intent, target entities/areas, and parameters from natural language. +""" + +from dataclasses import dataclass, field +from typing import Any, Dict, Optional + + +@dataclass +class ParsedCommand: + """Structured representation of a parsed Home Assistant command.""" + + action: str + """Action to perform (e.g., turn_on, turn_off, set_brightness, toggle)""" + + target_type: str + """Type of target (area, entity, all_in_area)""" + + target: str + """Target identifier (area name or entity name)""" + + entity_type: Optional[str] = None + """Entity domain filter (e.g., light, switch, fan) - None means all types""" + + parameters: Dict[str, Any] = field(default_factory=dict) + """Additional parameters (e.g., brightness_pct=50, color='red')""" + + +# LLM System Prompt for Command Parsing +COMMAND_PARSER_SYSTEM_PROMPT = """You are a smart home command parser for Home Assistant. + +Extract structured information from natural language commands. 
+Return ONLY valid JSON in this exact format (no markdown, no code blocks, no explanation): + +{ + "action": "turn_off", + "target_type": "area", + "target": "study", + "entity_type": "light", + "parameters": {} +} + +ACTIONS (choose one): +- turn_on: Turn on entities +- turn_off: Turn off entities +- toggle: Toggle entity state +- set_brightness: Set brightness level +- set_color: Set color + +TARGET_TYPE (choose one): +- area: Targeting all entities of a type in an area (e.g., "study lights") +- all_in_area: Targeting ALL entities in an area (e.g., "everything in study") +- entity: Targeting a specific entity by name (e.g., "desk lamp") + +ENTITY_TYPE (optional, use null if not specified): +- light: Light entities +- switch: Switch entities +- fan: Fan entities +- cover: Covers/blinds +- null: All entity types (when target_type is "all_in_area") + +PARAMETERS (optional, empty dict if none): +- brightness_pct: Brightness percentage (0-100) +- color: Color name (e.g., "red", "blue", "warm white") + +EXAMPLES: + +Command: "turn off study lights" +Response: {"action": "turn_off", "target_type": "area", "target": "study", "entity_type": "light", "parameters": {}} + +Command: "turn off everything in study" +Response: {"action": "turn_off", "target_type": "all_in_area", "target": "study", "entity_type": null, "parameters": {}} + +Command: "turn on desk lamp" +Response: {"action": "turn_on", "target_type": "entity", "target": "desk lamp", "entity_type": null, "parameters": {}} + +Command: "set study lights to 50%" +Response: {"action": "set_brightness", "target_type": "area", "target": "study", "entity_type": "light", "parameters": {"brightness_pct": 50}} + +Command: "turn on living room fan" +Response: {"action": "turn_on", "target_type": "area", "target": "living room", "entity_type": "fan", "parameters": {}} + +Command: "turn off all lights" +Response: {"action": "turn_off", "target_type": "entity", "target": "all", "entity_type": "light", "parameters": {}} + 
+Command: "toggle hallway light" +Response: {"action": "toggle", "target_type": "entity", "target": "hallway light", "entity_type": null, "parameters": {}} + +Remember: +1. Return ONLY the JSON object, no markdown formatting +2. Use lowercase for action, target_type, target, entity_type +3. Use null (not "null" string) for missing entity_type +4. Always include all 5 fields: action, target_type, target, entity_type, parameters +""" diff --git a/backends/advanced/src/advanced_omi_backend/plugins/homeassistant/config.yml b/backends/advanced/src/advanced_omi_backend/plugins/homeassistant/config.yml new file mode 100644 index 00000000..eb477aa5 --- /dev/null +++ b/backends/advanced/src/advanced_omi_backend/plugins/homeassistant/config.yml @@ -0,0 +1,13 @@ +# Home Assistant Plugin Configuration +# +# This file contains non-secret configuration for the Home Assistant plugin. +# Secrets (HA_TOKEN) are stored in backends/advanced/.env +# Plugin orchestration (enabled, events, condition) is in config/plugins.yml + +# Home Assistant server configuration +ha_url: ${HA_URL} +ha_token: ${HA_TOKEN} + +# Command configuration +wake_word: ${HA_WAKE_WORD:-vivi} +timeout: ${HA_TIMEOUT:-30} diff --git a/backends/advanced/src/advanced_omi_backend/plugins/homeassistant/entity_cache.py b/backends/advanced/src/advanced_omi_backend/plugins/homeassistant/entity_cache.py new file mode 100644 index 00000000..e8624f1b --- /dev/null +++ b/backends/advanced/src/advanced_omi_backend/plugins/homeassistant/entity_cache.py @@ -0,0 +1,133 @@ +""" +Entity cache for Home Assistant integration. + +This module provides caching and lookup functionality for Home Assistant areas and entities. 
+""" + +from dataclasses import dataclass, field +from datetime import datetime +from typing import Dict, List, Optional +import logging + +logger = logging.getLogger(__name__) + + +@dataclass +class EntityCache: + """Cache for Home Assistant areas and entities.""" + + areas: List[str] = field(default_factory=list) + """List of area names (e.g., ["study", "living_room"])""" + + area_entities: Dict[str, List[str]] = field(default_factory=dict) + """Map of area names to entity IDs (e.g., {"study": ["light.tubelight_3"]})""" + + entity_details: Dict[str, Dict] = field(default_factory=dict) + """Full entity state data keyed by entity_id""" + + last_refresh: datetime = field(default_factory=datetime.now) + """Timestamp of last cache refresh""" + + def find_entity_by_name(self, name: str) -> Optional[str]: + """ + Find entity ID by fuzzy name matching. + + Matching priority: + 1. Exact friendly_name match (case-insensitive) + 2. Partial friendly_name match (case-insensitive) + 3. Entity ID match (e.g., "tubelight_3" β†’ "light.tubelight_3") + + Args: + name: Entity name to search for + + Returns: + Entity ID if found, None otherwise + """ + name_lower = name.lower().strip() + + # Step 1: Exact friendly_name match + for entity_id, details in self.entity_details.items(): + friendly_name = details.get('attributes', {}).get('friendly_name', '') + if friendly_name.lower() == name_lower: + logger.debug(f"Exact match: {name} β†’ {entity_id} (friendly_name: {friendly_name})") + return entity_id + + # Step 2: Partial friendly_name match + for entity_id, details in self.entity_details.items(): + friendly_name = details.get('attributes', {}).get('friendly_name', '') + if name_lower in friendly_name.lower(): + logger.debug(f"Partial match: {name} β†’ {entity_id} (friendly_name: {friendly_name})") + return entity_id + + # Step 3: Entity ID match (try adding common domains) + common_domains = ['light', 'switch', 'fan', 'cover'] + for domain in common_domains: + candidate_id = 
f"{domain}.{name_lower.replace(' ', '_')}" + if candidate_id in self.entity_details: + logger.debug(f"Entity ID match: {name} β†’ {candidate_id}") + return candidate_id + + logger.warning(f"No entity found matching: {name}") + return None + + def get_entities_in_area( + self, + area: str, + entity_type: Optional[str] = None + ) -> List[str]: + """ + Get all entities in an area, optionally filtered by domain. + + Args: + area: Area name (case-insensitive) + entity_type: Entity domain filter (e.g., "light", "switch") + + Returns: + List of entity IDs in the area + """ + area_lower = area.lower().strip() + + # Find matching area (case-insensitive) + matching_area = None + for area_name in self.areas: + if area_name.lower() == area_lower: + matching_area = area_name + break + + if not matching_area: + logger.warning(f"Area not found: {area}") + return [] + + # Get entities in area + entities = self.area_entities.get(matching_area, []) + + # Filter by entity type if specified + if entity_type: + entity_type_lower = entity_type.lower() + entities = [ + e for e in entities + if e.split('.')[0] == entity_type_lower + ] + + logger.debug( + f"Found {len(entities)} entities in area '{matching_area}'" + + (f" (type: {entity_type})" if entity_type else "") + ) + + return entities + + def get_cache_age_seconds(self) -> float: + """Get cache age in seconds.""" + return (datetime.now() - self.last_refresh).total_seconds() + + def is_stale(self, max_age_seconds: int = 3600) -> bool: + """ + Check if cache is stale. 
+ + Args: + max_age_seconds: Maximum cache age before considering stale (default: 1 hour) + + Returns: + True if cache is older than max_age_seconds + """ + return self.get_cache_age_seconds() > max_age_seconds diff --git a/backends/advanced/src/advanced_omi_backend/plugins/homeassistant/mcp_client.py b/backends/advanced/src/advanced_omi_backend/plugins/homeassistant/mcp_client.py new file mode 100644 index 00000000..42ede8dc --- /dev/null +++ b/backends/advanced/src/advanced_omi_backend/plugins/homeassistant/mcp_client.py @@ -0,0 +1,421 @@ +""" +MCP client for communicating with Home Assistant's MCP Server. + +Home Assistant exposes an MCP server at /api/mcp that provides tools +for controlling smart home devices. +""" + +import json +import logging +from typing import Any, Dict, List, Optional + +import httpx + +logger = logging.getLogger(__name__) + + +class MCPError(Exception): + """MCP protocol error""" + pass + + +class HAMCPClient: + """ + MCP Client for Home Assistant's /api/mcp endpoint. + + Implements the Model Context Protocol for communicating with + Home Assistant's built-in MCP server. + """ + + def __init__(self, base_url: str, token: str, timeout: int = 30): + """ + Initialize the MCP client. + + Args: + base_url: Base URL of Home Assistant (e.g., http://localhost:8123) + token: Long-lived access token for authentication + timeout: Request timeout in seconds + + """ + self.base_url = base_url.rstrip('/') + self.mcp_url = f"{self.base_url}/api/mcp" + self.token = token + self.timeout = timeout + self.client = httpx.AsyncClient(timeout=timeout) + self._request_id = 0 + + async def close(self): + """Close the HTTP client""" + await self.client.aclose() + + def _next_request_id(self) -> int: + """Generate next request ID""" + self._request_id += 1 + return self._request_id + + async def _send_mcp_request(self, method: str, params: Optional[Dict] = None) -> Dict[str, Any]: + """ + Send MCP protocol request to Home Assistant. 
+ + Args: + method: MCP method name (e.g., "tools/list", "tools/call") + params: Optional method parameters + + Returns: + Response data from MCP server + + Raises: + MCPError: If request fails or returns an error + """ + payload = { + "jsonrpc": "2.0", + "id": self._next_request_id(), + "method": method + } + + if params: + payload["params"] = params + + headers = { + "Authorization": f"Bearer {self.token}", + "Content-Type": "application/json" + } + + try: + logger.debug(f"MCP Request: {method} with params: {params}") + response = await self.client.post( + self.mcp_url, + json=payload, + headers=headers + ) + response.raise_for_status() + + data = response.json() + + # Check for JSON-RPC error + if "error" in data: + error = data["error"] + raise MCPError(f"MCP Error {error.get('code')}: {error.get('message')}") + + return data.get("result", {}) + + except httpx.HTTPStatusError as e: + logger.error(f"HTTP error calling MCP endpoint: {e.response.status_code}") + raise MCPError(f"HTTP {e.response.status_code}: {e.response.text}") + except httpx.RequestError as e: + logger.error(f"Request error calling MCP endpoint: {e}") + raise MCPError(f"Request failed: {e}") + except Exception as e: + logger.error(f"Unexpected error calling MCP endpoint: {e}") + raise MCPError(f"Unexpected error: {e}") + + async def list_tools(self) -> List[Dict[str, Any]]: + """ + Get list of available MCP tools from Home Assistant. + + Returns: + List of tool definitions with schema + + Example tool: + { + "name": "turn_on", + "description": "Turn on a light or switch", + "inputSchema": { + "type": "object", + "properties": { + "entity_id": {"type": "string"} + } + } + } + """ + result = await self._send_mcp_request("tools/list") + tools = result.get("tools", []) + logger.info(f"Retrieved {len(tools)} tools from Home Assistant MCP") + return tools + + async def call_tool(self, tool_name: str, arguments: Dict[str, Any]) -> Dict[str, Any]: + """ + Execute a tool via MCP. 
+ + Args: + tool_name: Name of the tool to call (e.g., "turn_on", "turn_off") + arguments: Tool arguments (e.g., {"entity_id": "light.hall_light"}) + + Returns: + Tool execution result + + Raises: + MCPError: If tool execution fails + + Example: + >>> await client.call_tool("turn_off", {"entity_id": "light.hall_light"}) + {"success": True} + """ + params = { + "name": tool_name, + "arguments": arguments + } + + logger.info(f"Calling MCP tool '{tool_name}' with args: {arguments}") + result = await self._send_mcp_request("tools/call", params) + + # MCP tool results are wrapped in content blocks + content = result.get("content", []) + if content and isinstance(content, list): + # Extract text content from first block + first_block = content[0] + if isinstance(first_block, dict) and first_block.get("type") == "text": + return {"result": first_block.get("text"), "success": True} + + return result + + async def test_connection(self) -> bool: + """ + Test connection to Home Assistant MCP server. + + Returns: + True if connection successful, False otherwise + """ + try: + tools = await self.list_tools() + logger.info(f"MCP connection test successful ({len(tools)} tools available)") + return True + except Exception as e: + logger.error(f"MCP connection test failed: {e}") + return False + + async def _render_template(self, template: str) -> Any: + """ + Render a Home Assistant template using the Template API. 
+ + Args: + template: Jinja2 template string (e.g., "{{ areas() }}") + + Returns: + Rendered template result (parsed as JSON if possible) + + Raises: + MCPError: If template rendering fails + + Example: + >>> await client._render_template("{{ areas() }}") + ["study", "living_room", "bedroom"] + """ + headers = { + "Authorization": f"Bearer {self.token}", + "Content-Type": "application/json" + } + + payload = {"template": template} + + try: + logger.debug(f"Rendering template: {template}") + response = await self.client.post( + f"{self.base_url}/api/template", + json=payload, + headers=headers + ) + response.raise_for_status() + + result = response.text.strip() + + # Try to parse as JSON (for lists, dicts) + if result.startswith('[') or result.startswith('{'): + try: + return json.loads(result) + except json.JSONDecodeError: + logger.warning(f"Failed to parse template result as JSON: {result}") + return result + + return result + + except httpx.HTTPStatusError as e: + logger.error(f"HTTP error rendering template: {e.response.status_code}") + raise MCPError(f"HTTP {e.response.status_code}: {e.response.text}") + except httpx.RequestError as e: + logger.error(f"Request error rendering template: {e}") + raise MCPError(f"Request failed: {e}") + + async def fetch_areas(self) -> List[str]: + """ + Fetch all areas from Home Assistant using Template API. + + Returns: + List of area names + + Example: + >>> await client.fetch_areas() + ["study", "living_room", "bedroom"] + """ + template = "{{ areas() | to_json }}" + areas = await self._render_template(template) + + if isinstance(areas, list): + logger.info(f"Fetched {len(areas)} areas from Home Assistant") + return areas + else: + logger.warning(f"Unexpected areas format: {type(areas)}") + return [] + + async def fetch_area_entities(self, area_name: str) -> List[str]: + """ + Fetch all entity IDs in a specific area. 
+ + Args: + area_name: Name of the area + + Returns: + List of entity IDs in the area + + Example: + >>> await client.fetch_area_entities("study") + ["light.tubelight_3", "switch.desk_fan"] + """ + template = f"{{{{ area_entities('{area_name}') | to_json }}}}" + entities = await self._render_template(template) + + if isinstance(entities, list): + logger.info(f"Fetched {len(entities)} entities from area '{area_name}'") + return entities + else: + logger.warning(f"Unexpected entities format for area '{area_name}': {type(entities)}") + return [] + + async def fetch_entity_states(self) -> Dict[str, Dict]: + """ + Fetch all entity states from Home Assistant. + + Returns: + Dict mapping entity_id to state data (includes attributes, area_id) + + Example: + >>> await client.fetch_entity_states() + { + "light.tubelight_3": { + "state": "on", + "attributes": {"friendly_name": "Study Light", ...}, + "area_id": "study" + } + } + """ + headers = { + "Authorization": f"Bearer {self.token}", + "Content-Type": "application/json" + } + + try: + logger.debug("Fetching all entity states") + response = await self.client.get( + f"{self.base_url}/api/states", + headers=headers + ) + response.raise_for_status() + + states = response.json() + entity_details = {} + + # Enrich with area information + for state in states: + entity_id = state.get('entity_id') + if entity_id: + # Get area_id using Template API + try: + area_template = f"{{{{ area_id('{entity_id}') }}}}" + area_id = await self._render_template(area_template) + state['area_id'] = area_id if area_id else None + except Exception as e: + logger.debug(f"Failed to get area for {entity_id}: {e}") + state['area_id'] = None + + entity_details[entity_id] = state + + logger.info(f"Fetched {len(entity_details)} entity states") + return entity_details + + except httpx.HTTPStatusError as e: + logger.error(f"HTTP error fetching states: {e.response.status_code}") + raise MCPError(f"HTTP {e.response.status_code}: {e.response.text}") + except 
httpx.RequestError as e: + logger.error(f"Request error fetching states: {e}") + raise MCPError(f"Request failed: {e}") + + async def call_service( + self, + domain: str, + service: str, + entity_ids: List[str], + **parameters + ) -> Dict[str, Any]: + """ + Call a Home Assistant service directly via REST API. + + Args: + domain: Service domain (e.g., "light", "switch") + service: Service name (e.g., "turn_on", "turn_off") + entity_ids: List of entity IDs to target + **parameters: Additional service parameters (e.g., brightness_pct=50) + + Returns: + Service call response + + Example: + >>> await client.call_service("light", "turn_on", ["light.study"], brightness_pct=50) + [{"entity_id": "light.study", "state": "on"}] + """ + headers = { + "Authorization": f"Bearer {self.token}", + "Content-Type": "application/json" + } + + payload = { + "entity_id": entity_ids, + **parameters + } + + service_url = f"{self.base_url}/api/services/{domain}/{service}" + + try: + logger.info(f"Calling service {domain}.{service} for {len(entity_ids)} entities") + logger.debug(f"Service payload: {payload}") + + response = await self.client.post( + service_url, + json=payload, + headers=headers + ) + response.raise_for_status() + + result = response.json() + logger.info(f"Service call successful: {domain}.{service}") + return result + + except httpx.HTTPStatusError as e: + logger.error(f"HTTP error calling service: {e.response.status_code}") + raise MCPError(f"HTTP {e.response.status_code}: {e.response.text}") + except httpx.RequestError as e: + logger.error(f"Request error calling service: {e}") + raise MCPError(f"Request failed: {e}") + + async def discover_entities(self) -> Dict[str, Dict]: + """ + Discover available entities from MCP tools. + + Parses the available tools to build an index of entities + that can be controlled. 
+ + Returns: + Dict mapping entity_id to metadata + """ + tools = await self.list_tools() + entities = {} + + for tool in tools: + # Extract entity information from tool schemas + # This will depend on how HA MCP structures its tools + # For now, we'll just log what we find + logger.debug(f"Tool: {tool.get('name')} - {tool.get('description')}") + + # TODO: Parse tool schemas to extract entity_id information + # For now, return empty dict - will be populated based on actual HA MCP response + + return entities diff --git a/backends/advanced/src/advanced_omi_backend/plugins/homeassistant/plugin.py b/backends/advanced/src/advanced_omi_backend/plugins/homeassistant/plugin.py new file mode 100644 index 00000000..13683194 --- /dev/null +++ b/backends/advanced/src/advanced_omi_backend/plugins/homeassistant/plugin.py @@ -0,0 +1,692 @@ +""" +Home Assistant plugin for Chronicle. + +Enables control of Home Assistant devices through natural language commands +triggered by a wake word. +""" + +import json +import logging +from typing import Any, Dict, List, Optional + +from ..base import BasePlugin, PluginContext, PluginResult +from .entity_cache import EntityCache +from .mcp_client import HAMCPClient, MCPError + +logger = logging.getLogger(__name__) + + +class HomeAssistantPlugin(BasePlugin): + """ + Plugin for controlling Home Assistant devices via wake word commands. + + Example: + User says: "Vivi, turn off the hall lights" + -> Wake word "vivi" detected by router + -> Command "turn off the hall lights" passed to on_transcript() + -> Plugin parses command and calls HA MCP to execute + -> Returns: PluginResult with "I've turned off the hall light" + """ + + SUPPORTED_ACCESS_LEVELS: List[str] = ['transcript'] + + name = "Home Assistant" + description = "Wake word device control with Home Assistant integration" + + def __init__(self, config: Dict[str, Any]): + """ + Initialize Home Assistant plugin. 
+ + Args: + config: Plugin configuration with keys: + - ha_url: Home Assistant URL + - ha_token: Long-lived access token + - wake_word: Wake word for triggering commands (handled by router) + - enabled: Whether plugin is enabled + - access_level: Should be 'transcript' + - trigger: Should be {'type': 'wake_word', 'wake_word': '...'} + """ + super().__init__(config) + self.mcp_client: Optional[HAMCPClient] = None + self.available_tools: List[Dict] = [] + self.entities: Dict[str, Dict] = {} + + # Entity cache for area-based commands + self.entity_cache: Optional[EntityCache] = None + self.cache_initialized = False + + # Configuration + self.ha_url = config.get('ha_url', 'http://localhost:8123') + self.ha_token = config.get('ha_token', '') + self.wake_word = config.get('wake_word', 'vivi') + self.timeout = config.get('timeout', 30) + + async def initialize(self): + """ + Initialize the Home Assistant plugin. + + Connects to Home Assistant MCP server and discovers available tools. + + Raises: + MCPError: If connection or discovery fails + """ + if not self.enabled: + logger.info("Home Assistant plugin is disabled, skipping initialization") + return + + if not self.ha_token: + raise ValueError("Home Assistant token is required") + + logger.info(f"Initializing Home Assistant plugin (URL: {self.ha_url})") + + # Create MCP client (used for REST API calls, not MCP protocol) + self.mcp_client = HAMCPClient( + base_url=self.ha_url, + token=self.ha_token, + timeout=self.timeout + ) + + # Test basic API connectivity with Template API + try: + logger.info("Testing Home Assistant API connectivity...") + test_result = await self.mcp_client._render_template("{{ 1 + 1 }}") + if str(test_result).strip() != "2": + raise ValueError(f"Unexpected template result: {test_result}") + logger.info("Home Assistant API connection successful") + except Exception as e: + raise MCPError(f"Failed to connect to Home Assistant API: {e}") + + logger.info("Home Assistant plugin initialized 
successfully") + + async def on_transcript(self, context: PluginContext) -> Optional[PluginResult]: + """ + Execute Home Assistant command from wake word transcript. + + Called by the router when a wake word is detected in the transcript. + The router has already stripped the wake word and extracted the command. + + Args: + context: PluginContext containing: + - user_id: User ID who issued the command + - access_level: 'transcript' + - data: Dict with: + - command: str - Command with wake word already stripped + - original_transcript: str - Full transcript with wake word + - transcript: str - Original transcript + - segment_id: str - Unique segment identifier + - conversation_id: str - Current conversation ID + - metadata: Optional additional metadata + + Returns: + PluginResult with: + - success: True if command executed + - message: User-friendly response + - data: Dict with action details + - should_continue: False to stop normal processing + + Example: + Context data: + { + 'command': 'turn off study lights', + 'original_transcript': 'vivi turn off study lights', + 'conversation_id': 'conv_123' + } + + Returns: + PluginResult( + success=True, + message="I've turned off 1 light in study", + data={'action': 'turn_off', 'entity_ids': ['light.tubelight_3']}, + should_continue=False + ) + """ + command = context.data.get('command', '') + + if not command: + return PluginResult( + success=False, + message="No command provided", + should_continue=True + ) + + if not self.mcp_client: + logger.error("MCP client not initialized") + return PluginResult( + success=False, + message="Sorry, Home Assistant is not connected", + should_continue=True + ) + + try: + # Step 1: Parse command using hybrid LLM + fallback parsing + logger.info(f"Processing HA command: '{command}'") + parsed = await self._parse_command_hybrid(command) + + if not parsed: + return PluginResult( + success=False, + message="Sorry, I couldn't understand that command", + should_continue=True + ) + + # Step 
2: Resolve entities from parsed command
+            try:
+                entity_ids = await self._resolve_entities(parsed)
+            except ValueError as e:
+                logger.warning(f"Entity resolution failed: {e}")
+                return PluginResult(
+                    success=False,
+                    message=str(e),
+                    should_continue=True
+                )
+
+            # Step 3: Determine service and domain
+            # Extract domain from first entity (all should have same domain for area-based)
+            domain = entity_ids[0].split('.')[0] if entity_ids else 'light'
+
+            # Map action to service name
+            service_map = {
+                'turn_on': 'turn_on',
+                'turn_off': 'turn_off',
+                'toggle': 'toggle',
+                'set_brightness': 'turn_on', # brightness uses turn_on with params
+                'set_color': 'turn_on' # color uses turn_on with params
+            }
+            service = service_map.get(parsed.action, 'turn_on')
+
+            # Step 4: Call Home Assistant service
+            logger.info(
+                f"Calling {domain}.{service} for {len(entity_ids)} entities: {entity_ids}"
+            )
+
+            result = await self.mcp_client.call_service(
+                domain=domain,
+                service=service,
+                entity_ids=entity_ids,
+                **parsed.parameters
+            )
+
+            # Step 5: Format user-friendly response
+            entity_type_name = parsed.entity_type or domain
+            if parsed.target_type == 'area':
+                message = (
+                    f"I've {parsed.action.replace('turn_', 'turned ').replace('toggle', 'toggled').replace('_', ' ')} {len(entity_ids)} "
+                    f"{entity_type_name}{'s' if len(entity_ids) != 1 else ''} "
+                    f"in {parsed.target}"
+                )
+            elif parsed.target_type == 'all_in_area':
+                message = (
+                    f"I've {parsed.action.replace('turn_', 'turned ').replace('toggle', 'toggled').replace('_', ' ')} {len(entity_ids)} "
+                    f"entities in {parsed.target}"
+                )
+            else:
+                message = f"I've {parsed.action.replace('turn_', 'turned ').replace('toggle', 'toggled').replace('_', ' ')} {parsed.target}"
+
+            logger.info(f"HA command executed successfully: {message}")
+
+            return PluginResult(
+                success=True,
+                data={
+                    'action': parsed.action,
+                    'entity_ids': entity_ids,
+                    'target_type': parsed.target_type,
+                    'target': parsed.target,
+                    'ha_result': result
+                },
+                message=message,
+                should_continue=False # Stop normal processing - HA command handled
+            )
+
+        except MCPError as e:
+            logger.error(f"Home Assistant API error: {e}", 
exc_info=True) + return PluginResult( + success=False, + message=f"Sorry, Home Assistant couldn't execute that: {e}", + should_continue=True + ) + except Exception as e: + logger.error(f"Command execution failed: {e}", exc_info=True) + return PluginResult( + success=False, + message="Sorry, something went wrong while executing that command", + should_continue=True + ) + + async def cleanup(self): + """Clean up resources""" + if self.mcp_client: + await self.mcp_client.close() + logger.info("Closed Home Assistant MCP client") + + async def _ensure_cache_initialized(self): + """Ensure entity cache is initialized. Lazy-load on first use.""" + if not self.cache_initialized: + logger.info("Entity cache not initialized, refreshing...") + await self._refresh_cache() + self.cache_initialized = True + + async def _refresh_cache(self): + """ + Refresh the entity cache from Home Assistant. + + Fetches: + - All areas + - Entities in each area + - Entity state details + """ + if not self.mcp_client: + logger.error("Cannot refresh cache: MCP client not initialized") + return + + try: + logger.info("Refreshing entity cache from Home Assistant...") + + # Fetch all areas + areas = await self.mcp_client.fetch_areas() + logger.debug(f"Fetched {len(areas)} areas: {areas}") + + # Fetch entities for each area + area_entities = {} + for area in areas: + entities = await self.mcp_client.fetch_area_entities(area) + area_entities[area] = entities + logger.debug(f"Area '{area}': {len(entities)} entities") + + # Fetch all entity states + entity_details = await self.mcp_client.fetch_entity_states() + logger.debug(f"Fetched {len(entity_details)} entity states") + + # Create cache + from datetime import datetime + self.entity_cache = EntityCache( + areas=areas, + area_entities=area_entities, + entity_details=entity_details, + last_refresh=datetime.now() + ) + + logger.info( + f"Entity cache refreshed: {len(areas)} areas, " + f"{len(entity_details)} entities" + ) + + except Exception as e: + 
logger.error(f"Failed to refresh entity cache: {e}", exc_info=True) + raise + + async def _parse_command_with_llm(self, command: str) -> Optional['ParsedCommand']: + """ + Parse command using LLM with structured system prompt. + + Args: + command: Natural language command (wake word already stripped) + + Returns: + ParsedCommand if parsing succeeds, None otherwise + + Example: + >>> await self._parse_command_with_llm("turn off study lights") + ParsedCommand( + action="turn_off", + target_type="area", + target="study", + entity_type="light", + parameters={} + ) + """ + try: + from advanced_omi_backend.llm_client import get_llm_client + from .command_parser import COMMAND_PARSER_SYSTEM_PROMPT, ParsedCommand + + llm_client = get_llm_client() + + logger.debug(f"Parsing command with LLM: '{command}'") + + # Use OpenAI chat format with system + user messages + response = llm_client.client.chat.completions.create( + model=llm_client.model, + messages=[ + {"role": "system", "content": COMMAND_PARSER_SYSTEM_PROMPT}, + {"role": "user", "content": f'Command: "{command}"\n\nReturn JSON only.'} + ], + temperature=0.1, + max_tokens=150 + ) + + result_text = response.choices[0].message.content.strip() + logger.debug(f"LLM response: {result_text}") + + # Remove markdown code blocks if present + if result_text.startswith('```'): + lines = result_text.split('\n') + result_text = '\n'.join(lines[1:-1]) if len(lines) > 2 else result_text + result_text = result_text.strip() + + # Parse JSON response + result_json = json.loads(result_text) + + # Validate required fields + required_fields = ['action', 'target_type', 'target'] + if not all(field in result_json for field in required_fields): + logger.warning(f"LLM response missing required fields: {result_json}") + return None + + parsed = ParsedCommand( + action=result_json['action'], + target_type=result_json['target_type'], + target=result_json['target'], + entity_type=result_json.get('entity_type'), + 
parameters=result_json.get('parameters', {}) + ) + + logger.info( + f"LLM parsed command: action={parsed.action}, " + f"target_type={parsed.target_type}, target={parsed.target}, " + f"entity_type={parsed.entity_type}" + ) + + return parsed + + except json.JSONDecodeError as e: + logger.error(f"Failed to parse LLM JSON response: {e}\nResponse: {result_text}") + return None + except Exception as e: + logger.error(f"LLM command parsing failed: {e}", exc_info=True) + return None + + async def _resolve_entities(self, parsed: 'ParsedCommand') -> List[str]: + """ + Resolve ParsedCommand to actual Home Assistant entity IDs. + + Args: + parsed: ParsedCommand from LLM parsing + + Returns: + List of entity IDs to target + + Raises: + ValueError: If target not found or ambiguous + + Example: + >>> await self._resolve_entities(ParsedCommand( + ... action="turn_off", + ... target_type="area", + ... target="study", + ... entity_type="light" + ... )) + ["light.tubelight_3"] + """ + from .command_parser import ParsedCommand + + # Ensure cache is ready + await self._ensure_cache_initialized() + + if not self.entity_cache: + raise ValueError("Entity cache not initialized") + + if parsed.target_type == 'area': + # Get entities in area, filtered by type + entities = self.entity_cache.get_entities_in_area( + area=parsed.target, + entity_type=parsed.entity_type + ) + + if not entities: + entity_desc = f"{parsed.entity_type}s" if parsed.entity_type else "entities" + raise ValueError( + f"No {entity_desc} found in area '{parsed.target}'. 
" + f"Available areas: {', '.join(self.entity_cache.areas)}" + ) + + logger.info( + f"Resolved area '{parsed.target}' to {len(entities)} " + f"{parsed.entity_type or 'entity'}(s)" + ) + return entities + + elif parsed.target_type == 'all_in_area': + # Get ALL entities in area (no filter) + entities = self.entity_cache.get_entities_in_area( + area=parsed.target, + entity_type=None + ) + + if not entities: + raise ValueError( + f"No entities found in area '{parsed.target}'. " + f"Available areas: {', '.join(self.entity_cache.areas)}" + ) + + logger.info(f"Resolved 'all in {parsed.target}' to {len(entities)} entities") + return entities + + elif parsed.target_type == 'entity': + # Fuzzy match entity by name + entity_id = self.entity_cache.find_entity_by_name(parsed.target) + + if not entity_id: + raise ValueError( + f"Entity '{parsed.target}' not found. " + f"Try being more specific or check the entity name." + ) + + logger.info(f"Resolved entity '{parsed.target}' to {entity_id}") + return [entity_id] + + else: + raise ValueError(f"Unknown target type: {parsed.target_type}") + + async def _parse_command_fallback(self, command: str) -> Optional[Dict[str, Any]]: + """ + Fallback keyword-based command parser (used when LLM fails). 
+ + Args: + command: Natural language command + + Returns: + Dict with 'tool', 'arguments', and optional metadata + None if parsing fails + + Example: + Input: "turn off the hall lights" + Output: { + "tool": "turn_off", + "arguments": {"entity_id": "light.hall_light"}, + "friendly_name": "Hall Light", + "action": "turn_off" + } + """ + logger.debug("Using fallback keyword-based parsing") + command_lower = command.lower().strip() + + # Determine action + tool = None + if any(word in command_lower for word in ['turn off', 'off', 'disable']): + tool = 'turn_off' + action_desc = 'turned off' + elif any(word in command_lower for word in ['turn on', 'on', 'enable']): + tool = 'turn_on' + action_desc = 'turned on' + elif 'toggle' in command_lower: + tool = 'toggle' + action_desc = 'toggled' + else: + logger.warning(f"Unknown action in command: {command}") + return None + + # Extract entity name from command + entity_query = command_lower + for action_word in ['turn off', 'turn on', 'toggle', 'off', 'on', 'the']: + entity_query = entity_query.replace(action_word, '').strip() + + logger.info(f"Searching for entity: '{entity_query}'") + + # Return placeholder (this will work if entity ID matches pattern) + return { + "tool": tool, + "arguments": { + "entity_id": f"light.{entity_query.replace(' ', '_')}" + }, + "friendly_name": entity_query.title(), + "action_desc": action_desc + } + + async def _parse_command_hybrid(self, command: str) -> Optional['ParsedCommand']: + """ + Hybrid command parser: Try LLM first, fallback to keywords. + + This provides the best of both worlds: + - LLM parsing for complex area-based and natural commands + - Keyword fallback for reliability when LLM fails or times out + + Args: + command: Natural language command + + Returns: + ParsedCommand if successful, None otherwise + + Example: + >>> await self._parse_command_hybrid("turn off study lights") + ParsedCommand(action="turn_off", target_type="area", target="study", ...) 
+ """ + import asyncio + from .command_parser import ParsedCommand + + # Try LLM parsing with timeout + try: + logger.debug("Attempting LLM-based command parsing...") + parsed = await asyncio.wait_for( + self._parse_command_with_llm(command), + timeout=5.0 + ) + + if parsed: + logger.info("LLM parsing succeeded") + return parsed + else: + logger.warning("LLM parsing returned None, falling back to keywords") + + except asyncio.TimeoutError: + logger.warning("LLM parsing timed out (>5s), falling back to keywords") + except Exception as e: + logger.warning(f"LLM parsing failed: {e}, falling back to keywords") + + # Fallback to keyword-based parsing + try: + logger.debug("Using fallback keyword parsing...") + fallback_result = await self._parse_command_fallback(command) + + if not fallback_result: + return None + + # Convert fallback format to ParsedCommand + # Extract entity_id from arguments + entity_id = fallback_result['arguments'].get('entity_id', '') + entity_name = entity_id.split('.', 1)[1] if '.' in entity_id else entity_id + + # Simple heuristic: assume it's targeting a single entity + parsed = ParsedCommand( + action=fallback_result['tool'], + target_type='entity', + target=entity_name.replace('_', ' '), + entity_type=None, + parameters={} + ) + + logger.info("Fallback parsing succeeded") + return parsed + + except Exception as e: + logger.error(f"Fallback parsing failed: {e}", exc_info=True) + return None + + @staticmethod + async def test_connection(config: Dict[str, Any]) -> Dict[str, Any]: + """ + Test Home Assistant API connection with provided configuration. + + This static method tests the HA API connection without fully initializing the plugin. + Used by the form-based configuration UI to validate settings before saving. 
+
+        Args:
+            config: Configuration dictionary with HA settings:
+                - ha_url: Home Assistant URL
+                - ha_token: Long-lived access token
+                - timeout: Request timeout (optional, default 30)
+
+        Returns:
+            Dict with success status, message, and optional details
+
+        Example:
+            >>> result = await HomeAssistantPlugin.test_connection({
+            ...     'ha_url': 'http://homeassistant.local:8123',
+            ...     'ha_token': 'your_long_lived_token'
+            ... })
+            >>> result['success']
+            True
+        """
+        import time
+
+        try:
+            # Validate required config fields
+            required_fields = ['ha_url', 'ha_token']
+            missing_fields = [field for field in required_fields if not config.get(field)]
+
+            if missing_fields:
+                return {
+                    "success": False,
+                    "message": f"Missing required fields: {', '.join(missing_fields)}",
+                    "status": "error"
+                }
+
+            ha_url = config.get('ha_url')
+            ha_token = config.get('ha_token')
+            timeout = config.get('timeout', 30)
+
+            # Create temporary MCP client
+            mcp_client = HAMCPClient(
+                base_url=ha_url,
+                token=ha_token,
+                timeout=timeout
+            )
+
+            # Test API connectivity with Template API
+            logger.info(f"Testing Home Assistant API connection to {ha_url}...")
+            start_time = time.time()
+
+            test_result = await mcp_client._render_template("{{ 1 + 1 }}")
+            connection_time_ms = int((time.time() - start_time) * 1000)
+
+            if str(test_result).strip() != "2":
+                return {
+                    "success": False,
+                    "message": f"Unexpected template result: {test_result}",
+                    "status": "error"
+                }
+
+            # Try to fetch entities count for additional info
+            try:
+                entities = await mcp_client.fetch_entity_states()
+                entity_count = len(entities)
+            except Exception:
+                entity_count = None
+
+            return {
+                "success": True,
+                "message": f"Successfully connected to Home Assistant at {ha_url}",
+                "status": "success",
+                "details": {
+                    "ha_url": ha_url,
+                    "connection_time_ms": connection_time_ms,
+                    "entity_count": entity_count,
+                    "api_test": "Template rendering successful"
+                }
+            }
+
+        except Exception as e:
+            logger.error(f"Home Assistant connection 
test failed: {e}", exc_info=True) + return { + "success": False, + "message": f"Connection test failed: {str(e)}", + "status": "error" + } diff --git a/backends/advanced/src/advanced_omi_backend/plugins/router.py b/backends/advanced/src/advanced_omi_backend/plugins/router.py new file mode 100644 index 00000000..523fe3ed --- /dev/null +++ b/backends/advanced/src/advanced_omi_backend/plugins/router.py @@ -0,0 +1,256 @@ +""" +Plugin routing system for multi-level plugin architecture. + +Routes pipeline events to appropriate plugins based on access level and triggers. +""" + +import logging +import re +import string +from typing import Dict, List, Optional + +from .base import BasePlugin, PluginContext, PluginResult + +logger = logging.getLogger(__name__) + + +def normalize_text_for_wake_word(text: str) -> str: + """ + Normalize text for wake word matching. + - Lowercase + - Replace punctuation with spaces + - Collapse multiple spaces to single space + - Strip leading/trailing whitespace + + Example: + "Hey, Vivi!" -> "hey vivi" + "HEY VIVI" -> "hey vivi" + "Hey-Vivi" -> "hey vivi" + """ + # Lowercase + text = text.lower() + # Replace punctuation with spaces (instead of removing, to preserve word boundaries) + text = text.translate(str.maketrans(string.punctuation, ' ' * len(string.punctuation))) + # Normalize whitespace (collapse multiple spaces to single space) + text = re.sub(r'\s+', ' ', text) + # Strip leading/trailing whitespace + return text.strip() + + +def extract_command_after_wake_word(transcript: str, wake_word: str) -> str: + """ + Intelligently extract command after wake word in original transcript. + + Handles punctuation and spacing variations by creating a flexible regex pattern. 
class PluginRouter:
    """Routes pipeline events to appropriate plugins based on event subscriptions.

    Plugins are registered once via register_plugin(); dispatch_event() then
    fans each event out to the subscribed plugins in registration order,
    honoring per-plugin enable flags and execution conditions (e.g. wake word).
    Plugin failures are isolated: one plugin raising does not stop the others.
    """

    def __init__(self):
        # plugin_id -> plugin instance
        self.plugins: Dict[str, BasePlugin] = {}
        # Index plugins by event for fast lookup (event name -> list of plugin ids)
        self._plugins_by_event: Dict[str, List[str]] = {}

    def register_plugin(self, plugin_id: str, plugin: BasePlugin):
        """Register a plugin with the router and index it by its subscribed events."""
        self.plugins[plugin_id] = plugin

        # Index by each event
        for event in plugin.events:
            if event not in self._plugins_by_event:
                self._plugins_by_event[event] = []
            self._plugins_by_event[event].append(plugin_id)

        logger.info(f"Registered plugin '{plugin_id}' for events: {plugin.events}")

    async def dispatch_event(
        self,
        event: str,
        user_id: str,
        data: Dict,
        metadata: Optional[Dict] = None
    ) -> List[PluginResult]:
        """
        Dispatch event to all subscribed plugins.

        Args:
            event: Event name (e.g., 'transcript.streaming', 'conversation.complete')
            user_id: User ID for context
            data: Event-specific data. NOTE: _should_execute may mutate this dict
                (adds 'command' and 'original_transcript' on wake-word match).
            metadata: Optional metadata

        Returns:
            List of plugin results (only plugins that returned a result; a plugin
            returning should_continue=False stops dispatch to later plugins).
        """
        # Announce dispatch up-front so event flow is traceable in the logs.
        logger.info(f"πŸ”Œ ROUTER: Dispatching '{event}' event (user={user_id})")

        results = []

        # Get plugins subscribed to this event
        plugin_ids = self._plugins_by_event.get(event, [])

        # No subscribers: warn, since this usually indicates a wiring gap.
        if not plugin_ids:
            logger.warning(f"πŸ”Œ ROUTER: No plugins subscribed to event '{event}'")
            return results

        logger.info(f"πŸ”Œ ROUTER: Found {len(plugin_ids)} subscribed plugin(s): {plugin_ids}")

        for plugin_id in plugin_ids:
            plugin = self.plugins[plugin_id]

            if not plugin.enabled:
                logger.info(f"  ⊘ Skipping '{plugin_id}': disabled")
                continue

            # Check execution condition (wake_word, etc.)
            logger.info(f"  β†’ Checking execution condition for '{plugin_id}'")
            if not await self._should_execute(plugin, data):
                logger.info(f"  ⊘ Skipping '{plugin_id}': condition not met")
                continue

            # Execute plugin
            try:
                logger.info(f"  β–Ά Executing '{plugin_id}' for event '{event}'")
                context = PluginContext(
                    user_id=user_id,
                    event=event,
                    data=data,
                    metadata=metadata or {}
                )

                result = await self._execute_plugin(plugin, event, context)

                if result:
                    status_icon = "βœ“" if result.success else "βœ—"
                    logger.info(
                        f"  {status_icon} Plugin '{plugin_id}' completed: "
                        f"success={result.success}, message={result.message}"
                    )
                    results.append(result)

                    # If plugin says stop processing, break
                    if not result.should_continue:
                        logger.info(f"  βŠ— Plugin '{plugin_id}' stopped further processing")
                        break

            except Exception as e:
                # Isolate plugin failures: log with traceback and continue
                # dispatching to the remaining plugins.
                logger.error(
                    f"  βœ— Plugin '{plugin_id}' FAILED with exception: {e}",
                    exc_info=True
                )

        # Summary. NOTE(review): len(results) counts plugins that returned a
        # result, including ones with success=False — "executed successfully"
        # in the message is slightly optimistic.
        logger.info(
            f"πŸ”Œ ROUTER: Dispatch complete for '{event}': "
            f"{len(results)} plugin(s) executed successfully"
        )

        return results

    async def _should_execute(self, plugin: BasePlugin, data: Dict) -> bool:
        """Check if plugin should be executed based on condition configuration.

        Side effect: on a wake-word match, writes 'command' and
        'original_transcript' into `data` for the plugin to consume.
        """
        condition_type = plugin.condition.get('type', 'always')

        if condition_type == 'always':
            return True

        elif condition_type == 'wake_word':
            # Normalize transcript for matching (handles punctuation and spacing)
            transcript = data.get('transcript', '')
            normalized_transcript = normalize_text_for_wake_word(transcript)

            # Support both singular 'wake_word' and plural 'wake_words' (list)
            wake_words = plugin.condition.get('wake_words', [])
            if not wake_words:
                # Fallback to singular wake_word for backward compatibility
                wake_word = plugin.condition.get('wake_word', '')
                if wake_word:
                    wake_words = [wake_word]

            # Check if transcript starts with any wake word (after normalization)
            for wake_word in wake_words:
                normalized_wake_word = normalize_text_for_wake_word(wake_word)
                if normalized_wake_word and normalized_transcript.startswith(normalized_wake_word):
                    # Smart extraction: find where wake word actually ends in original text
                    command = extract_command_after_wake_word(transcript, wake_word)
                    data['command'] = command
                    data['original_transcript'] = transcript
                    logger.debug(f"Wake word '{wake_word}' detected. Original: '{transcript}', Command: '{command}'")
                    return True

            return False

        elif condition_type == 'conditional':
            # Future: Custom condition checking
            return True

        return False

    async def _execute_plugin(
        self,
        plugin: BasePlugin,
        event: str,
        context: PluginContext
    ) -> Optional[PluginResult]:
        """Execute plugin method for specified event.

        Maps event-name prefixes to the plugin's callback methods; returns None
        for event families with no mapped callback.
        """
        # Map events to plugin callback methods
        if event.startswith('transcript.'):
            return await plugin.on_transcript(context)
        elif event.startswith('conversation.'):
            return await plugin.on_conversation_complete(context)
        elif event.startswith('memory.'):
            return await plugin.on_memory_processed(context)

        return None

    async def cleanup_all(self):
        """Clean up all registered plugins; failures are logged, not raised."""
        for plugin_id, plugin in self.plugins.items():
            try:
                await plugin.cleanup()
                logger.info(f"Cleaned up plugin '{plugin_id}'")
            except Exception as e:
                logger.error(f"Error cleaning up plugin '{plugin_id}': {e}")
b/backends/advanced/src/advanced_omi_backend/plugins/test_event/config.yml new file mode 100644 index 00000000..8b4f776b --- /dev/null +++ b/backends/advanced/src/advanced_omi_backend/plugins/test_event/config.yml @@ -0,0 +1,9 @@ +# Test Event Plugin Configuration +# +# This plugin is for development and integration testing only. +# It logs all plugin events to SQLite for verification. +# +# Enable in config/plugins.yml by setting enabled: true + +# Database path for event storage +db_path: ${TEST_PLUGIN_DB_PATH:-/app/debug/test_plugin_events.db} diff --git a/backends/advanced/src/advanced_omi_backend/plugins/test_event/event_storage.py b/backends/advanced/src/advanced_omi_backend/plugins/test_event/event_storage.py new file mode 100644 index 00000000..4fb618f9 --- /dev/null +++ b/backends/advanced/src/advanced_omi_backend/plugins/test_event/event_storage.py @@ -0,0 +1,323 @@ +""" +Event storage module for test plugin using SQLite. + +Provides async SQLite operations for logging and querying plugin events. 
+""" +import json +import logging +import os +from datetime import datetime +from pathlib import Path +from typing import Any, Dict, List, Optional + +import aiosqlite + +logger = logging.getLogger(__name__) + + +class EventStorage: + """SQLite-based event storage for test plugin""" + + def __init__(self, db_path: str = "/app/debug/test_plugin_events.db"): + self.db_path = db_path + self.db: Optional[aiosqlite.Connection] = None + + async def initialize(self): + """Initialize database and create tables""" + # Ensure directory exists + logger.info(f"πŸ” DEBUG: Initializing event storage with db_path={self.db_path}") + + db_dir = Path(self.db_path).parent + logger.info(f"πŸ” DEBUG: Database directory: {db_dir}") + logger.info(f"πŸ” DEBUG: Directory exists before mkdir: {db_dir.exists()}") + + try: + db_dir.mkdir(parents=True, exist_ok=True) + logger.info(f"πŸ” DEBUG: Directory created/verified: {db_dir}") + logger.info(f"πŸ” DEBUG: Directory permissions: {oct(db_dir.stat().st_mode)}") + except Exception as e: + logger.error(f"πŸ” DEBUG: Failed to create directory: {e}") + raise + + logger.info(f"πŸ” DEBUG: Attempting to connect to SQLite database...") + try: + self.db = await aiosqlite.connect(self.db_path) + logger.info(f"πŸ” DEBUG: Successfully connected to database") + + # Enable WAL mode for better concurrent access (allows concurrent reads/writes) + # This fixes the "readonly database" error when Robot tests access from host + await self.db.execute("PRAGMA journal_mode=WAL") + await self.db.execute("PRAGMA busy_timeout=5000") # Wait up to 5s for locks + logger.info(f"βœ“ Enabled WAL mode for concurrent access") + + # Set file permissions to 666 so host user can write (container runs as root) + # Robot tests run as host user and need write access to the database + try: + os.chmod(self.db_path, 0o666) + # Also set permissions on WAL and SHM files if they exist + wal_file = f"{self.db_path}-wal" + shm_file = f"{self.db_path}-shm" + if 
os.path.exists(wal_file): + os.chmod(wal_file, 0o666) + if os.path.exists(shm_file): + os.chmod(shm_file, 0o666) + logger.info(f"βœ“ Set database file permissions to 666 for host access") + except Exception as perm_error: + logger.warning(f"Could not set database permissions: {perm_error}") + + except Exception as e: + logger.error(f"πŸ” DEBUG: Failed to connect to database: {e}") + logger.error(f"πŸ” DEBUG: Database file exists: {Path(self.db_path).exists()}") + if Path(self.db_path).exists(): + logger.error(f"πŸ” DEBUG: Database file permissions: {oct(Path(self.db_path).stat().st_mode)}") + raise + + # Create events table + await self.db.execute(""" + CREATE TABLE IF NOT EXISTS plugin_events ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + timestamp DATETIME NOT NULL, + event TEXT NOT NULL, + user_id TEXT NOT NULL, + data TEXT NOT NULL, + metadata TEXT, + created_at DATETIME DEFAULT CURRENT_TIMESTAMP + ) + """) + + # Create index for faster queries + await self.db.execute(""" + CREATE INDEX IF NOT EXISTS idx_event_type + ON plugin_events(event) + """) + + await self.db.execute(""" + CREATE INDEX IF NOT EXISTS idx_user_id + ON plugin_events(user_id) + """) + + await self.db.commit() + logger.info(f"Event storage initialized at {self.db_path}") + + async def log_event( + self, + event: str, + user_id: str, + data: Dict[str, Any], + metadata: Optional[Dict[str, Any]] = None + ) -> int: + """ + Log an event to the database. 
+ + Args: + event: Event name (e.g., 'transcript.batch') + user_id: User ID from context + data: Event data dictionary + metadata: Optional metadata dictionary + + Returns: + Row ID of inserted event + """ + # Add at start + logger.debug(f"πŸ’Ύ STORAGE: Logging event '{event}' for user {user_id}") + + if not self.db: + logger.error("πŸ’Ύ STORAGE: Database connection not initialized!") + raise RuntimeError("Event storage not initialized") + + timestamp = datetime.utcnow().isoformat() + + # Add before serialization + logger.debug(f"πŸ’Ύ STORAGE: Serializing event data...") + try: + data_json = json.dumps(data) + metadata_json = json.dumps(metadata) if metadata else None + except Exception as e: + logger.error( + f"πŸ’Ύ STORAGE: JSON serialization failed for event '{event}': {e}", + exc_info=True + ) + raise + + # Add before database operation + logger.debug(f"πŸ’Ύ STORAGE: Inserting into plugin_events table...") + + try: + cursor = await self.db.execute( + """ + INSERT INTO plugin_events (timestamp, event, user_id, data, metadata) + VALUES (?, ?, ?, ?, ?) + """, + (timestamp, event, user_id, data_json, metadata_json) + ) + + await self.db.commit() + row_id = cursor.lastrowid + + # Add success log + logger.info( + f"πŸ’Ύ STORAGE: Event '{event}' inserted successfully (row_id={row_id})" + ) + + return row_id + + except Exception as e: + logger.error( + f"πŸ’Ύ STORAGE: Database operation failed for event '{event}': {e}", + exc_info=True + ) + raise + + async def get_events_by_type(self, event: str) -> List[Dict[str, Any]]: + """ + Query events by event type. + + Args: + event: Event name to filter by + + Returns: + List of event dictionaries + """ + if not self.db: + raise RuntimeError("Event storage not initialized") + + cursor = await self.db.execute( + """ + SELECT id, timestamp, event, user_id, data, metadata, created_at + FROM plugin_events + WHERE event = ? 
+ ORDER BY created_at DESC + """, + (event,) + ) + + rows = await cursor.fetchall() + return self._rows_to_dicts(rows) + + async def get_events_by_user(self, user_id: str) -> List[Dict[str, Any]]: + """ + Query events by user ID. + + Args: + user_id: User ID to filter by + + Returns: + List of event dictionaries + """ + if not self.db: + raise RuntimeError("Event storage not initialized") + + cursor = await self.db.execute( + """ + SELECT id, timestamp, event, user_id, data, metadata, created_at + FROM plugin_events + WHERE user_id = ? + ORDER BY created_at DESC + """, + (user_id,) + ) + + rows = await cursor.fetchall() + return self._rows_to_dicts(rows) + + async def get_all_events(self) -> List[Dict[str, Any]]: + """ + Get all logged events. + + Returns: + List of all event dictionaries + """ + if not self.db: + raise RuntimeError("Event storage not initialized") + + cursor = await self.db.execute( + """ + SELECT id, timestamp, event, user_id, data, metadata, created_at + FROM plugin_events + ORDER BY created_at DESC + """ + ) + + rows = await cursor.fetchall() + return self._rows_to_dicts(rows) + + async def clear_events(self) -> int: + """ + Clear all events from the database. + + Returns: + Number of rows deleted + """ + if not self.db: + raise RuntimeError("Event storage not initialized") + + cursor = await self.db.execute("DELETE FROM plugin_events") + await self.db.commit() + + deleted = cursor.rowcount + logger.info(f"Cleared {deleted} events from database") + + return deleted + + async def get_event_count(self, event: Optional[str] = None) -> int: + """ + Get count of events. 
+ + Args: + event: Optional event type to filter by + + Returns: + Count of matching events + """ + if not self.db: + raise RuntimeError("Event storage not initialized") + + if event: + cursor = await self.db.execute( + "SELECT COUNT(*) FROM plugin_events WHERE event = ?", + (event,) + ) + else: + cursor = await self.db.execute( + "SELECT COUNT(*) FROM plugin_events" + ) + + row = await cursor.fetchone() + return row[0] if row else 0 + + def _rows_to_dicts(self, rows: List[tuple]) -> List[Dict[str, Any]]: + """ + Convert database rows to dictionaries. + + Args: + rows: List of database row tuples + + Returns: + List of event dictionaries + """ + events = [] + + for row in rows: + event_dict = { + 'id': row[0], + 'timestamp': row[1], + 'event': row[2], + 'user_id': row[3], + 'data': json.loads(row[4]) if row[4] else {}, + 'metadata': json.loads(row[5]) if row[5] else {}, + 'created_at': row[6] + } + + # Flatten data fields to top level for easier access in tests + if isinstance(event_dict['data'], dict): + event_dict.update(event_dict['data']) + + events.append(event_dict) + + return events + + async def cleanup(self): + """Close database connection""" + if self.db: + await self.db.close() + logger.info("Event storage connection closed") diff --git a/backends/advanced/src/advanced_omi_backend/plugins/test_event/plugin.py b/backends/advanced/src/advanced_omi_backend/plugins/test_event/plugin.py new file mode 100644 index 00000000..59dd652e --- /dev/null +++ b/backends/advanced/src/advanced_omi_backend/plugins/test_event/plugin.py @@ -0,0 +1,232 @@ +""" +Test Event Plugin + +Logs all plugin events to SQLite database for integration testing. +Subscribes to all event types to verify event dispatch system works correctly. 
+""" +import logging +from typing import Any, Dict, List, Optional + +from advanced_omi_backend.plugins.base import BasePlugin, PluginContext, PluginResult +from .event_storage import EventStorage + +logger = logging.getLogger(__name__) + + +class TestEventPlugin(BasePlugin): + """ + Test plugin that logs all events for verification. + + Subscribes to: + - transcript.streaming: Real-time WebSocket transcription + - transcript.batch: File upload batch transcription + - conversation.complete: Conversation processing complete + - memory.processed: Memory extraction complete + + All events are logged to SQLite database with full context for test verification. + """ + + SUPPORTED_ACCESS_LEVELS: List[str] = ['transcript', 'conversation', 'memory'] + + def __init__(self, config: Dict[str, Any]): + super().__init__(config) + self.storage = EventStorage( + db_path=config.get('db_path', '/app/debug/test_plugin_events.db') + ) + self.event_count = 0 + + async def initialize(self): + """Initialize the test plugin and event storage""" + try: + await self.storage.initialize() + logger.info("βœ… Test Event Plugin initialized successfully") + except Exception as e: + logger.error(f"❌ Failed to initialize Test Event Plugin: {e}") + raise + + async def on_transcript(self, context: PluginContext) -> Optional[PluginResult]: + """ + Log transcript events (streaming or batch). 
+ + Context data contains: + - transcript: str - The transcript text + - conversation_id: str - Conversation ID + - For streaming: is_final, confidence, words, segments + - For batch: word_count, segments + + Args: + context: Plugin context with event data + + Returns: + PluginResult indicating success + """ + try: + # Determine which transcript event this is based on context.event + event_type = context.event # 'transcript.streaming' or 'transcript.batch' + + # Extract key data fields + transcript = context.data.get('transcript', '') + conversation_id = context.data.get('conversation_id', 'unknown') + + # Log to storage + row_id = await self.storage.log_event( + event=event_type, + user_id=context.user_id, + data=context.data, + metadata=context.metadata + ) + + self.event_count += 1 + + logger.info( + f"πŸ“ Logged {event_type} event (row_id={row_id}): " + f"user={context.user_id}, " + f"conversation={conversation_id}, " + f"transcript='{transcript[:50]}...'" + ) + + return PluginResult( + success=True, + message=f"Transcript event logged (row_id={row_id})", + should_continue=True # Don't block normal processing + ) + + except Exception as e: + logger.error(f"Error logging transcript event: {e}", exc_info=True) + return PluginResult( + success=False, + message=f"Failed to log transcript event: {e}", + should_continue=True + ) + + async def on_conversation_complete(self, context: PluginContext) -> Optional[PluginResult]: + """ + Log conversation completion events. 
+ + Context data contains: + - conversation: dict - Full conversation data + - transcript: str - Complete conversation transcript + - duration: float - Conversation duration + - conversation_id: str - Conversation identifier + + Args: + context: Plugin context with event data + + Returns: + PluginResult indicating success + """ + conversation_id = context.data.get('conversation_id', 'unknown') + duration = context.data.get('duration', 0) + + # Add at start + logger.info( + f"πŸ“ HANDLER: on_conversation_complete called for {conversation_id[:12]}" + ) + logger.debug(f" Event: {context.event}") + logger.debug(f" Metadata: {context.metadata}") + logger.debug(f" Duration: {duration}s") + + try: + # Add before storage + logger.info(f" πŸ’Ύ Storing event to SQLite database...") + + row_id = await self.storage.log_event( + event=context.event, # 'conversation.complete' + user_id=context.user_id, + data=context.data, + metadata=context.metadata + ) + + # Add after storage + logger.info(f" βœ“ Event stored successfully (row_id={row_id})") + + self.event_count += 1 + + return PluginResult( + success=True, + message=f"Conversation event logged (row_id={row_id})", + data={"row_id": row_id}, + should_continue=True, + ) + + except Exception as e: + # Enhance error logging + logger.error( + f" βœ— Storage FAILED for {conversation_id[:12]}: {e}", + exc_info=True + ) + return PluginResult( + success=False, + message=f"Failed to log conversation event: {e}", + should_continue=True, + ) + + async def on_memory_processed(self, context: PluginContext) -> Optional[PluginResult]: + """ + Log memory processing events. 
+ + Context data contains: + - memories: list - Extracted memories + - conversation: dict - Source conversation + - memory_count: int - Number of memories created + - conversation_id: str - Conversation identifier + + Metadata contains: + - processing_time: float - Time spent processing + - memory_provider: str - Provider name + + Args: + context: Plugin context with event data + + Returns: + PluginResult indicating success + """ + try: + conversation_id = context.data.get('conversation_id', 'unknown') + memory_count = context.data.get('memory_count', 0) + memory_provider = context.metadata.get('memory_provider', 'unknown') + processing_time = context.metadata.get('processing_time', 0) + + # Log to storage + row_id = await self.storage.log_event( + event=context.event, # 'memory.processed' + user_id=context.user_id, + data=context.data, + metadata=context.metadata + ) + + self.event_count += 1 + + logger.info( + f"πŸ“ Logged memory.processed event (row_id={row_id}): " + f"user={context.user_id}, " + f"conversation={conversation_id}, " + f"memory_count={memory_count}, " + f"provider={memory_provider}, " + f"processing_time={processing_time:.2f}s" + ) + + return PluginResult( + success=True, + message=f"Memory event logged (row_id={row_id})", + should_continue=True + ) + + except Exception as e: + logger.error(f"Error logging memory event: {e}", exc_info=True) + return PluginResult( + success=False, + message=f"Failed to log memory event: {e}", + should_continue=True + ) + + async def cleanup(self): + """Clean up plugin resources""" + try: + logger.info( + f"🧹 Test Event Plugin shutting down. 
" + f"Logged {self.event_count} total events" + ) + await self.storage.cleanup() + except Exception as e: + logger.error(f"Error during test plugin cleanup: {e}") diff --git a/backends/advanced/src/advanced_omi_backend/routers/api_router.py b/backends/advanced/src/advanced_omi_backend/routers/api_router.py index 9e761f8e..5a135c7e 100644 --- a/backends/advanced/src/advanced_omi_backend/routers/api_router.py +++ b/backends/advanced/src/advanced_omi_backend/routers/api_router.py @@ -6,14 +6,18 @@ """ import logging +import os from fastapi import APIRouter from .modules import ( + admin_router, + annotation_router, audio_router, chat_router, client_router, conversation_router, + finetuning_router, memory_router, obsidian_router, queue_router, @@ -29,16 +33,27 @@ router = APIRouter(prefix="/api", tags=["api"]) # Include all sub-routers +router.include_router(admin_router) +router.include_router(annotation_router) router.include_router(audio_router) router.include_router(user_router) router.include_router(chat_router) router.include_router(client_router) router.include_router(conversation_router) +router.include_router(finetuning_router) router.include_router(memory_router) router.include_router(obsidian_router) router.include_router(system_router) router.include_router(queue_router) router.include_router(health_router) # Also include under /api for frontend compatibility +# Conditionally include test routes (only in test environments) +if os.getenv("DEBUG_DIR"): + try: + from .modules.test_routes import router as test_router + router.include_router(test_router) + logger.info("βœ… Test routes loaded (test environment detected)") + except Exception as e: + logger.error(f"Error loading test routes: {e}", exc_info=True) logger.info("API router initialized with all sub-modules") diff --git a/backends/advanced/src/advanced_omi_backend/routers/modules/__init__.py b/backends/advanced/src/advanced_omi_backend/routers/modules/__init__.py index 21f89991..4025a6dc 100644 --- 
a/backends/advanced/src/advanced_omi_backend/routers/modules/__init__.py +++ b/backends/advanced/src/advanced_omi_backend/routers/modules/__init__.py @@ -7,17 +7,23 @@ - client_routes: Active client monitoring and management - conversation_routes: Conversation CRUD and audio processing - memory_routes: Memory management, search, and debug +- annotation_routes: Annotation CRUD for memories and transcripts +- finetuning_routes: Model fine-tuning and training management - system_routes: System utilities and metrics - queue_routes: Job queue management and monitoring - audio_routes: Audio file uploads and processing - health_routes: Health check endpoints - websocket_routes: WebSocket connection handling +- admin_routes: Admin-only system management endpoints """ +from .admin_routes import router as admin_router +from .annotation_routes import router as annotation_router from .audio_routes import router as audio_router from .chat_routes import router as chat_router from .client_routes import router as client_router from .conversation_routes import router as conversation_router +from .finetuning_routes import router as finetuning_router from .health_routes import router as health_router from .memory_routes import router as memory_router from .obsidian_routes import router as obsidian_router @@ -27,10 +33,13 @@ from .websocket_routes import router as websocket_router __all__ = [ + "admin_router", + "annotation_router", "audio_router", "chat_router", "client_router", "conversation_router", + "finetuning_router", "health_router", "memory_router", "obsidian_router", diff --git a/backends/advanced/src/advanced_omi_backend/routers/modules/admin_routes.py b/backends/advanced/src/advanced_omi_backend/routers/modules/admin_routes.py new file mode 100644 index 00000000..a2ef4398 --- /dev/null +++ b/backends/advanced/src/advanced_omi_backend/routers/modules/admin_routes.py @@ -0,0 +1,122 @@ +""" +Admin routes for Chronicle API. 
def require_admin(current_user: User = Depends(current_active_user)) -> User:
    """Dependency to require admin/superuser permissions.

    Raises:
        HTTPException: 403 when the authenticated user is not a superuser.
    """
    if not current_user.is_superuser:
        raise HTTPException(
            status_code=403,
            detail="Admin permissions required"
        )
    return current_user


@router.get("/cleanup/settings")
async def get_cleanup_settings_admin(
    admin: User = Depends(require_admin)
):
    """Get current cleanup settings (admin only)."""
    # Imported lazily here rather than at module top — presumably to avoid an
    # import cycle at application startup; confirm before hoisting.
    from advanced_omi_backend.config import get_cleanup_settings

    settings = get_cleanup_settings()
    return {
        **settings,
        "note": "Cleanup settings are stored in /app/data/cleanup_config.json"
    }


@router.post("/cleanup")
async def trigger_cleanup(
    dry_run: bool = Query(False, description="Preview what would be deleted"),
    retention_days: Optional[int] = Query(None, description="Override retention period"),
    admin: User = Depends(require_admin)
):
    """Manually trigger cleanup of soft-deleted conversations (admin only).

    Enqueues an RQ job rather than purging inline; the response contains the
    job id so callers can poll /api/queue/jobs/{job_id}.
    """
    try:
        # Lazy imports — presumably to avoid import cycles; confirm before hoisting.
        from advanced_omi_backend.workers.cleanup_jobs import purge_old_deleted_conversations
        from advanced_omi_backend.controllers.queue_controller import get_queue

        # Enqueue cleanup job
        queue = get_queue("default")
        job = queue.enqueue(
            purge_old_deleted_conversations,
            retention_days=retention_days,  # Will use config default if None
            dry_run=dry_run,
            job_timeout="30m",
        )

        logger.info(f"Admin {admin.email} triggered cleanup job {job.id} (dry_run={dry_run}, retention={retention_days or 'default'})")

        return JSONResponse(
            status_code=200,
            content={
                "message": f"Cleanup job {'(dry run) ' if dry_run else ''}queued successfully",
                "job_id": job.id,
                "retention_days": retention_days or "default (from config)",
                "dry_run": dry_run,
                "note": "Check job status at /api/queue/jobs/{job_id}"
            }
        )

    except Exception as e:
        logger.error(f"Failed to trigger cleanup: {e}")
        return JSONResponse(
            status_code=500,
            content={"error": f"Failed to trigger cleanup: {str(e)}"}
        )


@router.get("/cleanup/preview")
async def preview_cleanup(
    retention_days: Optional[int] = Query(None, description="Preview with specific retention period"),
    admin: User = Depends(require_admin)
):
    """Preview what would be deleted by cleanup (admin only).

    Counts soft-deleted conversations older than the retention cutoff without
    deleting anything.
    """
    try:
        # Lazy imports — presumably to avoid import cycles; confirm before hoisting.
        from advanced_omi_backend.config import get_cleanup_settings
        from advanced_omi_backend.models.conversation import Conversation
        from datetime import datetime, timedelta

        # Use provided retention or default from config
        if retention_days is None:
            settings_dict = get_cleanup_settings()
            retention_days = settings_dict['retention_days']

        # NOTE(review): datetime.utcnow() is naive and deprecated in 3.12+;
        # this assumes Conversation.deleted_at is also stored naive-UTC —
        # confirm against the model before changing either side.
        cutoff_date = datetime.utcnow() - timedelta(days=retention_days)

        # Count conversations that would be deleted (ODM query expression,
        # hence the `== True` comparison instead of a bare truthiness check).
        count = await Conversation.find(
            Conversation.deleted == True,
            Conversation.deleted_at < cutoff_date
        ).count()

        return {
            "retention_days": retention_days,
            "cutoff_date": cutoff_date.isoformat(),
            "conversations_to_delete": count,
            "note": f"Conversations deleted before {cutoff_date.date()} would be purged"
        }

    except Exception as e:
        logger.error(f"Failed to preview cleanup: {e}")
        return JSONResponse(
            status_code=500,
            content={"error": f"Failed to preview cleanup: {str(e)}"}
        )
@router.post("/memory", response_model=AnnotationResponse)
async def create_memory_annotation(
    annotation_data: MemoryAnnotationCreate,
    current_user: User = Depends(current_active_user),
):
    """
    Create annotation for memory edit.

    - Validates user owns memory
    - Creates annotation record
    - Updates memory content in vector store when status is ACCEPTED

    Raises:
        HTTPException 404: memory missing or not owned by the caller.
        HTTPException 500: unexpected failure while creating the annotation.
    """
    try:
        memory_service = get_memory_service()

        # Verify memory ownership. The "memory is None" check is kept OUTSIDE
        # the try/except: previously the HTTPException(404) raised for a
        # missing memory was caught by the broad `except Exception` below it
        # and misleadingly logged as a fetch error before being re-raised.
        try:
            memory = await memory_service.get_memory(
                annotation_data.memory_id, current_user.user_id
            )
        except Exception as e:
            # Service-level lookup failure is surfaced the same as not-found,
            # but logged so operators can distinguish the two cases.
            logger.error(f"Error fetching memory: {e}")
            raise HTTPException(status_code=404, detail="Memory not found")
        if not memory:
            raise HTTPException(status_code=404, detail="Memory not found")

        # Create annotation
        annotation = Annotation(
            annotation_type=AnnotationType.MEMORY,
            user_id=current_user.user_id,
            memory_id=annotation_data.memory_id,
            original_text=annotation_data.original_text,
            corrected_text=annotation_data.corrected_text,
            status=annotation_data.status,
        )
        await annotation.save()
        logger.info(
            f"Created memory annotation {annotation.id} for memory {annotation_data.memory_id}"
        )

        # Update memory content if accepted
        if annotation.status == AnnotationStatus.ACCEPTED:
            try:
                await memory_service.update_memory(
                    memory_id=annotation_data.memory_id,
                    content=annotation_data.corrected_text,
                    user_id=current_user.user_id,
                )
                logger.info(
                    f"Updated memory {annotation_data.memory_id} with corrected text"
                )
            except Exception as e:
                logger.error(f"Error updating memory: {e}")
                # Annotation is saved, but memory update failed - log but don't fail the request
                logger.warning(
                    f"Memory annotation {annotation.id} saved but memory update failed"
                )

        return AnnotationResponse.model_validate(annotation)

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error creating memory annotation: {e}", exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=f"Failed to create memory annotation: {str(e)}",
        )
@router.post("/transcript", response_model=AnnotationResponse)
async def create_transcript_annotation(
    annotation_data: TranscriptAnnotationCreate,
    current_user: User = Depends(current_active_user),
):
    """
    Create annotation for transcript segment edit.

    The annotation is recorded but NOT applied to the transcript: it is saved
    as PENDING and unprocessed (processed=False) so the UI can show a pending
    badge. The unified apply endpoint later applies all annotations together,
    and memory reprocessing is deferred until then.
    """
    try:
        # Ownership check: the conversation must belong to the caller.
        conversation = await Conversation.find_one(
            Conversation.conversation_id == annotation_data.conversation_id,
            Conversation.user_id == current_user.user_id,
        )
        if conversation is None:
            raise HTTPException(status_code=404, detail="Conversation not found")

        # The segment index must point into the active transcript version.
        transcript = conversation.active_transcript
        segment_count = len(transcript.segments) if transcript else 0
        if annotation_data.segment_index >= segment_count:
            raise HTTPException(status_code=400, detail="Invalid segment index")

        target_segment = transcript.segments[annotation_data.segment_index]

        # Record the pending edit; original_text snapshots the current segment.
        pending_annotation = Annotation(
            annotation_type=AnnotationType.TRANSCRIPT,
            user_id=current_user.user_id,
            conversation_id=annotation_data.conversation_id,
            segment_index=annotation_data.segment_index,
            original_text=target_segment.text,
            corrected_text=annotation_data.corrected_text,
            status=AnnotationStatus.PENDING,
            processed=False,
        )
        await pending_annotation.save()
        logger.info(
            f"Created transcript annotation {pending_annotation.id} for conversation {annotation_data.conversation_id} segment {annotation_data.segment_index}"
        )

        # Intentionally no transcript mutation and no memory reprocessing here:
        # the user must click "Apply Changes" to apply annotations together.
        return AnnotationResponse.model_validate(pending_annotation)

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error creating transcript annotation: {e}", exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=f"Failed to create transcript annotation: {str(e)}",
        )
@router.get("/memory/{memory_id}", response_model=List[AnnotationResponse])
async def get_memory_annotations(
    memory_id: str,
    current_user: User = Depends(current_active_user),
):
    """Return every annotation the current user has made on a memory."""
    try:
        query = Annotation.find(
            Annotation.annotation_type == AnnotationType.MEMORY,
            Annotation.memory_id == memory_id,
            Annotation.user_id == current_user.user_id,
        )
        records = await query.to_list()
        return [AnnotationResponse.model_validate(record) for record in records]

    except Exception as e:
        logger.error(f"Error fetching memory annotations: {e}", exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=f"Failed to fetch memory annotations: {str(e)}",
        )


@router.get("/transcript/{conversation_id}", response_model=List[AnnotationResponse])
async def get_transcript_annotations(
    conversation_id: str,
    current_user: User = Depends(current_active_user),
):
    """Return every transcript annotation the current user has made on a conversation."""
    try:
        query = Annotation.find(
            Annotation.annotation_type == AnnotationType.TRANSCRIPT,
            Annotation.conversation_id == conversation_id,
            Annotation.user_id == current_user.user_id,
        )
        records = await query.to_list()
        return [AnnotationResponse.model_validate(record) for record in records]

    except Exception as e:
        logger.error(f"Error fetching transcript annotations: {e}", exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=f"Failed to fetch transcript annotations: {str(e)}",
        )
+ """ + try: + annotation = await Annotation.find_one( + Annotation.id == annotation_id, + Annotation.user_id == current_user.user_id, + ) + if not annotation: + raise HTTPException(status_code=404, detail="Annotation not found") + + old_status = annotation.status + annotation.status = status + annotation.updated_at = datetime.now(timezone.utc) + + # If accepting a pending suggestion, apply the correction + if ( + status == AnnotationStatus.ACCEPTED + and old_status == AnnotationStatus.PENDING + ): + if annotation.is_memory_annotation(): + # Update memory + try: + memory_service = get_memory_service() + await memory_service.update_memory( + memory_id=annotation.memory_id, + content=annotation.corrected_text, + user_id=current_user.user_id, + ) + logger.info( + f"Applied suggestion to memory {annotation.memory_id}" + ) + except Exception as e: + logger.error(f"Error applying memory suggestion: {e}") + # Don't fail the status update if memory update fails + elif annotation.is_transcript_annotation(): + # Update transcript segment + try: + conversation = await Conversation.find_one( + Conversation.conversation_id == annotation.conversation_id, + Conversation.user_id == annotation.user_id + ) + if conversation: + transcript = conversation.active_transcript + if ( + transcript + and annotation.segment_index < len(transcript.segments) + ): + transcript.segments[ + annotation.segment_index + ].text = annotation.corrected_text + await conversation.save() + logger.info( + f"Applied suggestion to transcript segment {annotation.segment_index}" + ) + except Exception as e: + logger.error(f"Error applying transcript suggestion: {e}") + # Don't fail the status update if segment update fails + + await annotation.save() + logger.info(f"Updated annotation {annotation_id} status to {status}") + + return {"status": "updated", "annotation_id": annotation_id, "new_status": status} + + except HTTPException: + raise + except Exception as e: + logger.error(f"Error updating annotation 
status: {e}", exc_info=True) + raise HTTPException( + status_code=500, + detail=f"Failed to update annotation status: {str(e)}", + ) + + +# === Diarization Annotation Routes === + + +@router.post("/diarization", response_model=AnnotationResponse) +async def create_diarization_annotation( + annotation_data: DiarizationAnnotationCreate, + current_user: User = Depends(current_active_user), +): + """ + Create annotation for speaker identification correction. + + - Validates user owns conversation + - Creates annotation record (NOT applied to transcript yet) + - Annotation is marked as unprocessed (processed=False) + - Visual indication in UI (strikethrough + corrected name) + """ + try: + # Verify conversation ownership + conversation = await Conversation.find_one( + Conversation.conversation_id == annotation_data.conversation_id, + Conversation.user_id == current_user.user_id, + ) + if not conversation: + raise HTTPException(status_code=404, detail="Conversation not found") + + # Validate segment index + active_transcript = conversation.active_transcript + if ( + not active_transcript + or annotation_data.segment_index >= len(active_transcript.segments) + ): + raise HTTPException(status_code=400, detail="Invalid segment index") + + # Create annotation (NOT applied yet) + annotation = Annotation( + annotation_type=AnnotationType.DIARIZATION, + user_id=current_user.user_id, + conversation_id=annotation_data.conversation_id, + segment_index=annotation_data.segment_index, + original_speaker=annotation_data.original_speaker, + corrected_speaker=annotation_data.corrected_speaker, + segment_start_time=annotation_data.segment_start_time, + original_text="", # Not used for diarization + corrected_text="", # Not used for diarization + status=annotation_data.status, + processed=False, # Not applied or sent to training yet + ) + await annotation.save() + logger.info( + f"Created diarization annotation {annotation.id} for conversation {annotation_data.conversation_id} segment 
@router.get("/diarization/{conversation_id}", response_model=List[AnnotationResponse])
async def get_diarization_annotations(
    conversation_id: str,
    current_user: User = Depends(current_active_user),
):
    """Return every diarization annotation the current user has made on a conversation."""
    try:
        query = Annotation.find(
            Annotation.annotation_type == AnnotationType.DIARIZATION,
            Annotation.conversation_id == conversation_id,
            Annotation.user_id == current_user.user_id,
        )
        records = await query.to_list()
        return [AnnotationResponse.model_validate(record) for record in records]

    except Exception as e:
        logger.error(f"Error fetching diarization annotations: {e}", exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=f"Failed to fetch diarization annotations: {str(e)}",
        )
@router.post("/diarization/{conversation_id}/apply")
async def apply_diarization_annotations(
    conversation_id: str,
    current_user: User = Depends(current_active_user),
):
    """
    Apply pending diarization annotations to create new transcript version.

    - Finds all unprocessed diarization annotations for conversation
    - Creates NEW transcript version with corrected speaker labels
    - Marks annotations as processed (processed=True, processed_by="apply")
    - Chains memory reprocessing since speaker changes affect meaning
    - Returns job status with new version_id

    Raises:
        HTTPException 404: conversation not found / no active transcript.
        HTTPException 500: unexpected failure.
    """
    try:
        # Verify conversation ownership
        conversation = await Conversation.find_one(
            Conversation.conversation_id == conversation_id,
            Conversation.user_id == current_user.user_id,
        )
        if not conversation:
            raise HTTPException(status_code=404, detail="Conversation not found")

        # Get unprocessed diarization annotations
        annotations = await Annotation.find(
            Annotation.annotation_type == AnnotationType.DIARIZATION,
            Annotation.conversation_id == conversation_id,
            Annotation.user_id == current_user.user_id,
            Annotation.processed == False,  # Only unprocessed
        ).to_list()

        if not annotations:
            return JSONResponse(
                content={"message": "No pending annotations to apply", "applied_count": 0}
            )

        # Get active transcript version
        active_transcript = conversation.active_transcript
        if not active_transcript:
            raise HTTPException(status_code=404, detail="No active transcript found")

        # Create NEW transcript version with corrected speakers
        import uuid
        new_version_id = str(uuid.uuid4())

        # Index corrections by segment for O(1) lookup instead of scanning the
        # annotation list once per segment (O(segments * annotations) before).
        # setdefault keeps the FIRST annotation per segment, matching the
        # previous next()-based first-match semantics.
        corrections_by_segment = {}
        for a in annotations:
            corrections_by_segment.setdefault(a.segment_index, a)

        # Copy segments and apply corrections
        corrected_segments = []
        for segment_idx, segment in enumerate(active_transcript.segments):
            corrected_segment = segment.model_copy()
            annotation_for_segment = corrections_by_segment.get(segment_idx)
            if annotation_for_segment:
                corrected_segment.speaker = annotation_for_segment.corrected_speaker
            corrected_segments.append(corrected_segment)

        # Add new version
        conversation.add_transcript_version(
            version_id=new_version_id,
            transcript=active_transcript.transcript,  # Same transcript text
            words=active_transcript.words,  # Same word timings
            segments=corrected_segments,  # Corrected speaker labels
            provider=active_transcript.provider,
            model=active_transcript.model,
            processing_time_seconds=None,
            metadata={
                "reprocessing_type": "diarization_annotations",
                "source_version_id": active_transcript.version_id,
                "trigger": "manual_annotation_apply",
                "applied_annotation_count": len(annotations),
            },
            set_as_active=True,
        )

        await conversation.save()
        logger.info(
            f"Created new transcript version {new_version_id} with {len(annotations)} diarization corrections"
        )

        # Mark annotations as processed.
        # NOTE(review): unlike the unified apply endpoint, status is NOT set
        # to ACCEPTED here — confirm whether that asymmetry is intentional.
        for annotation in annotations:
            annotation.processed = True
            annotation.processed_at = datetime.now(timezone.utc)
            annotation.processed_by = "apply"
            await annotation.save()

        # Chain memory reprocessing
        from advanced_omi_backend.models.job import JobPriority
        from advanced_omi_backend.workers.memory_jobs import enqueue_memory_processing

        enqueue_memory_processing(
            client_id=conversation.client_id,
            user_id=current_user.user_id,
            user_email=current_user.email,
            conversation_id=conversation_id,
            priority=JobPriority.NORMAL,
        )

        return JSONResponse(content={
            "message": "Diarization annotations applied",
            "version_id": new_version_id,
            "applied_count": len(annotations),
            "status": "success"
        })

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error applying diarization annotations: {e}", exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=f"Failed to apply diarization annotations: {str(e)}",
        )
@router.post("/{conversation_id}/apply")
async def apply_all_annotations(
    conversation_id: str,
    current_user: User = Depends(current_active_user),
):
    """
    Apply all pending annotations (diarization + transcript) to create new version.

    - Finds all unprocessed annotations (both DIARIZATION and TRANSCRIPT types)
    - Creates ONE new transcript version with all changes applied
    - Marks all annotations as processed and ACCEPTED
    - Triggers memory reprocessing once

    Raises:
        HTTPException 404: conversation not found / no active transcript.
        HTTPException 500: unexpected failure.
    """
    try:
        # Verify conversation ownership
        conversation = await Conversation.find_one(
            Conversation.conversation_id == conversation_id,
            Conversation.user_id == current_user.user_id,
        )
        if not conversation:
            raise HTTPException(status_code=404, detail="Conversation not found")

        # Get ALL unprocessed annotations (both types)
        annotations = await Annotation.find(
            Annotation.conversation_id == conversation_id,
            Annotation.user_id == current_user.user_id,
            Annotation.processed == False,
        ).to_list()

        if not annotations:
            return JSONResponse(content={
                "message": "No pending annotations to apply",
                "diarization_count": 0,
                "transcript_count": 0,
            })

        # Separate by type
        diarization_annotations = [a for a in annotations if a.annotation_type == AnnotationType.DIARIZATION]
        transcript_annotations = [a for a in annotations if a.annotation_type == AnnotationType.TRANSCRIPT]

        # Get active transcript
        active_transcript = conversation.active_transcript
        if not active_transcript:
            raise HTTPException(status_code=404, detail="No active transcript found")

        # Create new version with ALL corrections applied
        import uuid
        new_version_id = str(uuid.uuid4())

        # Index corrections by segment for O(1) lookup instead of a next()
        # scan per segment (O(segments * annotations) before). setdefault
        # keeps the FIRST annotation per segment, matching the previous
        # next()-based first-match semantics.
        diar_by_segment = {}
        for a in diarization_annotations:
            diar_by_segment.setdefault(a.segment_index, a)
        text_by_segment = {}
        for a in transcript_annotations:
            text_by_segment.setdefault(a.segment_index, a)

        corrected_segments = []
        for segment_idx, segment in enumerate(active_transcript.segments):
            corrected_segment = segment.model_copy()

            # Apply diarization correction (if exists)
            diar_annotation = diar_by_segment.get(segment_idx)
            if diar_annotation:
                corrected_segment.speaker = diar_annotation.corrected_speaker

            # Apply transcript correction (if exists)
            transcript_annotation = text_by_segment.get(segment_idx)
            if transcript_annotation:
                corrected_segment.text = transcript_annotation.corrected_text

            corrected_segments.append(corrected_segment)

        # Add new version
        conversation.add_transcript_version(
            version_id=new_version_id,
            transcript=active_transcript.transcript,
            words=active_transcript.words,  # Preserved (may be misaligned for text edits)
            segments=corrected_segments,
            provider=active_transcript.provider,
            model=active_transcript.model,
            metadata={
                "reprocessing_type": "unified_annotations",
                "source_version_id": active_transcript.version_id,
                "trigger": "manual_annotation_apply",
                "diarization_count": len(diarization_annotations),
                "transcript_count": len(transcript_annotations),
            },
            set_as_active=True,
        )

        await conversation.save()
        logger.info(
            f"Applied {len(annotations)} annotations (diarization: {len(diarization_annotations)}, transcript: {len(transcript_annotations)})"
        )

        # Mark all annotations as processed
        for annotation in annotations:
            annotation.processed = True
            annotation.processed_at = datetime.now(timezone.utc)
            annotation.processed_by = "apply"
            annotation.status = AnnotationStatus.ACCEPTED
            await annotation.save()

        # Trigger memory reprocessing (once for all changes)
        from advanced_omi_backend.models.job import JobPriority
        from advanced_omi_backend.workers.memory_jobs import enqueue_memory_processing

        enqueue_memory_processing(
            client_id=conversation.client_id,
            user_id=current_user.user_id,
            user_email=current_user.email,
            conversation_id=conversation_id,
            priority=JobPriority.NORMAL,
        )

        return JSONResponse(content={
            "message": f"Applied {len(diarization_annotations)} diarization and {len(transcript_annotations)} transcript annotations",
            "version_id": new_version_id,
            "diarization_count": len(diarization_annotations),
            "transcript_count": len(transcript_annotations),
            "status": "success",
        })

    except HTTPException:
        raise
    except Exception as e:
        logger.error(f"Error applying annotations: {e}", exc_info=True)
        raise HTTPException(
            status_code=500,
            detail=f"Failed to apply annotations: {str(e)}",
        )
async def upload_audio_from_drive_folder(
    gdrive_folder_id: str = Query(..., description="Google Drive Folder ID containing audio files (e.g., the string after /folders/ in the URL)"),
    current_user: User = Depends(current_superuser),
    device_name: str = Query(default="upload"),
):
    """Download audio files from a Google Drive folder and queue them for processing.

    Superuser-only. Validation failures from the Drive download are surfaced
    as HTTP 400; successful downloads are handed to the audio controller with
    source="gdrive".
    """
    # Pull the files from Drive first; a validation error means the folder or
    # its contents were unusable, which is a client error (400).
    try:
        files = await download_audio_files_from_drive(gdrive_folder_id, current_user.id)
    except AudioValidationError as e:
        raise HTTPException(status_code=400, detail=str(e))

    # Delegate persistence + job enqueueing to the controller.
    return await audio_controller.upload_and_process_audio_files(
        current_user, files, device_name, source="gdrive"
    )