From 790ba693c4b87ffa92929e8929112d06fbfe9fcd Mon Sep 17 00:00:00 2001 From: Ankush Malaker <43288948+AnkushMalaker@users.noreply.github.com> Date: Sat, 20 Dec 2025 03:25:10 +0000 Subject: [PATCH] Update configuration files for model providers and Docker setup - Changed LLM, embedding, and STT providers in `config.yml` to OpenAI and Deepgram. - Removed read-only flag from `config.yml` in Docker Compose files to allow UI configuration saving. - Updated memory configuration endpoint to accept plain text for YAML input. --- backends/advanced/docker-compose.yml | 4 +- .../routers/modules/system_routes.py | 4 +- config.yml | 389 +++++++++--------- 3 files changed, 188 insertions(+), 209 deletions(-) diff --git a/backends/advanced/docker-compose.yml b/backends/advanced/docker-compose.yml index dced4041..34f12c53 100644 --- a/backends/advanced/docker-compose.yml +++ b/backends/advanced/docker-compose.yml @@ -12,7 +12,7 @@ services: - ./data/audio_chunks:/app/audio_chunks - ./data/debug_dir:/app/debug_dir - ./data:/app/data - - ../../config.yml:/app/config.yml:ro + - ../../config.yml:/app/config.yml # Removed :ro to allow UI config saving environment: - DEEPGRAM_API_KEY=${DEEPGRAM_API_KEY} - MISTRAL_API_KEY=${MISTRAL_API_KEY} @@ -65,7 +65,7 @@ services: - ./start-workers.sh:/app/start-workers.sh - ./data/audio_chunks:/app/audio_chunks - ./data:/app/data - - ../../config.yml:/app/config.yml:ro + - ../../config.yml:/app/config.yml # Removed :ro for consistency environment: - DEEPGRAM_API_KEY=${DEEPGRAM_API_KEY} - MISTRAL_API_KEY=${MISTRAL_API_KEY} diff --git a/backends/advanced/src/advanced_omi_backend/routers/modules/system_routes.py b/backends/advanced/src/advanced_omi_backend/routers/modules/system_routes.py index e51c036c..5737171c 100644 --- a/backends/advanced/src/advanced_omi_backend/routers/modules/system_routes.py +++ b/backends/advanced/src/advanced_omi_backend/routers/modules/system_routes.py @@ -82,7 +82,7 @@ async def get_memory_config_raw(current_user: User = 
Depends(current_superuser)) @router.post("/admin/memory/config/raw") async def update_memory_config_raw( - config_yaml: str = Body(..., embed=True), + config_yaml: str = Body(..., media_type="text/plain"), current_user: User = Depends(current_superuser) ): """Update memory configuration YAML and hot reload. Admin only.""" @@ -91,7 +91,7 @@ async def update_memory_config_raw( @router.post("/admin/memory/config/validate") async def validate_memory_config( - config_yaml: str = Body(..., embed=True), + config_yaml: str = Body(..., media_type="text/plain"), current_user: User = Depends(current_superuser) ): """Validate memory configuration YAML syntax. Admin only.""" diff --git a/config.yml b/config.yml index 678e6d70..ac412d1e 100644 --- a/config.yml +++ b/config.yml @@ -1,220 +1,199 @@ defaults: - llm: emberfang-llm - embedding: emberfang-embed - stt: stt-parakeet-batch + llm: openai-llm + embedding: openai-embed + stt: stt-deepgram tts: tts-http vector_store: vs-qdrant - models: - # llama cpp llm, name can be anything - - name: emberfang-llm - description: Emberfang One LLM - model_type: llm - model_provider: openai - model_name: gpt-oss-20b-f16 - model_url: http://192.168.1.166:8084/v1 - api_key: "1234" - model_params: - temperature: 0.2 - max_tokens: 2000 - model_output: json - - - name: emberfang-embed - description: Emberfang embeddings (nomic-embed-text) - model_type: embedding - model_provider: openai - model_name: nomic-embed-text-v1.5 - model_url: http://192.168.1.166:8084/v1 - api_key: "1234" - embedding_dimensions: 768 - model_output: vector - - # Local Ollama LLM (OpenAI-compatible) - - name: local-llm - description: Local Ollama LLM - model_type: llm - model_provider: ollama - api_family: openai - model_name: llama3.1:latest - model_url: http://localhost:11434/v1 - api_key: ${OPENAI_API_KEY:-ollama} - model_params: - temperature: 0.2 - max_tokens: 2000 - model_output: json - - # Local Ollama embedding model (OpenAI-compatible embeddings endpoint) - - 
name: local-embed - description: Local embeddings via Ollama nomic-embed-text - model_type: embedding - model_provider: ollama - api_family: openai - model_name: nomic-embed-text:latest - model_url: http://localhost:11434/v1 - api_key: ${OPENAI_API_KEY:-ollama} - embedding_dimensions: 768 - model_output: vector - - # Hosted OpenAI (optional) - - name: openai-llm - description: OpenAI GPT-4o-mini - model_type: llm - model_provider: openai - api_family: openai - model_name: gpt-4o-mini - model_url: https://api.openai.com/v1 - api_key: ${OPENAI_API_KEY:-} - model_params: - temperature: 0.2 - max_tokens: 2000 - model_output: json - - - name: openai-embed - description: OpenAI text-embedding-3-small - model_type: embedding - model_provider: openai - api_family: openai - model_name: text-embedding-3-small - model_url: https://api.openai.com/v1 - api_key: ${OPENAI_API_KEY:-} - embedding_dimensions: 1536 - model_output: vector - - # Hosted Groq (OpenAI-compatible chat) - - name: groq-llm - description: Groq LLM via OpenAI-compatible API - model_type: llm - model_provider: groq - api_family: openai - model_name: llama-3.1-70b-versatile - model_url: https://api.groq.com/openai/v1 - api_key: ${GROQ_API_KEY:-} - model_params: - temperature: 0.2 - max_tokens: 2000 - model_output: json - - # Vector store (Qdrant) - - name: vs-qdrant - description: Qdrant vector database - model_type: vector_store - model_provider: qdrant - api_family: qdrant - model_url: http://qdrant:6333 - model_params: - host: qdrant - port: 6333 - collection_name: omi_memories - - # STT (Parakeet over HTTP, batch transcription) - - name: stt-parakeet-batch - description: Parakeet NeMo ASR (batch) - model_type: stt - model_provider: parakeet - api_family: http - model_url: http://172.17.0.1:8767 - api_key: "" - operations: - stt_transcribe: - method: POST - path: /transcribe - content_type: multipart/form-data - response: - type: json - extract: - text: text - words: words - segments: segments - - # STT 
(Deepgram over HTTP, config-driven) - - name: stt-deepgram - description: Deepgram Nova 3 (batch) - model_type: stt - model_provider: deepgram - api_family: http - model_url: https://api.deepgram.com/v1 - api_key: ${DEEPGRAM_API_KEY:-} - operations: - stt_transcribe: - method: POST - path: /listen - headers: - Authorization: Token ${DEEPGRAM_API_KEY:-} - Content-Type: audio/raw - query: - model: nova-3 - language: multi - smart_format: "true" - punctuate: "true" - diarize: false - encoding: linear16 - sample_rate: 16000 - channels: "1" - response: - type: json - extract: - text: results.channels[0].alternatives[0].transcript - words: results.channels[0].alternatives[0].words - segments: results.channels[0].alternatives[0].paragraphs.paragraphs - - # TTS (placeholder; configure to your provider) - - name: tts-http - description: Generic JSON TTS endpoint - model_type: tts - model_provider: custom - api_family: http - model_url: http://localhost:9000 - operations: - tts_synthesize: - method: POST - path: /synthesize - headers: - Content-Type: application/json - response: - type: json - - # STT streaming (Parakeet via WebSocket; config-driven template) - - name: stt-parakeet-stream - description: Parakeet streaming transcription over WebSocket - model_type: stt_stream - model_provider: parakeet - api_family: websocket - model_url: ws://localhost:9001/stream - operations: - start: - message: - type: transcribe - config: - vad_enabled: true - vad_silence_ms: 1000 - time_interval_seconds: 30 - return_interim_results: true - min_audio_seconds: 0.5 - chunk_header: - message: - type: audio_chunk - rate: 16000 - width: 2 - channels: 1 - end: - message: - type: stop - expect: - interim_type: interim_result - final_type: final_result +- name: emberfang-llm + description: Emberfang One LLM + model_type: llm + model_provider: openai + model_name: gpt-oss-20b-f16 + model_url: http://192.168.1.166:8084/v1 + api_key: '1234' + model_params: + temperature: 0.2 + max_tokens: 2000 + 
model_output: json +- name: emberfang-embed + description: Emberfang embeddings (nomic-embed-text) + model_type: embedding + model_provider: openai + model_name: nomic-embed-text-v1.5 + model_url: http://192.168.1.166:8084/v1 + api_key: '1234' + embedding_dimensions: 768 + model_output: vector +- name: local-llm + description: Local Ollama LLM + model_type: llm + model_provider: ollama + api_family: openai + model_name: llama3.1:latest + model_url: http://localhost:11434/v1 + api_key: ${OPENAI_API_KEY:-ollama} + model_params: + temperature: 0.2 + max_tokens: 2000 + model_output: json +- name: local-embed + description: Local embeddings via Ollama nomic-embed-text + model_type: embedding + model_provider: ollama + api_family: openai + model_name: nomic-embed-text:latest + model_url: http://localhost:11434/v1 + api_key: ${OPENAI_API_KEY:-ollama} + embedding_dimensions: 768 + model_output: vector +- name: openai-llm + description: OpenAI GPT-4o-mini + model_type: llm + model_provider: openai + api_family: openai + model_name: gpt-4o-mini + model_url: https://api.openai.com/v1 + api_key: ${OPENAI_API_KEY:-} + model_params: + temperature: 0.2 + max_tokens: 2000 + model_output: json +- name: openai-embed + description: OpenAI text-embedding-3-small + model_type: embedding + model_provider: openai + api_family: openai + model_name: text-embedding-3-small + model_url: https://api.openai.com/v1 + api_key: ${OPENAI_API_KEY:-} + embedding_dimensions: 1536 + model_output: vector +- name: groq-llm + description: Groq LLM via OpenAI-compatible API + model_type: llm + model_provider: groq + api_family: openai + model_name: llama-3.1-70b-versatile + model_url: https://api.groq.com/openai/v1 + api_key: ${GROQ_API_KEY:-} + model_params: + temperature: 0.2 + max_tokens: 2000 + model_output: json +- name: vs-qdrant + description: Qdrant vector database + model_type: vector_store + model_provider: qdrant + api_family: qdrant + model_url: 
http://${QDRANT_BASE_URL:-qdrant}:${QDRANT_PORT:-6333} + model_params: + host: ${QDRANT_BASE_URL:-qdrant} + port: ${QDRANT_PORT:-6333} + collection_name: omi_memories +- name: stt-parakeet-batch + description: Parakeet NeMo ASR (batch) + model_type: stt + model_provider: parakeet + api_family: http + model_url: http://172.17.0.1:8767 + api_key: '' + operations: + stt_transcribe: + method: POST + path: /transcribe + content_type: multipart/form-data + response: + type: json extract: text: text words: words segments: segments - +- name: stt-deepgram + description: Deepgram Nova 3 (batch) + model_type: stt + model_provider: deepgram + api_family: http + model_url: https://api.deepgram.com/v1 + api_key: ${DEEPGRAM_API_KEY:-} + operations: + stt_transcribe: + method: POST + path: /listen + headers: + Authorization: Token ${DEEPGRAM_API_KEY:-} + Content-Type: audio/raw + query: + model: nova-3 + language: multi + smart_format: 'true' + punctuate: 'true' + diarize: false + encoding: linear16 + sample_rate: 16000 + channels: '1' + response: + type: json + extract: + text: results.channels[0].alternatives[0].transcript + words: results.channels[0].alternatives[0].words + segments: results.channels[0].alternatives[0].paragraphs.paragraphs +- name: tts-http + description: Generic JSON TTS endpoint + model_type: tts + model_provider: custom + api_family: http + model_url: http://localhost:9000 + operations: + tts_synthesize: + method: POST + path: /synthesize + headers: + Content-Type: application/json + response: + type: json +- name: stt-parakeet-stream + description: Parakeet streaming transcription over WebSocket + model_type: stt_stream + model_provider: parakeet + api_family: websocket + model_url: ws://localhost:9001/stream + operations: + start: + message: + type: transcribe + config: + vad_enabled: true + vad_silence_ms: 1000 + time_interval_seconds: 30 + return_interim_results: true + min_audio_seconds: 0.5 + chunk_header: + message: + type: audio_chunk + rate: 16000 
+ width: 2 + channels: 1 + end: + message: + type: stop + expect: + interim_type: interim_result + final_type: final_result + extract: + text: text + words: words + segments: segments memory: - provider: chronicle # chronicle | openmemory_mcp | mycelia + provider: chronicle timeout_seconds: 1200 extraction: enabled: true - # Optional custom prompt; if omitted, a built-in default is used - prompt: | - Extract important information from this conversation and return a JSON object with an array named "facts". Include personal preferences, plans, names, dates, locations, numbers, and key details. Keep items concise and useful. + prompt: 'Extract important information from this conversation and return a JSON + object with an array named "facts". Include personal preferences, plans, names, + dates, locations, numbers, and key details. Keep items concise and useful. + + ' openmemory_mcp: server_url: http://localhost:8765 client_name: chronicle