diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 33f133b..e41e8e1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -255,16 +255,14 @@ jobs: NEO4J_URI: ${{ secrets.NEO4J_STAGING_URI }} NEO4J_USER: neo4j NEO4J_PASSWORD: ${{ secrets.NEO4J_STAGING_PASSWORD }} - # Anthropic API for Graphiti entity extraction - ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} - ANTHROPIC_MODEL: claude-sonnet-4-20250514 # Vertex AI for embeddings EMBEDDING_PROVIDER: vertex-ai VERTEX_AI_PROJECT: ai-knowledge-base-42 VERTEX_AI_LOCATION: us-central1 # LLM configuration LLM_PROVIDER: gemini - GEMINI_MODEL_ID: gemini-2.0-flash + GEMINI_INTAKE_MODEL: gemini-2.5-flash + GEMINI_CONVERSATION_MODEL: gemini-2.5-flash GCP_PROJECT_ID: ai-knowledge-base-42 steps: - uses: actions/checkout@v4 diff --git a/deploy/terraform/cloudrun-jobs.tf b/deploy/terraform/cloudrun-jobs.tf index ac4292e..4985315 100644 --- a/deploy/terraform/cloudrun-jobs.tf +++ b/deploy/terraform/cloudrun-jobs.tf @@ -140,6 +140,11 @@ resource "google_cloud_run_v2_job" "pipeline" { value = "true" } + env { + name = "GEMINI_INTAKE_MODEL" + value = "gemini-2.5-flash" + } + env { name = "GRAPHITI_BULK_ENABLED" value = "true" diff --git a/deploy/terraform/cloudrun-slack.tf b/deploy/terraform/cloudrun-slack.tf index afc9f94..a07bdab 100644 --- a/deploy/terraform/cloudrun-slack.tf +++ b/deploy/terraform/cloudrun-slack.tf @@ -39,16 +39,6 @@ resource "google_cloud_run_v2_service" "slack_bot" { } } - env { - name = "ANTHROPIC_API_KEY" - value_source { - secret_key_ref { - secret = google_secret_manager_secret.anthropic_api_key.secret_id - version = "latest" - } - } - } - # Graph Database Configuration (Graphiti + Neo4j) env { name = "GRAPH_BACKEND" @@ -82,7 +72,7 @@ resource "google_cloud_run_v2_service" "slack_bot" { } env { - name = "GEMINI_MODEL_ID" + name = "GEMINI_CONVERSATION_MODEL" value = "gemini-2.5-flash" } @@ -106,6 +96,11 @@ resource "google_cloud_run_v2_service" "slack_bot" { value = 
var.region } + env { + name = "GOOGLE_GENAI_USE_VERTEXAI" + value = "true" + } + # Health check startup_probe { http_get { @@ -134,7 +129,6 @@ resource "google_cloud_run_v2_service" "slack_bot" { depends_on = [ google_secret_manager_secret_version.slack_bot_token, google_secret_manager_secret_version.slack_signing_secret, - google_secret_manager_secret_version.anthropic_api_key, ] } diff --git a/deploy/terraform/secret-manager.tf b/deploy/terraform/secret-manager.tf index c2a0bef..77eb566 100644 --- a/deploy/terraform/secret-manager.tf +++ b/deploy/terraform/secret-manager.tf @@ -180,12 +180,6 @@ resource "google_secret_manager_secret_iam_member" "slack_signing_secret_access" member = "serviceAccount:${google_service_account.slack_bot.email}" } -resource "google_secret_manager_secret_iam_member" "slack_anthropic_access" { - secret_id = google_secret_manager_secret.anthropic_api_key.secret_id - role = "roles/secretmanager.secretAccessor" - member = "serviceAccount:${google_service_account.slack_bot.email}" -} - # Slack Bot Neo4j password access resource "google_secret_manager_secret_iam_member" "slack_neo4j_password_access" { secret_id = google_secret_manager_secret.neo4j_password.secret_id @@ -213,12 +207,6 @@ resource "google_secret_manager_secret_iam_member" "jobs_confluence_token_access member = "serviceAccount:${google_service_account.jobs.email}" } -resource "google_secret_manager_secret_iam_member" "jobs_anthropic_access" { - secret_id = google_secret_manager_secret.anthropic_api_key.secret_id - role = "roles/secretmanager.secretAccessor" - member = "serviceAccount:${google_service_account.jobs.email}" -} - # Jobs service account Neo4j password access resource "google_secret_manager_secret_iam_member" "jobs_neo4j_password_access" { secret_id = google_secret_manager_secret.neo4j_password.secret_id diff --git a/deploy/terraform/staging.tf b/deploy/terraform/staging.tf index 31c1b72..f7ea4f3 100644 --- a/deploy/terraform/staging.tf +++ 
b/deploy/terraform/staging.tf @@ -170,16 +170,6 @@ resource "google_cloud_run_v2_service" "slack_bot_staging" { } } - env { - name = "ANTHROPIC_API_KEY" - value_source { - secret_key_ref { - secret = google_secret_manager_secret.anthropic_api_key.secret_id - version = "latest" - } - } - } - # Graph Database Configuration (Graphiti + Neo4j) env { name = "GRAPH_BACKEND" @@ -213,7 +203,7 @@ resource "google_cloud_run_v2_service" "slack_bot_staging" { } env { - name = "GEMINI_MODEL_ID" + name = "GEMINI_CONVERSATION_MODEL" value = "gemini-2.5-flash" } @@ -237,6 +227,11 @@ resource "google_cloud_run_v2_service" "slack_bot_staging" { value = var.region } + env { + name = "GOOGLE_GENAI_USE_VERTEXAI" + value = "true" + } + env { name = "ENVIRONMENT" value = "staging" @@ -326,12 +321,6 @@ resource "google_secret_manager_secret_iam_member" "slack_bot_staging_signing_ac member = "serviceAccount:${google_service_account.slack_bot_staging.email}" } -resource "google_secret_manager_secret_iam_member" "slack_bot_staging_anthropic_access" { - secret_id = google_secret_manager_secret.anthropic_api_key.id - role = "roles/secretmanager.secretAccessor" - member = "serviceAccount:${google_service_account.slack_bot_staging.email}" -} - # Reference existing Neo4j password secret (created in secret-manager.tf) data "google_secret_manager_secret" "neo4j_password_secret" { secret_id = "neo4j-password" @@ -466,6 +455,11 @@ resource "google_cloud_run_v2_job" "confluence_sync_staging" { value = "true" } + env { + name = "GEMINI_INTAKE_MODEL" + value = "gemini-2.5-flash" + } + # Adaptive bulk indexing (TCP-style congestion control) env { name = "GRAPHITI_BULK_ENABLED" @@ -476,17 +470,6 @@ resource "google_cloud_run_v2_job" "confluence_sync_staging" { name = "CHECKPOINT_PERSIST_PATH" value = "/mnt/pipeline-state/staging-knowledge-base.db" } - - # Keep Anthropic key as fallback (optional) - env { - name = "ANTHROPIC_API_KEY" - value_source { - secret_key_ref { - secret = 
google_secret_manager_secret.anthropic_api_key.secret_id - version = "latest" - } - } - } } volumes { diff --git a/src/knowledge_base/config.py b/src/knowledge_base/config.py index e939e38..b23d4c7 100644 --- a/src/knowledge_base/config.py +++ b/src/knowledge_base/config.py @@ -33,7 +33,7 @@ class Settings(BaseSettings): REDIS_URL: str = "redis://redis:6379/0" # LLM Provider Selection - LLM_PROVIDER: str = "claude" # 'ollama', 'claude', or empty for auto-select + LLM_PROVIDER: str = "gemini" # 'gemini', 'claude', 'vertex-claude', or 'ollama' # Ollama (local LLM) OLLAMA_BASE_URL: str = "http://ollama:11434" @@ -45,10 +45,13 @@ class Settings(BaseSettings): EMBEDDING_MODEL: str = "all-MiniLM-L6-v2" # sentence-transformer model INDEX_BATCH_SIZE: int = 100 - # Anthropic (Claude) + # Gemini model settings (separate models for different use cases) + # Gemini 2.5 Flash supports up to 65K output tokens (required for graphiti-core's 16384) + GEMINI_INTAKE_MODEL: str = "gemini-2.5-flash" # Graphiti entity extraction (intake pipeline) + GEMINI_CONVERSATION_MODEL: str = "gemini-2.5-flash" # Slack bot RAG conversations + + # Anthropic (Claude) — used when LLM_PROVIDER=claude ANTHROPIC_API_KEY: str = "" - # Using Sonnet for Graphiti entity extraction - Haiku doesn't support the max_tokens - # that graphiti-core internally uses (16384). Sonnet is more expensive but works. 
ANTHROPIC_MODEL: str = "claude-sonnet-4-20250514" METADATA_BATCH_SIZE: int = 10 @@ -113,9 +116,9 @@ class Settings(BaseSettings): VERTEX_AI_LOCATION: str = "us-central1" # Region for Vertex AI VERTEX_AI_EMBEDDING_MODEL: str = "text-embedding-005" # Embedding model VERTEX_AI_EMBEDDING_DIMENSION: int = 768 # Embedding dimension - # Gemini 2.5 Flash supports up to 65K output tokens (required for graphiti-core's 16384) - # Gemini 2.0 Flash only supports 8K output which causes errors with graphiti - VERTEX_AI_LLM_MODEL: str = "gemini-2.5-flash" # Gemini model for entity extraction + # DEPRECATED: Use GEMINI_INTAKE_MODEL / GEMINI_CONVERSATION_MODEL instead. + # Kept for backward compat with existing deployments that set this env var. + VERTEX_AI_LLM_MODEL: str = "" VERTEX_AI_CLAUDE_MODEL: str = "claude-sonnet-4@20250514" # Claude via Vertex AI VERTEX_AI_BATCH_SIZE: int = 20 # Max texts per embedding batch (keep under 20k token limit) VERTEX_AI_TIMEOUT: float = 60.0 # API timeout in seconds @@ -144,6 +147,16 @@ def confluence_space_list(self) -> list[str]: return [] return [s.strip() for s in self.CONFLUENCE_SPACE_KEYS.split(",") if s.strip()] + @model_validator(mode="after") + def migrate_vertex_ai_llm_model(self) -> "Settings": + """Backward compat: copy deprecated VERTEX_AI_LLM_MODEL into each GEMINI_* model that was not explicitly set (env var, .env file, or init kwarg) or is empty.""" + if self.VERTEX_AI_LLM_MODEL: + if "GEMINI_INTAKE_MODEL" not in self.model_fields_set or not self.GEMINI_INTAKE_MODEL: + self.GEMINI_INTAKE_MODEL = self.VERTEX_AI_LLM_MODEL + if "GEMINI_CONVERSATION_MODEL" not in self.model_fields_set or not self.GEMINI_CONVERSATION_MODEL: + self.GEMINI_CONVERSATION_MODEL = self.VERTEX_AI_LLM_MODEL + return self + @model_validator(mode="after") def check_security_settings(self) -> "Settings": """Validate security settings.""" diff --git a/src/knowledge_base/graph/graphiti_client.py b/src/knowledge_base/graph/graphiti_client.py index 6058af3..8500aae 100644 --- a/src/knowledge_base/graph/graphiti_client.py +++ b/src/knowledge_base/graph/graphiti_client.py @@ -205,34 +205,24 @@ async def _create_neo4j_client(self) ->
"Graphiti": def _get_llm_client(self): """Get the LLM client for Graphiti entity extraction. - Supports multiple LLM providers: - - 'claude'/'anthropic': Uses Anthropic Claude API - - 'gemini': Uses Google Gemini API - - Falls back based on available credentials. + Dispatches based on LLM_PROVIDER setting. No silent fallback — + if the configured provider is not available, raises an error. """ - from graphiti_core.llm_client import LLMConfig - llm_provider = settings.LLM_PROVIDER.lower() - # Try Gemini if explicitly configured or as fallback if llm_provider == "gemini": return self._get_gemini_client() - # Try Anthropic - if llm_provider in ("claude", "anthropic", ""): - if settings.ANTHROPIC_API_KEY: - return self._get_anthropic_client() - else: - # Fall back to Gemini if Anthropic key not available - logger.warning( - "ANTHROPIC_API_KEY not set, falling back to Gemini for entity extraction" + if llm_provider in ("claude", "anthropic"): + if not settings.ANTHROPIC_API_KEY: + raise GraphitiClientError( + "LLM_PROVIDER is set to 'claude' but ANTHROPIC_API_KEY is not configured." ) - return self._get_gemini_client() + return self._get_anthropic_client() raise GraphitiClientError( - f"Unsupported LLM_PROVIDER: {llm_provider}. " - "Use 'claude', 'anthropic', or 'gemini'." + f"Unsupported LLM_PROVIDER for Graphiti: '{llm_provider}'. " + "Use 'gemini' or 'claude'." 
) def _get_anthropic_client(self): @@ -273,7 +263,7 @@ def _get_gemini_client(self): # Check for Google API key (direct API access) google_api_key = os.environ.get("GOOGLE_API_KEY", "") - model = settings.VERTEX_AI_LLM_MODEL or "gemini-2.0-flash" + model = settings.GEMINI_INTAKE_MODEL # If we have an API key, use consumer Gemini API if google_api_key: diff --git a/src/knowledge_base/rag/factory.py b/src/knowledge_base/rag/factory.py index d54782a..82ed429 100644 --- a/src/knowledge_base/rag/factory.py +++ b/src/knowledge_base/rag/factory.py @@ -64,55 +64,36 @@ def get_provider(name: str) -> "BaseLLM": async def get_llm(provider: str | None = None) -> "BaseLLM": """Get an LLM instance (main entry point). - Selection order: - 1. Use specified provider if given - 2. Use LLM_PROVIDER from config if set - 3. Auto-select based on availability (Claude if API key exists, else Ollama) + Uses the configured LLM_PROVIDER. No silent fallback — if the configured + provider is not available, raises an error immediately. Args: - provider: Specific provider name, or None for automatic selection + provider: Specific provider name, or None to use LLM_PROVIDER setting Returns: Configured LLM instance Raises: - LLMProviderNotConfiguredError: If no provider is available + LLMProviderNotConfiguredError: If no provider is configured or available """ - # Use specified or configured provider provider_name = provider or settings.LLM_PROVIDER + if not provider_name: + raise LLMProviderNotConfiguredError( + "LLM_PROVIDER is not configured. " + "Set it to 'gemini', 'claude', 'vertex-claude', or 'ollama'.", + provider="none", + ) + + llm = get_provider(provider_name) + if not await llm.is_available(): + raise LLMProviderNotConfiguredError( + f"LLM provider '{provider_name}' is configured but not available. 
" + f"Check your credentials and configuration.", + provider=provider_name, + ) - if provider_name: - llm = get_provider(provider_name) - if await llm.is_available(): - logger.info(f"Using LLM provider: {llm.provider_name}") - return llm - logger.warning(f"Configured provider '{provider_name}' not available") - - # Auto-select: try Claude, then Gemini, then Ollama - if settings.ANTHROPIC_API_KEY: - llm = get_provider("claude") - if await llm.is_available(): - logger.info("Auto-selected Claude LLM provider") - return llm - - # Try Gemini if GCP project is configured - if settings.VERTEX_AI_PROJECT or settings.GCP_PROJECT_ID: - llm = get_provider("gemini") - if await llm.is_available(): - logger.info("Auto-selected Gemini LLM provider") - return llm - - # Fall back to Ollama - llm = get_provider("ollama") - if await llm.is_available(): - logger.info("Auto-selected Ollama LLM provider") - return llm - - raise LLMProviderNotConfiguredError( - "No LLM provider is configured or available. " - "Set ANTHROPIC_API_KEY for Claude, configure GCP project for Gemini, or ensure Ollama is running.", - provider="none", - ) + logger.info(f"Using LLM provider: {llm.provider_name}") + return llm # Import BaseLLM here to avoid circular imports diff --git a/src/knowledge_base/rag/providers/gemini.py b/src/knowledge_base/rag/providers/gemini.py index a33129e..4aa774d 100644 --- a/src/knowledge_base/rag/providers/gemini.py +++ b/src/knowledge_base/rag/providers/gemini.py @@ -45,7 +45,7 @@ def __init__( """ self.project = project or settings.VERTEX_AI_PROJECT or settings.GCP_PROJECT_ID self.location = location or settings.VERTEX_AI_LOCATION - self.model_name = model or settings.VERTEX_AI_LLM_MODEL + self.model_name = model or settings.GEMINI_CONVERSATION_MODEL self.max_output_tokens = max_output_tokens self.temperature = temperature self._model = None diff --git a/tests/test_config_gemini.py b/tests/test_config_gemini.py new file mode 100644 index 0000000..d99ddc6 --- /dev/null +++ 
b/tests/test_config_gemini.py @@ -0,0 +1,76 @@ +"""Tests for Gemini LLM configuration settings.""" + +import os +from unittest.mock import patch + +import pytest + +from knowledge_base.config import Settings + + +class TestGeminiConfig: + """Tests for Gemini-related configuration settings.""" + + def test_llm_provider_default_is_gemini(self): + """Default LLM_PROVIDER should be 'gemini'.""" + with patch.dict(os.environ, {}, clear=True): + s = Settings(_env_file=None) + assert s.LLM_PROVIDER == "gemini" + + def test_gemini_intake_model_default(self): + """Default GEMINI_INTAKE_MODEL should be gemini-2.5-flash.""" + with patch.dict(os.environ, {}, clear=True): + s = Settings(_env_file=None) + assert s.GEMINI_INTAKE_MODEL == "gemini-2.5-flash" + + def test_gemini_conversation_model_default(self): + """Default GEMINI_CONVERSATION_MODEL should be gemini-2.5-flash.""" + with patch.dict(os.environ, {}, clear=True): + s = Settings(_env_file=None) + assert s.GEMINI_CONVERSATION_MODEL == "gemini-2.5-flash" + + def test_vertex_ai_llm_model_backward_compat_intake(self): + """Setting VERTEX_AI_LLM_MODEL should populate GEMINI_INTAKE_MODEL.""" + with patch.dict(os.environ, {"VERTEX_AI_LLM_MODEL": "gemini-2.0-flash"}, clear=True): + s = Settings(_env_file=None) + assert s.GEMINI_INTAKE_MODEL == "gemini-2.0-flash" + + def test_vertex_ai_llm_model_backward_compat_conversation(self): + """Setting VERTEX_AI_LLM_MODEL should populate GEMINI_CONVERSATION_MODEL.""" + with patch.dict(os.environ, {"VERTEX_AI_LLM_MODEL": "gemini-2.0-flash"}, clear=True): + s = Settings(_env_file=None) + assert s.GEMINI_CONVERSATION_MODEL == "gemini-2.0-flash" + + def test_explicit_intake_model_overrides_vertex_ai(self): + """Explicit GEMINI_INTAKE_MODEL should win over VERTEX_AI_LLM_MODEL.""" + with patch.dict(os.environ, { + "VERTEX_AI_LLM_MODEL": "gemini-2.0-flash", + "GEMINI_INTAKE_MODEL": "gemini-2.5-flash", + }, clear=True): + s = Settings(_env_file=None) + assert s.GEMINI_INTAKE_MODEL == 
"gemini-2.5-flash" + + def test_explicit_conversation_model_overrides_vertex_ai(self): + """Explicit GEMINI_CONVERSATION_MODEL should win over VERTEX_AI_LLM_MODEL.""" + with patch.dict(os.environ, { + "VERTEX_AI_LLM_MODEL": "gemini-2.0-flash", + "GEMINI_CONVERSATION_MODEL": "gemini-2.5-pro", + }, clear=True): + s = Settings(_env_file=None) + assert s.GEMINI_CONVERSATION_MODEL == "gemini-2.5-pro" + + def test_different_intake_and_conversation_models(self): + """User can set different models for intake and conversation.""" + with patch.dict(os.environ, { + "GEMINI_INTAKE_MODEL": "gemini-2.5-flash", + "GEMINI_CONVERSATION_MODEL": "gemini-2.5-pro", + }, clear=True): + s = Settings(_env_file=None) + assert s.GEMINI_INTAKE_MODEL == "gemini-2.5-flash" + assert s.GEMINI_CONVERSATION_MODEL == "gemini-2.5-pro" + + def test_llm_provider_configurable(self): + """User can override LLM_PROVIDER to any supported value.""" + with patch.dict(os.environ, {"LLM_PROVIDER": "claude"}, clear=True): + s = Settings(_env_file=None) + assert s.LLM_PROVIDER == "claude" diff --git a/tests/test_llm.py b/tests/test_llm.py index c765e57..3f706cb 100644 --- a/tests/test_llm.py +++ b/tests/test_llm.py @@ -198,8 +198,9 @@ class TestLLMFactory: """Tests for LLM factory and provider selection.""" def test_get_available_providers(self): - """Test that available providers include claude and ollama.""" + """Test that available providers include gemini, claude and ollama.""" providers = get_available_providers() + assert "gemini" in providers assert "claude" in providers assert "ollama" in providers @@ -244,31 +245,24 @@ async def test_get_llm_uses_config_provider(self, monkeypatch): assert llm.provider_name == "ollama" @pytest.mark.asyncio - async def test_get_llm_auto_selects_claude_with_api_key(self): - """Test that get_llm auto-selects Claude when API key is set.""" - # Patch both factory and claude provider settings - with patch("knowledge_base.rag.factory.settings") as factory_settings, \ - 
patch("knowledge_base.rag.providers.claude.settings") as claude_settings: - factory_settings.LLM_PROVIDER = "" # Empty for auto-select - factory_settings.ANTHROPIC_API_KEY = "test-api-key" - - claude_settings.ANTHROPIC_API_KEY = "test-api-key" - claude_settings.ANTHROPIC_MODEL = "claude-3-5-haiku-20241022" + async def test_get_llm_empty_provider_raises_error(self): + """Test that empty LLM_PROVIDER raises an error (no auto-fallback).""" + with patch("knowledge_base.rag.factory.settings") as mock_settings: + mock_settings.LLM_PROVIDER = "" - llm = await get_llm() - assert llm.provider_name == "claude" + with pytest.raises(LLMProviderNotConfiguredError): + await get_llm() @pytest.mark.asyncio - async def test_get_llm_falls_back_to_ollama(self): - """Test that get_llm falls back to Ollama when no Claude key.""" + async def test_get_llm_unavailable_provider_raises_error(self): + """Test that unavailable configured provider raises error (no fallback).""" with patch("knowledge_base.rag.factory.settings") as mock_settings: - mock_settings.LLM_PROVIDER = "" # Empty for auto-select - mock_settings.ANTHROPIC_API_KEY = "" # No Claude key - mock_settings.OLLAMA_BASE_URL = "http://test:11434" - mock_settings.OLLAMA_LLM_MODEL = "test-model" + mock_settings.LLM_PROVIDER = "claude" + mock_settings.ANTHROPIC_API_KEY = "" # Claude not configured - llm = await get_llm() - assert llm.provider_name == "ollama" + with pytest.raises(LLMProviderNotConfiguredError) as exc_info: + await get_llm() + assert "claude" in str(exc_info.value) class TestOllamaLLMProviderInterface: