From a105aabe9c76af88287d454df6b1a4b34185cc64 Mon Sep 17 00:00:00 2001 From: paisley <8197966+su8su@users.noreply.github.com> Date: Wed, 10 Dec 2025 19:18:12 +0800 Subject: [PATCH 1/5] test model connection --- frontend/src/api/setting.ts | 14 + .../components/models/model-detail.tsx | 56 ++- frontend/src/types/setting.ts | 15 + python/configs/providers/dashscope.yaml | 116 +++---- python/configs/providers/deepseek.yaml | 40 +-- python/configs/providers/google.yaml | 101 +++--- python/valuecell/server/api/routers/models.py | 327 ++++++++++++++++++ .../valuecell/server/api/schemas/__init__.py | 4 +- python/valuecell/server/api/schemas/model.py | 29 ++ 9 files changed, 554 insertions(+), 148 deletions(-) diff --git a/frontend/src/api/setting.ts b/frontend/src/api/setting.ts index 517366826..120baa829 100644 --- a/frontend/src/api/setting.ts +++ b/frontend/src/api/setting.ts @@ -10,6 +10,8 @@ import { apiClient } from "@/lib/api-client"; import type { MemoryItem, ModelProvider, + CheckModelRequest, + CheckModelResult, ProviderDetail, ProviderModelInfo, } from "@/types/setting"; @@ -170,6 +172,18 @@ export const useSetDefaultProviderModel = () => { }); }; +/** + * Check model availability by provider/model with optional strict live check. + * - When `strict` is false, validates configuration only (API key/base URL). + * - When `strict` is true, performs a minimal request to verify reachability. + */ +export const useCheckModelAvailability = () => { + return useMutation({ + mutationFn: (params: CheckModelRequest) => + apiClient.post>("/models/check", params), + }); +}; + /** * Hook to get model providers sorted by API key availability. * Providers with API keys configured appear first. diff --git a/frontend/src/app/setting/components/models/model-detail.tsx b/frontend/src/app/setting/components/models/model-detail.tsx index ffaa253e2..c1db4f41c 100644 --- a/frontend/src/app/setting/components/models/model-detail.tsx +++ b/frontend/src/app/setting/components/models/model-detail.tsx @@ -10,6 +10,7 @@ import { useSetDefaultProvider, useSetDefaultProviderModel, useUpdateProviderConfig, + useCheckModelAvailability, } from "@/api/setting"; import { Button } from "@/components/ui/button"; import { @@ -65,9 +66,18 @@ export function ModelDetail({ provider }: ModelDetailProps) { useSetDefaultProviderModel(); const { mutate: setDefaultProvider, isPending: settingDefaultProvider } = useSetDefaultProvider(); + const { mutateAsync: checkAvailability, isPending: checkingAvailability } = + useCheckModelAvailability(); const [isAddDialogOpen, setIsAddDialogOpen] = useState(false); const [showApiKey, setShowApiKey] = useState(false); + const [checkResult, setCheckResult] = useState<{ + ok: boolean; + provider: string; + model_id: string; + status?: string; + error?: string; + } | null>(null); const configForm = useForm({ defaultValues: { @@ -133,7 +143,8 @@ export function ModelDetail({ provider }: ModelDetailProps) { addingModel || deletingModel || settingDefaultModel || - settingDefaultProvider; + settingDefaultProvider || + checkingAvailability; if (detailLoading) { return ( @@ -213,6 +224,49 @@ export function ModelDetail({ provider }: ModelDetailProps) { Click here to get the API key + {/* Check availability controls */} +
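+            {/* Clicking the button POSTs { provider, model_id } to
+                /models/check and renders the returned ok/status/error
+                inline below. */}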
+            <Button
+              className="h-8"
+              disabled={isBusy}
+              onClick={async () => {
+                setCheckResult(null);
+                try {
+                  const resp = await checkAvailability({
+                    provider,
+                    model_id: providerDetail.default_model_id,
+                  });
+                  setCheckResult(resp.data);
+                } catch (e) {
+                  setCheckResult({
+                    ok: false,
+                    provider,
+                    model_id: providerDetail.default_model_id,
+                    status: "request_failed",
+                    error: String(e),
+                  });
+                }
+              }}
+            >
+              {checkingAvailability ? "Checking..." : "Check Availability"}
+            </Button>
+            {checkResult && (
+              <div>
+                {checkResult.ok ? (
+                  <span>
+                    Available{checkResult.status ? ` (${checkResult.status})` : ""}
+                  </span>
+                ) : (
+                  <span>
+                    Unavailable{checkResult.status ? ` (${checkResult.status})` : ""}
+                    {checkResult.error ? `: ${checkResult.error}` : ""}
+                  </span>
+                )}
+              </div>
+ )} )} diff --git a/frontend/src/types/setting.ts b/frontend/src/types/setting.ts index 6c12f5380..08d609169 100644 --- a/frontend/src/types/setting.ts +++ b/frontend/src/types/setting.ts @@ -20,3 +20,18 @@ export type ProviderDetail = { default_model_id: string; models: ProviderModelInfo[]; }; + +// --- Model availability check --- +export type CheckModelRequest = { + provider?: string; + model_id?: string; + api_key?: string; +}; + +export type CheckModelResult = { + ok: boolean; + provider: string; + model_id: string; + status?: string; + error?: string; +}; diff --git a/python/configs/providers/dashscope.yaml b/python/configs/providers/dashscope.yaml index 95d816ca1..bb1c26c42 100644 --- a/python/configs/providers/dashscope.yaml +++ b/python/configs/providers/dashscope.yaml @@ -1,85 +1,63 @@ -# ============================================ -# DashScope Provider Configuration -# ============================================ -# DashScope (Alibaba Cloud Bailian) exposes an OpenAI-compatible API for Qwen3 models. -# Configure the API key via DASHSCOPE_API_KEY or override using env vars at runtime. - name: DashScope provider_type: dashscope enabled: true - -# Connection parameters for DashScope compatible-mode endpoint. connection: base_url: https://dashscope.aliyuncs.com/compatible-mode/v1 api_key_env: DASHSCOPE_API_KEY - -# Default chat model used when no model_id is specified. default_model: qwen3-max - -# Global default inference parameters. defaults: temperature: 0.7 max_tokens: 16384 - -# Commonly used Qwen3 models available via DashScope. models: - - id: qwen3-max - name: Qwen3 Max - context_length: 256000 - max_output_tokens: 16384 - description: Qwen3 Max model with strongest performance - supported_inputs: - - text - supported_outputs: - - text - - - id: qwen3-max-preview - name: Qwen3 Max Preview - context_length: 256000 - max_output_tokens: 16384 - description: Qwen3 Max preview model - supported_inputs: - - text - supported_outputs: - - text - - - id: qwen-plus - name: Qwen Plus - context_length: 256000 - max_output_tokens: 16384 - description: Qwen Plus model with balanced performance - supported_inputs: - - text - supported_outputs: - - text - - - id: qwen-flash - name: Qwen Flash - context_length: 256000 - max_output_tokens: 16384 - description: Qwen Flash model optimized for fast response - supported_inputs: - - text - supported_outputs: - - text - -# Embedding configuration for DashScope text embedding models. 
+- id: qwen3-max + name: Qwen3 Max + context_length: 256000 + max_output_tokens: 16384 + description: Qwen3 Max model with strongest performance + supported_inputs: + - text + supported_outputs: + - text +- id: qwen3-max-preview + name: Qwen3 Max Preview + context_length: 256000 + max_output_tokens: 16384 + description: Qwen3 Max preview model + supported_inputs: + - text + supported_outputs: + - text +- id: qwen-plus + name: Qwen Plus + context_length: 256000 + max_output_tokens: 16384 + description: Qwen Plus model with balanced performance + supported_inputs: + - text + supported_outputs: + - text +- id: qwen-flash + name: Qwen Flash + context_length: 256000 + max_output_tokens: 16384 + description: Qwen Flash model optimized for fast response + supported_inputs: + - text + supported_outputs: + - text embedding: default_model: text-embedding-v4 - defaults: dimensions: 2048 - encoding_format: "float" - + encoding_format: float models: - - id: text-embedding-v4 - name: Text Embedding V4 - dimensions: 2048 - max_input: 8192 - description: DashScope text embedding v4 model (latest) - - - id: text-embedding-v3 - name: Text Embedding V3 - dimensions: 1024 - max_input: 8192 - description: DashScope text embedding v3 model \ No newline at end of file + - id: text-embedding-v4 + name: Text Embedding V4 + dimensions: 2048 + max_input: 8192 + description: DashScope text embedding v4 model (latest) + - id: text-embedding-v3 + name: Text Embedding V3 + dimensions: 1024 + max_input: 8192 + description: DashScope text embedding v3 model diff --git a/python/configs/providers/deepseek.yaml b/python/configs/providers/deepseek.yaml index 9cfd652e5..6c1062782 100644 --- a/python/configs/providers/deepseek.yaml +++ b/python/configs/providers/deepseek.yaml @@ -1,31 +1,19 @@ -# ============================================ -# DeepSeek Provider Configuration -# ============================================ -name: "DeepSeek" -provider_type: "deepseek" - -enabled: true # Default is true if not specified - -# Connection Configuration +name: DeepSeek +provider_type: deepseek +enabled: true connection: - base_url: "https://api.deepseek.com/v1" - api_key_env: "DEEPSEEK_API_KEY" - -# Default model if none specified -default_model: "deepseek-chat" - -# Model Parameters Defaults + base_url: https://api.deepseek.com/v1 + api_key_env: DEEPSEEK_API_KEY +default_model: deepseek-chat defaults: temperature: 0.7 max_tokens: 8096 - -# Available Models (commonly used) models: - - id: "deepseek-chat" - name: "DeepSeek Chat" - context_length: 128000 - description: "DeepSeek Chat model" - - id: "deepseek-reasoner" - name: "DeepSeek Reasoner" - context_length: 128000 - description: "DeepSeek Reasoner model with enhanced reasoning capabilities" +- id: deepseek-chat + name: DeepSeek Chat + context_length: 128000 + description: DeepSeek Chat model +- id: deepseek-reasoner + name: DeepSeek Reasoner + context_length: 128000 + description: DeepSeek Reasoner model with enhanced reasoning capabilities diff --git a/python/configs/providers/google.yaml b/python/configs/providers/google.yaml index 9c26bdf2e..4d95b3fe4 100644 --- a/python/configs/providers/google.yaml +++ b/python/configs/providers/google.yaml @@ -1,59 +1,58 @@ -# ============================================ -# Google Provider Configuration -# ============================================ -name: "Google" -provider_type: "google" - -enabled: true # Default is true if not specified - -# Connection Configuration +name: Google +provider_type: google +enabled: true connection: - 
base_url: "https://generativelanguage.googleapis.com/v1beta" - api_key_env: "GOOGLE_API_KEY" - -# Default model if none specified -default_model: "gemini-2.5-flash" - -# Model Parameters Defaults + base_url: https://generativelanguage.googleapis.com/v1beta + api_key_env: GOOGLE_API_KEY +default_model: gemini-2.5-pro defaults: temperature: 0.7 - -# Available Models models: - - id: "gemini-2.5-flash" - name: "Gemini 2.5 Flash" - context_length: 1048576 - max_output_tokens: 65536 - description: "Fast and efficient Gemini model supporting text, images, video, and audio" - supported_inputs: ["text", "images", "video", "audio"] - supported_outputs: ["text"] - - - id: "gemini-2.5-pro" - name: "Gemini 2.5 Pro" - context_length: 1048576 - max_output_tokens: 65536 - description: "Most capable Gemini model supporting audio, images, video, text, and PDF" - supported_inputs: ["audio", "images", "video", "text", "pdf"] - supported_outputs: ["text"] - -# ============================================ -# Embedding Models Configuration -# ============================================ +- id: gemini-2.5-flash + name: Gemini 2.5 Flash + context_length: 1048576 + max_output_tokens: 65536 + description: Fast and efficient Gemini model supporting text, images, video, and + audio + supported_inputs: + - text + - images + - video + - audio + supported_outputs: + - text +- id: gemini-2.5-pro + name: Gemini 2.5 Pro + context_length: 1048576 + max_output_tokens: 65536 + description: Most capable Gemini model supporting audio, images, video, text, and + PDF + supported_inputs: + - audio + - images + - video + - text + - pdf + supported_outputs: + - text embedding: - # Default embedding model - default_model: "gemini-embedding-001" - - # Default parameters + default_model: gemini-embedding-001 defaults: dimensions: 3072 - task_type: "RETRIEVAL_DOCUMENT" # or RETRIEVAL_QUERY depending on use case - - # Available embedding models + task_type: RETRIEVAL_DOCUMENT models: - - id: "gemini-embedding-001" - name: "Gemini Embedding 001" - dimensions: 3072 - max_input: 2048 - supported_dimensions: [128, 256, 512, 768, 1024, 1536, 2048, 3072] - description: "Google Gemini Embedding Model with flexible dimensions (128-3072, recommended: 768, 1536, 3072)" - + - id: gemini-embedding-001 + name: Gemini Embedding 001 + dimensions: 3072 + max_input: 2048 + supported_dimensions: + - 128 + - 256 + - 512 + - 768 + - 1024 + - 1536 + - 2048 + - 3072 + description: 'Google Gemini Embedding Model with flexible dimensions (128-3072, + recommended: 768, 1536, 3072)' diff --git a/python/valuecell/server/api/routers/models.py b/python/valuecell/server/api/routers/models.py index b55390c72..de928cf9a 100644 --- a/python/valuecell/server/api/routers/models.py +++ b/python/valuecell/server/api/routers/models.py @@ -15,6 +15,8 @@ from ..schemas import SuccessResponse from ..schemas.model import ( AddModelRequest, + CheckModelRequest, + CheckModelResponse, ModelItem, ModelProviderSummary, ProviderDetailData, @@ -482,4 +484,329 @@ async def set_provider_default_model( status_code=500, detail=f"Failed to set default model: {e}" ) + @router.post( + "/check", + response_model=SuccessResponse[CheckModelResponse], + summary="Check model availability", + description=( + "Perform a minimal live request to verify the model responds. " + "This endpoint does not validate provider configuration or API key presence." 
+ ), + ) + async def check_model( + payload: CheckModelRequest, + ) -> SuccessResponse[CheckModelResponse]: + try: + manager = get_config_manager() + provider = payload.provider or manager.primary_provider + cfg = manager.get_provider_config(provider) + if cfg is None: + raise HTTPException( + status_code=404, detail=f"Provider '{provider}' not found" + ) + + model_id = payload.model_id or cfg.default_model + if not model_id: + raise HTTPException( + status_code=400, + detail="Model id not specified and provider has no default", + ) + # Perform a minimal live request (ping) without configuration validation + result = CheckModelResponse( + ok=False, + provider=provider, + model_id=model_id, + status=None, + error=None, + ) + try: + import asyncio + from time import perf_counter + + import httpx + except Exception as e: + result.ok = False + result.status = "runtime_missing" + result.error = f"Runtime dependency missing: {e}" + return SuccessResponse.create(data=result, msg="Live check failed") + + # Prefer a direct minimal request for OpenAI-compatible providers. + # This avoids hidden fallbacks and validates API key/auth. + api_key = (payload.api_key or cfg.api_key or "").strip() + base_url = (getattr(cfg, "base_url", None) or "").strip() + # Use direct request timeout only (no agent fallback) + direct_timeout_s = 3.0 + if provider == "google": + direct_timeout_s = 3.0 + + def _normalize_model_id_for_provider(provider_name: str, mid: str) -> str: + """Normalize model id for specific providers to avoid 404s. + + - Google Gemini: sometimes configs use vendor-prefixed ids like + "google/gemini-1.5-flash"; the REST path expects just the model + name segment (e.g., "gemini-1.5-flash"). + - Other providers: return as-is. + """ + if provider_name == "google" and "/" in mid: + return mid.split("/")[-1] + return mid + + normalized_model_id = _normalize_model_id_for_provider(provider, model_id) + + async def _direct_openai_like_ping(endpoint: str) -> bool: + headers = { + "Authorization": f"Bearer {api_key}", + "Content-Type": "application/json", + } + json_body = { + "model": model_id, + "messages": [{"role": "user", "content": "ping"}], + "max_tokens": 1, + "temperature": 0, + } + async with httpx.AsyncClient(timeout=direct_timeout_s) as client: + resp = await client.post(endpoint, headers=headers, json=json_body) + # Handle auth failures explicitly + if resp.status_code in (401, 403): + try: + err_json = resp.json() + msg = err_json.get("error", {}).get("message") or str(err_json) + except Exception: + msg = resp.text + result.ok = False + result.status = "auth_failed" + result.error = msg or "Unauthorized" + return False + if resp.status_code >= 400: + # Other request failures + try: + err_json = resp.json() + msg = err_json.get("error", {}).get("message") or str(err_json) + except Exception: + msg = resp.text + result.ok = False + result.status = "request_failed" + result.error = msg or f"HTTP {resp.status_code}" + return False + # Success path: verify minimal structure + try: + data = resp.json() + except Exception: + data = None + if not data or "choices" not in data: + result.ok = False + result.status = "request_failed" + result.error = "Unexpected response structure" + return False + result.status = "reachable" + result.ok = True + return True + + async def _direct_google_ping(endpoint: str) -> bool: + # Gemini REST uses api key via query param `key`, but we also + # set header to be safe. 
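+            # Assumption: the Gemini REST API accepts the key either as the
+            # `x-goog-api-key` header or as the `key` query parameter, so
+            # sending both is redundant but harmless.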
+ headers = { + "Content-Type": "application/json", + "x-goog-api-key": api_key, + } + json_body = { + "contents": [ + { + "role": "user", + "parts": [{"text": "ping"}], + } + ] + } + async with httpx.AsyncClient(timeout=direct_timeout_s) as client: + resp = await client.post( + endpoint, + headers=headers, + params={"key": api_key} if api_key else None, + json=json_body, + ) + if resp.status_code in (401, 403): + try: + err_json = resp.json() + msg = err_json.get("error", {}).get("message") or str(err_json) + except Exception: + msg = resp.text + result.ok = False + result.status = "auth_failed" + result.error = msg or "Unauthorized" + return False + if resp.status_code >= 400: + try: + err_json = resp.json() + msg = err_json.get("error", {}).get("message") or str(err_json) + except Exception: + msg = resp.text + result.ok = False + result.status = "request_failed" + # Preserve HTTP code in error to enable v1/v1beta fallback on 404 + if msg: + result.error = f"HTTP {resp.status_code}: {msg}" + else: + result.error = f"HTTP {resp.status_code}" + return False + # Minimal success: presence of candidates + try: + data = resp.json() + except Exception: + data = None + if not data or "candidates" not in data: + result.ok = False + result.status = "request_failed" + result.error = "Unexpected response structure" + return False + result.status = "reachable" + result.ok = True + return True + + def _normalize_base_url(url: str) -> str: + return (url or "").strip().rstrip("/") + + def _resolve_endpoint() -> tuple[str | None, str]: + """Return (endpoint, style) where style in {"openai_like", "google", "azure"}. + + Priority: if base_url provided, derive from host; else fall back to known provider mappings. + """ + bu = _normalize_base_url(base_url) + # Host-driven detection + if bu: + lower = bu.lower() + if "generativelanguage.googleapis.com" in lower or "googleapis.com" in lower: + # Construct Google endpoint for fast direct ping + # If base_url already includes version segment, do not repeat it + if lower.endswith("/v1beta") or "/v1beta/" in lower: + endpoint = f"{bu}/models/{normalized_model_id}:generateContent" + elif lower.endswith("/v1") or "/v1/" in lower: + endpoint = f"{bu}/models/{normalized_model_id}:generateContent" + else: + endpoint = f"{bu}/v1beta/models/{normalized_model_id}:generateContent" + return endpoint, "google" + if "openai.azure.com" in lower or "/openai/deployments" in lower: + # If user pasted a deployments URL, keep it; otherwise construct from base_url + # Azure requires api_version + api_version = getattr(cfg, "extra_config", {}).get("api_version") if hasattr(cfg, "extra_config") else None + if not api_version: + return None, "azure" + endpoint = f"{bu}/openai/deployments/{model_id}/chat/completions?api-version={api_version}" + return endpoint, "azure" + if "openrouter.ai" in lower: + return f"{bu}/api/v1/chat/completions" if not lower.endswith("/api/v1") else f"{bu}/chat/completions", "openai_like" + if "openai.com" in lower: + return f"{bu}/v1/chat/completions" if not lower.endswith("/v1") else f"{bu}/chat/completions", "openai_like" + if "deepseek.com" in lower: + return f"{bu}/v1/chat/completions" if not lower.endswith("/v1") else f"{bu}/chat/completions", "openai_like" + if "siliconflow" in lower: + return f"{bu}/v1/chat/completions" if not lower.endswith("/v1") else f"{bu}/chat/completions", "openai_like" + if "dashscope.aliyuncs.com" in lower or "dashscope.com" in lower: + # DashScope OpenAI-compatible endpoint lives under compatible-mode + if 
lower.endswith("/compatible-mode/v1"): + return f"{bu}/chat/completions", "openai_like" + return f"{bu}/compatible-mode/v1/chat/completions", "openai_like" + # If base_url provided but host is unrecognized: + # - For openai-compatible, treat as generic OpenAI-like + # - For Google/Azure, ignore base_url and fall through to provider fallback + # - For other providers, fall through to provider fallback to use official endpoints + if provider == "openai-compatible": + return f"{bu}/v1/chat/completions", "openai_like" + + # Provider-driven fallback + if provider == "google": + # Official Google endpoint for direct ping (v1beta by default) + return f"https://generativelanguage.googleapis.com/v1beta/models/{normalized_model_id}:generateContent", "google" + if provider == "azure": + api_version = getattr(cfg, "extra_config", {}).get("api_version") if hasattr(cfg, "extra_config") else None + if base_url and api_version: + endpoint = f"{base_url}/openai/deployments/{model_id}/chat/completions?api-version={api_version}" + return endpoint, "azure" + return None, "azure" + if provider == "openai": + return "https://api.openai.com/v1/chat/completions", "openai_like" + if provider == "openrouter": + return "https://openrouter.ai/api/v1/chat/completions", "openai_like" + if provider == "deepseek": + return "https://api.deepseek.com/v1/chat/completions", "openai_like" + if provider == "siliconflow": + return "https://api.siliconflow.cn/v1/chat/completions", "openai_like" + if provider == "dashscope": + return "https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions", "openai_like" + if provider == "openai-compatible": + if base_url: + bu = _normalize_base_url(base_url) + if bu.endswith("/v1"): + return f"{bu}/chat/completions", "openai_like" + return f"{bu}/v1/chat/completions", "openai_like" + return None, "openai_like" + + # Decide endpoint for known OpenAI-compatible providers + completed_via_direct = False + try: + if not api_key: + # Missing API key: fail fast for providers requiring auth + if provider in {"openai", "openrouter", "deepseek", "siliconflow", "azure", "google"}: + result.ok = False + result.status = "auth_failed" + result.error = "API key is missing" + return SuccessResponse.create(data=result, msg="Auth failed") + + endpoint, style = _resolve_endpoint() + + if endpoint: + # Perform direct ping with timeout + start = perf_counter() + if style == "google": + completed_via_direct = await asyncio.wait_for(_direct_google_ping(endpoint), timeout=direct_timeout_s) + # If 404 from v1beta, try v1 (or vice versa) + if not completed_via_direct and (result.error or "").find("404") != -1: + alt_endpoint = None + if "/v1beta/" in endpoint: + alt_endpoint = endpoint.replace("/v1beta/", "/v1/") + elif "/v1/" in endpoint: + alt_endpoint = endpoint.replace("/v1/", "/v1beta/") + if alt_endpoint: + # Reset status/error before retry + result.status = None + result.error = None + completed_via_direct = await asyncio.wait_for(_direct_google_ping(alt_endpoint), timeout=direct_timeout_s) + else: + completed_via_direct = await asyncio.wait_for(_direct_openai_like_ping(endpoint), timeout=direct_timeout_s) + if completed_via_direct: + return SuccessResponse.create(data=result, msg="Model reachable") + else: + # If direct ping determined failure, return immediately + return SuccessResponse.create(data=result, msg=result.status or "Request failed") + else: + # No endpoint available for direct probe + result.ok = False + result.status = "probe_unavailable" + if style == "azure": + result.error = "Azure 
requires API Host (base_url) and api_version for direct probe" + elif provider == "openai-compatible" and not base_url: + result.error = "OpenAI-compatible provider requires API Host to run direct probe" + else: + result.error = "Direct probe endpoint not resolved" + return SuccessResponse.create(data=result, msg="Probe unavailable") + except asyncio.TimeoutError: + result.ok = False + result.status = "timeout" + result.error = f"Timed out after {int(direct_timeout_s * 1000)} ms" + return SuccessResponse.create(data=result, msg="Timeout") + except httpx.TimeoutException: + result.ok = False + result.status = "timeout" + result.error = f"Timed out after {int(direct_timeout_s * 1000)} ms" + return SuccessResponse.create(data=result, msg="Timeout") + except Exception as e: + # Direct probe threw an unexpected error; report and do not fall back to agent + result.ok = False + result.status = "request_failed" + result.error = str(e) + return SuccessResponse.create(data=result, msg="Request failed") + except HTTPException: + raise + except Exception as e: + raise HTTPException(status_code=500, detail=f"Failed to check model: {e}") + return router diff --git a/python/valuecell/server/api/schemas/__init__.py b/python/valuecell/server/api/schemas/__init__.py index 58e8f300e..d80acf94a 100644 --- a/python/valuecell/server/api/schemas/__init__.py +++ b/python/valuecell/server/api/schemas/__init__.py @@ -35,7 +35,7 @@ UserI18nSettingsData, UserI18nSettingsRequest, ) -from .model import LLMModelConfigData +from .model import CheckModelRequest, CheckModelResponse, LLMModelConfigData from .task import TaskCancelData from .user_profile import ( CreateUserProfileRequest, @@ -117,4 +117,6 @@ "TaskCancelData", # Model schemas "LLMModelConfigData", + "CheckModelRequest", + "CheckModelResponse", ] diff --git a/python/valuecell/server/api/schemas/model.py b/python/valuecell/server/api/schemas/model.py index b641c3211..6a72b426b 100644 --- a/python/valuecell/server/api/schemas/model.py +++ b/python/valuecell/server/api/schemas/model.py @@ -83,3 +83,32 @@ class SetDefaultModelRequest(BaseModel): None, description="Optional display name; added/updated in models list if provided", ) + + +# --- Model availability check --- +class CheckModelRequest(BaseModel): + """Request payload to check if a provider+model is usable.""" + + provider: Optional[str] = Field( + None, description="Provider to check; defaults to current primary provider" + ) + model_id: Optional[str] = Field( + None, description="Model id to check; defaults to provider's default model" + ) + api_key: Optional[str] = Field( + None, description="Temporary API key to use for this check (optional)" + ) + # strict/live check removed; this endpoint now validates configuration only. 
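+    # Note: the router still performs a live probe for every request; only
+    # the separate `strict` flag has been dropped from this schema.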
+ + +class CheckModelResponse(BaseModel): + """Response payload describing the model availability check result.""" + + ok: bool = Field(..., description="Whether the provider+model is usable") + provider: str = Field(..., description="Provider under test") + model_id: str = Field(..., description="Model id under test") + status: Optional[str] = Field( + None, + description="Status label like 'valid_config', 'reachable', 'timeout', 'request_failed'", + ) + error: Optional[str] = Field(None, description="Error message if any") From 62e4d808572b22fb966bb5dd7265e1efdc84ae1e Mon Sep 17 00:00:00 2001 From: paisley <8197966+su8su@users.noreply.github.com> Date: Thu, 11 Dec 2025 11:44:04 +0800 Subject: [PATCH 2/5] fix:fix google provider timeout --- python/valuecell/server/api/routers/models.py | 126 ++++++++++++++---- 1 file changed, 100 insertions(+), 26 deletions(-) diff --git a/python/valuecell/server/api/routers/models.py b/python/valuecell/server/api/routers/models.py index de928cf9a..6fec260a1 100644 --- a/python/valuecell/server/api/routers/models.py +++ b/python/valuecell/server/api/routers/models.py @@ -511,6 +511,7 @@ async def check_model( status_code=400, detail="Model id not specified and provider has no default", ) + # Perform a minimal live request (ping) without configuration validation result = CheckModelResponse( ok=False, @@ -521,7 +522,6 @@ async def check_model( ) try: import asyncio - from time import perf_counter import httpx except Exception as e: @@ -535,9 +535,9 @@ async def check_model( api_key = (payload.api_key or cfg.api_key or "").strip() base_url = (getattr(cfg, "base_url", None) or "").strip() # Use direct request timeout only (no agent fallback) - direct_timeout_s = 3.0 + direct_timeout_s = 5.0 if provider == "google": - direct_timeout_s = 3.0 + direct_timeout_s = 30.0 def _normalize_model_id_for_provider(provider_name: str, mid: str) -> str: """Normalize model id for specific providers to avoid 404s. 
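The Google budget is the outlier above because the probe is a real `generateContent` call. A standalone sketch of the equivalent request, assuming `httpx` is installed and `GOOGLE_API_KEY` is set in the environment (the model id here is illustrative):

    import asyncio
    import os

    import httpx


    async def ping() -> None:
        # Mirrors _direct_google_ping: one-token prompt, key sent as a header.
        endpoint = (
            "https://generativelanguage.googleapis.com/v1beta/"
            "models/gemini-2.5-flash:generateContent"
        )
        body = {"contents": [{"role": "user", "parts": [{"text": "ping"}]}]}
        async with httpx.AsyncClient(timeout=30.0) as client:
            resp = await client.post(
                endpoint,
                headers={"x-goog-api-key": os.environ["GOOGLE_API_KEY"]},
                json=body,
            )
        # A reachable model answers 200 with a "candidates" array.
        print(resp.status_code, "candidates" in resp.json())


    asyncio.run(ping())

Even this one-token prompt can exceed the original 3-second budget on a cold model, which is what the 30-second Google timeout above accounts for.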
@@ -624,6 +624,7 @@ async def _direct_google_ping(endpoint: str) -> bool: params={"key": api_key} if api_key else None, json=json_body, ) + if resp.status_code in (401, 403): try: err_json = resp.json() @@ -674,37 +675,75 @@ def _resolve_endpoint() -> tuple[str | None, str]: # Host-driven detection if bu: lower = bu.lower() - if "generativelanguage.googleapis.com" in lower or "googleapis.com" in lower: + if ( + "generativelanguage.googleapis.com" in lower + or "googleapis.com" in lower + ): # Construct Google endpoint for fast direct ping + # Handle cases where base_url already includes '/models' or full ':generateContent' path + if ":generatecontent" in lower: + # Treat as full endpoint + return bu, "google" + if "/models/" in lower: + # If base_url already includes '/models', avoid duplicating + if lower.endswith("/models"): + endpoint = f"{bu}/{normalized_model_id}:generateContent" + else: + # base_url might be '/models/{model}', append ':generateContent' if missing + endpoint = ( + f"{bu}:generateContent" + if not lower.endswith(":generatecontent") + else bu + ) + return endpoint, "google" # If base_url already includes version segment, do not repeat it if lower.endswith("/v1beta") or "/v1beta/" in lower: - endpoint = f"{bu}/models/{normalized_model_id}:generateContent" + endpoint = ( + f"{bu}/models/{normalized_model_id}:generateContent" + ) elif lower.endswith("/v1") or "/v1/" in lower: - endpoint = f"{bu}/models/{normalized_model_id}:generateContent" + endpoint = ( + f"{bu}/models/{normalized_model_id}:generateContent" + ) else: endpoint = f"{bu}/v1beta/models/{normalized_model_id}:generateContent" return endpoint, "google" if "openai.azure.com" in lower or "/openai/deployments" in lower: # If user pasted a deployments URL, keep it; otherwise construct from base_url # Azure requires api_version - api_version = getattr(cfg, "extra_config", {}).get("api_version") if hasattr(cfg, "extra_config") else None + api_version = ( + getattr(cfg, "extra_config", {}).get("api_version") + if hasattr(cfg, "extra_config") + else None + ) if not api_version: return None, "azure" endpoint = f"{bu}/openai/deployments/{model_id}/chat/completions?api-version={api_version}" return endpoint, "azure" if "openrouter.ai" in lower: - return f"{bu}/api/v1/chat/completions" if not lower.endswith("/api/v1") else f"{bu}/chat/completions", "openai_like" + return f"{bu}/api/v1/chat/completions" if not lower.endswith( + "/api/v1" + ) else f"{bu}/chat/completions", "openai_like" if "openai.com" in lower: - return f"{bu}/v1/chat/completions" if not lower.endswith("/v1") else f"{bu}/chat/completions", "openai_like" + return f"{bu}/v1/chat/completions" if not lower.endswith( + "/v1" + ) else f"{bu}/chat/completions", "openai_like" if "deepseek.com" in lower: - return f"{bu}/v1/chat/completions" if not lower.endswith("/v1") else f"{bu}/chat/completions", "openai_like" + return f"{bu}/v1/chat/completions" if not lower.endswith( + "/v1" + ) else f"{bu}/chat/completions", "openai_like" if "siliconflow" in lower: - return f"{bu}/v1/chat/completions" if not lower.endswith("/v1") else f"{bu}/chat/completions", "openai_like" + return f"{bu}/v1/chat/completions" if not lower.endswith( + "/v1" + ) else f"{bu}/chat/completions", "openai_like" if "dashscope.aliyuncs.com" in lower or "dashscope.com" in lower: # DashScope OpenAI-compatible endpoint lives under compatible-mode if lower.endswith("/compatible-mode/v1"): return f"{bu}/chat/completions", "openai_like" - return f"{bu}/compatible-mode/v1/chat/completions", "openai_like" + 
return ( + f"{bu}/compatible-mode/v1/chat/completions", + "openai_like", + ) # If base_url provided but host is unrecognized: # - For openai-compatible, treat as generic OpenAI-like # - For Google/Azure, ignore base_url and fall through to provider fallback @@ -715,9 +754,16 @@ def _resolve_endpoint() -> tuple[str | None, str]: # Provider-driven fallback if provider == "google": # Official Google endpoint for direct ping (v1beta by default) - return f"https://generativelanguage.googleapis.com/v1beta/models/{normalized_model_id}:generateContent", "google" + return ( + f"https://generativelanguage.googleapis.com/v1beta/models/{normalized_model_id}:generateContent", + "google", + ) if provider == "azure": - api_version = getattr(cfg, "extra_config", {}).get("api_version") if hasattr(cfg, "extra_config") else None + api_version = ( + getattr(cfg, "extra_config", {}).get("api_version") + if hasattr(cfg, "extra_config") + else None + ) if base_url and api_version: endpoint = f"{base_url}/openai/deployments/{model_id}/chat/completions?api-version={api_version}" return endpoint, "azure" @@ -725,13 +771,22 @@ def _resolve_endpoint() -> tuple[str | None, str]: if provider == "openai": return "https://api.openai.com/v1/chat/completions", "openai_like" if provider == "openrouter": - return "https://openrouter.ai/api/v1/chat/completions", "openai_like" + return ( + "https://openrouter.ai/api/v1/chat/completions", + "openai_like", + ) if provider == "deepseek": return "https://api.deepseek.com/v1/chat/completions", "openai_like" if provider == "siliconflow": - return "https://api.siliconflow.cn/v1/chat/completions", "openai_like" + return ( + "https://api.siliconflow.cn/v1/chat/completions", + "openai_like", + ) if provider == "dashscope": - return "https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions", "openai_like" + return ( + "https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions", + "openai_like", + ) if provider == "openai-compatible": if base_url: bu = _normalize_base_url(base_url) @@ -745,7 +800,14 @@ def _resolve_endpoint() -> tuple[str | None, str]: try: if not api_key: # Missing API key: fail fast for providers requiring auth - if provider in {"openai", "openrouter", "deepseek", "siliconflow", "azure", "google"}: + if provider in { + "openai", + "openrouter", + "deepseek", + "siliconflow", + "azure", + "google", + }: result.ok = False result.status = "auth_failed" result.error = "API key is missing" @@ -755,11 +817,15 @@ def _resolve_endpoint() -> tuple[str | None, str]: if endpoint: # Perform direct ping with timeout - start = perf_counter() if style == "google": - completed_via_direct = await asyncio.wait_for(_direct_google_ping(endpoint), timeout=direct_timeout_s) + completed_via_direct = await asyncio.wait_for( + _direct_google_ping(endpoint), timeout=direct_timeout_s + ) # If 404 from v1beta, try v1 (or vice versa) - if not completed_via_direct and (result.error or "").find("404") != -1: + if ( + not completed_via_direct + and (result.error or "").find("404") != -1 + ): alt_endpoint = None if "/v1beta/" in endpoint: alt_endpoint = endpoint.replace("/v1beta/", "/v1/") @@ -769,14 +835,22 @@ def _resolve_endpoint() -> tuple[str | None, str]: # Reset status/error before retry result.status = None result.error = None - completed_via_direct = await asyncio.wait_for(_direct_google_ping(alt_endpoint), timeout=direct_timeout_s) + completed_via_direct = await asyncio.wait_for( + _direct_google_ping(alt_endpoint), + timeout=direct_timeout_s, + ) else: - 
completed_via_direct = await asyncio.wait_for(_direct_openai_like_ping(endpoint), timeout=direct_timeout_s) + completed_via_direct = await asyncio.wait_for( + _direct_openai_like_ping(endpoint), timeout=direct_timeout_s + ) if completed_via_direct: - return SuccessResponse.create(data=result, msg="Model reachable") + return SuccessResponse.create( + data=result, msg="Model reachable" + ) else: - # If direct ping determined failure, return immediately - return SuccessResponse.create(data=result, msg=result.status or "Request failed") + return SuccessResponse.create( + data=result, msg=result.status or "Request failed" + ) else: # No endpoint available for direct probe result.ok = False From 61f2115ea1f97ba7715dc0d0c56dee558edd07b0 Mon Sep 17 00:00:00 2001 From: DigHuang <114602213+DigHuang@users.noreply.github.com> Date: Thu, 11 Dec 2025 15:03:32 +0800 Subject: [PATCH 3/5] refactor: simplify model availability check state management by directly using `useCheckModelAvailability` hook's return values. --- frontend/src/api/setting.ts | 4 +- .../components/models/model-detail.tsx | 66 +++++++++---------- 2 files changed, 34 insertions(+), 36 deletions(-) diff --git a/frontend/src/api/setting.ts b/frontend/src/api/setting.ts index 120baa829..fc179de5a 100644 --- a/frontend/src/api/setting.ts +++ b/frontend/src/api/setting.ts @@ -8,10 +8,10 @@ import { API_QUERY_KEYS } from "@/constants/api"; import type { ApiResponse } from "@/lib/api-client"; import { apiClient } from "@/lib/api-client"; import type { - MemoryItem, - ModelProvider, CheckModelRequest, CheckModelResult, + MemoryItem, + ModelProvider, ProviderDetail, ProviderModelInfo, } from "@/types/setting"; diff --git a/frontend/src/app/setting/components/models/model-detail.tsx b/frontend/src/app/setting/components/models/model-detail.tsx index c1db4f41c..751702d3e 100644 --- a/frontend/src/app/setting/components/models/model-detail.tsx +++ b/frontend/src/app/setting/components/models/model-detail.tsx @@ -5,12 +5,12 @@ import { useEffect, useState } from "react"; import { z } from "zod"; import { useAddProviderModel, + useCheckModelAvailability, useDeleteProviderModel, useGetModelProviderDetail, useSetDefaultProvider, useSetDefaultProviderModel, useUpdateProviderConfig, - useCheckModelAvailability, } from "@/api/setting"; import { Button } from "@/components/ui/button"; import { @@ -66,18 +66,15 @@ export function ModelDetail({ provider }: ModelDetailProps) { useSetDefaultProviderModel(); const { mutate: setDefaultProvider, isPending: settingDefaultProvider } = useSetDefaultProvider(); - const { mutateAsync: checkAvailability, isPending: checkingAvailability } = - useCheckModelAvailability(); + const { + data: checkResult, + mutateAsync: checkAvailability, + isPending: checkingAvailability, + reset: resetCheckResult, + } = useCheckModelAvailability(); const [isAddDialogOpen, setIsAddDialogOpen] = useState(false); const [showApiKey, setShowApiKey] = useState(false); - const [checkResult, setCheckResult] = useState<{ - ok: boolean; - provider: string; - model_id: string; - status?: string; - error?: string; - } | null>(null); const configForm = useForm({ defaultValues: { @@ -105,8 +102,11 @@ export function ModelDetail({ provider }: ModelDetailProps) { }, [providerDetail, configForm.setFieldValue]); useEffect(() => { - if (provider) setShowApiKey(false); - }, [provider]); + if (provider) { + setShowApiKey(false); + resetCheckResult(); + } + }, [provider, resetCheckResult]); const addModelForm = useForm({ defaultValues: { @@ -232,37 +232,35 @@ 
export function ModelDetail({ provider }: ModelDetailProps) { className="h-8" disabled={isBusy} onClick={async () => { - setCheckResult(null); - try { - const resp = await checkAvailability({ - provider, - model_id: providerDetail.default_model_id, - }); - setCheckResult(resp.data); - } catch (e) { - setCheckResult({ - ok: false, - provider, - model_id: providerDetail.default_model_id, - status: "request_failed", - error: String(e), - }); - } + await checkAvailability({ + provider, + model_id: providerDetail.default_model_id, + }); }} > - {checkingAvailability ? "Checking..." : "Check Availability"} + {checkingAvailability + ? "Checking..." + : "Check Availability"} - {checkResult && ( + {checkResult?.data && (
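+              /* `checkResult` is the raw ApiResponse envelope returned by
+                 the mutation, hence the `.data` hop on every field below. */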
- {checkResult.ok ? ( + {checkResult.data.ok ? ( - Available{checkResult.status ? ` (${checkResult.status})` : ""} + Available + {checkResult.data.status + ? ` (${checkResult.data.status})` + : ""} ) : ( - Unavailable{checkResult.status ? ` (${checkResult.status})` : ""} - {checkResult.error ? `: ${checkResult.error}` : ""} + Unavailable + {checkResult.data.status + ? ` (${checkResult.data.status})` + : ""} + {checkResult.data.error + ? `: ${checkResult.data.error}` + : ""} )}
From 339533c5dba7cc2fb396af72939eb8ef4c01265a Mon Sep 17 00:00:00 2001 From: DigHuang <114602213+DigHuang@users.noreply.github.com> Date: Fri, 12 Dec 2025 17:47:50 +0800 Subject: [PATCH 4/5] feat: Improve API key input layout by grouping with availability check button and repositioning API key link --- .../components/models/model-detail.tsx | 91 +++++++++---------- 1 file changed, 45 insertions(+), 46 deletions(-) diff --git a/frontend/src/app/setting/components/models/model-detail.tsx b/frontend/src/app/setting/components/models/model-detail.tsx index 751702d3e..f7eec1dc0 100644 --- a/frontend/src/app/setting/components/models/model-detail.tsx +++ b/frontend/src/app/setting/components/models/model-detail.tsx @@ -184,52 +184,44 @@ export function ModelDetail({ provider }: ModelDetailProps) { > API key - - field.handleChange(e.target.value)} - onBlur={() => configForm.handleSubmit()} - onKeyDown={(e) => { - if (e.key === "Enter") { - e.preventDefault(); - e.currentTarget.blur(); - } - }} - /> - - setShowApiKey(!showApiKey)} - aria-label={ - showApiKey ? "Hide password" : "Show password" - } - > - {showApiKey ? ( - - ) : ( - - )} - - - - - Click here to get the API key - - - {/* Check availability controls */} -
+
+ + field.handleChange(e.target.value)} + onBlur={() => configForm.handleSubmit()} + onKeyDown={(e) => { + if (e.key === "Enter") { + e.preventDefault(); + e.currentTarget.blur(); + } + }} + /> + + setShowApiKey(!showApiKey)} + aria-label={ + showApiKey ? "Hide password" : "Show password" + } + > + {showApiKey ? ( + + ) : ( + + )} + + + +
{checkResult?.data && ( -
+
{checkResult.data.ok ? ( Available @@ -265,6 +257,13 @@ export function ModelDetail({ provider }: ModelDetailProps) { )}
)} + + Click here to get the API key + + )} From 138318d4f2fab92a48006672e171b92bef9cd17b Mon Sep 17 00:00:00 2001 From: DigHuang <114602213+DigHuang@users.noreply.github.com> Date: Mon, 15 Dec 2025 11:11:54 +0800 Subject: [PATCH 5/5] revert: model config yaml --- python/configs/providers/dashscope.yaml | 116 ++++++++++++++---------- python/configs/providers/deepseek.yaml | 40 +++++--- python/configs/providers/google.yaml | 101 +++++++++++---------- 3 files changed, 146 insertions(+), 111 deletions(-) diff --git a/python/configs/providers/dashscope.yaml b/python/configs/providers/dashscope.yaml index bb1c26c42..95d816ca1 100644 --- a/python/configs/providers/dashscope.yaml +++ b/python/configs/providers/dashscope.yaml @@ -1,63 +1,85 @@ +# ============================================ +# DashScope Provider Configuration +# ============================================ +# DashScope (Alibaba Cloud Bailian) exposes an OpenAI-compatible API for Qwen3 models. +# Configure the API key via DASHSCOPE_API_KEY or override using env vars at runtime. + name: DashScope provider_type: dashscope enabled: true + +# Connection parameters for DashScope compatible-mode endpoint. connection: base_url: https://dashscope.aliyuncs.com/compatible-mode/v1 api_key_env: DASHSCOPE_API_KEY + +# Default chat model used when no model_id is specified. default_model: qwen3-max + +# Global default inference parameters. defaults: temperature: 0.7 max_tokens: 16384 + +# Commonly used Qwen3 models available via DashScope. models: -- id: qwen3-max - name: Qwen3 Max - context_length: 256000 - max_output_tokens: 16384 - description: Qwen3 Max model with strongest performance - supported_inputs: - - text - supported_outputs: - - text -- id: qwen3-max-preview - name: Qwen3 Max Preview - context_length: 256000 - max_output_tokens: 16384 - description: Qwen3 Max preview model - supported_inputs: - - text - supported_outputs: - - text -- id: qwen-plus - name: Qwen Plus - context_length: 256000 - max_output_tokens: 16384 - description: Qwen Plus model with balanced performance - supported_inputs: - - text - supported_outputs: - - text -- id: qwen-flash - name: Qwen Flash - context_length: 256000 - max_output_tokens: 16384 - description: Qwen Flash model optimized for fast response - supported_inputs: - - text - supported_outputs: - - text + - id: qwen3-max + name: Qwen3 Max + context_length: 256000 + max_output_tokens: 16384 + description: Qwen3 Max model with strongest performance + supported_inputs: + - text + supported_outputs: + - text + + - id: qwen3-max-preview + name: Qwen3 Max Preview + context_length: 256000 + max_output_tokens: 16384 + description: Qwen3 Max preview model + supported_inputs: + - text + supported_outputs: + - text + + - id: qwen-plus + name: Qwen Plus + context_length: 256000 + max_output_tokens: 16384 + description: Qwen Plus model with balanced performance + supported_inputs: + - text + supported_outputs: + - text + + - id: qwen-flash + name: Qwen Flash + context_length: 256000 + max_output_tokens: 16384 + description: Qwen Flash model optimized for fast response + supported_inputs: + - text + supported_outputs: + - text + +# Embedding configuration for DashScope text embedding models. 
embedding: default_model: text-embedding-v4 + defaults: dimensions: 2048 - encoding_format: float + encoding_format: "float" + models: - - id: text-embedding-v4 - name: Text Embedding V4 - dimensions: 2048 - max_input: 8192 - description: DashScope text embedding v4 model (latest) - - id: text-embedding-v3 - name: Text Embedding V3 - dimensions: 1024 - max_input: 8192 - description: DashScope text embedding v3 model + - id: text-embedding-v4 + name: Text Embedding V4 + dimensions: 2048 + max_input: 8192 + description: DashScope text embedding v4 model (latest) + + - id: text-embedding-v3 + name: Text Embedding V3 + dimensions: 1024 + max_input: 8192 + description: DashScope text embedding v3 model \ No newline at end of file diff --git a/python/configs/providers/deepseek.yaml b/python/configs/providers/deepseek.yaml index 6c1062782..9cfd652e5 100644 --- a/python/configs/providers/deepseek.yaml +++ b/python/configs/providers/deepseek.yaml @@ -1,19 +1,31 @@ -name: DeepSeek -provider_type: deepseek -enabled: true +# ============================================ +# DeepSeek Provider Configuration +# ============================================ +name: "DeepSeek" +provider_type: "deepseek" + +enabled: true # Default is true if not specified + +# Connection Configuration connection: - base_url: https://api.deepseek.com/v1 - api_key_env: DEEPSEEK_API_KEY -default_model: deepseek-chat + base_url: "https://api.deepseek.com/v1" + api_key_env: "DEEPSEEK_API_KEY" + +# Default model if none specified +default_model: "deepseek-chat" + +# Model Parameters Defaults defaults: temperature: 0.7 max_tokens: 8096 + +# Available Models (commonly used) models: -- id: deepseek-chat - name: DeepSeek Chat - context_length: 128000 - description: DeepSeek Chat model -- id: deepseek-reasoner - name: DeepSeek Reasoner - context_length: 128000 - description: DeepSeek Reasoner model with enhanced reasoning capabilities + - id: "deepseek-chat" + name: "DeepSeek Chat" + context_length: 128000 + description: "DeepSeek Chat model" + - id: "deepseek-reasoner" + name: "DeepSeek Reasoner" + context_length: 128000 + description: "DeepSeek Reasoner model with enhanced reasoning capabilities" diff --git a/python/configs/providers/google.yaml b/python/configs/providers/google.yaml index 4d95b3fe4..9c26bdf2e 100644 --- a/python/configs/providers/google.yaml +++ b/python/configs/providers/google.yaml @@ -1,58 +1,59 @@ -name: Google -provider_type: google -enabled: true +# ============================================ +# Google Provider Configuration +# ============================================ +name: "Google" +provider_type: "google" + +enabled: true # Default is true if not specified + +# Connection Configuration connection: - base_url: https://generativelanguage.googleapis.com/v1beta - api_key_env: GOOGLE_API_KEY -default_model: gemini-2.5-pro + base_url: "https://generativelanguage.googleapis.com/v1beta" + api_key_env: "GOOGLE_API_KEY" + +# Default model if none specified +default_model: "gemini-2.5-flash" + +# Model Parameters Defaults defaults: temperature: 0.7 + +# Available Models models: -- id: gemini-2.5-flash - name: Gemini 2.5 Flash - context_length: 1048576 - max_output_tokens: 65536 - description: Fast and efficient Gemini model supporting text, images, video, and - audio - supported_inputs: - - text - - images - - video - - audio - supported_outputs: - - text -- id: gemini-2.5-pro - name: Gemini 2.5 Pro - context_length: 1048576 - max_output_tokens: 65536 - description: Most capable Gemini model supporting audio, 
images, video, text, and - PDF - supported_inputs: - - audio - - images - - video - - text - - pdf - supported_outputs: - - text + - id: "gemini-2.5-flash" + name: "Gemini 2.5 Flash" + context_length: 1048576 + max_output_tokens: 65536 + description: "Fast and efficient Gemini model supporting text, images, video, and audio" + supported_inputs: ["text", "images", "video", "audio"] + supported_outputs: ["text"] + + - id: "gemini-2.5-pro" + name: "Gemini 2.5 Pro" + context_length: 1048576 + max_output_tokens: 65536 + description: "Most capable Gemini model supporting audio, images, video, text, and PDF" + supported_inputs: ["audio", "images", "video", "text", "pdf"] + supported_outputs: ["text"] + +# ============================================ +# Embedding Models Configuration +# ============================================ embedding: - default_model: gemini-embedding-001 + # Default embedding model + default_model: "gemini-embedding-001" + + # Default parameters defaults: dimensions: 3072 - task_type: RETRIEVAL_DOCUMENT + task_type: "RETRIEVAL_DOCUMENT" # or RETRIEVAL_QUERY depending on use case + + # Available embedding models models: - - id: gemini-embedding-001 - name: Gemini Embedding 001 - dimensions: 3072 - max_input: 2048 - supported_dimensions: - - 128 - - 256 - - 512 - - 768 - - 1024 - - 1536 - - 2048 - - 3072 - description: 'Google Gemini Embedding Model with flexible dimensions (128-3072, - recommended: 768, 1536, 3072)' + - id: "gemini-embedding-001" + name: "Gemini Embedding 001" + dimensions: 3072 + max_input: 2048 + supported_dimensions: [128, 256, 512, 768, 1024, 1536, 2048, 3072] + description: "Google Gemini Embedding Model with flexible dimensions (128-3072, recommended: 768, 1536, 3072)" +
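Across the series, `POST /models/check` ends up as a single live probe whose outcome is an `ok` flag plus a status label (`reachable`, `auth_failed`, `request_failed`, `timeout`, `probe_unavailable`, `runtime_missing`). A minimal client sketch, assuming a server at `http://localhost:8000` with the router mounted under `/api` — both are assumptions about the deployment, not taken from the patches:

    import httpx

    URL = "http://localhost:8000/api/models/check"  # assumed mount point

    resp = httpx.post(
        URL,
        json={"provider": "deepseek", "model_id": "deepseek-chat"},
        timeout=35.0,  # leave headroom over the server's 30 s Google budget
    )
    resp.raise_for_status()
    result = resp.json()["data"]  # SuccessResponse wraps the payload in `data`

    if result["ok"]:
        print(f"reachable: {result['provider']}/{result['model_id']}")
    else:
        print(f"{result['status']}: {result.get('error')}")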