diff --git a/frontend/src/app/setting/components/models/model-detail.tsx b/frontend/src/app/setting/components/models/model-detail.tsx
index 67ca6fb54..81b7767ee 100644
--- a/frontend/src/app/setting/components/models/model-detail.tsx
+++ b/frontend/src/app/setting/components/models/model-detail.tsx
@@ -135,6 +135,8 @@ export function ModelDetail({ provider }: ModelDetailProps) {
settingDefaultModel ||
settingDefaultProvider;
+ const isDashScope = (provider ?? "").toLowerCase() === "dashscope";
+
if (detailLoading) {
return (
Loading provider details...
@@ -216,23 +218,24 @@ export function ModelDetail({ provider }: ModelDetailProps) {
)}
-        {/* API Host section */}
-        <configForm.Field name="apiHost">
-          {(field) => (
-            <div>
-              <Label>
-                API Host
-              </Label>
-              <Input
-                value={field.state.value}
-                onChange={(e) => field.handleChange(e.target.value)}
-                onBlur={() => configForm.handleSubmit()}
-              />
-            </div>
-          )}
-        </configForm.Field>
-
+        {!isDashScope && (
+          <configForm.Field name="apiHost">
+            {(field) => (
+              <div>
+                <Label>
+                  API Host
+                </Label>
+                <Input
+                  value={field.state.value}
+                  onChange={(e) => field.handleChange(e.target.value)}
+                  onBlur={() => configForm.handleSubmit()}
+                />
+              </div>
+            )}
+          </configForm.Field>
+        )}
{/* Models section */}
diff --git a/python/configs/providers/dashscope.yaml b/python/configs/providers/dashscope.yaml
index 95d816ca1..60c9efddc 100644
--- a/python/configs/providers/dashscope.yaml
+++ b/python/configs/providers/dashscope.yaml
@@ -1,85 +1,90 @@
# ============================================
# DashScope Provider Configuration
# ============================================
-# DashScope (Alibaba Cloud Bailian) exposes an OpenAI-compatible API for Qwen3 models.
-# Configure the API key via DASHSCOPE_API_KEY or override using env vars at runtime.
+name: "DashScope"
+provider_type: "dashscope"
-name: DashScope
-provider_type: dashscope
-enabled: true
+enabled: true # Default is true if not specified
-# Connection parameters for DashScope compatible-mode endpoint.
+# Connection Configuration
connection:
- base_url: https://dashscope.aliyuncs.com/compatible-mode/v1
- api_key_env: DASHSCOPE_API_KEY
+  # DashScope (Alibaba Cloud Bailian) native HTTP calls always target the
+  # fixed official host; base_url is kept for schema compatibility and
+  # should be left empty.
+ api_key_env: "DASHSCOPE_API_KEY"
+ base_url: ""
-# Default chat model used when no model_id is specified.
-default_model: qwen3-max
+# Default chat model if none specified
+default_model: "qwen3-max"
-# Global default inference parameters.
+# Model Parameters Defaults
defaults:
temperature: 0.7
max_tokens: 16384
-# Commonly used Qwen3 models available via DashScope.
+# Available Models
models:
- - id: qwen3-max
- name: Qwen3 Max
+ - id: "qwen3-max"
+ name: "Qwen3 Max"
context_length: 256000
max_output_tokens: 16384
- description: Qwen3 Max model with strongest performance
+ description: "Qwen3 Max model with strongest performance"
supported_inputs:
- text
supported_outputs:
- text
- - id: qwen3-max-preview
- name: Qwen3 Max Preview
+ - id: "qwen3-max-preview"
+ name: "Qwen3 Max Preview"
context_length: 256000
max_output_tokens: 16384
- description: Qwen3 Max preview model
+ description: "Qwen3 Max preview model"
supported_inputs:
- text
supported_outputs:
- text
- - id: qwen-plus
- name: Qwen Plus
+ - id: "qwen-plus"
+ name: "Qwen Plus"
context_length: 256000
max_output_tokens: 16384
- description: Qwen Plus model with balanced performance
+ description: "Qwen Plus model with balanced performance"
supported_inputs:
- text
supported_outputs:
- text
- - id: qwen-flash
- name: Qwen Flash
+ - id: "qwen-flash"
+ name: "Qwen Flash"
context_length: 256000
max_output_tokens: 16384
- description: Qwen Flash model optimized for fast response
+ description: "Qwen Flash model optimized for fast response"
supported_inputs:
- text
supported_outputs:
- text
-# Embedding configuration for DashScope text embedding models.
+# ============================================
+# Embedding Models Configuration
+# ============================================
embedding:
- default_model: text-embedding-v4
+ # Default embedding model
+ default_model: "text-embedding-v4"
+ # Default parameters
defaults:
dimensions: 2048
encoding_format: "float"
+ # Available embedding models
models:
- - id: text-embedding-v4
- name: Text Embedding V4
+ - id: "text-embedding-v4"
+ name: "Text Embedding V4"
dimensions: 2048
max_input: 8192
- description: DashScope text embedding v4 model (latest)
+ description: "DashScope text embedding v4 model (latest)"
- - id: text-embedding-v3
- name: Text Embedding V3
+ - id: "text-embedding-v3"
+ name: "Text Embedding V3"
dimensions: 1024
max_input: 8192
- description: DashScope text embedding v3 model
\ No newline at end of file
+ description: "DashScope text embedding v3 model"
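
For orientation, here is a minimal sketch, assuming PyYAML rather than the project's actual ConfigManager, of how this file is expected to resolve at runtime:

```python
# Hedged sketch: resolve the DashScope provider config by hand.
# The real loading path is ConfigManager in python/valuecell/config/manager.py.
import os

import yaml

with open("python/configs/providers/dashscope.yaml") as f:
    cfg = yaml.safe_load(f)

# api_key_env names the environment variable holding the key.
api_key = os.environ.get(cfg["connection"]["api_key_env"])  # DASHSCOPE_API_KEY
# An empty base_url means the fixed native host is used (see factory.py).
base_url = cfg["connection"]["base_url"] or None
default_model = cfg["default_model"]  # "qwen3-max"
```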
diff --git a/python/valuecell/adapters/models/__init__.py b/python/valuecell/adapters/models/__init__.py
index eca956049..bc350df27 100644
--- a/python/valuecell/adapters/models/__init__.py
+++ b/python/valuecell/adapters/models/__init__.py
@@ -30,6 +30,8 @@
OpenAIProvider,
OpenRouterProvider,
SiliconFlowProvider,
+ create_embedder,
+ create_embedder_for_agent,
create_model,
create_model_for_agent,
get_model_factory,
@@ -52,4 +54,6 @@
# Convenience functions
"create_model",
"create_model_for_agent",
+ "create_embedder",
+ "create_embedder_for_agent",
]
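
With these re-exports, embedders are created the same way as chat models. A hedged usage sketch; the keyword arguments shown are assumptions, and the real signatures live in factory.py:

```python
from valuecell.adapters.models import create_embedder, create_model

# Hypothetical call shapes; see factory.py for the actual signatures.
model = create_model(provider="dashscope", model_id="qwen3-max")
embedder = create_embedder(provider="dashscope", model_id="text-embedding-v4")
```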
diff --git a/python/valuecell/adapters/models/factory.py b/python/valuecell/adapters/models/factory.py
index 15835a8ed..22cb89843 100644
--- a/python/valuecell/adapters/models/factory.py
+++ b/python/valuecell/adapters/models/factory.py
@@ -8,9 +8,17 @@
4. Supports fallback providers for reliability
"""
+import asyncio
+import json
from abc import ABC, abstractmethod
-from typing import Any, Dict, Optional
-
+from typing import Any, AsyncIterator, Dict, Iterator, List, Optional
+
+import requests
+from agno.exceptions import ModelProviderError
+from agno.models.base import Model
+from agno.models.message import Message
+from agno.models.metrics import Metrics
+from agno.models.response import ModelResponse
from loguru import logger
from valuecell.config.manager import ConfigManager, ProviderConfig, get_config_manager
@@ -507,40 +515,22 @@ class DashScopeProvider(ModelProvider):
"""DashScope model provider (native)"""
def create_model(self, model_id: Optional[str] = None, **kwargs):
- """Create DashScope model via agno (native)"""
- try:
- from agno.models.dashscope import DashScope
- except ImportError:
- raise ImportError(
- "agno package not installed. Install with: pip install agno"
- )
-
+ """Create DashScope model via native HTTP"""
model_id = model_id or self.config.default_model
params = {**self.config.parameters, **kwargs}
- # Prefer native endpoint; ignore compatible-mode base_url if present
- base_url = self.config.base_url
- if base_url and "compatible-mode" in base_url:
- base_url = None
-
- logger.info(f"Creating DashScope (native) model: {model_id}")
+ logger.info(f"Creating DashScope (native HTTP) model: {model_id}")
- return DashScope(
+ return DashScopeNativeChatModel(
id=model_id,
api_key=self.config.api_key,
- base_url=base_url,
temperature=params.get("temperature"),
max_tokens=params.get("max_tokens"),
top_p=params.get("top_p"),
)
def create_embedder(self, model_id: Optional[str] = None, **kwargs):
- """Create embedder via DashScope (OpenAI-compatible)"""
- try:
- from agno.knowledge.embedder.openai import OpenAIEmbedder
- except ImportError:
- raise ImportError("agno package not installed")
-
+ """Create embedder via DashScope (native)"""
# Use provided model_id or default embedding model
model_id = model_id or self.config.default_embedding_model
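
For context on what "native HTTP" means here, this is roughly the request that the DashScopeNativeChatModel added below issues; the endpoint and payload shapes are taken from its _payload() and _post() methods:

```python
# Sketch of the native text-generation round trip wrapped by the new class.
import os

import requests

resp = requests.post(
    "https://dashscope.aliyuncs.com/api/v1/services/aigc/text-generation/generation",
    headers={
        "Authorization": f"Bearer {os.environ['DASHSCOPE_API_KEY']}",
        "Content-Type": "application/json",
    },
    json={
        "model": "qwen3-max",
        "input": {"messages": [{"role": "user", "content": "Hello"}]},
        "parameters": {"result_format": "message"},
    },
    timeout=60,
)
print(resp.json()["output"]["choices"][0]["message"]["content"])
```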
@@ -552,12 +542,11 @@ def create_embedder(self, model_id: Optional[str] = None, **kwargs):
# Merge parameters: provider embedding defaults < kwargs
params = {**self.config.embedding_parameters, **kwargs}
- logger.info(f"Creating DashScope embedder: {model_id}")
+ logger.info(f"Creating DashScope embedder (native): {model_id}")
- return OpenAIEmbedder(
+ return DashScopeNativeEmbedder(
id=model_id,
api_key=self.config.api_key,
- base_url=self.config.base_url,
dimensions=int(params.get("dimensions", 2048))
if params.get("dimensions")
else None,
@@ -565,6 +554,345 @@ def create_embedder(self, model_id: Optional[str] = None, **kwargs):
)
+class DashScopeNativeEmbedder:
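+    """Embedding client for DashScope's native text-embedding endpoint.
+
+    Implements the subset of agno's embedder interface used by the project:
+    get_embedding / get_embedding_and_usage and their async variants.
+    """
+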
+ def __init__(
+ self,
+ *,
+ id: str,
+ api_key: str,
+ dimensions: Optional[int] = None,
+ encoding_format: str = "float",
+ ) -> None:
+ self.id = id
+ self.api_key = api_key
+ self.dimensions = dimensions
+ self.encoding_format = encoding_format
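+        # Batching is disabled for the native endpoint; these attributes
+        # exist for callers that probe for batch support.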
+ self.enable_batch = False
+ self.batch_size = 100
+
+ def _headers(self) -> Dict[str, str]:
+ return {
+ "Authorization": f"Bearer {self.api_key}",
+ "Content-Type": "application/json",
+ }
+
+ def _payload(self, texts: list[str]) -> Dict[str, Any]:
+ payload: Dict[str, Any] = {
+ "model": self.id,
+ "input": {"texts": texts},
+ }
+ if self.dimensions:
+ payload["dimensions"] = int(self.dimensions)
+ if self.encoding_format:
+ payload["encoding_format"] = self.encoding_format
+ return payload
+
+ def _post(self, texts: list[str]) -> Dict[str, Any]:
+ url = (
+ "https://dashscope.aliyuncs.com/api/v1/services/embeddings/"
+ "text-embedding/text-embedding"
+ )
+ resp = requests.post(
+ url,
+ headers=self._headers(),
+ json=self._payload(texts),
+ timeout=30,
+ )
+ resp.raise_for_status()
+ return resp.json()
+
+ def _parse_embedding(self, data: Dict[str, Any]) -> list[float]:
+ try:
+ out = data.get("output")
+ if isinstance(out, dict):
+ if (
+ "embeddings" in out
+ and isinstance(out["embeddings"], list)
+ and out["embeddings"]
+ ):
+ emb = out["embeddings"][0]
+ if isinstance(emb, dict) and "embedding" in emb:
+ return [float(x) for x in emb["embedding"]]
+ elif isinstance(emb, list):
+ return [float(x) for x in emb]
+ if "vectors" in out and out["vectors"]:
+ vec = out["vectors"][0]
+ return [float(x) for x in vec]
+ # OpenAI-compatible fallback shape
+ if "data" in data and isinstance(data["data"], list) and data["data"]:
+ item = data["data"][0]
+ if isinstance(item, dict) and "embedding" in item:
+ return [float(x) for x in item["embedding"]]
+ except Exception:
+ pass
+ raise ValueError("Failed to parse DashScope embedding response")
+
+ def get_embedding(self, text: str) -> list[float]:
+ data = self._post([text])
+ return self._parse_embedding(data)
+
+ def get_embedding_and_usage(self, text: str):
+ emb = self.get_embedding(text)
+ usage = None
+ return emb, usage
+
+ async def async_get_embedding(self, text: str) -> list[float]:
+ return await asyncio.to_thread(self.get_embedding, text)
+
+ async def async_get_embedding_and_usage(self, text: str):
+ emb = await self.async_get_embedding(text)
+ usage = None
+ return emb, usage
+
+
+class DashScopeNativeChatModel(Model):
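+    """Chat model backed by DashScope's native text-generation endpoint.
+
+    Subclasses agno's Model and implements the sync/async invoke and
+    streaming entry points over plain HTTP, with no SDK dependency.
+    """
+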
+    def __init__(
+        self,
+        *,
+        id: str,
+        api_key: str,
+        temperature: Optional[float] = None,
+        max_tokens: Optional[int] = None,
+        top_p: Optional[float] = None,
+    ) -> None:
+        # Initialize the agno base Model first so its remaining dataclass
+        # fields get their defaults instead of being left unset.
+        super().__init__(id=id, name="Qwen", provider="DashScope (native)")
+        self.api_key = api_key
+        self.temperature = temperature
+        self.max_tokens = max_tokens
+        self.top_p = top_p
+        self.supports_native_structured_outputs = False
+        self.supports_json_schema_outputs = False
+
+ def _headers(self) -> Dict[str, str]:
+ return {
+ "Authorization": f"Bearer {self.api_key}",
+ "Content-Type": "application/json",
+ }
+
+ def _payload(self, messages: List[Message]) -> Dict[str, Any]:
+ payload: Dict[str, Any] = {
+ "model": self.id,
+ "input": {
+ "messages": [
+ {"role": m.role, "content": m.get_content_string()}
+ for m in messages
+ ]
+ },
+ }
+ parameters: Dict[str, Any] = {}
+ if self.temperature is not None:
+ parameters["temperature"] = self.temperature
+ if self.max_tokens is not None:
+ parameters["max_tokens"] = self.max_tokens
+ if self.top_p is not None:
+ parameters["top_p"] = self.top_p
+ parameters["result_format"] = "message"
+ if parameters:
+ payload["parameters"] = parameters
+ return payload
+
+ def _post(self, messages: List[Message]) -> Dict[str, Any]:
+ url = "https://dashscope.aliyuncs.com/api/v1/services/aigc/text-generation/generation"
+ resp = requests.post(
+ url,
+ headers=self._headers(),
+ json=self._payload(messages),
+ timeout=60,
+ )
+ if resp.status_code >= 400:
+ try:
+ err = resp.json()
+ except Exception:
+ err = {"message": resp.text}
+ raise ModelProviderError(
+ message=err.get("message")
+ or err.get("error", {}).get("message")
+ or resp.text,
+ status_code=resp.status_code,
+ model_name=self.name,
+ model_id=self.id,
+ )
+ return resp.json()
+
+ def _parse_provider_response(
+ self, response: Dict[str, Any], **kwargs
+ ) -> ModelResponse:
+ mr = ModelResponse()
+ try:
+ out = response.get("output", {})
+ choices = out.get("choices", [])
+ if choices:
+ msg = choices[0].get("message", {})
+ mr.role = msg.get("role") or "assistant"
+ mr.content = msg.get("content")
+ usage = response.get("usage", {})
+ if usage:
+ mr.response_usage = Metrics(
+ input_tokens=int(usage.get("input_tokens", 0) or 0),
+ output_tokens=int(usage.get("output_tokens", 0) or 0),
+ total_tokens=int(usage.get("total_tokens", 0) or 0),
+ provider_metrics={"request_id": response.get("request_id")},
+ )
+ except Exception as e:
+ raise ModelProviderError(
+ message=str(e), model_name=self.name, model_id=self.id
+ )
+ return mr
+
+ def _parse_provider_response_delta(self, response: Any) -> ModelResponse:
+ return self._parse_provider_response(response)
+
+ def invoke(
+ self,
+ messages: List[Message],
+ assistant_message: Message,
+ response_format: Optional[Dict] = None,
+ tools: Optional[List[Dict[str, Any]]] = None,
+ tool_choice: Optional[Dict[str, Any]] = None,
+ run_response: Optional[Any] = None,
+ ) -> ModelResponse:
+ if run_response and run_response.metrics:
+ run_response.metrics.set_time_to_first_token()
+ assistant_message.metrics.start_timer()
+ data = self._post(messages)
+ assistant_message.metrics.stop_timer()
+ return self._parse_provider_response(data, response_format=response_format)
+
+ async def ainvoke(
+ self,
+ messages: List[Message],
+ assistant_message: Message,
+ response_format: Optional[Dict] = None,
+ tools: Optional[List[Dict[str, Any]]] = None,
+ tool_choice: Optional[Dict[str, Any]] = None,
+ run_response: Optional[Any] = None,
+ ) -> ModelResponse:
+ if run_response and run_response.metrics:
+ run_response.metrics.set_time_to_first_token()
+ assistant_message.metrics.start_timer()
+ data = await asyncio.to_thread(self._post, messages)
+ assistant_message.metrics.stop_timer()
+ return self._parse_provider_response(data, response_format=response_format)
+
+ def invoke_stream(
+ self,
+ messages: List[Message],
+ assistant_message: Message,
+ response_format: Optional[Dict] = None,
+ tools: Optional[List[Dict[str, Any]]] = None,
+ tool_choice: Optional[Dict[str, Any]] = None,
+ run_response: Optional[Any] = None,
+ ) -> Iterator[ModelResponse]:
+ url = "https://dashscope.aliyuncs.com/api/v1/services/aigc/text-generation/generation"
+ headers = {**self._headers(), "Accept": "text/event-stream"}
+ payload = self._payload(messages)
+ payload.setdefault("parameters", {})
+ payload["parameters"]["stream"] = True
+ payload["parameters"]["incremental_output"] = True
+ if run_response and run_response.metrics:
+ run_response.metrics.set_time_to_first_token()
+ assistant_message.metrics.start_timer()
+ last_content = ""
+ with requests.post(
+ url,
+ headers=headers,
+ json=payload,
+ stream=True,
+ timeout=60,
+ ) as resp:
+ if resp.status_code >= 400:
+ try:
+ err = resp.json()
+ except Exception:
+ err = {"message": resp.text}
+ raise ModelProviderError(
+ message=err.get("message")
+ or err.get("error", {}).get("message")
+ or resp.text,
+ status_code=resp.status_code,
+ model_name=self.name,
+ model_id=self.id,
+ )
+ for line in resp.iter_lines(decode_unicode=True):
+ if not line:
+ continue
+ if isinstance(line, bytes):
+ try:
+ line = line.decode("utf-8")
+ except Exception:
+ continue
+ if line.startswith("data:"):
+ data_str = line[5:].strip()
+ if not data_str:
+ continue
+ try:
+ evt = json.loads(data_str)
+ except Exception:
+ continue
+ mr = ModelResponse()
+ out = evt.get("output", {})
+ choices = out.get("choices") or []
+ if choices:
+ msg = choices[0].get("message", {})
+ cur = msg.get("content") or ""
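+                        # incremental_output=True should deliver pure deltas;
+                        # diff against the accumulated text in case the
+                        # server sends cumulative content instead.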
+ delta = (
+ cur[len(last_content) :]
+ if cur.startswith(last_content)
+ else cur
+ )
+ last_content = cur
+ mr.content = delta
+ mr.role = msg.get("role") or "assistant"
+ yield mr
+ usage = evt.get("usage")
+ if usage:
+ m = Metrics(
+ input_tokens=int(usage.get("input_tokens", 0) or 0),
+ output_tokens=int(usage.get("output_tokens", 0) or 0),
+ total_tokens=int(usage.get("total_tokens", 0) or 0),
+ provider_metrics={"request_id": evt.get("request_id")},
+ )
+ mr = ModelResponse()
+ mr.response_usage = m
+ yield mr
+ assistant_message.metrics.stop_timer()
+
+ async def ainvoke_stream(
+ self,
+ messages: List[Message],
+ assistant_message: Message,
+ response_format: Optional[Dict] = None,
+ tools: Optional[List[Dict[str, Any]]] = None,
+ tool_choice: Optional[Dict[str, Any]] = None,
+ run_response: Optional[Any] = None,
+ ) -> AsyncIterator[ModelResponse]:
+ loop = asyncio.get_running_loop()
+ queue: asyncio.Queue = asyncio.Queue()
+
+ def run_sync():
+ try:
+ for chunk in self.invoke_stream(
+ messages=messages,
+ assistant_message=assistant_message,
+ response_format=response_format,
+ tools=tools,
+ tool_choice=tool_choice,
+ run_response=run_response,
+ ):
+ loop.call_soon_threadsafe(queue.put_nowait, chunk)
+ finally:
+ loop.call_soon_threadsafe(queue.put_nowait, None)
+
+        # Run the blocking stream in a worker thread and drain the queue
+        # concurrently, so chunks are yielded as they arrive instead of
+        # after the whole response has been buffered.
+        worker = asyncio.create_task(asyncio.to_thread(run_sync))
+        while True:
+            item = await queue.get()
+            if item is None:
+                break
+            yield item
+        await worker
+
+
class ModelFactory:
"""
Factory for creating model instances with provider abstraction
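
For a quick smoke test, the classes added above can be constructed directly; a hedged sketch, with a placeholder api_key:

```python
# Hedged usage sketch for DashScopeNativeChatModel / DashScopeNativeEmbedder.
model = DashScopeNativeChatModel(
    id="qwen3-max",
    api_key="sk-...",  # placeholder; normally resolved from DASHSCOPE_API_KEY
    temperature=0.7,
)

embedder = DashScopeNativeEmbedder(
    id="text-embedding-v4",
    api_key="sk-...",  # placeholder
    dimensions=2048,
)
vector = embedder.get_embedding("hello world")  # POSTs to the native endpoint
```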
diff --git a/python/valuecell/utils/model.py b/python/valuecell/utils/model.py
index be452c30d..08723b1ef 100644
--- a/python/valuecell/utils/model.py
+++ b/python/valuecell/utils/model.py
@@ -57,6 +57,11 @@ def model_should_use_json_mode(model: AgnoModel) -> bool:
provider = getattr(model, "provider", None)
name = getattr(model, "name", None)
+ # DashScope native models: prefer JSON mode for compatibility
+ if provider and "dashscope" in str(provider).lower():
+ logger.debug("Detected DashScope model - using JSON mode")
+ return True
+
# Google Gemini requires JSON mode
if provider == AgnoGeminiModel.provider and name == AgnoGeminiModel.name:
logger.debug("Detected Gemini model - using JSON mode")