def _sanitize_context_by_modalities(
    self,
    provider: Provider,
    req: ProviderRequest,
) -> None:
    """Strip unsupported image / tool-call structures from ``req.contexts``.

    The method is a no-op unless ``self.sanitize_context_by_modalities`` is
    truthy, ``req.contexts`` is a non-empty list, and the provider config
    carries a non-empty ``modalities`` list that lacks ``"image"`` and/or
    ``"tool_use"``.

    When it does run, ``req.contexts`` is replaced by a new list in which:

    * entries that are not dicts, or that have no truthy ``role``, are
      dropped;
    * without ``"tool_use"``: ``role == "tool"`` messages are dropped and
      ``tool_calls`` / ``tool_call_id`` are removed from assistant messages
      that carry ``tool_calls``;
    * without ``"image"``: parts whose ``type`` is ``image_url`` or
      ``image`` (case-insensitive) are removed from list-valued ``content``;
    * assistant messages left with neither tool calls nor non-blank content
      are dropped.

    Every kept message is a shallow copy, so the message dicts (and their
    ``content`` lists) that the caller passed in are never mutated.

    Args:
        provider: Provider whose ``provider_config["modalities"]`` is read.
        req: Request whose ``contexts`` attribute is sanitized.
    """
    if not self.sanitize_context_by_modalities:
        return

    if not isinstance(req.contexts, list) or not req.contexts:
        return

    modalities = provider.provider_config.get("modalities", None)
    # If modalities is not configured (or malformed), do not sanitize.
    if not modalities or not isinstance(modalities, list):
        return

    supports_image = "image" in modalities
    supports_tool_use = "tool_use" in modalities

    if supports_image and supports_tool_use:
        return

    image_part_types = {"image_url", "image"}

    sanitized_contexts: list[dict] = []
    removed_image_blocks = 0
    removed_tool_messages = 0
    removed_tool_calls = 0

    for msg in req.contexts:
        if not isinstance(msg, dict):
            continue

        role = msg.get("role")
        if not role:
            continue

        # Work on a shallow copy: the incoming dicts may be shared with
        # the caller's history, and must not lose keys as a side effect.
        new_msg: dict = dict(msg)

        # tool_use sanitize
        if not supports_tool_use:
            if role == "tool":
                # Tool response message: meaningless without tool calls.
                removed_tool_messages += 1
                continue
            if role == "assistant" and "tool_calls" in new_msg:
                removed_tool_calls += 1
                new_msg.pop("tool_calls", None)
                new_msg.pop("tool_call_id", None)

        # image sanitize
        if not supports_image:
            content = new_msg.get("content")
            if isinstance(content, list):
                kept_parts = [
                    part
                    for part in content
                    if not (
                        isinstance(part, dict)
                        and str(part.get("type", "")).lower() in image_part_types
                    )
                ]
                removed_here = len(content) - len(kept_parts)
                if removed_here:
                    removed_image_blocks += removed_here
                    # Rebind on the copy; the original list stays intact.
                    new_msg["content"] = kept_parts

        # Drop empty assistant messages (e.g. only tool_calls, no content).
        if role == "assistant" and not new_msg.get("tool_calls"):
            content = new_msg.get("content")
            if not content:
                continue
            if isinstance(content, str) and not content.strip():
                continue

        sanitized_contexts.append(new_msg)

    if removed_image_blocks or removed_tool_messages or removed_tool_calls:
        logger.debug(
            "sanitize_context_by_modalities applied: "
            f"removed_image_blocks={removed_image_blocks}, "
            f"removed_tool_messages={removed_tool_messages}, "
            f"removed_tool_calls={removed_tool_calls}"
        )

    req.contexts = sanitized_contexts
+ }, "max_agent_step": { "description": "Maximum Tool Call Rounds" }, @@ -524,4 +528,4 @@ } } } -} \ No newline at end of file +} diff --git a/dashboard/src/i18n/locales/zh-CN/features/config-metadata.json b/dashboard/src/i18n/locales/zh-CN/features/config-metadata.json index 589aa54a0..7e88d9a7e 100644 --- a/dashboard/src/i18n/locales/zh-CN/features/config-metadata.json +++ b/dashboard/src/i18n/locales/zh-CN/features/config-metadata.json @@ -184,6 +184,10 @@ "show_tool_use_status": { "description": "输出函数调用状态" }, + "sanitize_context_by_modalities": { + "description": "按模型能力清理历史上下文", + "hint": "开启后,在每次请求 LLM 前会按当前模型提供商中所选择的模型能力删除对话中不支持的图片/工具调用结构(会改变模型看到的历史)" + }, "max_agent_step": { "description": "工具调用轮数上限" }, @@ -522,4 +526,4 @@ } } } -} \ No newline at end of file +}