From 1805188e4bae35849b15b3a971e3b759904d8f9d Mon Sep 17 00:00:00 2001 From: Zhaofeng Zhang <24791380+vcfgv@users.noreply.github.com> Date: Wed, 29 Oct 2025 11:27:42 +0800 Subject: [PATCH 1/5] fix(planner): handle malformed planner responses with error message --- python/valuecell/core/plan/planner.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/python/valuecell/core/plan/planner.py b/python/valuecell/core/plan/planner.py index 64d65e00d..a389f7fc6 100644 --- a/python/valuecell/core/plan/planner.py +++ b/python/valuecell/core/plan/planner.py @@ -213,6 +213,14 @@ async def _analyze_input_and_create_tasks( # Parse planning result and create tasks plan_raw = run_response.content + if not isinstance(plan_raw, PlannerResponse): + return ( + [], + ( + f"Planner produced a malformed response: `{plan_raw}`. " + "Please check your model capabilities and try again later." + ), + ) logger.info(f"Planner produced plan: {plan_raw}") # Check if plan is inadequate or has no tasks From 42438169e22c68438f88b0c56e20ab6d38d14c34 Mon Sep 17 00:00:00 2001 From: Zhaofeng Zhang <24791380+vcfgv@users.noreply.github.com> Date: Wed, 29 Oct 2025 11:27:42 +0800 Subject: [PATCH 2/5] refactor(prompts): enhance query handling and confirmation logic for scheduled tasks --- python/valuecell/core/plan/prompts.py | 238 +++++++++++--------------- 1 file changed, 96 insertions(+), 142 deletions(-) diff --git a/python/valuecell/core/plan/prompts.py b/python/valuecell/core/plan/prompts.py index c879d335a..c1b91c5b2 100644 --- a/python/valuecell/core/plan/prompts.py +++ b/python/valuecell/core/plan/prompts.py @@ -19,43 +19,61 @@ - If no agent stands out after reviewing the tool output, fall back to "ResearchAgent". - Create exactly one task with the user's query unchanged and set `pattern` to `once` by default. -2) Avoid optimization -- Do NOT rewrite, optimize, summarize, or split the query. +2) Query handling +- For normal tasks: forward the query EXACTLY as provided, unchanged. 
+- For scheduled/recurring tasks after confirmation: transform the query into single-execution form by: + * Removing time/schedule phrases (e.g., "every hour", "daily at 9 AM") + * Removing notification verbs (e.g., "notify me", "alert me", "let me know") + * Converting to direct action (e.g., "Monitor X and notify if Y" → "Check X for Y") - Only block when the request is clearly unusable (e.g., illegal content or impossible instruction). In that case, return `adequate: false` with a short reason and no tasks. 3) Contextual and preference statements - Treat short/contextual replies (e.g., "Go on", "tell me more") and user preferences/rules (e.g., "do not provide investment advice") as valid inputs; forward them unchanged as a single task. -- IMPORTANT: If the previous interaction was waiting for user confirmation (adequate: false with guidance_message asking for confirmation), then treat confirmation responses (e.g., "yes", "confirm", "ok", "proceed") as confirmations, NOT as contextual statements to be forwarded. +- IMPORTANT: Detecting confirmation scenarios: + * Check if the last planner response had `adequate: false` with a `guidance_message` asking for confirmation + * If yes, treat confirmation responses (e.g., "yes", "confirm", "ok", "proceed") as confirmations + * If no, treat them as regular contextual statements to be forwarded + * After detecting confirmation, retrieve the original query from conversation history to create the task 4) Recurring intent and schedule confirmation -- If the query suggests recurring monitoring WITHOUT a specific schedule, return `adequate: false` with a confirmation question in `guidance_message`. +- If the query suggests recurring monitoring WITHOUT a specific schedule, return `adequate: false` with a confirmation question that: + * Asks whether user wants one-time analysis or recurring monitoring + * If user chooses recurring, ask for specific schedule (e.g., "How often? 
Every hour, daily, or other interval?") - If the query explicitly specifies a schedule (e.g., "every hour", "daily at 9 AM"), you MUST confirm with the user first: * Return `adequate: false` with a clear confirmation request in `guidance_message` * The message should describe the task and the exact schedule being set up - * Store the original query in session history for reference - * After user confirms (e.g., "yes", "confirm", "ok", "proceed"), extract the CORE task requirement from the original query, removing time-related phrases - * IMPORTANT: The task `query` field should contain ONLY the core task description WITHOUT time/schedule information - * CRITICAL: Convert the query into a SINGLE-EXECUTION form that the remote agent can complete independently: - - Remove words suggesting continuous monitoring or notification: "alert", "notify", "remind", "inform", "send notification", "let me know", "tell me when" - - Transform into a direct query or analysis request: "Check X and report significant changes" → "Check X for significant changes" - - The query should be actionable in one execution cycle without requiring the agent to establish ongoing monitoring - * Schedule information should be stored in `schedule_config` separately, NOT in the query text - * The confirmation response itself should NOT be used as the task query + * After user confirms (e.g., "yes", "confirm", "ok", "proceed"): + - Retrieve the original query from conversation history + - Transform it into single-execution form as described in rule 2 + - Extract schedule information to `schedule_config` (separate from query text) + - The confirmation response itself should NOT be used as the task query * If user declines or provides corrections, adjust the plan accordingly +- CRITICAL: Do NOT create recurring tasks without explicit schedule. If user confirms recurring but no schedule is provided, ask for schedule details. 
5) Schedule configuration for recurring tasks - If the user specifies a time interval (e.g., "every hour", "every 30 minutes"), set `schedule_config.interval_minutes` accordingly. - If the user specifies a daily time (e.g., "every day at 9 AM", "daily at 14:00"), set `schedule_config.daily_time` in HH:MM format (24-hour). - Only one of `interval_minutes` or `daily_time` should be set, not both. -- If no schedule is specified for a recurring task, leave `schedule_config` as null (system will use default behavior). +- IMPORTANT: Recurring tasks MUST have an explicit schedule. If user confirms recurring intent but provides no schedule, ask for a specific time interval or daily time before creating the task. 6) Agent targeting policy -- Trust the specified agent's capabilities; do not over-validate or split into multiple tasks. - -7) Language & tone +- When `target_agent_name` is provided: use it directly without validation. +- When `target_agent_name` is not provided: call `tool_get_enabled_agents` and match based on: + * Agent's description relevance to the query + * Agent's available skills matching the task requirements + * If no clear match (confidence < 70%), fall back to "ResearchAgent" as the general-purpose agent. +- Trust the selected agent's capabilities; do not split into multiple tasks. + +7) Task title formatting +- Keep titles concise and descriptive. +- For English/space-delimited languages: maximum 10 words. +- For CJK languages (Chinese/Japanese/Korean): maximum 20 characters. +- For mixed-language titles: apply the stricter limit based on the dominant language. +- If the query is too long, extract the core subject (e.g., "Tesla Q3 revenue" from "What was Tesla's Q3 2024 revenue?"). + +8) Language & tone - Always respond in the user's language. Detect language from the user's query if no explicit locale is provided. - `guidance_message` MUST be written in the user's language. 
-- For Chinese users, use concise, polite phrasing and avoid mixed-language text. """ @@ -63,22 +81,18 @@ -- Default to pass-through: create a single task addressed to the provided `target_agent_name`, or to the best-fit agent identified via `tool_get_enabled_agents` when the target is unspecified (fall back to "ResearchAgent" only if no clear match is found). -- Set `pattern` to `once` unless the user explicitly confirms recurring intent. -- For each task, also provide a concise `title` summarizing the task. Keep it short: no more than 10 words (if space-delimited) or 10 characters (for CJK/no-space text). -- For recurring tasks with schedules: extract the core task requirement and transform it into a single-execution form: - * Remove time-related phrases (these go into `schedule_config`) - * Remove notification/monitoring verbs: "alert", "notify", "remind", "inform", "send notification", "let me know", "tell me when" - * Convert to direct action: "Monitor X and notify if Y" → "Check X for Y" - * The query should be executable once without implying ongoing monitoring -- Avoid query optimization and task splitting, but DO transform queries for scheduled tasks into single-execution form. +- Default to pass-through: create a single task with the original query unchanged for normal requests. +- Set `pattern` to `once` by default; only set to `recurring` when user explicitly confirms recurring intent. +- Provide a concise `title` following rule 7 (10 words for English, 20 characters for CJK). +- Agent selection: use provided `target_agent_name` or select via `tool_get_enabled_agents` following rule 6. +- For scheduled/recurring tasks after confirmation: transform the query following rule 2 (remove time phrases and notification verbs, convert to single-execution form). -- If the request is clearly unusable (illegal content or impossible instruction), return `adequate: false` with a short reason and no tasks. 
Provide a `guidance_message` explaining why the request cannot be processed. -- If the request suggests recurring monitoring or scheduled tasks, return `adequate: false` with a confirmation question in `guidance_message`. -- When waiting for confirmation: check conversation history to detect if the previous response was a confirmation request. If yes, and user responds with confirmation words (yes/ok/confirm/proceed), use the ORIGINAL query from history to create the task, NOT the confirmation response itself. -- When `adequate: false`, always provide a clear, user-friendly `guidance_message` that explains what is needed or asks for clarification. +- If the request is clearly unusable (illegal content or impossible instruction), return `adequate: false` with explanation in `guidance_message`. Provide no tasks. +- If the request suggests recurring monitoring or scheduled tasks without user confirmation, return `adequate: false` with a confirmation question in `guidance_message`. +- Confirmation detection: check conversation history for a previous `adequate: false` response whose `guidance_message` asked for confirmation. If found and the current input is a confirmation word (yes/ok/confirm/proceed), retrieve the original query from history to create the task. +- When `adequate: false`, always provide a clear, user-friendly `guidance_message` in the user's language. - When confirming a scheduled/recurring task, the `guidance_message` MUST follow the user's language. 
@@ -97,19 +111,19 @@ { "tasks": [ { - "title": "Short task title (<= 10 words or characters)", - "query": "User's original query, unchanged", - "agent_name": "target_agent_name (or best-fit agent selected via tool_get_enabled_agents when not provided)", + "title": "Short task title (<= 10 words for English, <= 20 chars for CJK)", + "query": "User's original query (unchanged for normal tasks, transformed for scheduled tasks after confirmation)", + "agent_name": "target_agent_name (or best-fit agent selected via tool_get_enabled_agents)", "pattern": "once" | "recurring", "schedule_config": { "interval_minutes": , "daily_time": "" - } (optional, only for recurring tasks with explicit schedule) + } (required for recurring tasks; must have either interval_minutes or daily_time set) } ], "adequate": true/false, "reason": "Brief explanation of planning decision", - "guidance_message": "User-friendly message when adequate is false (optional, required when adequate is false)" + "guidance_message": "User-friendly message in user's language (required when adequate is false)" } @@ -117,7 +131,7 @@ - + Input: { "target_agent_name": "ResearchAgent", @@ -135,34 +149,12 @@ } ], "adequate": true, - "reason": "Pass-through to the specified agent." + "reason": "Pass-through to specified agent with unchanged query." } - + - -Input: -{ - "target_agent_name": null, - "query": "Analyze the latest market trends" -} - -Output: -{ - "tasks": [ - { - "title": "Market trends", - "query": "Analyze the latest market trends", - "agent_name": "ResearchAgent", - "pattern": "once" - } - ], - "adequate": true, - "reason": "No target agent specified; selected ResearchAgent after reviewing tool_get_enabled_agents." 
-} - - - -// Normal contextual continuation (NOT a confirmation scenario) + +// Contextual continuation - forward unchanged Input: { "target_agent_name": "ResearchAgent", @@ -173,159 +165,121 @@ { "tasks": [ { - "title": "Go on", + "title": "Continue", "query": "Go on", "agent_name": "ResearchAgent", "pattern": "once" } ], "adequate": true, - "reason": "Contextual continuation; forwarded unchanged." + "reason": "Contextual continuation forwarded unchanged." } - + - -// Step 1: needs confirmation + +// Step 1: Recurring intent without schedule - ask for clarification Input: { "target_agent_name": "ResearchAgent", - "query": "Monitor Apple's quarterly earnings and notify me each time they release results" + "query": "Monitor Apple's quarterly earnings" } Output: { "tasks": [], "adequate": false, - "reason": "This suggests recurring monitoring. Need user confirmation.", - "guidance_message": "I understand you want to monitor Apple's quarterly earnings. Do you want me to set up a recurring task that checks for updates regularly, or would you prefer a one-time analysis of their latest earnings?" -} - -// Step 2: user confirms with simple "yes" -// IMPORTANT: Use conversation history to retrieve the ORIGINAL query, not "Yes, set up regular updates" -Input: -{ - "target_agent_name": "ResearchAgent", - "query": "Yes, set up regular updates" -} - -Output: -{ - "tasks": [ - { - "title": "Apple earnings monitor", - "query": "Monitor Apple's quarterly earnings and notify me each time they release results", - "agent_name": "ResearchAgent", - "pattern": "recurring" - } - ], - "adequate": true, - "reason": "User confirmed recurring intent; created recurring task with the ORIGINAL query from history." + "reason": "Recurring intent detected but no schedule specified.", + "guidance_message": "Would you like a one-time analysis of Apple's latest earnings, or recurring monitoring? If recurring, please specify how often (e.g., daily, weekly, every hour)." 
} - - -// Step 1: Detect schedule and request confirmation +// Step 2: User specifies schedule Input: { "target_agent_name": "ResearchAgent", - "query": "Check Tesla stock price every hour and alert me if there's significant change" + "query": "Recurring, check daily at 9 AM" } Output: { "tasks": [], "adequate": false, - "reason": "Scheduled task requires user confirmation.", - "guidance_message": "To better set up the Tesla price check task, please confirm the update frequency: every 60 minutes" + "reason": "Scheduled task requires final confirmation.", + "guidance_message": "To set up Apple earnings monitoring, please confirm: daily at 09:00" } -// Step 2: User confirms -// IMPORTANT: Extract core task WITHOUT time phrases AND convert to single-execution form. -// Remove "alert me" (notification intent) - agent should just check and report findings. +// Step 3: User confirms - create task with schedule Input: { "target_agent_name": "ResearchAgent", - "query": "Yes, please proceed" + "query": "Yes, confirmed" } Output: { "tasks": [ { - "title": "Tesla price check", - "query": "Check Tesla stock price for significant changes", + "title": "Apple earnings monitor", + "query": "Monitor Apple's quarterly earnings", "agent_name": "ResearchAgent", "pattern": "recurring", "schedule_config": { - "interval_minutes": 60, - "daily_time": null + "interval_minutes": null, + "daily_time": "09:00" } } ], "adequate": true, - "reason": "User confirmed scheduled task. Created recurring task with single-execution query (removed 'every hour' and 'alert me')." + "reason": "User confirmed scheduled task with daily_time schedule." 
} - + - -// Step 1: Detect daily schedule and request confirmation + +// Step 1: Scheduled task - request confirmation Input: { "target_agent_name": "ResearchAgent", - "query": "Analyze market trends every day at 9 AM" + "query": "Check Tesla stock price every hour and alert me if there's significant change" } Output: { "tasks": [], "adequate": false, - "reason": "Scheduled task requires user confirmation.", - "guidance_message": "To better set up the Market trends task, please confirm the update frequency: daily at 09:00" + "reason": "Scheduled task requires confirmation.", + "guidance_message": "To set up the Tesla price check, please confirm: every 60 minutes" } -// Step 2: User confirms -// IMPORTANT: Extract core task WITHOUT time phrases. "every day at 9 AM" goes to schedule_config, not query. +// Step 2: User confirms - transform query to single-execution form +// Remove time phrase ("every hour") and notification verb ("alert me") Input: { "target_agent_name": "ResearchAgent", - "query": "Yes, set it up" + "query": "Yes, proceed" } Output: { "tasks": [ { - "title": "Market trends", - "query": "Analyze market trends", + "title": "Tesla price check", + "query": "Check Tesla stock price for significant changes", "agent_name": "ResearchAgent", "pattern": "recurring", "schedule_config": { - "interval_minutes": null, - "daily_time": "09:00" + "interval_minutes": 60, + "daily_time": null } } ], "adequate": true, - "reason": "User confirmed scheduled task. Created recurring task with core requirement only (removed 'every day at 9 AM' from query)." + "reason": "Confirmed. Query transformed: removed 'every hour' (→schedule_config) and 'alert me' (notification intent)." 
} - - - -// Examples of transforming queries into single-execution form for scheduled tasks: -// Original: "Monitor AAPL stock and notify me if it drops below $150" -// Transformed: "Check AAPL stock price relative to $150 threshold" -// -// Original: "Keep track of Bitcoin price and let me know when it reaches $50k" -// Transformed: "Check Bitcoin price relative to $50k target" -// -// Original: "Watch for new AI research papers and alert me about important ones" -// Transformed: "Find and evaluate new AI research papers for importance" -// -// Original: "Send me a reminder to review my portfolio" -// Transformed: "Review portfolio and provide analysis" - - - + +// Note: For daily_time schedule, use format like: +// "schedule_config": {"interval_minutes": null, "daily_time": "09:00"} + + + Input: { "target_agent_name": null, @@ -337,9 +291,9 @@ "tasks": [], "adequate": false, "reason": "Request involves illegal activity.", - "guidance_message": "I cannot assist with requests that involve illegal activities such as unauthorized access to accounts. If you have a legitimate security concern, please consider contacting the appropriate authorities or the account owner directly." + "guidance_message": "I cannot assist with illegal activities such as unauthorized access to accounts. If you have a security concern, please contact appropriate authorities." 
} - + """ From eaf19cfaa0dc70197981058cf4917191066857c1 Mon Sep 17 00:00:00 2001 From: Zhaofeng Zhang <24791380+vcfgv@users.noreply.github.com> Date: Wed, 29 Oct 2025 11:27:42 +0800 Subject: [PATCH 3/5] test: add handling for malformed planner responses with guidance message --- .../valuecell/core/plan/tests/test_planner.py | 61 +++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/python/valuecell/core/plan/tests/test_planner.py b/python/valuecell/core/plan/tests/test_planner.py index 98f53d7cb..5f61d5ed5 100644 --- a/python/valuecell/core/plan/tests/test_planner.py +++ b/python/valuecell/core/plan/tests/test_planner.py @@ -165,3 +165,64 @@ def test_tool_get_enabled_agents_formats_cards(monkeypatch: pytest.MonkeyPatch): assert "" in output assert "Lookup" in output assert "" in output + + +@pytest.mark.asyncio +async def test_create_plan_handles_malformed_response(monkeypatch: pytest.MonkeyPatch): + """Planner returns non-PlannerResponse content -> guidance message with error.""" + + malformed_content = "not-a-planner-response" + + class FakeAgent: + def __init__(self, *args, **kwargs): + pass + + def run(self, *args, **kwargs): + return SimpleNamespace( + is_paused=False, + tools_requiring_user_input=[], + tools=[], + content=malformed_content, + ) + + monkeypatch.setattr(planner_mod, "Agent", FakeAgent) + monkeypatch.setattr(planner_mod, "get_model", lambda _: "stub-model") + monkeypatch.setattr(planner_mod, "agent_debug_mode_enabled", lambda: False) + + planner = ExecutionPlanner(StubConnections()) + + user_input = UserInput( + query="malformed please", + target_agent_name="", + meta=UserInputMetadata(conversation_id="conv-x", user_id="user-x"), + ) + + async def callback(_): + raise AssertionError("callback should not be invoked for malformed response") + + plan = await planner.create_plan(user_input, callback, "thread-x") + + # Should return no tasks and a guidance message explaining the issue + assert plan.tasks == [] + assert 
plan.guidance_message + assert "malformed response" in plan.guidance_message + assert malformed_content in plan.guidance_message + + +def test_tool_get_agent_description_dict_and_missing(): + """Cover dict formatting branch and not-found fallback in agent description.""" + + class Conn(StubConnections): + def __init__(self): + super().__init__({"DictAgent": {"name": "DictAgent", "desc": "d"}}) + + planner = ExecutionPlanner(Conn()) + + # Dict branch returns str(dict) + out = planner.tool_get_agent_description("DictAgent") + assert isinstance(out, str) + assert "DictAgent" in out + + # Not found branch + missing = planner.tool_get_agent_description("MissingAgent") + assert "could not be found" in missing From 58963188c75626da6384a86bcd7cf407c52d1b79 Mon Sep 17 00:00:00 2001 From: Zhaofeng Zhang <24791380+vcfgv@users.noreply.github.com> Date: Wed, 29 Oct 2025 11:27:42 +0800 Subject: [PATCH 4/5] refactor(prompts): ensure guidance_message and query are generated in the user's language --- python/valuecell/core/plan/prompts.py | 2 +- python/valuecell/core/super_agent/prompts.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/python/valuecell/core/plan/prompts.py b/python/valuecell/core/plan/prompts.py index c1b91c5b2..77c6b527b 100644 --- a/python/valuecell/core/plan/prompts.py +++ b/python/valuecell/core/plan/prompts.py @@ -73,7 +73,7 @@ 8) Language & tone - Always respond in the user's language. Detect language from the user's query if no explicit locale is provided. -- `guidance_message` MUST be written in the user's language. +- `guidance_message` and `query` MUST be written in the user's language. """ diff --git a/python/valuecell/core/super_agent/prompts.py b/python/valuecell/core/super_agent/prompts.py index 94597c985..63e5ec9ce 100644 --- a/python/valuecell/core/super_agent/prompts.py +++ b/python/valuecell/core/super_agent/prompts.py @@ -49,5 +49,6 @@ - When decision == "answer": include a short `answer_content` and skip `enriched_query`. 
- When decision == "handoff_to_planner": prefer including `enriched_query` that preserves the user intent. - Keep `reason` short and helpful. +- Always generate `answer_content` and `enriched_query` in the user's language. Detect language from the user's query if no explicit locale is provided. """ From 68f55440f970eba94af80f3e6c8c964281b01b83 Mon Sep 17 00:00:00 2001 From: Zhaofeng Zhang <24791380+vcfgv@users.noreply.github.com> Date: Wed, 29 Oct 2025 14:34:49 +0800 Subject: [PATCH 5/5] refactor(tests): update model stubbing to use utils module API in test_create_plan_handles_malformed_response --- python/valuecell/core/plan/tests/test_planner.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/python/valuecell/core/plan/tests/test_planner.py b/python/valuecell/core/plan/tests/test_planner.py index 0628d916b..18554a8f0 100644 --- a/python/valuecell/core/plan/tests/test_planner.py +++ b/python/valuecell/core/plan/tests/test_planner.py @@ -186,7 +186,10 @@ def run(self, *args, **kwargs): ) monkeypatch.setattr(planner_mod, "Agent", FakeAgent) - monkeypatch.setattr(planner_mod, "get_model", lambda _: "stub-model") + # Use utils module API for model stubbing per planner implementation + monkeypatch.setattr( + model_utils_mod, "get_model_for_agent", lambda *args, **kwargs: "stub-model" + ) monkeypatch.setattr(planner_mod, "agent_debug_mode_enabled", lambda: False) planner = ExecutionPlanner(StubConnections()) @@ -209,13 +212,19 @@ async def callback(_): assert malformed_content in plan.guidance_message -def test_tool_get_agent_description_dict_and_missing(): +def test_tool_get_agent_description_dict_and_missing(monkeypatch: pytest.MonkeyPatch): """Cover dict formatting branch and not-found fallback in agent description.""" class Conn(StubConnections): def __init__(self): super().__init__({"DictAgent": {"name": "DictAgent", "desc": "d"}}) + # Avoid real model creation in planner __init__ + monkeypatch.setattr( + model_utils_mod, 
"get_model_for_agent", lambda *args, **kwargs: "stub-model" + ) + monkeypatch.setattr(planner_mod, "agent_debug_mode_enabled", lambda: False) + planner = ExecutionPlanner(Conn()) # Dict branch returns str(dict)