HKUDS · thelooter · Jan 19, 2026
diff --git a/.gitignore b/.gitignore
@@ -272,3 +272,8 @@ celerybeat.pid
 *.wav
 *.ogg
 !demo/*.mp4
+
+# ============================================
+# Miscellaneous
+# ============================================
+plans
diff --git a/config/main.yaml b/config/main.yaml
@@ -27,6 +27,10 @@ tools:
   query_item:
     enabled: true
     max_results: 5
+  math_solver:
+    enabled: true
+    timeout: 30
+    workspace: ./data/user/math_solver_workspace
 logging:
   # Global log level for the entire system (DEBUG, INFO, WARNING, ERROR)
   # This controls both DeepTutor logs and RAG module logs
@@ -59,6 +63,7 @@ solve:
     - "rag_hybrid"
     - "web_search"
     - "query_item"
+    - "math_solver"
     - "none"
   agents:
     investigate_agent:

diff --git a/requirements.txt b/requirements.txt
@@ -59,6 +59,7 @@ arxiv>=2.0.0
 # Scientific computing (for RAG and code execution)
 # ============================================
 numpy>=1.24.0,<2.0.0  # NumPy 1.24+ required for array API, <2.0 for compatibility
+sympy>=1.13.0         # Symbolic mathematics for math solver tool
 # matplotlib>=3.7.0  # Uncomment if needed for plotting
 # scipy>=1.11.0      # Uncomment if needed for scientific computing
 # pandas>=2.0.0      # Uncomment if needed for data analysis

diff --git a/src/agents/solve/__init__.py b/src/agents/solve/__init__.py
@@ -33,9 +33,6 @@
 
 # Main controller
 from .main_solver import MainSolver
-
-# Session management
-from .session_manager import SolverSessionManager, get_solver_session_manager
 from .memory import (
     InvestigateMemory,
     KnowledgeItem,
@@ -45,6 +42,9 @@
     ToolCallRecord,
 )
 
+# Session management
+from .session_manager import SolverSessionManager, get_solver_session_manager
+
 # Solve loop
 from .solve_loop import (
     ManagerAgent,

diff --git a/src/agents/solve/prompts/en/solve_loop/solve_agent.yaml b/src/agents/solve/prompts/en/solve_loop/solve_agent.yaml
@@ -10,20 +10,41 @@ system: |
   # Core Decision Logic
   Choose the most appropriate tool type based on the step:
 
-  1. **Involves calculation, derivation, plotting, or data processing**
+  1. **Symbolic mathematics (derivatives, integrals, equations, matrices, simplification)**
+     -> Select `math_solver`
+     - Derivatives, integrals, limits
+     - Equation solving (algebraic, differential)
+     - Matrix operations (determinant, inverse, eigenvalues)
+     - Expression simplification
+     - ❌ Do NOT write SymPy code
+     - ✅ Provide ONLY the mathematical expression in **Python/sympy syntax**
+     - Use `**` for powers, `*` for multiplication, `sqrt()` for square root
+     - For integrals: write `integrate(expression, (variable, lower, upper))`
+     - For derivatives: write `diff(expression, variable)`
+     - Examples:
+       - "x**2 + 2*x + 1 = 0"
+       - "sqrt(x + a) + sqrt(2*x - a) = x"
+       - "integrate(x**n * exp(-a*x), (x, 0, oo))"
+       - "diff(x**3, x)"
+     - ❌ Do NOT use LaTeX commands or Unicode math symbols (∫, ∞, √, ∂, etc.) - use Python syntax only
+
+  2. **Numerical computation, plotting, or data processing**
      -> Select `code_execution`
+     - General programming tasks
+     - Visualization (matplotlib, plotting)
+     - Data analysis and processing
      - ❌ Do NOT write Python code
-     - ✅ Only describe the intent of the computation in one short sentence
+     - ✅ Only describe the intent of the computation
 
-  2. **Involves definition lookup, principle confirmation, or formula retrieval**
+  3. **Involves definition lookup, principle confirmation, or formula retrieval**
      -> Select `rag_naive` or `rag_hybrid`
      - Precise formula / definition → `rag_naive`
      - Conceptual understanding / comparison → `rag_hybrid`
 
-  3. **Involves latest information or external knowledge**
+  4. **Involves latest information or external knowledge**
      -> Select `web_search`
 
-  4. **Pure logical reasoning, summarization, or information already sufficient**
+  5. **Pure logical reasoning, summarization, or information already sufficient**
      -> Select `none`
      - Write the answer directly as intent text
 
@@ -32,6 +53,7 @@ system: |
   - ❌ Do NOT include code blocks, backticks, or multiline strings
   - ❌ Do NOT use the field name `query`
   - ❌ Do NOT simulate execution
+  - ❌ For math_solver: Do NOT include natural language instructions like "square both sides" or "simplify to obtain"
 
   # Role Boundaries
   - Follow the role of the current step strictly
@@ -45,8 +67,8 @@ system: |
     "thoughts": "Brief explanation of your decision (one sentence)",
     "tool_calls": [
       {
-        "type": "code_execution | rag_naive | rag_hybrid | web_search | none",
-        "intent": "One-line description of what the tool should do"
+        "type": "math_solver | code_execution | rag_naive | rag_hybrid | web_search | none",
+        "intent": "For math_solver: ONLY the mathematical expression in Python/sympy syntax. For others: One-line description of what the tool should do"
       }
     ]
   }

diff --git a/src/agents/solve/prompts/zh/solve_loop/solve_agent.yaml b/src/agents/solve/prompts/zh/solve_loop/solve_agent.yaml
@@ -2,28 +2,46 @@ system: |
   # 角色定位
   你是 Solve 阶段的**工具策略家 (Tool Strategist)**。
   你的任务是：**根据当前步骤目标，决定是否需要调用工具，以及调用哪一种工具**。
-  ⚠️ 你只负责“决策”，不负责“执行”。
+  ⚠️ 你只负责"决策"，不负责"执行"。
 
   # 核心决策逻辑
   你需要判断当前步骤的性质，并选择最匹配的工具类型：
 
-  1. **涉及计算、推导、绘图、数据处理**
+  1. **符号数学计算（求导、积分、解方程、矩阵运算、化简）**
+     -> 选择 `math_solver`
+     - 求导、积分、极限
+     - 方程求解（代数、微分）
+     - 矩阵运算（行列式、逆、特征值）
+     - 表达式化简
+     - ❌ 不要编写 SymPy 代码
+     - ✅ 只提供数学表达式，使用 **Python/sympy 语法**
+     - 用 `**` 表示乘方，`*` 表示乘法，`sqrt()` 表示平方根
+     - 对于积分：写 `integrate(表达式, (变量, 下限, 上限))`
+     - 对于导数：写 `diff(表达式, 变量)`
+     - 示例：
+       - "x**2 + 2*x + 1 = 0"
+       - "sqrt(x + a) + sqrt(2*x - a) = x"
+       - "integrate(x**n * exp(-a*x), (x, 0, oo))"
+       - "diff(x**3, x)"
+     - ❌ 不要使用 LaTeX 符号（∫, ∞, √, ∂ 等）- 只用 Python 语法
+
+  2. **数值计算、绘图或数据处理**
      -> 选择 `code_execution`
      - ⚠️【重要】你 **绝对不能** 编写或输出任何可执行代码
-     - 你只需要用**一句简短的话**描述“要做什么计算 / 推导 / 绘图”
+     - 你只需要用**一句简短的话**描述"要做什么计算 / 推导 / 绘图"
      - 例如：
-       - “使用符号计算推导反向传播中的梯度公式”
-       - “计算函数在给定区间内的数值并绘制曲线”
+       - "使用符号计算推导反向传播中的梯度公式"
+       - "计算函数在给定区间内的数值并绘制曲线"
 
-  2. **涉及定义查找、原理确认、公式检索**
+  3. **涉及定义查找、原理确认、公式检索**
      -> 选择 `rag_naive` 或 `rag_hybrid`
      - 精确公式 / 定义 → `rag_naive`
      - 机制理解 / 对比分析 → `rag_hybrid`
 
-  3. **涉及最新信息或外部知识**
+  4. **涉及最新信息或外部知识**
      -> 选择 `web_search`
 
-  4. **纯逻辑推理、总结，或当前信息已经足够**
+  5. **纯逻辑推理、总结，或当前信息已经足够**
      -> 选择 `none`
      - 在 `query` 字段中直接给出该步骤的文字性结论
 
@@ -32,10 +50,12 @@ system: |
   - ❌ 不要输出多行文本
   - ❌ 不要使用 ``` 或任何代码块
   - ❌ 不要在 JSON 中包含换行符
-  - ❌ 不要尝试“模拟执行代码”
+  - ❌ 不要尝试"模拟执行代码"
+  - ❌ 对于 math_solver：不要使用 LaTeX 符号（∫, ∞, √ 等）
+  - ❌ 对于 math_solver：不要包含"两边平方"或"化简得到"等自然语言指令
 
   # 角色边界（必须遵守）
-  - 你只做“工具选择与意图描述”，不做工具执行
+  - 你只做"工具选择与意图描述"，不做工具执行
   - 不重复已有轨迹中的工具调用
   - 一旦信息足够，立刻使用 `none` 结束该步骤
 
@@ -46,8 +66,8 @@ system: |
     "thoughts": "简要说明你的决策理由（一句话即可）",
     "tool_calls": [
       {
-        "type": "code_execution | rag_naive | rag_hybrid | web_search | none",
-        "intent": "一句话描述你希望工具完成的事情"
+        "type": "math_solver | code_execution | rag_naive | rag_hybrid | web_search | none",
+        "intent": "对于 math_solver：只提供 Python/sympy 语法的数学表达式；对于其他工具：一句话描述你希望工具完成的事情"
       }
     ]
   }
@@ -71,6 +91,6 @@ user_template: |
   要求：
   1. 严格按照当前步骤目标 `{step_target}` 行事
   2. 避免重复已有轨迹中的工具调用
-  3. 如果需要计算或推导，只描述“做什么”，不要写代码
+  3. 如果需要计算或推导，只描述"做什么"，不要写代码
   4. 如果信息已经足够，选择 `none` 并直接给出结论
   5. 只输出 JSON，不要输出任何解释性文字
diff --git a/src/agents/solve/session_manager.py b/src/agents/solve/session_manager.py
@@ -112,7 +112,8 @@ def create_session(
             "title": title[:100],  # Limit title length
             "messages": [],
             "kb_name": kb_name,
-            "token_stats": token_stats or {
+            "token_stats": token_stats
+            or {
                 "model": "Unknown",
                 "calls": 0,
                 "tokens": 0,

diff --git a/src/agents/solve/solve_loop/solve_agent.py b/src/agents/solve/solve_loop/solve_agent.py
@@ -29,6 +29,7 @@ class SolveAgent(BaseAgent):
         "rag_hybrid",
         "web_search",
         "code_execution",
+        "math_solver",
         "finish",
     }
 

diff --git a/src/config/constants.py b/src/config/constants.py
@@ -19,6 +19,7 @@
     "rag_naive",
     "rag_hybrid",
     "query_item",
+    "math_solver",
     "none",
     "finish",
 ]

diff --git a/src/services/llm/capabilities.py b/src/services/llm/capabilities.py
@@ -348,4 +348,5 @@ def get_effective_temperature(
     "has_thinking_tags",
     "supports_tools",
     "requires_api_version",
-    "get_effective_temperature",]
+    "get_effective_temperature",
+]
diff --git a/src/services/llm/cloud_provider.py b/src/services/llm/cloud_provider.py
@@ -296,9 +296,7 @@ async def _openai_stream(
     data = {
         "model": model,
         "messages": msg_list,
-        "temperature": get_effective_temperature(
-            binding, model, kwargs.get("temperature", 0.7)
-        ),
+        "temperature": get_effective_temperature(binding, model, kwargs.get("temperature", 0.7)),
         "stream": True,
     }