Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -272,3 +272,8 @@ celerybeat.pid
*.wav
*.ogg
!demo/*.mp4

# ============================================
# Miscellaneous
# ============================================
plans
5 changes: 5 additions & 0 deletions config/main.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@ tools:
query_item:
enabled: true
max_results: 5
math_solver:
enabled: true
timeout: 30
workspace: ./data/user/math_solver_workspace
logging:
# Global log level for the entire system (DEBUG, INFO, WARNING, ERROR)
# This controls both DeepTutor logs and RAG module logs
Expand Down Expand Up @@ -59,6 +63,7 @@ solve:
- "rag_hybrid"
- "web_search"
- "query_item"
- "math_solver"
- "none"
agents:
investigate_agent:
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ arxiv>=2.0.0
# Scientific computing (for RAG and code execution)
# ============================================
numpy>=1.24.0,<2.0.0 # NumPy 1.24+ required for array API, <2.0 for compatibility
sympy>=1.13.0 # Symbolic mathematics for math solver tool
# matplotlib>=3.7.0 # Uncomment if needed for plotting
# scipy>=1.11.0 # Uncomment if needed for scientific computing
# pandas>=2.0.0 # Uncomment if needed for data analysis
Expand Down
6 changes: 3 additions & 3 deletions src/agents/solve/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,9 +33,6 @@

# Main controller
from .main_solver import MainSolver

# Session management
from .session_manager import SolverSessionManager, get_solver_session_manager
from .memory import (
InvestigateMemory,
KnowledgeItem,
Expand All @@ -45,6 +42,9 @@
ToolCallRecord,
)

# Session management
from .session_manager import SolverSessionManager, get_solver_session_manager

# Solve loop
from .solve_loop import (
ManagerAgent,
Expand Down
36 changes: 29 additions & 7 deletions src/agents/solve/prompts/en/solve_loop/solve_agent.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,20 +10,41 @@ system: |
# Core Decision Logic
Choose the most appropriate tool type based on the step:

1. **Involves calculation, derivation, plotting, or data processing**
1. **Symbolic mathematics (derivatives, integrals, equations, matrices, simplification)**
-> Select `math_solver`
- Derivatives, integrals, limits
- Equation solving (algebraic, differential)
- Matrix operations (determinant, inverse, eigenvalues)
- Expression simplification
- ❌ Do NOT write SymPy code
- ✅ Provide ONLY the mathematical expression in **Python/sympy syntax**
- Use `**` for powers, `*` for multiplication, `sqrt()` for square root
- For integrals: write `integrate(expression, (variable, lower, upper))`
- For derivatives: write `diff(expression, variable)`
- Examples:
- "x**2 + 2*x + 1 = 0"
- "sqrt(x + a) + sqrt(2*x - a) = x"
- "integrate(x**n * exp(-a*x), (x, 0, oo))"
- "diff(x**3, x)"
- ❌ Do NOT use LaTeX symbols (∫, ∞, √, ∂, etc.) - use Python syntax only

2. **Numerical computation, plotting, or data processing**
-> Select `code_execution`
- General programming tasks
- Visualization (matplotlib, plotting)
- Data analysis and processing
- ❌ Do NOT write Python code
- ✅ Only describe the intent of the computation in one short sentence
- ✅ Only describe the intent of the computation

2. **Involves definition lookup, principle confirmation, or formula retrieval**
3. **Involves definition lookup, principle confirmation, or formula retrieval**
-> Select `rag_naive` or `rag_hybrid`
- Precise formula / definition → `rag_naive`
- Conceptual understanding / comparison → `rag_hybrid`

3. **Involves latest information or external knowledge**
4. **Involves latest information or external knowledge**
-> Select `web_search`

4. **Pure logical reasoning, summarization, or information already sufficient**
5. **Pure logical reasoning, summarization, or information already sufficient**
-> Select `none`
- Write the answer directly as intent text

Expand All @@ -32,6 +53,7 @@ system: |
- ❌ Do NOT include code blocks, backticks, or multiline strings
- ❌ Do NOT use the field name `query`
- ❌ Do NOT simulate execution
- ❌ For math_solver: Do NOT include natural language instructions like "square both sides" or "simplify to obtain"

# Role Boundaries
- Follow the role of the current step strictly
Expand All @@ -45,8 +67,8 @@ system: |
"thoughts": "Brief explanation of your decision (one sentence)",
"tool_calls": [
{
"type": "code_execution | rag_naive | rag_hybrid | web_search | none",
"intent": "One-line description of what the tool should do"
"type": "math_solver | code_execution | rag_naive | rag_hybrid | web_search | none",
"intent": "For math_solver: ONLY the mathematical expression. For others: One-line description of what the tool should do"
}
]
}
Expand Down
46 changes: 33 additions & 13 deletions src/agents/solve/prompts/zh/solve_loop/solve_agent.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,28 +2,46 @@ system: |
# 角色定位
你是 Solve 阶段的**工具策略家 (Tool Strategist)**。
你的任务是:**根据当前步骤目标,决定是否需要调用工具,以及调用哪一种工具**。
⚠️ 你只负责“决策”,不负责“执行”
⚠️ 你只负责"决策",不负责"执行"

# 核心决策逻辑
你需要判断当前步骤的性质,并选择最匹配的工具类型:

1. **涉及计算、推导、绘图、数据处理**
1. **符号数学计算(求导、积分、解方程、矩阵运算、化简)**
-> 选择 `math_solver`
- 求导、积分、极限
- 方程求解(代数、微分)
- 矩阵运算(行列式、逆、特征值)
- 表达式化简
- ❌ 不要编写 SymPy 代码
- ✅ 只提供数学表达式,使用 **Python/sympy 语法**
- 用 `**` 表示乘方,`*` 表示乘法,`sqrt()` 表示平方根
- 对于积分:写 `integrate(表达式, (变量, 下限, 上限))`
- 对于导数:写 `diff(表达式, 变量)`
- 示例:
- "x**2 + 2*x + 1 = 0"
- "sqrt(x + a) + sqrt(2*x - a) = x"
- "integrate(x**n * exp(-a*x), (x, 0, oo))"
- "diff(x**3, x)"
- ❌ 不要使用 LaTeX 符号(∫, ∞, √, ∂ 等)- 只用 Python 语法

2. **涉及数值计算、绘图、数据处理**
-> 选择 `code_execution`
- ⚠️【重要】你 **绝对不能** 编写或输出任何可执行代码
- 你只需要用**一句简短的话**描述要做什么计算 / 推导 / 绘图
- 你只需要用**一句简短的话**描述"要做什么计算 / 推导 / 绘图"
- 例如:
- 使用符号计算推导反向传播中的梯度公式
- 计算函数在给定区间内的数值并绘制曲线
- "使用符号计算推导反向传播中的梯度公式"
- "计算函数在给定区间内的数值并绘制曲线"

2. **涉及定义查找、原理确认、公式检索**
3. **涉及定义查找、原理确认、公式检索**
-> 选择 `rag_naive` 或 `rag_hybrid`
- 精确公式 / 定义 → `rag_naive`
- 机制理解 / 对比分析 → `rag_hybrid`

3. **涉及最新信息或外部知识**
4. **涉及最新信息或外部知识**
-> 选择 `web_search`

4. **纯逻辑推理、总结,或当前信息已经足够**
5. **纯逻辑推理、总结,或当前信息已经足够**
-> 选择 `none`
- 在 `intent` 字段中直接给出该步骤的文字性结论

Expand All @@ -32,10 +50,12 @@ system: |
- ❌ 不要输出多行文本
- ❌ 不要使用 ``` 或任何代码块
- ❌ 不要在 JSON 中包含换行符
- ❌ 不要尝试“模拟执行代码”
- ❌ 不要尝试"模拟执行代码"
- ❌ 对于 math_solver:不要使用 LaTeX 符号(∫, ∞, √ 等)
- ❌ 对于 math_solver:不要包含"两边平方"或"化简得到"等自然语言指令

# 角色边界(必须遵守)
- 你只做工具选择与意图描述,不做工具执行
- 你只做"工具选择与意图描述",不做工具执行
- 不重复已有轨迹中的工具调用
- 一旦信息足够,立刻使用 `none` 结束该步骤

Expand All @@ -46,8 +66,8 @@ system: |
"thoughts": "简要说明你的决策理由(一句话即可)",
"tool_calls": [
{
"type": "code_execution | rag_naive | rag_hybrid | web_search | none",
"intent": "一句话描述你希望工具完成的事情"
"type": "math_solver | code_execution | rag_naive | rag_hybrid | web_search | none",
"intent": "对于 math_solver:只提供 Python/sympy 语法的数学表达式;对于其他工具:一句话描述你希望工具完成的事情"
}
]
}
Expand All @@ -71,6 +91,6 @@ user_template: |
要求:
1. 严格按照当前步骤目标 `{step_target}` 行事
2. 避免重复已有轨迹中的工具调用
3. 如果需要计算或推导,只描述做什么,不要写代码
3. 如果需要计算或推导,只描述"做什么",不要写代码
4. 如果信息已经足够,选择 `none` 并直接给出结论
5. 只输出 JSON,不要输出任何解释性文字
3 changes: 2 additions & 1 deletion src/agents/solve/session_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,8 @@ def create_session(
"title": title[:100], # Limit title length
"messages": [],
"kb_name": kb_name,
"token_stats": token_stats or {
"token_stats": token_stats
or {
"model": "Unknown",
"calls": 0,
"tokens": 0,
Expand Down
1 change: 1 addition & 0 deletions src/agents/solve/solve_loop/solve_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ class SolveAgent(BaseAgent):
"rag_hybrid",
"web_search",
"code_execution",
"math_solver",
"finish",
}

Expand Down
1 change: 1 addition & 0 deletions src/config/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
"rag_naive",
"rag_hybrid",
"query_item",
"math_solver",
"none",
"finish",
]
Expand Down
3 changes: 2 additions & 1 deletion src/services/llm/capabilities.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,4 +348,5 @@ def get_effective_temperature(
"has_thinking_tags",
"supports_tools",
"requires_api_version",
"get_effective_temperature",]
"get_effective_temperature",
]
4 changes: 1 addition & 3 deletions src/services/llm/cloud_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,9 +296,7 @@ async def _openai_stream(
data = {
"model": model,
"messages": msg_list,
"temperature": get_effective_temperature(
binding, model, kwargs.get("temperature", 0.7)
),
"temperature": get_effective_temperature(binding, model, kwargs.get("temperature", 0.7)),
"stream": True,
}

Expand Down
Loading