Skip to content
Merged

Dev #22

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified assets/benchmark/evaluation_cortex_mem.webp
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified assets/benchmark/evaluation_langmem.webp
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
19 changes: 10 additions & 9 deletions examples/lomoco-evaluation/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -50,18 +50,17 @@ lomoco-evaluation/

**基础依赖**(所有系统都需要):
- Python 3.8+
- 必需的 Python 包: `pip install openai httpx toml tqdm jinja2 sentence-transformers scipy numpy`
- 必需的 Python 包: `pip install openai httpx toml tqdm jinja2 sentence-transformers scipy numpy qdrant-client`

**Cortex Memory 专用**:
- Rust 和 Cargo
- Qdrant 向量数据库

**LangMem 专用**:
- LangMem 和 LangGraph: `pip install langmem langgraph`
- 无额外依赖(现在使用 Qdrant 向量数据库,与 Cortex Memory 相同)

### 2. 启动 Qdrant 服务(仅 Cortex Memory 需要)
### 2. 启动 Qdrant 服务(所有系统都需要)

如果使用 Cortex Memory,需要启动 Qdrant 服务
Cortex Memory 和 LangMem 都需要 Qdrant 向量数据库服务

```bash
# macOS: 使用 Homebrew 安装
Expand Down Expand Up @@ -139,16 +138,16 @@ python3 generate_report.py \

### 方式二:使用 LangMem 评估

**适用场景**: 评估基于 LangChain/LangGraph 的 LangMem 记忆系统
**适用场景**: 评估基于 Qdrant 向量数据库的 LangMem 记忆系统

**前置要求**:
- 安装 LangMem: `pip install langmem langgraph`
- 无额外依赖(使用与 Cortex Memory 相同的 Qdrant 向量数据库)

```bash
# 1. 添加记忆到 LangMem
# 1. 添加记忆到 LangMem(使用 Qdrant 向量数据库)
python3 run_langmem_evaluation.py --method add --data dataset/locomo50.json

# 2. 搜索记忆并生成答案
# 2. 搜索记忆并生成答案(使用语义搜索)
python3 run_langmem_evaluation.py --method search --data dataset/locomo50.json --top_k 10

# 3. 评估结果
Expand All @@ -163,6 +162,8 @@ python3 generate_report.py \
--output results/langmem_report.html
```

**注意**: LangMem 现在使用与 Cortex Memory 相同的 Qdrant 向量数据库和 embedding 模型,确保评测的公平性。

### 方式三:使用 Simple RAG 基线

**适用场景**: 评估简单的 RAG 基线系统作为对比参考
Expand Down
102 changes: 51 additions & 51 deletions examples/lomoco-evaluation/run_langmem_evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,121 +19,121 @@
def run_add_experiment(data_path="dataset/locomo50.json"):
"""运行添加记忆的实验"""
print("=" * 60)
print("LangMem 添加记忆实验")
print("LangMem Add Memory Experiment")
print("=" * 60)

try:
# 初始化 LangMemAdd
print("🔄 初始化 LangMemAdd...")
print("Initializing LangMemAdd...")
add_manager = LangMemAdd(data_path=data_path, batch_size=1)
print("LangMemAdd 初始化成功")
print("LangMemAdd initialized successfully")

# 处理所有对话
print("🔄 开始添加记忆到 LangMem...")
print("Adding memories to LangMem...")
add_manager.process_all_conversations()
print("✅ 所有记忆添加完成")
print("All memories added successfully")

# 清理资源
del add_manager
print("🧹 资源清理完成")
print("\n✅ 添加记忆实验完成!")
print("Resources cleaned up")

print("\nAdd memory experiment completed!")
return True

except Exception as e:
print(f"❌ 添加记忆实验失败: {e}")
print(f"Add memory experiment failed: {e}")
return False


def run_search_experiment(data_path="dataset/locomo50.json", top_k=10):
"""运行搜索记忆的实验"""
print("=" * 60)
print("LangMem 搜索记忆实验")
print("LangMem Search Memory Experiment")
print("=" * 60)

try:
# 初始化 LangMemSearch
print("🔄 初始化 LangMemSearch...")
print("Initializing LangMemSearch...")
search_manager = LangMemSearch(
output_path="results/langmem_results.json",
output_path="results/langmem_results.json",
top_k=top_k
)
print("LangMemSearch 初始化成功")
print("LangMemSearch initialized successfully")

# 处理数据文件并生成结果
print("🔄 开始搜索记忆并回答问题...")
print("Searching memories and answering questions...")
search_manager.process_data_file(data_path)
print("✅ 搜索记忆实验完成")
print("Search memory experiment completed")

# 检查结果文件
if os.path.exists("results/langmem_results.json"):
with open("results/langmem_results.json", "r") as f:
results = json.load(f)
print(f"📊 生成了 {len(results)} 个结果")
print(f"Generated {len(results)} results")

# 清理资源
del search_manager
print("🧹 资源清理完成")
print("\n✅ 搜索记忆实验完成!")
print("Resources cleaned up")

print("\nSearch memory experiment completed!")
return True

except Exception as e:
print(f"❌ 搜索记忆实验失败: {e}")
print(f"Search memory experiment failed: {e}")
return False


def main():
"""主函数"""
parser = argparse.ArgumentParser(description="运行 LangMem 评估")
parser = argparse.ArgumentParser(description="Run LangMem evaluation")
parser.add_argument(
"--method",
choices=["add", "search"],
"--method",
choices=["add", "search"],
required=True,
help="要运行的方法: add (添加记忆) 或 search (搜索记忆)"
help="Method to run: add (add memories) or search (search memories)"
)
parser.add_argument(
"--data",
type=str,
default="dataset/locomo50.json",
help="数据集文件路径 (默认: dataset/locomo50.json)"
help="Dataset file path (default: dataset/locomo50.json)"
)
parser.add_argument(
"--top_k",
type=int,
"--top_k",
type=int,
default=10,
help="搜索时返回的记忆数量"
help="Number of memories to return during search"
)

args = parser.parse_args()

# 创建 results 目录
os.makedirs("results", exist_ok=True)
print("🚀 开始运行 LangMem 评估")
print(f"📋 方法: {args.method}")
print(f"📊 数据集: {args.data}")

print("Starting LangMem evaluation")
print(f"Method: {args.method}")
print(f"Dataset: {args.data}")

success = False

if args.method == "add":
success = run_add_experiment(args.data)
elif args.method == "search":
success = run_search_experiment(args.data, args.top_k)

if success:
print("\n🎉 评估成功完成!")
print("\n📋 后续步骤:")
print("1. 运行评估: python -m metrics.memory_evaluation \\")
print("\nEvaluation completed successfully!")
print("\nNext steps:")
print("1. Run evaluation: python -m metrics.memory_evaluation \\")
print(f" --results results/langmem_results.json \\")
print(f" --dataset {args.data} \\")
print(f" --output results/langmem_evaluated.json")
print("\n2. 生成HTML报告:")
print("\n2. Generate HTML report:")
print(" python generate_report.py \\")
print(f" --results results/langmem_evaluated.json \\")
print(f" --output results/langmem_report.html")
else:
print("\n❌ 评估失败,请检查错误信息")
print("\nEvaluation failed, please check error messages")
sys.exit(1)


Expand Down
Loading