diff --git a/assets/benchmark/evaluation_cortex_mem.webp b/assets/benchmark/evaluation_cortex_mem.webp
index 96692eb..8111238 100644
Binary files a/assets/benchmark/evaluation_cortex_mem.webp and b/assets/benchmark/evaluation_cortex_mem.webp differ
diff --git a/assets/benchmark/evaluation_langmem.webp b/assets/benchmark/evaluation_langmem.webp
index 725f3f8..b883d30 100644
Binary files a/assets/benchmark/evaluation_langmem.webp and b/assets/benchmark/evaluation_langmem.webp differ
diff --git a/examples/lomoco-evaluation/README.md b/examples/lomoco-evaluation/README.md
index 5385553..a8b24f1 100644
--- a/examples/lomoco-evaluation/README.md
+++ b/examples/lomoco-evaluation/README.md
@@ -50,18 +50,17 @@ lomoco-evaluation/
 **Base dependencies** (required by all systems):
 - Python 3.8+
-- Required Python packages: `pip install openai httpx toml tqdm jinja2 sentence-transformers scipy numpy`
+- Required Python packages: `pip install openai httpx toml tqdm jinja2 sentence-transformers scipy numpy qdrant-client`
 
 **Cortex Memory only**:
 - Rust and Cargo
-- Qdrant vector database
 
 **LangMem only**:
-- LangMem and LangGraph: `pip install langmem langgraph`
+- No extra dependencies (now uses the same Qdrant vector database as Cortex Memory)
 
-### 2. Start the Qdrant service (Cortex Memory only)
+### 2. Start the Qdrant service (required by all systems)
 
-If you use Cortex Memory, start the Qdrant service first:
+Both Cortex Memory and LangMem require the Qdrant vector database service:
 
 ```bash
 # macOS: install using Homebrew
@@ -139,16 +138,16 @@ python3 generate_report.py \
 ### Option 2: Evaluate with LangMem
 
-**Use case**: evaluate the LangMem memory system built on LangChain/LangGraph
+**Use case**: evaluate the LangMem memory system backed by the Qdrant vector database
 
 **Prerequisites**:
-- Install LangMem: `pip install langmem langgraph`
+- No extra dependencies (uses the same Qdrant vector database as Cortex Memory)
 
 ```bash
-# 1. Add memories to LangMem
+# 1. Add memories to LangMem (stored in the Qdrant vector database)
 python3 run_langmem_evaluation.py --method add --data dataset/locomo50.json
 
-# 2. Search memories and generate answers
+# 2. Search memories and generate answers (semantic search)
 python3 run_langmem_evaluation.py --method search --data dataset/locomo50.json --top_k 10
 
 # 3. Evaluate the results
@@ -163,6 +162,8 @@ python3 generate_report.py \
     --output results/langmem_report.html
 ```
 
+**Note**: LangMem now uses the same Qdrant vector database and embedding model as Cortex Memory, ensuring a fair comparison.
+
 ### Option 3: Use the Simple RAG baseline
 
 **Use case**: evaluate a simple RAG baseline system as a comparison reference
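
**Editor's note:** the modules changed below read two new `config.toml` sections, `[qdrant]` and `[embedding]`, alongside the existing `[llm]` section. A minimal sketch of a matching config, parsed with the `toml` package the evaluation already installs — every value here is a placeholder, not the project's real endpoint, key, model, or collection:

```python
import toml

# Hypothetical config.toml matching the keys read by add.py and search.py
# in this PR; all values are placeholders.
SAMPLE_CONFIG = """
[llm]
api_key = "sk-placeholder"
api_base_url = "https://api.example.com/v1"
model_efficient = "gpt-3.5-turbo"

[embedding]
api_key = "sk-placeholder"
api_base_url = "https://api.example.com/v1"
model_name = "text-embedding-3-small"
batch_size = 10

[qdrant]
url = "http://localhost:6334"
collection_name = "memo-rs"
"""

config = toml.loads(SAMPLE_CONFIG)
assert config["qdrant"]["collection_name"] == "memo-rs"
print(config["embedding"]["model_name"])
```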
diff --git a/examples/lomoco-evaluation/run_langmem_evaluation.py b/examples/lomoco-evaluation/run_langmem_evaluation.py
index 11e6806..11be365 100644
--- a/examples/lomoco-evaluation/run_langmem_evaluation.py
+++ b/examples/lomoco-evaluation/run_langmem_evaluation.py
@@ -19,121 +19,121 @@
 def run_add_experiment(data_path="dataset/locomo50.json"):
     """Run the add-memories experiment"""
     print("=" * 60)
-    print("LangMem 添加记忆实验")
+    print("LangMem Add Memory Experiment")
     print("=" * 60)
-    
+
     try:
         # Initialize LangMemAdd
-        print("🔄 初始化 LangMemAdd...")
+        print("Initializing LangMemAdd...")
         add_manager = LangMemAdd(data_path=data_path, batch_size=1)
-        print("✅ LangMemAdd 初始化成功")
-        
+        print("LangMemAdd initialized successfully")
+
         # Process all conversations
-        print("🔄 开始添加记忆到 LangMem...")
+        print("Adding memories to LangMem...")
         add_manager.process_all_conversations()
-        print("✅ 所有记忆添加完成")
-        
+        print("All memories added successfully")
+
         # Clean up resources
         del add_manager
-        print("🧹 资源清理完成")
-        
-        print("\n✅ 添加记忆实验完成!")
+        print("Resources cleaned up")
+
+        print("\nAdd memory experiment completed!")
         return True
-        
+
     except Exception as e:
-        print(f"❌ 添加记忆实验失败: {e}")
+        print(f"Add memory experiment failed: {e}")
         return False
 
 
 def run_search_experiment(data_path="dataset/locomo50.json", top_k=10):
     """Run the search-memories experiment"""
     print("=" * 60)
-    print("LangMem 搜索记忆实验")
+    print("LangMem Search Memory Experiment")
     print("=" * 60)
-    
+
     try:
         # Initialize LangMemSearch
-        print("🔄 初始化 LangMemSearch...")
+        print("Initializing LangMemSearch...")
         search_manager = LangMemSearch(
-            output_path="results/langmem_results.json", 
+            output_path="results/langmem_results.json",
             top_k=top_k
         )
-        print("✅ LangMemSearch 初始化成功")
-        
+        print("LangMemSearch initialized successfully")
+
         # Process the data file and generate results
-        print("🔄 开始搜索记忆并回答问题...")
+        print("Searching memories and answering questions...")
         search_manager.process_data_file(data_path)
-        print("✅ 搜索记忆实验完成")
-        
+        print("Search memory experiment completed")
+
         # Check the results file
         if os.path.exists("results/langmem_results.json"):
             with open("results/langmem_results.json", "r") as f:
                 results = json.load(f)
-            print(f"📊 生成了 {len(results)} 个结果")
-        
+            print(f"Generated {len(results)} results")
+
         # Clean up resources
         del search_manager
-        print("🧹 资源清理完成")
-        
-        print("\n✅ 搜索记忆实验完成!")
+        print("Resources cleaned up")
+
+        print("\nSearch memory experiment completed!")
         return True
-        
+
     except Exception as e:
-        print(f"❌ 搜索记忆实验失败: {e}")
+        print(f"Search memory experiment failed: {e}")
         return False
 
 
 def main():
     """Main function"""
-    parser = argparse.ArgumentParser(description="运行 LangMem 评估")
+    parser = argparse.ArgumentParser(description="Run LangMem evaluation")
     parser.add_argument(
-        "--method", 
-        choices=["add", "search"], 
+        "--method",
+        choices=["add", "search"],
         required=True,
-        help="要运行的方法: add (添加记忆) 或 search (搜索记忆)"
+        help="Method to run: add (add memories) or search (search memories)"
     )
     parser.add_argument(
         "--data",
         type=str,
         default="dataset/locomo50.json",
-        help="数据集文件路径 (默认: dataset/locomo50.json)"
+        help="Dataset file path (default: dataset/locomo50.json)"
     )
     parser.add_argument(
-        "--top_k", 
-        type=int, 
+        "--top_k",
+        type=int,
         default=10,
-        help="搜索时返回的记忆数量"
+        help="Number of memories to return during search"
    )
-    
+
     args = parser.parse_args()
-    
+
     # Create the results directory
     os.makedirs("results", exist_ok=True)
-    
-    print("🚀 开始运行 LangMem 评估")
-    print(f"📋 方法: {args.method}")
-    print(f"📊 数据集: {args.data}")
-    
+
+    print("Starting LangMem evaluation")
+    print(f"Method: {args.method}")
+    print(f"Dataset: {args.data}")
+
     success = False
-    
+
     if args.method == "add":
         success = run_add_experiment(args.data)
     elif args.method == "search":
         success = run_search_experiment(args.data, args.top_k)
-    
+
     if success:
-        print("\n🎉 评估成功完成!")
-        print("\n📋 后续步骤:")
-        print("1. 运行评估: python -m metrics.memory_evaluation \\")
+        print("\nEvaluation completed successfully!")
+        print("\nNext steps:")
+        print("1. Run evaluation: python -m metrics.memory_evaluation \\")
         print(f"   --results results/langmem_results.json \\")
         print(f"   --dataset {args.data} \\")
         print(f"   --output results/langmem_evaluated.json")
-        print("\n2. 生成HTML报告:")
+        print("\n2. Generate HTML report:")
         print("   python generate_report.py \\")
         print(f"   --results results/langmem_evaluated.json \\")
         print(f"   --output results/langmem_report.html")
     else:
-        print("\n❌ 评估失败,请检查错误信息")
+        print("\nEvaluation failed, please check error messages")
         sys.exit(1)
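
**Editor's note:** stripped of the evaluation plumbing, the new add and search paths in the two modules below are a plain qdrant-client round trip. A self-contained sketch of that flow — create a cosine collection, upsert one point carrying a `user_id` payload, query it back under a payload filter — using Qdrant's in-process local mode and a stub 4-dimensional vector in place of a real embedding (collection name, user id, and content are illustrative):

```python
import uuid

from qdrant_client import QdrantClient
from qdrant_client.models import (
    Distance, FieldCondition, Filter, MatchValue, PointStruct, VectorParams,
)

# ":memory:" runs Qdrant in-process; the PR connects to a server URL instead.
client = QdrantClient(":memory:")
client.create_collection(
    collection_name="memo-rs",
    vectors_config=VectorParams(size=4, distance=Distance.COSINE),
)

# Add path: embed the content (stubbed here) and upsert it with its payload.
stub_embedding = [0.1, 0.2, 0.3, 0.4]
client.upsert(
    collection_name="memo-rs",
    points=[PointStruct(
        id=str(uuid.uuid4()),
        vector=stub_embedding,
        payload={"user_id": "alice", "content": "Alice likes hiking"},
    )],
)

# Search path: embed the query (the stub reuses the same vector) and
# restrict results to one user's memories via a payload filter.
hits = client.query_points(
    collection_name="memo-rs",
    query=stub_embedding,
    query_filter=Filter(must=[
        FieldCondition(key="user_id", match=MatchValue(value="alice")),
    ]),
    limit=10,
    with_payload=True,
).points

for hit in hits:
    print(hit.score, hit.payload["content"])  # score 1.0: identical vectors
```

With `Distance.COSINE`, `query_points` returns similarity scores where higher is better, which is why the new `search_memory` can pass `result.score` through unchanged instead of the old keyword-count scoring.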
{args.data}") - + + print("Starting LangMem evaluation") + print(f"Method: {args.method}") + print(f"Dataset: {args.data}") + success = False - + if args.method == "add": success = run_add_experiment(args.data) elif args.method == "search": success = run_search_experiment(args.data, args.top_k) - + if success: - print("\n🎉 评估成功完成!") - print("\n📋 后续步骤:") - print("1. 运行评估: python -m metrics.memory_evaluation \\") + print("\nEvaluation completed successfully!") + print("\nNext steps:") + print("1. Run evaluation: python -m metrics.memory_evaluation \\") print(f" --results results/langmem_results.json \\") print(f" --dataset {args.data} \\") print(f" --output results/langmem_evaluated.json") - print("\n2. 生成HTML报告:") + print("\n2. Generate HTML report:") print(" python generate_report.py \\") print(f" --results results/langmem_evaluated.json \\") print(f" --output results/langmem_report.html") else: - print("\n❌ 评估失败,请检查错误信息") + print("\nEvaluation failed, please check error messages") sys.exit(1) diff --git a/examples/lomoco-evaluation/src/langmem_eval/add.py b/examples/lomoco-evaluation/src/langmem_eval/add.py index e9bb09f..3f3daf8 100644 --- a/examples/lomoco-evaluation/src/langmem_eval/add.py +++ b/examples/lomoco-evaluation/src/langmem_eval/add.py @@ -2,15 +2,17 @@ import os import time import logging +import toml from pathlib import Path -from typing import List, Dict, Any +from typing import List, Dict, Any, Optional from tqdm import tqdm try: - from langgraph.store.memory import InMemoryStore + from qdrant_client import QdrantClient + from qdrant_client.models import Distance, VectorParams, PointStruct, Filter, FieldCondition, MatchValue except ImportError: raise ImportError( - "langgraph is not installed. Please install it using: pip install langgraph" + "qdrant-client is not installed. 
@@ -117,23 +177,37 @@
         return self.data
 
     def add_memory(self, user_id: str, content: str, timestamp: str = "") -> bool:
-        """Add a memory using LangMem store"""
+        """Add a memory using Qdrant vector database with embedding"""
         try:
-            # Create namespace for this user
-            namespace = ("memories", user_id)
+            # Generate embedding for the content
+            embedding = self._get_embedding(content)
+
+            if not embedding:
+                logger.error(f"❌ Failed to generate embedding for user {user_id}")
+                self.stats["failed_memories"] += 1
+                return False
 
-            # Generate a unique key for this memory
+            # Generate a unique ID for this memory
             import uuid
-            memory_key = str(uuid.uuid4())
+            memory_id = str(uuid.uuid4())
 
-            # Store the memory directly
-            memory_value = {
-                "content": content,
-                "timestamp": timestamp,
-                "created_at": time.time()
-            }
+            # Create point for Qdrant
+            point = PointStruct(
+                id=memory_id,
+                vector=embedding,
+                payload={
+                    "user_id": user_id,
+                    "content": content,
+                    "timestamp": timestamp,
+                    "created_at": time.time()
+                }
+            )
 
-            self.store.put(namespace, memory_key, memory_value)
+            # Insert into Qdrant
+            self.qdrant_client.upsert(
+                collection_name=self.collection_name,
+                points=[point]
+            )
 
             self.stats["successful_memories"] += 1
             logger.debug(f"✅ Successfully added memory for user {user_id}")
@@ -260,39 +334,6 @@
 
         # Print summary
         self.print_summary()
-
-        # Save the store to a file for later use
-        self._save_store_to_file()
-
-    def _save_store_to_file(self):
-        """Save the memory store to a file using JSON"""
-        import json
-        memory_file = "results/langmem_store.json"
-        try:
-            os.makedirs("results", exist_ok=True)
-
-            # Convert store to a serializable format
-            memories_dict = {}
-            for namespace_tuple in self.store._data.keys():
-                namespace_str = "/".join(namespace_tuple)
-                memories_dict[namespace_str] = {}
-                for key, value in self.store._data[namespace_tuple].items():
-                    # Convert Item to dict
-                    memories_dict[namespace_str][key] = {
-                        "namespace": namespace_tuple,
-                        "key": key,
-                        "value": value.value if hasattr(value, 'value') else value
-                    }
-
-            # Save as JSON
-            with open(memory_file, 'w') as f:
-                json.dump(memories_dict, f, indent=2)
-
-            print(f"✅ Saved memory store to {memory_file}")
-        except Exception as e:
-            print(f"⚠️ Could not save memory store to file: {e}")
-            import traceback
-            traceback.print_exc()
 
     def print_summary(self):
         """Print processing summary"""
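
**Editor's note:** `add_memory` above keys each point with a random `uuid4`, so re-running the add step inserts duplicate memories rather than overwriting earlier ones. If idempotent re-runs mattered, a content-derived `uuid5` would make the same `(user_id, content)` pair upsert onto the same point — a possible tweak, not what this PR does:

```python
import uuid


def memory_point_id(user_id: str, content: str) -> str:
    """Deterministic point ID: the same user/content pair always maps to
    the same UUID, so a repeated add upserts over the old point instead
    of duplicating it."""
    return str(uuid.uuid5(uuid.NAMESPACE_URL, f"{user_id}:{content}"))


assert memory_point_id("alice", "likes hiking") == memory_point_id("alice", "likes hiking")
```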
diff --git a/examples/lomoco-evaluation/src/langmem_eval/search.py b/examples/lomoco-evaluation/src/langmem_eval/search.py
index 40e1a87..07a6810 100644
--- a/examples/lomoco-evaluation/src/langmem_eval/search.py
+++ b/examples/lomoco-evaluation/src/langmem_eval/search.py
@@ -2,19 +2,21 @@
 import os
 import time
 import logging
+import toml
 from collections import defaultdict
 from pathlib import Path
-from typing import List, Dict, Tuple, Any
+from typing import List, Dict, Tuple, Any, Optional
 from jinja2 import Template
 from openai import OpenAI
 from tqdm import tqdm
 
 try:
-    from langgraph.store.memory import InMemoryStore
+    from qdrant_client import QdrantClient
+    from qdrant_client.models import Filter, FieldCondition, MatchValue
 except ImportError:
     raise ImportError(
-        "langgraph is not installed. Please install it using: pip install langgraph"
+        "qdrant-client is not installed. Please install it using: pip install qdrant-client"
     )
 
 from .config_utils import check_openai_config, get_config_value, validate_config
@@ -24,7 +26,7 @@
 
 class LangMemSearch:
-    """Class to search memories in LangMem for evaluation"""
+    """Class to search memories in LangMem for evaluation using Qdrant vector database"""
 
     def __init__(self, output_path="results.json", top_k=10, config_path=None):
         self.top_k = top_k
@@ -92,54 +94,67 @@
                 f"OpenAI configuration not properly set in {self.config_path}"
             )
 
-        # Initialize OpenAI client from config.toml
-        api_key = get_config_value(self.config_path, "llm", "api_key")
-        api_base = get_config_value(self.config_path, "llm", "api_base_url")
-        self.llm_model = get_config_value(self.config_path, "llm", "model_efficient", "gpt-3.5-turbo")
-
-        # Create HTTP client with SSL verification disabled for internal APIs
-        import httpx
-        http_client = httpx.Client(verify=False)
-
-        self.openai_client = OpenAI(
-            api_key=api_key,
-            base_url=api_base,
-            http_client=http_client
-        )
-
-        # Initialize LangMem store
-        # Note: This will be a new store. For persistence, we need to use the same store instance
-        # or use a persistent store. For now, we'll assume memories are added in the same session.
-        self.store = InMemoryStore()
-
-        # Try to load previously stored memories from a file if exists
-        self._load_memories_from_file()
+        # Initialize components
+        self._initialize_components()
 
-    def _load_memories_from_file(self):
-        """Load memories from a JSON file if it exists"""
-        import json
-        memory_file = "results/langmem_store.json"
+    def _initialize_components(self):
+        """Initialize Qdrant client, embedding client, and LLM client"""
         try:
-            if os.path.exists(memory_file):
-                print(f"📂 Found memory file: {memory_file}")
-                with open(memory_file, 'r') as f:
-                    memories_dict = json.load(f)
-
-                print(f"✅ Loaded JSON with {len(memories_dict)} namespaces")
-
-                # Restore memories to store
-                total_items = 0
-                for namespace_str, items in memories_dict.items():
-                    namespace_tuple = tuple(namespace_str.split('/'))
-                    for key, item_data in items.items():
-                        self.store.put(namespace_tuple, key, item_data["value"])
-                        total_items += 1
-
-                print(f"✅ Successfully loaded {total_items} memories from {memory_file}")
+            # Load config
+            config_data = toml.load(self.config_path)
+
+            # Get Qdrant configuration
+            qdrant_config = config_data.get("qdrant", {})
+            self.qdrant_url = qdrant_config.get("url", "http://localhost:6334")
+            self.collection_name = qdrant_config.get("collection_name", "memo-rs")
+
+            # Get embedding configuration
+            embedding_config = config_data.get("embedding", {})
+            self.embedding_api_base_url = embedding_config.get("api_base_url", "")
+            self.embedding_api_key = embedding_config.get("api_key", "")
+            self.embedding_model_name = embedding_config.get("model_name", "")
+
+            # Get LLM configuration
+            api_key = get_config_value(self.config_path, "llm", "api_key")
+            api_base = get_config_value(self.config_path, "llm", "api_base_url")
+            self.llm_model = get_config_value(self.config_path, "llm", "model_efficient", "gpt-3.5-turbo")
+
+            # Initialize Qdrant client
+            self.qdrant_client = QdrantClient(url=self.qdrant_url)
+
+            # Initialize embedding client
+            import httpx
+            self.embedding_client = OpenAI(
+                api_key=self.embedding_api_key,
+                base_url=self.embedding_api_base_url,
+                http_client=httpx.Client(verify=False)
+            )
+
+            # Initialize LLM client
+            self.openai_client = OpenAI(
+                api_key=api_key,
+                base_url=api_base,
+                http_client=httpx.Client(verify=False)
+            )
+
+            logger.info(f"✅ LangMemSearch initialized successfully with Qdrant at {self.qdrant_url}")
+            logger.info(f"✅ Collection: {self.collection_name}")
+
         except Exception as e:
-            print(f"⚠️ Could not load memories from file: {e}")
-            import traceback
-            traceback.print_exc()
+            logger.error(f"❌ Failed to initialize LangMemSearch: {e}")
+            raise
+
+    def _get_embedding(self, text: str) -> List[float]:
+        """Get embedding for text"""
+        try:
+            response = self.embedding_client.embeddings.create(
+                model=self.embedding_model_name,
+                input=text
+            )
+            return response.data[0].embedding
+        except Exception as e:
+            logger.error(f"Error getting embedding: {e}")
+            return []
 
     def _find_config_file(self):
         """Find config.toml file in standard locations"""
@@ -170,69 +185,53 @@
         raise FileNotFoundError("Could not find config.toml file")
 
     def search_memory(self, user_id: str, query: str, max_retries: int = 3, retry_delay: float = 1) -> Tuple[List[Dict], float]:
-        """Search for memories using LangMem store"""
+        """Search for memories using Qdrant vector database with semantic search"""
         start_time = time.time()
         retries = 0
 
         while retries < max_retries:
             try:
-                # Create namespace for this user
-                namespace = ("memories", user_id)
-
-                # Search memories in the store
-                # LangMem store supports semantic search through the search method
-                memories = []
+                # Generate embedding for the query
+                query_embedding = self._get_embedding(query)
 
-                # Get all memories for this user
-                all_memories = list(self.store.search(namespace))
+                if not query_embedding:
+                    logger.error(f"❌ Failed to generate embedding for query: {query}")
+                    return [], time.time() - start_time
 
-                # Debug: print what we found
-                if len(all_memories) == 0:
-                    # Try to search with empty namespace to see all memories
-                    all_items = list(self.store.search(()))
-                    logger.debug(f"Total items in store: {len(all_items)}")
-                    if len(all_items) > 0:
-                        # Print first few items to see structure
-                        for i, item in enumerate(all_items[:3]):
-                            logger.debug(f"Item {i}: namespace={item.namespace}, key={item.key}")
-
-                # Simple relevance scoring based on query matching
-                # In a real implementation, you would use embedding-based similarity
-                query_lower = query.lower()
-                scored_memories = []
+                # Search in Qdrant with filter for user_id
+                search_filter = Filter(
+                    must=[
+                        FieldCondition(
+                            key="user_id",
+                            match=MatchValue(value=user_id)
+                        )
+                    ]
+                )
 
-                for memory_item in all_memories:
-                    memory_value = memory_item.value
-
-                    # Convert memory to string if it's not
-                    if isinstance(memory_value, dict):
-                        memory_content = str(memory_value)
-                    else:
-                        memory_content = str(memory_value)
-
-                    # Simple keyword matching score
-                    score = 0.0
-                    query_words = query_lower.split()
-                    for word in query_words:
-                        if word in memory_content.lower():
-                            score += 1.0
-
-                    if score > 0:
-                        scored_memories.append({
-                            "memory": memory_content,
-                            "timestamp": "",  # LangMem doesn't store timestamp by default
-                            "score": score,
-                        })
+                # Use query_points instead of search (newer Qdrant API)
+                search_results = self.qdrant_client.query_points(
+                    collection_name=self.collection_name,
+                    query=query_embedding,
+                    query_filter=search_filter,
+                    limit=self.top_k,
+                    with_payload=True
+                ).points
 
-                # Sort by score and take top_k
-                scored_memories.sort(key=lambda x: x["score"], reverse=True)
-                memories = scored_memories[:self.top_k]
+                # Convert Qdrant results to memory format
+                memories = []
+                for result in search_results:
+                    payload = result.payload
+                    memories.append({
+                        "memory": payload.get("content", ""),
+                        "timestamp": payload.get("timestamp", ""),
+                        "score": result.score,
+                    })
 
                 end_time = time.time()
                 return memories, end_time - start_time
 
             except Exception as e:
-                print(f"Search error: {e}, retrying...")
+                logger.error(f"Search error: {e}, retrying...")
                 retries += 1
                 if retries >= max_retries:
                     raise e
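
**Editor's note:** because memories now persist in Qdrant between the `add` and `search` runs, the add step can be sanity-checked before searching by counting points per user with the same payload filter `search_memory` builds. A sketch, assuming the default URL and collection name from the code above and a hypothetical `speaker_a` user id:

```python
from qdrant_client import QdrantClient
from qdrant_client.models import FieldCondition, Filter, MatchValue

# Defaults from the modules above; adjust to your config.toml.
client = QdrantClient(url="http://localhost:6334")

result = client.count(
    collection_name="memo-rs",
    count_filter=Filter(must=[
        FieldCondition(key="user_id", match=MatchValue(value="speaker_a")),
    ]),
    exact=True,
)
print(f"memories stored for speaker_a: {result.count}")
```

If the count comes back zero, the search step would quietly return empty memory lists for that user, so this is a cheap guard to run between the two phases.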