diff --git a/examples/example_5_with_lazyllm_client.py b/examples/example_5_with_lazyllm_client.py
new file mode 100644
index 00000000..7e64cd46
--- /dev/null
+++ b/examples/example_5_with_lazyllm_client.py
@@ -0,0 +1,250 @@
+"""
+Unified Example: LazyLLM Integration Demo
+=========================================
+
+This example merges the functionality of:
+1. Example 1: Conversation Memory Processing
+2. Example 2: Skill Extraction
+3. Example 3: Multimodal Processing
+
+It demonstrates how to use the LazyLLM backend for:
+- Processing conversation history
+- Extracting technical skills from logs
+- Handling multimodal content (images + text)
+- Default source and models: Qwen
+
+Usage:
+    export MEMU_QWEN_API_KEY=your_api_key
+    python examples/example_5_with_lazyllm_client.py
+"""
+
+import asyncio
+import os
+import sys
+from pathlib import Path
+
+# Add src to sys.path FIRST before importing memu
+project_root = Path(__file__).parent.parent
+src_path = str(project_root / "src")
+if src_path not in sys.path:
+    sys.path.insert(0, src_path)
+
+from memu.app import MemoryService  # noqa: E402
+
+# ==========================================
+# PART 1: Conversation Memory Processing
+# ==========================================
+
+
+async def run_conversation_memory_demo(service):
+    print("\n" + "=" * 60)
+    print("PART 1: Conversation Memory Processing")
+    print("=" * 60)
+
+    conversation_files = [
+        "examples/resources/conversations/conv1.json",
+        "examples/resources/conversations/conv2.json",
+        "examples/resources/conversations/conv3.json",
+    ]
+
+    total_items = 0
+    categories = []
+
+    for conv_file in conversation_files:
+        if not os.path.exists(conv_file):
+            print(f"⚠ File not found: {conv_file}")
+            continue
+
+        try:
+            print(f"  Processing: {conv_file}")
+            result = await service.memorize(resource_url=conv_file, modality="conversation")
+            total_items += len(result.get("items", []))
+            categories = result.get("categories", [])
+            print(f"  ✓ Extracted {len(result.get('items', []))} items")
+        except Exception as e:
+            print(f"  ✗ Error processing {conv_file}: {e}")
+
+    # Output generation
+    output_dir = "examples/output/lazyllm_example/conversation"
+    os.makedirs(output_dir, exist_ok=True)
+    await generate_markdown_output(categories, output_dir)
+    print(f"✓ Conversation processing complete. Output: {output_dir}")
+
+
+# ==========================================
+# PART 2: Skill Extraction
+# ==========================================
+
+
+async def run_skill_extraction_demo(service):
+    print("\n" + "=" * 60)
+    print("PART 2: Skill Extraction from Logs")
+    print("=" * 60)
+
+    # Configure prompt for skill extraction
+    skill_prompt = """
+    You are analyzing an agent execution log. Extract the key actions taken, their outcomes, and lessons learned.
+
+    Output MUST be valid XML wrapped in tags.
+    Format:
+
+
+    [Action] Description...
+    [Lesson] Key lesson...
+
+
+    Category Name
+
+
+
+
+    Text: {resource}
+    """
+
+    # Update service config for skill extraction
+    service.memorize_config.memory_types = ["skill"]
+    service.memorize_config.memory_type_prompts = {"skill": skill_prompt}
+
+    logs = ["examples/resources/logs/log1.txt", "examples/resources/logs/log2.txt", "examples/resources/logs/log3.txt"]
+
+    all_skills = []
+    for log_file in logs:
+        if not os.path.exists(log_file):
+            continue
+
+        print(f"  Processing log: {log_file}")
+        try:
+            result = await service.memorize(resource_url=log_file, modality="document")
+            for item in result.get("items", []):
+                if item.get("memory_type") == "skill":
+                    all_skills.append(item.get("summary", ""))
+            print(f"  ✓ Extracted {len(result.get('items', []))} skills")
+        except Exception as e:
+            print(f"  ✗ Error: {e}")
+
+    # Generate summary guide
+    if all_skills:
+        output_file = "examples/output/lazyllm_example/skills/skill_guide.md"
+        await generate_skill_guide(all_skills, service, output_file)
+        print(f"✓ Skill guide generated: {output_file}")
+
+
+# ==========================================
+# PART 3: Multimodal Memory
+# ==========================================
+
+
+async def run_multimodal_demo(service):
+    print("\n" + "=" * 60)
+    print("PART 3: Multimodal Memory Processing")
+    print("=" * 60)
+
+    # Configure for knowledge extraction
+    xml_prompt = """
+    Analyze content and extract key information.
+    Output MUST be valid XML wrapped in tags.
+    Format:
+
+
+    Extracted content...
+    category_name
+
+
+
+    Content: {resource}
+    """
+
+    service.memorize_config.memory_types = ["knowledge"]
+    service.memorize_config.memory_type_prompts = {"knowledge": xml_prompt}
+
+    resources = [
+        ("examples/resources/docs/doc1.txt", "document"),
+        ("examples/resources/images/image1.png", "image"),
+    ]
+
+    categories = []
+    for res_file, modality in resources:
+        if not os.path.exists(res_file):
+            continue
+
+        print(f"  Processing {modality}: {res_file}")
+        try:
+            result = await service.memorize(resource_url=res_file, modality=modality)
+            categories = result.get("categories", [])
+            print(f"  ✓ Extracted {len(result.get('items', []))} items")
+        except Exception as e:
+            print(f"  ✗ Error: {e}")
+
+    output_dir = "examples/output/lazyllm_example/multimodal"
+    os.makedirs(output_dir, exist_ok=True)
+    await generate_markdown_output(categories, output_dir)
+    print(f"✓ Multimodal processing complete. Output: {output_dir}")
+
+
+# ==========================================
+# Helpers
+# ==========================================
+
+
+async def generate_markdown_output(categories, output_dir):
+    for cat in categories:
+        name = cat.get("name", "unknown")
+        summary = cat.get("summary", "")
+        if not summary:
+            continue
+
+        with open(os.path.join(output_dir, f"{name}.md"), "w", encoding="utf-8") as f:
+            f.write(f"# {name.replace('_', ' ').title()}\n\n")
+            cleaned = summary.replace("", "").replace("", "").strip()
+            f.write(cleaned)
+
+
+async def generate_skill_guide(skills, service, output_file):
+    os.makedirs(os.path.dirname(output_file), exist_ok=True)
+    skills_text = "\n\n".join(skills)
+    prompt = f"Summarize these skills into a guide:\n\n{skills_text}"
+
+    # Use LazyLLM via service
+    summary = await service.llm_client.summarize(text=prompt)
+
+    with open(output_file, "w", encoding="utf-8") as f:
+        f.write(summary)
+
+
+# ==========================================
+# Main Entry
+# ==========================================
+
+
+async def main():
+    print("Unified LazyLLM Example")
+    print("=" * 60)
+    # 1. Initialize Shared Service
+    service = MemoryService(
+        llm_profiles={
+            "default": {
+                "client_backend": "lazyllm_backend",
+                "chat_model": "qwen3-max",
+                "embed_model": "text-embedding-v3",
+                "lazyllm_source": {
+                    "source": "qwen",
+                    "llm_source": "qwen",
+                    "vlm_source": "qwen",
+                    "embed_source": "qwen",
+                    "stt_source": "qwen",
+                    "vlm_model": "qwen-vl-plus",
+                    "stt_model": "qwen-audio-turbo",
+                },
+            },
+        }
+    )
+
+    # 2. Run Demos
+    await run_conversation_memory_demo(service)
+    # await run_skill_extraction_demo(service)
+    # await run_multimodal_demo(service)
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/pyproject.toml b/pyproject.toml
index 8bed66ef..ba66f519 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -27,6 +27,7 @@ dependencies = [
     "alembic>=1.14.0",
     "pendulum>=3.1.0",
     "langchain-core>=1.2.7",
+    "lazyllm>=0.7.3",
 ]
 
 [build-system]
diff --git a/src/memu/app/service.py b/src/memu/app/service.py
index ff678707..4e2dea04 100644
--- a/src/memu/app/service.py
+++ b/src/memu/app/service.py
@@ -117,6 +117,19 @@ def _init_llm_client(self, config: LLMConfig | None = None) -> Any:
                 endpoint_overrides=cfg.endpoint_overrides,
                 embed_model=cfg.embed_model,
             )
+        elif backend == "lazyllm_backend":
+            from memu.llm.lazyllm_client import LazyLLMClient
+
+            return LazyLLMClient(
+                llm_source=cfg.lazyllm_source.llm_source or cfg.lazyllm_source.source,
+                vlm_source=cfg.lazyllm_source.vlm_source or cfg.lazyllm_source.source,
+                embed_source=cfg.lazyllm_source.embed_source or cfg.lazyllm_source.source,
+                stt_source=cfg.lazyllm_source.stt_source or cfg.lazyllm_source.source,
+                chat_model=cfg.chat_model,
+                embed_model=cfg.embed_model,
+                vlm_model=cfg.lazyllm_source.vlm_model,
+                stt_model=cfg.lazyllm_source.stt_model,
+            )
         else:
             msg = f"Unknown llm_client_backend '{cfg.client_backend}'"
             raise ValueError(msg)
diff --git a/src/memu/app/settings.py b/src/memu/app/settings.py
index 008715a5..40e3d337 100644
--- a/src/memu/app/settings.py
+++ b/src/memu/app/settings.py
@@ -89,6 +89,16 @@ def _default_memory_categories() -> list[CategoryConfig]:
     ]
 
 
+class LazyLLMSource(BaseModel):
+    source: str | None = Field(default=None, description="Default source for the LazyLLM client backend")
+    llm_source: str | None = Field(default=None, description="LLM source for the LazyLLM client backend")
+    embed_source: str | None = Field(default=None, description="Embedding source for the LazyLLM client backend")
+    vlm_source: str | None = Field(default=None, description="VLM source for the LazyLLM client backend")
+    stt_source: str | None = Field(default=None, description="STT source for the LazyLLM client backend")
+    vlm_model: str = Field(default="qwen-vl-plus", description="Vision-language model for the LazyLLM client backend")
+    stt_model: str = Field(default="qwen-audio-turbo", description="Speech-to-text model for the LazyLLM client backend")
+
+
 class LLMConfig(BaseModel):
     provider: str = Field(
         default="openai",
@@ -99,8 +109,9 @@ class LLMConfig(BaseModel):
     chat_model: str = Field(default="gpt-4o-mini")
     client_backend: str = Field(
         default="sdk",
-        description="Which LLM client backend to use: 'httpx' (httpx) or 'sdk' (official OpenAI).",
+        description="Which LLM client backend to use: 'httpx' (httpx), 'sdk' (official OpenAI), or 'lazyllm_backend' (additional LLM sources such as Qwen, Doubao, SiliconFlow, etc.).",
     )
+    lazyllm_source: LazyLLMSource = Field(default=LazyLLMSource())
     endpoint_overrides: dict[str, str] = Field(
         default_factory=dict,
         description="Optional overrides for HTTP endpoints (keys: 'chat'/'summary').",
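
The service hunk above resolves each modality's source with a fallback to the shared "source" field. A minimal configuration sketch, assuming LLMConfig and LazyLLMSource are importable from memu.app.settings; the import path and the standalone use of LLMConfig outside MemoryService are illustrative assumptions, not part of this diff.

from memu.app.settings import LazyLLMSource, LLMConfig  # assumed import path

# Select the new backend; only the shared `source` is set, so every modality
# falls back to Qwen via `_init_llm_client`'s `... or cfg.lazyllm_source.source`.
cfg = LLMConfig(
    client_backend="lazyllm_backend",
    chat_model="qwen3-max",
    lazyllm_source=LazyLLMSource(
        source="qwen",
        vlm_model="qwen-vl-plus",
        stt_model="qwen-audio-turbo",
    ),
)
print(cfg.lazyllm_source.llm_source or cfg.lazyllm_source.source)  # -> "qwen"
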
diff --git a/src/memu/llm/lazyllm_client.py b/src/memu/llm/lazyllm_client.py
new file mode 100644
index 00000000..eb1c40f7
--- /dev/null
+++ b/src/memu/llm/lazyllm_client.py
@@ -0,0 +1,134 @@
+import asyncio
+import functools
+from typing import Any, cast
+
+import lazyllm  # type: ignore[import-untyped]
+from lazyllm import LOG
+
+
+class LazyLLMClient:
+    """LazyLLM client that relies on the LazyLLM framework."""
+
+    DEFAULT_SOURCE = "qwen"
+
+    def __init__(
+        self,
+        *,
+        llm_source: str | None = None,
+        vlm_source: str | None = None,
+        embed_source: str | None = None,
+        stt_source: str | None = None,
+        chat_model: str | None = None,
+        vlm_model: str | None = None,
+        embed_model: str | None = None,
+        stt_model: str | None = None,
+    ):
+        self.llm_source = llm_source or self.DEFAULT_SOURCE
+        self.vlm_source = vlm_source or self.DEFAULT_SOURCE
+        self.embed_source = embed_source or self.DEFAULT_SOURCE
+        self.stt_source = stt_source or self.DEFAULT_SOURCE
+        self.chat_model = chat_model
+        self.vlm_model = vlm_model
+        self.embed_model = embed_model
+        self.stt_model = stt_model
+
+    async def _call_async(self, client: Any, *args: Any, **kwargs: Any) -> Any:
+        """
+        Asynchronously call a LazyLLM client with the given arguments and keyword arguments.
+        """
+        if kwargs:
+            return await asyncio.to_thread(functools.partial(client, *args, **kwargs))
+        else:
+            return await asyncio.to_thread(client, *args)
+
+    async def summarize(
+        self,
+        text: str,
+        *,
+        max_tokens: int | None = None,
+        system_prompt: str | None = None,
+    ) -> str:
+        """
+        Generate a summary or response for the input text using the configured LLM backend.
+
+        Args:
+            text: The input text to summarize or process.
+            max_tokens: (Optional) Maximum number of tokens to generate.
+            system_prompt: (Optional) System instruction to guide the LLM behavior.
+        Returns:
+            The generated summary text as a string.
+        """
+        client = lazyllm.namespace("MEMU").OnlineModule(source=self.llm_source, model=self.chat_model, type="llm")
+        prompt = system_prompt or "Summarize the text in one short paragraph."
+        full_prompt = f"{prompt}\n\ntext:\n{text}"
+        LOG.debug(f"Summarizing text with {self.llm_source}/{self.chat_model}")
+        response = await self._call_async(client, full_prompt)
+        return cast(str, response)
+
+    async def vision(
+        self,
+        prompt: str,
+        image_path: str,
+        *,
+        max_tokens: int | None = None,
+        system_prompt: str | None = None,
+    ) -> tuple[str, Any]:
+        """
+        Process an image with a text prompt using the configured VLM (Vision-Language Model).
+
+        Args:
+            prompt: Text prompt describing the request or question about the image.
+            image_path: Path to the image file to be analyzed.
+            max_tokens: (Optional) Maximum number of tokens to generate.
+            system_prompt: (Optional) System instruction to guide the VLM behavior.
+        Returns:
+            A tuple containing the generated text response and None (reserved for metadata).
+        """
+        client = lazyllm.namespace("MEMU").OnlineModule(source=self.vlm_source, model=self.vlm_model, type="vlm")
+        LOG.debug(f"Processing image with {self.vlm_source}/{self.vlm_model}: {image_path}")
+        # LazyLLM VLM accepts the prompt as the first positional argument and image_path as a keyword argument
+        response = await self._call_async(client, prompt, lazyllm_files=image_path)
+        return response, None
+
+    async def embed(
+        self,
+        texts: list[str],
+        batch_size: int = 10,
+    ) -> list[list[float]]:
+        """
+        Generate vector embeddings for a list of text strings.
+
+        Args:
+            texts: List of text strings to embed.
+            batch_size: (Optional) Batch size for processing embeddings (default: 10).
+        Returns:
+            A list of embedding vectors (lists of floats), one for each input text.
+        """
+        client = lazyllm.namespace("MEMU").OnlineModule(
+            source=self.embed_source, model=self.embed_model, type="embed", batch_size=batch_size
+        )
+        LOG.debug(f"Embedding {len(texts)} texts with {self.embed_source}/{self.embed_model}")
+        response = await self._call_async(client, texts)
+        return cast(list[list[float]], response)
+
+    async def transcribe(
+        self,
+        audio_path: str,
+        language: str | None = None,
+        prompt: str | None = None,
+    ) -> str:
+        """
+        Transcribe audio content to text using the configured STT (Speech-to-Text) backend.
+
+        Args:
+            audio_path: Path to the audio file to transcribe.
+            language: (Optional) Language code of the audio content.
+            prompt: (Optional) Text prompt to guide the transcription or translation.
+        Returns:
+            The transcribed text as a string.
+        """
+        client = lazyllm.namespace("MEMU").OnlineModule(source=self.stt_source, model=self.stt_model, type="stt")
+        LOG.debug(f"Transcribing audio with {self.stt_source}/{self.stt_model}: {audio_path}")
+        response = await self._call_async(client, audio_path)
+        return cast(str, response)
diff --git a/tests/test_lazyllm.py b/tests/test_lazyllm.py
new file mode 100644
index 00000000..d622b709
--- /dev/null
+++ b/tests/test_lazyllm.py
@@ -0,0 +1,91 @@
+#!/usr/bin/env python3
+"""
+Quick test script to verify LazyLLM backend configuration and basic functionality.
+
+Usage:
+    export MEMU_QWEN_API_KEY=your_api_key
+    python tests/test_lazyllm.py
+"""
+
+import asyncio
+import os
+import sys
+
+# Add src to sys.path
+src_path = os.path.abspath("src")
+sys.path.insert(0, src_path)
+
+from memu.llm.lazyllm_client import LazyLLMClient  # noqa: E402
+
+
+async def test_lazyllm_client():
+    """Test LazyLLMClient with basic operations."""
+
+    print("LazyLLM Backend Test")
+    print("=" * 60)
+
+    # Initialize the client (the API key is expected in the environment)
+    try:
+        client = LazyLLMClient(
+            llm_source="qwen",
+            vlm_source="qwen",
+            embed_source="qwen",
+            stt_source="qwen",
+            chat_model="qwen-plus",
+            vlm_model="qwen-vl-plus",
+            embed_model="text-embedding-v3",
+            stt_model="qwen-audio-turbo",
+        )
+        print("✓ LazyLLMClient initialized successfully")
+    except Exception as e:
+        print(f"❌ Failed to initialize LazyLLMClient: {e}")
+        return False
+
+    # Test 1: Summarization
+    print("\n[Test 1] Testing summarization...")
+    try:
+        test_text = "这是一段关于Python编程的文本。Python是一种高级编程语言,具有简单易学的语法。它被广泛用于数据分析、机器学习和Web开发。"  # noqa: RUF001
+        result = await client.summarize(test_text)
+        print("✓ Summarization successful")
+        print(f"  Result: {result[:100]}...")
+    except Exception as e:
+        print(f"❌ Summarization failed: {e}")
+        import traceback
+
+        traceback.print_exc()
+
+    # Test 2: Embedding
+    print("\n[Test 2] Testing embedding...")
+    try:
+        test_texts = ["Hello world", "How are you", "Nice to meet you"]
+        embeddings = await client.embed(test_texts)
+        print("✓ Embedding successful")
+        print(f"  Generated {len(embeddings)} embeddings")
+        if embeddings and embeddings[0]:
+            print(f"  Embedding dimension: {len(embeddings[0])}")
+    except Exception as e:
+        print(f"❌ Embedding failed: {e}")
+        import traceback
+
+        traceback.print_exc()
+
+    # Test 3: Vision (requires image file)
+    print("\n[Test 3] Testing vision...")
+    test_image_path = "examples/resources/images/image1.png"
+    if os.path.exists(test_image_path):
+        try:
+            result, _ = await client.vision(prompt="描述这张图片的内容", image_path=test_image_path)
+            print("✓ Vision successful")
+            print(f"  Result: {result[:100]}...")
+        except Exception as e:
+            print(f"❌ Vision failed: {e}")
+            import traceback
+
+            traceback.print_exc()
+    else:
+        print(f"⚠ Skipped: Test image not found at {test_image_path}")
+
+    return True
+
+
+if __name__ == "__main__":
+    success = asyncio.run(test_lazyllm_client())
+    sys.exit(0 if success else 1)
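
A note on the concurrency pattern in LazyLLMClient._call_async: LazyLLM OnlineModule objects are called synchronously, so the client offloads each call with asyncio.to_thread, using functools.partial to carry keyword arguments. The self-contained sketch below reproduces that pattern against a stand-in callable; fake_module and call_async are illustrative names only, not part of this diff.

import asyncio
import functools


def fake_module(prompt: str, *, lazyllm_files: str | None = None) -> str:
    # Stand-in for a synchronous LazyLLM OnlineModule; pretend this blocks on a network call.
    return f"echo: {prompt} (files={lazyllm_files})"


async def call_async(client, *args, **kwargs):
    # Same shape as LazyLLMClient._call_async: run the blocking callable on a worker thread.
    if kwargs:
        return await asyncio.to_thread(functools.partial(client, *args, **kwargs))
    return await asyncio.to_thread(client, *args)


async def main() -> None:
    print(await call_async(fake_module, "hello"))
    print(await call_async(fake_module, "describe", lazyllm_files="image1.png"))


if __name__ == "__main__":
    asyncio.run(main())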