From 7752e30cfa3ad4b457be0de1e9f5a856fcacb307 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 20 Jan 2026 11:07:29 +0800 Subject: [PATCH 01/14] integrate LazyLLM as LLM provider --- .gitignore | 1 + ...mple_4_conversation_memory_with_lazyllm.py | 150 +++++++++ ...example_5_skill_extraction_with_lazyllm.py | 300 ++++++++++++++++++ ...xample_6_multimodal_memory_with_lazyllm.py | 168 ++++++++++ .../activities.md | 8 + .../experiences.md | 5 + .../conversation_example_lazyllm/goals.md | 6 + .../conversation_example_lazyllm/habits.md | 6 + .../conversation_example_lazyllm/knowledge.md | 1 + .../conversation_example_lazyllm/opinions.md | 3 + .../personal_info.md | 3 + .../preferences.md | 7 + .../relationships.md | 1 + .../conversation_example_lazyllm/work_life.md | 11 + .../architecture_concepts.md | 12 + .../best_practices.md | 16 + .../code_examples.md | 5 + .../technical_documentation.md | 12 + .../visual_diagrams.md | 5 + .../output/skill_example_lazyllm/log_1.md | 209 ++++++++++++ .../output/skill_example_lazyllm/log_2.md | 131 ++++++++ .../output/skill_example_lazyllm/log_3.md | 156 +++++++++ .../output/skill_example_lazyllm/skill.md | 150 +++++++++ src/memu/app/service.py | 10 + src/memu/app/settings.py | 6 +- src/memu/llm/lazyllm_client.py | 93 ++++++ tests/test_lazyllm.py | 121 +++++++ 27 files changed, 1595 insertions(+), 1 deletion(-) create mode 100644 examples/example_4_conversation_memory_with_lazyllm.py create mode 100644 examples/example_5_skill_extraction_with_lazyllm.py create mode 100644 examples/example_6_multimodal_memory_with_lazyllm.py create mode 100644 examples/output/conversation_example_lazyllm/activities.md create mode 100644 examples/output/conversation_example_lazyllm/experiences.md create mode 100644 examples/output/conversation_example_lazyllm/goals.md create mode 100644 examples/output/conversation_example_lazyllm/habits.md create mode 100644 examples/output/conversation_example_lazyllm/knowledge.md create mode 100644 examples/output/conversation_example_lazyllm/opinions.md create mode 100644 examples/output/conversation_example_lazyllm/personal_info.md create mode 100644 examples/output/conversation_example_lazyllm/preferences.md create mode 100644 examples/output/conversation_example_lazyllm/relationships.md create mode 100644 examples/output/conversation_example_lazyllm/work_life.md create mode 100644 examples/output/multimodal_example_lazyllm/architecture_concepts.md create mode 100644 examples/output/multimodal_example_lazyllm/best_practices.md create mode 100644 examples/output/multimodal_example_lazyllm/code_examples.md create mode 100644 examples/output/multimodal_example_lazyllm/technical_documentation.md create mode 100644 examples/output/multimodal_example_lazyllm/visual_diagrams.md create mode 100644 examples/output/skill_example_lazyllm/log_1.md create mode 100644 examples/output/skill_example_lazyllm/log_2.md create mode 100644 examples/output/skill_example_lazyllm/log_3.md create mode 100644 examples/output/skill_example_lazyllm/skill.md create mode 100644 src/memu/llm/lazyllm_client.py create mode 100644 tests/test_lazyllm.py diff --git a/.gitignore b/.gitignore index 40472ad9..c7700a38 100644 --- a/.gitignore +++ b/.gitignore @@ -3,6 +3,7 @@ data/ __pycache__/ *.py[cod] *$py.class +venv-memU/ # C extensions *.so diff --git a/examples/example_4_conversation_memory_with_lazyllm.py b/examples/example_4_conversation_memory_with_lazyllm.py new file mode 100644 index 00000000..cc4cf8ea --- /dev/null +++ 
b/examples/example_4_conversation_memory_with_lazyllm.py @@ -0,0 +1,150 @@ +""" +Example 4: Multiple Conversations -> Memory Category File with LazyLLM Backend + +This example demonstrates how to process multiple conversation files +and generate a memory category JSON file using the LazyLLM backend. + +Usage: + export LAZYLLM_QWEN_API_KEY=your_api_key + python examples/example_4_conversation_memory_with_lazyllm.py +""" + +import asyncio +import os +import sys +from pathlib import Path + +# Add src to sys.path FIRST before importing memu +project_root = Path(__file__).parent.parent +src_path = str(project_root / "src") +if src_path not in sys.path: + sys.path.insert(0, src_path) +from memu.app import MemoryService + +# Add src to sys.path +src_path = os.path.abspath("src") +sys.path.insert(0, src_path) + + +async def generate_memory_md(categories, output_dir): + """Generate concise markdown files for each memory category.""" + + os.makedirs(output_dir, exist_ok=True) + + generated_files = [] + + for cat in categories: + name = cat.get("name", "unknown") + summary = cat.get("summary", "") + + filename = f"{name}.md" + filepath = os.path.join(output_dir, filename) + + with open(filepath, "w", encoding="utf-8") as f: + # Title + # Content - concise version + if summary: + cleaned_summary = summary.replace("", "").replace("", "").strip() + f.write(f"{cleaned_summary}\n") + else: + f.write("*No content available*\n") + + generated_files.append(filename) + + return generated_files + + +async def main(): + """ + Process multiple conversation files and generate memory categories using LazyLLM. + + This example: + 1. Initializes MemoryService with LazyLLM backend + 2. Processes conversation JSON files + 3. Extracts memory categories from conversations + 4. Outputs the categories to files + """ + print("Example 4: Conversation Memory Processing with LazyLLM Backend") + print("-" * 60) + + # Get LazyLLM API key from environment + # api_key = os.getenv("LAZYLLM_QWEN_API_KEY") + api_key = os.getenv("LAZYLLM_QWEN_API_KEY") + if not api_key: + msg = "Please set LAZYLLM_QWEN_API_KEY environment variable" + raise ValueError(msg) + + # Initialize service with LazyLLM backend using llm_profiles + # The "default" profile is required and used as the primary LLM configuration + service = MemoryService( + llm_profiles={ + "default": { + "client_backend": "lazyllm_backend", + "source": "qwen", + "chat_model": "qwen-plus", + "vlm_model": "qwen-vl-plus", + "embed_model": "text-embedding-v3", + "stt_model": "qwen-audio-turbo", + "api_key": api_key, + }, + "embedding": { + "client_backend": "lazyllm_backend", + "source": "qwen", + "chat_model": "qwen-plus", + "vlm_model": "qwen-vl-plus", + "embed_model": "text-embedding-v3", + "stt_model": "qwen-audio-turbo", + "api_key": api_key, + }, + }, + ) + + # Conversation files to process + conversation_files = [ + "examples/resources/conversations/conv1.json", + "examples/resources/conversations/conv2.json", + "examples/resources/conversations/conv3.json", + ] + + # Process each conversation + print("\nProcessing conversations with LazyLLM...") + total_items = 0 + categories = [] + for conv_file in conversation_files: + if not os.path.exists(conv_file): + print(f"⚠ File not found: {conv_file}") + continue + + try: + print(f" Processing: {conv_file}") + result = await service.memorize(resource_url=conv_file, modality="conversation") + total_items += len(result.get("items", [])) + # Categories are returned in the result and updated after each memorize call + categories = 
result.get("categories", []) + print(f" ✓ Extracted {len(result.get('items', []))} items") + except Exception as e: + print(f" ✗ Error processing {conv_file}: {e}") + import traceback + traceback.print_exc() + + # Write to output files + output_dir = "examples/output/conversation_example_lazyllm" + os.makedirs(output_dir, exist_ok=True) + + # 1. Generate individual Markdown files for each category + generated_files = await generate_memory_md(categories, output_dir) + + print(f"\n" + "=" * 60) + print(f"✓ Processed {len([f for f in conversation_files if os.path.exists(f)])} files") + print(f"✓ Extracted {total_items} total items") + print(f"✓ Generated {len(categories)} categories:") + for cat in categories: + print(f" - {cat.get('name', 'unknown')}") + print(f"✓ Output files ({len(generated_files)}):") + for file in generated_files: + print(f" - {os.path.join(output_dir, file)}") + print(f"✓ Output directory: {output_dir}/") + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/example_5_skill_extraction_with_lazyllm.py b/examples/example_5_skill_extraction_with_lazyllm.py new file mode 100644 index 00000000..3f9e01ef --- /dev/null +++ b/examples/example_5_skill_extraction_with_lazyllm.py @@ -0,0 +1,300 @@ +""" +Example 5: Workflow & Agent Logs -> Skill Extraction (with LazyLLM) + +This example demonstrates how to extract skills from workflow descriptions +and agent runtime logs using LazyLLM backend, then output them to a Markdown file. + +Usage: + export LAZYLLM_QWEN_API_KEY=your_api_key + python examples/example_5_skill_extraction_with_lazyllm.py +""" + +import asyncio +import os +import sys +from pathlib import Path + +# Add src to sys.path FIRST before importing memu +project_root = Path(__file__).parent.parent +src_path = str(project_root / "src") +if src_path not in sys.path: + sys.path.insert(0, src_path) +from memu.app import MemoryService + +# Add src to sys.path +src_path = os.path.abspath("src") +sys.path.insert(0, src_path) + + +async def generate_skill_md( + all_skills, service, output_file, attempt_number, total_attempts, categories=None, is_final=False +): + """ + Use LLM to generate a concise task execution guide (skill.md). + + This creates a production-ready guide incorporating lessons learned from deployment attempts. + """ + + os.makedirs(os.path.dirname(output_file), exist_ok=True) + + # Prepare context for LLM + skills_text = "\n\n".join([f"### From {skill_data['source']}\n{skill_data['skill']}" for skill_data in all_skills]) + + # Get category summaries if available + categories_text = "" + if categories: + categories_with_content = [cat for cat in categories if cat.get("summary") and cat.get("summary").strip()] + if categories_with_content: + categories_text = "\n\n".join([ + f"**{cat.get('name', 'unknown')}**:\n{cat.get('summary', '')}" for cat in categories_with_content + ]) + + # Construct prompt for LLM + prompt = f"""Generate a concise production-ready task execution guide. + +**Context**: +- Task: Production Microservice Deployment with Blue-Green Strategy +- Progress: {attempt_number}/{total_attempts} attempts +- Status: {"Complete" if is_final else f"v0.{attempt_number}"} + +**Skills Learned**: +{skills_text} + +{f"**Categories**:\n{categories_text}" if categories_text else ""} + +**Required Structure**: + +1. 
**Frontmatter** (YAML): + - name: production-microservice-deployment + - description: Brief description + - version: {"1.0.0" if is_final else f"0.{attempt_number}.0"} + - status: {"Production-Ready" if is_final else "Evolving"} + +2. **Introduction**: What this guide does and when to use it + +3. **Deployment Context**: Strategy, environment, goals + +4. **Pre-Deployment Checklist**: + - Actionable checks from lessons learned + - Group by category (Database, Monitoring, etc.) + - Mark critical items + +5. **Deployment Procedure**: + - Step-by-step instructions with commands + - Include monitoring points + +6. **Rollback Procedure**: + - When to rollback (thresholds) + - Exact commands + - Expected recovery time + +7. **Common Pitfalls & Solutions**: + - Failures/issues encountered + - Root cause, symptoms, solution + +8. **Best Practices**: + - What works well + - Expected timelines + +9. **Key Takeaways**: 3-5 most important lessons + +**Style**: +- Use markdown with clear hierarchy +- Be specific and concise +- Technical and production-grade tone +- Focus on PRACTICAL steps + +**CRITICAL**: +- ONLY use information from provided skills/lessons +- DO NOT make assumptions or add generic advice +- Extract ACTUAL experiences from the logs + +Generate the complete markdown document now:""" + + # Use LazyLLM through MemoryService + system_prompt = "You are an expert technical writer creating concise, production-grade deployment guides from real experiences." + + # Create a temporary client from the service to use for generation + from memu.llm.lazyllm_client import LazyLLMClient + + api_key = os.getenv("LAZYLLM_QWEN_API_KEY") + llm_client = LazyLLMClient( + source="qwen", + chat_model=service.llm_config.chat_model, + api_key=api_key, + ) + + full_prompt = f"{system_prompt}\n\n{prompt}" + generated_content = await llm_client.summarize( + text=full_prompt, + system_prompt=system_prompt, + ) + + # Write to file + with open(output_file, "w", encoding="utf-8") as f: + f.write(generated_content) + + return True + + +async def main(): + """ + Extract skills from agent logs using incremental memory updates with LazyLLM. + + This example demonstrates INCREMENTAL LEARNING: + 1. Process files ONE BY ONE + 2. Each file UPDATES existing memory + 3. Category summaries EVOLVE with each new file + 4. Final output shows accumulated knowledge + """ + print("Example 5: Incremental Skill Extraction with LazyLLM") + print("-" * 60) + + # Get LazyLLM API key from environment + api_key = os.getenv("LAZYLLM_QWEN_API_KEY") + if not api_key: + msg = "Please set LAZYLLM_QWEN_API_KEY environment variable" + raise ValueError(msg) + + # Custom config for skill extraction + skill_prompt = """ + You are analyzing an agent execution log. Extract the key actions taken, their outcomes, and lessons learned. + + For each significant action or phase: + + 1. **Action/Phase**: What was being attempted? + 2. **Status**: SUCCESS ✅ or FAILURE ❌ + 3. **What Happened**: What was executed + 4. **Outcome**: What worked/failed, metrics + 5. **Root Cause** (for failures): Why did it fail? + 6. **Lesson**: What did we learn? + 7. 
**Action Items**: Concrete steps for next time + + **IMPORTANT**: + - Focus on ACTIONS and outcomes + - Be specific: include actual metrics, errors, timing + - ONLY extract information explicitly stated + - DO NOT infer or assume information + + Extract ALL significant actions from the text: + + Text: {resource} + """ + + # Define custom categories + skill_categories = [ + {"name": "deployment_execution", "description": "Deployment actions, traffic shifting, environment management"}, + { + "name": "pre_deployment_validation", + "description": "Capacity validation, configuration checks, readiness verification", + }, + { + "name": "incident_response_rollback", + "description": "Incident response, error detection, rollback procedures", + }, + { + "name": "performance_monitoring", + "description": "Metrics monitoring, performance analysis, bottleneck detection", + }, + {"name": "database_management", "description": "Database capacity planning, optimization, schema changes"}, + {"name": "testing_verification", "description": "Testing, smoke tests, load tests, verification"}, + {"name": "infrastructure_setup", "description": "Kubernetes, containers, networking configuration"}, + {"name": "lessons_learned", "description": "Key reflections, root cause analyses, action items"}, + ] + + memorize_config = { + "memory_types": ["skill"], + "memory_type_prompts": {"skill": skill_prompt}, + "memory_categories": skill_categories, + } + + # Initialize service with LazyLLM backend using llm_profiles + # The "default" profile is required and used as the primary LLM configuration + service = MemoryService( + llm_profiles={ + "default": { + "client_backend": "lazyllm_backend", + "source": "qwen", + "chat_model": "qwen-plus", + "vlm_model": "qwen-vl-plus", + "embed_model": "text-embedding-v3", + "stt_model": "qwen-audio-turbo", + "api_key": api_key, + }, + "embedding": { + "client_backend": "lazyllm_backend", + "source": "qwen", + "chat_model": "qwen-plus", + "vlm_model": "qwen-vl-plus", + "embed_model": "text-embedding-v3", + "stt_model": "qwen-audio-turbo", + "api_key": api_key, + }, + }, + memorize_config=memorize_config, + ) + + # Resources to process + resources = [ + ("examples/resources/logs/log1.txt", "document"), + ("examples/resources/logs/log2.txt", "document"), + ("examples/resources/logs/log3.txt", "document"), + ] + + # Process each resource sequentially + print("\nProcessing files with LazyLLM...") + all_skills = [] + categories = [] + + for idx, (resource_file, modality) in enumerate(resources, 1): + if not os.path.exists(resource_file): + print(f"⚠ File not found: {resource_file}") + continue + + try: + print(f" Processing: {resource_file}") + result = await service.memorize(resource_url=resource_file, modality=modality) + + # Extract skill items + for item in result.get("items", []): + if item.get("memory_type") == "skill": + all_skills.append({"skill": item.get("summary", ""), "source": os.path.basename(resource_file)}) + + # Categories are returned in the result and updated after each memorize call + categories = result.get("categories", []) + + # Generate intermediate skill.md + await generate_skill_md( + all_skills=all_skills, + service=service, + output_file=f"examples/output/skill_example_lazyllm/log_{idx}.md", + attempt_number=idx, + total_attempts=len(resources), + categories=categories, + ) + print(f" ✓ Extracted {len([s for s in all_skills if s['source'] == os.path.basename(resource_file)])} skills") + + except Exception as e: + print(f" ✗ Error processing {resource_file}: {e}") + 
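+            # Keep going with the remaining log files; print the full traceback so LazyLLM/API failures are easy to diagnose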
import traceback + traceback.print_exc() + + # Generate final comprehensive skill.md + await generate_skill_md( + all_skills=all_skills, + service=service, + output_file="examples/output/skill_example_lazyllm/skill.md", + attempt_number=len(resources), + total_attempts=len(resources), + categories=categories, + is_final=True, + ) + + print(f"\n" + "=" * 60) + print(f"✓ Processed {len([r for r in resources if os.path.exists(r[0])])} files, extracted {len(all_skills)} skills") + print(f"✓ Generated {len(categories)} categories") + print("✓ Output: examples/output/skill_example_lazyllm/") + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/example_6_multimodal_memory_with_lazyllm.py b/examples/example_6_multimodal_memory_with_lazyllm.py new file mode 100644 index 00000000..58a38bbe --- /dev/null +++ b/examples/example_6_multimodal_memory_with_lazyllm.py @@ -0,0 +1,168 @@ +""" +Example 6: Multimodal Processing -> Memory Category File (with LazyLLM) + +This example demonstrates how to process multiple modalities (images, documents) +and generate a unified memory category JSON file using LazyLLM backend. + +Usage: + export LAZYLLM_QWEN_API_KEY=your_api_key + python examples/example_6_multimodal_memory_with_lazyllm.py +""" + +import asyncio +import os +import sys +from pathlib import Path + +# Add src to sys.path FIRST before importing memu +project_root = Path(__file__).parent.parent +src_path = str(project_root / "src") +if src_path not in sys.path: + sys.path.insert(0, src_path) +from memu.app import MemoryService + +# Add src to sys.path +src_path = os.path.abspath("src") +sys.path.insert(0, src_path) + + +async def generate_memory_md(categories, output_dir): + """Generate concise markdown files for each memory category.""" + + os.makedirs(output_dir, exist_ok=True) + + generated_files = [] + + for cat in categories: + name = cat.get("name", "unknown") + description = cat.get("description", "") + summary = cat.get("summary", "") + + filename = f"{name}.md" + filepath = os.path.join(output_dir, filename) + + with open(filepath, "w", encoding="utf-8") as f: + # Title + formatted_name = name.replace("_", " ").title() + f.write(f"# {formatted_name}\n\n") + + if description: + f.write(f"*{description}*\n\n") + + # Content - full version + if summary: + cleaned_summary = summary.replace("", "").replace("", "").strip() + f.write(f"{cleaned_summary}\n") + else: + f.write("*No content available*\n") + + generated_files.append(filename) + + return generated_files + + +async def main(): + """ + Process multiple modalities (images and documents) to generate memory categories using LazyLLM. + + This example: + 1. Initializes MemoryService with LazyLLM backend + 2. Processes documents and images + 3. Extracts unified memory categories across modalities + 4. 
Outputs the categories to files + """ + print("Example 6: Multimodal Memory Processing with LazyLLM Backend") + print("-" * 60) + + # Get LazyLLM API key from environment + api_key = os.getenv("LAZYLLM_QWEN_API_KEY") + if not api_key: + msg = "Please set LAZYLLM_QWEN_API_KEY environment variable" + raise ValueError(msg) + + # Define custom categories for multimodal content + multimodal_categories = [ + {"name": "technical_documentation", "description": "Technical documentation, guides, and tutorials"}, + { + "name": "architecture_concepts", + "description": "System architecture, design patterns, and structural concepts", + }, + {"name": "best_practices", "description": "Best practices, recommendations, and guidelines"}, + {"name": "code_examples", "description": "Code snippets, examples, and implementation details"}, + {"name": "visual_diagrams", "description": "Visual concepts, diagrams, charts, and illustrations from images"}, + ] + + # Initialize service with LazyLLM backend using llm_profiles + # The "default" profile is required and used as the primary LLM configuration + service = MemoryService( + llm_profiles={ + "default": { + "client_backend": "lazyllm_backend", + "source": "qwen", + "chat_model": "qwen-plus", + "vlm_model": "qwen-vl-plus", + "embed_model": "text-embedding-v3", + "stt_model": "qwen-audio-turbo", + "api_key": api_key, + }, + "embedding": { + "client_backend": "lazyllm_backend", + "source": "qwen", + "chat_model": "qwen-plus", + "vlm_model": "qwen-vl-plus", + "embed_model": "text-embedding-v3", + "stt_model": "qwen-audio-turbo", + "api_key": api_key, + }, + }, + memorize_config={"memory_categories": multimodal_categories}, + ) + + # Resources to process (file_path, modality) + resources = [ + ("examples/resources/docs/doc1.txt", "document"), + ("examples/resources/docs/doc2.txt", "document"), + ("examples/resources/images/image1.png", "image"), + ] + + # Process each resource + print("\nProcessing resources with LazyLLM...") + total_items = 0 + categories = [] + for resource_file, modality in resources: + if not os.path.exists(resource_file): + print(f"⚠ File not found: {resource_file}") + continue + + try: + print(f" Processing: {resource_file} ({modality})") + result = await service.memorize(resource_url=resource_file, modality=modality) + total_items += len(result.get("items", [])) + # Categories are returned in the result and updated after each memorize call + categories = result.get("categories", []) + print(f" ✓ Extracted {len(result.get('items', []))} items") + except Exception as e: + print(f" ✗ Error processing {resource_file}: {e}") + import traceback + traceback.print_exc() + + # Write to output files + output_dir = "examples/output/multimodal_example_lazyllm" + os.makedirs(output_dir, exist_ok=True) + + # 1. 
Generate individual Markdown files for each category + generated_files = await generate_memory_md(categories, output_dir) + + print(f"\n" + "=" * 60) + print(f"✓ Processed {len([r for r in resources if os.path.exists(r[0])])} files, extracted {total_items} items") + print(f"✓ Generated {len(categories)} categories:") + for cat in categories: + print(f" - {cat.get('name', 'unknown')}") + print(f"✓ Output files ({len(generated_files)}):") + for file in generated_files: + print(f" - {os.path.join(output_dir, file)}") + print(f"✓ Output directory: {output_dir}/") + + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/output/conversation_example_lazyllm/activities.md b/examples/output/conversation_example_lazyllm/activities.md new file mode 100644 index 00000000..f0e7799b --- /dev/null +++ b/examples/output/conversation_example_lazyllm/activities.md @@ -0,0 +1,8 @@ +# activities +## Learning +- The user is learning about event-driven architecture and message queues +- The user is researching OpenAPI specifications and tools like Swagger and Postman +## Open Source Contributions +- The user contributes to open-source projects, including a Python CLI tool used for automating deployment tasks +## Physical Activities +- The user goes to the gym 3–4 times a week after work around 7 PM diff --git a/examples/output/conversation_example_lazyllm/experiences.md b/examples/output/conversation_example_lazyllm/experiences.md new file mode 100644 index 00000000..f45ed0c1 --- /dev/null +++ b/examples/output/conversation_example_lazyllm/experiences.md @@ -0,0 +1,5 @@ +# experiences +## Work-Related Stress +- The user has been having trouble sleeping lately due to work stress related to leading a big product launch next month +## Work-Life Balance +- The user checks work emails late at night and is considering setting no-work hours to improve work-life balance diff --git a/examples/output/conversation_example_lazyllm/goals.md b/examples/output/conversation_example_lazyllm/goals.md new file mode 100644 index 00000000..1f64b315 --- /dev/null +++ b/examples/output/conversation_example_lazyllm/goals.md @@ -0,0 +1,6 @@ +# goals +## Product Development Objectives +- The user wants to build a SaaS product related to developer tools or automation +- The user is planning to start prototyping a SaaS product for API testing and documentation automation in the next few months +## Project Focus +- The user is planning to build a tool for API testing and documentation with automated test generation diff --git a/examples/output/conversation_example_lazyllm/habits.md b/examples/output/conversation_example_lazyllm/habits.md new file mode 100644 index 00000000..499c35dd --- /dev/null +++ b/examples/output/conversation_example_lazyllm/habits.md @@ -0,0 +1,6 @@ +# habits +## Daily Routine +- The user goes for a run every morning +- The user goes to the gym 3-4 times a week, usually around 7 PM +- The user drinks coffee throughout the day to stay alert +- The user tends to check work emails late at night diff --git a/examples/output/conversation_example_lazyllm/knowledge.md b/examples/output/conversation_example_lazyllm/knowledge.md new file mode 100644 index 00000000..e9f7c82d --- /dev/null +++ b/examples/output/conversation_example_lazyllm/knowledge.md @@ -0,0 +1 @@ +*No content available* diff --git a/examples/output/conversation_example_lazyllm/opinions.md b/examples/output/conversation_example_lazyllm/opinions.md new file mode 100644 index 00000000..3e1b0065 --- /dev/null +++ 
b/examples/output/conversation_example_lazyllm/opinions.md @@ -0,0 +1,3 @@ +# opinions +## Beliefs about Technology and Innovation +- The user believes there is room for an AI-powered, automated solution for API documentation and testing diff --git a/examples/output/conversation_example_lazyllm/personal_info.md b/examples/output/conversation_example_lazyllm/personal_info.md new file mode 100644 index 00000000..a8ca8b13 --- /dev/null +++ b/examples/output/conversation_example_lazyllm/personal_info.md @@ -0,0 +1,3 @@ +# personal_info +## Core Traits +- The user is not a morning person diff --git a/examples/output/conversation_example_lazyllm/preferences.md b/examples/output/conversation_example_lazyllm/preferences.md new file mode 100644 index 00000000..869de46d --- /dev/null +++ b/examples/output/conversation_example_lazyllm/preferences.md @@ -0,0 +1,7 @@ +# preferences +## Interests +- The user is interested in system design and scalability patterns +- The user likes food and nature +- The user likes reading +## Dietary Preferences +- The user is trying to eat less meat diff --git a/examples/output/conversation_example_lazyllm/relationships.md b/examples/output/conversation_example_lazyllm/relationships.md new file mode 100644 index 00000000..e9f7c82d --- /dev/null +++ b/examples/output/conversation_example_lazyllm/relationships.md @@ -0,0 +1 @@ +*No content available* diff --git a/examples/output/conversation_example_lazyllm/work_life.md b/examples/output/conversation_example_lazyllm/work_life.md new file mode 100644 index 00000000..dad4a6be --- /dev/null +++ b/examples/output/conversation_example_lazyllm/work_life.md @@ -0,0 +1,11 @@ +# work_life +## Professional Role +- The user is a software engineer at TechCorp +- The user has been programming for about 5 years +- The user leads a big product launch at work +## Technical Stack +- The user works on backend systems using Python and Go +- The user uses Kubernetes for orchestration and Redis for caching +- The user is using Apache Kafka for event streaming +## Monitoring & Observability +- The user is responsible for monitoring and observability with Prometheus and Grafana diff --git a/examples/output/multimodal_example_lazyllm/architecture_concepts.md b/examples/output/multimodal_example_lazyllm/architecture_concepts.md new file mode 100644 index 00000000..9df92cf4 --- /dev/null +++ b/examples/output/multimodal_example_lazyllm/architecture_concepts.md @@ -0,0 +1,12 @@ +# Architecture Concepts + +*System architecture, design patterns, and structural concepts* + +# architecture_concepts +## System Architecture +- MemU processes inputs through a pipeline including preprocessing, summarization, embedding, classification, categorization, and persistent storage +- MemU includes a MemoryService core for managing memory operations such as store, retrieve, update, and delete +## Memory Classification +- Memories in MemU are classified into types such as profile, event, knowledge, and behavior +## Retrieval Mechanism +- MemU's retrieval pipeline includes query rewriting, context expansion, recall, re-ranking, and result assembly diff --git a/examples/output/multimodal_example_lazyllm/best_practices.md b/examples/output/multimodal_example_lazyllm/best_practices.md new file mode 100644 index 00000000..4a152b36 --- /dev/null +++ b/examples/output/multimodal_example_lazyllm/best_practices.md @@ -0,0 +1,16 @@ +# Best Practices + +*Best practices, recommendations, and guidelines* + +# best_practices +## Storage & Architecture +- MemU supports flexible 
storage backends including SQLite, PostgreSQL, and in-memory options +- MemU uses a vector search engine for semantic retrieval with dense embeddings and hybrid search capabilities +## Memory Management +- MemU dynamically assigns memories to default or custom semantic categories and maintains auto-updated summaries +## Retrieval Strategies +- MemU provides retrieval strategies including RAG-based vector search and LLM-based context-aware methods +## Best Practices +- Best practices for MemU emphasize memory quality, configuration tuning, performance optimization, and privacy compliance +## Use Cases +- MemU use cases include personal assistants, customer support, education, knowledge management, and agent workflows diff --git a/examples/output/multimodal_example_lazyllm/code_examples.md b/examples/output/multimodal_example_lazyllm/code_examples.md new file mode 100644 index 00000000..0ecd8842 --- /dev/null +++ b/examples/output/multimodal_example_lazyllm/code_examples.md @@ -0,0 +1,5 @@ +# Code Examples + +*Code snippets, examples, and implementation details* + +*No content available* diff --git a/examples/output/multimodal_example_lazyllm/technical_documentation.md b/examples/output/multimodal_example_lazyllm/technical_documentation.md new file mode 100644 index 00000000..6bd2b1f1 --- /dev/null +++ b/examples/output/multimodal_example_lazyllm/technical_documentation.md @@ -0,0 +1,12 @@ +# Technical Documentation + +*Technical documentation, guides, and tutorials* + +# technical_documentation +## Framework Overview +- MemU is an advanced agentic memory framework that supports multiple input modalities and enables AI agents to process, organize, and retrieve memories in a structured, semantically meaningful way +## Integration & Development +- MemU provides a Python API for integration and customization, with an open-source roadmap focused on long-term improvements +## Limitations +- The user cannot provide image analysis due to lack of visual interpretation capability +- The user requested image analysis but was informed that the model cannot view or interpret visual content diff --git a/examples/output/multimodal_example_lazyllm/visual_diagrams.md b/examples/output/multimodal_example_lazyllm/visual_diagrams.md new file mode 100644 index 00000000..de11b271 --- /dev/null +++ b/examples/output/multimodal_example_lazyllm/visual_diagrams.md @@ -0,0 +1,5 @@ +# Visual Diagrams + +*Visual concepts, diagrams, charts, and illustrations from images* + +*No content available* diff --git a/examples/output/skill_example_lazyllm/log_1.md b/examples/output/skill_example_lazyllm/log_1.md new file mode 100644 index 00000000..1fbb2d7c --- /dev/null +++ b/examples/output/skill_example_lazyllm/log_1.md @@ -0,0 +1,209 @@ +```yaml +--- +name: production-microservice-deployment +description: Production-ready guide for deploying a microservice using blue-green deployment strategy with monitoring, rollback safeguards, and lessons from real deployment attempts. +version: 0.1.0 +status: Evolving +--- +``` + +# Production Microservice Deployment with Blue-Green Strategy + +## Introduction + +This guide provides a battle-tested procedure for safely deploying a microservice to production using the blue-green deployment strategy. It is intended for use when minimizing downtime and enabling rapid rollback are critical. The steps reflect lessons learned from active deployment attempts and focus on practical execution in cloud-native environments using Kubernetes and CI/CD pipelines. 
+ +Use this guide during scheduled maintenance windows for major version updates or breaking changes where user impact must be contained. + +--- + +## Deployment Context + +- **Strategy**: Blue-green deployment +- **Environment**: Kubernetes (EKS), AWS-hosted, Helm-managed services +- **Traffic Management**: Istio service mesh with weighted routing +- **Primary Goals**: + - Zero-downtime deployment + - Immediate rollback capability + - Controlled traffic shift with observability + - Validation of health before full cutover + +--- + +## Pre-Deployment Checklist + +### 🔧 Infrastructure & Configuration +- [x] New (green) environment provisioned and stable (`kubectl get nodes --selector=env=green`) +- [x] Helm chart version tagged and immutable (e.g., `my-service-1.4.0`) +- [x] ConfigMaps and Secrets verified for green environment (no dev defaults) + +### 🛢️ Database +- [x] Schema migrations are backward-compatible **(Critical)** +- [x] Migration scripts tested in staging with production-like data +- [ ] Downtime-free migration path confirmed (if applicable) + +### 📊 Monitoring & Observability +- [x] Prometheus metrics endpoints exposed on new pods +- [x] Grafana dashboards updated to include green service instance +- [x] Alertmanager rules cover deployment-phase anomalies (latency, error rate spikes) +- [x] Distributed tracing (Jaeger) enabled and sampled at 100% during cutover + +### 🧪 Validation Readiness +- [x] Smoke test suite available and passing against staging +- [x] Canaries configured to hit green service pre-cutover +- [x] Rollback image tagged and accessible in registry (`v1.3.9-rollback`) + +--- + +## Deployment Procedure + +> ⏱️ Estimated execution time: 18 minutes + +1. **Deploy Green Service** + ```bash + helm upgrade my-service-green ./charts/my-service \ + --namespace production \ + --set environment=green \ + --set image.tag=v1.4.0 \ + --install + ``` + +2. **Wait for Pod Readiness** + ```bash + kubectl wait --for=condition=ready pod -l app=my-service,environment=green -n production --timeout=5m + ``` + - ✅ Monitoring Point: All pods report `Ready` status within 5 minutes + - ❌ If pending > 3 minutes: check resource quotas and node autoscaling + +3. **Run Smoke Tests Against Green** + ```bash + ./scripts/smoke-test.sh --target https://api-green.example.com + ``` + - ✅ Expected: All 7 tests pass, response time < 800ms + - ❌ Fail: Halt deployment, investigate logs and traces + +4. **Shift 5% Traffic to Green (Canary)** + Apply Istio traffic split: + ```yaml + apiVersion: networking.istio.io/v1alpha3 + kind: VirtualService + metadata: + name: my-service-route + spec: + hosts: + - api.example.com + http: + - route: + - destination: + host: my-service-blue.production.svc.cluster.local + weight: 95 + - destination: + host: my-service-green.production.svc.cluster.local + weight: 5 + ``` + ```bash + kubectl apply -f virtualservice-split.yaml + ``` + +5. **Monitor Key Metrics (5 minutes)** + - Error rate (goal: < 0.5%) + - P95 latency (< 1.2s) + - Request volume consistency + - Check Jaeger traces for failed spans + +6. **Shift 100% Traffic to Green** + Update weights: + ```yaml + - weight: 0 # blue + - weight: 100 # green + ``` + ```bash + kubectl apply -f virtualservice-split.yaml + ``` + +7. **Verify Full Cutover** + ```bash + curl -H "Host: api.example.com" http://ingress/status | grep "version=1.4.0" + ``` + +8. 
**Decommission Blue (After 1 hour)** + ```bash + helm uninstall my-service-blue --namespace production + ``` + +--- + +## Rollback Procedure + +### When to Roll Back + +Roll back immediately if any of the following occur: +- Error rate > 5% sustained over 2 minutes +- Latency P95 > 3s for 3+ minutes +- Database connection pool exhaustion observed +- Smoke test failure at any stage + +### Steps + +1. **Revert Traffic to Blue** + ```bash + kubectl patch virtualservice my-service-route --patch ' + spec: + http: + - route: + - destination: + host: my-service-blue.production.svc.cluster.local + weight: 100 + - destination: + host: my-service-green.production.svc.cluster.local + weight: 0' + ``` + +2. **Confirm Health of Blue Service** + ```bash + kubectl get pods -l app=my-service,environment=blue -n production + ``` + - Ensure all replicas are running and ready + +3. **Trigger Alert Acknowledgment** + - Manually acknowledge firing alerts in Alertmanager + - Notify #prod-alerts: `@team Rollback initiated – green service degraded` + +4. **Expected Recovery Time**: < 90 seconds from rollback initiation + +--- + +## Common Pitfalls & Solutions + +| Issue | Root Cause | Symptom | Solution | +|------|-----------|--------|---------| +| Green pods stuck in `Pending` | Node autoscaler not triggered | No new pods scheduled | Manually scale node group or reduce CPU requests temporarily | +| Sudden 503s after cutover | Misconfigured readiness probe | Pods accept traffic before DB connection | Add `initialDelaySeconds: 30` to probe config | +| Rollback fails due to blue already uninstalled | Premature cleanup | 503s across the board | Reinstall blue via Helm restore from last release revision | +| Traces missing in Jaeger | Sampling rate too low | Incomplete trace visibility | Set `tracing.sample-rate: 100` during deployment window | + +--- + +## Best Practices + +- Always keep the previous version deployable and tracked (tagged in Helm repository) +- Run smoke tests against green **before** any traffic shift +- Use immutable image tags — never `latest` +- Schedule deployments during low-traffic periods (e.g., 02:00–04:00 local ops time) +- Coordinate with SRE team for alert suspension/sensitivity adjustment during window + +> ✅ Expected timeline: +> - Preparation: 30 min +> - Execution: 18 min +> - Observation: 60 min +> - Total: ~110 minutes + +--- + +## Key Takeaways + +1. **Backward-compatible schema changes are non-negotiable** — even minor migrations can break old instances during rollback. +2. **Readiness probes must reflect actual service dependencies**, especially database and cache connectivity. +3. **Never decommission the blue stack until post-cutover stability is confirmed** — rollback without it is impossible. +4. **Observability must be pre-wired** — ad-hoc dashboard creation delays incident response. +5. **Automated smoke tests are essential** — manual validation is unreliable under pressure. \ No newline at end of file diff --git a/examples/output/skill_example_lazyllm/log_2.md b/examples/output/skill_example_lazyllm/log_2.md new file mode 100644 index 00000000..4757116a --- /dev/null +++ b/examples/output/skill_example_lazyllm/log_2.md @@ -0,0 +1,131 @@ +```markdown +--- +name: production-microservice-deployment +description: Production-ready guide for deploying microservices using a blue-green deployment strategy with real-world lessons learned from partial deployment attempts. 
+version: 0.2.0 +status: Evolving +--- + +# Production Microservice Deployment with Blue-Green Strategy + +## Introduction + +This guide provides a practical, step-by-step procedure for safely deploying a microservice in production using the blue-green deployment pattern. It is intended for use when minimizing downtime and enabling rapid rollback are critical. The procedures, checks, and pitfalls documented here are derived from two prior deployment attempts (1 successful phase, 1 partial failure), capturing actionable insights from real operational experience. + +Use this guide during scheduled production releases where traffic switching, data consistency, and observability are required. + +## Deployment Context + +- **Strategy**: Blue-green deployment using Kubernetes `Service` selector switch +- **Environment**: Kubernetes 1.25+ (EKS), AWS RDS backend, Prometheus/Grafana/Loki stack +- **Goals**: + - Zero-downtime cutover + - Sub-5-minute rollback if thresholds breached + - Full observability during transition + - Data schema compatibility across versions + +## Pre-Deployment Checklist + +### Infrastructure & Configuration +- [ ] **(Critical)** New green environment (v2) pods are running and passing readiness/liveness probes +- [ ] **(Critical)** Database schema changes (if any) are backward compatible with both v1 (blue) and v2 (green) +- [ ] Green service endpoint (`svc-green`) exists and routes to v2 pods +- [ ] Blue service endpoint (`svc-blue`) remains active and unchanged + +### Monitoring & Observability +- [ ] **(Critical)** Prometheus metrics for request rate, error rate, and latency are available per version (via `version` label) +- [ ] Loki logs are tagged with `app_version` and searchable by deployment color +- [ ] Grafana dashboard loaded with real-time view of both blue and green services + +### Traffic & Networking +- [ ] Current production traffic is routed through `svc-production` → `version=blue` +- [ ] `svc-production` selector can be patched atomically to switch to `version=green` +- [ ] DNS TTLs and client-side caching do not interfere with immediate routing control + +## Deployment Procedure + +1. **Deploy v2 (Green) Pods** + ```bash + kubectl apply -f deployment-v2.yaml + ``` + - Wait until all pods are `Running` and pass readiness checks: + ```bash + kubectl get pods -l app=my-microservice,version=v2 + ``` + +2. **Validate Green Service Internally** + - Send test traffic via port-forward: + ```bash + kubectl port-forward svc/svc-green 8080:80 & + curl http://localhost:8080/health + ``` + - Confirm logs show `version=v2` and no startup errors. + +3. **Switch Traffic to Green** + ```bash + kubectl patch svc svc-production -p '{"spec": {"selector": {"version": "v2"}}}' + ``` + - This switches all traffic from blue to green atomically. + +4. **Monitor Transition (First 5 Minutes)** + - **Monitoring Points**: + - Error rate (target: <0.5%) + - P95 latency (<200ms) + - Request volume parity (match pre-switch levels) + - Pod restarts or crashes in v2 + - Use Grafana dashboard to compare v1 (historical) vs v2 (live) metrics. + +5. 
**Stabilization Check** + - After 5 minutes of stable performance: + - Confirm no alerts triggered + - Verify business logic via synthetic transaction + - Log success: `Deployment v2 now serving production traffic` + +## Rollback Procedure + +### When to Roll Back +Roll back immediately if **any** of the following occur within 10 minutes post-cutover: +- Error rate > 2% sustained over 2 minutes +- Latency P95 > 800ms for >3 minutes +- Database connection pool saturation in v2 +- Any critical alert from monitoring system + +### Rollback Command +```bash +kubectl patch svc svc-production -p '{"spec": {"selector": {"version": "v1"}}}' +``` + +- Expected recovery time: **< 3 minutes** (limited by kube-proxy sync interval) +- Post-rollback: + - Confirm v1 pods absorb traffic (check metrics) + - Preserve v2 logs for root cause analysis + - Trigger incident review if rollback executed + +## Common Pitfalls & Solutions + +| Issue | Root Cause | Symptom | Solution | +|------|-----------|--------|----------| +| 503 errors after cutover | Misconfigured readiness probe in v2 | Pods running but not receiving traffic | Fix `/health` endpoint logic; re-roll v2 before switching | +| DB lock contention | v2 introduced long-lived transaction | Increased latency and connection pool exhaustion | Revert code change; apply statement-level timeout | +| Logs missing version tag | Incorrect label injection in init container | Inability to filter v2 logs in Loki | Patch DaemonSet to inject `app_version` env var | +| Partial traffic switch | Sticky sessions at LB layer | Mixed v1/v2 traces in Jaeger | Disable session affinity on ALB before deployment | + +## Best Practices + +- **Test Selector Patch Locally**: Validate `kubectl patch` syntax in staging first +- **Pre-warm caches**: If applicable, trigger cache population in v2 before cutover +- **Atomic Switch Only**: Never use weighted routing unless A/B testing is goal +- **Timeline Expectations**: + - v2 rollout: 2–3 minutes + - Validation: 2 minutes + - Cutover + monitoring: 5–10 minutes + - Total window: ≤15 minutes + +## Key Takeaways + +1. **Selector-based switching is reliable only if labels and selectors are rigorously tested pre-deploy** +2. **Backward-compatible database schema changes are non-negotiable—v1 must tolerate v2 writes** +3. **Real-time observability by version is critical—without it, rollback decisions are blind** +4. **A failed deployment is acceptable; a slow or uncontrolled rollback is not** +5. **Always preserve pre-cutover state—never scale down blue until green is proven stable** +``` \ No newline at end of file diff --git a/examples/output/skill_example_lazyllm/log_3.md b/examples/output/skill_example_lazyllm/log_3.md new file mode 100644 index 00000000..ad1fdfb3 --- /dev/null +++ b/examples/output/skill_example_lazyllm/log_3.md @@ -0,0 +1,156 @@ +```yaml +--- +name: production-microservice-deployment +description: Production-ready guide for deploying a microservice using a blue-green deployment strategy, based on real-world execution and lessons learned. +version: 0.3.0 +status: Evolving +--- +``` + +# Production Microservice Deployment with Blue-Green Strategy + +## Introduction + +This guide provides a field-validated procedure for safely deploying a microservice in production using the blue-green deployment strategy. It is intended for use when minimizing downtime and enabling rapid rollback are critical. 
The steps and checks herein are derived from three iterative deployment attempts, incorporating observed failures and optimizations. + +Use this guide during scheduled production releases where service continuity and observability are required. + +--- + +## Deployment Context + +- **Strategy**: Blue-green deployment via Kubernetes `Service` selector switch +- **Environment**: Kubernetes (v1.25+), Helm-managed workloads, Istio ingress +- **Goals**: + - Zero-downtime deployment + - Sub-5-minute rollback if failure detected + - Full observability during transition + - Minimal impact on downstream consumers + +--- + +## Pre-Deployment Checklist + +### Database +- [ ] **Verify schema compatibility** with new version — *Critical* + Run: `helm test db-checks --namespace=db` +- [ ] Confirm migration scripts are idempotent and version-tagged + +### Monitoring & Observability +- [ ] **Ensure metrics endpoints are enabled** in new image — *Critical* + Check: `/metrics` returns 200 in staging +- [ ] Validate Prometheus scrape config includes new pod labels +- [ ] Set up dashboard panels for latency, error rate, and request volume per color + +### Networking +- [ ] Confirm Istio virtual service routes do not override color selectors +- [ ] Verify readiness/liveness probes are tuned for startup time (new version may be slower) + +### Rollback Readiness +- [ ] **Pre-stage rollback script** with known-good revision — *Critical* + Store: `rollback-v2.1.0.yaml` in secure location +- [ ] Confirm `kubectl` context points to production cluster + +--- + +## Deployment Procedure + +1. **Deploy Green Instance (inactive)** + ```bash + helm upgrade --install mysvc-green ./charts/microservice \ + --namespace services \ + --set replicaCount=3 \ + --set image.tag=v2.2.0 \ + --set service.name=mysvc-green + ``` + +2. **Wait for Pod Readiness** + ```bash + kubectl wait --for=condition=ready pod -l app=mysvc,version=v2.2.0 -n services --timeout=180s + ``` + +3. **Run Smoke Tests Against Green** + ```bash + curl -H "x-bypass-router: green" http://mysvc.prod.svc.cluster.local/health + # Expected: 200 OK + "green", no errors in logs + ``` + +4. **Switch Traffic: Blue → Green** + Update service selector to point to green version: + ```bash + kubectl patch svc mysvc -n services -p '{"spec": {"selector": {"version": "v2.2.0"}}}' + ``` + +5. **Monitor Transition (First 5 Minutes)** + - Watch for: + - Error rate > 1% (via Grafana or `kubectl logs`) + - Latency increase > 2x baseline + - Drop in request volume (consumer breakage) + - Use: + ```bash + kubectl top pods -n services -l app=mysvc + ``` + +6. **Confirm Stability** + - Sustained health for 10 minutes + - No alerts triggered + - Tracing shows full request flow + +--- + +## Rollback Procedure + +### When to Roll Back +- HTTP 5xx error rate > 2% sustained over 2 minutes +- Latency P95 > 1.5x baseline for 3+ minutes +- Database connection pool exhaustion observed +- Downstream services report failures + +### Rollback Steps +1. **Immediately reroute traffic to blue (known stable):** + ```bash + kubectl patch svc mysvc -n services -p '{"spec": {"selector": {"version": "v2.1.0"}}}' + ``` + +2. **Verify blue instance health:** + ```bash + kubectl get pods -n services -l app=mysvc,version=v2.1.0 + ``` + +3. **Expected Recovery Time**: < 4 minutes from rollback initiation to full restoration. 
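+
+A minimal watchdog along these lines could automate the threshold check and the selector switch. This sketch is illustrative only (it is not taken from the deployment logs); the Prometheus endpoint, metric name, and label values are assumptions and must be adapted to the actual monitoring stack:
+
+```bash
+# Assumed in-cluster Prometheus endpoint and metric labels -- adjust before use
+PROM="http://prometheus.monitoring.svc.cluster.local:9090"
+ERR_RATE=$(curl -s -G "${PROM}/api/v1/query" \
+  --data-urlencode 'query=sum(rate(http_requests_total{app="mysvc",code=~"5.."}[2m])) / sum(rate(http_requests_total{app="mysvc"}[2m]))' \
+  | jq -r '.data.result[0].value[1] // "0"')
+if awk -v r="$ERR_RATE" 'BEGIN { exit !(r > 0.02) }'; then
+  echo "5xx rate ${ERR_RATE} above the 2% rollback threshold -- reverting to v2.1.0"
+  kubectl patch svc mysvc -n services -p '{"spec": {"selector": {"version": "v2.1.0"}}}'
+fi
+```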
+ +--- + +## Common Pitfalls & Solutions + +| Issue | Root Cause | Symptom | Solution | +|------|-----------|--------|----------| +| Green pods crash after deploy | Missing config map mount | CrashLoopBackOff in logs | Explicitly declare all configMaps in Helm values | +| Service selector fails to switch | Misaligned pod labels | No traffic to green | Double-check label selectors in deployment vs service | +| High latency post-switch | Cold cache in new service | P95 spikes at switchover | Warm caches via pre-load job before cutover | +| Rollback fails due to blue scale-down | Auto-scaler terminated old pods | No healthy blue pods | Keep blue instance alive for 15 min post-switch | + +--- + +## Best Practices + +- **Keep both blue and green active during monitoring window** (min 15 minutes) +- **Automate smoke tests** — run post-deploy, pre-cutover +- **Tag images immutably** — never reuse `latest` +- **Time deployments outside peak hours** — target 02:00–04:00 UTC +- **Expected timeline**: + - Deploy green: 2 min + - Wait & test: 3 min + - Cutover: 1 min + - Monitor: 10 min + - Total: ~16 minutes + +--- + +## Key Takeaways + +1. **Label consistency is critical** — mismatched selectors cause silent failures. +2. **Never assume backward compatibility** — always verify DB/schema interoperability. +3. **Rollback must be faster than detection** — automate the switch. +4. **Observability starts before cutover** — monitor green *before* routing traffic. +5. **Human error is the largest risk** — use pre-checked scripts, not CLI guesswork. \ No newline at end of file diff --git a/examples/output/skill_example_lazyllm/skill.md b/examples/output/skill_example_lazyllm/skill.md new file mode 100644 index 00000000..ef640a2d --- /dev/null +++ b/examples/output/skill_example_lazyllm/skill.md @@ -0,0 +1,150 @@ +```markdown +--- +name: production-microservice-deployment +description: Production-ready guide for deploying a microservice using blue-green deployment strategy with zero-downtime, validated monitoring, and rapid rollback capability. +version: 1.0.0 +status: Production-Ready +--- + +# Production Microservice Deployment with Blue-Green Strategy + +## Introduction + +This guide provides a battle-tested procedure for safely deploying a microservice to production using the blue-green deployment strategy. It ensures zero downtime, enables immediate rollback on failure, and integrates real-time validation via observability tools. Use this guide for any stateless microservice upgrade in Kubernetes-based environments where service continuity is critical. 
+ +## Deployment Context + +- **Strategy**: Blue-green deployment +- **Environment**: Kubernetes (EKS), AWS infrastructure, Istio service mesh +- **Goals**: + - Zero downtime during deployment + - Traffic switch within 30 seconds + - Full observability during cutover + - Rollback within 2 minutes if thresholds breached + - Minimal blast radius + +## Pre-Deployment Checklist + +### Infrastructure & Configuration +- [x] **(Critical)** New blue environment (v2) pods are running and ready (`kubectl get pods -l app=,version=v2`) +- [x] **(Critical)** All secrets and configmaps mounted correctly in v2 pods +- [x] Readiness and liveness probes configured and passing for v2 + +### Database +- [x] **(Critical)** Schema migrations are backward-compatible and applied *before* deployment +- [x] No pending data backfills or long-running jobs blocking cutover + +### Monitoring & Observability +- [x] **(Critical)** Prometheus metrics endpoints exposed and scraped for v2 +- [x] Grafana dashboards updated to include v2 version filtering +- [x] Alertmanager rules evaluate both v1 and v2 independently +- [x] Distributed tracing (Jaeger) enabled for service mesh traffic + +### Traffic Management +- [x] Istio VirtualService configured with named subsets (`blue`/`green`) +- [x] Initial traffic weight set to 0% for new version (blue) + +### Validation +- [x] Smoke test suite available and passes against staging +- [x] Synthetic health check endpoint (`/live` and `/ready`) accessible and returning 200 + +## Deployment Procedure + +1. **Deploy v2 Artifacts** + ```bash + kubectl apply -f deploy/v2-deployment.yaml + kubectl apply -f deploy/service.yaml + ``` + +2. **Wait for Pod Readiness** + ```bash + kubectl wait --for=condition=ready pod -l app=,version=v2 --timeout=180s + ``` + +3. **Apply Istio Traffic Shift (100% to Blue)** + ```bash + kubectl apply -f istio/virtualservice-blue.yaml + ``` + > `virtualservice-blue.yaml` sets traffic weight: blue=100, green=0 + +4. **Monitor Key Metrics (First 5 Minutes)** + - HTTP 5xx rate < 0.5% + - P99 latency < 800ms + - Error logs per second < 2 + - Circuit breaker open count = 0 + - Use: + ```bash + kubectl top pods -l app=,version=v2 + ``` + +5. **Run Smoke Tests Against Live Endpoint** + ```bash + ./scripts/smoke-test.sh https:///health-check + ``` + +6. **Confirm Stability (10-Minute Hold)** + - Watch dashboards continuously + - Verify no alerts triggered + - Confirm user transaction traces succeed + +7. **Promote v2 to Production Label** + ```bash + kubectl label deployment -v2 env=prod --overwrite + ``` + +## Rollback Procedure + +### When to Rollback +Rollback immediately if **any** of the following occur: +- HTTP 5xx rate ≥ 5% sustained over 2 minutes +- P99 latency > 2s for 3 consecutive minutes +- Smoke test fails +- Critical alert fires (e.g., DB connection pool exhaustion) + +### Steps + +1. **Revert Traffic to Green (v1)** + ```bash + kubectl apply -f istio/virtualservice-green.yaml + ``` + > Switches 100% traffic back to stable v1 + +2. **Verify Rollback Success** + ```bash + kubectl get virtualservice -o jsonpath='{.spec.http[0].route}' + # Output should show green subset at 100% + ``` + +3. 
**Monitor Recovery** + - Expected recovery time: ≤ 2 minutes + - Confirm metrics return to baseline + - Ensure no cascading failures in dependent services + +## Common Pitfalls & Solutions + +| Issue | Root Cause | Symptom | Solution | +|------|-----------|--------|----------| +| 5xx spike after cutover | Missing CORS headers in v2 | Clients blocked | Revert; add `Access-Control-Allow-Origin` header | +| Pods stuck in `CrashLoopBackOff` | Incorrect secret mount path | Container exits with code 1 | Check `kubectl describe pod`, verify volumeMount paths match | +| Latency degradation | Unindexed query introduced | DB CPU > 85%, slow traces | Rollback; add index; retest in staging | +| Partial rollout due to mislabeled pods | Version label typo in YAML | Some traffic routed incorrectly | Fix labels; redeploy; validate with `kubectl get pods -L version` | + +## Best Practices + +- **Always test blue-green failover weekly** in pre-prod using automation +- **Use canary first**: Route 1% of production traffic to v2 before full blue-green +- **Automate smoke tests** as part of CI/CD pipeline +- **Keep both versions running for 1 hour post-cutover** before scaling down v1 +- **Expected timeline**: + - Deployment: 4 minutes + - Monitoring window: 10 minutes + - Total execution: ≤ 15 minutes + +## Key Takeaways + +1. Backward-compatible schema changes are non-negotiable — always deploy DB changes ahead of application updates. +2. Misconfigured Istio subsets cause partial outages — validate routing rules with dry-run checks. +3. Real-time observability is essential — without live dashboards, you’re flying blind during cutover. +4. Automated smoke tests catch integration issues missed in staging. +5. Rollback speed determines incident impact — practice it like a fire drill. 
+```
\ No newline at end of file
diff --git a/src/memu/app/service.py b/src/memu/app/service.py
index 3842335d..01f99d81 100644
--- a/src/memu/app/service.py
+++ b/src/memu/app/service.py
@@ -24,6 +24,7 @@
 from memu.database.factory import build_database
 from memu.database.interfaces import Database
 from memu.llm.http_client import HTTPLLMClient
+from memu.llm.lazyllm_client import LazyLLMClient
 from memu.llm.wrapper import (
     LLMCallMetadata,
     LLMClientWrapper,
@@ -115,6 +116,15 @@ def _init_llm_client(self, config: LLMConfig | None = None) -> Any:
                 endpoint_overrides=cfg.endpoint_overrides,
                 embed_model=cfg.embed_model,
             )
+        elif backend == "lazyllm_backend":
+            return LazyLLMClient(
+                source=cfg.source,
+                chat_model=cfg.chat_model,
+                vlm_model=cfg.vlm_model,
+                embed_model=cfg.embed_model,
+                stt_model=cfg.stt_model,
+                api_key=cfg.api_key,
+            )
         else:
             msg = f"Unknown llm_client_backend '{cfg.client_backend}'"
             raise ValueError(msg)
diff --git a/src/memu/app/settings.py b/src/memu/app/settings.py
index 4949e5ff..0487763e 100644
--- a/src/memu/app/settings.py
+++ b/src/memu/app/settings.py
@@ -99,8 +99,12 @@ class LLMConfig(BaseModel):
     chat_model: str = Field(default="gpt-4o-mini")
     client_backend: str = Field(
         default="sdk",
-        description="Which LLM client backend to use: 'httpx' (httpx) or 'sdk' (official OpenAI).",
+        description="Which LLM client backend to use: 'httpx' (httpx), 'sdk' (official OpenAI), or 'lazyllm_backend' (LazyLLM).",
     )
+    # LazyLLM backend settings
+    source: str = Field(default="qwen", description="LLM source for the lazyllm backend")
+    vlm_model: str = Field(default="qwen-vl-plus", description="Vision-language model for lazyllm")
+    stt_model: str = Field(default="qwen-audio-turbo", description="Speech-to-text model for lazyllm")
     endpoint_overrides: dict[str, str] = Field(
         default_factory=dict,
         description="Optional overrides for HTTP endpoints (keys: 'chat'/'summary').",
diff --git a/src/memu/llm/lazyllm_client.py b/src/memu/llm/lazyllm_client.py
new file mode 100644
index 00000000..4a881a16
--- /dev/null
+++ b/src/memu/llm/lazyllm_client.py
@@ -0,0 +1,93 @@
+from typing import Any
+import logging
+from pathlib import Path
+import asyncio
+import lazyllm
+from lazyllm import LOG
+
+class LazyLLMClient:
+    DEFAULT_SOURCE = 'qwen'
+    DEFAULT_MODELS = {
+        'llm': 'qwen-plus',
+        'vlm': 'qwen-vl-plus',
+        'embed': 'text-embedding-v3',
+        'stt': 'qwen-audio-turbo',
+    }
+
+    def __init__(self,
+                 *,
+                 source: str | None = None,
+                 chat_model: str | None = None,
+                 vlm_model: str | None = None,
+                 embed_model: str | None = None,
+                 stt_model: str | None = None,
+                 api_key: str | None = None
+                 ):
+        self.source = source or self.DEFAULT_SOURCE
+        self.chat_model = chat_model or self.DEFAULT_MODELS['llm']
+        self.vlm_model = vlm_model or self.DEFAULT_MODELS['vlm']
+        self.embed_model = embed_model or self.DEFAULT_MODELS['embed']
+        self.stt_model = stt_model or self.DEFAULT_MODELS['stt']
+        self.api_key = api_key
+
+    async def _call_async(self, client: Any, *args: Any, **kwargs: Any) -> Any:
+        '''Invoke a synchronous LazyLLM client call in a worker thread.'''
+        if kwargs:
+            return await asyncio.to_thread(lambda: client(*args, **kwargs))
+        else:
+            return await asyncio.to_thread(lambda: client(*args))
+
+
+    async def summarize(
+        self,
+        text: str,
+        *,
+        max_tokens: int | None = None,
+        system_prompt: str | None = None,
+    ) -> str:
+        client = lazyllm.OnlineModule(source=self.source, model=self.chat_model, type='llm')
+        prompt = system_prompt or 'Summarize the text in one short paragraph.'
+        full_prompt = f'{prompt}\n\ntext:\n{text}'
+        LOG.debug(f'Summarizing text with {self.source}/{self.chat_model}')
+        response = await self._call_async(client, full_prompt)
+        return response
+
+    async def vision(
+        self,
+        prompt: str,
+        image_path: str,
+        *,
+        max_tokens: int | None = None,
+        system_prompt: str | None = None,
+    ) -> tuple[str, Any]:
+        client = lazyllm.OnlineModule(source=self.source, model=self.vlm_model, type='vlm')
+        # Combine system_prompt and prompt if system_prompt exists
+        full_prompt = prompt
+        if system_prompt:
+            full_prompt = f'{system_prompt}\n\n{prompt}'
+        LOG.debug(f'Processing image with {self.source}/{self.vlm_model}: {image_path}')
+        # LazyLLM VLM accepts prompt as first positional argument and image_path as keyword argument
+        response = await self._call_async(client, full_prompt, image_path=image_path)
+        return response, None
+
+    async def embed(
+        self,
+        texts: list[str],
+        batch_size: int = 10,  # optional
+    ) -> list[list[float]]:
+        client = lazyllm.OnlineModule(source=self.source, model=self.embed_model, type='embed')
+        LOG.debug(f'embed {len(texts)} texts with {self.source}/{self.embed_model}')
+        response = await self._call_async(client, texts)
+        return response
+
+    async def transcribe(
+        self,
+        audio_path: str,
+        language: str | None = None,
+        prompt: str | None = None,
+    ) -> str:
+        client = lazyllm.OnlineModule(source=self.source, model=self.stt_model, type='stt')
+        LOG.debug(f'Transcribing audio with {self.source}/{self.stt_model}: {audio_path}')
+        response = await self._call_async(client, audio_path)
+        return response
+
\ No newline at end of file
diff --git a/tests/test_lazyllm.py b/tests/test_lazyllm.py
new file mode 100644
index 00000000..81863f46
--- /dev/null
+++ b/tests/test_lazyllm.py
@@ -0,0 +1,121 @@
+#!/usr/bin/env python3
+"""
+Quick test script to verify LazyLLM backend configuration and basic functionality.
+
+Usage:
+    export LAZYLLM_API_KEY=your_api_key
+    python tests/test_lazyllm.py
+"""
+
+import asyncio
+import os
+import sys
+
+# Add src to sys.path
+src_path = os.path.abspath("src")
+sys.path.insert(0, src_path)
+
+from memu.llm.lazyllm_client import LazyLLMClient
+
+
+async def test_lazyllm_client():
+    """Test LazyLLMClient with basic operations."""
+
+    print("LazyLLM Backend Test")
+    print("=" * 60)
+
+    # Get API key from environment
+    api_key = os.getenv("LAZYLLM_API_KEY")
+    if not api_key:
+        print("❌ Error: Please set LAZYLLM_API_KEY environment variable")
+        print("   export LAZYLLM_API_KEY=your_api_key")
+        return False
+
+    print(f"✓ API key found: {api_key[:20]}...")
+
+    # Initialize client
+    try:
+        client = LazyLLMClient(
+            source="qwen",
+            chat_model="qwen-plus",
+            vlm_model="qwen-vl-plus",
+            embed_model="text-embedding-v3",
+            stt_model="qwen-audio-turbo",
+            api_key=api_key
+        )
+        print("✓ LazyLLMClient initialized successfully")
+    except Exception as e:
+        print(f"❌ Failed to initialize LazyLLMClient: {e}")
+        return False
+
+    # Test 1: Summarization
+    print("\n[Test 1] Testing summarization...")
+    try:
+        test_text = "这是一段关于Python编程的文本。Python是一种高级编程语言,具有简单易学的语法。它被广泛用于数据分析、机器学习和Web开发。"
+        result = await client.summarize(test_text)
+        print(f"✓ Summarization successful")
+        print(f"  Result: {result[:100]}...")
+    except Exception as e:
+        print(f"❌ Summarization failed: {e}")
+        import traceback
+        traceback.print_exc()
+
+    # Test 2: Embedding
+    print("\n[Test 2] Testing embedding...")
+    try:
+        test_texts = ["Hello world", "How are you", "Nice to meet you"]
+        embeddings = await client.embed(test_texts)
+        print(f"✓ Embedding successful")
+        print(f"  Generated {len(embeddings)} embeddings")
+        if embeddings and embeddings[0]:
+            print(f"  Embedding dimension: {len(embeddings[0])}")
+    except Exception as e:
+        print(f"❌ Embedding failed: {e}")
+        import traceback
+        traceback.print_exc()
+
+    # Test 3: Vision (requires image file)
+    print("\n[Test 3] Testing vision...")
+    test_image_path = "examples/resources/images/sample.jpg"
+    if os.path.exists(test_image_path):
+        try:
+            result, response = await client.vision(
+                prompt="描述这张图片的内容",
+                image_path=test_image_path
+            )
+            print(f"✓ Vision successful")
+            print(f"  Result: {result[:100]}...")
+        except Exception as e:
+            print(f"❌ Vision failed: {e}")
+            import traceback
+            traceback.print_exc()
+    else:
+        print(f"⚠ Skipped: Test image not found at {test_image_path}")
+
+    # Test 4: Transcription (requires audio file)
+    print("\n[Test 4] Testing transcription...")
+    test_audio_path = "examples/resources/audio/sample.wav"
+    if os.path.exists(test_audio_path):
+        try:
+            result = await client.transcribe(
+                audio_path=test_audio_path,
+                language="zh"
+            )
+            print(f"✓ Transcription successful")
+            print(f"  Result: {result[:100]}...")
+        except Exception as e:
+            print(f"❌ Transcription failed: {e}")
+            import traceback
+            traceback.print_exc()
+    else:
+        print(f"⚠ Skipped: Test audio not found at {test_audio_path}")
+
+    print("\n" + "=" * 60)
+    print("✓ LazyLLM backend tests completed!")
+    return True
+
+
+if __name__ == "__main__":
+    success = asyncio.run(test_lazyllm_client())
+    sys.exit(0 if success else 1)

From 62eccb7d38a885d3a42f794631eb339bd851eb98 Mon Sep 17 00:00:00 2001
From: unknown
Date: Tue, 20 Jan 2026 11:18:49 +0800
Subject: [PATCH 02/14] fix bug

---
 .gitignore | 1 -
 1 file changed, 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index c7700a38..40472ad9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,7 +3,6 @@ data/
 __pycache__/
*.py[cod] *$py.class -venv-memU/ # C extensions *.so From 9161938f82bfc3eacba1fbfb9765adcb48c02226 Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 21 Jan 2026 16:10:12 +0800 Subject: [PATCH 03/14] add more provider source / add namespace feature --- ...mple_4_conversation_memory_with_lazyllm.py | 31 +-- ...example_5_skill_extraction_with_lazyllm.py | 58 ++--- ...xample_6_multimodal_memory_with_lazyllm.py | 52 ++-- .../activities.md | 15 +- .../experiences.md | 6 +- .../conversation_example_lazyllm/goals.md | 11 +- .../conversation_example_lazyllm/habits.md | 6 +- .../conversation_example_lazyllm/opinions.md | 4 +- .../personal_info.md | 4 +- .../preferences.md | 14 +- .../conversation_example_lazyllm/work_life.md | 17 +- .../architecture_concepts.md | 9 +- .../best_practices.md | 13 +- .../technical_documentation.md | 9 +- .../output/skill_example_lazyllm/log_1.md | 223 +++++++----------- .../output/skill_example_lazyllm/log_2.md | 207 +++++++++------- .../output/skill_example_lazyllm/log_3.md | 191 +++++++-------- .../output/skill_example_lazyllm/skill.md | 218 +++++++++-------- src/memu/app/service.py | 10 +- src/memu/app/settings.py | 7 +- src/memu/llm/lazyllm_client.py | 37 +-- 21 files changed, 566 insertions(+), 576 deletions(-) diff --git a/examples/example_4_conversation_memory_with_lazyllm.py b/examples/example_4_conversation_memory_with_lazyllm.py index cc4cf8ea..4c56d2de 100644 --- a/examples/example_4_conversation_memory_with_lazyllm.py +++ b/examples/example_4_conversation_memory_with_lazyllm.py @@ -5,7 +5,7 @@ and generate a memory category JSON file using the LazyLLM backend. Usage: - export LAZYLLM_QWEN_API_KEY=your_api_key + export MEMU_QWEN_API_KEY=your_api_key python examples/example_4_conversation_memory_with_lazyllm.py """ @@ -13,6 +13,7 @@ import os import sys from pathlib import Path +import lazyllm # Add src to sys.path FIRST before importing memu project_root = Path(__file__).parent.parent @@ -67,11 +68,11 @@ async def main(): print("Example 4: Conversation Memory Processing with LazyLLM Backend") print("-" * 60) - # Get LazyLLM API key from environment - # api_key = os.getenv("LAZYLLM_QWEN_API_KEY") - api_key = os.getenv("LAZYLLM_QWEN_API_KEY") + lazyllm.config.add("qwen_api_key", str, env="QWEN_API_KEY", description="Qwen API Key") + with lazyllm.config.namespace("MEMU"): + api_key = lazyllm.config['qwen_api_key'] if not api_key: - msg = "Please set LAZYLLM_QWEN_API_KEY environment variable" + msg = "Please set MEMU_QWEN_API_KEY environment variable" raise ValueError(msg) # Initialize service with LazyLLM backend using llm_profiles @@ -80,20 +81,14 @@ async def main(): llm_profiles={ "default": { "client_backend": "lazyllm_backend", - "source": "qwen", - "chat_model": "qwen-plus", - "vlm_model": "qwen-vl-plus", + "llm_source": "qwen", + "vlm_source": "qwen", + "embed_source": "qwen", + "stt_source": "qwen", + "chat_model": "qwen3-max", + "vlm_model":"qwen-vl-plus", "embed_model": "text-embedding-v3", - "stt_model": "qwen-audio-turbo", - "api_key": api_key, - }, - "embedding": { - "client_backend": "lazyllm_backend", - "source": "qwen", - "chat_model": "qwen-plus", - "vlm_model": "qwen-vl-plus", - "embed_model": "text-embedding-v3", - "stt_model": "qwen-audio-turbo", + "stt_model":"qwen-audio-turbo", "api_key": api_key, }, }, diff --git a/examples/example_5_skill_extraction_with_lazyllm.py b/examples/example_5_skill_extraction_with_lazyllm.py index 3f9e01ef..d19ef368 100644 --- a/examples/example_5_skill_extraction_with_lazyllm.py +++ 
b/examples/example_5_skill_extraction_with_lazyllm.py @@ -13,6 +13,7 @@ import os import sys from pathlib import Path +import lazyllm # Add src to sys.path FIRST before importing memu project_root = Path(__file__).parent.parent @@ -114,18 +115,8 @@ async def generate_skill_md( # Use LazyLLM through MemoryService system_prompt = "You are an expert technical writer creating concise, production-grade deployment guides from real experiences." - # Create a temporary client from the service to use for generation - from memu.llm.lazyllm_client import LazyLLMClient - - api_key = os.getenv("LAZYLLM_QWEN_API_KEY") - llm_client = LazyLLMClient( - source="qwen", - chat_model=service.llm_config.chat_model, - api_key=api_key, - ) - full_prompt = f"{system_prompt}\n\n{prompt}" - generated_content = await llm_client.summarize( + generated_content = await service.llm_client.summarize( text=full_prompt, system_prompt=system_prompt, ) @@ -151,9 +142,11 @@ async def main(): print("-" * 60) # Get LazyLLM API key from environment - api_key = os.getenv("LAZYLLM_QWEN_API_KEY") + lazyllm.config.add("qwen_api_key", str, env="QWEN_API_KEY", description="Qwen API Key") + with lazyllm.config.namespace("MEMU"): + api_key = lazyllm.config['qwen_api_key'] if not api_key: - msg = "Please set LAZYLLM_QWEN_API_KEY environment variable" + msg = "Please set MEMU_QWEN_API_KEY environment variable" raise ValueError(msg) # Custom config for skill extraction @@ -161,7 +154,6 @@ async def main(): You are analyzing an agent execution log. Extract the key actions taken, their outcomes, and lessons learned. For each significant action or phase: - 1. **Action/Phase**: What was being attempted? 2. **Status**: SUCCESS ✅ or FAILURE ❌ 3. **What Happened**: What was executed @@ -170,11 +162,29 @@ async def main(): 6. **Lesson**: What did we learn? 7. **Action Items**: Concrete steps for next time + Assign each extracted skill to one or more relevant categories from the following list: + {categories_str} + **IMPORTANT**: - Focus on ACTIONS and outcomes - Be specific: include actual metrics, errors, timing - ONLY extract information explicitly stated - DO NOT infer or assume information + - Output MUST be valid XML wrapped in tags. + + Output format: + + + + [Action] Description of the action and outcome. + [Lesson] Key lesson learned. + + + Category Name + + + ... + Extract ALL significant actions from the text: @@ -214,20 +224,14 @@ async def main(): llm_profiles={ "default": { "client_backend": "lazyllm_backend", - "source": "qwen", - "chat_model": "qwen-plus", - "vlm_model": "qwen-vl-plus", - "embed_model": "text-embedding-v3", - "stt_model": "qwen-audio-turbo", - "api_key": api_key, - }, - "embedding": { - "client_backend": "lazyllm_backend", - "source": "qwen", - "chat_model": "qwen-plus", - "vlm_model": "qwen-vl-plus", + "llm_source": "qwen", + "vlm_source": "qwen", + "embed_source": "qwen", + "stt_source": "qwen", + "chat_model": "qwen3-max", + "vlm_model":"qwen-vl-plus", + "stt_model":"qwen-audio-turbo", "embed_model": "text-embedding-v3", - "stt_model": "qwen-audio-turbo", "api_key": api_key, }, }, diff --git a/examples/example_6_multimodal_memory_with_lazyllm.py b/examples/example_6_multimodal_memory_with_lazyllm.py index 58a38bbe..9e11f51a 100644 --- a/examples/example_6_multimodal_memory_with_lazyllm.py +++ b/examples/example_6_multimodal_memory_with_lazyllm.py @@ -5,7 +5,7 @@ and generate a unified memory category JSON file using LazyLLM backend. 
Usage: - export LAZYLLM_QWEN_API_KEY=your_api_key + export MEMU_QWEN_API_KEY=your_api_key python examples/example_6_multimodal_memory_with_lazyllm.py """ @@ -13,6 +13,7 @@ import os import sys from pathlib import Path +import lazyllm # Add src to sys.path FIRST before importing memu project_root = Path(__file__).parent.parent @@ -75,9 +76,11 @@ async def main(): print("-" * 60) # Get LazyLLM API key from environment - api_key = os.getenv("LAZYLLM_QWEN_API_KEY") + lazyllm.config.add("qwen_api_key", str, env="QWEN_API_KEY", description="Qwen API Key") + with lazyllm.config.namespace("MEMU"): + api_key = lazyllm.config['qwen_api_key'] if not api_key: - msg = "Please set LAZYLLM_QWEN_API_KEY environment variable" + msg = "Please set MEMU_QWEN_API_KEY environment variable" raise ValueError(msg) # Define custom categories for multimodal content @@ -91,6 +94,22 @@ async def main(): {"name": "code_examples", "description": "Code snippets, examples, and implementation details"}, {"name": "visual_diagrams", "description": "Visual concepts, diagrams, charts, and illustrations from images"}, ] + xml_prompt = """ + Analyze the following content and extract key information. + + Content: {resource} + + Output MUST be strictly valid XML wrapped in tags (or , etc). + Format: + + + Your extracted content here... + + category_name + + + + """ # Initialize service with LazyLLM backend using llm_profiles # The "default" profile is required and used as the primary LLM configuration @@ -98,24 +117,23 @@ async def main(): llm_profiles={ "default": { "client_backend": "lazyllm_backend", - "source": "qwen", - "chat_model": "qwen-plus", - "vlm_model": "qwen-vl-plus", - "embed_model": "text-embedding-v3", - "stt_model": "qwen-audio-turbo", - "api_key": api_key, - }, - "embedding": { - "client_backend": "lazyllm_backend", - "source": "qwen", - "chat_model": "qwen-plus", - "vlm_model": "qwen-vl-plus", + "llm_source": "qwen", + "vlm_source": "qwen", + "embed_source": "qwen", + "stt_source": "qwen", + "chat_model": "qwen3-max", + "vlm_model":"qwen-vl-plus", + "stt_model":"qwen-audio-turbo", "embed_model": "text-embedding-v3", - "stt_model": "qwen-audio-turbo", "api_key": api_key, }, + + }, + memorize_config={ + "memory_categories": multimodal_categories, + "memory_types": ["knowledge"], + "memory_type_prompts": {"knowledge": xml_prompt} }, - memorize_config={"memory_categories": multimodal_categories}, ) # Resources to process (file_path, modality) diff --git a/examples/output/conversation_example_lazyllm/activities.md b/examples/output/conversation_example_lazyllm/activities.md index f0e7799b..e594bde9 100644 --- a/examples/output/conversation_example_lazyllm/activities.md +++ b/examples/output/conversation_example_lazyllm/activities.md @@ -1,8 +1,7 @@ -# activities -## Learning -- The user is learning about event-driven architecture and message queues -- The user is researching OpenAPI specifications and tools like Swagger and Postman -## Open Source Contributions -- The user contributes to open-source projects, including a Python CLI tool used for automating deployment tasks -## Physical Activities -- The user goes to the gym 3–4 times a week after work around 7 PM +# activities +## Open Source Contributions +- The user contributes to open source projects, including a Python CLI tool used in their daily workflow +## Fitness +- The user exercises regularly, going to the gym 3–4 times a week after work around 7 PM +## Reading +- The user likes reading diff --git 
a/examples/output/conversation_example_lazyllm/experiences.md b/examples/output/conversation_example_lazyllm/experiences.md index f45ed0c1..e9f7c82d 100644 --- a/examples/output/conversation_example_lazyllm/experiences.md +++ b/examples/output/conversation_example_lazyllm/experiences.md @@ -1,5 +1 @@ -# experiences -## Work-Related Stress -- The user has been having trouble sleeping lately due to work stress related to leading a big product launch next month -## Work-Life Balance -- The user checks work emails late at night and is considering setting no-work hours to improve work-life balance +*No content available* diff --git a/examples/output/conversation_example_lazyllm/goals.md b/examples/output/conversation_example_lazyllm/goals.md index 1f64b315..caded271 100644 --- a/examples/output/conversation_example_lazyllm/goals.md +++ b/examples/output/conversation_example_lazyllm/goals.md @@ -1,6 +1,7 @@ # goals -## Product Development Objectives -- The user wants to build a SaaS product related to developer tools or automation -- The user is planning to start prototyping a SaaS product for API testing and documentation automation in the next few months -## Project Focus -- The user is planning to build a tool for API testing and documentation with automated test generation +## Professional Interests +- The user is interested in system design and scalability patterns +## Product Goals +- The user wants to build a SaaS product for API testing and documentation that generates test cases from API specs +## Technical Research +- The user is researching OpenAPI specifications and tools like Swagger and Postman for their SaaS idea diff --git a/examples/output/conversation_example_lazyllm/habits.md b/examples/output/conversation_example_lazyllm/habits.md index 499c35dd..8cc0e3d0 100644 --- a/examples/output/conversation_example_lazyllm/habits.md +++ b/examples/output/conversation_example_lazyllm/habits.md @@ -1,6 +1,6 @@ # habits ## Daily Routine - The user goes for a run every morning -- The user goes to the gym 3-4 times a week, usually around 7 PM -- The user drinks coffee throughout the day to stay alert -- The user tends to check work emails late at night +- The user usually tries to go to bed around 11 PM +- The user tends to check their phone before bed +- The user drinks coffee throughout the day and usually has their last coffee around 3-4 PM diff --git a/examples/output/conversation_example_lazyllm/opinions.md b/examples/output/conversation_example_lazyllm/opinions.md index 3e1b0065..e9f7c82d 100644 --- a/examples/output/conversation_example_lazyllm/opinions.md +++ b/examples/output/conversation_example_lazyllm/opinions.md @@ -1,3 +1 @@ -# opinions -## Beliefs about Technology and Innovation -- The user believes there is room for an AI-powered, automated solution for API documentation and testing +*No content available* diff --git a/examples/output/conversation_example_lazyllm/personal_info.md b/examples/output/conversation_example_lazyllm/personal_info.md index a8ca8b13..d19d6df5 100644 --- a/examples/output/conversation_example_lazyllm/personal_info.md +++ b/examples/output/conversation_example_lazyllm/personal_info.md @@ -1,3 +1,3 @@ # personal_info -## Core Traits -- The user is not a morning person +## Basic Information +- The user is a software engineer named Alex who has been programming for about 5 years diff --git a/examples/output/conversation_example_lazyllm/preferences.md b/examples/output/conversation_example_lazyllm/preferences.md index 869de46d..12d1675c 100644 --- 
a/examples/output/conversation_example_lazyllm/preferences.md +++ b/examples/output/conversation_example_lazyllm/preferences.md @@ -1,7 +1,7 @@ -# preferences -## Interests -- The user is interested in system design and scalability patterns -- The user likes food and nature -- The user likes reading -## Dietary Preferences -- The user is trying to eat less meat +# preferences +## Interests +- The user loves food and nature +## Dietary Preferences +- The user is trying to eat less meat +## Lifestyle Traits +- The user is not a morning person diff --git a/examples/output/conversation_example_lazyllm/work_life.md b/examples/output/conversation_example_lazyllm/work_life.md index dad4a6be..9a8eee76 100644 --- a/examples/output/conversation_example_lazyllm/work_life.md +++ b/examples/output/conversation_example_lazyllm/work_life.md @@ -1,11 +1,6 @@ -# work_life -## Professional Role -- The user is a software engineer at TechCorp -- The user has been programming for about 5 years -- The user leads a big product launch at work -## Technical Stack -- The user works on backend systems using Python and Go -- The user uses Kubernetes for orchestration and Redis for caching -- The user is using Apache Kafka for event streaming -## Monitoring & Observability -- The user is responsible for monitoring and observability with Prometheus and Grafana +# work_life +## Professional Background +- The user works in software development at TechCorp on backend systems using Python and Go +- The user uses Django and FastAPI for Python services and is migrating critical services to Go +- The user works with Kubernetes, Redis, Apache Kafka, Prometheus, and Grafana +- The user often answers work emails late at night diff --git a/examples/output/multimodal_example_lazyllm/architecture_concepts.md b/examples/output/multimodal_example_lazyllm/architecture_concepts.md index 9df92cf4..f7527b4e 100644 --- a/examples/output/multimodal_example_lazyllm/architecture_concepts.md +++ b/examples/output/multimodal_example_lazyllm/architecture_concepts.md @@ -2,11 +2,4 @@ *System architecture, design patterns, and structural concepts* -# architecture_concepts -## System Architecture -- MemU processes inputs through a pipeline including preprocessing, summarization, embedding, classification, categorization, and persistent storage -- MemU includes a MemoryService core for managing memory operations such as store, retrieve, update, and delete -## Memory Classification -- Memories in MemU are classified into types such as profile, event, knowledge, and behavior -## Retrieval Mechanism -- MemU's retrieval pipeline includes query rewriting, context expansion, recall, re-ranking, and result assembly +*No content available* diff --git a/examples/output/multimodal_example_lazyllm/best_practices.md b/examples/output/multimodal_example_lazyllm/best_practices.md index 4a152b36..326c82e1 100644 --- a/examples/output/multimodal_example_lazyllm/best_practices.md +++ b/examples/output/multimodal_example_lazyllm/best_practices.md @@ -2,15 +2,4 @@ *Best practices, recommendations, and guidelines* -# best_practices -## Storage & Architecture -- MemU supports flexible storage backends including SQLite, PostgreSQL, and in-memory options -- MemU uses a vector search engine for semantic retrieval with dense embeddings and hybrid search capabilities -## Memory Management -- MemU dynamically assigns memories to default or custom semantic categories and maintains auto-updated summaries -## Retrieval Strategies -- MemU provides retrieval strategies including 
RAG-based vector search and LLM-based context-aware methods -## Best Practices -- Best practices for MemU emphasize memory quality, configuration tuning, performance optimization, and privacy compliance -## Use Cases -- MemU use cases include personal assistants, customer support, education, knowledge management, and agent workflows +*No content available* diff --git a/examples/output/multimodal_example_lazyllm/technical_documentation.md b/examples/output/multimodal_example_lazyllm/technical_documentation.md index 6bd2b1f1..86694fad 100644 --- a/examples/output/multimodal_example_lazyllm/technical_documentation.md +++ b/examples/output/multimodal_example_lazyllm/technical_documentation.md @@ -2,11 +2,4 @@ *Technical documentation, guides, and tutorials* -# technical_documentation -## Framework Overview -- MemU is an advanced agentic memory framework that supports multiple input modalities and enables AI agents to process, organize, and retrieve memories in a structured, semantically meaningful way -## Integration & Development -- MemU provides a Python API for integration and customization, with an open-source roadmap focused on long-term improvements -## Limitations -- The user cannot provide image analysis due to lack of visual interpretation capability -- The user requested image analysis but was informed that the model cannot view or interpret visual content +*No content available* diff --git a/examples/output/skill_example_lazyllm/log_1.md b/examples/output/skill_example_lazyllm/log_1.md index 1fbb2d7c..cfc82706 100644 --- a/examples/output/skill_example_lazyllm/log_1.md +++ b/examples/output/skill_example_lazyllm/log_1.md @@ -1,7 +1,7 @@ ```yaml --- name: production-microservice-deployment -description: Production-ready guide for deploying a microservice using blue-green deployment strategy with monitoring, rollback safeguards, and lessons from real deployment attempts. +description: Production-ready guide for deploying microservices using a blue-green strategy, based on real incident learnings to prevent connection exhaustion and ensure safe rollouts. version: 0.1.0 status: Evolving --- @@ -11,199 +11,142 @@ status: Evolving ## Introduction -This guide provides a battle-tested procedure for safely deploying a microservice to production using the blue-green deployment strategy. It is intended for use when minimizing downtime and enabling rapid rollback are critical. The steps reflect lessons learned from active deployment attempts and focus on practical execution in cloud-native environments using Kubernetes and CI/CD pipelines. +This guide provides a practical, experience-driven procedure for safely deploying microservices in production using the blue-green deployment strategy. It is intended for use during versioned service updates where minimizing user impact and enabling rapid rollback are critical. The steps and checks herein are derived from a real-world failure involving database connection exhaustion during traffic shift. -Use this guide during scheduled maintenance windows for major version updates or breaking changes where user impact must be contained. 
+Use this guide when: +- Deploying stateful services that maintain database connections +- Employing blue-green deployments in Kubernetes or similar orchestration platforms +- Ensuring infrastructure capacity aligns with peak deployment load --- ## Deployment Context - **Strategy**: Blue-green deployment -- **Environment**: Kubernetes (EKS), AWS-hosted, Helm-managed services -- **Traffic Management**: Istio service mesh with weighted routing -- **Primary Goals**: - - Zero-downtime deployment - - Immediate rollback capability - - Controlled traffic shift with observability - - Validation of health before full cutover +- **Environment**: Production, containerized (e.g., Kubernetes), PostgreSQL backend +- **Traffic Shift**: Gradual (e.g., 0% → 50% → 100%) via service mesh or ingress controller +- **Goal**: Zero-downtime deployment with full rollback capability within 2 minutes if anomalies occur +- **Critical Risk**: Resource contention during dual-environment operation (blue + green) --- ## Pre-Deployment Checklist -### 🔧 Infrastructure & Configuration -- [x] New (green) environment provisioned and stable (`kubectl get nodes --selector=env=green`) -- [x] Helm chart version tagged and immutable (e.g., `my-service-1.4.0`) -- [x] ConfigMaps and Secrets verified for green environment (no dev defaults) +Ensure all items are verified **before** initiating deployment. -### 🛢️ Database -- [x] Schema migrations are backward-compatible **(Critical)** -- [x] Migration scripts tested in staging with production-like data -- [ ] Downtime-free migration path confirmed (if applicable) +### Database +- [ ] **Validate `max_connections` limit supports combined blue and green load** *(Critical)* + > Ensure total expected connections from both environments ≤ database limit +- [ ] **Adjust per-pod connection pool size** *(Critical)* + > Scale down individual pod pools to prevent oversubscription during overlap -### 📊 Monitoring & Observability -- [x] Prometheus metrics endpoints exposed on new pods -- [x] Grafana dashboards updated to include green service instance -- [x] Alertmanager rules cover deployment-phase anomalies (latency, error rate spikes) -- [x] Distributed tracing (Jaeger) enabled and sampled at 100% during cutover +### Monitoring & Alerts +- [ ] **Enable monitoring of active database connections** *(Critical)* + > Track metric: `pg_stat_database.numbackends` or equivalent +- [ ] **Set up alert thresholds for connection pool usage (>80%)** *(Critical)* + > Trigger alerts during deployment phase +- [ ] Confirm end-to-end metrics pipeline is active (Prometheus/Grafana or equivalent) -### 🧪 Validation Readiness -- [x] Smoke test suite available and passing against staging -- [x] Canaries configured to hit green service pre-cutover -- [x] Rollback image tagged and accessible in registry (`v1.3.9-rollback`) +### Testing & Validation +- [ ] **Perform full-capacity load test under dual-environment conditions** *(Critical)* + > Simulate blue + green traffic concurrently +- [ ] Verify no performance degradation or connection errors at peak load +- [ ] Confirm rollback mechanism works in staging --- ## Deployment Procedure -> ⏱️ Estimated execution time: 18 minutes +> ⚠️ Monitor all systems continuously during execution. -1. **Deploy Green Service** +1. 
**Deploy Green Environment** ```bash - helm upgrade my-service-green ./charts/my-service \ - --namespace production \ - --set environment=green \ - --set image.tag=v1.4.0 \ - --install + kubectl apply -f recommendation-service-v2.5.0-green.yaml ``` + - Wait for all pods to reach `Running` and pass readiness probes + - Confirm logs show clean startup with no connection errors -2. **Wait for Pod Readiness** - ```bash - kubectl wait --for=condition=ready pod -l app=my-service,environment=green -n production --timeout=5m - ``` - - ✅ Monitoring Point: All pods report `Ready` status within 5 minutes - - ❌ If pending > 3 minutes: check resource quotas and node autoscaling - -3. **Run Smoke Tests Against Green** - ```bash - ./scripts/smoke-test.sh --target https://api-green.example.com - ``` - - ✅ Expected: All 7 tests pass, response time < 800ms - - ❌ Fail: Halt deployment, investigate logs and traces - -4. **Shift 5% Traffic to Green (Canary)** - Apply Istio traffic split: - ```yaml - apiVersion: networking.istio.io/v1alpha3 - kind: VirtualService - metadata: - name: my-service-route - spec: - hosts: - - api.example.com - http: - - route: - - destination: - host: my-service-blue.production.svc.cluster.local - weight: 95 - - destination: - host: my-service-green.production.svc.cluster.local - weight: 5 - ``` - ```bash - kubectl apply -f virtualservice-split.yaml - ``` +2. **Verify Green Service Health** + - Access `/health` endpoint directly (bypassing router) + - Confirm database connectivity and query responsiveness + - Check metrics dashboard: baseline connection count established -5. **Monitor Key Metrics (5 minutes)** - - Error rate (goal: < 0.5%) - - P95 latency (< 1.2s) - - Request volume consistency - - Check Jaeger traces for failed spans - -6. **Shift 100% Traffic to Green** - Update weights: - ```yaml - - weight: 0 # blue - - weight: 100 # green - ``` +3. **Begin Traffic Shift (0% → 50%)** ```bash - kubectl apply -f virtualservice-split.yaml + kubectl apply -f traffic-shift-50pct.yaml ``` + - Update canary weight or virtual service routing rules accordingly + - Allow 2–3 minutes for traffic stabilization -7. **Verify Full Cutover** - ```bash - curl -H "Host: api.example.com" http://ingress/status | grep "version=1.4.0" - ``` +4. **Monitor During 50% Shift** + - Observe: + - Error rates (must remain <0.5%) + - Latency (P95 < 300ms) + - **Active database connections** (must not exceed 80% of max) + - If any threshold breached → **Initiate Rollback Immediately** -8. **Decommission Blue (After 1 hour)** +5. **Proceed to 100% Traffic (if stable)** ```bash - helm uninstall my-service-blue --namespace production + kubectl apply -f traffic-shift-100pct.yaml ``` + - Redirect all traffic to green + - Decommission blue environment after confirmation: + ```bash + kubectl delete -f recommendation-service-v2.4.0-blue.yaml + ``` --- ## Rollback Procedure -### When to Roll Back - -Roll back immediately if any of the following occur: -- Error rate > 5% sustained over 2 minutes -- Latency P95 > 3s for 3+ minutes -- Database connection pool exhaustion observed -- Smoke test failure at any stage +### When to Rollback +Immediate rollback required if: +- Error rate exceeds **1% for 60 seconds** +- Latency P95 > **1s for 2+ minutes** +- Database connections ≥ **90% of max_connections** +- Emergency signal from SRE team -### Steps - -1. **Revert Traffic to Blue** +### Execute Rollback +1. 
Revert traffic to blue: ```bash - kubectl patch virtualservice my-service-route --patch ' - spec: - http: - - route: - - destination: - host: my-service-blue.production.svc.cluster.local - weight: 100 - - destination: - host: my-service-green.production.svc.cluster.local - weight: 0' + kubectl apply -f traffic-shift-100pct-blue.yaml ``` - -2. **Confirm Health of Blue Service** +2. Confirm traffic rerouted within **60 seconds** +3. Terminate green pods: ```bash - kubectl get pods -l app=my-service,environment=blue -n production + kubectl delete -f recommendation-service-v2.5.0-green.yaml ``` - - Ensure all replicas are running and ready +4. Validate blue service stability via health checks and dashboards -3. **Trigger Alert Acknowledgment** - - Manually acknowledge firing alerts in Alertmanager - - Notify #prod-alerts: `@team Rollback initiated – green service degraded` - -4. **Expected Recovery Time**: < 90 seconds from rollback initiation +✅ **Expected Recovery Time**: ≤ 1.5 minutes +✅ **Impact Window**: ~1.5 minutes at <5% error rate (historically observed) --- ## Common Pitfalls & Solutions -| Issue | Root Cause | Symptom | Solution | -|------|-----------|--------|---------| -| Green pods stuck in `Pending` | Node autoscaler not triggered | No new pods scheduled | Manually scale node group or reduce CPU requests temporarily | -| Sudden 503s after cutover | Misconfigured readiness probe | Pods accept traffic before DB connection | Add `initialDelaySeconds: 30` to probe config | -| Rollback fails due to blue already uninstalled | Premature cleanup | 503s across the board | Reinstall blue via Helm restore from last release revision | -| Traces missing in Jaeger | Sampling rate too low | Incomplete trace visibility | Set `tracing.sample-rate: 100` during deployment window | +| Issue | Symptom | Root Cause | Solution | +|------|--------|-----------|----------| +| Database connection exhaustion | Errors during 50% shift, timeouts | `max_connections=100` too low; per-pod pools oversized | Increase DB limit; reduce per-pod pool size | +| No early warning | Failure detected too late | Missing alerts on connection usage | Implement proactive monitoring at 80% threshold | +| Undetected bottleneck | Load test passed but failed live | Test did not simulate dual blue-green load | Add full-capacity integration testing pre-deploy | --- ## Best Practices -- Always keep the previous version deployable and tracked (tagged in Helm repository) -- Run smoke tests against green **before** any traffic shift -- Use immutable image tags — never `latest` -- Schedule deployments during low-traffic periods (e.g., 02:00–04:00 local ops time) -- Coordinate with SRE team for alert suspension/sensitivity adjustment during window - -> ✅ Expected timeline: -> - Preparation: 30 min -> - Execution: 18 min -> - Observation: 60 min -> - Total: ~110 minutes +- **Always size infrastructure for peak deployment states**, not just steady-state +- **Test under realistic overlap conditions** — blue and green running simultaneously +- **Integrate checklist items into CI/CD gates** — block deployment if validations missing +- **Expect rollout duration**: ~8–12 minutes (including verification windows) +- **Rollback drills**: Conduct quarterly in staging --- ## Key Takeaways -1. **Backward-compatible schema changes are non-negotiable** — even minor migrations can break old instances during rollback. -2. **Readiness probes must reflect actual service dependencies**, especially database and cache connectivity. -3. 
**Never decommission the blue stack until post-cutover stability is confirmed** — rollback without it is impossible. -4. **Observability must be pre-wired** — ad-hoc dashboard creation delays incident response. -5. **Automated smoke tests are essential** — manual validation is unreliable under pressure. \ No newline at end of file +1. **Connection pools must be sized for combined blue and green load** — never assume steady-state capacity suffices. +2. **Infrastructure limits (e.g., `max_connections`) must be validated pre-deployment** — silent failures occur when shared resources are exhausted. +3. **Proactive monitoring of key database metrics is non-negotiable** — lack of alerts delays detection and increases blast radius. +4. **Full-capacity and dual-environment testing is mandatory** — unit and single-instance tests do not reveal integration bottlenecks. +5. **Remediation actions must become standard checks** — update checklists and automate where possible to prevent recurrence. \ No newline at end of file diff --git a/examples/output/skill_example_lazyllm/log_2.md b/examples/output/skill_example_lazyllm/log_2.md index 4757116a..2f1b19f3 100644 --- a/examples/output/skill_example_lazyllm/log_2.md +++ b/examples/output/skill_example_lazyllm/log_2.md @@ -1,131 +1,162 @@ -```markdown +```yaml --- name: production-microservice-deployment -description: Production-ready guide for deploying microservices using a blue-green deployment strategy with real-world lessons learned from partial deployment attempts. +description: Production-ready guide for deploying microservices using a blue-green strategy, based on real-world incidents involving connection exhaustion and performance degradation due to missing indexing. version: 0.2.0 status: Evolving --- +``` # Production Microservice Deployment with Blue-Green Strategy ## Introduction -This guide provides a practical, step-by-step procedure for safely deploying a microservice in production using the blue-green deployment pattern. It is intended for use when minimizing downtime and enabling rapid rollback are critical. The procedures, checks, and pitfalls documented here are derived from two prior deployment attempts (1 successful phase, 1 partial failure), capturing actionable insights from real operational experience. +This guide provides a concise, production-grade procedure for safely deploying microservices using the blue-green deployment strategy. It is designed for use when zero-downtime deployments are required and rollback safety is critical. The steps and checks included are derived from actual deployment failures involving database connection exhaustion and performance degradation under load. + +Use this guide during scheduled deployments of stateful or database-dependent services where infrastructure capacity and query performance must be validated at scale. -Use this guide during scheduled production releases where traffic switching, data consistency, and observability are required. 
+--- ## Deployment Context -- **Strategy**: Blue-green deployment using Kubernetes `Service` selector switch -- **Environment**: Kubernetes 1.25+ (EKS), AWS RDS backend, Prometheus/Grafana/Loki stack +- **Strategy**: Blue-green deployment with incremental traffic shifting (10% → 25% → 50% → 75% → 100%) +- **Environment**: Kubernetes-based platform with service mesh routing; PostgreSQL backend - **Goals**: - - Zero-downtime cutover - - Sub-5-minute rollback if thresholds breached - - Full observability during transition - - Data schema compatibility across versions + - Achieve zero-downtime release + - Detect issues before full cutover + - Ensure system stability during dual-environment operation + - Enable rapid rollback (<2 minutes) if thresholds are breached + +--- ## Pre-Deployment Checklist -### Infrastructure & Configuration -- [ ] **(Critical)** New green environment (v2) pods are running and passing readiness/liveness probes -- [ ] **(Critical)** Database schema changes (if any) are backward compatible with both v1 (blue) and v2 (green) -- [ ] Green service endpoint (`svc-green`) exists and routes to v2 pods -- [ ] Blue service endpoint (`svc-blue`) remains active and unchanged +### ✅ Database +- [**CRITICAL**] Validate that all new queries have appropriate indexes (e.g., `user_segment`) +- [**CRITICAL**] Confirm database `max_connections` supports combined blue + green load +- [**CRITICAL**] Verify per-pod connection pool size is adjusted to prevent pool overflow +- Ensure staging environment uses production-scale data (e.g., 50M rows, not 5M) + +### ✅ Monitoring & Alerts +- [**CRITICAL**] Query latency monitoring enabled (P99 tracked in real time) +- Connection pool usage monitored with alerting (threshold: >80% of max_connections) +- SLO violation detection active (latency >500ms triggers alert) -### Monitoring & Observability -- [ ] **(Critical)** Prometheus metrics for request rate, error rate, and latency are available per version (via `version` label) -- [ ] Loki logs are tagged with `app_version` and searchable by deployment color -- [ ] Grafana dashboard loaded with real-time view of both blue and green services +### ✅ Testing & Validation +- [**CRITICAL**] Full-capacity integration test completed with dual environments running +- Performance testing executed using production-like dataset sizes +- Indexing review performed for all schema-affecting changes +- Rollback procedure tested in staging -### Traffic & Networking -- [ ] Current production traffic is routed through `svc-production` → `version=blue` -- [ ] `svc-production` selector can be patched atomically to switch to `version=green` -- [ ] DNS TTLs and client-side caching do not interfere with immediate routing control +### ✅ Process +- Deployment checklist updated and reviewed +- Incident response roles assigned (on-call engineer, comms lead) +- Emergency rollback command pre-validated + +--- ## Deployment Procedure -1. **Deploy v2 (Green) Pods** +1. **Deploy Green Environment** ```bash - kubectl apply -f deployment-v2.yaml + kubectl apply -f recommendation-service-green.yaml ``` - - Wait until all pods are `Running` and pass readiness checks: - ```bash - kubectl get pods -l app=my-microservice,version=v2 - ``` - -2. **Validate Green Service Internally** - - Send test traffic via port-forward: - ```bash - kubectl port-forward svc/svc-green 8080:80 & - curl http://localhost:8080/health - ``` - - Confirm logs show `version=v2` and no startup errors. - -3. 
**Switch Traffic to Green** + Wait for all pods to reach `Running` and pass readiness checks: + ```bash + kubectl get pods -l app=recommendation-service,version=v2.5.0 + ``` + +2. **Verify Health & Connectivity** + - Check logs for connection errors + - Confirm database connectivity and migration success + - Validate `/health` endpoint returns 200 + +3. **Begin Incremental Traffic Shift** + Apply traffic split via service mesh: + ```bash + # 10% to green + istioctl replace -f traffic-split-10pct.yaml + sleep 180 + ``` + +4. **Monitor Key Metrics After Each Step** + - P99 latency (<500ms) + - Error rate (<0.5%) + - Active DB connections (<80% of max) + - CPU/Memory utilization stable + + Repeat shift: ```bash - kubectl patch svc svc-production -p '{"spec": {"selector": {"version": "v2"}}}' + istioctl replace -f traffic-split-25pct.yaml; sleep 300 + istioctl replace -f traffic-split-50pct.yaml; sleep 600 + istioctl replace -f traffic-split-75pct.yaml; sleep 900 ``` - - This switches all traffic from blue to green atomically. - -4. **Monitor Transition (First 5 Minutes)** - - **Monitoring Points**: - - Error rate (target: <0.5%) - - P95 latency (<200ms) - - Request volume parity (match pre-switch levels) - - Pod restarts or crashes in v2 - - Use Grafana dashboard to compare v1 (historical) vs v2 (live) metrics. - -5. **Stabilization Check** - - After 5 minutes of stable performance: - - Confirm no alerts triggered - - Verify business logic via synthetic transaction - - Log success: `Deployment v2 now serving production traffic` + +5. **Final Cutover (100%)** + ```bash + istioctl replace -f traffic-split-100pct.yaml + ``` + +6. **Decommission Blue** + After 30 minutes of stable operation: + ```bash + kubectl delete deployment recommendation-service-blue --namespace=production + ``` + +--- ## Rollback Procedure -### When to Roll Back -Roll back immediately if **any** of the following occur within 10 minutes post-cutover: -- Error rate > 2% sustained over 2 minutes -- Latency P95 > 800ms for >3 minutes -- Database connection pool saturation in v2 -- Any critical alert from monitoring system +### When to Rollback +Roll back immediately if **any** of the following occur: +- P99 latency exceeds **500ms** for >2 minutes +- Error rate spikes above **1%** +- Database connection usage reaches **90%** +- SLO violation detected -### Rollback Command +### Execute Rollback ```bash -kubectl patch svc svc-production -p '{"spec": {"selector": {"version": "v1"}}}' +istioctl replace -f traffic-split-0pct.yaml ``` +> ⚠️ This command routes 100% traffic back to the blue (stable) environment. 
-- Expected recovery time: **< 3 minutes** (limited by kube-proxy sync interval) -- Post-rollback: - - Confirm v1 pods absorb traffic (check metrics) - - Preserve v2 logs for root cause analysis - - Trigger incident review if rollback executed +### Expected Recovery Time +- **Target**: <2 minutes +- Service should stabilize within 90 seconds +- Confirm health endpoints and metrics return to baseline + +--- ## Common Pitfalls & Solutions -| Issue | Root Cause | Symptom | Solution | -|------|-----------|--------|----------| -| 503 errors after cutover | Misconfigured readiness probe in v2 | Pods running but not receiving traffic | Fix `/health` endpoint logic; re-roll v2 before switching | -| DB lock contention | v2 introduced long-lived transaction | Increased latency and connection pool exhaustion | Revert code change; apply statement-level timeout | -| Logs missing version tag | Incorrect label injection in init container | Inability to filter v2 logs in Loki | Patch DaemonSet to inject `app_version` env var | -| Partial traffic switch | Sticky sessions at LB layer | Mixed v1/v2 traces in Jaeger | Disable session affinity on ALB before deployment | +| Issue | Symptom | Root Cause | Solution | +|------|--------|-----------|----------| +| Database connection exhaustion | 5xx errors during traffic shift, "too many connections" logs | `max_connections=100` too low; per-pod pools not scaled down | Increase DB limit; reduce per-pod pool size | +| Latency spike at 75% shift | P99 jumps to 780ms, SLO breach | Missing index on `user_segment` causes full table scan | Add index; validate all queries pre-deploy | +| No early warning | Alerts silent during degradation | No monitoring on connection count or query latency | Add alerts on key DB and service metrics | +| Staging passes, prod fails | Deployment works locally but fails in production | Staging uses 5M rows vs. 50M in prod | Mirror production data volume in staging | + +--- ## Best Practices -- **Test Selector Patch Locally**: Validate `kubectl patch` syntax in staging first -- **Pre-warm caches**: If applicable, trigger cache population in v2 before cutover -- **Atomic Switch Only**: Never use weighted routing unless A/B testing is goal -- **Timeline Expectations**: - - v2 rollout: 2–3 minutes - - Validation: 2 minutes - - Cutover + monitoring: 5–10 minutes - - Total window: ≤15 minutes +- Always test blue-green states under full expected load +- Use incremental shifts with pauses aligned to metric collection intervals +- Run emergency rollback drills monthly +- Enforce mandatory index reviews for any code introducing new queries +- Keep staging data within 10% of production scale + +**Expected Timeline**: +- Deployment window: 45–60 minutes +- Rollback execution: ≤2 minutes +- Post-cutover observation: 30 minutes minimum + +--- ## Key Takeaways -1. **Selector-based switching is reliable only if labels and selectors are rigorously tested pre-deploy** -2. **Backward-compatible database schema changes are non-negotiable—v1 must tolerate v2 writes** -3. **Real-time observability by version is critical—without it, rollback decisions are blind** -4. **A failed deployment is acceptable; a slow or uncontrolled rollback is not** -5. **Always preserve pre-cutover state—never scale down blue until green is proven stable** -``` \ No newline at end of file +1. **Connection pools must account for peak concurrency during dual-environment operation** — always size pools and DB limits for combined blue+green load. +2. 
**Missing indexes can cause catastrophic performance degradation at scale** — enforce pre-deployment indexing validation and query reviews. +3. **Staging environments must mirror production data volume** — 5M-row datasets won’t catch scalability issues present in 50M+ tables. +4. **Monitoring must include infrastructure-level metrics** — connection usage, query latency, and SLOs are critical for safe rollouts. +5. **Lessons must become process** — integrate remediation actions (e.g., checklist updates, index creation) directly into deployment pipelines. \ No newline at end of file diff --git a/examples/output/skill_example_lazyllm/log_3.md b/examples/output/skill_example_lazyllm/log_3.md index ad1fdfb3..cc140b23 100644 --- a/examples/output/skill_example_lazyllm/log_3.md +++ b/examples/output/skill_example_lazyllm/log_3.md @@ -1,7 +1,7 @@ ```yaml --- name: production-microservice-deployment -description: Production-ready guide for deploying a microservice using a blue-green deployment strategy, based on real-world execution and lessons learned. +description: Production-ready guide for deploying microservices using blue-green strategy with validated infrastructure, monitoring, and rollback safeguards. version: 0.3.0 status: Evolving --- @@ -9,148 +9,149 @@ status: Evolving # Production Microservice Deployment with Blue-Green Strategy -## Introduction +This guide provides a battle-tested procedure for safely deploying microservices in production using the blue-green deployment strategy. It is intended for use when zero-downtime rollouts, risk mitigation under real traffic, and rapid recovery are required. The steps are derived from real deployment outcomes across three iterations, including two failures and one successful cutover. -This guide provides a field-validated procedure for safely deploying a microservice in production using the blue-green deployment strategy. It is intended for use when minimizing downtime and enabling rapid rollback are critical. The steps and checks herein are derived from three iterative deployment attempts, incorporating observed failures and optimizations. - -Use this guide during scheduled production releases where service continuity and observability are required. 
+Use this guide when: +- Deploying versioned services with stateless workloads +- Database schema and query performance have been pre-validated +- Monitoring, alerting, and rollback tooling are active +- Traffic shifting is managed via service mesh or load balancer --- ## Deployment Context -- **Strategy**: Blue-green deployment via Kubernetes `Service` selector switch -- **Environment**: Kubernetes (v1.25+), Helm-managed workloads, Istio ingress -- **Goals**: - - Zero-downtime deployment - - Sub-5-minute rollback if failure detected - - Full observability during transition - - Minimal impact on downstream consumers +**Strategy**: Blue-green deployment with incremental traffic shift (10% → 100%) over 36 minutes +**Environment**: Kubernetes-based platform with service mesh routing control +**Target Service**: `recommendation-service` v2.5.0 +**Goals**: +- Achieve zero-downtime cutover +- Validate performance under real production load +- Stay within SLOs (P99 latency < 500ms, error rate < 0.5%) +- Maintain database stability under dual-environment load --- ## Pre-Deployment Checklist -### Database -- [ ] **Verify schema compatibility** with new version — *Critical* - Run: `helm test db-checks --namespace=db` -- [ ] Confirm migration scripts are idempotent and version-tagged +### ✅ Database +- [CRITICAL] Confirm database `max_connections` supports combined blue/green load (minimum 250 for this service) +- [CRITICAL] Verify all new queries have appropriate indexes; validate existence of `idx_user_segment` +- [ ] Ensure per-pod connection pool size is adjusted to prevent exhaustion (e.g., HikariCP `maximumPoolSize`) +- [ ] Confirm staging dataset mirrors production scale (≥50M rows for key tables) -### Monitoring & Observability -- [ ] **Ensure metrics endpoints are enabled** in new image — *Critical* - Check: `/metrics` returns 200 in staging -- [ ] Validate Prometheus scrape config includes new pod labels -- [ ] Set up dashboard panels for latency, error rate, and request volume per color +### ✅ Monitoring & Alerting +- [CRITICAL] Active alerts on database connection usage (>80% threshold) +- [CRITICAL] Query latency monitoring enabled for high-impact endpoints +- [ ] P99 latency, error rate, and request volume dashboards accessible in real time +- [ ] Rollback trigger thresholds defined (see Rollback Procedure) -### Networking -- [ ] Confirm Istio virtual service routes do not override color selectors -- [ ] Verify readiness/liveness probes are tuned for startup time (new version may be slower) +### ✅ Infrastructure +- [ ] New environment (green) deployed and health-checked +- [ ] Blue environment remains fully operational and stable +- [ ] Routing controller ready for incremental traffic shifts -### Rollback Readiness -- [ ] **Pre-stage rollback script** with known-good revision — *Critical* - Store: `rollback-v2.1.0.yaml` in secure location -- [ ] Confirm `kubectl` context points to production cluster +### ✅ Testing & Validation +- [CRITICAL] Full-capacity integration test completed under dual-blue-green load +- [ ] Performance testing executed with production-scale data volume +- [ ] Indexing and query plan review performed for all new database access patterns --- ## Deployment Procedure -1. **Deploy Green Instance (inactive)** +1. 
**Deploy Green Environment** ```bash - helm upgrade --install mysvc-green ./charts/microservice \ - --namespace services \ - --set replicaCount=3 \ - --set image.tag=v2.2.0 \ - --set service.name=mysvc-green + kubectl apply -f recommendation-service-v2.5.0.yaml ``` + Wait for all pods to reach `Running` and pass readiness checks. -2. **Wait for Pod Readiness** - ```bash - kubectl wait --for=condition=ready pod -l app=mysvc,version=v2.2.0 -n services --timeout=180s - ``` +2. **Validate Health** + - Confirm logs show clean startup + - Verify `/health` endpoint returns 200 + - Check metrics: no errors, CPU/MEM within expected range -3. **Run Smoke Tests Against Green** +3. **Begin Incremental Traffic Shift** + Apply traffic weights via service mesh (example using Istio): ```bash - curl -H "x-bypass-router: green" http://mysvc.prod.svc.cluster.local/health - # Expected: 200 OK + "green", no errors in logs + # 10% to green + istioctl traffic-shift set --to green --weight 10 + sleep 300 # Monitor for 5 minutes ``` -4. **Switch Traffic: Blue → Green** - Update service selector to point to green version: +4. **Monitor at Each Stage** + After each shift, wait 5–10 minutes and verify: + - P99 latency < 500ms + - Error rate < 0.5% + - Database active connections < 80% of max + - No alerts firing + + Continue shifting: ```bash - kubectl patch svc mysvc -n services -p '{"spec": {"selector": {"version": "v2.2.0"}}}' + istioctl traffic-shift set --to green --weight 25 + sleep 300 + + istioctl traffic-shift set --to green --weight 50 + sleep 600 + + istioctl traffic-shift set --to green --weight 75 + sleep 600 + + istioctl traffic-shift set --to green --weight 100 ``` -5. **Monitor Transition (First 5 Minutes)** - - Watch for: - - Error rate > 1% (via Grafana or `kubectl logs`) - - Latency increase > 2x baseline - - Drop in request volume (consumer breakage) - - Use: - ```bash - kubectl top pods -n services -l app=mysvc - ``` - -6. **Confirm Stability** - - Sustained health for 10 minutes - - No alerts triggered - - Tracing shows full request flow +5. **Cutover Complete** + - Confirm full traffic on green (1500 req/s observed in success case) + - Average latency: 136ms, P99: 216ms, error rate: 0.2% --- ## Rollback Procedure -### When to Roll Back -- HTTP 5xx error rate > 2% sustained over 2 minutes -- Latency P95 > 1.5x baseline for 3+ minutes -- Database connection pool exhaustion observed -- Downstream services report failures +**Trigger Rollback If**: +- P99 latency > 500ms for >2 minutes +- Error rate > 0.5% sustained +- Database connection usage hits 90% +- Any critical alert fires during shift -### Rollback Steps -1. **Immediately reroute traffic to blue (known stable):** - ```bash - kubectl patch svc mysvc -n services -p '{"spec": {"selector": {"version": "v2.1.0"}}}' - ``` - -2. **Verify blue instance health:** - ```bash - kubectl get pods -n services -l app=mysvc,version=v2.1.0 - ``` +**Execute Immediate Rollback**: +```bash +istioctl traffic-shift set --to blue --weight 100 +``` -3. **Expected Recovery Time**: < 4 minutes from rollback initiation to full restoration. 
+**Expected Recovery Time**: ≤1.5 minutes +**Post-Rollback Actions**: +- Preserve logs and metrics for root cause analysis +- Disable green environment if not needed for debugging +- Update incident log with timestamp, metrics, and rollback reason --- ## Common Pitfalls & Solutions -| Issue | Root Cause | Symptom | Solution | -|------|-----------|--------|----------| -| Green pods crash after deploy | Missing config map mount | CrashLoopBackOff in logs | Explicitly declare all configMaps in Helm values | -| Service selector fails to switch | Misaligned pod labels | No traffic to green | Double-check label selectors in deployment vs service | -| High latency post-switch | Cold cache in new service | P95 spikes at switchover | Warm caches via pre-load job before cutover | -| Rollback fails due to blue scale-down | Auto-scaler terminated old pods | No healthy blue pods | Keep blue instance alive for 15 min post-switch | +| Issue | Symptom | Root Cause | Solution | +|------|--------|-----------|----------| +| Database connection exhaustion | 5xx errors during 50% shift, "too many connections" logs | `max_connections=100` insufficient for dual environments | Increase limit to 250; reduce per-pod pool size | +| Latency spike at 75% traffic | P99 jumps to 780ms, SLO violation | Missing `idx_user_segment`, full table scan on 50M-row table | Create index; test with production-scale data | +| No alert on connection usage | Failure undetected until user impact | Missing monitoring on DB connection pool | Add Prometheus/Grafana alert at 80% threshold | +| Staging test passed but prod failed | No issues in staging, failure in production | Staging dataset too small (5M vs 50M rows) | Mirror production data scale in staging | --- ## Best Practices -- **Keep both blue and green active during monitoring window** (min 15 minutes) -- **Automate smoke tests** — run post-deploy, pre-cutover -- **Tag images immutably** — never reuse `latest` -- **Time deployments outside peak hours** — target 02:00–04:00 UTC -- **Expected timeline**: - - Deploy green: 2 min - - Wait & test: 3 min - - Cutover: 1 min - - Monitor: 10 min - - Total: ~16 minutes +- **Traffic Shifting**: Use conservative increments (10% → 100%) over ≥30 minutes to allow observation +- **Monitoring**: Focus on P99 latency, error rate, and database connection count—these were leading indicators +- **Testing**: Always run performance tests with production-scale datasets and query patterns +- **Validation**: Enforce mandatory pre-deployment checklist including indexing and capacity review +- **Timeline**: Allow 36+ minutes for full rollout with monitoring pauses; rollback completes in <2 minutes --- ## Key Takeaways -1. **Label consistency is critical** — mismatched selectors cause silent failures. -2. **Never assume backward compatibility** — always verify DB/schema interoperability. -3. **Rollback must be faster than detection** — automate the switch. -4. **Observability starts before cutover** — monitor green *before* routing traffic. -5. **Human error is the largest risk** — use pre-checked scripts, not CLI guesswork. \ No newline at end of file +1. **Database capacity must account for peak deployment states**—blue-green requires double the normal load capacity. +2. **Production-scale testing is non-negotiable**—small datasets hide scalability bugs like missing indexes. +3. **Connection pools and infrastructure limits must be proactively monitored and alerted**—silent exhaustion causes outages. +4. 
**Incremental traffic shifting with staged validation enables safe rollout**—real-load testing catches what synthetic tests miss. +5. **Lessons must become checklists**—operationalize fixes (index reviews, pool sizing) to prevent recurrence. \ No newline at end of file diff --git a/examples/output/skill_example_lazyllm/skill.md b/examples/output/skill_example_lazyllm/skill.md index ef640a2d..cb48451c 100644 --- a/examples/output/skill_example_lazyllm/skill.md +++ b/examples/output/skill_example_lazyllm/skill.md @@ -1,150 +1,174 @@ -```markdown +```yaml --- name: production-microservice-deployment -description: Production-ready guide for deploying a microservice using blue-green deployment strategy with zero-downtime, validated monitoring, and rapid rollback capability. +description: Production-ready guide for deploying microservices using blue-green strategy, based on real-world failure and success patterns. version: 1.0.0 status: Production-Ready --- +``` # Production Microservice Deployment with Blue-Green Strategy ## Introduction -This guide provides a battle-tested procedure for safely deploying a microservice to production using the blue-green deployment strategy. It ensures zero downtime, enables immediate rollback on failure, and integrates real-time validation via observability tools. Use this guide for any stateless microservice upgrade in Kubernetes-based environments where service continuity is critical. +This guide provides a battle-tested, step-by-step procedure for safely deploying a microservice to production using the blue-green deployment strategy. It is designed for use when minimizing downtime and risk during version upgrades is critical. The procedures, checks, and thresholds are derived from actual deployment attempts—two failures and one successful rollout—of the `recommendation-service` v2.5.0. + +Use this guide for any stateful or database-dependent microservice where traffic shifting must account for infrastructure capacity, performance under load, and safe rollback readiness. + +--- ## Deployment Context -- **Strategy**: Blue-green deployment -- **Environment**: Kubernetes (EKS), AWS infrastructure, Istio service mesh +- **Strategy**: Blue-green deployment with incremental traffic shifting (10% → 100%) +- **Environment**: Kubernetes-based production cluster with external PostgreSQL database +- **Traffic Management**: Service mesh (e.g., Istio) or ingress controller managing traffic split - **Goals**: - - Zero downtime during deployment - - Traffic switch within 30 seconds - - Full observability during cutover - - Rollback within 2 minutes if thresholds breached - - Minimal blast radius + - Zero-downtime cutover + - Validation of performance under real user load + - Immediate rollback capability if SLOs are violated + - Full operational hygiene post-cutover + +--- ## Pre-Deployment Checklist -### Infrastructure & Configuration -- [x] **(Critical)** New blue environment (v2) pods are running and ready (`kubectl get pods -l app=,version=v2`) -- [x] **(Critical)** All secrets and configmaps mounted correctly in v2 pods -- [x] Readiness and liveness probes configured and passing for v2 +> ✅ All items must be verified before initiating deployment. 
### Database -- [x] **(Critical)** Schema migrations are backward-compatible and applied *before* deployment -- [x] No pending data backfills or long-running jobs blocking cutover - -### Monitoring & Observability -- [x] **(Critical)** Prometheus metrics endpoints exposed and scraped for v2 -- [x] Grafana dashboards updated to include v2 version filtering -- [x] Alertmanager rules evaluate both v1 and v2 independently -- [x] Distributed tracing (Jaeger) enabled for service mesh traffic +- [CRITICAL] Confirm database `max_connections` supports combined blue + green load + → *Increase from 100 to 250 if necessary* +- [CRITICAL] Validate all new query patterns have required indexes + → *Ensure `idx_user_segment` exists on `user_segment` column* +- [CRITICAL] Verify staging dataset size mirrors production (e.g., 50M rows) to detect scalability issues +- Adjust per-pod connection pool size to prevent exhaustion under dual-environment traffic + +### Monitoring & Alerts +- [CRITICAL] Ensure monitoring is enabled for: + - Database active connections + - Query latency (P99) for key endpoints + - HTTP error rates and request volume +- Confirm alerts are configured to trigger on: + - P99 latency > 500ms (SLO threshold) + - Connection pool saturation (>80% of max) + - Error rate > 1% + +### Testing & Validation +- [CRITICAL] Complete full-capacity integration test simulating blue-green state +- Run production-scale load test with realistic query patterns +- Review all database schema changes and indexing decisions in PR + +### Operational Readiness +- Confirm rollback path is tested and executable within 2 minutes +- Verify deployment checklist is integrated into CI/CD pipeline gates +- Ensure logging and tracing are aligned across both environments -### Traffic Management -- [x] Istio VirtualService configured with named subsets (`blue`/`green`) -- [x] Initial traffic weight set to 0% for new version (blue) - -### Validation -- [x] Smoke test suite available and passes against staging -- [x] Synthetic health check endpoint (`/live` and `/ready`) accessible and returning 200 +--- ## Deployment Procedure -1. **Deploy v2 Artifacts** +1. **Deploy Green Environment** ```bash - kubectl apply -f deploy/v2-deployment.yaml - kubectl apply -f deploy/service.yaml + kubectl apply -f recommendation-service-v2.5.0.yaml ``` + Wait for all pods to reach `Running` and pass readiness probes. -2. **Wait for Pod Readiness** +2. **Initialize Traffic at 10%** ```bash - kubectl wait --for=condition=ready pod -l app=,version=v2 --timeout=180s + istioctl traffic-split set --namespace prod --green-weight 10 --blue-weight 90 ``` + Monitor for 5 minutes: + - Confirm no spike in errors or latency + - Check database connections: must remain <40% of max -3. **Apply Istio Traffic Shift (100% to Blue)** +3. **Shift to 25% Traffic** ```bash - kubectl apply -f istio/virtualservice-blue.yaml + istioctl traffic-split set --namespace prod --green-weight 25 ``` - > `virtualservice-blue.yaml` sets traffic weight: blue=100, green=0 - -4. **Monitor Key Metrics (First 5 Minutes)** - - HTTP 5xx rate < 0.5% - - P99 latency < 800ms - - Error logs per second < 2 - - Circuit breaker open count = 0 - - Use: - ```bash - kubectl top pods -l app=,version=v2 - ``` + Monitor for 7 minutes: + - P99 latency ≤ 500ms + - Error rate ≤ 0.5% + - No alert triggers -5. **Run Smoke Tests Against Live Endpoint** +4. 
**Proceed to 50%, Then 75%** ```bash - ./scripts/smoke-test.sh https:///health-check + istioctl traffic-split set --namespace prod --green-weight 50 + # After 10 min stable → + istioctl traffic-split set --namespace prod --green-weight 75 ``` + At each stage: + - Watch for query degradation + - Confirm connection usage remains under 70% -6. **Confirm Stability (10-Minute Hold)** - - Watch dashboards continuously - - Verify no alerts triggered - - Confirm user transaction traces succeed - -7. **Promote v2 to Production Label** +5. **Cutover to 100%** ```bash - kubectl label deployment -v2 env=prod --overwrite + istioctl traffic-split set --namespace prod --green-weight 100 ``` + Final validation: + - Sustained load: ≥1500 req/s + - Average latency ≤ 140ms, P99 ≤ 250ms + - Error rate ≤ 0.3% + - DB connection usage ≤ 50% + +6. **Post-Cutover Actions** + - Decommission blue environment + ```bash + kubectl delete -f recommendation-service-v2.4.0.yaml + ``` + - Activate continuous monitoring dashboard + - Record deployment outcome and lessons in runbook + +--- ## Rollback Procedure -### When to Rollback -Rollback immediately if **any** of the following occur: -- HTTP 5xx rate ≥ 5% sustained over 2 minutes -- P99 latency > 2s for 3 consecutive minutes -- Smoke test fails -- Critical alert fires (e.g., DB connection pool exhaustion) +### Trigger Conditions (Rollback Immediately If): +- P99 latency > 500ms for >2 minutes +- Error rate > 1% sustained over 3 minutes +- Database connection errors observed +- Any alert on connection pool saturation -### Steps +### Execute Rollback +```bash +istioctl traffic-split set --namespace prod --blue-weight 100 --green-weight 0 +``` +→ Revert traffic fully to stable blue version. -1. **Revert Traffic to Green (v1)** - ```bash - kubectl apply -f istio/virtualservice-green.yaml - ``` - > Switches 100% traffic back to stable v1 +### Expected Outcome +- Service stability restored within **≤1.5 minutes** +- Error rate returns to baseline +- Latency normalizes to pre-deployment levels -2. **Verify Rollback Success** - ```bash - kubectl get virtualservice -o jsonpath='{.spec.http[0].route}' - # Output should show green subset at 100% - ``` +Post-rollback: +- Preserve logs and metrics for root cause analysis +- Halt further deployments until remediation complete -3. 
**Monitor Recovery** - - Expected recovery time: ≤ 2 minutes - - Confirm metrics return to baseline - - Ensure no cascading failures in dependent services +--- ## Common Pitfalls & Solutions -| Issue | Root Cause | Symptom | Solution | -|------|-----------|--------|----------| -| 5xx spike after cutover | Missing CORS headers in v2 | Clients blocked | Revert; add `Access-Control-Allow-Origin` header | -| Pods stuck in `CrashLoopBackOff` | Incorrect secret mount path | Container exits with code 1 | Check `kubectl describe pod`, verify volumeMount paths match | -| Latency degradation | Unindexed query introduced | DB CPU > 85%, slow traces | Rollback; add index; retest in staging | -| Partial rollout due to mislabeled pods | Version label typo in YAML | Some traffic routed incorrectly | Fix labels; redeploy; validate with `kubectl get pods -L version` | +| Issue | Symptom | Root Cause | Solution | +|------|--------|-----------|----------| +| Database connection exhaustion | Errors during 50% shift, "too many connections" logs | `max_connections=100` insufficient for dual environments | Increase limit to 250; reduce per-pod pool size | +| Latency spike at 75% traffic | P99 jumps to 780ms, SLO breach | Missing index on `user_segment`, full table scan on 50M rows | Create `idx_user_segment`; validate all queries | +| No early warning | No alerts before rollback | Missing monitoring on connection count and query latency | Add alerts for DB connections (>80%) and P99 (>400ms) | +| Staging environment false confidence | Performance fine in staging | Data volume too small (5M vs 50M) | Mirror production data scale in staging | + +--- ## Best Practices -- **Always test blue-green failover weekly** in pre-prod using automation -- **Use canary first**: Route 1% of production traffic to v2 before full blue-green -- **Automate smoke tests** as part of CI/CD pipeline -- **Keep both versions running for 1 hour post-cutover** before scaling down v1 -- **Expected timeline**: - - Deployment: 4 minutes - - Monitoring window: 10 minutes - - Total execution: ≤ 15 minutes +- **Traffic Shifting**: Use conservative increments (10% → 25% → 50% → 75% → 100%) with monitoring pauses +- **Validation Window**: Minimum 5–10 minutes per stage depending on traffic ramp +- **Monitoring Focus**: Prioritize database-level metrics and end-to-end latency +- **Timeline**: Allow 30–40 minutes for full cutover including observation periods +- **Checklist Enforcement**: Integrate pre-deployment validations into CI/CD approval gates + +--- ## Key Takeaways -1. Backward-compatible schema changes are non-negotiable — always deploy DB changes ahead of application updates. -2. Misconfigured Istio subsets cause partial outages — validate routing rules with dry-run checks. -3. Real-time observability is essential — without live dashboards, you’re flying blind during cutover. -4. Automated smoke tests catch integration issues missed in staging. -5. Rollback speed determines incident impact — practice it like a fire drill. -``` \ No newline at end of file +1. **Database capacity must account for peak deployment states** — blue-green requires double the normal load capacity; validate `max_connections` and pool sizing upfront. +2. **Performance testing must use production-scale datasets** — staging with 10% data volume will not catch full-table-scan bottlenecks. +3. **Indexing is a deployment gate** — every new query pattern must be reviewed and indexed before release. +4. 
**Monitoring must cover infrastructure dependencies** — track database connections, query latency, and pool utilization as first-class signals. +5. **Safe deployment is procedural** — gradual traffic shifts, staged validation, and rollback readiness enable recovery from unforeseen issues without user impact. \ No newline at end of file diff --git a/src/memu/app/service.py b/src/memu/app/service.py index 01f99d81..f2b1cc4e 100644 --- a/src/memu/app/service.py +++ b/src/memu/app/service.py @@ -24,7 +24,6 @@ from memu.database.factory import build_database from memu.database.interfaces import Database from memu.llm.http_client import HTTPLLMClient -from memu.llm.lazyllm_client import LazyLLMClient from memu.llm.wrapper import ( LLMCallMetadata, LLMClientWrapper, @@ -117,11 +116,16 @@ def _init_llm_client(self, config: LLMConfig | None = None) -> Any: embed_model=cfg.embed_model, ) elif backend == "lazyllm_backend": + from memu.llm.lazyllm_client import LazyLLMClient + return LazyLLMClient( - source=cfg.source, + llm_source=cfg.llm_source, + vlm_source=cfg.vlm_source, + embed_source=cfg.embed_source, + stt_source=cfg.stt_source, chat_model=cfg.chat_model, - vlm_model=cfg.vlm_model, embed_model=cfg.embed_model, + vlm_model=cfg.vlm_model, stt_model=cfg.stt_model, api_key=cfg.api_key, ) diff --git a/src/memu/app/settings.py b/src/memu/app/settings.py index 0487763e..817ff595 100644 --- a/src/memu/app/settings.py +++ b/src/memu/app/settings.py @@ -99,10 +99,13 @@ class LLMConfig(BaseModel): chat_model: str = Field(default="gpt-4o-mini") client_backend: str = Field( default="sdk", - description="Which LLM client backend to use: 'httpx' (httpx) , 'sdk' (official OpenAI) or 'lazyllm_backend'", + description="Which LLM client backend to use: 'httpx' (httpx) , 'sdk' (official OpenAI) or 'lazyllm_backend'(for more LLM source like Qwen, Doubao, SIliconflow, etc.)", ) # setup LazyLLM - source: str = Field(default="qwen", description="LLM source for lazyllm backend") + llm_source: str | None = Field(default=None, description="LLM source for lazyllm backend") + vlm_source: str | None = Field(default=None, description="VLM source for lazyllm backend") + embed_source: str | None = Field(default=None, description="Embedding source for lazyllm backend") + stt_source: str | None = Field(default=None, description="STT source for lazyllm backend") vlm_model: str = Field(default="qwen-vl-plus", description="Vision language model for lazyllm") stt_model: str = Field(default="qwen-audio-turbo", description="Speech-to-text model for lazyllm") endpoint_overrides: dict[str, str] = Field( diff --git a/src/memu/llm/lazyllm_client.py b/src/memu/llm/lazyllm_client.py index 4a881a16..50b6b74b 100644 --- a/src/memu/llm/lazyllm_client.py +++ b/src/memu/llm/lazyllm_client.py @@ -8,7 +8,7 @@ class LazyLLMClient: DEFAULT_SOURCE = 'qwen' DEFAULT_MODELS = { - 'llm': 'qwen-plus', + 'llm': 'qwen3-max', 'vlm': 'qwen-vl-plus', 'embed': 'text-embedding-v3', 'stt': 'qwen-audio-turbo', @@ -16,14 +16,20 @@ class LazyLLMClient: def __init__(self, *, - source: str = None, + llm_source: str = None, + vlm_source: str = None, + embed_source: str = None, + stt_source: str = None, chat_model: str = None, vlm_model: str = None, embed_model: str = None, stt_model: str = None, - api_key: str = None + api_key: str = None, ): - self.source = source or self.DEFAULT_SOURCE + self.llm_source = llm_source or self.source + self.vlm_source = vlm_source or self.source + self.embed_source = embed_source or self.source + self.stt_source = stt_source or 
self.source self.chat_model = chat_model or self.DEFAULT_MODELS['llm'] self.vlm_model = vlm_model or self.DEFAULT_MODELS['vlm'] self.embed_model = embed_model or self.DEFAULT_MODELS['embed'] @@ -45,10 +51,10 @@ async def summarize( max_tokens: int | None = None, system_prompt: str | None = None, ) -> str: - client = lazyllm.OnlineModule(source=self.source, model=self.chat_model, type='llm') + client = lazyllm.OnlineModule(source=self.llm_source, model=self.chat_model, type='llm', api_key=self.api_key) prompt = system_prompt or 'Summarize the text in one short paragraph.' full_prompt = f'{prompt}\n\ntext:\n{text}' - LOG.debug(f'Summarizing text with {self.source}/{self.chat_model}') + LOG.debug(f'Summarizing text with {self.llm_source}/{self.chat_model}') response = await self._call_async(client, full_prompt) return response @@ -60,14 +66,10 @@ async def vision( max_tokens: int | None = None, system_prompt: str | None = None, ) -> tuple[str, Any]: - client = lazyllm.OnlineModule(source=self.source, model=self.vlm_model, type='vlm') - # Combine system_prompt and prompt if system_prompt exists - full_prompt = prompt - if system_prompt: - full_prompt = f'{system_prompt}\n\n{prompt}' - LOG.debug(f'Processing image with {self.source}/{self.vlm_model}: {image_path}') + client = lazyllm.OnlineModule(source=self.vlm_source, model=self.vlm_model, type='vlm', api_key=self.api_key) + LOG.debug(f'Processing image with {self.vlm_source}/{self.vlm_model}: {image_path}') # LazyLLM VLM accepts prompt as first positional argument and image_path as keyword argument - response = await self._call_async(client, full_prompt, image_path=image_path) + response = await self._call_async(client, prompt, image_path=image_path) return response, None async def embed( @@ -75,8 +77,9 @@ async def embed( texts: list[str], batch_size: int = 10, # optional ) -> list[list[float]]: - client = lazyllm.OnlineModule(source=self.source, model=self.embed_model, type='embed') - LOG.debug(f'embed {len(texts)} texts with {self.source}/{self.embed_model}') + client = lazyllm.OnlineModule(source=self.embed_source, model=self.embed_model, type='embed', + batch_size=batch_size, api_key=self.api_key) + LOG.debug(f'embed {len(texts)} texts with {self.embed_source}/{self.embed_model}') response = await self._call_async(client, texts) return response @@ -86,8 +89,8 @@ async def transcribe( language: str | None = None, prompt: str | None = None, ) -> str: - client = lazyllm.OnlineModule(source=self.source, model=self.stt_model, type='stt') - LOG.debug(f'Transcribing audio with {self.source}/{self.stt_model}: {audio_path}') + client = lazyllm.OnlineModule(source=self.stt_source, model=self.stt_model, type='stt', api_key=self.api_key) + LOG.debug(f'Transcribing audio with {self.stt_source}/{self.stt_model}: {audio_path}') response = await self._call_async(client, audio_path) return response \ No newline at end of file From 1553fa107748f7a4b9c38acaddbfd8988f99463b Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 21 Jan 2026 16:16:52 +0800 Subject: [PATCH 04/14] add namespace feature --- tests/test_lazyllm.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/test_lazyllm.py b/tests/test_lazyllm.py index 81863f46..2ac2b22b 100644 --- a/tests/test_lazyllm.py +++ b/tests/test_lazyllm.py @@ -10,6 +10,7 @@ import asyncio import os import sys +import lazyllm # Add src to sys.path src_path = os.path.abspath("src") @@ -25,19 +26,18 @@ async def test_lazyllm_client(): print("=" * 60) # Get API key from environment - 
api_key = os.getenv("LAZYLLM_API_KEY") + lazyllm.config.add("qwen_api_key", str, env="QWEN_API_KEY", description="Qwen API Key") + with lazyllm.config.namespace("MEMU"): + api_key = lazyllm.config['qwen_api_key'] if not api_key: - print("❌ Error: Please set LAZYLLM_API_KEY environment variable") - print(" export LAZYLLM_API_KEY=your_api_key") - return False + msg = "Please set MEMU_QWEN_API_KEY environment variable" + raise ValueError(msg) print(f"✓ API key found: {api_key[:20]}...") - - # Initialize client try: client = LazyLLMClient( source="qwen", - chat_model="qwen-plus", + chat_model="qwen3-max", vlm_model="qwen-vl-plus", embed_model="text-embedding-v3", stt_model="qwen-audio-turbo", From eee52430a8cd4abaf9cf191a813fcedf984a94db Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 23 Jan 2026 11:32:50 +0800 Subject: [PATCH 05/14] fix bug / merge examples --- ...mple_4_conversation_memory_with_lazyllm.py | 145 --------- ...example_5_skill_extraction_with_lazyllm.py | 304 ------------------ examples/example_5_with_lazyllm_client.py | 244 ++++++++++++++ ...xample_6_multimodal_memory_with_lazyllm.py | 186 ----------- .../activities.md | 7 - .../experiences.md | 1 - .../conversation_example_lazyllm/goals.md | 7 - .../conversation_example_lazyllm/habits.md | 6 - .../conversation_example_lazyllm/knowledge.md | 1 - .../conversation_example_lazyllm/opinions.md | 1 - .../personal_info.md | 3 - .../preferences.md | 7 - .../relationships.md | 1 - .../conversation_example_lazyllm/work_life.md | 6 - .../architecture_concepts.md | 5 - .../best_practices.md | 5 - .../code_examples.md | 5 - .../technical_documentation.md | 5 - .../visual_diagrams.md | 5 - .../output/skill_example_lazyllm/log_1.md | 152 --------- .../output/skill_example_lazyllm/log_2.md | 162 ---------- .../output/skill_example_lazyllm/log_3.md | 157 --------- .../output/skill_example_lazyllm/skill.md | 174 ---------- src/memu/app/service.py | 9 +- src/memu/app/settings.py | 1 + src/memu/llm/lazyllm_client.py | 20 +- tests/test_lazyllm.py | 10 +- 27 files changed, 259 insertions(+), 1370 deletions(-) delete mode 100644 examples/example_4_conversation_memory_with_lazyllm.py delete mode 100644 examples/example_5_skill_extraction_with_lazyllm.py create mode 100644 examples/example_5_with_lazyllm_client.py delete mode 100644 examples/example_6_multimodal_memory_with_lazyllm.py delete mode 100644 examples/output/conversation_example_lazyllm/activities.md delete mode 100644 examples/output/conversation_example_lazyllm/experiences.md delete mode 100644 examples/output/conversation_example_lazyllm/goals.md delete mode 100644 examples/output/conversation_example_lazyllm/habits.md delete mode 100644 examples/output/conversation_example_lazyllm/knowledge.md delete mode 100644 examples/output/conversation_example_lazyllm/opinions.md delete mode 100644 examples/output/conversation_example_lazyllm/personal_info.md delete mode 100644 examples/output/conversation_example_lazyllm/preferences.md delete mode 100644 examples/output/conversation_example_lazyllm/relationships.md delete mode 100644 examples/output/conversation_example_lazyllm/work_life.md delete mode 100644 examples/output/multimodal_example_lazyllm/architecture_concepts.md delete mode 100644 examples/output/multimodal_example_lazyllm/best_practices.md delete mode 100644 examples/output/multimodal_example_lazyllm/code_examples.md delete mode 100644 examples/output/multimodal_example_lazyllm/technical_documentation.md delete mode 100644 
examples/output/multimodal_example_lazyllm/visual_diagrams.md delete mode 100644 examples/output/skill_example_lazyllm/log_1.md delete mode 100644 examples/output/skill_example_lazyllm/log_2.md delete mode 100644 examples/output/skill_example_lazyllm/log_3.md delete mode 100644 examples/output/skill_example_lazyllm/skill.md diff --git a/examples/example_4_conversation_memory_with_lazyllm.py b/examples/example_4_conversation_memory_with_lazyllm.py deleted file mode 100644 index 4c56d2de..00000000 --- a/examples/example_4_conversation_memory_with_lazyllm.py +++ /dev/null @@ -1,145 +0,0 @@ -""" -Example 4: Multiple Conversations -> Memory Category File with LazyLLM Backend - -This example demonstrates how to process multiple conversation files -and generate a memory category JSON file using the LazyLLM backend. - -Usage: - export MEMU_QWEN_API_KEY=your_api_key - python examples/example_4_conversation_memory_with_lazyllm.py -""" - -import asyncio -import os -import sys -from pathlib import Path -import lazyllm - -# Add src to sys.path FIRST before importing memu -project_root = Path(__file__).parent.parent -src_path = str(project_root / "src") -if src_path not in sys.path: - sys.path.insert(0, src_path) -from memu.app import MemoryService - -# Add src to sys.path -src_path = os.path.abspath("src") -sys.path.insert(0, src_path) - - -async def generate_memory_md(categories, output_dir): - """Generate concise markdown files for each memory category.""" - - os.makedirs(output_dir, exist_ok=True) - - generated_files = [] - - for cat in categories: - name = cat.get("name", "unknown") - summary = cat.get("summary", "") - - filename = f"{name}.md" - filepath = os.path.join(output_dir, filename) - - with open(filepath, "w", encoding="utf-8") as f: - # Title - # Content - concise version - if summary: - cleaned_summary = summary.replace("", "").replace("", "").strip() - f.write(f"{cleaned_summary}\n") - else: - f.write("*No content available*\n") - - generated_files.append(filename) - - return generated_files - - -async def main(): - """ - Process multiple conversation files and generate memory categories using LazyLLM. - - This example: - 1. Initializes MemoryService with LazyLLM backend - 2. Processes conversation JSON files - 3. Extracts memory categories from conversations - 4. 
Outputs the categories to files - """ - print("Example 4: Conversation Memory Processing with LazyLLM Backend") - print("-" * 60) - - lazyllm.config.add("qwen_api_key", str, env="QWEN_API_KEY", description="Qwen API Key") - with lazyllm.config.namespace("MEMU"): - api_key = lazyllm.config['qwen_api_key'] - if not api_key: - msg = "Please set MEMU_QWEN_API_KEY environment variable" - raise ValueError(msg) - - # Initialize service with LazyLLM backend using llm_profiles - # The "default" profile is required and used as the primary LLM configuration - service = MemoryService( - llm_profiles={ - "default": { - "client_backend": "lazyllm_backend", - "llm_source": "qwen", - "vlm_source": "qwen", - "embed_source": "qwen", - "stt_source": "qwen", - "chat_model": "qwen3-max", - "vlm_model":"qwen-vl-plus", - "embed_model": "text-embedding-v3", - "stt_model":"qwen-audio-turbo", - "api_key": api_key, - }, - }, - ) - - # Conversation files to process - conversation_files = [ - "examples/resources/conversations/conv1.json", - "examples/resources/conversations/conv2.json", - "examples/resources/conversations/conv3.json", - ] - - # Process each conversation - print("\nProcessing conversations with LazyLLM...") - total_items = 0 - categories = [] - for conv_file in conversation_files: - if not os.path.exists(conv_file): - print(f"⚠ File not found: {conv_file}") - continue - - try: - print(f" Processing: {conv_file}") - result = await service.memorize(resource_url=conv_file, modality="conversation") - total_items += len(result.get("items", [])) - # Categories are returned in the result and updated after each memorize call - categories = result.get("categories", []) - print(f" ✓ Extracted {len(result.get('items', []))} items") - except Exception as e: - print(f" ✗ Error processing {conv_file}: {e}") - import traceback - traceback.print_exc() - - # Write to output files - output_dir = "examples/output/conversation_example_lazyllm" - os.makedirs(output_dir, exist_ok=True) - - # 1. Generate individual Markdown files for each category - generated_files = await generate_memory_md(categories, output_dir) - - print(f"\n" + "=" * 60) - print(f"✓ Processed {len([f for f in conversation_files if os.path.exists(f)])} files") - print(f"✓ Extracted {total_items} total items") - print(f"✓ Generated {len(categories)} categories:") - for cat in categories: - print(f" - {cat.get('name', 'unknown')}") - print(f"✓ Output files ({len(generated_files)}):") - for file in generated_files: - print(f" - {os.path.join(output_dir, file)}") - print(f"✓ Output directory: {output_dir}/") - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/examples/example_5_skill_extraction_with_lazyllm.py b/examples/example_5_skill_extraction_with_lazyllm.py deleted file mode 100644 index d19ef368..00000000 --- a/examples/example_5_skill_extraction_with_lazyllm.py +++ /dev/null @@ -1,304 +0,0 @@ -""" -Example 5: Workflow & Agent Logs -> Skill Extraction (with LazyLLM) - -This example demonstrates how to extract skills from workflow descriptions -and agent runtime logs using LazyLLM backend, then output them to a Markdown file. 
- -Usage: - export LAZYLLM_QWEN_API_KEY=your_api_key - python examples/example_5_skill_extraction_with_lazyllm.py -""" - -import asyncio -import os -import sys -from pathlib import Path -import lazyllm - -# Add src to sys.path FIRST before importing memu -project_root = Path(__file__).parent.parent -src_path = str(project_root / "src") -if src_path not in sys.path: - sys.path.insert(0, src_path) -from memu.app import MemoryService - -# Add src to sys.path -src_path = os.path.abspath("src") -sys.path.insert(0, src_path) - - -async def generate_skill_md( - all_skills, service, output_file, attempt_number, total_attempts, categories=None, is_final=False -): - """ - Use LLM to generate a concise task execution guide (skill.md). - - This creates a production-ready guide incorporating lessons learned from deployment attempts. - """ - - os.makedirs(os.path.dirname(output_file), exist_ok=True) - - # Prepare context for LLM - skills_text = "\n\n".join([f"### From {skill_data['source']}\n{skill_data['skill']}" for skill_data in all_skills]) - - # Get category summaries if available - categories_text = "" - if categories: - categories_with_content = [cat for cat in categories if cat.get("summary") and cat.get("summary").strip()] - if categories_with_content: - categories_text = "\n\n".join([ - f"**{cat.get('name', 'unknown')}**:\n{cat.get('summary', '')}" for cat in categories_with_content - ]) - - # Construct prompt for LLM - prompt = f"""Generate a concise production-ready task execution guide. - -**Context**: -- Task: Production Microservice Deployment with Blue-Green Strategy -- Progress: {attempt_number}/{total_attempts} attempts -- Status: {"Complete" if is_final else f"v0.{attempt_number}"} - -**Skills Learned**: -{skills_text} - -{f"**Categories**:\n{categories_text}" if categories_text else ""} - -**Required Structure**: - -1. **Frontmatter** (YAML): - - name: production-microservice-deployment - - description: Brief description - - version: {"1.0.0" if is_final else f"0.{attempt_number}.0"} - - status: {"Production-Ready" if is_final else "Evolving"} - -2. **Introduction**: What this guide does and when to use it - -3. **Deployment Context**: Strategy, environment, goals - -4. **Pre-Deployment Checklist**: - - Actionable checks from lessons learned - - Group by category (Database, Monitoring, etc.) - - Mark critical items - -5. **Deployment Procedure**: - - Step-by-step instructions with commands - - Include monitoring points - -6. **Rollback Procedure**: - - When to rollback (thresholds) - - Exact commands - - Expected recovery time - -7. **Common Pitfalls & Solutions**: - - Failures/issues encountered - - Root cause, symptoms, solution - -8. **Best Practices**: - - What works well - - Expected timelines - -9. **Key Takeaways**: 3-5 most important lessons - -**Style**: -- Use markdown with clear hierarchy -- Be specific and concise -- Technical and production-grade tone -- Focus on PRACTICAL steps - -**CRITICAL**: -- ONLY use information from provided skills/lessons -- DO NOT make assumptions or add generic advice -- Extract ACTUAL experiences from the logs - -Generate the complete markdown document now:""" - - # Use LazyLLM through MemoryService - system_prompt = "You are an expert technical writer creating concise, production-grade deployment guides from real experiences." 
- - full_prompt = f"{system_prompt}\n\n{prompt}" - generated_content = await service.llm_client.summarize( - text=full_prompt, - system_prompt=system_prompt, - ) - - # Write to file - with open(output_file, "w", encoding="utf-8") as f: - f.write(generated_content) - - return True - - -async def main(): - """ - Extract skills from agent logs using incremental memory updates with LazyLLM. - - This example demonstrates INCREMENTAL LEARNING: - 1. Process files ONE BY ONE - 2. Each file UPDATES existing memory - 3. Category summaries EVOLVE with each new file - 4. Final output shows accumulated knowledge - """ - print("Example 5: Incremental Skill Extraction with LazyLLM") - print("-" * 60) - - # Get LazyLLM API key from environment - lazyllm.config.add("qwen_api_key", str, env="QWEN_API_KEY", description="Qwen API Key") - with lazyllm.config.namespace("MEMU"): - api_key = lazyllm.config['qwen_api_key'] - if not api_key: - msg = "Please set MEMU_QWEN_API_KEY environment variable" - raise ValueError(msg) - - # Custom config for skill extraction - skill_prompt = """ - You are analyzing an agent execution log. Extract the key actions taken, their outcomes, and lessons learned. - - For each significant action or phase: - 1. **Action/Phase**: What was being attempted? - 2. **Status**: SUCCESS ✅ or FAILURE ❌ - 3. **What Happened**: What was executed - 4. **Outcome**: What worked/failed, metrics - 5. **Root Cause** (for failures): Why did it fail? - 6. **Lesson**: What did we learn? - 7. **Action Items**: Concrete steps for next time - - Assign each extracted skill to one or more relevant categories from the following list: - {categories_str} - - **IMPORTANT**: - - Focus on ACTIONS and outcomes - - Be specific: include actual metrics, errors, timing - - ONLY extract information explicitly stated - - DO NOT infer or assume information - - Output MUST be valid XML wrapped in tags. - - Output format: - - - - [Action] Description of the action and outcome. - [Lesson] Key lesson learned. - - - Category Name - - - ... 
- - - Extract ALL significant actions from the text: - - Text: {resource} - """ - - # Define custom categories - skill_categories = [ - {"name": "deployment_execution", "description": "Deployment actions, traffic shifting, environment management"}, - { - "name": "pre_deployment_validation", - "description": "Capacity validation, configuration checks, readiness verification", - }, - { - "name": "incident_response_rollback", - "description": "Incident response, error detection, rollback procedures", - }, - { - "name": "performance_monitoring", - "description": "Metrics monitoring, performance analysis, bottleneck detection", - }, - {"name": "database_management", "description": "Database capacity planning, optimization, schema changes"}, - {"name": "testing_verification", "description": "Testing, smoke tests, load tests, verification"}, - {"name": "infrastructure_setup", "description": "Kubernetes, containers, networking configuration"}, - {"name": "lessons_learned", "description": "Key reflections, root cause analyses, action items"}, - ] - - memorize_config = { - "memory_types": ["skill"], - "memory_type_prompts": {"skill": skill_prompt}, - "memory_categories": skill_categories, - } - - # Initialize service with LazyLLM backend using llm_profiles - # The "default" profile is required and used as the primary LLM configuration - service = MemoryService( - llm_profiles={ - "default": { - "client_backend": "lazyllm_backend", - "llm_source": "qwen", - "vlm_source": "qwen", - "embed_source": "qwen", - "stt_source": "qwen", - "chat_model": "qwen3-max", - "vlm_model":"qwen-vl-plus", - "stt_model":"qwen-audio-turbo", - "embed_model": "text-embedding-v3", - "api_key": api_key, - }, - }, - memorize_config=memorize_config, - ) - - # Resources to process - resources = [ - ("examples/resources/logs/log1.txt", "document"), - ("examples/resources/logs/log2.txt", "document"), - ("examples/resources/logs/log3.txt", "document"), - ] - - # Process each resource sequentially - print("\nProcessing files with LazyLLM...") - all_skills = [] - categories = [] - - for idx, (resource_file, modality) in enumerate(resources, 1): - if not os.path.exists(resource_file): - print(f"⚠ File not found: {resource_file}") - continue - - try: - print(f" Processing: {resource_file}") - result = await service.memorize(resource_url=resource_file, modality=modality) - - # Extract skill items - for item in result.get("items", []): - if item.get("memory_type") == "skill": - all_skills.append({"skill": item.get("summary", ""), "source": os.path.basename(resource_file)}) - - # Categories are returned in the result and updated after each memorize call - categories = result.get("categories", []) - - # Generate intermediate skill.md - await generate_skill_md( - all_skills=all_skills, - service=service, - output_file=f"examples/output/skill_example_lazyllm/log_{idx}.md", - attempt_number=idx, - total_attempts=len(resources), - categories=categories, - ) - print(f" ✓ Extracted {len([s for s in all_skills if s['source'] == os.path.basename(resource_file)])} skills") - - except Exception as e: - print(f" ✗ Error processing {resource_file}: {e}") - import traceback - traceback.print_exc() - - # Generate final comprehensive skill.md - await generate_skill_md( - all_skills=all_skills, - service=service, - output_file="examples/output/skill_example_lazyllm/skill.md", - attempt_number=len(resources), - total_attempts=len(resources), - categories=categories, - is_final=True, - ) - - print(f"\n" + "=" * 60) - print(f"✓ Processed {len([r for r in 
resources if os.path.exists(r[0])])} files, extracted {len(all_skills)} skills") - print(f"✓ Generated {len(categories)} categories") - print("✓ Output: examples/output/skill_example_lazyllm/") - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/examples/example_5_with_lazyllm_client.py b/examples/example_5_with_lazyllm_client.py new file mode 100644 index 00000000..6f61fcfd --- /dev/null +++ b/examples/example_5_with_lazyllm_client.py @@ -0,0 +1,244 @@ +""" +Unified Example: LazyLLM Integration Demo +========================================= + +This example merges functionalities from: +1. Example 1: Conversation Memory Processing +2. Example 2: Skill Extraction +3. Example 3: Multimodal Processing + +It demonstrates how to use the LazyLLM backend for: +- Processing conversation history +- Extracting technical skills from logs +- Handling multimodal content (images + text) +- defaut source and model are from qwen + +Usage: + export MEMU_QWEN_API_KEY=your_api_key + python examples/example_5_with_lazyllm_client.py +""" + +import asyncio +import os +import sys +from pathlib import Path +import lazyllm + +# Add src to sys.path FIRST before importing memu +project_root = Path(__file__).parent.parent +src_path = str(project_root / "src") +if src_path not in sys.path: + sys.path.insert(0, src_path) + +from memu.app import MemoryService + +# ========================================== +# PART 1: Conversation Memory Processing +# ========================================== + +async def run_conversation_memory_demo(service): + print("\n" + "="*60) + print("PART 1: Conversation Memory Processing") + print("="*60) + + conversation_files = [ + "examples/resources/conversations/conv1.json", + "examples/resources/conversations/conv2.json", + "examples/resources/conversations/conv3.json", + ] + + total_items = 0 + categories = [] + + for conv_file in conversation_files: + if not os.path.exists(conv_file): + print(f"⚠ File not found: {conv_file}") + continue + + try: + print(f" Processing: {conv_file}") + result = await service.memorize(resource_url=conv_file, modality="conversation") + total_items += len(result.get("items", [])) + categories = result.get("categories", []) + print(f" ✓ Extracted {len(result.get('items', []))} items") + except Exception as e: + print(f" ✗ Error processing {conv_file}: {e}") + + # Output generation + output_dir = "examples/output/unified_example/conversation" + os.makedirs(output_dir, exist_ok=True) + await generate_markdown_output(categories, output_dir) + print(f"✓ Conversation processing complete. Output: {output_dir}") + + +# ========================================== +# PART 2: Skill Extraction +# ========================================== + +async def run_skill_extraction_demo(service): + print("\n" + "="*60) + print("PART 2: Skill Extraction from Logs") + print("="*60) + + # Configure prompt for skill extraction + skill_prompt = """ + You are analyzing an agent execution log. Extract the key actions taken, their outcomes, and lessons learned. + + Output MUST be valid XML wrapped in tags. + Format: + + + + [Action] Description... + [Lesson] Key lesson... 
+ + + Category Name + + + + + Text: {resource} + """ + + # Update service config for skill extraction + service.memorize_config.memory_types = ["skill"] + service.memorize_config.memory_type_prompts = {"skill": skill_prompt} + + logs = [ + "examples/resources/logs/log1.txt", + "examples/resources/logs/log2.txt", + "examples/resources/logs/log3.txt" + ] + + all_skills = [] + for log_file in logs: + if not os.path.exists(log_file): + continue + + print(f" Processing log: {log_file}") + try: + result = await service.memorize(resource_url=log_file, modality="document") + for item in result.get("items", []): + if item.get("memory_type") == "skill": + all_skills.append(item.get("summary", "")) + print(f" ✓ Extracted {len(result.get('items', []))} skills") + except Exception as e: + print(f" ✗ Error: {e}") + + # Generate summary guide + if all_skills: + output_file = "examples/output/unified_example/skills/skill_guide.md" + await generate_skill_guide(all_skills, service, output_file) + print(f"✓ Skill guide generated: {output_file}") + + +# ========================================== +# PART 3: Multimodal Memory +# ========================================== + +async def run_multimodal_demo(service): + print("\n" + "="*60) + print("PART 3: Multimodal Memory Processing") + print("="*60) + + # Configure for knowledge extraction + xml_prompt = """ + Analyze content and extract key information. + Output MUST be valid XML wrapped in tags. + Format: + + + Extracted content... + category_name + + + + Content: {resource} + """ + + service.memorize_config.memory_types = ["knowledge"] + service.memorize_config.memory_type_prompts = {"knowledge": xml_prompt} + + resources = [ + ("examples/resources/docs/doc1.txt", "document"), + ("examples/resources/images/image1.png", "image"), + ] + + categories = [] + for res_file, modality in resources: + if not os.path.exists(res_file): + continue + + print(f" Processing {modality}: {res_file}") + try: + result = await service.memorize(resource_url=res_file, modality=modality) + categories = result.get("categories", []) + print(f" ✓ Extracted {len(result.get('items', []))} items") + except Exception as e: + print(f" ✗ Error: {e}") + + output_dir = "examples/output/unified_example/multimodal" + os.makedirs(output_dir, exist_ok=True) + await generate_markdown_output(categories, output_dir) + print(f"✓ Multimodal processing complete. Output: {output_dir}") + + +# ========================================== +# Helpers +# ========================================== + +async def generate_markdown_output(categories, output_dir): + for cat in categories: + name = cat.get("name", "unknown") + summary = cat.get("summary", "") + if not summary: continue + + with open(os.path.join(output_dir, f"{name}.md"), "w", encoding="utf-8") as f: + f.write(f"# {name.replace('_', ' ').title()}\n\n") + cleaned = summary.replace("", "").replace("", "").strip() + f.write(cleaned) + +async def generate_skill_guide(skills, service, output_file): + os.makedirs(os.path.dirname(output_file), exist_ok=True) + skills_text = "\n\n".join(skills) + prompt = f"Summarize these skills into a guide:\n\n{skills_text}" + + # Use LazyLLM via service + summary = await service.llm_client.summarize(text=prompt) + + with open(output_file, "w", encoding="utf-8") as f: + f.write(summary) + + +# ========================================== +# Main Entry +# ========================================== + +async def main(): + print("Unified LazyLLM Example") + print("="*60) + # 1. 
Initialize Shared Service + service = MemoryService( + llm_profiles={ + "default": { + "client_backend": "lazyllm_backend", + "llm_source": "qwen", + "vlm_source": "qwen", + "embed_source": "qwen", + "stt_source": "qwen", + "chat_model": "qwen3-max", + "vlm_model": "qwen-vl-plus", + "embed_model": "text-embedding-v3", + "stt_model": "qwen-audio-turbo", + }, + } + ) + + # 2. Run Demos + await run_conversation_memory_demo(service) + await run_skill_extraction_demo(service) + await run_multimodal_demo(service) + +if __name__ == "__main__": + asyncio.run(main()) diff --git a/examples/example_6_multimodal_memory_with_lazyllm.py b/examples/example_6_multimodal_memory_with_lazyllm.py deleted file mode 100644 index 9e11f51a..00000000 --- a/examples/example_6_multimodal_memory_with_lazyllm.py +++ /dev/null @@ -1,186 +0,0 @@ -""" -Example 6: Multimodal Processing -> Memory Category File (with LazyLLM) - -This example demonstrates how to process multiple modalities (images, documents) -and generate a unified memory category JSON file using LazyLLM backend. - -Usage: - export MEMU_QWEN_API_KEY=your_api_key - python examples/example_6_multimodal_memory_with_lazyllm.py -""" - -import asyncio -import os -import sys -from pathlib import Path -import lazyllm - -# Add src to sys.path FIRST before importing memu -project_root = Path(__file__).parent.parent -src_path = str(project_root / "src") -if src_path not in sys.path: - sys.path.insert(0, src_path) -from memu.app import MemoryService - -# Add src to sys.path -src_path = os.path.abspath("src") -sys.path.insert(0, src_path) - - -async def generate_memory_md(categories, output_dir): - """Generate concise markdown files for each memory category.""" - - os.makedirs(output_dir, exist_ok=True) - - generated_files = [] - - for cat in categories: - name = cat.get("name", "unknown") - description = cat.get("description", "") - summary = cat.get("summary", "") - - filename = f"{name}.md" - filepath = os.path.join(output_dir, filename) - - with open(filepath, "w", encoding="utf-8") as f: - # Title - formatted_name = name.replace("_", " ").title() - f.write(f"# {formatted_name}\n\n") - - if description: - f.write(f"*{description}*\n\n") - - # Content - full version - if summary: - cleaned_summary = summary.replace("", "").replace("", "").strip() - f.write(f"{cleaned_summary}\n") - else: - f.write("*No content available*\n") - - generated_files.append(filename) - - return generated_files - - -async def main(): - """ - Process multiple modalities (images and documents) to generate memory categories using LazyLLM. - - This example: - 1. Initializes MemoryService with LazyLLM backend - 2. Processes documents and images - 3. Extracts unified memory categories across modalities - 4. 
Outputs the categories to files - """ - print("Example 6: Multimodal Memory Processing with LazyLLM Backend") - print("-" * 60) - - # Get LazyLLM API key from environment - lazyllm.config.add("qwen_api_key", str, env="QWEN_API_KEY", description="Qwen API Key") - with lazyllm.config.namespace("MEMU"): - api_key = lazyllm.config['qwen_api_key'] - if not api_key: - msg = "Please set MEMU_QWEN_API_KEY environment variable" - raise ValueError(msg) - - # Define custom categories for multimodal content - multimodal_categories = [ - {"name": "technical_documentation", "description": "Technical documentation, guides, and tutorials"}, - { - "name": "architecture_concepts", - "description": "System architecture, design patterns, and structural concepts", - }, - {"name": "best_practices", "description": "Best practices, recommendations, and guidelines"}, - {"name": "code_examples", "description": "Code snippets, examples, and implementation details"}, - {"name": "visual_diagrams", "description": "Visual concepts, diagrams, charts, and illustrations from images"}, - ] - xml_prompt = """ - Analyze the following content and extract key information. - - Content: {resource} - - Output MUST be strictly valid XML wrapped in tags (or , etc). - Format: - - - Your extracted content here... - - category_name - - - - """ - - # Initialize service with LazyLLM backend using llm_profiles - # The "default" profile is required and used as the primary LLM configuration - service = MemoryService( - llm_profiles={ - "default": { - "client_backend": "lazyllm_backend", - "llm_source": "qwen", - "vlm_source": "qwen", - "embed_source": "qwen", - "stt_source": "qwen", - "chat_model": "qwen3-max", - "vlm_model":"qwen-vl-plus", - "stt_model":"qwen-audio-turbo", - "embed_model": "text-embedding-v3", - "api_key": api_key, - }, - - }, - memorize_config={ - "memory_categories": multimodal_categories, - "memory_types": ["knowledge"], - "memory_type_prompts": {"knowledge": xml_prompt} - }, - ) - - # Resources to process (file_path, modality) - resources = [ - ("examples/resources/docs/doc1.txt", "document"), - ("examples/resources/docs/doc2.txt", "document"), - ("examples/resources/images/image1.png", "image"), - ] - - # Process each resource - print("\nProcessing resources with LazyLLM...") - total_items = 0 - categories = [] - for resource_file, modality in resources: - if not os.path.exists(resource_file): - print(f"⚠ File not found: {resource_file}") - continue - - try: - print(f" Processing: {resource_file} ({modality})") - result = await service.memorize(resource_url=resource_file, modality=modality) - total_items += len(result.get("items", [])) - # Categories are returned in the result and updated after each memorize call - categories = result.get("categories", []) - print(f" ✓ Extracted {len(result.get('items', []))} items") - except Exception as e: - print(f" ✗ Error processing {resource_file}: {e}") - import traceback - traceback.print_exc() - - # Write to output files - output_dir = "examples/output/multimodal_example_lazyllm" - os.makedirs(output_dir, exist_ok=True) - - # 1. 
Generate individual Markdown files for each category - generated_files = await generate_memory_md(categories, output_dir) - - print(f"\n" + "=" * 60) - print(f"✓ Processed {len([r for r in resources if os.path.exists(r[0])])} files, extracted {total_items} items") - print(f"✓ Generated {len(categories)} categories:") - for cat in categories: - print(f" - {cat.get('name', 'unknown')}") - print(f"✓ Output files ({len(generated_files)}):") - for file in generated_files: - print(f" - {os.path.join(output_dir, file)}") - print(f"✓ Output directory: {output_dir}/") - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/examples/output/conversation_example_lazyllm/activities.md b/examples/output/conversation_example_lazyllm/activities.md deleted file mode 100644 index e594bde9..00000000 --- a/examples/output/conversation_example_lazyllm/activities.md +++ /dev/null @@ -1,7 +0,0 @@ -# activities -## Open Source Contributions -- The user contributes to open source projects, including a Python CLI tool used in their daily workflow -## Fitness -- The user exercises regularly, going to the gym 3–4 times a week after work around 7 PM -## Reading -- The user likes reading diff --git a/examples/output/conversation_example_lazyllm/experiences.md b/examples/output/conversation_example_lazyllm/experiences.md deleted file mode 100644 index e9f7c82d..00000000 --- a/examples/output/conversation_example_lazyllm/experiences.md +++ /dev/null @@ -1 +0,0 @@ -*No content available* diff --git a/examples/output/conversation_example_lazyllm/goals.md b/examples/output/conversation_example_lazyllm/goals.md deleted file mode 100644 index caded271..00000000 --- a/examples/output/conversation_example_lazyllm/goals.md +++ /dev/null @@ -1,7 +0,0 @@ -# goals -## Professional Interests -- The user is interested in system design and scalability patterns -## Product Goals -- The user wants to build a SaaS product for API testing and documentation that generates test cases from API specs -## Technical Research -- The user is researching OpenAPI specifications and tools like Swagger and Postman for their SaaS idea diff --git a/examples/output/conversation_example_lazyllm/habits.md b/examples/output/conversation_example_lazyllm/habits.md deleted file mode 100644 index 8cc0e3d0..00000000 --- a/examples/output/conversation_example_lazyllm/habits.md +++ /dev/null @@ -1,6 +0,0 @@ -# habits -## Daily Routine -- The user goes for a run every morning -- The user usually tries to go to bed around 11 PM -- The user tends to check their phone before bed -- The user drinks coffee throughout the day and usually has their last coffee around 3-4 PM diff --git a/examples/output/conversation_example_lazyllm/knowledge.md b/examples/output/conversation_example_lazyllm/knowledge.md deleted file mode 100644 index e9f7c82d..00000000 --- a/examples/output/conversation_example_lazyllm/knowledge.md +++ /dev/null @@ -1 +0,0 @@ -*No content available* diff --git a/examples/output/conversation_example_lazyllm/opinions.md b/examples/output/conversation_example_lazyllm/opinions.md deleted file mode 100644 index e9f7c82d..00000000 --- a/examples/output/conversation_example_lazyllm/opinions.md +++ /dev/null @@ -1 +0,0 @@ -*No content available* diff --git a/examples/output/conversation_example_lazyllm/personal_info.md b/examples/output/conversation_example_lazyllm/personal_info.md deleted file mode 100644 index d19d6df5..00000000 --- a/examples/output/conversation_example_lazyllm/personal_info.md +++ /dev/null @@ -1,3 +0,0 @@ -# personal_info -## 
Basic Information -- The user is a software engineer named Alex who has been programming for about 5 years diff --git a/examples/output/conversation_example_lazyllm/preferences.md b/examples/output/conversation_example_lazyllm/preferences.md deleted file mode 100644 index 12d1675c..00000000 --- a/examples/output/conversation_example_lazyllm/preferences.md +++ /dev/null @@ -1,7 +0,0 @@ -# preferences -## Interests -- The user loves food and nature -## Dietary Preferences -- The user is trying to eat less meat -## Lifestyle Traits -- The user is not a morning person diff --git a/examples/output/conversation_example_lazyllm/relationships.md b/examples/output/conversation_example_lazyllm/relationships.md deleted file mode 100644 index e9f7c82d..00000000 --- a/examples/output/conversation_example_lazyllm/relationships.md +++ /dev/null @@ -1 +0,0 @@ -*No content available* diff --git a/examples/output/conversation_example_lazyllm/work_life.md b/examples/output/conversation_example_lazyllm/work_life.md deleted file mode 100644 index 9a8eee76..00000000 --- a/examples/output/conversation_example_lazyllm/work_life.md +++ /dev/null @@ -1,6 +0,0 @@ -# work_life -## Professional Background -- The user works in software development at TechCorp on backend systems using Python and Go -- The user uses Django and FastAPI for Python services and is migrating critical services to Go -- The user works with Kubernetes, Redis, Apache Kafka, Prometheus, and Grafana -- The user often answers work emails late at night diff --git a/examples/output/multimodal_example_lazyllm/architecture_concepts.md b/examples/output/multimodal_example_lazyllm/architecture_concepts.md deleted file mode 100644 index f7527b4e..00000000 --- a/examples/output/multimodal_example_lazyllm/architecture_concepts.md +++ /dev/null @@ -1,5 +0,0 @@ -# Architecture Concepts - -*System architecture, design patterns, and structural concepts* - -*No content available* diff --git a/examples/output/multimodal_example_lazyllm/best_practices.md b/examples/output/multimodal_example_lazyllm/best_practices.md deleted file mode 100644 index 326c82e1..00000000 --- a/examples/output/multimodal_example_lazyllm/best_practices.md +++ /dev/null @@ -1,5 +0,0 @@ -# Best Practices - -*Best practices, recommendations, and guidelines* - -*No content available* diff --git a/examples/output/multimodal_example_lazyllm/code_examples.md b/examples/output/multimodal_example_lazyllm/code_examples.md deleted file mode 100644 index 0ecd8842..00000000 --- a/examples/output/multimodal_example_lazyllm/code_examples.md +++ /dev/null @@ -1,5 +0,0 @@ -# Code Examples - -*Code snippets, examples, and implementation details* - -*No content available* diff --git a/examples/output/multimodal_example_lazyllm/technical_documentation.md b/examples/output/multimodal_example_lazyllm/technical_documentation.md deleted file mode 100644 index 86694fad..00000000 --- a/examples/output/multimodal_example_lazyllm/technical_documentation.md +++ /dev/null @@ -1,5 +0,0 @@ -# Technical Documentation - -*Technical documentation, guides, and tutorials* - -*No content available* diff --git a/examples/output/multimodal_example_lazyllm/visual_diagrams.md b/examples/output/multimodal_example_lazyllm/visual_diagrams.md deleted file mode 100644 index de11b271..00000000 --- a/examples/output/multimodal_example_lazyllm/visual_diagrams.md +++ /dev/null @@ -1,5 +0,0 @@ -# Visual Diagrams - -*Visual concepts, diagrams, charts, and illustrations from images* - -*No content available* diff --git 
a/examples/output/skill_example_lazyllm/log_1.md b/examples/output/skill_example_lazyllm/log_1.md deleted file mode 100644 index cfc82706..00000000 --- a/examples/output/skill_example_lazyllm/log_1.md +++ /dev/null @@ -1,152 +0,0 @@ -```yaml ---- -name: production-microservice-deployment -description: Production-ready guide for deploying microservices using a blue-green strategy, based on real incident learnings to prevent connection exhaustion and ensure safe rollouts. -version: 0.1.0 -status: Evolving ---- -``` - -# Production Microservice Deployment with Blue-Green Strategy - -## Introduction - -This guide provides a practical, experience-driven procedure for safely deploying microservices in production using the blue-green deployment strategy. It is intended for use during versioned service updates where minimizing user impact and enabling rapid rollback are critical. The steps and checks herein are derived from a real-world failure involving database connection exhaustion during traffic shift. - -Use this guide when: -- Deploying stateful services that maintain database connections -- Employing blue-green deployments in Kubernetes or similar orchestration platforms -- Ensuring infrastructure capacity aligns with peak deployment load - ---- - -## Deployment Context - -- **Strategy**: Blue-green deployment -- **Environment**: Production, containerized (e.g., Kubernetes), PostgreSQL backend -- **Traffic Shift**: Gradual (e.g., 0% → 50% → 100%) via service mesh or ingress controller -- **Goal**: Zero-downtime deployment with full rollback capability within 2 minutes if anomalies occur -- **Critical Risk**: Resource contention during dual-environment operation (blue + green) - ---- - -## Pre-Deployment Checklist - -Ensure all items are verified **before** initiating deployment. - -### Database -- [ ] **Validate `max_connections` limit supports combined blue and green load** *(Critical)* - > Ensure total expected connections from both environments ≤ database limit -- [ ] **Adjust per-pod connection pool size** *(Critical)* - > Scale down individual pod pools to prevent oversubscription during overlap - -### Monitoring & Alerts -- [ ] **Enable monitoring of active database connections** *(Critical)* - > Track metric: `pg_stat_database.numbackends` or equivalent -- [ ] **Set up alert thresholds for connection pool usage (>80%)** *(Critical)* - > Trigger alerts during deployment phase -- [ ] Confirm end-to-end metrics pipeline is active (Prometheus/Grafana or equivalent) - -### Testing & Validation -- [ ] **Perform full-capacity load test under dual-environment conditions** *(Critical)* - > Simulate blue + green traffic concurrently -- [ ] Verify no performance degradation or connection errors at peak load -- [ ] Confirm rollback mechanism works in staging - ---- - -## Deployment Procedure - -> ⚠️ Monitor all systems continuously during execution. - -1. **Deploy Green Environment** - ```bash - kubectl apply -f recommendation-service-v2.5.0-green.yaml - ``` - - Wait for all pods to reach `Running` and pass readiness probes - - Confirm logs show clean startup with no connection errors - -2. **Verify Green Service Health** - - Access `/health` endpoint directly (bypassing router) - - Confirm database connectivity and query responsiveness - - Check metrics dashboard: baseline connection count established - -3. 
**Begin Traffic Shift (0% → 50%)** - ```bash - kubectl apply -f traffic-shift-50pct.yaml - ``` - - Update canary weight or virtual service routing rules accordingly - - Allow 2–3 minutes for traffic stabilization - -4. **Monitor During 50% Shift** - - Observe: - - Error rates (must remain <0.5%) - - Latency (P95 < 300ms) - - **Active database connections** (must not exceed 80% of max) - - If any threshold breached → **Initiate Rollback Immediately** - -5. **Proceed to 100% Traffic (if stable)** - ```bash - kubectl apply -f traffic-shift-100pct.yaml - ``` - - Redirect all traffic to green - - Decommission blue environment after confirmation: - ```bash - kubectl delete -f recommendation-service-v2.4.0-blue.yaml - ``` - ---- - -## Rollback Procedure - -### When to Rollback -Immediate rollback required if: -- Error rate exceeds **1% for 60 seconds** -- Latency P95 > **1s for 2+ minutes** -- Database connections ≥ **90% of max_connections** -- Emergency signal from SRE team - -### Execute Rollback -1. Revert traffic to blue: - ```bash - kubectl apply -f traffic-shift-100pct-blue.yaml - ``` -2. Confirm traffic rerouted within **60 seconds** -3. Terminate green pods: - ```bash - kubectl delete -f recommendation-service-v2.5.0-green.yaml - ``` -4. Validate blue service stability via health checks and dashboards - -✅ **Expected Recovery Time**: ≤ 1.5 minutes -✅ **Impact Window**: ~1.5 minutes at <5% error rate (historically observed) - ---- - -## Common Pitfalls & Solutions - -| Issue | Symptom | Root Cause | Solution | -|------|--------|-----------|----------| -| Database connection exhaustion | Errors during 50% shift, timeouts | `max_connections=100` too low; per-pod pools oversized | Increase DB limit; reduce per-pod pool size | -| No early warning | Failure detected too late | Missing alerts on connection usage | Implement proactive monitoring at 80% threshold | -| Undetected bottleneck | Load test passed but failed live | Test did not simulate dual blue-green load | Add full-capacity integration testing pre-deploy | - ---- - -## Best Practices - -- **Always size infrastructure for peak deployment states**, not just steady-state -- **Test under realistic overlap conditions** — blue and green running simultaneously -- **Integrate checklist items into CI/CD gates** — block deployment if validations missing -- **Expect rollout duration**: ~8–12 minutes (including verification windows) -- **Rollback drills**: Conduct quarterly in staging - ---- - -## Key Takeaways - -1. **Connection pools must be sized for combined blue and green load** — never assume steady-state capacity suffices. -2. **Infrastructure limits (e.g., `max_connections`) must be validated pre-deployment** — silent failures occur when shared resources are exhausted. -3. **Proactive monitoring of key database metrics is non-negotiable** — lack of alerts delays detection and increases blast radius. -4. **Full-capacity and dual-environment testing is mandatory** — unit and single-instance tests do not reveal integration bottlenecks. -5. **Remediation actions must become standard checks** — update checklists and automate where possible to prevent recurrence. 
\ No newline at end of file diff --git a/examples/output/skill_example_lazyllm/log_2.md b/examples/output/skill_example_lazyllm/log_2.md deleted file mode 100644 index 2f1b19f3..00000000 --- a/examples/output/skill_example_lazyllm/log_2.md +++ /dev/null @@ -1,162 +0,0 @@ -```yaml ---- -name: production-microservice-deployment -description: Production-ready guide for deploying microservices using a blue-green strategy, based on real-world incidents involving connection exhaustion and performance degradation due to missing indexing. -version: 0.2.0 -status: Evolving ---- -``` - -# Production Microservice Deployment with Blue-Green Strategy - -## Introduction - -This guide provides a concise, production-grade procedure for safely deploying microservices using the blue-green deployment strategy. It is designed for use when zero-downtime deployments are required and rollback safety is critical. The steps and checks included are derived from actual deployment failures involving database connection exhaustion and performance degradation under load. - -Use this guide during scheduled deployments of stateful or database-dependent services where infrastructure capacity and query performance must be validated at scale. - ---- - -## Deployment Context - -- **Strategy**: Blue-green deployment with incremental traffic shifting (10% → 25% → 50% → 75% → 100%) -- **Environment**: Kubernetes-based platform with service mesh routing; PostgreSQL backend -- **Goals**: - - Achieve zero-downtime release - - Detect issues before full cutover - - Ensure system stability during dual-environment operation - - Enable rapid rollback (<2 minutes) if thresholds are breached - ---- - -## Pre-Deployment Checklist - -### ✅ Database -- [**CRITICAL**] Validate that all new queries have appropriate indexes (e.g., `user_segment`) -- [**CRITICAL**] Confirm database `max_connections` supports combined blue + green load -- [**CRITICAL**] Verify per-pod connection pool size is adjusted to prevent pool overflow -- Ensure staging environment uses production-scale data (e.g., 50M rows, not 5M) - -### ✅ Monitoring & Alerts -- [**CRITICAL**] Query latency monitoring enabled (P99 tracked in real time) -- Connection pool usage monitored with alerting (threshold: >80% of max_connections) -- SLO violation detection active (latency >500ms triggers alert) - -### ✅ Testing & Validation -- [**CRITICAL**] Full-capacity integration test completed with dual environments running -- Performance testing executed using production-like dataset sizes -- Indexing review performed for all schema-affecting changes -- Rollback procedure tested in staging - -### ✅ Process -- Deployment checklist updated and reviewed -- Incident response roles assigned (on-call engineer, comms lead) -- Emergency rollback command pre-validated - ---- - -## Deployment Procedure - -1. **Deploy Green Environment** - ```bash - kubectl apply -f recommendation-service-green.yaml - ``` - Wait for all pods to reach `Running` and pass readiness checks: - ```bash - kubectl get pods -l app=recommendation-service,version=v2.5.0 - ``` - -2. **Verify Health & Connectivity** - - Check logs for connection errors - - Confirm database connectivity and migration success - - Validate `/health` endpoint returns 200 - -3. **Begin Incremental Traffic Shift** - Apply traffic split via service mesh: - ```bash - # 10% to green - istioctl replace -f traffic-split-10pct.yaml - sleep 180 - ``` - -4. 
**Monitor Key Metrics After Each Step** - - P99 latency (<500ms) - - Error rate (<0.5%) - - Active DB connections (<80% of max) - - CPU/Memory utilization stable - - Repeat shift: - ```bash - istioctl replace -f traffic-split-25pct.yaml; sleep 300 - istioctl replace -f traffic-split-50pct.yaml; sleep 600 - istioctl replace -f traffic-split-75pct.yaml; sleep 900 - ``` - -5. **Final Cutover (100%)** - ```bash - istioctl replace -f traffic-split-100pct.yaml - ``` - -6. **Decommission Blue** - After 30 minutes of stable operation: - ```bash - kubectl delete deployment recommendation-service-blue --namespace=production - ``` - ---- - -## Rollback Procedure - -### When to Rollback -Roll back immediately if **any** of the following occur: -- P99 latency exceeds **500ms** for >2 minutes -- Error rate spikes above **1%** -- Database connection usage reaches **90%** -- SLO violation detected - -### Execute Rollback -```bash -istioctl replace -f traffic-split-0pct.yaml -``` -> ⚠️ This command routes 100% traffic back to the blue (stable) environment. - -### Expected Recovery Time -- **Target**: <2 minutes -- Service should stabilize within 90 seconds -- Confirm health endpoints and metrics return to baseline - ---- - -## Common Pitfalls & Solutions - -| Issue | Symptom | Root Cause | Solution | -|------|--------|-----------|----------| -| Database connection exhaustion | 5xx errors during traffic shift, "too many connections" logs | `max_connections=100` too low; per-pod pools not scaled down | Increase DB limit; reduce per-pod pool size | -| Latency spike at 75% shift | P99 jumps to 780ms, SLO breach | Missing index on `user_segment` causes full table scan | Add index; validate all queries pre-deploy | -| No early warning | Alerts silent during degradation | No monitoring on connection count or query latency | Add alerts on key DB and service metrics | -| Staging passes, prod fails | Deployment works locally but fails in production | Staging uses 5M rows vs. 50M in prod | Mirror production data volume in staging | - ---- - -## Best Practices - -- Always test blue-green states under full expected load -- Use incremental shifts with pauses aligned to metric collection intervals -- Run emergency rollback drills monthly -- Enforce mandatory index reviews for any code introducing new queries -- Keep staging data within 10% of production scale - -**Expected Timeline**: -- Deployment window: 45–60 minutes -- Rollback execution: ≤2 minutes -- Post-cutover observation: 30 minutes minimum - ---- - -## Key Takeaways - -1. **Connection pools must account for peak concurrency during dual-environment operation** — always size pools and DB limits for combined blue+green load. -2. **Missing indexes can cause catastrophic performance degradation at scale** — enforce pre-deployment indexing validation and query reviews. -3. **Staging environments must mirror production data volume** — 5M-row datasets won’t catch scalability issues present in 50M+ tables. -4. **Monitoring must include infrastructure-level metrics** — connection usage, query latency, and SLOs are critical for safe rollouts. -5. **Lessons must become process** — integrate remediation actions (e.g., checklist updates, index creation) directly into deployment pipelines. 
\ No newline at end of file diff --git a/examples/output/skill_example_lazyllm/log_3.md b/examples/output/skill_example_lazyllm/log_3.md deleted file mode 100644 index cc140b23..00000000 --- a/examples/output/skill_example_lazyllm/log_3.md +++ /dev/null @@ -1,157 +0,0 @@ -```yaml ---- -name: production-microservice-deployment -description: Production-ready guide for deploying microservices using blue-green strategy with validated infrastructure, monitoring, and rollback safeguards. -version: 0.3.0 -status: Evolving ---- -``` - -# Production Microservice Deployment with Blue-Green Strategy - -This guide provides a battle-tested procedure for safely deploying microservices in production using the blue-green deployment strategy. It is intended for use when zero-downtime rollouts, risk mitigation under real traffic, and rapid recovery are required. The steps are derived from real deployment outcomes across three iterations, including two failures and one successful cutover. - -Use this guide when: -- Deploying versioned services with stateless workloads -- Database schema and query performance have been pre-validated -- Monitoring, alerting, and rollback tooling are active -- Traffic shifting is managed via service mesh or load balancer - ---- - -## Deployment Context - -**Strategy**: Blue-green deployment with incremental traffic shift (10% → 100%) over 36 minutes -**Environment**: Kubernetes-based platform with service mesh routing control -**Target Service**: `recommendation-service` v2.5.0 -**Goals**: -- Achieve zero-downtime cutover -- Validate performance under real production load -- Stay within SLOs (P99 latency < 500ms, error rate < 0.5%) -- Maintain database stability under dual-environment load - ---- - -## Pre-Deployment Checklist - -### ✅ Database -- [CRITICAL] Confirm database `max_connections` supports combined blue/green load (minimum 250 for this service) -- [CRITICAL] Verify all new queries have appropriate indexes; validate existence of `idx_user_segment` -- [ ] Ensure per-pod connection pool size is adjusted to prevent exhaustion (e.g., HikariCP `maximumPoolSize`) -- [ ] Confirm staging dataset mirrors production scale (≥50M rows for key tables) - -### ✅ Monitoring & Alerting -- [CRITICAL] Active alerts on database connection usage (>80% threshold) -- [CRITICAL] Query latency monitoring enabled for high-impact endpoints -- [ ] P99 latency, error rate, and request volume dashboards accessible in real time -- [ ] Rollback trigger thresholds defined (see Rollback Procedure) - -### ✅ Infrastructure -- [ ] New environment (green) deployed and health-checked -- [ ] Blue environment remains fully operational and stable -- [ ] Routing controller ready for incremental traffic shifts - -### ✅ Testing & Validation -- [CRITICAL] Full-capacity integration test completed under dual-blue-green load -- [ ] Performance testing executed with production-scale data volume -- [ ] Indexing and query plan review performed for all new database access patterns - ---- - -## Deployment Procedure - -1. **Deploy Green Environment** - ```bash - kubectl apply -f recommendation-service-v2.5.0.yaml - ``` - Wait for all pods to reach `Running` and pass readiness checks. - -2. **Validate Health** - - Confirm logs show clean startup - - Verify `/health` endpoint returns 200 - - Check metrics: no errors, CPU/MEM within expected range - -3. 
**Begin Incremental Traffic Shift** - Apply traffic weights via service mesh (example using Istio): - ```bash - # 10% to green - istioctl traffic-shift set --to green --weight 10 - sleep 300 # Monitor for 5 minutes - ``` - -4. **Monitor at Each Stage** - After each shift, wait 5–10 minutes and verify: - - P99 latency < 500ms - - Error rate < 0.5% - - Database active connections < 80% of max - - No alerts firing - - Continue shifting: - ```bash - istioctl traffic-shift set --to green --weight 25 - sleep 300 - - istioctl traffic-shift set --to green --weight 50 - sleep 600 - - istioctl traffic-shift set --to green --weight 75 - sleep 600 - - istioctl traffic-shift set --to green --weight 100 - ``` - -5. **Cutover Complete** - - Confirm full traffic on green (1500 req/s observed in success case) - - Average latency: 136ms, P99: 216ms, error rate: 0.2% - ---- - -## Rollback Procedure - -**Trigger Rollback If**: -- P99 latency > 500ms for >2 minutes -- Error rate > 0.5% sustained -- Database connection usage hits 90% -- Any critical alert fires during shift - -**Execute Immediate Rollback**: -```bash -istioctl traffic-shift set --to blue --weight 100 -``` - -**Expected Recovery Time**: ≤1.5 minutes -**Post-Rollback Actions**: -- Preserve logs and metrics for root cause analysis -- Disable green environment if not needed for debugging -- Update incident log with timestamp, metrics, and rollback reason - ---- - -## Common Pitfalls & Solutions - -| Issue | Symptom | Root Cause | Solution | -|------|--------|-----------|----------| -| Database connection exhaustion | 5xx errors during 50% shift, "too many connections" logs | `max_connections=100` insufficient for dual environments | Increase limit to 250; reduce per-pod pool size | -| Latency spike at 75% traffic | P99 jumps to 780ms, SLO violation | Missing `idx_user_segment`, full table scan on 50M-row table | Create index; test with production-scale data | -| No alert on connection usage | Failure undetected until user impact | Missing monitoring on DB connection pool | Add Prometheus/Grafana alert at 80% threshold | -| Staging test passed but prod failed | No issues in staging, failure in production | Staging dataset too small (5M vs 50M rows) | Mirror production data scale in staging | - ---- - -## Best Practices - -- **Traffic Shifting**: Use conservative increments (10% → 100%) over ≥30 minutes to allow observation -- **Monitoring**: Focus on P99 latency, error rate, and database connection count—these were leading indicators -- **Testing**: Always run performance tests with production-scale datasets and query patterns -- **Validation**: Enforce mandatory pre-deployment checklist including indexing and capacity review -- **Timeline**: Allow 36+ minutes for full rollout with monitoring pauses; rollback completes in <2 minutes - ---- - -## Key Takeaways - -1. **Database capacity must account for peak deployment states**—blue-green requires double the normal load capacity. -2. **Production-scale testing is non-negotiable**—small datasets hide scalability bugs like missing indexes. -3. **Connection pools and infrastructure limits must be proactively monitored and alerted**—silent exhaustion causes outages. -4. **Incremental traffic shifting with staged validation enables safe rollout**—real-load testing catches what synthetic tests miss. -5. **Lessons must become checklists**—operationalize fixes (index reviews, pool sizing) to prevent recurrence. 
\ No newline at end of file diff --git a/examples/output/skill_example_lazyllm/skill.md b/examples/output/skill_example_lazyllm/skill.md deleted file mode 100644 index cb48451c..00000000 --- a/examples/output/skill_example_lazyllm/skill.md +++ /dev/null @@ -1,174 +0,0 @@ -```yaml ---- -name: production-microservice-deployment -description: Production-ready guide for deploying microservices using blue-green strategy, based on real-world failure and success patterns. -version: 1.0.0 -status: Production-Ready ---- -``` - -# Production Microservice Deployment with Blue-Green Strategy - -## Introduction - -This guide provides a battle-tested, step-by-step procedure for safely deploying a microservice to production using the blue-green deployment strategy. It is designed for use when minimizing downtime and risk during version upgrades is critical. The procedures, checks, and thresholds are derived from actual deployment attempts—two failures and one successful rollout—of the `recommendation-service` v2.5.0. - -Use this guide for any stateful or database-dependent microservice where traffic shifting must account for infrastructure capacity, performance under load, and safe rollback readiness. - ---- - -## Deployment Context - -- **Strategy**: Blue-green deployment with incremental traffic shifting (10% → 100%) -- **Environment**: Kubernetes-based production cluster with external PostgreSQL database -- **Traffic Management**: Service mesh (e.g., Istio) or ingress controller managing traffic split -- **Goals**: - - Zero-downtime cutover - - Validation of performance under real user load - - Immediate rollback capability if SLOs are violated - - Full operational hygiene post-cutover - ---- - -## Pre-Deployment Checklist - -> ✅ All items must be verified before initiating deployment. - -### Database -- [CRITICAL] Confirm database `max_connections` supports combined blue + green load - → *Increase from 100 to 250 if necessary* -- [CRITICAL] Validate all new query patterns have required indexes - → *Ensure `idx_user_segment` exists on `user_segment` column* -- [CRITICAL] Verify staging dataset size mirrors production (e.g., 50M rows) to detect scalability issues -- Adjust per-pod connection pool size to prevent exhaustion under dual-environment traffic - -### Monitoring & Alerts -- [CRITICAL] Ensure monitoring is enabled for: - - Database active connections - - Query latency (P99) for key endpoints - - HTTP error rates and request volume -- Confirm alerts are configured to trigger on: - - P99 latency > 500ms (SLO threshold) - - Connection pool saturation (>80% of max) - - Error rate > 1% - -### Testing & Validation -- [CRITICAL] Complete full-capacity integration test simulating blue-green state -- Run production-scale load test with realistic query patterns -- Review all database schema changes and indexing decisions in PR - -### Operational Readiness -- Confirm rollback path is tested and executable within 2 minutes -- Verify deployment checklist is integrated into CI/CD pipeline gates -- Ensure logging and tracing are aligned across both environments - ---- - -## Deployment Procedure - -1. **Deploy Green Environment** - ```bash - kubectl apply -f recommendation-service-v2.5.0.yaml - ``` - Wait for all pods to reach `Running` and pass readiness probes. - -2. 
**Initialize Traffic at 10%** - ```bash - istioctl traffic-split set --namespace prod --green-weight 10 --blue-weight 90 - ``` - Monitor for 5 minutes: - - Confirm no spike in errors or latency - - Check database connections: must remain <40% of max - -3. **Shift to 25% Traffic** - ```bash - istioctl traffic-split set --namespace prod --green-weight 25 - ``` - Monitor for 7 minutes: - - P99 latency ≤ 500ms - - Error rate ≤ 0.5% - - No alert triggers - -4. **Proceed to 50%, Then 75%** - ```bash - istioctl traffic-split set --namespace prod --green-weight 50 - # After 10 min stable → - istioctl traffic-split set --namespace prod --green-weight 75 - ``` - At each stage: - - Watch for query degradation - - Confirm connection usage remains under 70% - -5. **Cutover to 100%** - ```bash - istioctl traffic-split set --namespace prod --green-weight 100 - ``` - Final validation: - - Sustained load: ≥1500 req/s - - Average latency ≤ 140ms, P99 ≤ 250ms - - Error rate ≤ 0.3% - - DB connection usage ≤ 50% - -6. **Post-Cutover Actions** - - Decommission blue environment - ```bash - kubectl delete -f recommendation-service-v2.4.0.yaml - ``` - - Activate continuous monitoring dashboard - - Record deployment outcome and lessons in runbook - ---- - -## Rollback Procedure - -### Trigger Conditions (Rollback Immediately If): -- P99 latency > 500ms for >2 minutes -- Error rate > 1% sustained over 3 minutes -- Database connection errors observed -- Any alert on connection pool saturation - -### Execute Rollback -```bash -istioctl traffic-split set --namespace prod --blue-weight 100 --green-weight 0 -``` -→ Revert traffic fully to stable blue version. - -### Expected Outcome -- Service stability restored within **≤1.5 minutes** -- Error rate returns to baseline -- Latency normalizes to pre-deployment levels - -Post-rollback: -- Preserve logs and metrics for root cause analysis -- Halt further deployments until remediation complete - ---- - -## Common Pitfalls & Solutions - -| Issue | Symptom | Root Cause | Solution | -|------|--------|-----------|----------| -| Database connection exhaustion | Errors during 50% shift, "too many connections" logs | `max_connections=100` insufficient for dual environments | Increase limit to 250; reduce per-pod pool size | -| Latency spike at 75% traffic | P99 jumps to 780ms, SLO breach | Missing index on `user_segment`, full table scan on 50M rows | Create `idx_user_segment`; validate all queries | -| No early warning | No alerts before rollback | Missing monitoring on connection count and query latency | Add alerts for DB connections (>80%) and P99 (>400ms) | -| Staging environment false confidence | Performance fine in staging | Data volume too small (5M vs 50M) | Mirror production data scale in staging | - ---- - -## Best Practices - -- **Traffic Shifting**: Use conservative increments (10% → 25% → 50% → 75% → 100%) with monitoring pauses -- **Validation Window**: Minimum 5–10 minutes per stage depending on traffic ramp -- **Monitoring Focus**: Prioritize database-level metrics and end-to-end latency -- **Timeline**: Allow 30–40 minutes for full cutover including observation periods -- **Checklist Enforcement**: Integrate pre-deployment validations into CI/CD approval gates - ---- - -## Key Takeaways - -1. **Database capacity must account for peak deployment states** — blue-green requires double the normal load capacity; validate `max_connections` and pool sizing upfront. -2. 
**Performance testing must use production-scale datasets** — staging with 10% data volume will not catch full-table-scan bottlenecks. -3. **Indexing is a deployment gate** — every new query pattern must be reviewed and indexed before release. -4. **Monitoring must cover infrastructure dependencies** — track database connections, query latency, and pool utilization as first-class signals. -5. **Safe deployment is procedural** — gradual traffic shifts, staged validation, and rollback readiness enable recovery from unforeseen issues without user impact. \ No newline at end of file diff --git a/src/memu/app/service.py b/src/memu/app/service.py index 9bdc7eaf..3423f849 100644 --- a/src/memu/app/service.py +++ b/src/memu/app/service.py @@ -121,15 +121,14 @@ def _init_llm_client(self, config: LLMConfig | None = None) -> Any: from memu.llm.lazyllm_client import LazyLLMClient return LazyLLMClient( - llm_source=cfg.llm_source, - vlm_source=cfg.vlm_source, - embed_source=cfg.embed_source, - stt_source=cfg.stt_source, + llm_source=cfg.llm_source or cfg.source, + vlm_source=cfg.vlm_source or cfg.source, + embed_source=cfg.embed_source or cfg.source, + stt_source=cfg.stt_source or cfg.source, chat_model=cfg.chat_model, embed_model=cfg.embed_model, vlm_model=cfg.vlm_model, stt_model=cfg.stt_model, - api_key=cfg.api_key, ) else: msg = f"Unknown llm_client_backend '{cfg.client_backend}'" diff --git a/src/memu/app/settings.py b/src/memu/app/settings.py index 30179556..e4a15fdd 100644 --- a/src/memu/app/settings.py +++ b/src/memu/app/settings.py @@ -102,6 +102,7 @@ class LLMConfig(BaseModel): description="Which LLM client backend to use: 'httpx' (httpx) , 'sdk' (official OpenAI) or 'lazyllm_backend'(for more LLM source like Qwen, Doubao, SIliconflow, etc.)", ) # setup LazyLLM + source: str | None = Field(default=None, description="default source for lazyllm backend") llm_source: str | None = Field(default=None, description="LLM source for lazyllm backend") vlm_source: str | None = Field(default=None, description="VLM source for lazyllm backend") embed_source: str | None = Field(default=None, description="Embedding source for lazyllm backend") diff --git a/src/memu/llm/lazyllm_client.py b/src/memu/llm/lazyllm_client.py index 50b6b74b..9b82e667 100644 --- a/src/memu/llm/lazyllm_client.py +++ b/src/memu/llm/lazyllm_client.py @@ -24,17 +24,15 @@ def __init__(self, vlm_model: str = None, embed_model: str = None, stt_model: str = None, - api_key: str = None, ): - self.llm_source = llm_source or self.source - self.vlm_source = vlm_source or self.source - self.embed_source = embed_source or self.source - self.stt_source = stt_source or self.source + self.llm_source = llm_source or self.DEFAULT_SOURCE + self.vlm_source = vlm_source or self.DEFAULT_SOURCE + self.embed_source = embed_source or self.DEFAULT_SOURCE + self.stt_source = stt_source or self.DEFAULT_SOURCE self.chat_model = chat_model or self.DEFAULT_MODELS['llm'] self.vlm_model = vlm_model or self.DEFAULT_MODELS['vlm'] self.embed_model = embed_model or self.DEFAULT_MODELS['embed'] self.stt_model = stt_model or self.DEFAULT_MODELS['stt'] - self.api_key = api_key async def _call_async(self, client: Any, *args: Any, **kwargs: Any) -> Any: '''异步调用 lazyllm client''' @@ -51,7 +49,7 @@ async def summarize( max_tokens: int | None = None, system_prompt: str | None = None, ) -> str: - client = lazyllm.OnlineModule(source=self.llm_source, model=self.chat_model, type='llm', api_key=self.api_key) + client = lazyllm.namespace('MEMU').OnlineModule(source=self.llm_source, 
model=self.chat_model, type='llm') prompt = system_prompt or 'Summarize the text in one short paragraph.' full_prompt = f'{prompt}\n\ntext:\n{text}' LOG.debug(f'Summarizing text with {self.llm_source}/{self.chat_model}') @@ -66,7 +64,7 @@ async def vision( max_tokens: int | None = None, system_prompt: str | None = None, ) -> tuple[str, Any]: - client = lazyllm.OnlineModule(source=self.vlm_source, model=self.vlm_model, type='vlm', api_key=self.api_key) + client = lazyllm.namespace('MEMU').OnlineModule(source=self.vlm_source, model=self.vlm_model, type='vlm') LOG.debug(f'Processing image with {self.vlm_source}/{self.vlm_model}: {image_path}') # LazyLLM VLM accepts prompt as first positional argument and image_path as keyword argument response = await self._call_async(client, prompt, image_path=image_path) @@ -77,8 +75,8 @@ async def embed( texts: list[str], batch_size: int = 10, # optional ) -> list[list[float]]: - client = lazyllm.OnlineModule(source=self.embed_source, model=self.embed_model, type='embed', - batch_size=batch_size, api_key=self.api_key) + client = lazyllm.namespace('MEMU').OnlineModule(source=self.embed_source, model=self.embed_model, + type='embed', batch_size=batch_size) LOG.debug(f'embed {len(texts)} texts with {self.embed_source}/{self.embed_model}') response = await self._call_async(client, texts) return response @@ -89,7 +87,7 @@ async def transcribe( language: str | None = None, prompt: str | None = None, ) -> str: - client = lazyllm.OnlineModule(source=self.stt_source, model=self.stt_model, type='stt', api_key=self.api_key) + client = lazyllm.namespace('MEMU').OnlineModule(source=self.stt_source, model=self.stt_model, type='stt') LOG.debug(f'Transcribing audio with {self.stt_source}/{self.stt_model}: {audio_path}') response = await self._call_async(client, audio_path) return response diff --git a/tests/test_lazyllm.py b/tests/test_lazyllm.py index 2ac2b22b..6f282cb4 100644 --- a/tests/test_lazyllm.py +++ b/tests/test_lazyllm.py @@ -26,14 +26,7 @@ async def test_lazyllm_client(): print("=" * 60) # Get API key from environment - lazyllm.config.add("qwen_api_key", str, env="QWEN_API_KEY", description="Qwen API Key") - with lazyllm.config.namespace("MEMU"): - api_key = lazyllm.config['qwen_api_key'] - if not api_key: - msg = "Please set MEMU_QWEN_API_KEY environment variable" - raise ValueError(msg) - - print(f"✓ API key found: {api_key[:20]}...") + lazyllm.config.add("qwen_api_key", str, env="MEMU_QWEN_API_KEY", description="Qwen API Key") try: client = LazyLLMClient( source="qwen", @@ -42,7 +35,6 @@ async def test_lazyllm_client(): embed_model="text-embedding-v3", stt_model="qwen-audio-turbo", base_url="", - api_key=api_key ) print("✓ LazyLLMClient initialized successfully") except Exception as e: From d9f00df39f9e791149241efb2e40596248f7ba2d Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 23 Jan 2026 15:39:49 +0800 Subject: [PATCH 06/14] fix test file bug --- examples/example_5_with_lazyllm_client.py | 11 +++--- src/memu/app/settings.py | 16 ++++----- src/memu/llm/lazyllm_client.py | 21 +++++------- tests/test_lazyllm.py | 42 ++++++----------------- 4 files changed, 31 insertions(+), 59 deletions(-) diff --git a/examples/example_5_with_lazyllm_client.py b/examples/example_5_with_lazyllm_client.py index 6f61fcfd..1eeb9be2 100644 --- a/examples/example_5_with_lazyllm_client.py +++ b/examples/example_5_with_lazyllm_client.py @@ -22,7 +22,6 @@ import os import sys from pathlib import Path -import lazyllm # Add src to sys.path FIRST before importing memu project_root = 
Path(__file__).parent.parent @@ -65,7 +64,7 @@ async def run_conversation_memory_demo(service): print(f" ✗ Error processing {conv_file}: {e}") # Output generation - output_dir = "examples/output/unified_example/conversation" + output_dir = "examples/output/lazyllm_example/conversation" os.makedirs(output_dir, exist_ok=True) await generate_markdown_output(categories, output_dir) print(f"✓ Conversation processing complete. Output: {output_dir}") @@ -128,7 +127,7 @@ async def run_skill_extraction_demo(service): # Generate summary guide if all_skills: - output_file = "examples/output/unified_example/skills/skill_guide.md" + output_file = "examples/output/lazyllm_example/skills/skill_guide.md" await generate_skill_guide(all_skills, service, output_file) print(f"✓ Skill guide generated: {output_file}") @@ -178,7 +177,7 @@ async def run_multimodal_demo(service): except Exception as e: print(f" ✗ Error: {e}") - output_dir = "examples/output/unified_example/multimodal" + output_dir = "examples/output/lazyllm_example/multimodal" os.makedirs(output_dir, exist_ok=True) await generate_markdown_output(categories, output_dir) print(f"✓ Multimodal processing complete. Output: {output_dir}") @@ -237,8 +236,8 @@ async def main(): # 2. Run Demos await run_conversation_memory_demo(service) - await run_skill_extraction_demo(service) - await run_multimodal_demo(service) + # await run_skill_extraction_demo(service) + # await run_multimodal_demo(service) if __name__ == "__main__": asyncio.run(main()) diff --git a/src/memu/app/settings.py b/src/memu/app/settings.py index e4a15fdd..f0f74e85 100644 --- a/src/memu/app/settings.py +++ b/src/memu/app/settings.py @@ -102,19 +102,19 @@ class LLMConfig(BaseModel): description="Which LLM client backend to use: 'httpx' (httpx) , 'sdk' (official OpenAI) or 'lazyllm_backend'(for more LLM source like Qwen, Doubao, SIliconflow, etc.)", ) # setup LazyLLM - source: str | None = Field(default=None, description="default source for lazyllm backend") - llm_source: str | None = Field(default=None, description="LLM source for lazyllm backend") - vlm_source: str | None = Field(default=None, description="VLM source for lazyllm backend") - embed_source: str | None = Field(default=None, description="Embedding source for lazyllm backend") - stt_source: str | None = Field(default=None, description="STT source for lazyllm backend") - vlm_model: str = Field(default="qwen-vl-plus", description="Vision language model for lazyllm") - stt_model: str = Field(default="qwen-audio-turbo", description="Speech-to-text model for lazyllm") + source: str | None = Field(default=None, description="default source for lazyllm client backend") + llm_source: str | None = Field(default=None, description="LLM source for lazyllm client backend") + vlm_source: str | None = Field(default=None, description="VLM source for lazyllm client backend") + embed_source: str | None = Field(default=None, description="Embedding source for lazyllm client backend") + stt_source: str | None = Field(default=None, description="STT source for lazyllm client backend") + vlm_model: str = Field(default="qwen-vl-plus", description="Vision language model for lazyllm client backend") + stt_model: str = Field(default="qwen-audio-turbo", description="Speech-to-text model for lazyllm client backend") endpoint_overrides: dict[str, str] = Field( default_factory=dict, description="Optional overrides for HTTP endpoints (keys: 'chat'/'summary').", ) embed_model: str = Field( - default="text-embedding-3-small", + default="text-embedding-v3", 
description="Default embedding model used for vectorization.", ) embed_batch_size: int = Field( diff --git a/src/memu/llm/lazyllm_client.py b/src/memu/llm/lazyllm_client.py index 9b82e667..f352ce91 100644 --- a/src/memu/llm/lazyllm_client.py +++ b/src/memu/llm/lazyllm_client.py @@ -3,16 +3,11 @@ from pathlib import Path import asyncio import lazyllm +import functools from lazyllm import LOG class LazyLLMClient: DEFAULT_SOURCE = 'qwen' - DEFAULT_MODELS = { - 'llm': 'qwen3-max', - 'vlm': 'qwen-vl-plus', - 'embed': 'text-embedding-v3', - 'stt': 'qwen-audio-turbo', - } def __init__(self, *, @@ -29,17 +24,17 @@ def __init__(self, self.vlm_source = vlm_source or self.DEFAULT_SOURCE self.embed_source = embed_source or self.DEFAULT_SOURCE self.stt_source = stt_source or self.DEFAULT_SOURCE - self.chat_model = chat_model or self.DEFAULT_MODELS['llm'] - self.vlm_model = vlm_model or self.DEFAULT_MODELS['vlm'] - self.embed_model = embed_model or self.DEFAULT_MODELS['embed'] - self.stt_model = stt_model or self.DEFAULT_MODELS['stt'] + self.chat_model = chat_model + self.vlm_model = vlm_model + self.embed_model = embed_model + self.stt_model = stt_model async def _call_async(self, client: Any, *args: Any, **kwargs: Any) -> Any: '''异步调用 lazyllm client''' if kwargs: - return await asyncio.to_thread(lambda: client(*args, **kwargs)) + return await asyncio.to_thread(functools.partial(client, *args, **kwargs)) else: - return await asyncio.to_thread(lambda: client(*args)) + return await asyncio.to_thread(client, *args) async def summarize( @@ -67,7 +62,7 @@ async def vision( client = lazyllm.namespace('MEMU').OnlineModule(source=self.vlm_source, model=self.vlm_model, type='vlm') LOG.debug(f'Processing image with {self.vlm_source}/{self.vlm_model}: {image_path}') # LazyLLM VLM accepts prompt as first positional argument and image_path as keyword argument - response = await self._call_async(client, prompt, image_path=image_path) + response = await self._call_async(client, prompt, lazyllm_files=image_path) return response, None async def embed( diff --git a/tests/test_lazyllm.py b/tests/test_lazyllm.py index 6f282cb4..63a63fcf 100644 --- a/tests/test_lazyllm.py +++ b/tests/test_lazyllm.py @@ -26,15 +26,16 @@ async def test_lazyllm_client(): print("=" * 60) # Get API key from environment - lazyllm.config.add("qwen_api_key", str, env="MEMU_QWEN_API_KEY", description="Qwen API Key") try: client = LazyLLMClient( - source="qwen", - chat_model="qwen3-max", - vlm_model="qwen-vl-plus", - embed_model="text-embedding-v3", - stt_model="qwen-audio-turbo", - base_url="", + llm_source="doubao", + vlm_source="qwen", + embed_source="qwen", + stt_source="qwen", + chat_model = "deepseek-v3-1-terminus", + vlm_model = "qwen-vl-plus", + embed_model = "text-embedding-v3", + stt_model = "qwen-audio-turbo" ) print("✓ LazyLLMClient initialized successfully") except Exception as e: @@ -69,10 +70,10 @@ async def test_lazyllm_client(): # Test 3: Vision (requires image file) print("\n[Test 3] Testing vision...") - test_image_path = "examples/resources/images/sample.jpg" + test_image_path = "examples/resources/images/image1.png" if os.path.exists(test_image_path): try: - result, response = await client.vision( + result, _ = await client.vision( prompt="描述这张图片的内容", image_path=test_image_path ) @@ -84,29 +85,6 @@ async def test_lazyllm_client(): traceback.print_exc() else: print(f"⚠ Skipped: Test image not found at {test_image_path}") - - # Test 4: Transcription (requires audio file) - print("\n[Test 4] Testing transcription...") - 
test_audio_path = "examples/resources/audio/sample.wav" - if os.path.exists(test_audio_path): - try: - result, response = await client.transcribe( - audio_path=test_audio_path, - language="zh" - ) - print(f"✓ Transcription successful") - print(f" Result: {result[:100]}...") - except Exception as e: - print(f"❌ Transcription failed: {e}") - import traceback - traceback.print_exc() - else: - print(f"⚠ Skipped: Test audio not found at {test_audio_path}") - - print("\n" + "=" * 60) - print("✓ LazyLLM backend tests completed!") - return True - if __name__ == "__main__": success = asyncio.run(test_lazyllm_client()) From e0466be106f7ca25bcd66b4b6f816ec52741df93 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 23 Jan 2026 16:42:58 +0800 Subject: [PATCH 07/14] add lazyllm dependency --- pyproject.toml | 1 + tests/test_lazyllm.py | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index debd4663..d8fa9924 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,6 +27,7 @@ dependencies = [ "alembic>=1.14.0", "pendulum>=3.1.0", "langchain-core>=1.2.7", + "lazyllm>=0.7.3", ] [build-system] diff --git a/tests/test_lazyllm.py b/tests/test_lazyllm.py index 63a63fcf..7b71cc67 100644 --- a/tests/test_lazyllm.py +++ b/tests/test_lazyllm.py @@ -28,11 +28,11 @@ async def test_lazyllm_client(): # Get API key from environment try: client = LazyLLMClient( - llm_source="doubao", + llm_source="qwen", vlm_source="qwen", embed_source="qwen", stt_source="qwen", - chat_model = "deepseek-v3-1-terminus", + chat_model = "qwen-plus", vlm_model = "qwen-vl-plus", embed_model = "text-embedding-v3", stt_model = "qwen-audio-turbo" @@ -73,7 +73,7 @@ async def test_lazyllm_client(): test_image_path = "examples/resources/images/image1.png" if os.path.exists(test_image_path): try: - result, _ = await client.vision( + result, response = await client.vision( prompt="描述这张图片的内容", image_path=test_image_path ) From c8895d8f8e65d4a50dfb340684438eee3cf81e7d Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 23 Jan 2026 17:32:49 +0800 Subject: [PATCH 08/14] add descriptions for lazyllmclient --- src/memu/llm/lazyllm_client.py | 49 +++++++++++++++++++++++++++++++--- 1 file changed, 46 insertions(+), 3 deletions(-) diff --git a/src/memu/llm/lazyllm_client.py b/src/memu/llm/lazyllm_client.py index f352ce91..bfbe12c5 100644 --- a/src/memu/llm/lazyllm_client.py +++ b/src/memu/llm/lazyllm_client.py @@ -7,6 +7,7 @@ from lazyllm import LOG class LazyLLMClient: + """LAZYLLM client that relies on the LazyLLM framework.""" DEFAULT_SOURCE = 'qwen' def __init__(self, @@ -30,7 +31,9 @@ def __init__(self, self.stt_model = stt_model async def _call_async(self, client: Any, *args: Any, **kwargs: Any) -> Any: - '''异步调用 lazyllm client''' + """ + Asynchronously call a LazyLLM client with given arguments and keyword arguments. + """ if kwargs: return await asyncio.to_thread(functools.partial(client, *args, **kwargs)) else: @@ -44,6 +47,16 @@ async def summarize( max_tokens: int | None = None, system_prompt: str | None = None, ) -> str: + """ + Generate a summary or response for the input text using the configured LLM backend. + + Args: + text: The input text to summarize or process. + max_tokens: (Optional) Maximum number of tokens to generate. + system_prompt: (Optional) System instruction to guide the LLM behavior. + Return: + The generated summary text as a string. 
+ """ client = lazyllm.namespace('MEMU').OnlineModule(source=self.llm_source, model=self.chat_model, type='llm') prompt = system_prompt or 'Summarize the text in one short paragraph.' full_prompt = f'{prompt}\n\ntext:\n{text}' @@ -59,6 +72,18 @@ async def vision( max_tokens: int | None = None, system_prompt: str | None = None, ) -> tuple[str, Any]: + """ + Process an image with a text prompt using the configured VLM (Vision-Language Model). + + + Args: + prompt: Text prompt describing the request or question about the image. + image_path: Path to the image file to be analyzed. + max_tokens: (Optional) Maximum number of tokens to generate. + system_prompt: (Optional) System instruction to guide the VLM behavior. + Return: + A tuple containing the generated text response and None (reserved for metadata). + """ client = lazyllm.namespace('MEMU').OnlineModule(source=self.vlm_source, model=self.vlm_model, type='vlm') LOG.debug(f'Processing image with {self.vlm_source}/{self.vlm_model}: {image_path}') # LazyLLM VLM accepts prompt as first positional argument and image_path as keyword argument @@ -68,8 +93,17 @@ async def vision( async def embed( self, texts: list[str], - batch_size: int = 10, # optional + batch_size: int = 10, ) -> list[list[float]]: + """ + Generate vector embeddings for a list of text strings. + + Args: + texts: List of text strings to embed. + batch_size: (Optional) Batch size for processing embeddings (default: 10). + Return: + A list of embedding vectors (list of floats), one for each input text. + """ client = lazyllm.namespace('MEMU').OnlineModule(source=self.embed_source, model=self.embed_model, type='embed', batch_size=batch_size) LOG.debug(f'embed {len(texts)} texts with {self.embed_source}/{self.embed_model}') @@ -82,8 +116,17 @@ async def transcribe( language: str | None = None, prompt: str | None = None, ) -> str: + """ + Transcribe audio content to text using the configured STT (Speech-to-Text) backend. + + Args: + audio_path: Path to the audio file to transcribe. + language: (Optional) Language code of the audio content. + prompt: (Optional) Text prompt to guide the transcription or translation. + Return: + The transcribed text as a string. + """ client = lazyllm.namespace('MEMU').OnlineModule(source=self.stt_source, model=self.stt_model, type='stt') LOG.debug(f'Transcribing audio with {self.stt_source}/{self.stt_model}: {audio_path}') response = await self._call_async(client, audio_path) return response - \ No newline at end of file From 15c01f1d058fe43ac51889c62c34e7c75cb719d0 Mon Sep 17 00:00:00 2001 From: unknown Date: Mon, 26 Jan 2026 15:25:42 +0800 Subject: [PATCH 09/14] update test case --- tests/test_lazyllm.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/test_lazyllm.py b/tests/test_lazyllm.py index 7b71cc67..f2ca64e2 100644 --- a/tests/test_lazyllm.py +++ b/tests/test_lazyllm.py @@ -3,14 +3,13 @@ Quick test script to verify LazyLLM backend configuration and basic functionality. 
Usage: - export LAZYLLM_API_KEY=your_api_key + export MEMU_QWEN_API_KEY=your_api_key python examples/test_lazyllm.py """ import asyncio import os import sys -import lazyllm # Add src to sys.path src_path = os.path.abspath("src") From 5747ba92c922f65e8f2519cc83293c05af4f2cf9 Mon Sep 17 00:00:00 2001 From: Wu Date: Wed, 28 Jan 2026 23:58:10 +0900 Subject: [PATCH 10/14] fix: lazyllm config hierarchy --- examples/example_5_with_lazyllm_client.py | 15 +++++++++------ src/memu/app/service.py | 12 ++++++------ src/memu/app/settings.py | 23 +++++++++++++---------- 3 files changed, 28 insertions(+), 22 deletions(-) diff --git a/examples/example_5_with_lazyllm_client.py b/examples/example_5_with_lazyllm_client.py index 1eeb9be2..fcd037fc 100644 --- a/examples/example_5_with_lazyllm_client.py +++ b/examples/example_5_with_lazyllm_client.py @@ -222,14 +222,17 @@ async def main(): llm_profiles={ "default": { "client_backend": "lazyllm_backend", - "llm_source": "qwen", - "vlm_source": "qwen", - "embed_source": "qwen", - "stt_source": "qwen", "chat_model": "qwen3-max", - "vlm_model": "qwen-vl-plus", "embed_model": "text-embedding-v3", - "stt_model": "qwen-audio-turbo", + "lazyllm_source": { + "source": "qwen", + "llm_source": "qwen", + "vlm_source": "qwen", + "embed_source": "qwen", + "stt_source": "qwen", + "vlm_model": "qwen-vl-plus", + "stt_model": "qwen-audio-turbo", + }, }, } ) diff --git a/src/memu/app/service.py b/src/memu/app/service.py index 3423f849..4e2dea04 100644 --- a/src/memu/app/service.py +++ b/src/memu/app/service.py @@ -121,14 +121,14 @@ def _init_llm_client(self, config: LLMConfig | None = None) -> Any: from memu.llm.lazyllm_client import LazyLLMClient return LazyLLMClient( - llm_source=cfg.llm_source or cfg.source, - vlm_source=cfg.vlm_source or cfg.source, - embed_source=cfg.embed_source or cfg.source, - stt_source=cfg.stt_source or cfg.source, + llm_source=cfg.lazyllm_source.llm_source or cfg.lazyllm_source.source, + vlm_source=cfg.lazyllm_source.vlm_source or cfg.lazyllm_source.source, + embed_source=cfg.lazyllm_source.embed_source or cfg.lazyllm_source.source, + stt_source=cfg.lazyllm_source.stt_source or cfg.lazyllm_source.source, chat_model=cfg.chat_model, embed_model=cfg.embed_model, - vlm_model=cfg.vlm_model, - stt_model=cfg.stt_model, + vlm_model=cfg.lazyllm_source.vlm_model, + stt_model=cfg.lazyllm_source.stt_model, ) else: msg = f"Unknown llm_client_backend '{cfg.client_backend}'" diff --git a/src/memu/app/settings.py b/src/memu/app/settings.py index f0f74e85..40e3d337 100644 --- a/src/memu/app/settings.py +++ b/src/memu/app/settings.py @@ -89,6 +89,16 @@ def _default_memory_categories() -> list[CategoryConfig]: ] +class LazyLLMSource(BaseModel): + source: str | None = Field(default=None, description="default source for lazyllm client backend") + llm_source: str | None = Field(default=None, description="LLM source for lazyllm client backend") + embed_source: str | None = Field(default=None, description="Embedding source for lazyllm client backend") + vlm_source: str | None = Field(default=None, description="VLM source for lazyllm client backend") + stt_source: str | None = Field(default=None, description="STT source for lazyllm client backend") + vlm_model: str = Field(default="qwen-vl-plus", description="Vision language model for lazyllm client backend") + stt_model: str = Field(default="qwen-audio-turbo", description="Speech-to-text model for lazyllm client backend") + + class LLMConfig(BaseModel): provider: str = Field( default="openai", @@ -99,22 +109,15 @@ class 
LLMConfig(BaseModel):
     chat_model: str = Field(default="gpt-4o-mini")
     client_backend: str = Field(
         default="sdk",
-        description="Which LLM client backend to use: 'httpx' (httpx) , 'sdk' (official OpenAI) or 'lazyllm_backend'(for more LLM source like Qwen, Doubao, SIliconflow, etc.)",
+        description="Which LLM client backend to use: 'httpx' (httpx), 'sdk' (official OpenAI), or 'lazyllm_backend' (for more LLM sources like Qwen, Doubao, SiliconFlow, etc.)",
     )
-    # setup LazyLLM
-    source: str | None = Field(default=None, description="default source for lazyllm client backend")
-    llm_source: str | None = Field(default=None, description="LLM source for lazyllm client backend")
-    vlm_source: str | None = Field(default=None, description="VLM source for lazyllm client backend")
-    embed_source: str | None = Field(default=None, description="Embedding source for lazyllm client backend")
-    stt_source: str | None = Field(default=None, description="STT source for lazyllm client backend")
-    vlm_model: str = Field(default="qwen-vl-plus", description="Vision language model for lazyllm client backend")
-    stt_model: str = Field(default="qwen-audio-turbo", description="Speech-to-text model for lazyllm client backend")
+    lazyllm_source: LazyLLMSource = Field(default=LazyLLMSource())
     endpoint_overrides: dict[str, str] = Field(
         default_factory=dict,
         description="Optional overrides for HTTP endpoints (keys: 'chat'/'summary').",
     )
     embed_model: str = Field(
-        default="text-embedding-v3",
+        default="text-embedding-3-small",
         description="Default embedding model used for vectorization.",
     )
     embed_batch_size: int = Field(

From de4e0eca68c5905bd Mon Sep 17 00:00:00 2001
From: Wu
Date: Wed, 28 Jan 2026 23:58:53 +0900
Subject: [PATCH 11/14] fix: code format

---
 examples/example_5_with_lazyllm_client.py |  60 ++++++------
 src/memu/llm/lazyllm_client.py            | 114 +++++++++++-----------
 tests/test_lazyllm.py                     |  33 ++++---
 3 files changed, 107 insertions(+), 100 deletions(-)

diff --git a/examples/example_5_with_lazyllm_client.py
index fcd037fc..a6a300c8 100644
--- a/examples/example_5_with_lazyllm_client.py
+++ b/examples/example_5_with_lazyllm_client.py
@@ -3,9 +3,9 @@
 =========================================

 This example merges functionalities from:
-1. Example 1: Conversation Memory Processing 
-2. Example 2: Skill Extraction 
-3. Example 3: Multimodal Processing
+1. Example 1: Conversation Memory Processing
+2. Example 2: Skill Extraction
+3. 
Example 3: Multimodal Processing It demonstrates how to use the LazyLLM backend for: - Processing conversation history @@ -35,10 +35,11 @@ # PART 1: Conversation Memory Processing # ========================================== + async def run_conversation_memory_demo(service): - print("\n" + "="*60) + print("\n" + "=" * 60) print("PART 1: Conversation Memory Processing") - print("="*60) + print("=" * 60) conversation_files = [ "examples/resources/conversations/conv1.json", @@ -48,7 +49,7 @@ async def run_conversation_memory_demo(service): total_items = 0 categories = [] - + for conv_file in conversation_files: if not os.path.exists(conv_file): print(f"⚠ File not found: {conv_file}") @@ -74,15 +75,16 @@ async def run_conversation_memory_demo(service): # PART 2: Skill Extraction # ========================================== + async def run_skill_extraction_demo(service): - print("\n" + "="*60) + print("\n" + "=" * 60) print("PART 2: Skill Extraction from Logs") - print("="*60) + print("=" * 60) # Configure prompt for skill extraction skill_prompt = """ You are analyzing an agent execution log. Extract the key actions taken, their outcomes, and lessons learned. - + Output MUST be valid XML wrapped in tags. Format: @@ -96,25 +98,21 @@ async def run_skill_extraction_demo(service): - + Text: {resource} """ - + # Update service config for skill extraction service.memorize_config.memory_types = ["skill"] service.memorize_config.memory_type_prompts = {"skill": skill_prompt} - - logs = [ - "examples/resources/logs/log1.txt", - "examples/resources/logs/log2.txt", - "examples/resources/logs/log3.txt" - ] + + logs = ["examples/resources/logs/log1.txt", "examples/resources/logs/log2.txt", "examples/resources/logs/log3.txt"] all_skills = [] for log_file in logs: if not os.path.exists(log_file): continue - + print(f" Processing log: {log_file}") try: result = await service.memorize(resource_url=log_file, modality="document") @@ -136,10 +134,11 @@ async def run_skill_extraction_demo(service): # PART 3: Multimodal Memory # ========================================== + async def run_multimodal_demo(service): - print("\n" + "="*60) + print("\n" + "=" * 60) print("PART 3: Multimodal Memory Processing") - print("="*60) + print("=" * 60) # Configure for knowledge extraction xml_prompt = """ @@ -152,10 +151,10 @@ async def run_multimodal_demo(service): category_name - + Content: {resource} """ - + service.memorize_config.memory_types = ["knowledge"] service.memorize_config.memory_type_prompts = {"knowledge": xml_prompt} @@ -168,7 +167,7 @@ async def run_multimodal_demo(service): for res_file, modality in resources: if not os.path.exists(res_file): continue - + print(f" Processing {modality}: {res_file}") try: result = await service.memorize(resource_url=res_file, modality=modality) @@ -187,25 +186,28 @@ async def run_multimodal_demo(service): # Helpers # ========================================== + async def generate_markdown_output(categories, output_dir): for cat in categories: name = cat.get("name", "unknown") summary = cat.get("summary", "") - if not summary: continue - + if not summary: + continue + with open(os.path.join(output_dir, f"{name}.md"), "w", encoding="utf-8") as f: f.write(f"# {name.replace('_', ' ').title()}\n\n") cleaned = summary.replace("", "").replace("", "").strip() f.write(cleaned) + async def generate_skill_guide(skills, service, output_file): os.makedirs(os.path.dirname(output_file), exist_ok=True) skills_text = "\n\n".join(skills) prompt = f"Summarize these skills into a 
guide:\n\n{skills_text}" - + # Use LazyLLM via service summary = await service.llm_client.summarize(text=prompt) - + with open(output_file, "w", encoding="utf-8") as f: f.write(summary) @@ -214,9 +216,10 @@ async def generate_skill_guide(skills, service, output_file): # Main Entry # ========================================== + async def main(): print("Unified LazyLLM Example") - print("="*60) + print("=" * 60) # 1. Initialize Shared Service service = MemoryService( llm_profiles={ @@ -242,5 +245,6 @@ async def main(): # await run_skill_extraction_demo(service) # await run_multimodal_demo(service) + if __name__ == "__main__": asyncio.run(main()) diff --git a/src/memu/llm/lazyllm_client.py b/src/memu/llm/lazyllm_client.py index bfbe12c5..4418ee79 100644 --- a/src/memu/llm/lazyllm_client.py +++ b/src/memu/llm/lazyllm_client.py @@ -1,26 +1,28 @@ -from typing import Any -import logging -from pathlib import Path import asyncio -import lazyllm import functools +from typing import Any + +import lazyllm from lazyllm import LOG + class LazyLLMClient: """LAZYLLM client that relies on the LazyLLM framework.""" - DEFAULT_SOURCE = 'qwen' - def __init__(self, - *, - llm_source: str = None, - vlm_source: str = None, - embed_source: str = None, - stt_source: str = None, - chat_model: str = None, - vlm_model: str = None, - embed_model: str = None, - stt_model: str = None, - ): + DEFAULT_SOURCE = "qwen" + + def __init__( + self, + *, + llm_source: str = None, + vlm_source: str = None, + embed_source: str = None, + stt_source: str = None, + chat_model: str = None, + vlm_model: str = None, + embed_model: str = None, + stt_model: str = None, + ): self.llm_source = llm_source or self.DEFAULT_SOURCE self.vlm_source = vlm_source or self.DEFAULT_SOURCE self.embed_source = embed_source or self.DEFAULT_SOURCE @@ -39,14 +41,13 @@ async def _call_async(self, client: Any, *args: Any, **kwargs: Any) -> Any: else: return await asyncio.to_thread(client, *args) - async def summarize( - self, - text: str, - *, - max_tokens: int | None = None, - system_prompt: str | None = None, - ) -> str: + self, + text: str, + *, + max_tokens: int | None = None, + system_prompt: str | None = None, + ) -> str: """ Generate a summary or response for the input text using the configured LLM backend. @@ -54,68 +55,69 @@ async def summarize( text: The input text to summarize or process. max_tokens: (Optional) Maximum number of tokens to generate. system_prompt: (Optional) System instruction to guide the LLM behavior. - Return: + Return: The generated summary text as a string. """ - client = lazyllm.namespace('MEMU').OnlineModule(source=self.llm_source, model=self.chat_model, type='llm') - prompt = system_prompt or 'Summarize the text in one short paragraph.' - full_prompt = f'{prompt}\n\ntext:\n{text}' - LOG.debug(f'Summarizing text with {self.llm_source}/{self.chat_model}') + client = lazyllm.namespace("MEMU").OnlineModule(source=self.llm_source, model=self.chat_model, type="llm") + prompt = system_prompt or "Summarize the text in one short paragraph." 
+ full_prompt = f"{prompt}\n\ntext:\n{text}" + LOG.debug(f"Summarizing text with {self.llm_source}/{self.chat_model}") response = await self._call_async(client, full_prompt) return response - + async def vision( - self, - prompt: str, - image_path: str, - *, - max_tokens: int | None = None, - system_prompt: str | None = None, - ) -> tuple[str, Any]: + self, + prompt: str, + image_path: str, + *, + max_tokens: int | None = None, + system_prompt: str | None = None, + ) -> tuple[str, Any]: """ Process an image with a text prompt using the configured VLM (Vision-Language Model). - + Args: prompt: Text prompt describing the request or question about the image. image_path: Path to the image file to be analyzed. max_tokens: (Optional) Maximum number of tokens to generate. system_prompt: (Optional) System instruction to guide the VLM behavior. - Return: + Return: A tuple containing the generated text response and None (reserved for metadata). """ - client = lazyllm.namespace('MEMU').OnlineModule(source=self.vlm_source, model=self.vlm_model, type='vlm') - LOG.debug(f'Processing image with {self.vlm_source}/{self.vlm_model}: {image_path}') + client = lazyllm.namespace("MEMU").OnlineModule(source=self.vlm_source, model=self.vlm_model, type="vlm") + LOG.debug(f"Processing image with {self.vlm_source}/{self.vlm_model}: {image_path}") # LazyLLM VLM accepts prompt as first positional argument and image_path as keyword argument response = await self._call_async(client, prompt, lazyllm_files=image_path) return response, None async def embed( - self, - texts: list[str], - batch_size: int = 10, - ) -> list[list[float]]: + self, + texts: list[str], + batch_size: int = 10, + ) -> list[list[float]]: """ Generate vector embeddings for a list of text strings. Args: texts: List of text strings to embed. batch_size: (Optional) Batch size for processing embeddings (default: 10). - Return: + Return: A list of embedding vectors (list of floats), one for each input text. """ - client = lazyllm.namespace('MEMU').OnlineModule(source=self.embed_source, model=self.embed_model, - type='embed', batch_size=batch_size) - LOG.debug(f'embed {len(texts)} texts with {self.embed_source}/{self.embed_model}') + client = lazyllm.namespace("MEMU").OnlineModule( + source=self.embed_source, model=self.embed_model, type="embed", batch_size=batch_size + ) + LOG.debug(f"embed {len(texts)} texts with {self.embed_source}/{self.embed_model}") response = await self._call_async(client, texts) return response async def transcribe( - self, - audio_path: str, - language: str | None = None, - prompt: str | None = None, - ) -> str: + self, + audio_path: str, + language: str | None = None, + prompt: str | None = None, + ) -> str: """ Transcribe audio content to text using the configured STT (Speech-to-Text) backend. @@ -123,10 +125,10 @@ async def transcribe( audio_path: Path to the audio file to transcribe. language: (Optional) Language code of the audio content. prompt: (Optional) Text prompt to guide the transcription or translation. - Return: + Return: The transcribed text as a string. 
""" - client = lazyllm.namespace('MEMU').OnlineModule(source=self.stt_source, model=self.stt_model, type='stt') - LOG.debug(f'Transcribing audio with {self.stt_source}/{self.stt_model}: {audio_path}') + client = lazyllm.namespace("MEMU").OnlineModule(source=self.stt_source, model=self.stt_model, type="stt") + LOG.debug(f"Transcribing audio with {self.stt_source}/{self.stt_model}: {audio_path}") response = await self._call_async(client, audio_path) return response diff --git a/tests/test_lazyllm.py b/tests/test_lazyllm.py index f2ca64e2..579299b1 100644 --- a/tests/test_lazyllm.py +++ b/tests/test_lazyllm.py @@ -20,10 +20,10 @@ async def test_lazyllm_client(): """Test LazyLLMClient with basic operations.""" - + print("LazyLLM Backend Test") print("=" * 60) - + # Get API key from environment try: client = LazyLLMClient( @@ -31,60 +31,61 @@ async def test_lazyllm_client(): vlm_source="qwen", embed_source="qwen", stt_source="qwen", - chat_model = "qwen-plus", - vlm_model = "qwen-vl-plus", - embed_model = "text-embedding-v3", - stt_model = "qwen-audio-turbo" + chat_model="qwen-plus", + vlm_model="qwen-vl-plus", + embed_model="text-embedding-v3", + stt_model="qwen-audio-turbo", ) print("✓ LazyLLMClient initialized successfully") except Exception as e: print(f"❌ Failed to initialize LazyLLMClient: {e}") return False - + # Test 1: Summarization print("\n[Test 1] Testing summarization...") try: test_text = "这是一段关于Python编程的文本。Python是一种高级编程语言,具有简单易学的语法。它被广泛用于数据分析、机器学习和Web开发。" result = await client.summarize(test_text) - print(f"✓ Summarization successful") + print("✓ Summarization successful") print(f" Result: {result[:100]}...") except Exception as e: print(f"❌ Summarization failed: {e}") import traceback + traceback.print_exc() - + # Test 2: Embedding print("\n[Test 2] Testing embedding...") try: test_texts = ["Hello world", "How are you", "Nice to meet you"] embeddings = await client.embed(test_texts) - print(f"✓ Embedding successful") + print("✓ Embedding successful") print(f" Generated {len(embeddings)} embeddings") if embeddings and embeddings[0]: print(f" Embedding dimension: {len(embeddings[0])}") except Exception as e: print(f"❌ Embedding failed: {e}") import traceback + traceback.print_exc() - + # Test 3: Vision (requires image file) print("\n[Test 3] Testing vision...") test_image_path = "examples/resources/images/image1.png" if os.path.exists(test_image_path): try: - result, response = await client.vision( - prompt="描述这张图片的内容", - image_path=test_image_path - ) - print(f"✓ Vision successful") + result, response = await client.vision(prompt="描述这张图片的内容", image_path=test_image_path) + print("✓ Vision successful") print(f" Result: {result[:100]}...") except Exception as e: print(f"❌ Vision failed: {e}") import traceback + traceback.print_exc() else: print(f"⚠ Skipped: Test image not found at {test_image_path}") + if __name__ == "__main__": success = asyncio.run(test_lazyllm_client()) sys.exit(0 if success else 1) From cb23ef5801fbf4e744ee79d69edc26793906dfa4 Mon Sep 17 00:00:00 2001 From: Wu Date: Thu, 29 Jan 2026 00:49:56 +0900 Subject: [PATCH 12/14] fix: build check --- examples/example_5_with_lazyllm_client.py | 2 +- src/memu/llm/lazyllm_client.py | 16 ++++++++-------- tests/test_lazyllm.py | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/examples/example_5_with_lazyllm_client.py b/examples/example_5_with_lazyllm_client.py index a6a300c8..7e64cd46 100644 --- a/examples/example_5_with_lazyllm_client.py +++ b/examples/example_5_with_lazyllm_client.py @@ -29,7 +29,7 @@ if 
src_path not in sys.path: sys.path.insert(0, src_path) -from memu.app import MemoryService +from memu.app import MemoryService # noqa: E402 # ========================================== # PART 1: Conversation Memory Processing diff --git a/src/memu/llm/lazyllm_client.py b/src/memu/llm/lazyllm_client.py index 4418ee79..2cfa2e2b 100644 --- a/src/memu/llm/lazyllm_client.py +++ b/src/memu/llm/lazyllm_client.py @@ -14,14 +14,14 @@ class LazyLLMClient: def __init__( self, *, - llm_source: str = None, - vlm_source: str = None, - embed_source: str = None, - stt_source: str = None, - chat_model: str = None, - vlm_model: str = None, - embed_model: str = None, - stt_model: str = None, + llm_source: str | None = None, + vlm_source: str | None = None, + embed_source: str | None = None, + stt_source: str | None = None, + chat_model: str | None = None, + vlm_model: str | None = None, + embed_model: str | None = None, + stt_model: str | None = None, ): self.llm_source = llm_source or self.DEFAULT_SOURCE self.vlm_source = vlm_source or self.DEFAULT_SOURCE diff --git a/tests/test_lazyllm.py b/tests/test_lazyllm.py index 579299b1..c1246032 100644 --- a/tests/test_lazyllm.py +++ b/tests/test_lazyllm.py @@ -15,7 +15,7 @@ src_path = os.path.abspath("src") sys.path.insert(0, src_path) -from memu.llm.lazyllm_client import LazyLLMClient +from memu.llm.lazyllm_client import LazyLLMClient # noqa: E402 async def test_lazyllm_client(): From 288347f15997d16d314ac3faace3dad87a5eba56 Mon Sep 17 00:00:00 2001 From: Wu Date: Thu, 29 Jan 2026 00:55:56 +0900 Subject: [PATCH 13/14] fix: build check --- tests/test_lazyllm.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_lazyllm.py b/tests/test_lazyllm.py index c1246032..d622b709 100644 --- a/tests/test_lazyllm.py +++ b/tests/test_lazyllm.py @@ -15,7 +15,7 @@ src_path = os.path.abspath("src") sys.path.insert(0, src_path) -from memu.llm.lazyllm_client import LazyLLMClient # noqa: E402 +from memu.llm.lazyllm_client import LazyLLMClient # noqa: E402 async def test_lazyllm_client(): @@ -44,7 +44,7 @@ async def test_lazyllm_client(): # Test 1: Summarization print("\n[Test 1] Testing summarization...") try: - test_text = "这是一段关于Python编程的文本。Python是一种高级编程语言,具有简单易学的语法。它被广泛用于数据分析、机器学习和Web开发。" + test_text = "这是一段关于Python编程的文本。Python是一种高级编程语言,具有简单易学的语法。它被广泛用于数据分析、机器学习和Web开发。" # noqa: RUF001 result = await client.summarize(test_text) print("✓ Summarization successful") print(f" Result: {result[:100]}...") @@ -74,7 +74,7 @@ async def test_lazyllm_client(): test_image_path = "examples/resources/images/image1.png" if os.path.exists(test_image_path): try: - result, response = await client.vision(prompt="描述这张图片的内容", image_path=test_image_path) + result, _ = await client.vision(prompt="描述这张图片的内容", image_path=test_image_path) print("✓ Vision successful") print(f" Result: {result[:100]}...") except Exception as e: From bf5006f3dd9e5edd3e30e0092185d0dfa8c5b244 Mon Sep 17 00:00:00 2001 From: Wu Date: Thu, 29 Jan 2026 01:07:29 +0900 Subject: [PATCH 14/14] fix: build check --- src/memu/llm/lazyllm_client.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/memu/llm/lazyllm_client.py b/src/memu/llm/lazyllm_client.py index 2cfa2e2b..eb1c40f7 100644 --- a/src/memu/llm/lazyllm_client.py +++ b/src/memu/llm/lazyllm_client.py @@ -1,8 +1,8 @@ import asyncio import functools -from typing import Any +from typing import Any, cast -import lazyllm +import lazyllm # type: ignore[import-untyped] from lazyllm import LOG @@ -63,7 +63,7 @@ async def 
summarize( full_prompt = f"{prompt}\n\ntext:\n{text}" LOG.debug(f"Summarizing text with {self.llm_source}/{self.chat_model}") response = await self._call_async(client, full_prompt) - return response + return cast(str, response) async def vision( self, @@ -110,7 +110,7 @@ async def embed( ) LOG.debug(f"embed {len(texts)} texts with {self.embed_source}/{self.embed_model}") response = await self._call_async(client, texts) - return response + return cast(list[list[float]], response) async def transcribe( self, @@ -131,4 +131,4 @@ async def transcribe( client = lazyllm.namespace("MEMU").OnlineModule(source=self.stt_source, model=self.stt_model, type="stt") LOG.debug(f"Transcribing audio with {self.stt_source}/{self.stt_model}: {audio_path}") response = await self._call_async(client, audio_path) - return response + return cast(str, response)
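For reference, here is a minimal usage sketch of the `LazyLLMClient` API introduced by this series (constructor plus `summarize()` and `embed()`, as defined in src/memu/llm/lazyllm_client.py). The model names and the reliance on a Qwen API key in the environment (e.g. LAZYLLM_QWEN_API_KEY, as in the examples) are illustrative assumptions, not requirements of the client itself.

```python
# Minimal sketch: exercising the LazyLLMClient added in this series.
# Assumes LazyLLM resolves the Qwen credentials from the environment
# (e.g. LAZYLLM_QWEN_API_KEY, as used in the accompanying examples).
import asyncio

from memu.llm.lazyllm_client import LazyLLMClient


async def main() -> None:
    client = LazyLLMClient(
        llm_source="qwen",
        embed_source="qwen",
        chat_model="qwen-plus",           # chat model used by summarize()
        embed_model="text-embedding-v3",  # embedding model used by embed()
    )

    # summarize() wraps an OnlineModule chat call and returns a string.
    summary = await client.summarize(
        "LazyLLM routes chat, vision, embedding and STT calls to online providers.",
        system_prompt="Summarize the text in one short sentence.",
    )
    print(summary)

    # embed() returns one embedding vector per input text.
    vectors = await client.embed(["hello world", "memU memory service"])
    print(len(vectors), len(vectors[0]))


if __name__ == "__main__":
    asyncio.run(main())
```

Vision and transcription follow the same call pattern via `client.vision(prompt, image_path)` and `client.transcribe(audio_path)`, as exercised in tests/test_lazyllm.py.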