ValueCell-ai · vcfgv · Oct 15, 2025 · Oct 13, 2025 · Oct 14, 2025 · Oct 14, 2025
diff --git a/.env.example b/.env.example
@@ -21,9 +21,10 @@ AGENT_DEBUG_MODE=false
 # Model Provider settings
 # Get your key from: https://openrouter.ai/
 OPENROUTER_API_KEY=
+GOOGLE_API_KEY=
 
 # Model IDs for OpenRouter
-PLANNER_MODEL_ID=google/gemini-2.5-pro
+PLANNER_MODEL_ID=google/gemini-2.5-flash
 SEC_PARSER_MODEL_ID=openai/gpt-4o-mini
 SEC_ANALYSIS_MODEL_ID=deepseek/deepseek-chat-v3-0324
 AI_HEDGE_FUND_PARSER_MODEL_ID=google/gemini-2.5-flash

diff --git a/.gitignore b/.gitignore
@@ -213,11 +213,17 @@ __marimo__/
 *.db-shm
 *.sqlite
 *.sqlite3
+lancedb
+.lancedb
 
 # Downloads
 *csv
 *xlsx
 *xls
 
 # MACOS
-.DS_Store
+.DS_Store
+
+# Local files
+logs
+.knowledge
diff --git a/python/configs/agent_cards/investment_research_agent.json b/python/configs/agent_cards/investment_research_agent.json
@@ -0,0 +1,57 @@
+{
+    "name": "ResearchAgent",
+    "display_name": "Research Agent",
+    "url": "http://localhost:10004/",
+    "description": "Research Agent analyzes SEC filings (10-K, 10-Q, 13F, 8-K) and internal knowledge to produce sourceable, actionable summaries and data extracts. It can extract financial line items, reconcile filings with knowledge search results, summarize holdings from 13F filings, and recommend follow-ups with exact tool calls.",
+    "skills": [
+        {
+            "id": "extract_financials",
+            "name": "Extract financial line items",
+            "description": "Retrieve and extract numeric financial line items (revenue, net income, EPS, cash flow) and cite the filing source.",
+            "examples": [
+                "What was Apple's revenue in Q4 2024? Provide the filing source.",
+                "Show net income and EPS for Tesla in the most recent 10-Q."
+            ],
+            "tags": ["10-K","10-Q","financials","filings"]
+        },
+        {
+            "id": "analyze_13f_filings",
+            "name": "Analyze 13F holdings",
+            "description": "Parse 13F filings to extract institutional holdings, position sizes, and changes over time.",
+            "examples": [
+                "What are the top holdings of Berkshire Hathaway in the latest 13F filing?",
+                "How has Vanguard's top 10 holdings changed over the last four quarters?"
+            ],
+            "tags": ["13F","holdings","institutional"]
+        },
+        {
+            "id": "synthesize_context",
+            "name": "Synthesize context and commentary",
+            "description": "Combine extracted filing data with knowledge-base search results to provide interpretation, analyst commentary, and historical context, always citing sources.",
+            "examples": [
+                "Summarize analyst commentary on Apple's guidance and cite sources.",
+                "Compare revenue trends for company X over the last three years with sources."
+            ],
+            "tags": ["analysis","context","knowledge-base"]
+        },
+        {
+            "id": "monitor_and_alert",
+            "name": "Monitor filings and notify",
+            "description": "Track company filings and notify users of new relevant SEC documents, with a short sourceable summary and suggested follow-ups.",
+            "examples": [
+                "Notify me when Tesla files its next 10-Q and provide a one-paragraph summary with sources.",
+                "Alert me to significant changes in Berkshire Hathaway's 13F holdings."
+            ],
+            "tags": ["monitor","alerts","tracking"]
+        }
+    ],
+    "enabled": true,
+    "metadata": {
+        "version": "1.0.0",
+        "author": "ValueCell Team",
+        "tags": [
+            "sec filings",
+            "fundamental analysis"
+        ]
+    }
+}
diff --git a/python/pyproject.toml b/python/pyproject.toml
@@ -18,10 +18,14 @@ dependencies = [
     "yfinance>=0.2.65",
     "requests>=2.32.5",
     "akshare>=1.17.44",
-    "agno[openai]>=2.0,<3.0",
+    "agno[openai, google, lancedb]>=2.0,<3.0",
     "edgartools>=4.12.2",
     "sqlalchemy>=2.0.43",
     "aiosqlite>=0.19.0",
+    "unstructured>=0.18.15",
+    "markdown>=3.9",
+    "loguru>=0.7.3",
+    "aiofiles>=24.1.0",
 ]
 
 [project.optional-dependencies]
@@ -109,4 +113,4 @@ skip_glob = [
     "**/tests/**",
     "docs/**",
     "**/docs/**",
-]
+]
diff --git a/python/uv.lock b/python/uv.lock
diff --git a/python/valuecell/agents/research_agent/__init__.py b/python/valuecell/agents/research_agent/__init__.py
diff --git a/python/valuecell/agents/research_agent/__main__.py b/python/valuecell/agents/research_agent/__main__.py
@@ -0,0 +1,9 @@
+import asyncio
+
+from valuecell.core.agent.decorator import create_wrapped_agent
+
+from .core import ResearchAgent
+
+if __name__ == "__main__":
+    # Wrap the agent class, then run its serve() coroutine until it finishes.
+    asyncio.run(create_wrapped_agent(ResearchAgent).serve())
diff --git a/python/valuecell/agents/research_agent/core.py b/python/valuecell/agents/research_agent/core.py
@@ -0,0 +1,77 @@
+import os
+from typing import AsyncGenerator, Iterator
+
+from agno.agent import Agent, RunOutputEvent
+from agno.db.in_memory import InMemoryDb
+from agno.models.google import Gemini
+from edgar import set_identity
+from loguru import logger
+
+from valuecell.agents.research_agent.knowledge import knowledge
+from valuecell.agents.research_agent.prompts import (
+    KNOWLEDGE_AGENT_EXPECTED_OUTPUT,
+    KNOWLEDGE_AGENT_INSTRUCTION,
+)
+from valuecell.agents.research_agent.sources import fetch_sec_filings
+from valuecell.core.agent.responses import streaming
+from valuecell.core.types import BaseAgent, StreamResponse
+from valuecell.utils.env import agent_debug_mode_enabled
+
+
+class ResearchAgent(BaseAgent):
+    """Research agent combining an SEC filings tool with a knowledge store.
+
+    Wraps an agno ``Agent`` that uses a Gemini model, exposes
+    ``fetch_sec_filings`` as a tool, and has knowledge search enabled on the
+    module-level ``knowledge`` object.
+    """
+
+    def __init__(self, **kwargs):
+        """Build the underlying agno agent and register the SEC identity.
+
+        Args:
+            **kwargs: Forwarded unchanged to ``BaseAgent.__init__``.
+        """
+        super().__init__(**kwargs)
+        self.knowledge_research_agent = Agent(
+            model=Gemini(id="gemini-2.5-flash"),
+            instructions=[KNOWLEDGE_AGENT_INSTRUCTION],
+            expected_output=KNOWLEDGE_AGENT_EXPECTED_OUTPUT,
+            tools=[fetch_sec_filings],
+            knowledge=knowledge,
+            db=InMemoryDb(),
+            # context
+            search_knowledge=True,
+            add_datetime_to_context=True,
+            add_history_to_context=True,
+            num_history_runs=3,
+            read_chat_history=True,
+            enable_session_summaries=True,
+            # configuration
+            debug_mode=agent_debug_mode_enabled(),
+        )
+        # os.getenv returns None when SEC_EMAIL is missing; set_identity expects
+        # a string, so skip the call and warn instead of handing it None.
+        sec_email = os.getenv("SEC_EMAIL")
+        if sec_email:
+            set_identity(sec_email)
+        else:
+            logger.warning(
+                "SEC_EMAIL is not set; SEC identity was not configured and "
+                "SEC filing requests may fail"
+            )
+
+    async def stream(
+        self, query: str, conversation_id: str, task_id: str
+    ) -> AsyncGenerator[StreamResponse, None]:
+        """Run the agent on ``query`` and relay its events as stream responses.
+
+        Args:
+            query: User request passed to the underlying agent.
+            conversation_id: Passed to the agent as its ``session_id``.
+            task_id: Not used by this implementation.
+
+        Yields:
+            A message chunk for every ``RunContent`` event, a tool-call notice
+            for every ``ToolCallStarted`` / ``ToolCallCompleted`` event, and a
+            final ``done`` response once the run is exhausted. Other event
+            types are ignored.
+        """
+        # The streamed run is consumed with ``async for`` below, so it must not
+        # be annotated as a synchronous ``Iterator``.
+        response_stream = self.knowledge_research_agent.arun(
+            query,
+            stream=True,
+            stream_intermediate_steps=True,
+            session_id=conversation_id,
+        )
+        async for event in response_stream:
+            if event.event == "RunContent":
+                yield streaming.message_chunk(event.content)
+            elif event.event == "ToolCallStarted":
+                yield streaming.tool_call_started(
+                    event.tool.tool_call_id, event.tool.tool_name
+                )
+            elif event.event == "ToolCallCompleted":
+                yield streaming.tool_call_completed(
+                    event.tool.result, event.tool.tool_call_id, event.tool.tool_name
+                )
+        logger.info("Financial data analysis completed")
+
+        yield streaming.done()
+
+
+if __name__ == "__main__":
+    import asyncio
+
+    async def main():
+        """Stream one sample query through the agent and print each response."""
+        prompt = "Provide a summary of Apple's 2024 all quarterly and annual reports."
+        demo_agent = ResearchAgent()
+        async for item in demo_agent.stream(prompt, "test_session", "test_task"):
+            print(item)
+
+    asyncio.run(main())
diff --git a/python/valuecell/agents/research_agent/knowledge.py b/python/valuecell/agents/research_agent/knowledge.py
@@ -0,0 +1,25 @@
+from pathlib import Path
+from typing import Optional
+
+from agno.knowledge.chunking.markdown import MarkdownChunking
+from agno.knowledge.knowledge import Knowledge
+from agno.knowledge.reader.markdown_reader import MarkdownReader
+
+from .vdb import vector_db
+
+# Shared knowledge object backed by the package vector DB (max_results=10).
+knowledge = Knowledge(max_results=10, vector_db=vector_db)
+
+# Markdown reader configured with the MarkdownChunking strategy.
+md_reader = MarkdownReader(
+    chunking_strategy=MarkdownChunking(),
+)
+
+
+async def insert_md_file_to_knowledge(
+    name: str, path: Path, metadata: Optional[dict] = None
+):
+    """Add the Markdown file at ``path`` to the module-level knowledge object.
+
+    Args:
+        name: Name passed to ``knowledge.add_content_async``.
+        path: Location of the Markdown file to add.
+        metadata: Optional metadata passed along with the content.
+    """
+    pending_insert = knowledge.add_content_async(
+        reader=md_reader,
+        metadata=metadata,
+        path=path,
+        name=name,
+    )
+    await pending_insert