diff --git a/python/valuecell/agents/research_agent/core.py b/python/valuecell/agents/research_agent/core.py
index 205917e39..f645c8f6c 100644
--- a/python/valuecell/agents/research_agent/core.py
+++ b/python/valuecell/agents/research_agent/core.py
@@ -12,6 +12,7 @@
KNOWLEDGE_AGENT_INSTRUCTION,
)
from valuecell.agents.research_agent.sources import (
+ fetch_ashare_filings,
fetch_event_sec_filings,
fetch_periodic_sec_filings,
web_search,
@@ -29,6 +30,7 @@ def __init__(self, **kwargs):
tools = [
fetch_periodic_sec_filings,
fetch_event_sec_filings,
+ fetch_ashare_filings,
web_search,
]
self.knowledge_research_agent = Agent(
diff --git a/python/valuecell/agents/research_agent/knowledge.py b/python/valuecell/agents/research_agent/knowledge.py
index fd07ab881..8be8447ff 100644
--- a/python/valuecell/agents/research_agent/knowledge.py
+++ b/python/valuecell/agents/research_agent/knowledge.py
@@ -4,6 +4,7 @@
from agno.knowledge.chunking.markdown import MarkdownChunking
from agno.knowledge.knowledge import Knowledge
from agno.knowledge.reader.markdown_reader import MarkdownReader
+from agno.knowledge.reader.pdf_reader import PDFReader
from .vdb import vector_db
@@ -12,6 +13,7 @@
max_results=10,
)
md_reader = MarkdownReader(chunking_strategy=MarkdownChunking())
+# Shared reader for PDF documents, passed as `reader=` when ingesting PDFs by URL.
+# NOTE(review): this reuses MarkdownChunking, the same strategy as md_reader —
+# confirm it chunks plain PDF-extracted text acceptably.
+pdf_reader = PDFReader(chunking_strategy=MarkdownChunking())
async def insert_md_file_to_knowledge(
@@ -23,3 +25,11 @@ async def insert_md_file_to_knowledge(
metadata=metadata,
reader=md_reader,
)
+
+
+async def insert_pdf_file_to_knowledge(url: str, metadata: Optional[dict] = None):
+    """Ingest a PDF, addressed by URL, into the shared knowledge base.
+
+    Args:
+        url: Location of the PDF; forwarded unchanged to
+            ``knowledge.add_content_async``.
+        metadata: Optional metadata dict forwarded alongside the content.
+    """
+    # Parsing is delegated to the module-level ``pdf_reader``.
+    ingest_kwargs = dict(url=url, metadata=metadata, reader=pdf_reader)
+    await knowledge.add_content_async(**ingest_kwargs)
diff --git a/python/valuecell/agents/research_agent/prompts.py b/python/valuecell/agents/research_agent/prompts.py
index 56b16698a..2d7a24d49 100644
--- a/python/valuecell/agents/research_agent/prompts.py
+++ b/python/valuecell/agents/research_agent/prompts.py
@@ -6,6 +6,7 @@
- fetch_periodic_sec_filings(ticker_or_cik, forms, year?, quarter?, limit?): Use this for scheduled reports like 10-K/10-Q when you need primary-source facts (revenue, net income, MD&A text). Prefer batching by year to reduce calls. Note: year/quarter filters apply to filing_date (edgar behavior), not period_of_report. If year is omitted, the tool returns the latest filings using `limit` (default 10). If quarter is provided, year must also be provided.
- fetch_event_sec_filings(ticker_or_cik, forms, start_date?, end_date?, limit?): Use this for event-driven filings like 8-K and ownership forms (3/4/5). Use date ranges and limits to control scope.
+- fetch_ashare_filings(stock_code, report_types, year?, quarter?, limit?): Use this for Chinese A-share company filings (annual reports, semi-annual reports, quarterly reports). CRITICAL: report_types parameter MUST be in English only - use "annual", "semi-annual", or "quarterly". Never use Chinese terms like "年报", "半年报", or "季报". The function will reject Chinese parameters with an error.
- Knowledge base search: Use the agent's internal knowledge index to find summaries, historical context, analyst commentary, and previously ingested documents.
@@ -18,6 +19,15 @@
- Suggest follow-up queries for additional details
3. Smart defaults: If year/quarter are unspecified for periodic filings, default to the most recent available data rather than calling multiple periods. For event-driven filings, use a recent date window (e.g., last 90 days) with a small limit unless the user specifies otherwise.
4. Knowledge base first: For broad questions or interpretive queries, search the knowledge base before calling filing tools. Only fetch new filings if the knowledge base lacks the specific data needed.
+
+A-share filings (fetch_ashare_filings) specific guidelines:
+- ALWAYS use English report types: "annual", "semi-annual", "quarterly"
+- NEVER use Chinese terms: "年报", "半年报", "季报" will cause errors
+- Stock codes should be 6-digit format (e.g., "600519" for Kweichow Moutai, "000001" for Ping An Bank)
+- When users mention Chinese report types, translate them to English before calling the function:
+ * 年报/年度报告 → "annual"
+ * 半年报/半年度报告/中报 → "semi-annual"
+ * 季报/季度报告/一季报/三季报 → "quarterly"
@@ -39,6 +49,13 @@
3. Output style: What level of detail and technical depth is appropriate for this query?
+
+Example: A-share filing query (user asks "茅台2024年年报的营收是多少?"):
+Tool plan: User mentioned "年报" (annual report) in Chinese, so translate to "annual" before calling fetch_ashare_filings('600519', 'annual', year=2024).
+
+CRITICAL NOTE: In this example, the user asked about "年报" (annual report) in Chinese, but the tool call correctly used "annual" in English. Always translate Chinese report types to English before calling fetch_ashare_filings.
+
+
1. Clarify: If the user's request lacks a ticker/CIK, form type, or time range, ask a single clarifying question.
2. Primary check: If the user requests factual items (financial line items, footnote detail, MD&A text), call `fetch_periodic_sec_filings` (10-Q/10-K) with specific filters. For corporate events or disclosures, call `fetch_event_sec_filings` (8-K/3/4/5) with a relevant date range.
@@ -182,6 +199,32 @@
---
+Example 4 - A-share filing query (user asks "茅台2024年年报的营收是多少?"):
+Tool plan: User mentioned "年报" (annual report) in Chinese, so translate to "annual" before calling fetch_ashare_filings('600519', 'annual', year=2024).
+
+Response:
+"According to Kweichow Moutai's 2024 annual report, the company achieved operating revenue of 150.67 billion yuan [2024 Annual Report](file://...), representing a 15.2% year-over-year increase. Moutai liquor sales contributed 136.89 billion yuan (90.9% of total revenue), while series liquor sales reached 13.78 billion yuan [same source].
+
+This revenue level represents a historic high for Moutai, primarily driven by product mix optimization and stable market demand growth [knowledge base: 2024 performance analysis].
+
+Would you like me to analyze Moutai's profitability metrics further, or compare its revenue performance with other liquor companies?"
+
+---
+
+Example 5 - A-share quarterly filing query (user asks "茅台2024年第三季度报告的净利润是多少?"):
+Tool plan: User mentioned "第三季度报告" (quarterly report) in Chinese, so translate to "quarterly" before calling fetch_ashare_filings('600519', 'quarterly', year=2024, quarter=3).
+
+Response:
+"According to Kweichow Moutai's Q3 2024 quarterly report, the company achieved net profit of 36.85 billion yuan [Q3 2024 Quarterly Report](file://...), representing a 12.8% year-over-year increase. Cumulative net profit for the first three quarters reached 110.52 billion yuan, up 14.1% year-over-year [same source].
+
+The Q3 net profit margin reached 24.5%, an improvement of 0.8 percentage points compared to the same period last year, indicating continued improvement in the company's profitability [knowledge base: Q3 2024 analysis].
+
+Would you like me to analyze Moutai's quarterly profit trends, or compare performance across different quarters?"
+
+CRITICAL NOTE: In these examples, users asked about "年报" (annual report) and "第三季度报告" (quarterly report) in Chinese, but the tool calls correctly used "annual" and "quarterly" in English. Always translate Chinese report types to English before calling fetch_ashare_filings.
+
+---
+
Note: In all examples, tool calls are batched when possible, sources are cited naturally, and the response style matches the query type. Each response ends with a contextual, actionable follow-up suggestion to encourage continued exploration.
"""
diff --git a/python/valuecell/agents/research_agent/schemas.py b/python/valuecell/agents/research_agent/schemas.py
index 3f343588d..adc07da9a 100644
--- a/python/valuecell/agents/research_agent/schemas.py
+++ b/python/valuecell/agents/research_agent/schemas.py
@@ -15,3 +15,27 @@ class SECFilingResult:
name: str
path: Path
metadata: SECFilingMetadata
+
+
+@dataclass
+class AShareFilingMetadata:
+    """Descriptive metadata attached to one A-share filing."""
+
+    # Report type label: annual report, semi-annual report, quarterly report, etc.
+    doc_type: str
+    # Company name
+    company: str
+    # Stock code
+    stock_code: str
+    # Market: SZSE or SSE
+    market: str
+    # Report period
+    period_of_report: str
+    # Filing date
+    filing_date: str
+    # Announcement title, kept so callers can filter by quarter
+    announcement_title: str = ""
+
+
+@dataclass
+class AShareFilingResult:
+    """One fetched A-share filing: a display name, its location and its metadata."""
+
+    # Synthetic file name of the form "<stock_code>_<doc_type>_<period>.pdf"
+    name: str
+    # Location of the document. The A-share fetcher stores the remote PDF URL
+    # here as a plain string (nothing is saved locally), so both types are
+    # allowed; wrapping a URL in Path would corrupt the "http://" prefix.
+    path: Path | str
+    # Descriptive metadata for the filing
+    metadata: AShareFilingMetadata
diff --git a/python/valuecell/agents/research_agent/sources.py b/python/valuecell/agents/research_agent/sources.py
index 51861dbf7..5284e56ce 100644
--- a/python/valuecell/agents/research_agent/sources.py
+++ b/python/valuecell/agents/research_agent/sources.py
@@ -1,9 +1,11 @@
import os
+import re
from datetime import date, datetime
from pathlib import Path
from typing import Iterable, List, Optional, Sequence
import aiofiles
+import aiohttp
from agno.agent import Agent
from agno.models.google import Gemini
from agno.models.openrouter import OpenRouter
@@ -12,8 +14,13 @@
from valuecell.utils.path import get_knowledge_path
-from .knowledge import insert_md_file_to_knowledge
-from .schemas import SECFilingMetadata, SECFilingResult
+from .knowledge import insert_md_file_to_knowledge, insert_pdf_file_to_knowledge
+from .schemas import (
+ AShareFilingMetadata,
+ AShareFilingResult,
+ SECFilingMetadata,
+ SECFilingResult,
+)
def _ensure_list(value: str | Sequence[str] | None) -> List[str]:
@@ -24,6 +31,33 @@ def _ensure_list(value: str | Sequence[str] | None) -> List[str]:
return list(value)
+def _extract_quarter_from_title(title: str) -> Optional[int]:
+    """Guess which quarter an announcement title refers to.
+
+    Args:
+        title: Announcement title string
+
+    Returns:
+        Quarter number (1-4) for the first keyword group that matches,
+        None when the title is empty or matches nothing
+    """
+    if not title:
+        return None
+
+    # Checked in insertion order, so quarter 2 ("半年度") is tested before the
+    # broader quarter 4 keywords ("年度报告").
+    keywords_by_quarter = {
+        1: r"第一季度|一季度|1季度|Q1",
+        2: r"第二季度|二季度|2季度|Q2|半年度|中期",  # semi-annual counts as Q2
+        3: r"第三季度|三季度|3季度|Q3",
+        4: r"第四季度|四季度|4季度|Q4|年度报告|年报",  # annual counts as Q4
+    }
+
+    return next(
+        (
+            quarter
+            for quarter, keywords in keywords_by_quarter.items()
+            if re.search(keywords, title, re.IGNORECASE)
+        ),
+        None,
+    )
+
+
def _parse_date(d: str | date | None) -> Optional[date]:
if d is None:
return None
@@ -225,3 +259,413 @@ async def _web_search_google(query: str) -> str:
model = Gemini(id="gemini-2.5-flash", search=True)
response = await Agent(model=model).arun(query)
return response.content
+
+
+def _normalize_stock_code(stock_code: str) -> str:
+    """Reduce a user-supplied stock code to its 6-digit form.
+
+    Prefixes/suffixes such as "SH600519" or "000001.SZ" are stripped, short
+    codes are left-padded with zeros and overlong digit runs are cut to the
+    first six digits.
+
+    Args:
+        stock_code: Raw stock code; integers are accepted as well.
+
+    Returns:
+        The 6-digit stock code.
+
+    Raises:
+        ValueError: If the input contains no digits at all. Previously this
+            produced "000000", which could then be resolved to an unrelated
+            company by the orgId lookup.
+    """
+    raw = "" if stock_code is None else str(stock_code)
+    digits = re.sub(r"\D", "", raw)
+    if not digits:
+        raise ValueError(f"Stock code must contain digits, got: {stock_code!r}")
+    # zfill pads short codes; the slice trims anything beyond six digits.
+    return digits.zfill(6)[:6]
+
+
+async def _write_and_ingest_ashare(
+    filings_data: List[dict],
+    knowledge_dir: Path,
+) -> List[AShareFilingResult]:
+    """Build result records for A-share filings and ingest their PDFs.
+
+    Despite the name, nothing is written to disk here: each filing is ingested
+    into the knowledge base directly from its remote PDF URL.
+
+    Args:
+        filings_data: Filing dicts. Required keys: ``stock_code``, ``doc_type``,
+            ``period_of_report``, ``company``, ``market``, ``filing_date``.
+            Optional keys: ``pdf_url``, ``announcement_title``.
+        knowledge_dir: Knowledge directory; created if it does not exist.
+
+    Returns:
+        One ``AShareFilingResult`` per input filing, in input order. ``path``
+        holds the PDF URL, or "" when the filing has none.
+
+    Raises:
+        KeyError: If a filing dict lacks one of the required keys.
+    """
+    knowledge_dir.mkdir(parents=True, exist_ok=True)
+    results: List[AShareFilingResult] = []
+
+    for filing_data in filings_data:
+        stock_code = filing_data["stock_code"]
+        doc_type = filing_data["doc_type"]
+        period = filing_data["period_of_report"]
+        pdf_url = filing_data.get("pdf_url", "")
+
+        metadata = AShareFilingMetadata(
+            doc_type=doc_type,
+            company=filing_data["company"],
+            stock_code=stock_code,
+            market=filing_data["market"],
+            period_of_report=period,
+            filing_date=filing_data["filing_date"],
+            announcement_title=filing_data.get("announcement_title", ""),
+        )
+
+        file_name = f"{stock_code}_{doc_type}_{period}.pdf"
+        results.append(
+            AShareFilingResult(name=file_name, path=pdf_url, metadata=metadata)
+        )
+
+        # Only ingest when there is something to download; an empty URL would
+        # otherwise be handed to the PDF reader.
+        if not pdf_url:
+            print(f"Warning: no PDF URL for {file_name}, skipping knowledge import")
+            continue
+
+        # Pass a copy so the knowledge layer cannot mutate the result's metadata.
+        await insert_pdf_file_to_knowledge(url=pdf_url, metadata=dict(vars(metadata)))
+
+    return results
+
+
+async def _get_correct_orgid(
+    stock_code: str, session: aiohttp.ClientSession
+) -> Optional[str]:
+    """Resolve the CNINFO orgId for a stock code via the top-search endpoint.
+
+    Args:
+        stock_code: Stock code (e.g., "002460")
+        session: aiohttp session used for the request
+
+    Returns:
+        Optional[str]: orgId of the entry whose ``code`` equals ``stock_code``;
+        otherwise the first entry's orgId; None on a non-200 response, an
+        empty result, or any error (which is printed, not raised)
+    """
+    search_url = "http://www.cninfo.com.cn/new/information/topSearch/query"
+    search_data = {"keyWord": stock_code}
+
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
+        "Accept": "*/*",
+        "Accept-Encoding": "gzip, deflate",
+        "Accept-Language": "zh-CN,zh;q=0.9",
+        "Connection": "keep-alive",
+        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
+        "Host": "www.cninfo.com.cn",
+        "Origin": "http://www.cninfo.com.cn",
+        "Referer": "http://www.cninfo.com.cn/new/commonUrl/pageOfSearch?url=disclosure/list/search&lastPage=index",
+        "X-Requested-With": "XMLHttpRequest",
+    }
+
+    try:
+        async with session.post(
+            search_url, headers=headers, data=search_data
+        ) as response:
+            if response.status != 200:
+                return None
+
+            candidates = await response.json()
+            if not candidates or len(candidates) == 0:
+                return None
+
+            # Prefer the entry whose code matches exactly; otherwise fall back
+            # to the first search hit.
+            exact = next(
+                (item for item in candidates if item.get("code") == stock_code),
+                None,
+            )
+            chosen = exact if exact is not None else candidates[0]
+            return chosen.get("orgId")
+
+    except Exception as e:
+        print(f"Error getting orgId for {stock_code}: {e}")
+
+    return None
+
+
+def _ashare_exchange(stock_code: str) -> tuple[str, str, str]:
+    """Map a 6-digit A-share code to CNINFO ``(column, plate, market)`` values."""
+    # Shenzhen-listed codes start with 0 (main board: 000/001/002/003),
+    # 2 (B shares) or 3 (ChiNext: 300/301). Everything else is queried as
+    # Shanghai, which was already the default.
+    if stock_code.startswith(("0", "2", "3")):
+        return "szse", "sz", "SZSE"
+    return "sse", "sh", "SSE"
+
+
+def _ashare_filing_date(adjunct_url: str, fallback: str) -> str:
+    """Return the ``YYYY-MM-DD`` date embedded in an ``adjunctUrl``, else ``fallback``."""
+    # The URL typically looks like "finalpage/2024-04-03/1219506510.PDF";
+    # searching for the date is sturdier than slicing fixed offsets.
+    match = re.search(r"\d{4}-\d{2}-\d{2}", adjunct_url or "")
+    return match.group(0) if match else fallback
+
+
+async def _fetch_cninfo_data(
+    stock_code: str,
+    report_types: List[str],
+    years: List[int],
+    quarters: List[int],
+    limit: int,
+) -> List[dict]:
+    """Fetch A-share filing records from the CNINFO announcement API.
+
+    Args:
+        stock_code: Normalized 6-digit stock code
+        report_types: Report types ("annual", "semi-annual", "quarterly");
+            any other value is queried as an annual report
+        years: Reporting years. Empty means the current year and the two
+            before it. Annual reports are published in the calendar year
+            AFTER the reporting year, so their announcement window is
+            shifted forward by one year.
+        quarters: Quarters (1-4) to keep for quarterly reports; empty keeps
+            all. Titles whose quarter cannot be detected are kept.
+        limit: Maximum number of records to return
+
+    Returns:
+        List[dict]: Filing dicts with keys ``stock_code``, ``company``,
+        ``market``, ``doc_type``, ``period_of_report``, ``filing_date``,
+        ``announcement_id``, ``announcement_title``, ``org_id``, ``content``
+        and ``pdf_url``. Empty when the orgId lookup fails. Request errors
+        are printed and the affected (type, year) pair is skipped.
+    """
+    if limit <= 0:
+        return []
+
+    # CNINFO API configuration
+    base_url = "http://www.cninfo.com.cn/new/hisAnnouncement/query"
+
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
+        "Accept": "*/*",
+        "Accept-Encoding": "gzip, deflate",
+        "Accept-Language": "zh-CN,zh;q=0.9",
+        "Connection": "keep-alive",
+        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
+        "Host": "www.cninfo.com.cn",
+        "Origin": "http://www.cninfo.com.cn",
+        "Referer": "http://www.cninfo.com.cn/new/commonUrl/pageOfSearch?url=disclosure/list/search&lastPage=index",
+        "X-Requested-With": "XMLHttpRequest",
+    }
+
+    # English report type -> CNINFO category (Chinese names are not mapped)
+    annual_category = "category_ndbg_szsh"
+    category_mapping = {
+        "annual": annual_category,
+        "semi-annual": "category_bndbg_szsh",
+        "quarterly": "category_sjdbg_szsh",
+    }
+
+    column, plate, market = _ashare_exchange(stock_code)
+
+    filings_data: List[dict] = []
+    current_year = datetime.now().year
+    target_years = (
+        years if years else [current_year, current_year - 1, current_year - 2]
+    )
+
+    async with aiohttp.ClientSession() as session:
+        # The announcement query needs the company's orgId next to its code.
+        org_id = await _get_correct_orgid(stock_code, session)
+        if not org_id:
+            print(f"Warning: Could not get orgId for stock {stock_code}")
+            return []
+
+        for report_type in report_types:
+            if len(filings_data) >= limit:
+                break
+
+            category = category_mapping.get(report_type, annual_category)
+            is_annual = category == annual_category
+
+            for target_year in target_years:
+                if len(filings_data) >= limit:
+                    break
+
+                # seDate filters by announcement date. The annual report for
+                # year N is published in year N + 1; other periodic reports
+                # appear within year N.
+                filing_year = target_year + 1 if is_annual else target_year
+                if filing_year > current_year:
+                    continue  # nothing can have been published yet
+
+                form_data = {
+                    "pageNum": "1",
+                    "pageSize": "30",
+                    "column": column,
+                    "tabName": "fulltext",
+                    "plate": plate,
+                    "stock": f"{stock_code},{org_id}",
+                    "searchkey": "",
+                    "secid": "",
+                    "category": f"{category};",
+                    "trade": "",
+                    "seDate": f"{filing_year}-01-01~{filing_year + 1}-01-01",
+                    "sortName": "",
+                    "sortType": "",
+                    "isHLtitle": "true",
+                }
+
+                try:
+                    async with session.post(
+                        base_url, headers=headers, data=form_data
+                    ) as response:
+                        if response.status != 200:
+                            print(
+                                f"Warning: CNINFO returned HTTP {response.status} "
+                                f"for {stock_code} {report_type} {target_year}"
+                            )
+                            continue
+                        result = await response.json()
+
+                    # "announcements" is null when nothing matches.
+                    for announcement in result.get("announcements") or []:
+                        if len(filings_data) >= limit:
+                            break
+
+                        announcement_title = (
+                            announcement.get("announcementTitle") or ""
+                        )
+
+                        # Quarter filter: drop only titles whose detected
+                        # quarter is not among the requested ones.
+                        if report_type == "quarterly" and quarters:
+                            quarter_from_title = _extract_quarter_from_title(
+                                announcement_title
+                            )
+                            if (
+                                quarter_from_title
+                                and quarter_from_title not in quarters
+                            ):
+                                continue
+
+                        adjunct_url = announcement.get("adjunctUrl") or ""
+                        filing_info = {
+                            "stock_code": announcement.get("secCode", stock_code),
+                            "company": announcement.get("secName", ""),
+                            "market": market,
+                            "doc_type": report_type,
+                            "period_of_report": f"{target_year}",
+                            "filing_date": _ashare_filing_date(
+                                adjunct_url, f"{filing_year}-04-30"
+                            ),
+                            "announcement_id": announcement.get(
+                                "announcementId", ""
+                            ),
+                            "announcement_title": announcement_title,
+                            "org_id": announcement.get("orgId", ""),
+                            "content": "",  # placeholder; not filled in here
+                        }
+
+                        pdf_url = await _fetch_announcement_content(
+                            session, filing_info
+                        )
+                        if not pdf_url and adjunct_url:
+                            # The listing already names the PDF, so a failed
+                            # detail lookup need not lose the filing.
+                            pdf_url = f"http://static.cninfo.com.cn/{adjunct_url}"
+                        filing_info["pdf_url"] = pdf_url
+
+                        filings_data.append(filing_info)
+
+                except Exception as e:
+                    # Deliberately best-effort: one failing window must not
+                    # abort the remaining years and report types.
+                    print(
+                        f"Error fetching {stock_code} {report_type} {target_year} data: {e}"
+                    )
+                    continue
+
+    return filings_data
+
+
+async def _fetch_announcement_content(
+    session: aiohttp.ClientSession, filing_info: dict
+) -> str:
+    """Look up the PDF URL of one announcement via the CNINFO detail API.
+
+    Args:
+        session: aiohttp session
+        filing_info: Filing information dictionary; ``announcement_id`` and
+            ``filing_date`` are read from it
+
+    Returns:
+        The response's ``fileUrl`` when present, otherwise a static URL built
+        from the nested ``announcement.adjunctUrl``; empty string when the
+        request fails or returns a non-200 status
+    """
+    try:
+        # CNINFO announcement detail API
+        detail_url = "http://www.cninfo.com.cn/new/announcement/bulletin_detail"
+        request_headers = {
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36"
+        }
+        query = {
+            "announceId": filing_info.get("announcement_id", ""),
+            "flag": "true",
+            "announceTime": filing_info.get("filing_date", ""),
+        }
+
+        async with session.post(
+            detail_url, headers=request_headers, params=query
+        ) as response:
+            if response.status != 200:
+                return ""
+
+            payload = await response.json()
+            direct_url = payload.get("fileUrl", "")
+            if direct_url:
+                return direct_url
+
+            # Fallback: derive the link from the announcement's adjunctUrl.
+            adjunct_url = payload.get("announcement", {}).get("adjunctUrl", "")
+            if adjunct_url:
+                return f"http://static.cninfo.com.cn/{adjunct_url}"
+            return direct_url
+
+    except Exception as e:
+        print(f"Error fetching announcement details: {e}")
+
+    # Reached only after an error
+    return ""
+
+
+def _coerce_int_list(value, label: str) -> List[int]:
+    """Turn ``None``, a scalar or an iterable of ints/numeric strings into a list of ints."""
+    if value is None:
+        return []
+    items = [value] if isinstance(value, (int, str)) else list(value)
+    coerced: List[int] = []
+    for item in items:
+        # bool is an int subclass; reject it along with floats and other types.
+        if isinstance(item, bool) or not isinstance(item, (int, str)):
+            raise ValueError(f"{label} must be an integer, got: {item!r}")
+        try:
+            coerced.append(int(item))
+        except ValueError:
+            raise ValueError(f"{label} must be an integer, got: {item!r}") from None
+    return coerced
+
+
+async def fetch_ashare_filings(
+    stock_code: str,
+    report_types: List[str] | str = "annual",
+    year: Optional[int | List[int]] = None,
+    quarter: Optional[int | List[int]] = None,
+    limit: int = 10,
+) -> List[AShareFilingResult]:
+    """Fetch A-share filing data from CNINFO and import to knowledge base
+
+    Args:
+        stock_code: Stock code (e.g.: 000001, 600036, etc.)
+        report_types: Report types (ENGLISH ONLY). Supported values: "annual", "semi-annual", "quarterly".
+            Default is "annual". Chinese parameters are NOT supported.
+        year: Reporting year filter, can be a single year or list of years. If not provided, fetch latest reports
+        quarter: Quarter filter (1-4), can be a single quarter or list of quarters.
+            Only applicable when report_types includes "quarterly". Requires year to be provided.
+        limit: Maximum number of records to fetch, default 10
+
+    Returns:
+        List[AShareFilingResult]: List of A-share filing results
+
+    Raises:
+        ValueError: If report_types contains Chinese parameters or invalid values,
+            if quarter is provided without year, or if year/quarter are not integers
+
+    Examples:
+        # Fetch latest annual report of Ping An Bank
+        await fetch_ashare_filings("000001", "annual", limit=1)
+
+        # Fetch 2024 annual and semi-annual reports of Kweichow Moutai
+        await fetch_ashare_filings("600519", ["annual", "semi-annual"], year=2024)
+
+        # Fetch 2024 Q3 quarterly report of Kweichow Moutai
+        await fetch_ashare_filings("600519", "quarterly", year=2024, quarter=3)
+
+        # Fetch 2024 Q1 and Q3 quarterly reports of Kweichow Moutai
+        await fetch_ashare_filings("600519", "quarterly", year=2024, quarter=[1, 3])
+
+        # This will raise ValueError (Chinese parameters not supported):
+        # await fetch_ashare_filings("600519", "年报")  # DON'T DO THIS
+    """
+
+    # Normalize stock code
+    normalized_code = _normalize_stock_code(stock_code)
+
+    # Normalize and validate report types. Without this check an unknown or
+    # Chinese value would silently be fetched as an annual report.
+    supported_types = ("annual", "semi-annual", "quarterly")
+    report_types_list: List[str] = []
+    for raw_type in _ensure_list(report_types) or ["annual"]:
+        report_type = str(raw_type).strip().lower()
+        if report_type not in supported_types:
+            raise ValueError(
+                f"Unsupported report type: {raw_type!r}. "
+                "Use one of 'annual', 'semi-annual', 'quarterly' (English only)"
+            )
+        if report_type not in report_types_list:  # drop duplicates, keep order
+            report_types_list.append(report_type)
+
+    # Validate quarter parameter
+    if quarter is not None:
+        if year is None:
+            raise ValueError("Quarter parameter requires year to be provided")
+        if "quarterly" not in report_types_list:
+            raise ValueError(
+                "Quarter parameter is only applicable when report_types includes 'quarterly'"
+            )
+
+    # Normalize years and quarters; numeric strings such as "2024" are accepted
+    # because the values may come from a tool call.
+    years_list = _coerce_int_list(year, "Year")
+    quarters_list = _coerce_int_list(quarter, "Quarter")
+
+    # Validate quarter values
+    for q in quarters_list:
+        if q < 1 or q > 4:
+            raise ValueError(f"Quarter must be between 1 and 4, got: {q}")
+
+    # Fetch real data from CNINFO
+    filings_data = await _fetch_cninfo_data(
+        normalized_code, report_types_list, years_list, quarters_list, limit
+    )
+
+    # Build result records and import to knowledge base
+    knowledge_dir = Path(get_knowledge_path())
+    return await _write_and_ingest_ashare(filings_data, knowledge_dir)