From c75a9cc2613dc445cdf949c1b543adf694e878b2 Mon Sep 17 00:00:00 2001 From: paisley Date: Mon, 27 Oct 2025 16:17:20 +0800 Subject: [PATCH 1/8] feat: research agent support a share --- .../valuecell/agents/research_agent/core.py | 2 + .../agents/research_agent/schemas.py | 23 ++ .../agents/research_agent/sources.py | 363 +++++++++++++++++- 3 files changed, 387 insertions(+), 1 deletion(-) diff --git a/python/valuecell/agents/research_agent/core.py b/python/valuecell/agents/research_agent/core.py index 205917e39..2902d76fc 100644 --- a/python/valuecell/agents/research_agent/core.py +++ b/python/valuecell/agents/research_agent/core.py @@ -12,6 +12,7 @@ KNOWLEDGE_AGENT_INSTRUCTION, ) from valuecell.agents.research_agent.sources import ( + fetch_a_share_filings, fetch_event_sec_filings, fetch_periodic_sec_filings, web_search, @@ -29,6 +30,7 @@ def __init__(self, **kwargs): tools = [ fetch_periodic_sec_filings, fetch_event_sec_filings, + fetch_a_share_filings, web_search, ] self.knowledge_research_agent = Agent( diff --git a/python/valuecell/agents/research_agent/schemas.py b/python/valuecell/agents/research_agent/schemas.py index 3f343588d..3f21e59c1 100644 --- a/python/valuecell/agents/research_agent/schemas.py +++ b/python/valuecell/agents/research_agent/schemas.py @@ -15,3 +15,26 @@ class SECFilingResult: name: str path: Path metadata: SECFilingMetadata + + +@dataclass +class AShareFilingMetadata: + """A-share filing metadata""" + + doc_type: ( + str # Report type: annual report, semi-annual report, quarterly report, etc. + ) + company: str # Company name + stock_code: str # Stock code + market: str # Market: SZSE, SSE + period_of_report: str # Report period + filing_date: str # Filing date + + +@dataclass +class AShareFilingResult: + """A-share filing result""" + + name: str + path: Path + metadata: AShareFilingMetadata diff --git a/python/valuecell/agents/research_agent/sources.py b/python/valuecell/agents/research_agent/sources.py index 51861dbf7..7422f28ba 100644 --- a/python/valuecell/agents/research_agent/sources.py +++ b/python/valuecell/agents/research_agent/sources.py @@ -1,9 +1,11 @@ import os +import re from datetime import date, datetime from pathlib import Path from typing import Iterable, List, Optional, Sequence import aiofiles +import aiohttp from agno.agent import Agent from agno.models.google import Gemini from agno.models.openrouter import OpenRouter @@ -13,7 +15,12 @@ from valuecell.utils.path import get_knowledge_path from .knowledge import insert_md_file_to_knowledge -from .schemas import SECFilingMetadata, SECFilingResult +from .schemas import ( + AShareFilingMetadata, + AShareFilingResult, + SECFilingMetadata, + SECFilingResult, +) def _ensure_list(value: str | Sequence[str] | None) -> List[str]: @@ -225,3 +232,357 @@ async def _web_search_google(query: str) -> str: model = Gemini(id="gemini-2.5-flash", search=True) response = await Agent(model=model).arun(query) return response.content + + +def _normalize_stock_code(stock_code: str) -> str: + """Normalize stock code format""" + # Remove possible prefixes and suffixes, keep only digits + code = re.sub(r"[^\d]", "", stock_code) + # Ensure it's a 6-digit number + if len(code) == 6: + return code + elif len(code) < 6: + return code.zfill(6) + else: + return code[:6] + + +async def _write_and_ingest_a_share( + filings_data: List[dict], + knowledge_dir: Path, +) -> List[AShareFilingResult]: + """Write A-share filing data to files and import to knowledge base""" + knowledge_dir.mkdir(parents=True, exist_ok=True) + results: 
List[AShareFilingResult] = [] + + for filing_data in filings_data: + # Build file name + stock_code = filing_data["stock_code"] + doc_type = filing_data["doc_type"] + period = filing_data["period_of_report"] + file_name = f"AShare_{stock_code}_{doc_type}_{period}.md" + path = knowledge_dir / file_name + + # Use complete content returned from _fetch_announcement_content + content = filing_data.get( + "content", + f"""# {filing_data["company"]} ({stock_code}) {doc_type} + +## Basic Information +- **Company Name**: {filing_data["company"]} +- **Stock Code**: {stock_code} +- **Exchange**: {filing_data["market"]} +- **Report Type**: {doc_type} +- **Report Period**: {period} +- **Filing Date**: {filing_data["filing_date"]} + +## Filing Content +{filing_data.get("announcement_title", "Filing content is being processed...")} + +--- +*Data Source: CNINFO* +""", + ) + + # Write to file + async with aiofiles.open(path, "w", encoding="utf-8") as file: + await file.write(content) + + # Create metadata + metadata = AShareFilingMetadata( + doc_type=doc_type, + company=filing_data["company"], + stock_code=stock_code, + market=filing_data["market"], + period_of_report=period, + filing_date=filing_data["filing_date"], + ) + + # Create result object + result = AShareFilingResult(file_name, path, metadata) + results.append(result) + + # Import to knowledge base + await insert_md_file_to_knowledge( + name=file_name, path=path, metadata=metadata.__dict__ + ) + + return results + + +async def _fetch_cninfo_data( + stock_code: str, report_types: List[str], years: List[int], limit: int +) -> List[dict]: + """Fetch real A-share filing data from CNINFO API + + Args: + stock_code: Normalized stock code + report_types: List of report types + years: List of years + limit: Maximum number of records to fetch + + Returns: + List[dict]: List of filing data + """ + + # CNINFO API configuration + base_url = "http://www.cninfo.com.cn/new/hisAnnouncement/query" + + # Request headers configuration + headers = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36", + "Accept": "*/*", + "Accept-Encoding": "gzip, deflate", + "Accept-Language": "zh-CN,zh;q=0.9", + "Connection": "keep-alive", + "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8", + "Host": "www.cninfo.com.cn", + "Origin": "http://www.cninfo.com.cn", + "Referer": "http://www.cninfo.com.cn/new/commonUrl/pageOfSearch?url=disclosure/list/search&lastPage=index", + "X-Requested-With": "XMLHttpRequest", + } + + # Report type mapping (supports both English and Chinese for backward compatibility) + category_mapping = { + "annual": "category_ndbg_szsh", + "semi-annual": "category_bndbg_szsh", + "quarterly": "category_sjdbg_szsh", + } + + # Determine exchange + column = "szse" if stock_code.startswith(("000", "002", "300")) else "sse" + + filings_data = [] + current_year = datetime.now().year + target_years = ( + years if years else [current_year, current_year - 1, current_year - 2] + ) + + async with aiohttp.ClientSession() as session: + for report_type in report_types: + if len(filings_data) >= limit: + break + + category = category_mapping.get(report_type, "category_ndbg_szsh") + + # Build time range + for target_year in target_years: + if len(filings_data) >= limit: + break + + # Set search time range + start_date = f"{target_year}-01-01" + end_date = f"{target_year + 1}-01-01" + se_date = f"{start_date}~{end_date}" + + # Build request parameters + # Build orgId based on stock code 
+ if stock_code.startswith(("000", "002", "300")): + # SZSE stocks + org_id = f"gssz{stock_code.zfill(7)}" # Pad to 7 digits + plate = "sz" + else: + # SSE stocks + org_id = f"gssh{stock_code.zfill(7)}" # Pad to 7 digits + plate = "sh" + + form_data = { + "pageNum": "1", + "pageSize": "30", + "column": column, + "tabName": "fulltext", + "plate": plate, + "stock": f"{stock_code},{org_id}", + "searchkey": "", + "secid": "", + "category": f"{category};", + "trade": "", + "seDate": se_date, + "sortName": "", + "sortType": "", + "isHLtitle": "true", + } + + try: + async with session.post( + base_url, headers=headers, data=form_data + ) as response: + if response.status == 200: + result = await response.json() + announcements = result.get("announcements", []) + + if announcements is None: + continue + + for announcement in announcements: + if len(filings_data) >= limit: + break + + # Extract filing information + filing_info = { + "stock_code": announcement.get( + "secCode", stock_code + ), + "company": announcement.get("secName", ""), + "market": "SZSE" if column == "szse" else "SSE", + "doc_type": report_type, + "period_of_report": f"{target_year}", + "filing_date": announcement.get("adjunctUrl", "")[ + 10:20 + ] + if announcement.get("adjunctUrl") + else f"{target_year}-04-30", + "announcement_id": announcement.get( + "announcementId", "" + ), + "announcement_title": announcement.get( + "announcementTitle", "" + ), + "org_id": announcement.get("orgId", ""), + "content": "", # Will fetch detailed content in subsequent steps + } + + # Fetch detailed content + content = await _fetch_announcement_content( + session, filing_info + ) + filing_info["content"] = content + + filings_data.append(filing_info) + + except Exception as e: + print( + f"Error fetching {stock_code} {report_type} {target_year} data: {e}" + ) + continue + + return filings_data + + +async def _fetch_announcement_content( + session: aiohttp.ClientSession, filing_info: dict +) -> str: + """Fetch detailed content of announcement + + Args: + session: aiohttp session + filing_info: Filing information dictionary + + Returns: + str: Announcement content + """ + try: + # CNINFO announcement detail API + detail_url = "http://www.cninfo.com.cn/new/announcement/bulletin_detail" + + headers = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36" + } + + params = { + "announceId": filing_info.get("announcement_id", ""), + "flag": "true", + "announceTime": filing_info.get("filing_date", ""), + } + + async with session.post(detail_url, headers=headers, params=params) as response: + if response.status == 200: + result = await response.json() + + # Build filing content + content = f"""# {filing_info["company"]} ({filing_info["stock_code"]}) {filing_info["doc_type"]} + +## Basic Information +- **Company Name**: {filing_info["company"]} +- **Stock Code**: {filing_info["stock_code"]} +- **Exchange**: {filing_info["market"]} +- **Report Type**: {filing_info["doc_type"]} +- **Report Period**: {filing_info["period_of_report"]} +- **Filing Date**: {filing_info["filing_date"]} + +## Filing Content + +{filing_info.get("announcement_title", "")} + +## Financial Data +*Note: Detailed financial data needs to be extracted from PDF files, basic information is shown here* + +PDF File Link: {result.get("fileUrl", "Not available")} + +--- +*Data Source: CNINFO* +""" + return content + + except Exception as e: + print(f"Error fetching announcement details: {e}") + + # Return basic 
content + return f"""# {filing_info["company"]} ({filing_info["stock_code"]}) {filing_info["doc_type"]} + +## Basic Information +- **Company Name**: {filing_info["company"]} +- **Stock Code**: {filing_info["stock_code"]} +- **Exchange**: {filing_info["market"]} +- **Report Type**: {filing_info["doc_type"]} +- **Report Period**: {filing_info["period_of_report"]} +- **Filing Date**: {filing_info["filing_date"]} + +## Filing Content + +{filing_info.get("announcement_title", "")} + +--- +*Data Source: CNINFO* +""" + + +async def fetch_a_share_filings( + stock_code: str, + report_types: List[str] | str = "annual", + year: Optional[int | List[int]] = None, + limit: int = 10, +) -> List[AShareFilingResult]: + """Fetch A-share filing data from CNINFO and import to knowledge base + + Args: + stock_code: Stock code (e.g.: 000001, 600036, etc.) + report_types: Report types, options: "annual", "semi-annual", "quarterly" or Chinese "年报", "半年报", "季报". Default is "annual" + year: Year filter, can be a single year or list of years. If not provided, fetch latest reports + limit: Maximum number of records to fetch, default 10 + + Returns: + List[AShareFilingResult]: List of A-share filing results + + Examples: + # Fetch latest annual report of Ping An Bank + await fetch_a_share_filings("000001", "annual", limit=1) + + # Fetch 2023 annual and semi-annual reports of Kweichow Moutai + await fetch_a_share_filings("600519", ["annual", "semi-annual"], year=2023) + """ + + # Normalize stock code + normalized_code = _normalize_stock_code(stock_code) + + # Normalize report types + report_types_list = _ensure_list(report_types) + if not report_types_list: + report_types_list = ["annual"] + + # Normalize years + years_list = [] + if year is not None: + if isinstance(year, int): + years_list = [year] + else: + years_list = list(year) + + # Fetch real data from CNINFO + filings_data = await _fetch_cninfo_data( + normalized_code, report_types_list, years_list, limit + ) + + # Write to files and import to knowledge base + knowledge_dir = Path(get_knowledge_path()) + return await _write_and_ingest_a_share(filings_data, knowledge_dir) From 43a10f1c57c3ec2332c65823e6b5deca319c7087 Mon Sep 17 00:00:00 2001 From: paisley Date: Mon, 27 Oct 2025 17:42:16 +0800 Subject: [PATCH 2/8] Switch A-share filing ingestion to use PDF files Refactored the ingestion pipeline to fetch and import PDF files for A-share filings instead of generating and storing markdown files. 
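
For reference, a minimal sketch of the new ingestion path this commit introduces (illustrative values only; the URL is a placeholder standing in for the fileUrl/adjunctUrl resolved from the CNINFO response, and the helper signatures are the ones added in this patch):

    import asyncio

    from valuecell.agents.research_agent.knowledge import insert_pdf_file_to_knowledge
    from valuecell.agents.research_agent.schemas import (
        AShareFilingMetadata,
        AShareFilingResult,
    )


    async def ingest_one_filing() -> AShareFilingResult:
        # Illustrative metadata; in the pipeline these fields come from the CNINFO query response.
        metadata = AShareFilingMetadata(
            doc_type="annual",
            company="Ping An Bank",
            stock_code="000001",
            market="SZSE",
            period_of_report="2024",
            filing_date="2024-04-30",
        )
        # Placeholder URL; the real link is built from the announcement's adjunctUrl/fileUrl.
        pdf_url = "http://static.cninfo.com.cn/<adjunctUrl>"
        # PDFReader-backed knowledge ingestion reads the filing straight from the URL,
        # replacing the previous markdown write-and-ingest step.
        await insert_pdf_file_to_knowledge(url=pdf_url, metadata=metadata.__dict__)
        return AShareFilingResult(url=pdf_url, metadata=metadata)


    asyncio.run(ingest_one_filing())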
--- .../agents/research_agent/knowledge.py | 10 ++ .../agents/research_agent/schemas.py | 3 +- .../agents/research_agent/sources.py | 100 ++++-------------- 3 files changed, 33 insertions(+), 80 deletions(-) diff --git a/python/valuecell/agents/research_agent/knowledge.py b/python/valuecell/agents/research_agent/knowledge.py index fd07ab881..8be8447ff 100644 --- a/python/valuecell/agents/research_agent/knowledge.py +++ b/python/valuecell/agents/research_agent/knowledge.py @@ -4,6 +4,7 @@ from agno.knowledge.chunking.markdown import MarkdownChunking from agno.knowledge.knowledge import Knowledge from agno.knowledge.reader.markdown_reader import MarkdownReader +from agno.knowledge.reader.pdf_reader import PDFReader from .vdb import vector_db @@ -12,6 +13,7 @@ max_results=10, ) md_reader = MarkdownReader(chunking_strategy=MarkdownChunking()) +pdf_reader = PDFReader(chunking_strategy=MarkdownChunking()) async def insert_md_file_to_knowledge( @@ -23,3 +25,11 @@ async def insert_md_file_to_knowledge( metadata=metadata, reader=md_reader, ) + + +async def insert_pdf_file_to_knowledge(url: str, metadata: Optional[dict] = None): + await knowledge.add_content_async( + url=url, + metadata=metadata, + reader=pdf_reader, + ) diff --git a/python/valuecell/agents/research_agent/schemas.py b/python/valuecell/agents/research_agent/schemas.py index 3f21e59c1..00cd54771 100644 --- a/python/valuecell/agents/research_agent/schemas.py +++ b/python/valuecell/agents/research_agent/schemas.py @@ -35,6 +35,5 @@ class AShareFilingMetadata: class AShareFilingResult: """A-share filing result""" - name: str - path: Path + url: str metadata: AShareFilingMetadata diff --git a/python/valuecell/agents/research_agent/sources.py b/python/valuecell/agents/research_agent/sources.py index 7422f28ba..6fde61db3 100644 --- a/python/valuecell/agents/research_agent/sources.py +++ b/python/valuecell/agents/research_agent/sources.py @@ -14,7 +14,7 @@ from valuecell.utils.path import get_knowledge_path -from .knowledge import insert_md_file_to_knowledge +from .knowledge import insert_md_file_to_knowledge, insert_pdf_file_to_knowledge from .schemas import ( AShareFilingMetadata, AShareFilingResult, @@ -260,33 +260,9 @@ async def _write_and_ingest_a_share( stock_code = filing_data["stock_code"] doc_type = filing_data["doc_type"] period = filing_data["period_of_report"] - file_name = f"AShare_{stock_code}_{doc_type}_{period}.md" - path = knowledge_dir / file_name - - # Use complete content returned from _fetch_announcement_content - content = filing_data.get( - "content", - f"""# {filing_data["company"]} ({stock_code}) {doc_type} - -## Basic Information -- **Company Name**: {filing_data["company"]} -- **Stock Code**: {stock_code} -- **Exchange**: {filing_data["market"]} -- **Report Type**: {doc_type} -- **Report Period**: {period} -- **Filing Date**: {filing_data["filing_date"]} - -## Filing Content -{filing_data.get("announcement_title", "Filing content is being processed...")} - ---- -*Data Source: CNINFO* -""", - ) - # Write to file - async with aiofiles.open(path, "w", encoding="utf-8") as file: - await file.write(content) + # Get PDF URL from filing data + pdf_url = filing_data.get("pdf_url", "") # Create metadata metadata = AShareFilingMetadata( @@ -299,13 +275,11 @@ async def _write_and_ingest_a_share( ) # Create result object - result = AShareFilingResult(file_name, path, metadata) + result = AShareFilingResult(url=pdf_url, metadata=metadata) results.append(result) - # Import to knowledge base - await insert_md_file_to_knowledge( - 
name=file_name, path=path, metadata=metadata.__dict__ - ) + # Import to knowledge base - use PDF URL if available, otherwise use markdown file + await insert_pdf_file_to_knowledge(url=pdf_url, metadata=metadata.__dict__) return results @@ -442,11 +416,11 @@ async def _fetch_cninfo_data( "content": "", # Will fetch detailed content in subsequent steps } - # Fetch detailed content - content = await _fetch_announcement_content( + # Fetch PDF URL + pdf_url = await _fetch_announcement_content( session, filing_info ) - filing_info["content"] = content + filing_info["pdf_url"] = pdf_url filings_data.append(filing_info) @@ -462,14 +436,14 @@ async def _fetch_cninfo_data( async def _fetch_announcement_content( session: aiohttp.ClientSession, filing_info: dict ) -> str: - """Fetch detailed content of announcement + """Fetch PDF URL from CNINFO API Args: session: aiohttp session filing_info: Filing information dictionary Returns: - str: Announcement content + PDF URL string, or empty string if not available """ try: # CNINFO announcement detail API @@ -489,52 +463,22 @@ async def _fetch_announcement_content( if response.status == 200: result = await response.json() - # Build filing content - content = f"""# {filing_info["company"]} ({filing_info["stock_code"]}) {filing_info["doc_type"]} - -## Basic Information -- **Company Name**: {filing_info["company"]} -- **Stock Code**: {filing_info["stock_code"]} -- **Exchange**: {filing_info["market"]} -- **Report Type**: {filing_info["doc_type"]} -- **Report Period**: {filing_info["period_of_report"]} -- **Filing Date**: {filing_info["filing_date"]} + # Extract PDF link with fallback options + pdf_url = result.get("fileUrl", "") + if not pdf_url: + # Fallback: construct URL from adjunctUrl if available + announcement_data = result.get("announcement", {}) + adjunct_url = announcement_data.get("adjunctUrl", "") + if adjunct_url: + pdf_url = f"http://static.cninfo.com.cn/{adjunct_url}" -## Filing Content - -{filing_info.get("announcement_title", "")} - -## Financial Data -*Note: Detailed financial data needs to be extracted from PDF files, basic information is shown here* - -PDF File Link: {result.get("fileUrl", "Not available")} - ---- -*Data Source: CNINFO* -""" - return content + return pdf_url except Exception as e: print(f"Error fetching announcement details: {e}") - # Return basic content - return f"""# {filing_info["company"]} ({filing_info["stock_code"]}) {filing_info["doc_type"]} - -## Basic Information -- **Company Name**: {filing_info["company"]} -- **Stock Code**: {filing_info["stock_code"]} -- **Exchange**: {filing_info["market"]} -- **Report Type**: {filing_info["doc_type"]} -- **Report Period**: {filing_info["period_of_report"]} -- **Filing Date**: {filing_info["filing_date"]} - -## Filing Content - -{filing_info.get("announcement_title", "")} - ---- -*Data Source: CNINFO* -""" + # Return empty string if failed + return "" async def fetch_a_share_filings( From 0ca21408468733dc7d9f8f007d913f4ee534325a Mon Sep 17 00:00:00 2001 From: paisley Date: Mon, 27 Oct 2025 18:02:22 +0800 Subject: [PATCH 3/8] Update comment to reflect PDF-only knowledge base import Revised the comment in _write_and_ingest_a_share to clarify that only the PDF URL is used for importing to the knowledge base, removing reference to markdown files. 
--- python/valuecell/agents/research_agent/sources.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/valuecell/agents/research_agent/sources.py b/python/valuecell/agents/research_agent/sources.py index 6fde61db3..9afd7b143 100644 --- a/python/valuecell/agents/research_agent/sources.py +++ b/python/valuecell/agents/research_agent/sources.py @@ -278,7 +278,7 @@ async def _write_and_ingest_a_share( result = AShareFilingResult(url=pdf_url, metadata=metadata) results.append(result) - # Import to knowledge base - use PDF URL if available, otherwise use markdown file + # Import to knowledge base - use PDF URL if available await insert_pdf_file_to_knowledge(url=pdf_url, metadata=metadata.__dict__) return results From befd481958c3f208272f304c59a22c6b4eb53228 Mon Sep 17 00:00:00 2001 From: paisley Date: Mon, 27 Oct 2025 18:36:23 +0800 Subject: [PATCH 4/8] refine --- python/valuecell/agents/research_agent/core.py | 4 ++-- python/valuecell/agents/research_agent/sources.py | 12 ++++++------ 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/python/valuecell/agents/research_agent/core.py b/python/valuecell/agents/research_agent/core.py index 2902d76fc..f645c8f6c 100644 --- a/python/valuecell/agents/research_agent/core.py +++ b/python/valuecell/agents/research_agent/core.py @@ -12,7 +12,7 @@ KNOWLEDGE_AGENT_INSTRUCTION, ) from valuecell.agents.research_agent.sources import ( - fetch_a_share_filings, + fetch_ashare_filings, fetch_event_sec_filings, fetch_periodic_sec_filings, web_search, @@ -30,7 +30,7 @@ def __init__(self, **kwargs): tools = [ fetch_periodic_sec_filings, fetch_event_sec_filings, - fetch_a_share_filings, + fetch_ashare_filings, web_search, ] self.knowledge_research_agent = Agent( diff --git a/python/valuecell/agents/research_agent/sources.py b/python/valuecell/agents/research_agent/sources.py index 9afd7b143..470c06537 100644 --- a/python/valuecell/agents/research_agent/sources.py +++ b/python/valuecell/agents/research_agent/sources.py @@ -247,7 +247,7 @@ def _normalize_stock_code(stock_code: str) -> str: return code[:6] -async def _write_and_ingest_a_share( +async def _write_and_ingest_ashare( filings_data: List[dict], knowledge_dir: Path, ) -> List[AShareFilingResult]: @@ -481,7 +481,7 @@ async def _fetch_announcement_content( return "" -async def fetch_a_share_filings( +async def fetch_ashare_filings( stock_code: str, report_types: List[str] | str = "annual", year: Optional[int | List[int]] = None, @@ -491,7 +491,7 @@ async def fetch_a_share_filings( Args: stock_code: Stock code (e.g.: 000001, 600036, etc.) - report_types: Report types, options: "annual", "semi-annual", "quarterly" or Chinese "年报", "半年报", "季报". Default is "annual" + report_types: Report types, options: "annual", "semi-annual", "quarterly". Default is "annual" year: Year filter, can be a single year or list of years. 
If not provided, fetch latest reports limit: Maximum number of records to fetch, default 10 @@ -502,8 +502,8 @@ async def fetch_a_share_filings( # Fetch latest annual report of Ping An Bank await fetch_a_share_filings("000001", "annual", limit=1) - # Fetch 2023 annual and semi-annual reports of Kweichow Moutai - await fetch_a_share_filings("600519", ["annual", "semi-annual"], year=2023) + # Fetch 2025 annual and semi-annual reports of Kweichow Moutai + await fetch_a_share_filings("600519", ["annual", "semi-annual"], year=2025) """ # Normalize stock code @@ -529,4 +529,4 @@ async def fetch_a_share_filings( # Write to files and import to knowledge base knowledge_dir = Path(get_knowledge_path()) - return await _write_and_ingest_a_share(filings_data, knowledge_dir) + return await _write_and_ingest_ashare(filings_data, knowledge_dir) From 11edd41c40674bf77b5f1de3bae4f427909d7f95 Mon Sep 17 00:00:00 2001 From: paisley Date: Mon, 27 Oct 2025 18:51:05 +0800 Subject: [PATCH 5/8] refine --- python/valuecell/agents/research_agent/sources.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/valuecell/agents/research_agent/sources.py b/python/valuecell/agents/research_agent/sources.py index 470c06537..ed47cbbe2 100644 --- a/python/valuecell/agents/research_agent/sources.py +++ b/python/valuecell/agents/research_agent/sources.py @@ -500,10 +500,10 @@ async def fetch_ashare_filings( Examples: # Fetch latest annual report of Ping An Bank - await fetch_a_share_filings("000001", "annual", limit=1) + await fetch_ashare_filings("000001", "annual", limit=1) # Fetch 2025 annual and semi-annual reports of Kweichow Moutai - await fetch_a_share_filings("600519", ["annual", "semi-annual"], year=2025) + await fetch_ashare_filings("600519", ["annual", "semi-annual"], year=2025) """ # Normalize stock code From 2dce95b3b9f20ade96bd3201d4cace2f8964b45e Mon Sep 17 00:00:00 2001 From: paisley Date: Tue, 28 Oct 2025 11:36:17 +0800 Subject: [PATCH 6/8] Convert Chinese example responses to English in prompts.py --- .../agents/research_agent/prompts.py | 43 +++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/python/valuecell/agents/research_agent/prompts.py b/python/valuecell/agents/research_agent/prompts.py index 56b16698a..2d7a24d49 100644 --- a/python/valuecell/agents/research_agent/prompts.py +++ b/python/valuecell/agents/research_agent/prompts.py @@ -6,6 +6,7 @@ - fetch_periodic_sec_filings(ticker_or_cik, forms, year?, quarter?, limit?): Use this for scheduled reports like 10-K/10-Q when you need primary-source facts (revenue, net income, MD&A text). Prefer batching by year to reduce calls. Note: year/quarter filters apply to filing_date (edgar behavior), not period_of_report. If year is omitted, the tool returns the latest filings using `limit` (default 10). If quarter is provided, year must also be provided. - fetch_event_sec_filings(ticker_or_cik, forms, start_date?, end_date?, limit?): Use this for event-driven filings like 8-K and ownership forms (3/4/5). Use date ranges and limits to control scope. +- fetch_ashare_filings(stock_code, report_types, year?, quarter?, limit?): Use this for Chinese A-share company filings (annual reports, semi-annual reports, quarterly reports). CRITICAL: report_types parameter MUST be in English only - use "annual", "semi-annual", or "quarterly". Never use Chinese terms like "年报", "半年报", or "季报". The function will reject Chinese parameters with an error. 
- Knowledge base search: Use the agent's internal knowledge index to find summaries, historical context, analyst commentary, and previously ingested documents. @@ -18,6 +19,15 @@ - Suggest follow-up queries for additional details 3. Smart defaults: If year/quarter are unspecified for periodic filings, default to the most recent available data rather than calling multiple periods. For event-driven filings, use a recent date window (e.g., last 90 days) with a small limit unless the user specifies otherwise. 4. Knowledge base first: For broad questions or interpretive queries, search the knowledge base before calling filing tools. Only fetch new filings if the knowledge base lacks the specific data needed. + +A-share filings (fetch_ashare_filings) specific guidelines: +- ALWAYS use English report types: "annual", "semi-annual", "quarterly" +- NEVER use Chinese terms: "年报", "半年报", "季报" will cause errors +- Stock codes should be 6-digit format (e.g., "600519" for Kweichow Moutai, "000001" for Ping An Bank) +- When users mention Chinese report types, translate them to English before calling the function: + * 年报/年度报告 → "annual" + * 半年报/半年度报告/中报 → "semi-annual" + * 季报/季度报告/一季报/三季报 → "quarterly" @@ -39,6 +49,13 @@ 3. Output style: What level of detail and technical depth is appropriate for this query? + +Example: A-share filing query (user asks "茅台2024年年报的营收是多少?"): +Tool plan: User mentioned "年报" (annual report) in Chinese, so translate to "annual" before calling fetch_ashare_filings('600519', 'annual', year=2024). + +CRITICAL NOTE: In this example, the user asked about "年报" (annual report) in Chinese, but the tool call correctly used "annual" in English. Always translate Chinese report types to English before calling fetch_ashare_filings. + + 1. Clarify: If the user's request lacks a ticker/CIK, form type, or time range, ask a single clarifying question. 2. Primary check: If the user requests factual items (financial line items, footnote detail, MD&A text), call `fetch_periodic_sec_filings` (10-Q/10-K) with specific filters. For corporate events or disclosures, call `fetch_event_sec_filings` (8-K/3/4/5) with a relevant date range. @@ -182,6 +199,32 @@ --- +Example 4 - A-share filing query (user asks "茅台2024年年报的营收是多少?"): +Tool plan: User mentioned "年报" (annual report) in Chinese, so translate to "annual" before calling fetch_ashare_filings('600519', 'annual', year=2024). + +Response: +"According to Kweichow Moutai's 2024 annual report, the company achieved operating revenue of 150.67 billion yuan [2024 Annual Report](file://...), representing a 15.2% year-over-year increase. Moutai liquor sales contributed 136.89 billion yuan (90.9% of total revenue), while series liquor sales reached 13.78 billion yuan [same source]. + +This revenue level represents a historic high for Moutai, primarily driven by product mix optimization and stable market demand growth [knowledge base: 2024 performance analysis]. + +Would you like me to analyze Moutai's profitability metrics further, or compare its revenue performance with other liquor companies?" + +--- + +Example 5 - A-share quarterly filing query (user asks "茅台2024年第三季度报告的净利润是多少?"): +Tool plan: User mentioned "第三季度报告" (quarterly report) in Chinese, so translate to "quarterly" before calling fetch_ashare_filings('600519', 'quarterly', year=2024, quarter=3). 
+ +Response: +"According to Kweichow Moutai's Q3 2024 quarterly report, the company achieved net profit of 36.85 billion yuan [Q3 2024 Quarterly Report](file://...), representing a 12.8% year-over-year increase. Cumulative net profit for the first three quarters reached 110.52 billion yuan, up 14.1% year-over-year [same source]. + +The Q3 net profit margin reached 24.5%, an improvement of 0.8 percentage points compared to the same period last year, indicating continued improvement in the company's profitability [knowledge base: Q3 2024 analysis]. + +Would you like me to analyze Moutai's quarterly profit trends, or compare performance across different quarters?" + +CRITICAL NOTE: In these examples, users asked about "年报" (annual report) and "第三季度报告" (quarterly report) in Chinese, but the tool calls correctly used "annual" and "quarterly" in English. Always translate Chinese report types to English before calling fetch_ashare_filings. + +--- + Note: In all examples, tool calls are batched when possible, sources are cited naturally, and the response style matches the query type. Each response ends with a contextual, actionable follow-up suggestion to encourage continued exploration. """ From 2684aa6a7ef46d53b5d4634e8a6f1aeac8d94e22 Mon Sep 17 00:00:00 2001 From: paisley Date: Tue, 28 Oct 2025 11:36:56 +0800 Subject: [PATCH 7/8] refine --- .../agents/research_agent/schemas.py | 4 +- .../agents/research_agent/sources.py | 105 ++++++++++++++++-- 2 files changed, 99 insertions(+), 10 deletions(-) diff --git a/python/valuecell/agents/research_agent/schemas.py b/python/valuecell/agents/research_agent/schemas.py index 00cd54771..adc07da9a 100644 --- a/python/valuecell/agents/research_agent/schemas.py +++ b/python/valuecell/agents/research_agent/schemas.py @@ -29,11 +29,13 @@ class AShareFilingMetadata: market: str # Market: SZSE, SSE period_of_report: str # Report period filing_date: str # Filing date + announcement_title: str = "" # Announcement title for quarter filtering @dataclass class AShareFilingResult: """A-share filing result""" - url: str + name: str + path: Path metadata: AShareFilingMetadata diff --git a/python/valuecell/agents/research_agent/sources.py b/python/valuecell/agents/research_agent/sources.py index ed47cbbe2..0c2b2e9da 100644 --- a/python/valuecell/agents/research_agent/sources.py +++ b/python/valuecell/agents/research_agent/sources.py @@ -31,6 +31,33 @@ def _ensure_list(value: str | Sequence[str] | None) -> List[str]: return list(value) +def _extract_quarter_from_title(title: str) -> Optional[int]: + """Extract quarter number from announcement title + + Args: + title: Announcement title string + + Returns: + Quarter number (1-4) if found, None otherwise + """ + if not title: + return None + + # Common patterns for quarterly reports in Chinese titles + quarter_patterns = [ + (r"第一季度|一季度|1季度|Q1", 1), + (r"第二季度|二季度|2季度|Q2|半年度|中期", 2), # Semi-annual is often Q2 + (r"第三季度|三季度|3季度|Q3", 3), + (r"第四季度|四季度|4季度|Q4|年度报告|年报", 4), # Annual is often Q4 + ] + + for pattern, quarter in quarter_patterns: + if re.search(pattern, title, re.IGNORECASE): + return quarter + + return None + + def _parse_date(d: str | date | None) -> Optional[date]: if d is None: return None @@ -272,10 +299,12 @@ async def _write_and_ingest_ashare( market=filing_data["market"], period_of_report=period, filing_date=filing_data["filing_date"], + announcement_title=filing_data.get("announcement_title", ""), ) # Create result object - result = AShareFilingResult(url=pdf_url, metadata=metadata) + file_name = 
f"{stock_code}_{doc_type}_{period}.pdf" + result = AShareFilingResult(name=file_name, path=pdf_url, metadata=metadata) results.append(result) # Import to knowledge base - use PDF URL if available @@ -285,7 +314,11 @@ async def _write_and_ingest_ashare( async def _fetch_cninfo_data( - stock_code: str, report_types: List[str], years: List[int], limit: int + stock_code: str, + report_types: List[str], + years: List[int], + quarters: List[int], + limit: int, ) -> List[dict]: """Fetch real A-share filing data from CNINFO API @@ -293,6 +326,7 @@ async def _fetch_cninfo_data( stock_code: Normalized stock code report_types: List of report types years: List of years + quarters: List of quarters (1-4), empty list means all quarters limit: Maximum number of records to fetch Returns: @@ -392,6 +426,22 @@ async def _fetch_cninfo_data( if len(filings_data) >= limit: break + announcement_title = announcement.get( + "announcementTitle", "" + ) + + # Apply quarter filtering for quarterly reports + if report_type == "quarterly" and quarters: + # Extract quarter from announcement title + quarter_from_title = _extract_quarter_from_title( + announcement_title + ) + if ( + quarter_from_title + and quarter_from_title not in quarters + ): + continue # Skip this announcement if quarter doesn't match + # Extract filing information filing_info = { "stock_code": announcement.get( @@ -409,9 +459,7 @@ async def _fetch_cninfo_data( "announcement_id": announcement.get( "announcementId", "" ), - "announcement_title": announcement.get( - "announcementTitle", "" - ), + "announcement_title": announcement_title, "org_id": announcement.get("orgId", ""), "content": "", # Will fetch detailed content in subsequent steps } @@ -485,25 +533,42 @@ async def fetch_ashare_filings( stock_code: str, report_types: List[str] | str = "annual", year: Optional[int | List[int]] = None, + quarter: Optional[int | List[int]] = None, limit: int = 10, ) -> List[AShareFilingResult]: """Fetch A-share filing data from CNINFO and import to knowledge base Args: stock_code: Stock code (e.g.: 000001, 600036, etc.) - report_types: Report types, options: "annual", "semi-annual", "quarterly". Default is "annual" + report_types: Report types (ENGLISH ONLY). Supported values: "annual", "semi-annual", "quarterly". + Default is "annual". Chinese parameters are NOT supported. year: Year filter, can be a single year or list of years. If not provided, fetch latest reports + quarter: Quarter filter (1-4), can be a single quarter or list of quarters. + Only applicable when report_types includes "quarterly". Requires year to be provided. 
limit: Maximum number of records to fetch, default 10 Returns: List[AShareFilingResult]: List of A-share filing results + Raises: + ValueError: If report_types contains Chinese parameters or invalid values, + or if quarter is provided without year + Examples: # Fetch latest annual report of Ping An Bank await fetch_ashare_filings("000001", "annual", limit=1) - # Fetch 2025 annual and semi-annual reports of Kweichow Moutai - await fetch_ashare_filings("600519", ["annual", "semi-annual"], year=2025) + # Fetch 2024 annual and semi-annual reports of Kweichow Moutai + await fetch_ashare_filings("600519", ["annual", "semi-annual"], year=2024) + + # Fetch 2024 Q3 quarterly report of Kweichow Moutai + await fetch_ashare_filings("600519", "quarterly", year=2024, quarter=3) + + # Fetch 2024 Q1 and Q3 quarterly reports of Kweichow Moutai + await fetch_ashare_filings("600519", "quarterly", year=2024, quarter=[1, 3]) + + # This will raise ValueError (Chinese parameters not supported): + # await fetch_ashare_filings("600519", "年报") # DON'T DO THIS """ # Normalize stock code @@ -514,6 +579,15 @@ async def fetch_ashare_filings( if not report_types_list: report_types_list = ["annual"] + # Validate quarter parameter + if quarter is not None: + if year is None: + raise ValueError("Quarter parameter requires year to be provided") + if "quarterly" not in report_types_list: + raise ValueError( + "Quarter parameter is only applicable when report_types includes 'quarterly'" + ) + # Normalize years years_list = [] if year is not None: @@ -522,9 +596,22 @@ async def fetch_ashare_filings( else: years_list = list(year) + # Normalize quarters + quarters_list = [] + if quarter is not None: + if isinstance(quarter, int): + quarters_list = [quarter] + else: + quarters_list = list(quarter) + + # Validate quarter values + for q in quarters_list: + if not isinstance(q, int) or q < 1 or q > 4: + raise ValueError(f"Quarter must be between 1 and 4, got: {q}") + # Fetch real data from CNINFO filings_data = await _fetch_cninfo_data( - normalized_code, report_types_list, years_list, limit + normalized_code, report_types_list, years_list, quarters_list, limit ) # Write to files and import to knowledge base From 9e336713765e8be45eb376e89050d08569c47390 Mon Sep 17 00:00:00 2001 From: paisley Date: Tue, 28 Oct 2025 13:57:18 +0800 Subject: [PATCH 8/8] refine --- .../agents/research_agent/sources.py | 74 ++++++++++++++++--- 1 file changed, 63 insertions(+), 11 deletions(-) diff --git a/python/valuecell/agents/research_agent/sources.py b/python/valuecell/agents/research_agent/sources.py index 0c2b2e9da..5284e56ce 100644 --- a/python/valuecell/agents/research_agent/sources.py +++ b/python/valuecell/agents/research_agent/sources.py @@ -313,6 +313,57 @@ async def _write_and_ingest_ashare( return results +async def _get_correct_orgid( + stock_code: str, session: aiohttp.ClientSession +) -> Optional[str]: + """Get correct orgId for a stock code from CNINFO search API + + Args: + stock_code: Stock code (e.g., "002460") + session: aiohttp session + + Returns: + Optional[str]: The correct orgId, or None if not found + """ + search_url = "http://www.cninfo.com.cn/new/information/topSearch/query" + + headers = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36", + "Accept": "*/*", + "Accept-Encoding": "gzip, deflate", + "Accept-Language": "zh-CN,zh;q=0.9", + "Connection": "keep-alive", + "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8", + "Host": 
"www.cninfo.com.cn", + "Origin": "http://www.cninfo.com.cn", + "Referer": "http://www.cninfo.com.cn/new/commonUrl/pageOfSearch?url=disclosure/list/search&lastPage=index", + "X-Requested-With": "XMLHttpRequest", + } + + search_data = {"keyWord": stock_code} + + try: + async with session.post( + search_url, headers=headers, data=search_data + ) as response: + if response.status == 200: + result = await response.json() + + if result and len(result) > 0: + # Find the exact match for the stock code + for company_info in result: + if company_info.get("code") == stock_code: + return company_info.get("orgId") + + # If no exact match, return the first result's orgId + return result[0].get("orgId") + + except Exception as e: + print(f"Error getting orgId for {stock_code}: {e}") + + return None + + async def _fetch_cninfo_data( stock_code: str, report_types: List[str], @@ -367,6 +418,18 @@ async def _fetch_cninfo_data( ) async with aiohttp.ClientSession() as session: + # Get correct orgId first + org_id = await _get_correct_orgid(stock_code, session) + if not org_id: + print(f"Warning: Could not get orgId for stock {stock_code}") + return [] + + # Determine plate based on stock code + if stock_code.startswith(("000", "002", "300")): + plate = "sz" + else: + plate = "sh" + for report_type in report_types: if len(filings_data) >= limit: break @@ -383,17 +446,6 @@ async def _fetch_cninfo_data( end_date = f"{target_year + 1}-01-01" se_date = f"{start_date}~{end_date}" - # Build request parameters - # Build orgId based on stock code - if stock_code.startswith(("000", "002", "300")): - # SZSE stocks - org_id = f"gssz{stock_code.zfill(7)}" # Pad to 7 digits - plate = "sz" - else: - # SSE stocks - org_id = f"gssh{stock_code.zfill(7)}" # Pad to 7 digits - plate = "sh" - form_data = { "pageNum": "1", "pageSize": "30",