From db11ef79f01a26c942e6143c95bfd519f5edaa0c Mon Sep 17 00:00:00 2001 From: hazeone <709547807@qq.com> Date: Tue, 18 Nov 2025 09:33:49 +0800 Subject: [PATCH 1/4] add rootdata source --- .../valuecell/agents/research_agent/core.py | 12 + .../agents/research_agent/sources.py | 448 ++++++ python/valuecell/agents/sources/__init__.py | 30 + python/valuecell/agents/sources/rootdata.py | 1323 +++++++++++++++++ 4 files changed, 1813 insertions(+) create mode 100644 python/valuecell/agents/sources/__init__.py create mode 100644 python/valuecell/agents/sources/rootdata.py diff --git a/python/valuecell/agents/research_agent/core.py b/python/valuecell/agents/research_agent/core.py index 8c33df9fd..b50c891a4 100644 --- a/python/valuecell/agents/research_agent/core.py +++ b/python/valuecell/agents/research_agent/core.py @@ -16,6 +16,12 @@ fetch_ashare_filings, fetch_event_sec_filings, fetch_periodic_sec_filings, + get_crypto_person_detail, + get_crypto_project_detail, + get_crypto_vc_detail, + search_crypto_people, + search_crypto_projects, + search_crypto_vcs, web_search, ) from valuecell.agents.utils.context import build_ctx_from_dep @@ -32,6 +38,12 @@ def __init__(self, **kwargs): fetch_event_sec_filings, fetch_ashare_filings, web_search, + search_crypto_projects, + get_crypto_project_detail, + search_crypto_vcs, + get_crypto_vc_detail, + search_crypto_people, + get_crypto_person_detail, ] self.knowledge_research_agent = Agent( model=model_utils_mod.get_model_for_agent("research_agent"), diff --git a/python/valuecell/agents/research_agent/sources.py b/python/valuecell/agents/research_agent/sources.py index 923a92568..75bb7872a 100644 --- a/python/valuecell/agents/research_agent/sources.py +++ b/python/valuecell/agents/research_agent/sources.py @@ -9,7 +9,14 @@ from agno.agent import Agent from edgar import Company from edgar.entity.filings import EntityFilings +from loguru import logger +from valuecell.agents.sources import ( + get_person_detail, + get_project_detail, + search_people, + search_projects, +) from valuecell.utils.path import get_knowledge_path from .knowledge import insert_md_file_to_knowledge, insert_pdf_file_to_knowledge @@ -690,3 +697,444 @@ async def fetch_ashare_filings( # Write to files and import to knowledge base knowledge_dir = Path(get_knowledge_path()) return await _write_and_ingest_ashare(filings_data, knowledge_dir) + + +# ============================================================================ +# Crypto Project Data Tools (RootData) +# ============================================================================ + + +async def search_crypto_projects( + query: str, + limit: int = 10, +) -> str: + """Search cryptocurrency projects on RootData by keyword. + + Use this tool when users ask about cryptocurrency projects, tokens, or blockchain ecosystems. + Examples: "What is Ethereum?", "Tell me about DeFi projects", "Find projects related to AI" + + Args: + query: Search keyword (project name, token symbol, or category like "DeFi", "AI", "GameFi") + limit: Maximum number of results to return (default: 5, max recommended: 10) + + Returns: + Formatted string with project information including name, description, tags, and key metrics. 
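+
+    Example:
+        # Illustrative call; any keyword or token symbol works
+        summary = await search_crypto_projects("Ethereum", limit=3)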
+ """ + + logger.info(f"Searching crypto projects for: {query}") + + try: + projects = await search_projects(query, limit=limit, use_playwright=True) + + if not projects: + return f"No cryptocurrency projects found for query: {query}" + + # Format results as context + results = [f"Found {len(projects)} cryptocurrency project(s) for '{query}':\n"] + + for i, proj in enumerate(projects, 1): + result_lines = [ + f"\n{i}. **{proj.name}** (ID: {proj.id})", + ] + + if proj.token_symbol: + result_lines.append(f" - Token: ${proj.token_symbol}") + + if proj.token_price is not None: + price_str = f" - Price: ${proj.token_price:.4f}" + if proj.price_change_24h is not None: + change_sign = "+" if proj.price_change_24h >= 0 else "" + price_str += f" ({change_sign}{proj.price_change_24h:.2f}% 24h)" + result_lines.append(price_str) + + if proj.brief_intro: + # Truncate long descriptions + brief = proj.brief_intro[:200] + if len(proj.brief_intro) > 200: + brief += "..." + result_lines.append(f" - Brief: {brief}") + + if proj.tags: + result_lines.append(f" - Tags: {', '.join(proj.tags[:5])}") + + if proj.twitter: + result_lines.append(f" - Twitter: @{proj.twitter}") + + if proj.website: + result_lines.append(f" - Website: {proj.website}") + + result_lines.append( + f" - Use get_crypto_project_detail({proj.id}) for full details" + ) + + results.append("\n".join(result_lines)) + + logger.debug(f"Search crypto projects results: {results}") + return "\n".join(results) + + except Exception as e: + logger.error(f"Error searching crypto projects: {e}") + return f"Error searching cryptocurrency projects: {str(e)}" + + +async def get_crypto_project_detail( + project_id: int, +) -> str: + """Get detailed information about a specific cryptocurrency project by its ID. + + Use this tool after search_crypto_projects to get comprehensive project details. + The project_id can be found in search results. + + Args: + project_id: RootData project ID (obtained from search_crypto_projects results) + + Returns: + Detailed project information including full description, team, metrics, and links. 
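+
+    Example:
+        # ID 12 is Ethereum under RootData's base64 URL scheme (k=MTI=)
+        detail = await get_crypto_project_detail(12)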
+ """ + + logger.info(f"Fetching crypto project detail for ID: {project_id}") + + try: + project = await get_project_detail(project_id) + + if not project: + return f"No project found with ID: {project_id}" + + # Format detailed information + details = [ + f"# {project.name}", + "", + ] + + if project.token_symbol: + details.append(f"**Token Symbol:** ${project.token_symbol}") + + if project.token_price is not None: + price_line = f"**Current Price:** ${project.token_price:.4f}" + if project.price_change_24h is not None: + change_sign = "+" if project.price_change_24h >= 0 else "" + price_line += f" ({change_sign}{project.price_change_24h:.2f}% 24h)" + details.append(price_line) + + if project.founded_year: + details.append(f"**Founded:** {project.founded_year}") + + details.append("") + + if project.brief_intro: + details.append("## Brief Introduction") + details.append(project.brief_intro) + details.append("") + + if project.description: + details.append("## Description") + details.append(project.description) + details.append("") + + if project.tags: + details.append("## Tags") + details.append(", ".join(project.tags)) + details.append("") + + if project.members: + details.append("## Team Members") + for member in project.members: + details.append(f"- {member}") + details.append("") + + details.append("## Links") + if project.website: + details.append(f"- Website: {project.website}") + if project.twitter: + details.append(f"- Twitter: https://twitter.com/{project.twitter}") + + return "\n".join(details) + + except Exception as e: + logger.error(f"Error fetching crypto project detail: {e}") + return f"Error fetching project details: {str(e)}" + + +async def search_crypto_vcs( + query: str, + limit: int = 5, +) -> str: + """Search venture capital firms and crypto investors on RootData. + + Use this tool when users ask about VCs, investment firms, or crypto investors. + Examples: "Who invested in Ethereum?", "Find VCs focused on DeFi", "Tell me about a16z crypto" + + Args: + query: Search keyword (VC name, investment focus, or category) + limit: Maximum number of results to return (default: 5, max recommended: 10) + + Returns: + Formatted string with VC information including name, description, portfolio, and links. + """ + from loguru import logger + + from valuecell.agents.sources import search_vcs + + logger.info(f"Searching crypto VCs for: {query}") + + try: + vcs = await search_vcs(query, limit=limit, use_playwright=True) + + if not vcs: + return f"No venture capital firms found for query: {query}" + + # Format results as context + results = [f"Found {len(vcs)} venture capital firm(s) for '{query}':\n"] + + for i, vc in enumerate(vcs, 1): + result_lines = [ + f"\n{i}. **{vc.name}** (ID: {vc.id})", + ] + + if vc.portfolio_count is not None: + result_lines.append(f" - Portfolio: {vc.portfolio_count} companies") + + if vc.total_investments is not None: + result_lines.append(f" - Total Investments: {vc.total_investments}") + + if vc.brief_intro: + brief = vc.brief_intro[:200] + if len(vc.brief_intro) > 200: + brief += "..." 
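+                # Brief was capped at 200 chars above to keep tool output compact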
+ result_lines.append(f" - Brief: {brief}") + + if vc.tags: + result_lines.append(f" - Focus: {', '.join(vc.tags[:5])}") + + if vc.twitter: + result_lines.append(f" - Twitter: @{vc.twitter}") + + if vc.website: + result_lines.append(f" - Website: {vc.website}") + + result_lines.append( + f" - Use get_crypto_vc_detail({vc.id}) for full details" + ) + + results.append("\n".join(result_lines)) + + return "\n".join(results) + + except Exception as e: + logger.error(f"Error searching crypto VCs: {e}") + return f"Error searching venture capital firms: {str(e)}" + + +async def get_crypto_vc_detail( + vc_id: int, +) -> str: + """Get detailed information about a specific VC firm by its ID. + + Use this tool after search_crypto_vcs to get comprehensive VC details. + The vc_id can be found in search results. + + Args: + vc_id: RootData VC ID (obtained from search_crypto_vcs results) + + Returns: + Detailed VC information including full description, portfolio, and links. + """ + from loguru import logger + + from valuecell.agents.sources import get_vc_detail + + logger.info(f"Fetching crypto VC detail for ID: {vc_id}") + + try: + vc = await get_vc_detail(vc_id) + + if not vc: + return f"No VC found with ID: {vc_id}" + + # Format detailed information + details = [ + f"# {vc.name}", + "", + ] + + if vc.portfolio_count is not None: + details.append(f"**Portfolio Size:** {vc.portfolio_count} companies") + + if vc.total_investments is not None: + details.append(f"**Total Investments:** {vc.total_investments}") + + if vc.founded_year: + details.append(f"**Founded:** {vc.founded_year}") + + details.append("") + + if vc.brief_intro: + details.append("## Brief Introduction") + details.append(vc.brief_intro) + details.append("") + + if vc.description: + details.append("## Description") + details.append(vc.description) + details.append("") + + if vc.tags: + details.append("## Investment Focus") + details.append(", ".join(vc.tags)) + details.append("") + + details.append("## Links") + if vc.website: + details.append(f"- Website: {vc.website}") + if vc.twitter: + details.append(f"- Twitter: https://twitter.com/{vc.twitter}") + + return "\n".join(details) + + except Exception as e: + logger.error(f"Error fetching crypto VC detail: {e}") + return f"Error fetching VC details: {str(e)}" + + +async def search_crypto_people( + query: str, + limit: int = 5, +) -> str: + """Search crypto industry people on RootData (founders, executives, investors). + + Use this tool when users ask about people in crypto, founders, or industry leaders. + Examples: "Who is Vitalik Buterin?", "Find founders of Ethereum", "Tell me about crypto investors" + + Args: + query: Search keyword (person name, role, or organization) + limit: Maximum number of results to return (default: 5, max recommended: 10) + + Returns: + Formatted string with person information including name, title, projects, and links. + """ + + logger.info(f"Searching crypto people for: {query}") + + try: + people = await search_people(query, limit=limit, use_playwright=True) + + if not people: + return f"No people found for query: {query}" + + # Format results as context + results = [f"Found {len(people)} person/people for '{query}':\n"] + + for i, person in enumerate(people, 1): + result_lines = [ + f"\n{i}. 
**{person.name}** (ID: {person.id})", + ] + + if person.title: + result_lines.append(f" - Title: {person.title}") + + if person.current_organization: + result_lines.append(f" - Organization: {person.current_organization}") + + if person.brief_intro: + brief = person.brief_intro[:200] + if len(person.brief_intro) > 200: + brief += "..." + result_lines.append(f" - Brief: {brief}") + + if person.projects: + result_lines.append( + f" - Projects: {', '.join(person.projects[:3])}" + + ("..." if len(person.projects) > 3 else "") + ) + + if person.tags: + result_lines.append(f" - Roles: {', '.join(person.tags[:5])}") + + if person.twitter: + result_lines.append(f" - Twitter: @{person.twitter}") + + if person.linkedin: + result_lines.append(f" - LinkedIn: {person.linkedin}") + + result_lines.append( + f" - Use get_crypto_person_detail({person.id}) for full details" + ) + + results.append("\n".join(result_lines)) + + return "\n".join(results) + + except Exception as e: + logger.error(f"Error searching crypto people: {e}") + return f"Error searching people: {str(e)}" + + +async def get_crypto_person_detail( + person_id: int, +) -> str: + """Get detailed information about a specific person by their ID. + + Use this tool after search_crypto_people to get comprehensive person details. + The person_id can be found in search results. + + Args: + person_id: RootData person ID (obtained from search_crypto_people results) + + Returns: + Detailed person information including full bio, projects, and links. + """ + + logger.info(f"Fetching crypto person detail for ID: {person_id}") + + try: + person = await get_person_detail(person_id) + + if not person: + return f"No person found with ID: {person_id}" + + # Format detailed information + details = [ + f"# {person.name}", + "", + ] + + if person.title: + details.append(f"**Title:** {person.title}") + + if person.current_organization: + details.append(f"**Organization:** {person.current_organization}") + + details.append("") + + if person.brief_intro: + details.append("## Brief Introduction") + details.append(person.brief_intro) + details.append("") + + if person.description: + details.append("## Biography") + details.append(person.description) + details.append("") + + if person.projects: + details.append("## Associated Projects") + for project in person.projects: + details.append(f"- {project}") + details.append("") + + if person.tags: + details.append("## Roles & Expertise") + details.append(", ".join(person.tags)) + details.append("") + + details.append("## Links") + if person.twitter: + details.append(f"- Twitter: https://twitter.com/{person.twitter}") + if person.linkedin: + details.append(f"- LinkedIn: {person.linkedin}") + + return "\n".join(details) + + except Exception as e: + logger.error(f"Error fetching crypto person detail: {e}") + return f"Error fetching person details: {str(e)}" diff --git a/python/valuecell/agents/sources/__init__.py b/python/valuecell/agents/sources/__init__.py new file mode 100644 index 000000000..4d25311c1 --- /dev/null +++ b/python/valuecell/agents/sources/__init__.py @@ -0,0 +1,30 @@ +""" +ValueCell agents data sources + +Available sources: +- rootdata: Cryptocurrency projects, VCs and people data from RootData.com +""" + +from valuecell.agents.sources.rootdata import ( + RootDataPerson, + RootDataProject, + RootDataVC, + get_person_detail, + get_project_detail, + get_vc_detail, + search_people, + search_projects, + search_vcs, +) + +__all__ = [ + "RootDataProject", + "RootDataVC", + "RootDataPerson", + "get_project_detail", 
+ "get_vc_detail", + "get_person_detail", + "search_projects", + "search_vcs", + "search_people", +] diff --git a/python/valuecell/agents/sources/rootdata.py b/python/valuecell/agents/sources/rootdata.py new file mode 100644 index 000000000..30af03922 --- /dev/null +++ b/python/valuecell/agents/sources/rootdata.py @@ -0,0 +1,1323 @@ +""" +RootData API Client - Cryptocurrency projects, VCs and people data fetching tool + +Extracts comprehensive data from RootData using Playwright browser automation to access +server-side rendered (SSR) data from window.__NUXT__ object. + +Features: +--------- +- **Rich Project Details**: Get 40+ fields including price, market cap, supply, + historical prices, contracts, social links, and community sentiment +- **Smart Search**: Search projects by name or keyword with browser interaction +- **VC & People Data**: Search and retrieve venture capital firms and people information +- **Fallback Support**: Automatic fallback to HTML parsing if Playwright unavailable + +Quick Start: +------------ +```python +import asyncio +from valuecell.agents.sources.rootdata import ( + search_projects, + get_project_detail +) + +async def main(): + # Search for projects + projects = await search_projects("Aster", limit=10) + for project in projects: + print(f"{project.name} ({project.token_symbol})") + + # Get detailed information + if projects: + detail = await get_project_detail(projects[0].id) + print(f"Price: ${detail.token_price}") + print(f"Market Cap: ${detail.market_cap}") + print(f"24h Change: {detail.price_change_24h}%") + print(f"Ecosystems: {', '.join(detail.ecosystems)}") + print(f"Contracts: {detail.contracts}") + print(f"Community Hold: {detail.hold_percentage}%") + +asyncio.run(main()) +``` + +Requirements: +------------- +- playwright: `pip install playwright && playwright install chromium` +- httpx: `pip install httpx` +- beautifulsoup4: `pip install beautifulsoup4` +""" + +import re +from typing import Any, Dict, List, Optional + +import httpx +from bs4 import BeautifulSoup +from loguru import logger +from pydantic import BaseModel, Field + +# ============================================================================ +# Data Models +# ============================================================================ + + +class RootDataProject(BaseModel): + """Cryptocurrency project information""" + + # Basic Info + id: int + name: str + brief_intro: str = Field(default="", description="Brief introduction") + description: str = Field(default="", description="Detailed description") + image_url: Optional[str] = Field(None, description="Project logo") + founded_year: Optional[int] = Field(None, description="Founded year") + + # Status + status: Optional[str] = Field(None, description="Project status (Active/Inactive)") + level: Optional[int] = Field(None, description="Project level/tier") + rank: Optional[int] = Field(None, description="Project rank") + + # Tags and Categories + tags: List[str] = Field(default_factory=list, description="Project tags") + ecosystems: List[str] = Field( + default_factory=list, description="Blockchain ecosystems" + ) + + # Token Information + token_symbol: Optional[str] = Field(None, description="Token symbol") + token_price: Optional[float] = Field(None, description="Current token price") + market_cap: Optional[float] = Field(None, description="Market capitalization") + fdv: Optional[float] = Field(None, description="Fully diluted valuation") + volume_24h: Optional[float] = Field(None, description="24h trading volume") + 
volume_change_24h: Optional[float] = Field(None, description="24h volume change") + + # Supply Information + circulating_supply: Optional[float] = Field(None, description="Circulating supply") + total_supply: Optional[float] = Field(None, description="Total supply") + max_supply: Optional[float] = Field(None, description="Maximum supply") + + # Price Changes + price_change_1h: Optional[float] = Field(None, description="1h price change %") + price_change_24h: Optional[float] = Field(None, description="24h price change %") + price_change_7d: Optional[float] = Field(None, description="7d price change %") + price_change_30d: Optional[float] = Field(None, description="30d price change %") + price_change_60d: Optional[float] = Field(None, description="60d price change %") + + # Historical Prices + ath: Optional[float] = Field(None, description="All-time high price") + ath_date: Optional[str] = Field(None, description="All-time high date") + atl: Optional[float] = Field(None, description="All-time low price") + atl_date: Optional[str] = Field(None, description="All-time low date") + + # Contract Information + contracts: List[Dict[str, str]] = Field( + default_factory=list, description="Smart contract addresses and chains" + ) + + # Social Links + website: Optional[str] = Field(None, description="Official website") + twitter: Optional[str] = Field(None, description="Twitter/X account") + discord: Optional[str] = Field(None, description="Discord server") + telegram: Optional[str] = Field(None, description="Telegram group") + github: Optional[str] = Field(None, description="GitHub repository") + + # External Links + coingecko_url: Optional[str] = Field(None, description="CoinGecko URL") + coinmarketcap_url: Optional[str] = Field(None, description="CoinMarketCap URL") + defillama_url: Optional[str] = Field(None, description="DefiLlama URL") + + # Community Sentiment + hold_percentage: Optional[float] = Field( + None, description="Percentage of users holding" + ) + fud_percentage: Optional[float] = Field( + None, description="Percentage of users FUDing" + ) + + # Special Flags + is_rootdata_list: Optional[bool] = Field( + None, description="In RootData featured list" + ) + is_rootdata_list_2025: Optional[bool] = Field( + None, description="In RootData 2025 list" + ) + + # Legacy fields for backward compatibility + members: List[str] = Field( + default_factory=list, description="Team members (deprecated)" + ) + + class Config: + populate_by_name = True + + +class RootDataVC(BaseModel): + """Venture Capital / Investment firm information""" + + id: int + name: str + brief_intro: str = Field(default="", description="Brief introduction") + description: str = Field(default="", description="Detailed description") + tags: List[str] = Field(default_factory=list, description="Investment focus tags") + website: Optional[str] = Field(None, description="Website URL") + twitter: Optional[str] = Field(None, description="Twitter account") + image_url: Optional[str] = Field(None, description="Logo URL") + founded_year: Optional[int] = Field(None, description="Founded year") + portfolio_count: Optional[int] = Field( + None, description="Number of portfolio companies" + ) + total_investments: Optional[int] = Field( + None, description="Total number of investments" + ) + + class Config: + populate_by_name = True + + +class RootDataPerson(BaseModel): + """Person information (founders, executives, investors)""" + + id: int + name: str + title: Optional[str] = Field(None, description="Job title or role") + brief_intro: str 
= Field(default="", description="Brief introduction") + description: str = Field(default="", description="Detailed description") + tags: List[str] = Field(default_factory=list, description="Role/expertise tags") + twitter: Optional[str] = Field(None, description="Twitter account") + linkedin: Optional[str] = Field(None, description="LinkedIn profile") + image_url: Optional[str] = Field(None, description="Profile picture URL") + projects: List[str] = Field(default_factory=list, description="Associated projects") + current_organization: Optional[str] = Field( + None, description="Current organization" + ) + + class Config: + populate_by_name = True + + +# ============================================================================ +# Simple HTML Parser Functions +# ============================================================================ + + +async def fetch_page_html(url: str) -> str: + """Fetch HTML content from a URL""" + headers = { + "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36", + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + } + + async with httpx.AsyncClient(timeout=30.0) as client: + try: + response = await client.get(url, headers=headers) + if response.status_code == 200: + return response.text + else: + logger.warning(f"Failed to fetch {url}: status {response.status_code}") + return "" + except Exception as e: + logger.warning(f"Error fetching {url}: {e}") + return "" + + +def extract_project_id_from_url(url: str) -> Optional[int]: + """Extract project ID from RootData URL + + Example: https://www.rootdata.com/Projects/detail/Ethereum?k=MTI%3D + The 'k' parameter is base64-encoded ID + """ + import base64 + + match = re.search(r"[?&]k=([^&]+)", url) + if match: + try: + encoded_id = match.group(1).replace("%3D", "=") + decoded = base64.b64decode(encoded_id).decode("utf-8") + return int(decoded) + except Exception as e: + logger.warning(f"Failed to decode project ID: {e}") + return None + + +async def get_project_from_page( + project_id_or_url: str | int, +) -> Optional[RootDataProject]: + """ + Get project information by scraping the project detail page + + Args: + project_id_or_url: Project ID (int) or full URL (str) + + Returns: + RootDataProject or None if failed + + Examples: + # By ID + project = await get_project_from_page(12) # Ethereum + + # By URL + project = await get_project_from_page("https://www.rootdata.com/Projects/detail/Ethereum?k=MTI%3D") + """ + # Construct URL + if isinstance(project_id_or_url, int): + import base64 + + encoded_id = base64.b64encode(str(project_id_or_url).encode()).decode() + url = f"https://www.rootdata.com/Projects/detail/Project?k={encoded_id}" + else: + url = project_id_or_url + project_id_or_url = extract_project_id_from_url(url) or 0 + + logger.info(f"Fetching project page: {url}") + + html = await fetch_page_html(url) + if not html: + return None + + soup = BeautifulSoup(html, "html.parser") + + try: + # Extract project data from page + # Note: This is a basic implementation. 
Actual selectors may need adjustment + # based on RootData's HTML structure + + name = "" + h1 = soup.find("h1") + if h1: + name = h1.text.strip() + + token_symbol = "" + h3 = soup.find("h3") + if h3: + token_symbol = h3.text.strip() + + brief_intro = "" + description = "" + paras = soup.find_all("p") + for p in paras: + text = p.text.strip() + if len(text) > 20: # Likely description text + if not brief_intro: + brief_intro = text + elif len(text) > len(description): + description = text + + # Extract tags + tags = [] + tag_elements = soup.find_all(class_=re.compile(r"tag|label", re.I)) + for tag_el in tag_elements: + tag_text = tag_el.text.strip() + if tag_text and len(tag_text) < 30: # Reasonable tag length + tags.append(tag_text) + + # Extract links + website = None + twitter = None + links = soup.find_all("a", href=True) + for link in links: + href = link["href"] + if "twitter.com" in href or "x.com" in href: + twitter = href.split("/")[-1] + elif href.startswith("http") and "rootdata.com" not in href: + if not website: + website = href + + project = RootDataProject( + id=project_id_or_url, + name=name, + brief_intro=brief_intro, + description=description, + tags=list(set(tags))[:10], # Deduplicate and limit + token_symbol=token_symbol, + twitter=twitter, + website=website, + ) + + logger.info(f"Successfully extracted project: {name}") + return project + + except Exception as e: + logger.warning(f"Failed to parse project page: {e}") + return None + + +# ============================================================================ +# Recommendation: Use a proper web scraping service or browser automation +# ============================================================================ + + +async def get_project_with_playwright(project_id: int) -> Optional[RootDataProject]: + """ + Get detailed project information using Playwright to access server-side rendered data + + This method extracts data from window.__NUXT__ which contains complete project information + including price, market cap, supply, social links, and more. + + Args: + project_id: Project ID + + Returns: + RootDataProject with comprehensive data or None if failed + + Example: + project = await get_project_with_playwright(1179) # Ripae project + """ + try: + from playwright.async_api import async_playwright + except ImportError: + logger.warning("Playwright not installed. 
Install with: pip install playwright") + return None + + import base64 + + encoded_id = base64.b64encode(str(project_id).encode()).decode() + url = f"https://www.rootdata.com/Projects/detail/Project?k={encoded_id}" + + logger.info(f"Fetching project {project_id} with Playwright: {url}") + + async with async_playwright() as p: + browser = await p.chromium.launch(headless=True) + page = await browser.new_page() + + try: + await page.goto(url, wait_until="networkidle", timeout=30000) + await page.wait_for_timeout(1000) + + # Extract data from window.__NUXT__ + project_data = await page.evaluate("""() => { + if (!window.__NUXT__ || !window.__NUXT__.data) { + return null; + } + + const nuxtArray = window.__NUXT__.data; + + // Find the detail object + for (let i = 0; i < nuxtArray.length; i++) { + const item = nuxtArray[i]; + if (item && item.detail && item.detail.id) { + return item.detail; + } + } + + return null; + }""") + + await browser.close() + + if not project_data: + logger.warning(f"No project data found for ID: {project_id}") + return None + + # Parse the data into RootDataProject + project = _parse_project_from_nuxt_data(project_data) + logger.info(f"Successfully extracted project: {project.name}") + return project + + except Exception as e: + logger.warning(f"Playwright scraping failed: {e}") + await browser.close() + return None + + +def _parse_project_from_nuxt_data(data: Dict[str, Any]) -> RootDataProject: + """ + Parse project data from window.__NUXT__ format to RootDataProject + + Args: + data: Raw data from window.__NUXT__.data[x].detail + + Returns: + RootDataProject instance + """ + + # Helper to extract multilingual text + def get_text(field): + if isinstance(field, dict): + return field.get("en_value") or field.get("cn_value") or "" + return str(field) if field else "" + + # Helper to parse float safely + def parse_float(value): + if value is None or value == "": + return None + try: + return float(value) + except (ValueError, TypeError): + return None + + # Extract name + name = get_text(data.get("name", "")) + + # Extract tags + tags = [] + tag_list = data.get("tagList", []) + if isinstance(tag_list, list): + for tag in tag_list: + if isinstance(tag, dict) and "name" in tag: + tag_name = get_text(tag["name"]) + if tag_name: + tags.append(tag_name) + + # Extract ecosystems + ecosystems = [] + sj_list = data.get("sjList", []) + if isinstance(sj_list, list): + for eco in sj_list: + if isinstance(eco, dict) and "name" in eco: + ecosystems.append(str(eco["name"])) + + # Extract contracts + contracts = [] + contract_list = data.get("contracts", []) + if isinstance(contract_list, list): + for contract in contract_list: + if isinstance(contract, dict): + contracts.append( + { + "address": contract.get("contractAddress", ""), + "chain": contract.get("contractPlatform", ""), + "explorer_url": contract.get("contractExplorerUrl", ""), + } + ) + + # Determine status + status = None + operate_status = data.get("operateStatus") + if operate_status == 1: + status = "Active" + elif operate_status == 2: + status = "Inactive" + + # Calculate sentiment percentages + hold_num = parse_float(data.get("holdNum")) + fud_num = parse_float(data.get("fudNum")) + hold_percentage = None + fud_percentage = None + if hold_num is not None and fud_num is not None: + total = hold_num + fud_num + if total > 0: + hold_percentage = (hold_num / total) * 100 + fud_percentage = (fud_num / total) * 100 + + return RootDataProject( + id=data.get("id", 0), + name=name, + brief_intro=get_text(data.get("briefIntd", 
"")), + description=get_text(data.get("intd", "")), + image_url=data.get("logoImg"), + founded_year=int(data.get("establishDate")) + if data.get("establishDate") + else None, + # Status + status=status, + level=data.get("level"), + rank=data.get("rank"), + # Tags and categories + tags=tags, + ecosystems=ecosystems, + # Token info + token_symbol=data.get("lssuingCode") or data.get("symbol"), + token_price=parse_float(data.get("price")), + market_cap=parse_float(data.get("marketCap")), + fdv=parse_float(data.get("fullyDilutedMarketCap")), + volume_24h=parse_float(data.get("volume24")), + volume_change_24h=parse_float(data.get("volumeChange24")), + # Supply + circulating_supply=parse_float(data.get("circulatingSupply")), + total_supply=parse_float(data.get("totalSupply")), + max_supply=parse_float(data.get("maxSupply")), + # Price changes + price_change_1h=parse_float(data.get("percentChange1h")), + price_change_24h=parse_float(data.get("percentChange24")), + price_change_7d=parse_float(data.get("percentChange7d")), + price_change_30d=parse_float(data.get("percentChange30d")), + price_change_60d=parse_float(data.get("percentChange60d")), + # Historical prices + ath=parse_float(data.get("ath")), + ath_date=data.get("athDate"), + atl=parse_float(data.get("atl")), + atl_date=data.get("atlDate"), + # Contracts + contracts=contracts, + # Social links + website=data.get("website"), + twitter=data.get("twitterUrl"), + discord=data.get("discordUrl"), + telegram=data.get("telegramUrl"), + github=data.get("githubUrl"), + # External links + coingecko_url=data.get("coingeckoUrl"), + coinmarketcap_url=data.get("coinmarketcapUrl"), + defillama_url=data.get("defillamaUrl"), + # Community sentiment + hold_percentage=hold_percentage, + fud_percentage=fud_percentage, + # Special flags + is_rootdata_list=bool(data.get("isRootdataList")), + is_rootdata_list_2025=bool(data.get("isRootdataList2025")), + ) + + +# ============================================================================ +# Main Functions +# ============================================================================ + + +async def search_projects_with_browser_interaction( + query: str, limit: int = 10 +) -> List[RootDataProject]: + """ + Search projects by interacting with the website's search functionality using Playwright + + This method actually uses the website's search box and extracts results from the rendered page, + ensuring we get the same results as a user would see. + + Args: + query: Search keyword + limit: Maximum results + + Returns: + List of projects + """ + try: + from playwright.async_api import async_playwright + except ImportError: + logger.error( + "Playwright not installed. 
Install with: " + "pip install playwright && playwright install chromium" + ) + return [] + + logger.info(f"Searching projects via browser interaction for: {query}") + + async with async_playwright() as p: + browser = await p.chromium.launch(headless=True) + page = await browser.new_page() + + try: + # Navigate to homepage + await page.goto( + "https://www.rootdata.com", wait_until="networkidle", timeout=30000 + ) + + # Wait for page to load + await page.wait_for_timeout(500) + + # Click on the search area to reveal the search input + try: + # Try to click the search trigger element + await page.click( + 'text="Search project, VC, person, X account, token, archive."', + timeout=1000, + ) + await page.wait_for_timeout(500) + except Exception as e: + logger.warning( + f"Could not click search trigger: {e}, trying alternative method" + ) + # Try alternative selector + try: + await page.click('[class*="search"]', timeout=1000) + await page.wait_for_timeout(500) + except Exception: + pass + + # Now find and use the search input that appeared + search_input = await page.query_selector('input[placeholder*="Search"]') + if not search_input: + logger.error("Could not find search input after clicking search area") + await browser.close() + return [] + + # Type the query + await search_input.fill(query) + + # Wait for search results to load + # We need to wait longer than before to ensure API results are loaded + try: + # Wait for project links to appear in the dialog + await page.wait_for_selector( + 'dialog a[href*="/Projects/detail/"], [role="dialog"] a[href*="/Projects/detail/"]', + timeout=5000, + ) + # Extra wait to ensure all results are rendered + await page.wait_for_timeout(500) + except Exception as e: + logger.warning(f"Timeout waiting for search results: {e}") + await page.wait_for_timeout(500) # Give it more time anyway + + # Extract search results from the search dropdown + projects_data = await page.evaluate("""() => { + // Find the search results dialog + const dialog = document.querySelector('dialog, [role="dialog"]'); + if (!dialog) { + return []; + } + + // Find the Projects section that contains project links + // The search results are organized by category (All, Projects, VC, People, etc.) + let projectsContainer = null; + + // Look for a div/section that contains both "Projects" text and project links + const allDivs = dialog.querySelectorAll('div, section'); + for (const div of allDivs) { + const text = div.textContent; + if (text.includes('Projects') && div.querySelector('a[href*="/Projects/detail/"]')) { + projectsContainer = div; + break; + } + } + + // If we couldn't find a specific projects container, use the whole dialog + if (!projectsContainer) { + projectsContainer = dialog; + } + + // Find all project links in the Projects section + const projectLinks = projectsContainer.querySelectorAll('a[href*="/Projects/detail/"]'); + const projects = []; + + for (const link of projectLinks) { + // Extract project data from link + const href = link.getAttribute('href'); + const idMatch = href.match(/Projects\\/detail\\/([^?]+)\\?k=([^&]+)/); + const nameSlug = idMatch ? idMatch[1] : ''; + const idBase64 = idMatch ? 
idMatch[2] : ''; + + // Try to extract ID from base64 (URL-encoded) + let id = 0; + try { + // Decode URL encoding first, then base64 + const decodedBase64 = decodeURIComponent(idBase64); + id = parseInt(atob(decodedBase64)); + } catch (e) { + // If decode fails, continue without ID + } + + // The link itself contains all the text, parse it + const linkText = link.textContent.trim(); + + // Extract name from h4 heading within the link + const nameEl = link.querySelector('h4'); + const name = nameEl ? nameEl.textContent.trim() : nameSlug.replace(/%20/g, ' '); + + // Extract token symbol (usually right after the name) + const symbolMatch = linkText.match(/([A-Z]{2,10})(?=\\s+\\$|\\s+[A-Z#])/); + const symbol = symbolMatch ? symbolMatch[1] : null; + + // Extract price + const priceMatch = linkText.match(/\\$([0-9.]+)/); + const price = priceMatch ? parseFloat(priceMatch[1]) : null; + + // Extract tags (look for words after price) + const tags = []; + const tagMatches = linkText.matchAll(/([A-Z][a-zA-Z]+)(?=\\s|$)/g); + for (const match of tagMatches) { + const tag = match[1]; + if (tag !== symbol && tags.length < 5) { + tags.push(tag); + } + } + + // Extract description (paragraph within the link) + const descEl = link.querySelector('p'); + const description = descEl ? descEl.textContent.trim() : ''; + + projects.push({ + id: id, + name: name, + nameSlug: nameSlug, + token_symbol: symbol, + price: price, + tags: tags, + description: description, + href: href + }); + } + + return projects; + }""") + + await browser.close() + + if not projects_data: + logger.warning(f"No projects found for query: {query}") + return [] + + # Parse projects + projects = [] + for proj_data in projects_data[:limit]: + try: + # Extract ID from href if not found + proj_id = proj_data.get("id", 0) + if proj_id == 0 and proj_data.get("href"): + # Try to extract from URL + import base64 + + href = proj_data["href"] + k_match = re.search(r"\\?k=([^&]+)", href) + if k_match: + try: + proj_id = int( + base64.b64decode(k_match.group(1)).decode() + ) + except Exception as e: + logger.warning(f"Failed to decode project ID: {e}") + + # Note: Search results have limited data, full details require get_project_detail() + project = RootDataProject( + id=proj_id, + name=proj_data.get("name", ""), + brief_intro=proj_data.get("description", ""), + description=proj_data.get("description", ""), + tags=proj_data.get("tags", []), + token_symbol=proj_data.get("token_symbol"), + token_price=proj_data.get("price"), + ) + + projects.append(project) + + except Exception as e: + logger.warning(f"Failed to parse project: {e}") + continue + + logger.info(f"Found {len(projects)} projects for query: {query}") + return projects + + except Exception as e: + logger.error(f"Browser interaction error: {e}") + await browser.close() + return [] + + +async def search_projects( + query: str, limit: int = 10, use_playwright: bool = True +) -> List[RootDataProject]: + """ + Search crypto projects using browser interaction to get accurate results + + This method simulates a user searching on the RootData website, ensuring + we get the same results that a real user would see. 
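+    Requires Playwright; if browser automation is unavailable or fails, an empty
+    list is returned rather than raising.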
+ + Args: + query: Search keyword + limit: Maximum number of results + use_playwright: Kept for backward compatibility (always uses browser interaction) + + Returns: + List of projects + + Examples: + # Search for "Aster" + projects = await search_projects("Aster", limit=10) + + # Search for "DeFi" + projects = await search_projects("DeFi", limit=5) + """ + + # Use browser interaction search (only reliable method) + try: + projects = await search_projects_with_browser_interaction(query, limit) + if projects: + return projects + logger.warning(f"No projects found for query: {query}") + return [] + except Exception as e: + logger.error(f"Browser interaction search failed: {e}") + return [] + + +async def get_project_detail( + project_id: int, use_playwright: bool = True +) -> Optional[RootDataProject]: + """ + Get comprehensive project details + + This function first tries to use Playwright to extract complete data from + window.__NUXT__. If that fails or Playwright is not available, it falls back + to HTML parsing (with less detailed information). + + Args: + project_id: Project ID (e.g., 12 for Ethereum, 1179 for Ripae) + use_playwright: If True, use Playwright for detailed data extraction + + Returns: + RootDataProject with comprehensive information or None if failed + + Examples: + # Get full project details (recommended) + project = await get_project_detail(1179) + + # Fallback to HTML parsing only + project = await get_project_detail(1179, use_playwright=False) + """ + if use_playwright: + try: + project = await get_project_with_playwright(project_id) + if project: + return project + logger.warning( + f"Playwright extraction failed for project {project_id}, " + "falling back to HTML parsing" + ) + except Exception as e: + logger.warning( + f"Playwright error for project {project_id}: {e}, " + "falling back to HTML parsing" + ) + + # Fallback to HTML parsing (less detailed) + return await get_project_from_page(project_id) + + +# Backward compatibility aliases +get_project_detail_simple = get_project_detail +search_projects_simple = search_projects + + +# ============================================================================ +# VC Search Functions +# ============================================================================ + + +async def search_vcs_with_playwright(query: str, limit: int = 10) -> List[RootDataVC]: + """ + Search VCs using Playwright (browser automation) + + Args: + query: Search keyword + limit: Maximum number of results + + Returns: + List of VCs + """ + try: + from playwright.async_api import async_playwright + except ImportError: + logger.error( + "Playwright not installed. 
Install with: " + "pip install playwright && playwright install chromium" + ) + return [] + + url = f"https://www.rootdata.com/Investors?k={query}" + logger.info(f"Searching VCs with Playwright: {url}") + + async with async_playwright() as p: + browser = await p.chromium.launch(headless=True) + page = await browser.new_page() + + try: + await page.goto(url, wait_until="networkidle", timeout=30000) + await page.wait_for_timeout(500) + + # Extract data from __NUXT__ + vcs_data = await page.evaluate("""() => { + if (!window.__NUXT__ || !window.__NUXT__.data) { + return []; + } + + const dataArray = window.__NUXT__.data; + for (let i = 0; i < dataArray.length; i++) { + const item = dataArray[i]; + if (item && typeof item === 'object') { + const keys = ['list', 'investors', 'items', 'data', 'records']; + for (const key of keys) { + if (item[key] && Array.isArray(item[key])) { + return item[key]; + } + } + } + } + return []; + }""") + + await browser.close() + + if not vcs_data: + logger.warning(f"No VCs found for query: {query}") + return [] + + # Parse VCs + vcs = [] + for vc_data in vcs_data[:limit]: + try: + name = vc_data.get("name", {}) + if isinstance(name, dict): + name = name.get("en_value") or name.get("cn_value") or "" + + tags = [] + if "enTagNames" in vc_data: + tags_str = vc_data["enTagNames"] + if tags_str: + tags = [t.strip() for t in str(tags_str).split(",")] + + vc = RootDataVC( + id=vc_data.get("id", 0), + name=str(name), + brief_intro=vc_data.get("enBriefIntd") or "", + description=vc_data.get("enIntd") or "", + tags=tags, + twitter=vc_data.get("twitter"), + website=vc_data.get("website"), + image_url=vc_data.get("imgUrl"), + portfolio_count=vc_data.get("portfolioCount"), + total_investments=vc_data.get("totalInvestments"), + ) + + vcs.append(vc) + + except Exception as e: + logger.warning(f"Failed to parse VC: {e}") + continue + + logger.info(f"Found {len(vcs)} VCs for query: {query}") + return vcs + + except Exception as e: + logger.error(f"Playwright error: {e}") + await browser.close() + return [] + + +async def search_vcs( + query: str, limit: int = 10, use_playwright: bool = True +) -> List[RootDataVC]: + """ + Search venture capital firms and investors + + Args: + query: Search keyword + limit: Maximum number of results + use_playwright: If True, use browser automation (more reliable) + + Returns: + List of VCs + + Examples: + # Search for "a16z" + vcs = await search_vcs("a16z", limit=5) + + # Search for VCs focused on DeFi + vcs = await search_vcs("DeFi", limit=10) + """ + + if use_playwright: + try: + return await search_vcs_with_playwright(query, limit) + except Exception as e: + logger.warning(f"Playwright VC search failed: {e}") + + # Fallback: return empty list (HTML parsing for VCs would be similar to projects) + logger.warning( + "VC search requires Playwright. 
Install with: pip install playwright" + ) + return [] + + +async def get_vc_detail(vc_id: int) -> Optional[RootDataVC]: + """ + Get VC details by ID + + Args: + vc_id: VC ID + + Returns: + RootDataVC or None + """ + import base64 + + encoded_id = base64.b64encode(str(vc_id).encode()).decode() + url = f"https://www.rootdata.com/Investors/detail/Investor?k={encoded_id}" + + logger.info(f"Fetching VC page: {url}") + + html = await fetch_page_html(url) + if not html: + return None + + soup = BeautifulSoup(html, "html.parser") + + try: + name = "" + h1 = soup.find("h1") + if h1: + name = h1.text.strip() + + brief_intro = "" + description = "" + paras = soup.find_all("p") + for p in paras: + text = p.text.strip() + if len(text) > 20: + if not brief_intro: + brief_intro = text + elif len(text) > len(description): + description = text + + tags = [] + tag_elements = soup.find_all(class_=re.compile(r"tag|label", re.I)) + for tag_el in tag_elements: + tag_text = tag_el.text.strip() + if tag_text and len(tag_text) < 30: + tags.append(tag_text) + + website = None + twitter = None + links = soup.find_all("a", href=True) + for link in links: + href = link["href"] + if "twitter.com" in href or "x.com" in href: + twitter = href.split("/")[-1] + elif href.startswith("http") and "rootdata.com" not in href: + if not website: + website = href + + vc = RootDataVC( + id=vc_id, + name=name, + brief_intro=brief_intro, + description=description, + tags=list(set(tags))[:10], + twitter=twitter, + website=website, + ) + + logger.info(f"Successfully extracted VC: {name}") + return vc + + except Exception as e: + logger.warning(f"Failed to parse VC page: {e}") + return None + + +# ============================================================================ +# People Search Functions +# ============================================================================ + + +async def search_people_with_playwright( + query: str, limit: int = 10 +) -> List[RootDataPerson]: + """ + Search people using Playwright (browser automation) + + Args: + query: Search keyword + limit: Maximum number of results + + Returns: + List of people + """ + try: + from playwright.async_api import async_playwright + except ImportError: + logger.error( + "Playwright not installed. 
Install with: " + "pip install playwright && playwright install chromium" + ) + return [] + + url = f"https://www.rootdata.com/People?k={query}" + logger.info(f"Searching people with Playwright: {url}") + + async with async_playwright() as p: + browser = await p.chromium.launch(headless=True) + page = await browser.new_page() + + try: + await page.goto(url, wait_until="networkidle", timeout=30000) + await page.wait_for_timeout(500) + + # Extract data from __NUXT__ + people_data = await page.evaluate("""() => { + if (!window.__NUXT__ || !window.__NUXT__.data) { + return []; + } + + const dataArray = window.__NUXT__.data; + for (let i = 0; i < dataArray.length; i++) { + const item = dataArray[i]; + if (item && typeof item === 'object') { + const keys = ['list', 'people', 'persons', 'items', 'data', 'records']; + for (const key of keys) { + if (item[key] && Array.isArray(item[key])) { + return item[key]; + } + } + } + } + return []; + }""") + + await browser.close() + + if not people_data: + logger.warning(f"No people found for query: {query}") + return [] + + # Parse people + people = [] + for person_data in people_data[:limit]: + try: + name = person_data.get("name", {}) + if isinstance(name, dict): + name = name.get("en_value") or name.get("cn_value") or "" + + tags = [] + if "enTagNames" in person_data: + tags_str = person_data["enTagNames"] + if tags_str: + tags = [t.strip() for t in str(tags_str).split(",")] + + projects = [] + if "projects" in person_data and isinstance( + person_data["projects"], list + ): + projects = [ + p.get("name", "") + for p in person_data["projects"] + if isinstance(p, dict) + ] + + person = RootDataPerson( + id=person_data.get("id", 0), + name=str(name), + title=person_data.get("title"), + brief_intro=person_data.get("enBriefIntd") or "", + description=person_data.get("enIntd") or "", + tags=tags, + twitter=person_data.get("twitter"), + linkedin=person_data.get("linkedin"), + image_url=person_data.get("imgUrl"), + projects=projects, + current_organization=person_data.get("organization"), + ) + + people.append(person) + + except Exception as e: + logger.warning(f"Failed to parse person: {e}") + continue + + logger.info(f"Found {len(people)} people for query: {query}") + return people + + except Exception as e: + logger.error(f"Playwright error: {e}") + await browser.close() + return [] + + +async def search_people( + query: str, limit: int = 10, use_playwright: bool = True +) -> List[RootDataPerson]: + """ + Search people (founders, executives, investors) + + Args: + query: Search keyword (person name or role) + limit: Maximum number of results + use_playwright: If True, use browser automation (more reliable) + + Returns: + List of people + + Examples: + # Search for "Vitalik Buterin" + people = await search_people("Vitalik Buterin", limit=5) + + # Search for founders + people = await search_people("founder", limit=10) + """ + + if use_playwright: + try: + return await search_people_with_playwright(query, limit) + except Exception as e: + logger.warning(f"Playwright people search failed: {e}") + + logger.warning( + "People search requires Playwright. 
Install with: pip install playwright" + ) + return [] + + +async def get_person_detail(person_id: int) -> Optional[RootDataPerson]: + """ + Get person details by ID + + Args: + person_id: Person ID + + Returns: + RootDataPerson or None + """ + import base64 + + encoded_id = base64.b64encode(str(person_id).encode()).decode() + url = f"https://www.rootdata.com/People/detail/Person?k={encoded_id}" + + logger.info(f"Fetching person page: {url}") + + html = await fetch_page_html(url) + if not html: + return None + + soup = BeautifulSoup(html, "html.parser") + + try: + name = "" + h1 = soup.find("h1") + if h1: + name = h1.text.strip() + + title = "" + h2 = soup.find("h2") + if h2: + title = h2.text.strip() + + brief_intro = "" + description = "" + paras = soup.find_all("p") + for p in paras: + text = p.text.strip() + if len(text) > 20: + if not brief_intro: + brief_intro = text + elif len(text) > len(description): + description = text + + tags = [] + tag_elements = soup.find_all(class_=re.compile(r"tag|label", re.I)) + for tag_el in tag_elements: + tag_text = tag_el.text.strip() + if tag_text and len(tag_text) < 30: + tags.append(tag_text) + + twitter = None + linkedin = None + links = soup.find_all("a", href=True) + for link in links: + href = link["href"] + if "twitter.com" in href or "x.com" in href: + twitter = href.split("/")[-1] + elif "linkedin.com" in href: + linkedin = href + + person = RootDataPerson( + id=person_id, + name=name, + title=title, + brief_intro=brief_intro, + description=description, + tags=list(set(tags))[:10], + twitter=twitter, + linkedin=linkedin, + ) + + logger.info(f"Successfully extracted person: {name}") + return person + + except Exception as e: + logger.warning(f"Failed to parse person page: {e}") + return None From c7e07083e251604eff0d85a074faba759612cad0 Mon Sep 17 00:00:00 2001 From: hazeone <709547807@qq.com> Date: Tue, 18 Nov 2025 10:15:01 +0800 Subject: [PATCH 2/4] change getting data by two phases to single phase --- .../agents/research_agent/sources.py | 347 +----------------- 1 file changed, 20 insertions(+), 327 deletions(-) diff --git a/python/valuecell/agents/research_agent/sources.py b/python/valuecell/agents/research_agent/sources.py index 75bb7872a..0f4c3f78d 100644 --- a/python/valuecell/agents/research_agent/sources.py +++ b/python/valuecell/agents/research_agent/sources.py @@ -14,8 +14,10 @@ from valuecell.agents.sources import ( get_person_detail, get_project_detail, + get_vc_detail, search_people, search_projects, + search_vcs, ) from valuecell.utils.path import get_knowledge_path @@ -718,7 +720,7 @@ async def search_crypto_projects( limit: Maximum number of results to return (default: 5, max recommended: 10) Returns: - Formatted string with project information including name, description, tags, and key metrics. + JSON string with project information including name, description, tags, and key metrics. """ logger.info(f"Searching crypto projects for: {query}") @@ -733,35 +735,11 @@ async def search_crypto_projects( results = [f"Found {len(projects)} cryptocurrency project(s) for '{query}':\n"] for i, proj in enumerate(projects, 1): - result_lines = [ - f"\n{i}. 
**{proj.name}** (ID: {proj.id})", - ] - - if proj.token_symbol: - result_lines.append(f" - Token: ${proj.token_symbol}") - - if proj.token_price is not None: - price_str = f" - Price: ${proj.token_price:.4f}" - if proj.price_change_24h is not None: - change_sign = "+" if proj.price_change_24h >= 0 else "" - price_str += f" ({change_sign}{proj.price_change_24h:.2f}% 24h)" - result_lines.append(price_str) - - if proj.brief_intro: - # Truncate long descriptions - brief = proj.brief_intro[:200] - if len(proj.brief_intro) > 200: - brief += "..." - result_lines.append(f" - Brief: {brief}") - - if proj.tags: - result_lines.append(f" - Tags: {', '.join(proj.tags[:5])}") - - if proj.twitter: - result_lines.append(f" - Twitter: @{proj.twitter}") - - if proj.website: - result_lines.append(f" - Website: {proj.website}") + proj = await get_project_detail(proj.id) + if not proj: + logger.warning(f"No project found with ID: {proj.id}") + continue + return proj.model_dump_json() result_lines.append( f" - Use get_crypto_project_detail({proj.id}) for full details" @@ -777,84 +755,6 @@ async def search_crypto_projects( return f"Error searching cryptocurrency projects: {str(e)}" -async def get_crypto_project_detail( - project_id: int, -) -> str: - """Get detailed information about a specific cryptocurrency project by its ID. - - Use this tool after search_crypto_projects to get comprehensive project details. - The project_id can be found in search results. - - Args: - project_id: RootData project ID (obtained from search_crypto_projects results) - - Returns: - Detailed project information including full description, team, metrics, and links. - """ - - logger.info(f"Fetching crypto project detail for ID: {project_id}") - - try: - project = await get_project_detail(project_id) - - if not project: - return f"No project found with ID: {project_id}" - - # Format detailed information - details = [ - f"# {project.name}", - "", - ] - - if project.token_symbol: - details.append(f"**Token Symbol:** ${project.token_symbol}") - - if project.token_price is not None: - price_line = f"**Current Price:** ${project.token_price:.4f}" - if project.price_change_24h is not None: - change_sign = "+" if project.price_change_24h >= 0 else "" - price_line += f" ({change_sign}{project.price_change_24h:.2f}% 24h)" - details.append(price_line) - - if project.founded_year: - details.append(f"**Founded:** {project.founded_year}") - - details.append("") - - if project.brief_intro: - details.append("## Brief Introduction") - details.append(project.brief_intro) - details.append("") - - if project.description: - details.append("## Description") - details.append(project.description) - details.append("") - - if project.tags: - details.append("## Tags") - details.append(", ".join(project.tags)) - details.append("") - - if project.members: - details.append("## Team Members") - for member in project.members: - details.append(f"- {member}") - details.append("") - - details.append("## Links") - if project.website: - details.append(f"- Website: {project.website}") - if project.twitter: - details.append(f"- Twitter: https://twitter.com/{project.twitter}") - - return "\n".join(details) - - except Exception as e: - logger.error(f"Error fetching crypto project detail: {e}") - return f"Error fetching project details: {str(e)}" - - async def search_crypto_vcs( query: str, limit: int = 5, @@ -871,9 +771,6 @@ async def search_crypto_vcs( Returns: Formatted string with VC information including name, description, portfolio, and links. 
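+
+    Example:
+        # Illustrative call; the first match's full detail is returned as JSON
+        vc_json = await search_crypto_vcs("a16z")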
""" - from loguru import logger - - from valuecell.agents.sources import search_vcs logger.info(f"Searching crypto VCs for: {query}") @@ -883,119 +780,20 @@ async def search_crypto_vcs( if not vcs: return f"No venture capital firms found for query: {query}" - # Format results as context - results = [f"Found {len(vcs)} venture capital firm(s) for '{query}':\n"] + logger.debug(f"Search crypto VCs get {len(vcs)} results.") for i, vc in enumerate(vcs, 1): - result_lines = [ - f"\n{i}. **{vc.name}** (ID: {vc.id})", - ] - - if vc.portfolio_count is not None: - result_lines.append(f" - Portfolio: {vc.portfolio_count} companies") - - if vc.total_investments is not None: - result_lines.append(f" - Total Investments: {vc.total_investments}") - - if vc.brief_intro: - brief = vc.brief_intro[:200] - if len(vc.brief_intro) > 200: - brief += "..." - result_lines.append(f" - Brief: {brief}") - - if vc.tags: - result_lines.append(f" - Focus: {', '.join(vc.tags[:5])}") - - if vc.twitter: - result_lines.append(f" - Twitter: @{vc.twitter}") - - if vc.website: - result_lines.append(f" - Website: {vc.website}") - - result_lines.append( - f" - Use get_crypto_vc_detail({vc.id}) for full details" - ) - - results.append("\n".join(result_lines)) - - return "\n".join(results) + vc = await get_vc_detail(vc.id) + if not vc: + logger.warning(f"No VC found with ID: {vc.id}") + continue + return vc.model_dump_json() except Exception as e: logger.error(f"Error searching crypto VCs: {e}") return f"Error searching venture capital firms: {str(e)}" -async def get_crypto_vc_detail( - vc_id: int, -) -> str: - """Get detailed information about a specific VC firm by its ID. - - Use this tool after search_crypto_vcs to get comprehensive VC details. - The vc_id can be found in search results. - - Args: - vc_id: RootData VC ID (obtained from search_crypto_vcs results) - - Returns: - Detailed VC information including full description, portfolio, and links. 
- """ - from loguru import logger - - from valuecell.agents.sources import get_vc_detail - - logger.info(f"Fetching crypto VC detail for ID: {vc_id}") - - try: - vc = await get_vc_detail(vc_id) - - if not vc: - return f"No VC found with ID: {vc_id}" - - # Format detailed information - details = [ - f"# {vc.name}", - "", - ] - - if vc.portfolio_count is not None: - details.append(f"**Portfolio Size:** {vc.portfolio_count} companies") - - if vc.total_investments is not None: - details.append(f"**Total Investments:** {vc.total_investments}") - - if vc.founded_year: - details.append(f"**Founded:** {vc.founded_year}") - - details.append("") - - if vc.brief_intro: - details.append("## Brief Introduction") - details.append(vc.brief_intro) - details.append("") - - if vc.description: - details.append("## Description") - details.append(vc.description) - details.append("") - - if vc.tags: - details.append("## Investment Focus") - details.append(", ".join(vc.tags)) - details.append("") - - details.append("## Links") - if vc.website: - details.append(f"- Website: {vc.website}") - if vc.twitter: - details.append(f"- Twitter: https://twitter.com/{vc.twitter}") - - return "\n".join(details) - - except Exception as e: - logger.error(f"Error fetching crypto VC detail: {e}") - return f"Error fetching VC details: {str(e)}" - - async def search_crypto_people( query: str, limit: int = 5, @@ -1021,120 +819,15 @@ async def search_crypto_people( if not people: return f"No people found for query: {query}" - # Format results as context - results = [f"Found {len(people)} person/people for '{query}':\n"] + logger.debug(f"Search crypto people get {len(people)} results.") for i, person in enumerate(people, 1): - result_lines = [ - f"\n{i}. **{person.name}** (ID: {person.id})", - ] - - if person.title: - result_lines.append(f" - Title: {person.title}") - - if person.current_organization: - result_lines.append(f" - Organization: {person.current_organization}") - - if person.brief_intro: - brief = person.brief_intro[:200] - if len(person.brief_intro) > 200: - brief += "..." - result_lines.append(f" - Brief: {brief}") - - if person.projects: - result_lines.append( - f" - Projects: {', '.join(person.projects[:3])}" - + ("..." if len(person.projects) > 3 else "") - ) - - if person.tags: - result_lines.append(f" - Roles: {', '.join(person.tags[:5])}") - - if person.twitter: - result_lines.append(f" - Twitter: @{person.twitter}") - - if person.linkedin: - result_lines.append(f" - LinkedIn: {person.linkedin}") - - result_lines.append( - f" - Use get_crypto_person_detail({person.id}) for full details" - ) - - results.append("\n".join(result_lines)) - - return "\n".join(results) + person = await get_person_detail(person.id) + if not person: + logger.warning(f"No person found with ID: {person.id}") + continue + return person.model_dump_json() except Exception as e: logger.error(f"Error searching crypto people: {e}") return f"Error searching people: {str(e)}" - - -async def get_crypto_person_detail( - person_id: int, -) -> str: - """Get detailed information about a specific person by their ID. - - Use this tool after search_crypto_people to get comprehensive person details. - The person_id can be found in search results. - - Args: - person_id: RootData person ID (obtained from search_crypto_people results) - - Returns: - Detailed person information including full bio, projects, and links. 
- """ - - logger.info(f"Fetching crypto person detail for ID: {person_id}") - - try: - person = await get_person_detail(person_id) - - if not person: - return f"No person found with ID: {person_id}" - - # Format detailed information - details = [ - f"# {person.name}", - "", - ] - - if person.title: - details.append(f"**Title:** {person.title}") - - if person.current_organization: - details.append(f"**Organization:** {person.current_organization}") - - details.append("") - - if person.brief_intro: - details.append("## Brief Introduction") - details.append(person.brief_intro) - details.append("") - - if person.description: - details.append("## Biography") - details.append(person.description) - details.append("") - - if person.projects: - details.append("## Associated Projects") - for project in person.projects: - details.append(f"- {project}") - details.append("") - - if person.tags: - details.append("## Roles & Expertise") - details.append(", ".join(person.tags)) - details.append("") - - details.append("## Links") - if person.twitter: - details.append(f"- Twitter: https://twitter.com/{person.twitter}") - if person.linkedin: - details.append(f"- LinkedIn: {person.linkedin}") - - return "\n".join(details) - - except Exception as e: - logger.error(f"Error fetching crypto person detail: {e}") - return f"Error fetching person details: {str(e)}" From 08cd3f371408f6ad44a62fb317209addb2fbe26e Mon Sep 17 00:00:00 2001 From: hazeone <709547807@qq.com> Date: Tue, 18 Nov 2025 10:18:55 +0800 Subject: [PATCH 3/4] annotate crypto tools in research agent --- .../valuecell/agents/research_agent/core.py | 19 +++++++------------ .../agents/research_agent/sources.py | 14 ++------------ 2 files changed, 9 insertions(+), 24 deletions(-) diff --git a/python/valuecell/agents/research_agent/core.py b/python/valuecell/agents/research_agent/core.py index b50c891a4..dd8ff0863 100644 --- a/python/valuecell/agents/research_agent/core.py +++ b/python/valuecell/agents/research_agent/core.py @@ -16,12 +16,9 @@ fetch_ashare_filings, fetch_event_sec_filings, fetch_periodic_sec_filings, - get_crypto_person_detail, - get_crypto_project_detail, - get_crypto_vc_detail, - search_crypto_people, - search_crypto_projects, - search_crypto_vcs, + # search_crypto_people, + # search_crypto_projects, + # search_crypto_vcs, web_search, ) from valuecell.agents.utils.context import build_ctx_from_dep @@ -38,12 +35,10 @@ def __init__(self, **kwargs): fetch_event_sec_filings, fetch_ashare_filings, web_search, - search_crypto_projects, - get_crypto_project_detail, - search_crypto_vcs, - get_crypto_vc_detail, - search_crypto_people, - get_crypto_person_detail, + # TODO: The RootData tools will cost lots of time, so we disable them for now. 
From 53dd6a9769889b955337f031a35cc24b53593a91 Mon Sep 17 00:00:00 2001
From: hazeone <709547807@qq.com>
Date: Tue, 18 Nov 2025 10:19:11 +0800
Subject: [PATCH 4/4] lint

---
 python/valuecell/agents/research_agent/core.py | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/python/valuecell/agents/research_agent/core.py b/python/valuecell/agents/research_agent/core.py
index dd8ff0863..1c83777a1 100644
--- a/python/valuecell/agents/research_agent/core.py
+++ b/python/valuecell/agents/research_agent/core.py
@@ -12,13 +12,10 @@
     KNOWLEDGE_AGENT_EXPECTED_OUTPUT,
     KNOWLEDGE_AGENT_INSTRUCTION,
 )
-from valuecell.agents.research_agent.sources import (
+from valuecell.agents.research_agent.sources import (  # search_crypto_people,; search_crypto_projects,; search_crypto_vcs,
     fetch_ashare_filings,
     fetch_event_sec_filings,
     fetch_periodic_sec_filings,
-    # search_crypto_people,
-    # search_crypto_projects,
-    # search_crypto_vcs,
     web_search,
 )
 from valuecell.agents.utils.context import build_ctx_from_dep
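RootData profiles change slowly relative to agent sessions, so a small TTL cache in front of the by-ID fetchers would make re-enabling the tools much cheaper on repeated queries. A sketch under the assumption that each fetcher takes an integer ID and returns a model instance or None; the decorator and the TTL value are illustrative:

```python
import time
from typing import Awaitable, Callable, Optional, TypeVar

T = TypeVar("T")


def ttl_cached(
    fetch: Callable[[int], Awaitable[Optional[T]]], ttl_seconds: float = 3600.0
) -> Callable[[int], Awaitable[Optional[T]]]:
    """Memoize an async by-ID fetcher, expiring entries after ttl_seconds."""
    cache: dict[int, tuple[float, T]] = {}

    async def wrapper(entity_id: int) -> Optional[T]:
        hit = cache.get(entity_id)
        if hit is not None and time.monotonic() - hit[0] < ttl_seconds:
            return hit[1]
        value = await fetch(entity_id)
        if value is not None:  # do not cache failures, so transient errors can retry
            cache[entity_id] = (time.monotonic(), value)
        return value

    return wrapper


# Usage sketch:
#     get_project_detail = ttl_cached(get_project_detail, ttl_seconds=6 * 3600)
```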