Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 26 additions & 19 deletions python/valuecell/adapters/assets/akshare_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import decimal
import logging
import re
import threading
import time
from datetime import datetime, timedelta
Expand Down Expand Up @@ -589,13 +590,14 @@ def _is_hk_stock_code(self, search_term: str) -> bool:
def _get_hk_stock_by_code(self, stock_code: str) -> Optional[AssetSearchResult]:
"""Get HK stock info by stock code using direct query."""
try:
# Format HK stock code
formatted_code = (
stock_code.zfill(5) if not stock_code.startswith("0") else stock_code
)
# Format HK stock code - pad to 5 digits
formatted_code = stock_code.zfill(5)

# Validate: HK stock codes should be 5 digits
if not (formatted_code.isdigit() and len(formatted_code) == 5):
return None

# Try to get HK stock data - note: AKShare may not have direct individual HK stock query
# so we create a basic result based on code
# Create internal ticker in standard format
internal_ticker = f"HKEX:{formatted_code}"

# Create basic result - in production, you might want to query actual HK stock info
Expand Down Expand Up @@ -2165,23 +2167,28 @@ def _generate_ticker_variations(self, search_term: str) -> List[str]:

return variations

# Ticker validation patterns.
# Maps an exchange code (the part before ":" in an "EXCHANGE:SYMBOL" ticker)
# to a compiled regex that the symbol part (after ":") must satisfy.
# Exchanges without an entry here have no pattern and are treated as
# unsupported by validate_ticker.
# NOTE(review): in Python regexes "$" also matches just before a trailing
# newline, so these anchors alone do not reject "00700\n" when used with
# match(); fullmatch() does.
TICKER_VALIDATION_PATTERNS = {
"SSE": re.compile(r"^6\d{5}$"), # Shanghai: "6" followed by five digits
"SZSE": re.compile(r"^[03]\d{5}$"), # Shenzhen: "0" or "3" followed by five digits
"BSE": re.compile(r"^8\d{5}$"), # Beijing: "8" followed by five digits
"HKEX": re.compile(r"^\d{5}$"), # Hong Kong: exactly five digits
"NASDAQ": re.compile(
r"^[A-Z0-9]{1,5}$"
), # US markets: 1-5 uppercase letters or digits
"NYSE": re.compile(
r"^[A-Z0-9]{1,5}$"
), # US markets: 1-5 uppercase letters or digits
"CRYPTO": re.compile(r"^[A-Z0-9]{1,5}$"), # Crypto: 1-5 uppercase letters or digits
}

def validate_ticker(self, ticker: str) -> bool:
    """Validate that a ticker is supported by AKShare and matches the standard format.

    The ticker must have the internal ``EXCHANGE:SYMBOL`` shape. The exchange
    must have an entry in ``self.TICKER_VALIDATION_PATTERNS`` and the whole
    symbol must match that exchange's pattern.

    Args:
        ticker: Internal ticker, e.g. "SSE:600000" or "HKEX:00700".

    Returns:
        True if the exchange is known and the symbol matches its pattern;
        False otherwise, including when there is no ":" separator or the
        input is not a string.
    """
    try:
        exchange, symbol = ticker.split(":", 1)
    except (ValueError, AttributeError):
        # ValueError: no ":" separator to unpack.
        # AttributeError: non-string input (e.g. None) has no .split().
        return False

    pattern = self.TICKER_VALIDATION_PATTERNS.get(exchange)
    if pattern is None:
        return False

    # fullmatch (not match) so that a trailing newline is rejected: "$" alone
    # would still match just before a final "\n".
    return pattern.fullmatch(symbol) is not None
Expand Down
33 changes: 25 additions & 8 deletions python/valuecell/adapters/assets/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,18 +160,34 @@ def to_internal_format(
if source_ticker in index_reverse_mapping:
return index_reverse_mapping[source_ticker]

# Special handling for crypto from yfinance - remove currency suffix
if source == DataSource.YFINANCE and (
"-USD" in source_ticker
or "-CAD" in source_ticker
or "-EUR" in source_ticker
):
# Remove any currency suffix
crypto_symbol = source_ticker.split("-")[0].upper()
return f"CRYPTO:{crypto_symbol}"

# Special handling for Hong Kong stocks from yfinance
if source == DataSource.YFINANCE and source_ticker.endswith(".HK"):
symbol = source_ticker[:-3] # Remove .HK suffix
# Remove leading zeros for internal format (0700 -> 700)
if source == DataSource.YFINANCE and ".HK" in source_ticker:
symbol = source_ticker.replace(".HK", "") # Remove .HK suffix
# Keep as digits only, no leading zero removal for internal format
if symbol.isdigit():
symbol = str(int(symbol)) # This removes leading zeros
# Pad to 5 digits for Hong Kong stocks
symbol = symbol.zfill(5)
return f"HKEX:{symbol}"

# Special handling for crypto from yfinance
if source == DataSource.YFINANCE and "-USD" in source_ticker:
crypto_symbol = source_ticker.replace("-USD", "")
return f"CRYPTO:{crypto_symbol}"
# Special handling for Shanghai stocks from yfinance
if source == DataSource.YFINANCE and ".SS" in source_ticker:
symbol = source_ticker.replace(".SS", "")
return f"SSE:{symbol}"

# Special handling for Shenzhen stocks from yfinance
if source == DataSource.YFINANCE and ".SZ" in source_ticker:
symbol = source_ticker.replace(".SZ", "")
return f"SZSE:{symbol}"

# Check for known suffixes
if source in self.reverse_mappings:
Expand All @@ -184,6 +200,7 @@ def to_internal_format(

# If no suffix found and default exchange provided
if default_exchange:
# For US stocks from yfinance, symbol is already clean
return f"{default_exchange}:{source_ticker}"

# For other assets without clear exchange mapping
Expand Down
103 changes: 76 additions & 27 deletions python/valuecell/adapters/assets/yfinance_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,8 +147,24 @@ def _create_search_result_from_quote(
}
mapped_exchange = exchange_mapping.get(exchange, exchange)

# Create internal ticker with correct exchange
internal_ticker = f"{mapped_exchange}:{symbol}"
# Filter: Only support specific exchanges
supported_exchanges = ["NASDAQ", "NYSE", "SSE", "SZSE", "HKEX", "CRYPTO"]
if mapped_exchange not in supported_exchanges:
logger.debug(
f"Skipping unsupported exchange: {mapped_exchange} for symbol {symbol}"
)
return None

# Convert to internal ticker format and normalize
# Remove any suffixes that yfinance might include
internal_ticker = self.convert_to_internal_ticker(symbol, mapped_exchange)

# Validate the ticker format
if not self._is_valid_internal_ticker(internal_ticker):
logger.debug(
f"Invalid ticker format after conversion: {internal_ticker}"
)
return None

# Get asset type from quote type
quote_type = quote.get("quoteType", "").upper()
Expand All @@ -160,10 +176,8 @@ def _create_search_result_from_quote(
country = "CN"
elif mapped_exchange == "HKEX":
country = "HK"
elif mapped_exchange == "TSE":
country = "JP"
elif mapped_exchange in ["LSE", "EURONEXT", "XETRA"]:
country = "GB" if mapped_exchange == "LSE" else "DE"
elif mapped_exchange == "CRYPTO":
country = "US"

# Get names in different languages
long_name = quote.get("longname", quote.get("shortname", symbol))
Expand Down Expand Up @@ -645,28 +659,63 @@ def _perform_health_check(self) -> Any:
except Exception as e:
return {"status": "error", "message": str(e)}

def validate_ticker(self, ticker: str) -> bool:
"""Validate if ticker is supported by Yahoo Finance."""
def _is_valid_internal_ticker(self, ticker: str) -> bool:
"""Validate if internal ticker format is correct and supported.

Args:
ticker: Internal ticker format (e.g., "NASDAQ:AAPL", "HKEX:00700", "CRYPTO:BTC")

Returns:
True if ticker format is valid
"""
try:
if ":" not in ticker:
return False

exchange, symbol = ticker.split(":", 1)

# Yahoo Finance supports most major exchanges
supported_exchanges = [
"NASDAQ",
"NYSE",
"AMEX", # US
"SSE",
"SZSE", # China
"HKEX", # Hong Kong
"TSE", # Tokyo
"LSE", # London
"EURONEXT", # Europe
"TSX", # Toronto
"ASX", # Australia
"CRYPTO", # Crypto
]

return exchange in supported_exchanges

except ValueError:
# Validate exchange
supported_exchanges = ["NASDAQ", "NYSE", "SSE", "SZSE", "HKEX", "CRYPTO"]
if exchange not in supported_exchanges:
return False

# Validate symbol format based on exchange
if exchange in ["NASDAQ", "NYSE"]:
# US stocks: 1-5 uppercase letters, no special characters except hyphen
return (
bool(symbol)
and len(symbol) <= 5
and symbol.replace("-", "").isalnum()
)

elif exchange in ["SSE", "SZSE"]:
# A-shares: exactly 6 digits
return symbol.isdigit() and len(symbol) == 6

elif exchange == "HKEX":
# HK stocks: 1-5 digits (e.g., 00700)
# HK indices: uppercase letters (e.g., HSI, HSCEI)
# No .HK suffix allowed
if ".HK" in symbol:
return False
return (symbol.isdigit() and 1 <= len(symbol) <= 5) or (
symbol.isalpha() and symbol.isupper()
)

elif exchange == "CRYPTO":
# Crypto: uppercase letters, no currency suffix (e.g., BTC, not BTC-USD)
return (
bool(symbol)
and symbol.isalpha()
and symbol.isupper()
and "-" not in symbol
)

return False

except (ValueError, AttributeError):
return False

def validate_ticker(self, ticker: str) -> bool:
    """Report whether the given ticker is supported by Yahoo Finance.

    Delegates to ``_is_valid_internal_ticker`` and returns its result
    unchanged.
    """
    is_supported = self._is_valid_internal_ticker(ticker)
    return is_supported