From 04eca2b4765244784329557aed027329addc9ec7 Mon Sep 17 00:00:00 2001 From: hazeone <709547807@qq.com> Date: Wed, 1 Oct 2025 23:16:03 +0800 Subject: [PATCH 1/3] fix: search result symbol does not match standard --- .../adapters/assets/akshare_adapter.py | 29 +++--- python/valuecell/adapters/assets/base.py | 29 ++++-- .../adapters/assets/yfinance_adapter.py | 90 +++++++++++++------ 3 files changed, 98 insertions(+), 50 deletions(-) diff --git a/python/valuecell/adapters/assets/akshare_adapter.py b/python/valuecell/adapters/assets/akshare_adapter.py index 10f4d545a..44642b12a 100644 --- a/python/valuecell/adapters/assets/akshare_adapter.py +++ b/python/valuecell/adapters/assets/akshare_adapter.py @@ -589,13 +589,14 @@ def _is_hk_stock_code(self, search_term: str) -> bool: def _get_hk_stock_by_code(self, stock_code: str) -> Optional[AssetSearchResult]: """Get HK stock info by stock code using direct query.""" try: - # Format HK stock code - formatted_code = ( - stock_code.zfill(5) if not stock_code.startswith("0") else stock_code - ) + # Format HK stock code - pad to 5 digits + formatted_code = stock_code.zfill(5) + + # Validate: HK stock codes should be 5 digits + if not (formatted_code.isdigit() and len(formatted_code) == 5): + return None - # Try to get HK stock data - note: AKShare may not have direct individual HK stock query - # so we create a basic result based on code + # Create internal ticker in standard format internal_ticker = f"HKEX:{formatted_code}" # Create basic result - in production, you might want to query actual HK stock info @@ -2166,18 +2167,18 @@ def _generate_ticker_variations(self, search_term: str) -> List[str]: return variations def validate_ticker(self, ticker: str) -> bool: - """Validate if ticker is supported by AKShare.""" + """Validate if ticker is supported by AKShare and matches standard format.""" try: exchange, symbol = ticker.split(":", 1) - # Exchange validation rules + # Exchange validation rules - strict format checking validation_rules = { - "SSE": lambda s: s.isdigit() and len(s) == 6, - "SZSE": lambda s: s.isdigit() and len(s) == 6, - "BSE": lambda s: s.isdigit() and len(s) == 6, - "HKEX": lambda s: s.isdigit() and 1 <= len(s) <= 5, - "NASDAQ": lambda s: 1 <= len(s) <= 5, - "NYSE": lambda s: 1 <= len(s) <= 5, + "SSE": lambda s: s.isdigit() and len(s) == 6 and s.startswith("6"), + "SZSE": lambda s: s.isdigit() and len(s) == 6 and s.startswith(("0", "3")), + "BSE": lambda s: s.isdigit() and len(s) == 6 and s.startswith("8"), + "HKEX": lambda s: s.isdigit() and len(s) == 5, # Must be exactly 5 digits + "NASDAQ": lambda s: 1 <= len(s) <= 5 and s.isalnum() and s.isupper(), + "NYSE": lambda s: 1 <= len(s) <= 5 and s.isalnum() and s.isupper(), } validator = validation_rules.get(exchange) diff --git a/python/valuecell/adapters/assets/base.py b/python/valuecell/adapters/assets/base.py index ce1d23567..22de75af9 100644 --- a/python/valuecell/adapters/assets/base.py +++ b/python/valuecell/adapters/assets/base.py @@ -160,18 +160,30 @@ def to_internal_format( if source_ticker in index_reverse_mapping: return index_reverse_mapping[source_ticker] + # Special handling for crypto from yfinance - remove currency suffix + if source == DataSource.YFINANCE and ("-USD" in source_ticker or "-CAD" in source_ticker or "-EUR" in source_ticker): + # Remove any currency suffix + crypto_symbol = source_ticker.split("-")[0].upper() + return f"CRYPTO:{crypto_symbol}" + # Special handling for Hong Kong stocks from yfinance - if source == DataSource.YFINANCE and source_ticker.endswith(".HK"): - symbol = source_ticker[:-3] # Remove .HK suffix - # Remove leading zeros for internal format (0700 -> 700) + if source == DataSource.YFINANCE and ".HK" in source_ticker: + symbol = source_ticker.replace(".HK", "") # Remove .HK suffix + # Keep as digits only, no leading zero removal for internal format if symbol.isdigit(): - symbol = str(int(symbol)) # This removes leading zeros + # Pad to 5 digits for Hong Kong stocks + symbol = symbol.zfill(5) return f"HKEX:{symbol}" - # Special handling for crypto from yfinance - if source == DataSource.YFINANCE and "-USD" in source_ticker: - crypto_symbol = source_ticker.replace("-USD", "") - return f"CRYPTO:{crypto_symbol}" + # Special handling for Shanghai stocks from yfinance + if source == DataSource.YFINANCE and ".SS" in source_ticker: + symbol = source_ticker.replace(".SS", "") + return f"SSE:{symbol}" + + # Special handling for Shenzhen stocks from yfinance + if source == DataSource.YFINANCE and ".SZ" in source_ticker: + symbol = source_ticker.replace(".SZ", "") + return f"SZSE:{symbol}" # Check for known suffixes if source in self.reverse_mappings: @@ -184,6 +196,7 @@ def to_internal_format( # If no suffix found and default exchange provided if default_exchange: + # For US stocks from yfinance, symbol is already clean return f"{default_exchange}:{source_ticker}" # For other assets without clear exchange mapping diff --git a/python/valuecell/adapters/assets/yfinance_adapter.py b/python/valuecell/adapters/assets/yfinance_adapter.py index ec3b67c10..666072696 100644 --- a/python/valuecell/adapters/assets/yfinance_adapter.py +++ b/python/valuecell/adapters/assets/yfinance_adapter.py @@ -147,8 +147,20 @@ def _create_search_result_from_quote( } mapped_exchange = exchange_mapping.get(exchange, exchange) - # Create internal ticker with correct exchange - internal_ticker = f"{mapped_exchange}:{symbol}" + # Filter: Only support specific exchanges + supported_exchanges = ["NASDAQ", "NYSE", "SSE", "SZSE", "HKEX", "CRYPTO"] + if mapped_exchange not in supported_exchanges: + logger.debug(f"Skipping unsupported exchange: {mapped_exchange} for symbol {symbol}") + return None + + # Convert to internal ticker format and normalize + # Remove any suffixes that yfinance might include + internal_ticker = self.convert_to_internal_ticker(symbol, mapped_exchange) + + # Validate the ticker format + if not self._is_valid_internal_ticker(internal_ticker): + logger.debug(f"Invalid ticker format after conversion: {internal_ticker}") + return None # Get asset type from quote type quote_type = quote.get("quoteType", "").upper() @@ -160,10 +172,8 @@ def _create_search_result_from_quote( country = "CN" elif mapped_exchange == "HKEX": country = "HK" - elif mapped_exchange == "TSE": - country = "JP" - elif mapped_exchange in ["LSE", "EURONEXT", "XETRA"]: - country = "GB" if mapped_exchange == "LSE" else "DE" + elif mapped_exchange == "CRYPTO": + country = "US" # Get names in different languages long_name = quote.get("longname", quote.get("shortname", symbol)) @@ -645,28 +655,52 @@ def _perform_health_check(self) -> Any: except Exception as e: return {"status": "error", "message": str(e)} - def validate_ticker(self, ticker: str) -> bool: - """Validate if ticker is supported by Yahoo Finance.""" + def _is_valid_internal_ticker(self, ticker: str) -> bool: + """Validate if internal ticker format is correct and supported. + + Args: + ticker: Internal ticker format (e.g., "NASDAQ:AAPL", "HKEX:00700", "CRYPTO:BTC") + + Returns: + True if ticker format is valid + """ try: + if ":" not in ticker: + return False + exchange, symbol = ticker.split(":", 1) - - # Yahoo Finance supports most major exchanges - supported_exchanges = [ - "NASDAQ", - "NYSE", - "AMEX", # US - "SSE", - "SZSE", # China - "HKEX", # Hong Kong - "TSE", # Tokyo - "LSE", # London - "EURONEXT", # Europe - "TSX", # Toronto - "ASX", # Australia - "CRYPTO", # Crypto - ] - - return exchange in supported_exchanges - - except ValueError: + + # Validate exchange + supported_exchanges = ["NASDAQ", "NYSE", "SSE", "SZSE", "HKEX", "CRYPTO"] + if exchange not in supported_exchanges: + return False + + # Validate symbol format based on exchange + if exchange in ["NASDAQ", "NYSE"]: + # US stocks: 1-5 uppercase letters, no special characters except hyphen + return bool(symbol) and len(symbol) <= 5 and symbol.replace("-", "").isalnum() + + elif exchange in ["SSE", "SZSE"]: + # A-shares: exactly 6 digits + return symbol.isdigit() and len(symbol) == 6 + + elif exchange == "HKEX": + # HK stocks: 1-5 digits (e.g., 00700) + # HK indices: uppercase letters (e.g., HSI, HSCEI) + # No .HK suffix allowed + if ".HK" in symbol: + return False + return (symbol.isdigit() and 1 <= len(symbol) <= 5) or (symbol.isalpha() and symbol.isupper()) + + elif exchange == "CRYPTO": + # Crypto: uppercase letters, no currency suffix (e.g., BTC, not BTC-USD) + return bool(symbol) and symbol.isalpha() and symbol.isupper() and "-" not in symbol + + return False + + except (ValueError, AttributeError): return False + + def validate_ticker(self, ticker: str) -> bool: + """Validate if ticker is supported by Yahoo Finance.""" + return self._is_valid_internal_ticker(ticker) From c6ac52e93a21cf6d3b3fcadc564bd4b30fc49aac Mon Sep 17 00:00:00 2001 From: hazeone <709547807@qq.com> Date: Wed, 1 Oct 2025 23:16:38 +0800 Subject: [PATCH 2/3] lint --- .../adapters/assets/akshare_adapter.py | 7 ++- python/valuecell/adapters/assets/base.py | 6 ++- .../adapters/assets/yfinance_adapter.py | 47 ++++++++++++------- 3 files changed, 41 insertions(+), 19 deletions(-) diff --git a/python/valuecell/adapters/assets/akshare_adapter.py b/python/valuecell/adapters/assets/akshare_adapter.py index 44642b12a..72da1b6cb 100644 --- a/python/valuecell/adapters/assets/akshare_adapter.py +++ b/python/valuecell/adapters/assets/akshare_adapter.py @@ -2174,9 +2174,12 @@ def validate_ticker(self, ticker: str) -> bool: # Exchange validation rules - strict format checking validation_rules = { "SSE": lambda s: s.isdigit() and len(s) == 6 and s.startswith("6"), - "SZSE": lambda s: s.isdigit() and len(s) == 6 and s.startswith(("0", "3")), + "SZSE": lambda s: s.isdigit() + and len(s) == 6 + and s.startswith(("0", "3")), "BSE": lambda s: s.isdigit() and len(s) == 6 and s.startswith("8"), - "HKEX": lambda s: s.isdigit() and len(s) == 5, # Must be exactly 5 digits + "HKEX": lambda s: s.isdigit() + and len(s) == 5, # Must be exactly 5 digits "NASDAQ": lambda s: 1 <= len(s) <= 5 and s.isalnum() and s.isupper(), "NYSE": lambda s: 1 <= len(s) <= 5 and s.isalnum() and s.isupper(), } diff --git a/python/valuecell/adapters/assets/base.py b/python/valuecell/adapters/assets/base.py index 22de75af9..5cef4a72e 100644 --- a/python/valuecell/adapters/assets/base.py +++ b/python/valuecell/adapters/assets/base.py @@ -161,7 +161,11 @@ def to_internal_format( return index_reverse_mapping[source_ticker] # Special handling for crypto from yfinance - remove currency suffix - if source == DataSource.YFINANCE and ("-USD" in source_ticker or "-CAD" in source_ticker or "-EUR" in source_ticker): + if source == DataSource.YFINANCE and ( + "-USD" in source_ticker + or "-CAD" in source_ticker + or "-EUR" in source_ticker + ): # Remove any currency suffix crypto_symbol = source_ticker.split("-")[0].upper() return f"CRYPTO:{crypto_symbol}" diff --git a/python/valuecell/adapters/assets/yfinance_adapter.py b/python/valuecell/adapters/assets/yfinance_adapter.py index 666072696..9a982fcde 100644 --- a/python/valuecell/adapters/assets/yfinance_adapter.py +++ b/python/valuecell/adapters/assets/yfinance_adapter.py @@ -150,16 +150,20 @@ def _create_search_result_from_quote( # Filter: Only support specific exchanges supported_exchanges = ["NASDAQ", "NYSE", "SSE", "SZSE", "HKEX", "CRYPTO"] if mapped_exchange not in supported_exchanges: - logger.debug(f"Skipping unsupported exchange: {mapped_exchange} for symbol {symbol}") + logger.debug( + f"Skipping unsupported exchange: {mapped_exchange} for symbol {symbol}" + ) return None # Convert to internal ticker format and normalize # Remove any suffixes that yfinance might include internal_ticker = self.convert_to_internal_ticker(symbol, mapped_exchange) - + # Validate the ticker format if not self._is_valid_internal_ticker(internal_ticker): - logger.debug(f"Invalid ticker format after conversion: {internal_ticker}") + logger.debug( + f"Invalid ticker format after conversion: {internal_ticker}" + ) return None # Get asset type from quote type @@ -657,47 +661,58 @@ def _perform_health_check(self) -> Any: def _is_valid_internal_ticker(self, ticker: str) -> bool: """Validate if internal ticker format is correct and supported. - + Args: ticker: Internal ticker format (e.g., "NASDAQ:AAPL", "HKEX:00700", "CRYPTO:BTC") - + Returns: True if ticker format is valid """ try: if ":" not in ticker: return False - + exchange, symbol = ticker.split(":", 1) - + # Validate exchange supported_exchanges = ["NASDAQ", "NYSE", "SSE", "SZSE", "HKEX", "CRYPTO"] if exchange not in supported_exchanges: return False - + # Validate symbol format based on exchange if exchange in ["NASDAQ", "NYSE"]: # US stocks: 1-5 uppercase letters, no special characters except hyphen - return bool(symbol) and len(symbol) <= 5 and symbol.replace("-", "").isalnum() - + return ( + bool(symbol) + and len(symbol) <= 5 + and symbol.replace("-", "").isalnum() + ) + elif exchange in ["SSE", "SZSE"]: # A-shares: exactly 6 digits return symbol.isdigit() and len(symbol) == 6 - + elif exchange == "HKEX": # HK stocks: 1-5 digits (e.g., 00700) # HK indices: uppercase letters (e.g., HSI, HSCEI) # No .HK suffix allowed if ".HK" in symbol: return False - return (symbol.isdigit() and 1 <= len(symbol) <= 5) or (symbol.isalpha() and symbol.isupper()) - + return (symbol.isdigit() and 1 <= len(symbol) <= 5) or ( + symbol.isalpha() and symbol.isupper() + ) + elif exchange == "CRYPTO": # Crypto: uppercase letters, no currency suffix (e.g., BTC, not BTC-USD) - return bool(symbol) and symbol.isalpha() and symbol.isupper() and "-" not in symbol - + return ( + bool(symbol) + and symbol.isalpha() + and symbol.isupper() + and "-" not in symbol + ) + return False - + except (ValueError, AttributeError): return False From 7bc8b5b17ddbce34ad45f6463a7788701b1f254f Mon Sep 17 00:00:00 2001 From: hazeone <709547807@qq.com> Date: Thu, 9 Oct 2025 18:04:05 +0800 Subject: [PATCH 3/3] fix problem --- .../adapters/assets/akshare_adapter.py | 33 ++++++++++--------- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/python/valuecell/adapters/assets/akshare_adapter.py b/python/valuecell/adapters/assets/akshare_adapter.py index 72da1b6cb..100a18bf8 100644 --- a/python/valuecell/adapters/assets/akshare_adapter.py +++ b/python/valuecell/adapters/assets/akshare_adapter.py @@ -6,6 +6,7 @@ import decimal import logging +import re import threading import time from datetime import datetime, timedelta @@ -2166,26 +2167,28 @@ def _generate_ticker_variations(self, search_term: str) -> List[str]: return variations + # Ticker validation patterns + TICKER_VALIDATION_PATTERNS = { + "SSE": re.compile(r"^6\d{5}$"), # Shanghai: 6xxxxx + "SZSE": re.compile(r"^[03]\d{5}$"), # Shenzhen: 0xxxxx or 3xxxxx + "BSE": re.compile(r"^8\d{5}$"), # Beijing: 8xxxxx + "HKEX": re.compile(r"^\d{5}$"), # Hong Kong: 5 digits + "NASDAQ": re.compile( + r"^[A-Z0-9]{1,5}$" + ), # US markets: 1-5 alphanumeric uppercase + "NYSE": re.compile( + r"^[A-Z0-9]{1,5}$" + ), # US markets: 1-5 alphanumeric uppercase + "CRYPTO": re.compile(r"^[A-Z0-9]{1,5}$"), # Crypto: 1-5 alphanumeric uppercase + } + def validate_ticker(self, ticker: str) -> bool: """Validate if ticker is supported by AKShare and matches standard format.""" try: exchange, symbol = ticker.split(":", 1) - # Exchange validation rules - strict format checking - validation_rules = { - "SSE": lambda s: s.isdigit() and len(s) == 6 and s.startswith("6"), - "SZSE": lambda s: s.isdigit() - and len(s) == 6 - and s.startswith(("0", "3")), - "BSE": lambda s: s.isdigit() and len(s) == 6 and s.startswith("8"), - "HKEX": lambda s: s.isdigit() - and len(s) == 5, # Must be exactly 5 digits - "NASDAQ": lambda s: 1 <= len(s) <= 5 and s.isalnum() and s.isupper(), - "NYSE": lambda s: 1 <= len(s) <= 5 and s.isalnum() and s.isupper(), - } - - validator = validation_rules.get(exchange) - return validator(symbol) if validator else False + pattern = self.TICKER_VALIDATION_PATTERNS.get(exchange) + return bool(pattern and pattern.match(symbol)) except ValueError: return False