From 6c33830b68286f1605107666e33484f65e7784dd Mon Sep 17 00:00:00 2001 From: aan Date: Sun, 10 Mar 2024 17:38:46 +0100 Subject: [PATCH] feat: more tests & update db_json. --- bearish/scrapers/base.py | 34 +-- bearish/scrapers/investing.py | 19 +- bearish/scrapers/main.py | 57 ++++- bearish/scrapers/model.py | 37 ++- bearish/tests/scrapers/conftest.py | 20 ++ bearish/tests/scrapers/data/db.json | 282 +++++++++++++++++++++++ bearish/tests/scrapers/test_investing.py | 16 +- bearish/tests/scrapers/test_scrapers.py | 64 +++-- poetry.lock | 222 +++++++++++++++++- pyproject.toml | 1 + 10 files changed, 676 insertions(+), 76 deletions(-) create mode 100644 bearish/tests/scrapers/data/db.json diff --git a/bearish/scrapers/base.py b/bearish/scrapers/base.py index 4345ff0..9c4cef8 100644 --- a/bearish/scrapers/base.py +++ b/bearish/scrapers/base.py @@ -9,17 +9,17 @@ import pandas as pd import simplejson +import undetected_chromedriver as uc # type: ignore from pydantic import BaseModel, ConfigDict, Field, PrivateAttr, computed_field from selenium.common import MoveTargetOutOfBoundsException, TimeoutException from selenium.webdriver import ActionChains, Chrome, Keys -from selenium.webdriver.chrome.options import Options from selenium.webdriver.chrome.webdriver import WebDriver from selenium.webdriver.remote.webdriver import WebDriver as BaseWebDriver from selenium.webdriver.remote.webelement import WebElement from selenium.webdriver.support import expected_conditions from selenium.webdriver.support.wait import WebDriverWait -from bearish.scrapers.model import HistoricalData +from bearish.scrapers.model import HistoricalData, _clean from bearish.scrapers.settings import InvestingCountry, TradingCountry from bearish.scrapers.type import Locator @@ -85,13 +85,11 @@ def move_by_x_offset_from_left_border(element: BaseElement, x_offset: int) -> bo return right_border -def init_chrome(load_strategy_none: bool = False, headless: bool = False) -> Chrome: - option = Options() +def init_chrome(headless: bool = True) -> uc.Chrome: + options = {} if headless: - option.add_argument("--headless") - if load_strategy_none: - option.page_load_strategy = "none" - return Chrome(options=option) + options.update({"headless": True}) + return uc.Chrome(use_subprocess=False, version_main=121, **options) def bearish_path_fun() -> Path: @@ -105,16 +103,6 @@ class BaseSettings(BaseModel): ... -def clean_dict(data: Dict[str, Any]) -> Dict[str, Any]: - cleaned_data = {} - for name, value in data.items(): - if isinstance(value, dict): - cleaned_data[str(name)] = clean_dict(value) - else: - cleaned_data[str(name)] = value - return cleaned_data - - def _replace_values( tables: list[pd.DataFrame], replace_values: Dict[str, str] ) -> list[pd.DataFrame]: @@ -166,15 +154,6 @@ def _get_country_name_per_enum( ) -def _clean( - data: List[Dict[str, Any]] | Dict[str, Any] -) -> List[Dict[str, Any]] | Dict[str, Any]: - if isinstance(data, list): - return [clean_dict(data_) for data_ in data] - else: - return clean_dict(data) - - class CountryNameMixin: @abc.abstractmethod def _get_country_name(self) -> str: @@ -187,6 +166,7 @@ class BasePage(BaseModel): settings: BaseSettings browser: WebDriver = Field(default_factory=init_chrome, description="") bearish_path: Path = Field(default_factory=bearish_path_fun, description="") + first_page_only: Optional[bool] = False model_config = ConfigDict(arbitrary_types_allowed=True, use_enum_values=True) _tables = PrivateAttr(default_factory=list) _skip_existing = PrivateAttr(default=True) diff --git a/bearish/scrapers/investing.py b/bearish/scrapers/investing.py index 1ae686d..c2d6f7b 100644 --- a/bearish/scrapers/investing.py +++ b/bearish/scrapers/investing.py @@ -1,4 +1,5 @@ import contextlib +import datetime from functools import partial from typing import Any, Dict, List, Literal @@ -24,6 +25,8 @@ from bearish.scrapers.settings import InvestingCountry from bearish.scrapers.type import Locator +ONE_PAGE = 3 + COLUMNS_LENGTH = 2 @@ -78,12 +81,20 @@ def get_statements_urls(self, exchange: str) -> List[str]: ] +class UpdateInvestingSettings(InvestingSettings): + start_date: str = Field( + default_factory=lambda: ( + datetime.date.today() - datetime.timedelta(days=1) + ).strftime("%d-%m-%Y") + ) + + class InvestingScreenerScraper(BasePage, CountryNameMixin): country: int settings: InvestingSettings = Field(default=InvestingSettings()) source: Literal["trading", "investing", "yahoo"] = "investing" browser: WebDriver = Field( - default_factory=lambda: init_chrome(load_strategy_none=True, headless=True), + default_factory=lambda: init_chrome(headless=True), description="", ) @@ -133,6 +144,8 @@ def read_next_pages(self) -> None: except (ElementClickInterceptedException, TimeoutException): break page_number += 1 + if (page_number == ONE_PAGE) and self.first_page_only: + break def _custom_scrape(self) -> list[dict[str, Any]]: self.click_one_trust_button() @@ -145,10 +158,6 @@ class InvestingTickerScraper(BaseTickerPage): exchange: str source: Literal["trading", "investing", "yahoo"] = "investing" settings: InvestingSettings = Field(default=InvestingSettings()) - browser: WebDriver = Field( - default_factory=lambda: init_chrome(load_strategy_none=True, headless=False), - description="", - ) @model_validator(mode="before") @classmethod diff --git a/bearish/scrapers/main.py b/bearish/scrapers/main.py index 72a5309..6edcf0b 100644 --- a/bearish/scrapers/main.py +++ b/bearish/scrapers/main.py @@ -4,8 +4,9 @@ from typing import Any, Dict, Literal, Optional, Type, Union from pydantic import BaseModel, ConfigDict, Field +from selenium.webdriver.chrome.webdriver import WebDriver -from bearish.scrapers.base import BasePage, bearish_path_fun +from bearish.scrapers.base import BasePage, BaseSettings, bearish_path_fun, init_chrome from bearish.scrapers.investing import InvestingScreenerScraper, InvestingTickerScraper from bearish.scrapers.model import Ticker, merge, unflatten_json from bearish.scrapers.settings import InvestingCountry, TradingCountry @@ -48,38 +49,59 @@ class Scraper(BaseModel): ) source: Source country: Literal["germany", "france", "belgium", "usa"] + settings: Optional[BaseSettings] = None + browser: WebDriver = Field(default_factory=init_chrome, description="") - def _screener_scraper(self) -> BasePage: + def _screener_scraper(self, first_page_only: bool = False) -> BasePage: return self.source.screener( # type: ignore country=getattr(self.source.country, self.country), bearish_path=self.bearish_path, + first_page_only=first_page_only, + settings=self.settings, + browser=self.browser, ) def scrape( - self, skip_existing: bool = True, symbols: Optional[list[str]] = None + self, + skip_existing: bool = True, + symbols: Optional[list[str]] = None, + first_page_only: bool = False, ) -> None: - screener_scraper = self._screener_scraper() + screener_scraper = self._screener_scraper(first_page_only=first_page_only) screener_scraper.scrape(skip_existing=skip_existing) - tickers = Ticker.from_json(screener_scraper.get_stored_raw()) + tickers = Ticker.from_json( + screener_scraper.get_stored_raw(), source=self.source.screener.source + ) tickers = _filter_by_symbols(tickers=tickers, symbols=symbols) for ticker in tickers: scraper = self.source.ticker( # type: ignore - exchange=ticker.reference, bearish_path=self.bearish_path + browser=self.browser, + exchange=ticker.reference, + bearish_path=self.bearish_path, + settings=self.settings, ) try: scraper.scrape(skip_existing=skip_existing) except Exception as e: logger.error(f"Fail {ticker.reference}. reason: {e}") - def create_db_json(self) -> list[Dict[str, Any]]: + def create_db_json( + self, symbols: Optional[list[str]] = None + ) -> list[Dict[str, Any]]: scraper = self._screener_scraper() if not scraper.get_stored_raw().exists(): return [] - tickers = Ticker.from_json(scraper.get_stored_raw()) + tickers = Ticker.from_json( + scraper.get_stored_raw(), source=self.source.ticker.source + ) db_json = [] + tickers = _filter_by_symbols(tickers=tickers, symbols=symbols) for ticker in tickers: ticker_scraper = self.source.ticker( # type: ignore - browser=None, exchange=ticker.reference, bearish_path=self.bearish_path + browser=self.browser, + exchange=ticker.reference, + bearish_path=self.bearish_path, + settings=self.settings, ) if not ticker_scraper.get_stored_raw().exists(): continue @@ -89,3 +111,20 @@ def create_db_json(self) -> list[Dict[str, Any]]: merge(Ticker, ticker, ticker_) db_json.append(ticker.model_dump()) return db_json + + def update_db_json(self, db_json_path: Path) -> None: + db_json = json.loads(db_json_path.read_text()) + tickers = [Ticker(**ticker_json) for ticker_json in db_json] + for ticker in tickers: + ticker_scraper = self.source.ticker( # type: ignore + browser=self.browser, + exchange=ticker.reference, + bearish_path=self.bearish_path, + settings=self.settings, + ) + if ticker_scraper.source != ticker.source: + continue + records = ticker_scraper.scrape(skip_existing=False) + if not records: + continue + Ticker.from_record(records, source=ticker.source) diff --git a/bearish/scrapers/model.py b/bearish/scrapers/model.py index 2ef4fb3..a0d660f 100644 --- a/bearish/scrapers/model.py +++ b/bearish/scrapers/model.py @@ -3,7 +3,7 @@ import json from datetime import datetime from pathlib import Path -from typing import Any, Dict, List, Optional, Type, Union +from typing import Any, Dict, List, Literal, Optional, Type, Union from pydantic import ( AliasChoices, @@ -358,7 +358,7 @@ class Ticker(BaseTickerModel): default=None, validation_alias=AliasChoices("Name", "name") ) symbol: Optional[str] = Field(default=None, validation_alias=AliasChoices("Symbol")) - source: Optional[str] = None + source: Literal["trading", "investing", "yahoo"] sector: Optional[str] = Field(default=None, validation_alias=AliasChoices("Sector")) reference: Optional[str] = None industry: Optional[str] = Field( @@ -378,9 +378,19 @@ def reference_validator(cls, value: str) -> str: return value @classmethod - def from_json(cls, path: Path) -> List["Ticker"]: + def from_json( + cls, path: Path, source: Literal["trading", "investing", "yahoo"] + ) -> List["Ticker"]: records = json.loads(Path(path).read_text()) - return [cls(**unflatten_json(cls, record)) for record in records] + return [cls.from_record(record, source) for record in records] + + @classmethod + def from_record( + cls, + record: Dict[str, Any] | list[Dict[str, Any]], + source: Literal["trading", "investing", "yahoo"], + ) -> "Ticker": + return cls(**(unflatten_json(cls, _clean(record) | {"source": source}))) # type: ignore def is_nested(schema: Type[BaseModel]) -> bool: @@ -422,3 +432,22 @@ def unflatten_json(schema: Type[BaseModel], data: Dict[str, Any]) -> Dict[str, A original_data[name] = unflatten_json(field.annotation, data) copy_data.update(original_data) return schema(**copy_data).model_dump() + + +def _clean( + data: List[Dict[str, Any]] | Dict[str, Any] +) -> List[Dict[str, Any]] | Dict[str, Any]: + if isinstance(data, list): + return [clean_dict(data_) for data_ in data] + else: + return clean_dict(data) + + +def clean_dict(data: Dict[str, Any]) -> Dict[str, Any]: + cleaned_data = {} + for name, value in data.items(): + if isinstance(value, dict): + cleaned_data[str(name)] = clean_dict(value) + else: + cleaned_data[str(name)] = value + return cleaned_data diff --git a/bearish/tests/scrapers/conftest.py b/bearish/tests/scrapers/conftest.py index 1ceaef9..7830af2 100644 --- a/bearish/tests/scrapers/conftest.py +++ b/bearish/tests/scrapers/conftest.py @@ -3,6 +3,8 @@ import pytest +from bearish.scrapers.investing import InvestingSettings, UpdateInvestingSettings + @pytest.fixture(scope="session") def screener_investing() -> Path: @@ -39,3 +41,21 @@ def investing_record(investing_records: list[dict]) -> dict: @pytest.fixture(scope="session") def trading_record(trading_records: list[dict]) -> dict: return trading_records[0] + + +@pytest.fixture +def invest_settings() -> InvestingSettings: + return InvestingSettings( + suffixes=[ + "-income-statement", + ] + ) + + +@pytest.fixture +def update_invest_settings() -> UpdateInvestingSettings: + return UpdateInvestingSettings( + suffixes=[ + "-income-statement", + ] + ) diff --git a/bearish/tests/scrapers/data/db.json b/bearish/tests/scrapers/data/db.json new file mode 100644 index 0000000..c799277 --- /dev/null +++ b/bearish/tests/scrapers/data/db.json @@ -0,0 +1,282 @@ +[ + { + "name": "UCB", + "symbol": "UCB", + "source": "investing", + "sector": "Healthcare", + "reference": "ucb", + "industry": "Pharmaceuticals", + "exchange": "Brussels", + "fundamental": { + "income_statement": { + "revenue": { + "yearly": { + "2023-12-31": 5182.0, + "2022-12-31": 5447.0, + "2021-12-31": 5777.0, + "2020-12-31": 5347.0 + }, + "quarterly": { + "2023-12-31": 1331.5, + "2023-09-30": 1331.5, + "2023-06-30": 1259.5, + "2023-03-31": 1259.5 + } + }, + "cost_of_goods_sold": { + "yearly": { + "2023-12-31": 1707.0, + "2022-12-31": 1674.0, + "2021-12-31": 1438.0, + "2020-12-31": 1363.0 + }, + "quarterly": { + "2023-12-31": 452.5, + "2023-09-30": 452.5, + "2023-06-30": 401.0, + "2023-03-31": 401.0 + } + }, + "gross_profit": { + "yearly": { + "2023-12-31": 3475.0, + "2022-12-31": 3773.0, + "2021-12-31": 4339.0, + "2020-12-31": 3984.0 + }, + "quarterly": { + "2023-12-31": 879.0, + "2023-09-30": 879.0, + "2023-06-30": 858.5, + "2023-03-31": 858.5 + } + }, + "total_operating_expenses": { + "yearly": { + "2023-12-31": 4604.0, + "2022-12-31": 4848.0, + "2021-12-31": 4463.0, + "2020-12-31": 4376.0 + }, + "quarterly": { + "2023-12-31": "-", + "2023-09-30": 1249.0, + "2023-06-30": 1053.0, + "2023-03-31": 1053.0 + } + }, + "operating_income": { + "yearly": { + "2023-12-31": 578.0, + "2022-12-31": 599.0, + "2021-12-31": 1314.0, + "2020-12-31": 971.0 + }, + "quarterly": { + "2023-12-31": 82.5, + "2023-09-30": 82.5, + "2023-06-30": 206.5, + "2023-03-31": 206.5 + } + }, + "net_income_before_taxes": { + "yearly": { + "2023-12-31": 441.0, + "2022-12-31": 511.0, + "2021-12-31": 1225.0, + "2020-12-31": 880.0 + }, + "quarterly": { + "2023-12-31": 20.0, + "2023-09-30": 20.0, + "2023-06-30": 200.5, + "2023-03-31": 200.5 + } + }, + "net_income_after_taxes": { + "yearly": { + "2023-12-31": 343.0, + "2022-12-31": 420.0, + "2021-12-31": 1055.0, + "2020-12-31": 761.0 + }, + "quarterly": { + "2023-12-31": 16.0, + "2023-09-30": 16.0, + "2023-06-30": 155.5, + "2023-03-31": 155.5 + } + }, + "net_income": { + "yearly": { + "2023-12-31": 343.0, + "2022-12-31": 418.0, + "2021-12-31": 1058.0, + "2020-12-31": 732.0 + }, + "quarterly": { + "2023-12-31": 16.0, + "2023-09-30": 16.0, + "2023-06-30": 155.5, + "2023-03-31": 155.5 + } + } + }, + "balance_sheet": { + "total_current_assets": null, + "total_assets": null, + "total_current_liabilities": null, + "total_liabilities": null, + "total_equity": null + }, + "cash_flow": {}, + "ratios": { + "price_earning_ratio": 62.44, + "earning_per_share": 1.81, + "earning_per_share_ttm": null, + "earning_per_share_yoy": "-17.97", + "payout_ratio": 73.47, + "dividend_yield": 0.85 + }, + "valuation": { + "market_cap": 20850000000.0, + "market_cap_performance_one_year": null, + "enterprise_value": null + } + }, + "historical": { + "price": { + "2024-03-08": 109.9, + "2024-03-07": 109.6, + "2024-03-06": 109.0, + "2024-03-05": 106.85, + "2024-03-04": 107.9, + "2024-03-01": 108.55, + "2024-02-29": 106.5, + "2024-02-28": 105.0, + "2024-02-27": 97.14, + "2024-02-26": 96.64, + "2024-02-23": 96.42, + "2024-02-22": 95.56, + "2024-02-21": 94.2, + "2024-02-20": 95.06, + "2024-02-19": 95.4, + "2024-02-16": 94.9, + "2024-02-15": 94.62, + "2024-02-14": 94.06, + "2024-02-13": 93.28, + "2024-02-12": 94.5 + }, + "open": { + "2024-03-08": 109.0, + "2024-03-07": 108.85, + "2024-03-06": 107.15, + "2024-03-05": 107.7, + "2024-03-04": 107.65, + "2024-03-01": 106.55, + "2024-02-29": 103.5, + "2024-02-28": 97.38, + "2024-02-27": 96.8, + "2024-02-26": 96.4, + "2024-02-23": 95.8, + "2024-02-22": 94.7, + "2024-02-21": 94.58, + "2024-02-20": 95.02, + "2024-02-19": 94.76, + "2024-02-16": 94.5, + "2024-02-15": 93.5, + "2024-02-14": 93.08, + "2024-02-13": 94.8, + "2024-02-12": 93.68 + }, + "high": { + "2024-03-08": 110.1, + "2024-03-07": 109.6, + "2024-03-06": 109.1, + "2024-03-05": 108.35, + "2024-03-04": 109.35, + "2024-03-01": 109.4, + "2024-02-29": 106.95, + "2024-02-28": 105.0, + "2024-02-27": 97.18, + "2024-02-26": 98.14, + "2024-02-23": 96.94, + "2024-02-22": 95.94, + "2024-02-21": 94.88, + "2024-02-20": 95.94, + "2024-02-19": 95.82, + "2024-02-16": 95.46, + "2024-02-15": 94.66, + "2024-02-14": 94.14, + "2024-02-13": 94.8, + "2024-02-12": 94.86 + }, + "low": { + "2024-03-08": 109.0, + "2024-03-07": 107.95, + "2024-03-06": 106.45, + "2024-03-05": 106.2, + "2024-03-04": 107.55, + "2024-03-01": 106.55, + "2024-02-29": 102.65, + "2024-02-28": 95.6, + "2024-02-27": 95.96, + "2024-02-26": 96.4, + "2024-02-23": 95.48, + "2024-02-22": 94.7, + "2024-02-21": 93.68, + "2024-02-20": 94.8, + "2024-02-19": 94.76, + "2024-02-16": 94.44, + "2024-02-15": 93.5, + "2024-02-14": 93.08, + "2024-02-13": 92.56, + "2024-02-12": 93.26 + }, + "volume": { + "2024-03-08": 275250.0, + "2024-03-07": 351110.0, + "2024-03-06": 259190.0, + "2024-03-05": 249850.0, + "2024-03-04": 307970.0, + "2024-03-01": 419670.0, + "2024-02-29": 1140000.0, + "2024-02-28": 828020.0, + "2024-02-27": 247870.0, + "2024-02-26": 250490.0, + "2024-02-23": 185490.0, + "2024-02-22": 230000.0, + "2024-02-21": 265590.0, + "2024-02-20": 226310.0, + "2024-02-19": 135100.0, + "2024-02-16": 169880.0, + "2024-02-15": 237220.0, + "2024-02-14": 135760.0, + "2024-02-13": 300060.0, + "2024-02-12": 270970.0 + }, + "change": { + "2024-03-08": 0.27, + "2024-03-07": 0.55, + "2024-03-06": 2.01, + "2024-03-05": -0.97, + "2024-03-04": -0.6, + "2024-03-01": 1.92, + "2024-02-29": 1.43, + "2024-02-28": 8.09, + "2024-02-27": 0.52, + "2024-02-26": 0.23, + "2024-02-23": 0.9, + "2024-02-22": 1.44, + "2024-02-21": -0.9, + "2024-02-20": -0.36, + "2024-02-19": 0.53, + "2024-02-16": 0.3, + "2024-02-15": 0.6, + "2024-02-14": 0.84, + "2024-02-13": -1.29, + "2024-02-12": 0.88 + } + } + } +] diff --git a/bearish/tests/scrapers/test_investing.py b/bearish/tests/scrapers/test_investing.py index 15c3be0..1597c0b 100644 --- a/bearish/tests/scrapers/test_investing.py +++ b/bearish/tests/scrapers/test_investing.py @@ -2,8 +2,6 @@ import tempfile from pathlib import Path -import pytest - from bearish.scrapers.base import init_chrome from bearish.scrapers.investing import ( InvestingScreenerScraper, @@ -15,24 +13,16 @@ logger = logging.getLogger(__name__) -@pytest.fixture -def invest_settings() -> InvestingSettings: - return InvestingSettings( - suffixes=[ - "-income-statement", - ] - ) - - def test_investing_screener_belgium(invest_settings: InvestingSettings) -> None: with tempfile.TemporaryDirectory() as temp_directory: temp_path = Path(temp_directory).joinpath("investing") - browser = init_chrome(load_strategy_none=True, headless=True) + browser = init_chrome(headless=True) scraper = InvestingScreenerScraper( browser=browser, country=InvestingCountry.belgium, settings=invest_settings, bearish_path=temp_path, + first_page_only=True, ) data = scraper.scrape() assert data @@ -45,7 +35,7 @@ def test_investing_ticker_scraper(invest_settings: InvestingSettings) -> None: scraper = InvestingTickerScraper( exchange="ucb", settings=invest_settings, - browser=init_chrome(load_strategy_none=True, headless=True), + browser=init_chrome(headless=True), bearish_path=temp_path, ) data = scraper.scrape() diff --git a/bearish/tests/scrapers/test_scrapers.py b/bearish/tests/scrapers/test_scrapers.py index ef856f9..692d282 100644 --- a/bearish/tests/scrapers/test_scrapers.py +++ b/bearish/tests/scrapers/test_scrapers.py @@ -1,45 +1,75 @@ import json import logging +import tempfile from pathlib import Path import pytest +from bearish.scrapers.investing import InvestingSettings, UpdateInvestingSettings from bearish.scrapers.main import DataSource, Scraper logger = logging.getLogger(__name__) -@pytest.mark.skip("Too slow") -def test_investing_main_scraper() -> None: - scraper = Scraper(source=DataSource.investing, country="belgium") - scraper.scrape(skip_existing=False) +def test_investing_main_scraper(invest_settings: InvestingSettings) -> None: + with tempfile.TemporaryDirectory() as temp_directory: + temp_path = Path(temp_directory).joinpath("investing", "scraper") + scraper = Scraper( + source=DataSource.investing, + country="belgium", + bearish_path=temp_path, + settings=invest_settings, + ) + scraper.scrape(skip_existing=False, symbols=["UCB"], first_page_only=True) + assert Path(temp_path).joinpath("investing", "screener", "belgium").exists() + assert Path(temp_path).joinpath("investing", "ticker", "ucb").exists() + db_json = scraper.create_db_json() + assert len(db_json) == 1 + assert "historical" in db_json[0] + + +def test_investing_db_json_update( + update_invest_settings: UpdateInvestingSettings, +) -> None: + db_json_path = Path(__file__).parent.joinpath("data", "db.json") + with tempfile.TemporaryDirectory() as temp_directory: + temp_path = Path(temp_directory).joinpath("investing", "scraper") + scraper = Scraper( + source=DataSource.investing, + country="belgium", + bearish_path=temp_path, + settings=update_invest_settings, + ) + scraper.update_db_json(db_json_path) @pytest.mark.skip("Too slow") -def test_investing_main_scraper_france() -> None: - scraper = Scraper(source=DataSource.investing, country="france") +def test_investing_main_scraper_france(invest_settings: InvestingSettings) -> None: + scraper = Scraper( + source=DataSource.investing, country="france", settings=invest_settings + ) scraper.scrape(skip_existing=False) @pytest.mark.skip("Too slow") -def test_investing_main_scraper_germany() -> None: - scraper = Scraper(source=DataSource.investing, country="germany") +def test_investing_main_scraper_germany(invest_settings: InvestingSettings) -> None: + scraper = Scraper( + source=DataSource.investing, country="germany", settings=invest_settings + ) scraper.scrape(skip_existing=False) @pytest.mark.skip("Too slow") -def test_investing_main_scraper_db_json() -> None: - scraper = Scraper(source=DataSource.investing, country="belgium") +def test_investing_main_scraper_db_json(invest_settings: InvestingSettings) -> None: + scraper = Scraper( + source=DataSource.investing, country="belgium", settings=invest_settings + ) db_json = scraper.create_db_json() - scraper = Scraper(source=DataSource.investing, country="france") + scraper = Scraper( + source=DataSource.investing, country="france", settings=invest_settings + ) db_json += scraper.create_db_json() f = Path("/home/aan/Documents/bullish/data/db_json.json") f.touch(exist_ok=True) with f.open(mode="w") as p: json.dump(db_json, p, indent=4) - - -@pytest.mark.skip("Too slow") -def test_trading_main_scraper() -> None: - scraper = Scraper(source=DataSource.trading, country="belgium") - scraper.scrape() diff --git a/poetry.lock b/poetry.lock index 212088b..6af89da 100644 --- a/poetry.lock +++ b/poetry.lock @@ -158,6 +158,106 @@ files = [ {file = "cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560"}, ] +[[package]] +name = "charset-normalizer" +version = "3.3.2" +description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." +category = "main" +optional = false +python-versions = ">=3.7.0" +files = [ + {file = "charset-normalizer-3.3.2.tar.gz", hash = "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-win32.whl", hash = "sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-win32.whl", hash = "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-win32.whl", hash = "sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:95f2a5796329323b8f0512e09dbb7a1860c46a39da62ecb2324f116fa8fdc85c"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c002b4ffc0be611f0d9da932eb0f704fe2602a9a949d1f738e4c34c75b0863d5"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a981a536974bbc7a512cf44ed14938cf01030a99e9b3a06dd59578882f06f985"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3287761bc4ee9e33561a7e058c72ac0938c4f57fe49a09eae428fd88aafe7bb6"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:42cb296636fcc8b0644486d15c12376cb9fa75443e00fb25de0b8602e64c1714"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a55554a2fa0d408816b3b5cedf0045f4b8e1a6065aec45849de2d6f3f8e9786"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:c083af607d2515612056a31f0a8d9e0fcb5876b7bfc0abad3ecd275bc4ebc2d5"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:87d1351268731db79e0f8e745d92493ee2841c974128ef629dc518b937d9194c"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:bd8f7df7d12c2db9fab40bdd87a7c09b1530128315d047a086fa3ae3435cb3a8"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:c180f51afb394e165eafe4ac2936a14bee3eb10debc9d9e4db8958fe36afe711"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:8c622a5fe39a48f78944a87d4fb8a53ee07344641b0562c540d840748571b811"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-win32.whl", hash = "sha256:db364eca23f876da6f9e16c9da0df51aa4f104a972735574842618b8c6d999d4"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-win_amd64.whl", hash = "sha256:86216b5cee4b06df986d214f664305142d9c76df9b6512be2738aa72a2048f99"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:6463effa3186ea09411d50efc7d85360b38d5f09b870c48e4600f63af490e56a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6c4caeef8fa63d06bd437cd4bdcf3ffefe6738fb1b25951440d80dc7df8c03ac"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:37e55c8e51c236f95b033f6fb391d7d7970ba5fe7ff453dad675e88cf303377a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb69256e180cb6c8a894fee62b3afebae785babc1ee98b81cdf68bbca1987f33"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ae5f4161f18c61806f411a13b0310bea87f987c7d2ecdbdaad0e94eb2e404238"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b2b0a0c0517616b6869869f8c581d4eb2dd83a4d79e0ebcb7d373ef9956aeb0a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:45485e01ff4d3630ec0d9617310448a8702f70e9c01906b0d0118bdf9d124cf2"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eb00ed941194665c332bf8e078baf037d6c35d7c4f3102ea2d4f16ca94a26dc8"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:2127566c664442652f024c837091890cb1942c30937add288223dc895793f898"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a50aebfa173e157099939b17f18600f72f84eed3049e743b68ad15bd69b6bf99"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:4d0d1650369165a14e14e1e47b372cfcb31d6ab44e6e33cb2d4e57265290044d"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:923c0c831b7cfcb071580d3f46c4baf50f174be571576556269530f4bbd79d04"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-win32.whl", hash = "sha256:6ef1d82a3af9d3eecdba2321dc1b3c238245d890843e040e41e470ffa64c3e25"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:eb8821e09e916165e160797a6c17edda0679379a4be5c716c260e836e122f54b"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = "sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-win32.whl", hash = "sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d"}, + {file = "charset_normalizer-3.3.2-py3-none-any.whl", hash = "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc"}, +] + [[package]] name = "colorama" version = "0.4.6" @@ -1519,6 +1619,28 @@ files = [ {file = "PyYAML-6.0.1.tar.gz", hash = "sha256:bfdf460b1736c775f2ba9f6a92bca30bc2095067b8a9d77876d1fad6cc3b4a43"}, ] +[[package]] +name = "requests" +version = "2.31.0" +description = "Python HTTP for Humans." +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "requests-2.31.0-py3-none-any.whl", hash = "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f"}, + {file = "requests-2.31.0.tar.gz", hash = "sha256:942c5a758f98d790eaed1a29cb6eefc7ffb0d1cf7af05c3d2791656dbd6ad1e1"}, +] + +[package.dependencies] +certifi = ">=2017.4.17" +charset-normalizer = ">=2,<4" +idna = ">=2.5,<4" +urllib3 = ">=1.21.1,<3" + +[package.extras] +socks = ["PySocks (>=1.5.6,!=1.5.7)"] +use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] + [[package]] name = "ruff" version = "0.3.0" @@ -1882,6 +2004,22 @@ files = [ {file = "tzdata-2024.1.tar.gz", hash = "sha256:2674120f8d891909751c38abcdfd386ac0a5a1127954fbc332af6b5ceae07efd"}, ] +[[package]] +name = "undetected-chromedriver" +version = "3.5.5" +description = "('Selenium.webdriver.Chrome replacement with compatiblity for Brave, and other Chromium based browsers.', 'Not triggered by CloudFlare/Imperva/hCaptcha and such.', 'NOTE: results may vary due to many factors. No guarantees are given, except for ongoing efforts in understanding detection algorithms.')" +category = "main" +optional = false +python-versions = "*" +files = [ + {file = "undetected-chromedriver-3.5.5.tar.gz", hash = "sha256:9f945e1435005247abe17de316bcfda85b284a4177fd5f25167c78ced33b65ec"}, +] + +[package.dependencies] +requests = "*" +selenium = ">=4.9.0" +websockets = "*" + [[package]] name = "unidecode" version = "1.3.8" @@ -1948,6 +2086,88 @@ files = [ {file = "webencodings-0.5.1.tar.gz", hash = "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923"}, ] +[[package]] +name = "websockets" +version = "12.0" +description = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)" +category = "main" +optional = false +python-versions = ">=3.8" +files = [ + {file = "websockets-12.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d554236b2a2006e0ce16315c16eaa0d628dab009c33b63ea03f41c6107958374"}, + {file = "websockets-12.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2d225bb6886591b1746b17c0573e29804619c8f755b5598d875bb4235ea639be"}, + {file = "websockets-12.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:eb809e816916a3b210bed3c82fb88eaf16e8afcf9c115ebb2bacede1797d2547"}, + {file = "websockets-12.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c588f6abc13f78a67044c6b1273a99e1cf31038ad51815b3b016ce699f0d75c2"}, + {file = "websockets-12.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5aa9348186d79a5f232115ed3fa9020eab66d6c3437d72f9d2c8ac0c6858c558"}, + {file = "websockets-12.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6350b14a40c95ddd53e775dbdbbbc59b124a5c8ecd6fbb09c2e52029f7a9f480"}, + {file = "websockets-12.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:70ec754cc2a769bcd218ed8d7209055667b30860ffecb8633a834dde27d6307c"}, + {file = "websockets-12.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:6e96f5ed1b83a8ddb07909b45bd94833b0710f738115751cdaa9da1fb0cb66e8"}, + {file = "websockets-12.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:4d87be612cbef86f994178d5186add3d94e9f31cc3cb499a0482b866ec477603"}, + {file = "websockets-12.0-cp310-cp310-win32.whl", hash = "sha256:befe90632d66caaf72e8b2ed4d7f02b348913813c8b0a32fae1cc5fe3730902f"}, + {file = "websockets-12.0-cp310-cp310-win_amd64.whl", hash = "sha256:363f57ca8bc8576195d0540c648aa58ac18cf85b76ad5202b9f976918f4219cf"}, + {file = "websockets-12.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5d873c7de42dea355d73f170be0f23788cf3fa9f7bed718fd2830eefedce01b4"}, + {file = "websockets-12.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3f61726cae9f65b872502ff3c1496abc93ffbe31b278455c418492016e2afc8f"}, + {file = "websockets-12.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ed2fcf7a07334c77fc8a230755c2209223a7cc44fc27597729b8ef5425aa61a3"}, + {file = "websockets-12.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8e332c210b14b57904869ca9f9bf4ca32f5427a03eeb625da9b616c85a3a506c"}, + {file = "websockets-12.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5693ef74233122f8ebab026817b1b37fe25c411ecfca084b29bc7d6efc548f45"}, + {file = "websockets-12.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6e9e7db18b4539a29cc5ad8c8b252738a30e2b13f033c2d6e9d0549b45841c04"}, + {file = "websockets-12.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:6e2df67b8014767d0f785baa98393725739287684b9f8d8a1001eb2839031447"}, + {file = "websockets-12.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:bea88d71630c5900690fcb03161ab18f8f244805c59e2e0dc4ffadae0a7ee0ca"}, + {file = "websockets-12.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:dff6cdf35e31d1315790149fee351f9e52978130cef6c87c4b6c9b3baf78bc53"}, + {file = "websockets-12.0-cp311-cp311-win32.whl", hash = "sha256:3e3aa8c468af01d70332a382350ee95f6986db479ce7af14d5e81ec52aa2b402"}, + {file = "websockets-12.0-cp311-cp311-win_amd64.whl", hash = "sha256:25eb766c8ad27da0f79420b2af4b85d29914ba0edf69f547cc4f06ca6f1d403b"}, + {file = "websockets-12.0-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0e6e2711d5a8e6e482cacb927a49a3d432345dfe7dea8ace7b5790df5932e4df"}, + {file = "websockets-12.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:dbcf72a37f0b3316e993e13ecf32f10c0e1259c28ffd0a85cee26e8549595fbc"}, + {file = "websockets-12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:12743ab88ab2af1d17dd4acb4645677cb7063ef4db93abffbf164218a5d54c6b"}, + {file = "websockets-12.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7b645f491f3c48d3f8a00d1fce07445fab7347fec54a3e65f0725d730d5b99cb"}, + {file = "websockets-12.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:9893d1aa45a7f8b3bc4510f6ccf8db8c3b62120917af15e3de247f0780294b92"}, + {file = "websockets-12.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1f38a7b376117ef7aff996e737583172bdf535932c9ca021746573bce40165ed"}, + {file = "websockets-12.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:f764ba54e33daf20e167915edc443b6f88956f37fb606449b4a5b10ba42235a5"}, + {file = "websockets-12.0-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:1e4b3f8ea6a9cfa8be8484c9221ec0257508e3a1ec43c36acdefb2a9c3b00aa2"}, + {file = "websockets-12.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:9fdf06fd06c32205a07e47328ab49c40fc1407cdec801d698a7c41167ea45113"}, + {file = "websockets-12.0-cp312-cp312-win32.whl", hash = "sha256:baa386875b70cbd81798fa9f71be689c1bf484f65fd6fb08d051a0ee4e79924d"}, + {file = "websockets-12.0-cp312-cp312-win_amd64.whl", hash = "sha256:ae0a5da8f35a5be197f328d4727dbcfafa53d1824fac3d96cdd3a642fe09394f"}, + {file = "websockets-12.0-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:5f6ffe2c6598f7f7207eef9a1228b6f5c818f9f4d53ee920aacd35cec8110438"}, + {file = "websockets-12.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:9edf3fc590cc2ec20dc9d7a45108b5bbaf21c0d89f9fd3fd1685e223771dc0b2"}, + {file = "websockets-12.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:8572132c7be52632201a35f5e08348137f658e5ffd21f51f94572ca6c05ea81d"}, + {file = "websockets-12.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:604428d1b87edbf02b233e2c207d7d528460fa978f9e391bd8aaf9c8311de137"}, + {file = "websockets-12.0-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:1a9d160fd080c6285e202327aba140fc9a0d910b09e423afff4ae5cbbf1c7205"}, + {file = "websockets-12.0-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:87b4aafed34653e465eb77b7c93ef058516cb5acf3eb21e42f33928616172def"}, + {file = "websockets-12.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:b2ee7288b85959797970114deae81ab41b731f19ebcd3bd499ae9ca0e3f1d2c8"}, + {file = "websockets-12.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:7fa3d25e81bfe6a89718e9791128398a50dec6d57faf23770787ff441d851967"}, + {file = "websockets-12.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:a571f035a47212288e3b3519944f6bf4ac7bc7553243e41eac50dd48552b6df7"}, + {file = "websockets-12.0-cp38-cp38-win32.whl", hash = "sha256:3c6cc1360c10c17463aadd29dd3af332d4a1adaa8796f6b0e9f9df1fdb0bad62"}, + {file = "websockets-12.0-cp38-cp38-win_amd64.whl", hash = "sha256:1bf386089178ea69d720f8db6199a0504a406209a0fc23e603b27b300fdd6892"}, + {file = "websockets-12.0-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:ab3d732ad50a4fbd04a4490ef08acd0517b6ae6b77eb967251f4c263011a990d"}, + {file = "websockets-12.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:a1d9697f3337a89691e3bd8dc56dea45a6f6d975f92e7d5f773bc715c15dde28"}, + {file = "websockets-12.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1df2fbd2c8a98d38a66f5238484405b8d1d16f929bb7a33ed73e4801222a6f53"}, + {file = "websockets-12.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:23509452b3bc38e3a057382c2e941d5ac2e01e251acce7adc74011d7d8de434c"}, + {file = "websockets-12.0-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2e5fc14ec6ea568200ea4ef46545073da81900a2b67b3e666f04adf53ad452ec"}, + {file = "websockets-12.0-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:46e71dbbd12850224243f5d2aeec90f0aaa0f2dde5aeeb8fc8df21e04d99eff9"}, + {file = "websockets-12.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b81f90dcc6c85a9b7f29873beb56c94c85d6f0dac2ea8b60d995bd18bf3e2aae"}, + {file = "websockets-12.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:a02413bc474feda2849c59ed2dfb2cddb4cd3d2f03a2fedec51d6e959d9b608b"}, + {file = "websockets-12.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:bbe6013f9f791944ed31ca08b077e26249309639313fff132bfbf3ba105673b9"}, + {file = "websockets-12.0-cp39-cp39-win32.whl", hash = "sha256:cbe83a6bbdf207ff0541de01e11904827540aa069293696dd528a6640bd6a5f6"}, + {file = "websockets-12.0-cp39-cp39-win_amd64.whl", hash = "sha256:fc4e7fa5414512b481a2483775a8e8be7803a35b30ca805afa4998a84f9fd9e8"}, + {file = "websockets-12.0-pp310-pypy310_pp73-macosx_10_9_x86_64.whl", hash = "sha256:248d8e2446e13c1d4326e0a6a4e9629cb13a11195051a73acf414812700badbd"}, + {file = "websockets-12.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f44069528d45a933997a6fef143030d8ca8042f0dfaad753e2906398290e2870"}, + {file = "websockets-12.0-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c4e37d36f0d19f0a4413d3e18c0d03d0c268ada2061868c1e6f5ab1a6d575077"}, + {file = "websockets-12.0-pp310-pypy310_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3d829f975fc2e527a3ef2f9c8f25e553eb7bc779c6665e8e1d52aa22800bb38b"}, + {file = "websockets-12.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:2c71bd45a777433dd9113847af751aae36e448bc6b8c361a566cb043eda6ec30"}, + {file = "websockets-12.0-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:0bee75f400895aef54157b36ed6d3b308fcab62e5260703add87f44cee9c82a6"}, + {file = "websockets-12.0-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:423fc1ed29f7512fceb727e2d2aecb952c46aa34895e9ed96071821309951123"}, + {file = "websockets-12.0-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:27a5e9964ef509016759f2ef3f2c1e13f403725a5e6a1775555994966a66e931"}, + {file = "websockets-12.0-pp38-pypy38_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c3181df4583c4d3994d31fb235dc681d2aaad744fbdbf94c4802485ececdecf2"}, + {file = "websockets-12.0-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:b067cb952ce8bf40115f6c19f478dc71c5e719b7fbaa511359795dfd9d1a6468"}, + {file = "websockets-12.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:00700340c6c7ab788f176d118775202aadea7602c5cc6be6ae127761c16d6b0b"}, + {file = "websockets-12.0-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e469d01137942849cff40517c97a30a93ae79917752b34029f0ec72df6b46399"}, + {file = "websockets-12.0-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ffefa1374cd508d633646d51a8e9277763a9b78ae71324183693959cf94635a7"}, + {file = "websockets-12.0-pp39-pypy39_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba0cab91b3956dfa9f512147860783a1829a8d905ee218a9837c18f683239611"}, + {file = "websockets-12.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:2cb388a5bfb56df4d9a406783b7f9dbefb888c09b71629351cc6b036e9259370"}, + {file = "websockets-12.0-py3-none-any.whl", hash = "sha256:dc284bbc8d7c78a6c69e0c7325ab46ee5e40bb4d50e494d8131a07ef47500e9e"}, + {file = "websockets-12.0.tar.gz", hash = "sha256:81df9cbcbb6c260de1e007e58c011bfebe2dafc8435107b0537f393dd38c8b1b"}, +] + [[package]] name = "wsproto" version = "1.2.0" @@ -1966,4 +2186,4 @@ h11 = ">=0.9.0,<1" [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "1cbe47bf2a37795b079499610d0d770351183a014dce2b7ffc2140e830c39f36" +content-hash = "dc2ad6c63d964a38a59f7be89c45d00d969a3ce50296a4b29c5bbffd4ec2c989" diff --git a/pyproject.toml b/pyproject.toml index acaf0cf..2b35873 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,6 +19,7 @@ lxml = "^5.1.0" html5lib = "^1.1" typeguard = "^4.1.5" nox = "^2023.4.22" +undetected-chromedriver = "^3.5.5"