From 36051a7a89e0982dbc5fe69bd97f2ce3f258c56c Mon Sep 17 00:00:00 2001 From: Lucas Faudman Date: Thu, 18 Apr 2024 16:43:03 -0700 Subject: [PATCH] Try wrap support for try_ to ignore provided WebDriverExceptions, dynamic methods created in __getattr__ now save when save_dynamic_methods kwarg == True, add tests to check previous + user-agent --- src/souperscraper/__init__.py | 2 +- src/souperscraper/souperscraper.py | 397 ++++++++++++++++++++--------- tests/test_souperscraper.py | 70 ++++- 3 files changed, 338 insertions(+), 131 deletions(-) diff --git a/src/souperscraper/__init__.py b/src/souperscraper/__init__.py index f9b86bf..6dece83 100644 --- a/src/souperscraper/__init__.py +++ b/src/souperscraper/__init__.py @@ -1,2 +1,2 @@ -from .souperscraper import SouperScraper, Keys +from .souperscraper import SouperScraper, Keys, By, EC, WebElement, WebDriverException from .getchromedriver import get_chromedriver \ No newline at end of file diff --git a/src/souperscraper/souperscraper.py b/src/souperscraper/souperscraper.py index 92defc0..6d60373 100644 --- a/src/souperscraper/souperscraper.py +++ b/src/souperscraper/souperscraper.py @@ -1,3 +1,8 @@ +from typing import Optional, Union, Literal, Type, Iterable, Callable +from functools import partial, wraps +from pathlib import Path +from time import sleep + from bs4 import BeautifulSoup from selenium.webdriver.common.by import By from selenium.webdriver.common.keys import Keys @@ -11,101 +16,113 @@ from selenium.webdriver.common.alert import Alert from selenium.webdriver.support.wait import WebDriverWait from selenium.webdriver.support import expected_conditions as EC -from selenium.common.exceptions import WebDriverException, NoSuchElementException, StaleElementReferenceException, TimeoutException from selenium.webdriver.remote.webelement import WebElement +from selenium.common.exceptions import ( + WebDriverException, + TimeoutException, +) -from time import sleep -from functools import partial -from typing import Optional, Union, Literal, Type, Iterable, Callable -from pathlib import Path - -WebDriverType = Literal['chrome', 'edge', 'firefox', - 'ie', 'safari', 'webkitgtk', 'wpewebkit'] -SoupParser = Literal['html.parser', 'lxml', 'lxml-xml', 'xml', 'html5lib'] +WebDriverType = Literal[ + "chrome", "edge", "firefox", "ie", "safari", "webkitgtk", "wpewebkit" +] +SoupParser = Literal["html.parser", "lxml", "lxml-xml", "xml", "html5lib"] -def import_webdriver(webdriver_type: WebDriverType) -> tuple[Type, Type, Optional[Type]]: +def import_webdriver( + webdriver_type: WebDriverType, +) -> tuple[Type, Type, Optional[Type]]: """Imports and returns Selenium WebDriver class, Service class, and Options class for webdriver_type""" webdriver_modules = { - 'chrome': { - 'Service': 'selenium.webdriver.chrome.service', - 'Options': 'selenium.webdriver.chrome.options', - 'WebDriver': 'selenium.webdriver.chrome.webdriver' + "chrome": { + "Service": "selenium.webdriver.chrome.service", + "Options": "selenium.webdriver.chrome.options", + "WebDriver": "selenium.webdriver.chrome.webdriver", + }, + "edge": { + "Service": "selenium.webdriver.edge.service", + "Options": "selenium.webdriver.edge.options", + "WebDriver": "selenium.webdriver.edge.webdriver", }, - 'edge': { - 'Service': 'selenium.webdriver.edge.service', - 'Options': 'selenium.webdriver.edge.options', - 'WebDriver': 'selenium.webdriver.edge.webdriver' + "firefox": { + "Service": "selenium.webdriver.firefox.service", + "Options": "selenium.webdriver.firefox.options", + "WebDriver": 
"selenium.webdriver.firefox.webdriver", }, - 'firefox': { - 'Service': 'selenium.webdriver.firefox.service', - 'Options': 'selenium.webdriver.firefox.options', - 'WebDriver': 'selenium.webdriver.firefox.webdriver' + "ie": { + "Service": "selenium.webdriver.ie.service", + "Options": "selenium.webdriver.ie.options", + "WebDriver": "selenium.webdriver.ie.webdriver", }, - 'ie': { - 'Service': 'selenium.webdriver.ie.service', - 'Options': 'selenium.webdriver.ie.options', - 'WebDriver': 'selenium.webdriver.ie.webdriver' + "safari": { + "Service": "selenium.webdriver.safari.service", + "WebDriver": "selenium.webdriver.safari.webdriver", }, - 'safari': { - 'Service': 'selenium.webdriver.safari.service', - 'WebDriver': 'selenium.webdriver.safari.webdriver' + "webkitgtk": { + "Service": "selenium.webdriver.webkitgtk.service", + "Options": "selenium.webdriver.webkitgtk.options", + "WebDriver": "selenium.webdriver.webkitgtk.webdriver", }, - 'webkitgtk': { - 'Service': 'selenium.webdriver.webkitgtk.service', - 'Options': 'selenium.webdriver.webkitgtk.options', - 'WebDriver': 'selenium.webdriver.webkitgtk.webdriver' + "wpewebkit": { + "Service": "selenium.webdriver.wpewebkit.service", + "Options": "selenium.webdriver.wpewebkit.options", + "WebDriver": "selenium.webdriver.wpewebkit.webdriver", }, - 'wpewebkit': { - 'Service': 'selenium.webdriver.wpewebkit.service', - 'Options': 'selenium.webdriver.wpewebkit.options', - 'WebDriver': 'selenium.webdriver.wpewebkit.webdriver' - } } if webdriver_type in webdriver_modules: module = webdriver_modules[webdriver_type] from importlib import import_module + return ( - import_module(module['WebDriver']).WebDriver, - import_module(module['Service']).Service, - import_module(module['Options'] - ).Options if 'Options' in module else None + import_module(module["WebDriver"]).WebDriver, + import_module(module["Service"]).Service, + import_module(module["Options"]).Options if "Options" in module else None, ) else: raise ValueError(f"Unsupported webdriver type: {webdriver_type}") class SouperScraper: - def __init__(self, - soup_parser: SoupParser = 'html.parser', - executable_path: Union[str, Path] = './chromedriver', - selenium_webdriver_type: WebDriverType = "chrome", - selenium_service_kwargs: Optional[dict] = None, - selenium_options_args: Optional[Iterable[str]] = None, - selenium_webdriver_cls_override: Optional[Type] = None, - selenium_service_cls_override: Optional[Type] = None, - selenium_options_cls_override: Optional[Type] = None, - keep_alive: bool = True, - user_agent: Optional[str] = None, - proxy: Optional[str] = None, - ) -> None: - + def __init__( + self, + soup_parser: SoupParser = "html.parser", + executable_path: Union[str, Path] = "./chromedriver", + selenium_webdriver_type: WebDriverType = "chrome", + selenium_service_kwargs: Optional[dict] = None, + selenium_options_args: Optional[Iterable[str]] = None, + selenium_webdriver_cls_override: Optional[Type] = None, + selenium_service_cls_override: Optional[Type] = None, + selenium_options_cls_override: Optional[Type] = None, + keep_alive: bool = True, + user_agent: Optional[str] = None, + proxy: Optional[str] = None, + save_dynamic_methods: bool = True, + ) -> None: # Check if executable_path exists and add it to the Selenium Service kwargs - executable_path = Path(executable_path) if isinstance( - executable_path, str) else executable_path + executable_path = ( + Path(executable_path) + if isinstance(executable_path, str) + else executable_path + ) if not executable_path.exists(): raise 
FileNotFoundError( - f"Executable path {executable_path} does not exist. Use souperscraper.get_chromedriver() to download chromedriver.") + f"Executable path {executable_path} does not exist. Use souperscraper.get_chromedriver() to download chromedriver." + ) selenium_service_kwargs = selenium_service_kwargs or {} if executable_path and "executable_path" not in selenium_service_kwargs: selenium_service_kwargs["executable_path"] = str(executable_path) # Import Selenium WebDriver class, Service class, and Options class for webdriver_type or use the override classes - selenium_webdriver_cls, selenium_service_cls, selenium_options_cls = import_webdriver(selenium_webdriver_type) - selenium_webdriver_cls = selenium_webdriver_cls_override or selenium_webdriver_cls + ( + selenium_webdriver_cls, + selenium_service_cls, + selenium_options_cls, + ) = import_webdriver(selenium_webdriver_type) + selenium_webdriver_cls = ( + selenium_webdriver_cls_override or selenium_webdriver_cls + ) selenium_service_cls = selenium_service_cls_override or selenium_service_cls selenium_options_cls = selenium_options_cls_override or selenium_options_cls @@ -122,72 +139,108 @@ def __init__(self, # Add user_agent to Selenium Options object if user_agent: - self.selenium_options.add_argument( - f'--user-agent="{user_agent}"') + self.selenium_options.add_argument(f'--user-agent="{user_agent}"') # Add proxy to Selenium Options object if proxy: - self.selenium_options.add_argument(f'--proxy-server={proxy}') + self.selenium_options.add_argument(f'--proxy-server="{proxy}"') # Create Selenium WebDriver object from Service and Options objects self.webdriver = selenium_webdriver_cls( - service=self.selenium_service, options=self.selenium_options, keep_alive=keep_alive) + service=self.selenium_service, + options=self.selenium_options, + keep_alive=keep_alive, + ) # Save for later to use when calling SoupScraper.soup self.soup_parser = soup_parser self.user_agent = user_agent self.proxy = proxy + # Save dynamic methods for later use + self.save_dynamic_methods = save_dynamic_methods + + def __del__(self): """Quit webdriver when SoupScraper object is deleted or garbage collected""" - if hasattr(self, 'webdriver'): + if hasattr(self, "webdriver"): self.webdriver.quit() def __getattr__(self, attr): - # Check if attribute exists in SoupScraper object + # Check if attribute already exists in SoupScraper object + # (Defined in this class or dynamically created by __getattr__ with save_dynamic_methods=True) if attr in dir(self): return super().__getattribute__(attr) - + # Check if attribute exists in webdriver object - if (webdriver := super().__getattribute__("webdriver")) and attr in dir(webdriver): + if (webdriver := super().__getattribute__("webdriver")) and attr in dir( + webdriver + ): return getattr(webdriver, attr) - # Split attribute by '_' to check for 'soup', 'by', 'wait', etc. 
- split_attr = attr.split('_') - # If the attr starts with soup, return the attribute from self.soup - if 'soup' == split_attr[0]: - return getattr(super().__getattribute__("soup"), "_".join(split_attr[1:])) - - # Attempt to find locator and expected_condition in split_attr - # If found, return a partial function with locator and expected_condition - # For example: - # self.wait_for_visibility_of_element_located_by_id(locator_value) is equivalent to - # WebDriverWait(self.webdriver, 3).until(EC.visibility_of_element_located((By.ID, locator_value)) - locator = None - expected_condition = None - if "by" in split_attr: - by_index = split_attr.index("by") - locator = " ".join(split_attr[by_index + 1:]) - split_attr = split_attr[:by_index] - - if "wait" in split_attr: - wait_index = split_attr.index("wait") - offset = 3 if "not" in split_attr else 2 - expected_condition = getattr( - EC, "_".join(split_attr[wait_index + offset:])) - split_attr = split_attr[:wait_index + offset] - - attr = "_".join(split_attr) - - if locator and expected_condition: - return partial(getattr(self, attr), expected_condition, locator) - elif locator: - return partial(getattr(self, attr), locator) - elif expected_condition: - return partial(getattr(self, attr), expected_condition) - + if attr.startswith("soup_"): + return getattr(super().__getattribute__("soup"), attr[5:]) + + # Try to create a dynamic attr not found in the SoupScraper, WebDriver, or BeautifulSoup objects + dynamic_method = None + + # Check if attr is a try_ wrapped method or attempt to find locator and expected_condition + if attr.startswith("try_"): + dynamic_method = super().__getattribute__('_try_wrapper')(getattr(self, attr[4:])) + else: + # Split attribute by '_' to check for 'soup', 'by', 'wait', etc. + split_attr = attr.split("_") + + # Attempt to find locator and expected_condition in split_attr + # If found, return a partial function with locator and expected_condition + # For example: + # self.wait_for_visibility_of_element_located_by_id(locator_value) is equivalent to + # WebDriverWait(self.webdriver, 3).until(EC.visibility_of_element_located((By.ID, locator_value)) + locator = None + expected_condition = None + if "by" in split_attr: + by_index = split_attr.index("by") + locator = " ".join(split_attr[by_index + 1 :]) + split_attr = split_attr[:by_index] + + if "wait" in split_attr: + wait_index = split_attr.index("wait") + offset = 3 if "not" in split_attr else 2 + expected_condition = getattr( + EC, "_".join(split_attr[wait_index + offset :]) + ) + split_attr = split_attr[: wait_index + offset] + + + bare_attr = "_".join(split_attr) # attr after removing locator and expected_condition + if locator and expected_condition: + dynamic_method = partial(getattr(self, bare_attr), expected_condition, locator) + elif locator: + dynamic_method = partial(getattr(self, bare_attr), locator) + elif expected_condition: + dynamic_method = partial(getattr(self, bare_attr), expected_condition) + + if dynamic_method: + # Save dynamic method if save_dynamic_methods is True + if self.save_dynamic_methods: + setattr(self, attr, dynamic_method) + return dynamic_method + + # Call super().__getattr__ if dynamic method is not found return super().__getattribute__(attr) + + + def _try_wrapper(self, func): + @wraps(func) + def wrapper(self, *args, ignore_exceptions=(WebDriverException,), **kwargs): + try: + return func(self, *args, **kwargs) + except ignore_exceptions as e: + print(e) + return None + + return wrapper def _get_soup(self) -> BeautifulSoup: """ @@ 
-212,7 +265,9 @@ def _get_all_window_handles(self) -> list[str]: """Returns webdriver.window_handles""" return self.webdriver.window_handles - def _new_window_handle(self, window_type='window', url=None, sleep_secs=None) -> None: + def _new_window_handle( + self, window_type="window", url=None, sleep_secs=None + ) -> None: self.webdriver.switch_to.new_window(window_type) if url: self.goto(url, sleep_secs) @@ -252,7 +307,9 @@ def _get_window_handle_by_url(self, url) -> Optional[str]: return window_handle return None - def switch_to_window(self, index=None, title=None, url=None, window_handle=None) -> Optional[str]: + def switch_to_window( + self, index=None, title=None, url=None, window_handle=None + ) -> Optional[str]: if index: window_handle = self._get_window_handle_by_index(index) elif title: @@ -264,15 +321,17 @@ def switch_to_window(self, index=None, title=None, url=None, window_handle=None) self._switch_to_window_handle(window_handle) return window_handle - def switch_to_tab(self, index=None, title=None, url=None, window_handle=None) -> Optional[str]: + def switch_to_tab( + self, index=None, title=None, url=None, window_handle=None + ) -> Optional[str]: return self.switch_to_window(index, title, url, window_handle) def new_tab(self, url=None, sleep_secs=None) -> str: - self._new_window_handle('tab', url, sleep_secs) + self._new_window_handle("tab", url, sleep_secs) return self._get_all_window_handles()[-1] def new_window(self, url=None, sleep_secs=None) -> str: - self._new_window_handle('window', url, sleep_secs) + self._new_window_handle("window", url, sleep_secs) return self._get_all_window_handles()[-1] def goto(self, url, sleep_secs=None) -> str: @@ -330,11 +389,19 @@ def windows(self) -> list[str]: # GET WRAPPED WEBDRIVER METHODS (ActionChains, ActionBuilder, Alert, WebDriverWait) - def get_action_chains(self, duration: int = 250, devices: Optional[list] = None) -> ActionChains: + def get_action_chains( + self, duration: int = 250, devices: Optional[list] = None + ) -> ActionChains: """Returns ActionChains object from self.webdriver with duration and devices""" return ActionChains(self.webdriver, duration, devices) - def get_action_builder(self, mouse: Optional[PointerInput] = None, wheel: Optional[WheelInput] = None, keyboard: Optional[KeyInput] = None, duration: int = 250) -> ActionBuilder: + def get_action_builder( + self, + mouse: Optional[PointerInput] = None, + wheel: Optional[WheelInput] = None, + keyboard: Optional[KeyInput] = None, + duration: int = 250, + ) -> ActionBuilder: """Returns ActionBuilder object from self.webdriver with mouse, wheel, keyboard, and duration""" return ActionBuilder(self.webdriver, mouse, wheel, keyboard, duration) @@ -344,45 +411,116 @@ def get_alert(self) -> Alert: def get_wait(self, timeout, poll_frequency, ignored_exceptions) -> WebDriverWait: """Returns WebDriverWait object from self.webdriver with timeout, poll_frequency, and ignored_exceptions""" - return WebDriverWait(self.webdriver, timeout, poll_frequency, ignored_exceptions) + return WebDriverWait( + self.webdriver, timeout, poll_frequency, ignored_exceptions + ) - def _wait(self, method, *method_args, timeout=3.0, poll_frequency=0.5, ignored_exceptions=None, until=True) -> Union[WebElement, bool, None]: + def _wait( + self, + method, + *method_args, + timeout=3.0, + poll_frequency=0.5, + ignored_exceptions=None, + until=True, + ) -> Union[WebElement, bool, None]: """Wait for method(*method_args) with WebDriverWait""" wait = self.get_wait(timeout, poll_frequency, ignored_exceptions) 
try: if until: - return wait.until(method(method_args)) if len(method_args) > 1 else wait.until(method(*method_args)) + return ( + wait.until(method(method_args)) + if len(method_args) > 1 + else wait.until(method(*method_args)) + ) else: - return wait.until_not(method(method_args)) if len(method_args) > 1 else wait.until_not(method(*method_args)) + return ( + wait.until_not(method(method_args)) + if len(method_args) > 1 + else wait.until_not(method(*method_args)) + ) except TimeoutException as e: print(e) return None # WAIT FOR ELEMENT METHODS -> element(s) - def wait_until(self, expected_condition: Callable, *expected_condition_args, timeout=3.0, poll_frequency=0.5, ignored_exceptions=None): + def wait_until( + self, + expected_condition: Callable, + *expected_condition_args, + timeout=3.0, + poll_frequency=0.5, + ignored_exceptions=None, + ): """Wait for element with expected_condition(locator, locator_value) or return None if timeout""" - return self._wait(expected_condition, *expected_condition_args, timeout=timeout, poll_frequency=poll_frequency, ignored_exceptions=ignored_exceptions) + return self._wait( + expected_condition, + *expected_condition_args, + timeout=timeout, + poll_frequency=poll_frequency, + ignored_exceptions=ignored_exceptions, + ) - def wait_until_not(self, expected_condition: Callable, *expected_condition_args, timeout=3.0, poll_frequency=0.5, ignored_exceptions=None): + def wait_until_not( + self, + expected_condition: Callable, + *expected_condition_args, + timeout=3.0, + poll_frequency=0.5, + ignored_exceptions=None, + ): """Wait for element with expected_condition(locator, locator_value) or return None if timeout""" - return self._wait(expected_condition, *expected_condition_args, timeout=timeout, poll_frequency=poll_frequency, ignored_exceptions=ignored_exceptions, until=False) + return self._wait( + expected_condition, + *expected_condition_args, + timeout=timeout, + poll_frequency=poll_frequency, + ignored_exceptions=ignored_exceptions, + until=False, + ) - def wait_for(self, expected_condition: Callable, *expected_condition_args, timeout=3.0, poll_frequency=0.5, ignored_exceptions=None): + def wait_for( + self, + expected_condition: Callable, + *expected_condition_args, + timeout=3.0, + poll_frequency=0.5, + ignored_exceptions=None, + ): """Wait for element with expected_condition(locator, locator_value) or return None if timeout""" - return self._wait(expected_condition, *expected_condition_args, timeout=timeout, poll_frequency=poll_frequency, ignored_exceptions=ignored_exceptions) + return self._wait( + expected_condition, + *expected_condition_args, + timeout=timeout, + poll_frequency=poll_frequency, + ignored_exceptions=ignored_exceptions, + ) - def wait_for_not(self, expected_condition: Callable, *expected_condition_args, timeout=3.0, poll_frequency=0.5, ignored_exceptions=None): + def wait_for_not( + self, + expected_condition: Callable, + *expected_condition_args, + timeout=3.0, + poll_frequency=0.5, + ignored_exceptions=None, + ): """Wait for element with expected_condition(locator, locator_value) or return None if timeout""" - return self._wait(expected_condition, *expected_condition_args, timeout=timeout, poll_frequency=poll_frequency, ignored_exceptions=ignored_exceptions, until=False) + return self._wait( + expected_condition, + *expected_condition_args, + timeout=timeout, + poll_frequency=poll_frequency, + ignored_exceptions=ignored_exceptions, + until=False, + ) # SCROLL TO ELEMENT METHODS def scroll_to(self, element: WebElement) -> 
WebElement: """Scroll to element with element.location_once_scrolled_into_view""" - self.webdriver.execute_script( - "arguments[0].scrollIntoView(true);", element) + self.webdriver.execute_script("arguments[0].scrollIntoView(true);", element) return element def scroll_to_element(self, locator: str, locator_value: str) -> WebElement: @@ -401,9 +539,16 @@ def find_elements_by_text(self, text: str) -> WebElement: """Find elements by text with xpath""" return self.find_elements_by_xpath(f"//*[text()='{text}']") - def wait_for_element_by_text(self, text: str, timeout=3.0, poll_frequency=0.5, ignored_exceptions=None) -> WebElement: + def wait_for_element_by_text( + self, text: str, timeout=3.0, poll_frequency=0.5, ignored_exceptions=None + ) -> WebElement: """Wait for element by text with xpath""" - return self.wait_for_presence_of_element_located_by_xpath(f"//*[text()='{text}']", timeout=timeout, poll_frequency=poll_frequency, ignored_exceptions=ignored_exceptions) + return self.wait_for_presence_of_element_located_by_xpath( + f"//*[text()='{text}']", + timeout=timeout, + poll_frequency=poll_frequency, + ignored_exceptions=ignored_exceptions, + ) def scroll_to_element_by_text(self, text: str) -> WebElement: """Scroll to element by text with xpath""" diff --git a/tests/test_souperscraper.py b/tests/test_souperscraper.py index 3f0f14f..f2fd4c6 100644 --- a/tests/test_souperscraper.py +++ b/tests/test_souperscraper.py @@ -1,8 +1,11 @@ import pytest -from souperscraper import SouperScraper +from souperscraper import SouperScraper, WebDriverException +from selenium.common.exceptions import JavascriptException, NoSuchElementException +from bs4 import Tag from time import time -scraper = SouperScraper(executable_path="/Users/lucasfaudman/Documents/souperscraper/chromedriver") +scraper = SouperScraper(executable_path="/Users/lucasfaudman/Documents/souperscraper/chromedriver", + save_dynamic_methods=False) @pytest.fixture def selenium_test_html_static(tmpdir): @@ -163,7 +166,7 @@ def test_goto_sleep(): def test_get_soup(): scraper.goto("https://www.example.com/") soup = scraper.soup - assert soup.find('title').string == "Example Domain" + assert isinstance((title := soup.find('title')), Tag) and title.string == "Example Domain" def test_new_tab(): scraper.goto("https://google.com") @@ -208,4 +211,63 @@ def test_wait_for_element(selenium_test_html_dynamic): assert scraper.find_element_by_tag_name("ul").is_displayed() == True assert len(scraper.find_elements_by_tag_name("li")) == 6 - scraper.wait_for_invisibility_of_element_located_by_id("vegetableSnippet", timeout=6) \ No newline at end of file + scraper.wait_for_invisibility_of_element_located_by_id("vegetableSnippet", timeout=6) + + +def test_try_wrapper_methods(selenium_test_html_static): + scraper.goto(selenium_test_html_static) + with pytest.raises(WebDriverException) as e: + scraper.find_element_by_class_name("nonexistent") + assert isinstance(e, NoSuchElementException) + + non_elm = scraper.try_find_element_by_class_name("nonexistent") + assert non_elm == None + + with pytest.raises(JavascriptException) as e: + # Only NoSuchElementException is ignored so JavascriptException should be raised + scraper.try_execute_script("", ignore_exceptions=NoSuchElementException) + assert isinstance(e, JavascriptException) + + with pytest.raises(JavascriptException) as e: + # Try with tuple of exceptions + scraper.try_execute_script("", ignore_exceptions=(NoSuchElementException,)) + assert isinstance(e, JavascriptException) + + # No exceptions should be raised 
since JavascriptException + # is a subclass of WebDriverException (the default ignored exception) + bad_js = scraper.try_execute_script("") + assert bad_js == None + + +def test_save_dynamic_methods(selenium_test_html_static): + scraper.goto(selenium_test_html_static) + + # Test with save_dynamic_methods = False + # Each call should return a new method + # Methods should not be saved + scraper.save_dynamic_methods = False + dynamic_method_getattr_call1 = scraper.find_elements_by_class_name + dynamic_method_getattr_call2 = scraper.find_elements_by_class_name + assert id(dynamic_method_getattr_call1) != id(dynamic_method_getattr_call2) + assert "find_elements_by_class_name" not in dir(scraper) + + # Test with save_dynamic_methods = True + # Each call should return the same method + # Methods should be saved + scraper.save_dynamic_methods = True + dynamic_method_getattr_call1 = scraper.find_elements_by_class_name + dynamic_method_getattr_call2 = scraper.find_elements_by_class_name + assert id(dynamic_method_getattr_call1) == id(dynamic_method_getattr_call2) + assert "find_elements_by_class_name" in dir(scraper) + +@pytest.mark.parametrize("user_agent", [ + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3", + "SOME ARBITRARY USER AGENT STRING", +]) +def test_user_agent(user_agent): + scraper = SouperScraper(executable_path="/Users/lucasfaudman/Documents/souperscraper/chromedriver", + user_agent=user_agent) + + scraper.goto('https://www.whatismybrowser.com/detect/what-is-my-user-agent/') + ua_elm = scraper.wait_for_visibility_of_element_located_by_id("detected_value") + assert ua_elm.text.strip('"') == user_agent == scraper.user_agent \ No newline at end of file
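Below is a minimal usage sketch of the new try_ prefix added in this patch. The executable_path is a placeholder and the URL and locator are arbitrary; the point is that any scraper or webdriver method can be prefixed with try_ to have WebDriverException (or a custom ignore_exceptions tuple) caught, printed, and turned into a None return.

from souperscraper import SouperScraper
from selenium.common.exceptions import NoSuchElementException

scraper = SouperScraper(executable_path="./chromedriver")  # placeholder path
scraper.goto("https://www.example.com/")

# Without try_, a missing element raises NoSuchElementException.
# With try_, any WebDriverException subclass is caught, printed, and None is returned.
maybe_elm = scraper.try_find_element_by_id("does-not-exist")
assert maybe_elm is None

# ignore_exceptions narrows what is swallowed; exceptions outside the tuple still propagate.
maybe_elm = scraper.try_find_element_by_id(
    "does-not-exist", ignore_exceptions=(NoSuchElementException,)
)
assert maybe_elm is None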
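Continuing the same sketch, this is roughly what the dynamically generated wait_*_by_* names resolve to under the rewritten __getattr__; the selector and timeout are arbitrary examples.

from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# scraper.wait_for_visibility_of_element_located_by_css_selector("h1", timeout=5)
# is parsed into expected_condition=EC.visibility_of_element_located and locator="css selector",
# which is roughly equivalent to:
elm = WebDriverWait(scraper.webdriver, 5).until(
    EC.visibility_of_element_located((By.CSS_SELECTOR, "h1"))
)

# soup_* names are delegated to the BeautifulSoup object in the same way:
# scraper.soup_find("title") behaves like scraper.soup.find("title")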
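A short sketch of the caching behavior controlled by the new save_dynamic_methods flag, again continuing the scraper above.

# With save_dynamic_methods=False, every attribute lookup rebuilds the partial/wrapper inside
# __getattr__, so two lookups return two distinct objects and nothing is stored on the instance.
scraper.save_dynamic_methods = False
assert scraper.find_elements_by_class_name is not scraper.find_elements_by_class_name

# With save_dynamic_methods=True (the constructor default), the first lookup builds the method
# and setattr()s it onto the instance, so later lookups bypass __getattr__ and return the same object.
scraper.save_dynamic_methods = True
first = scraper.find_elements_by_class_name
second = scraper.find_elements_by_class_name
assert first is second and "find_elements_by_class_name" in dir(scraper)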
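Finally, a sketch of the user_agent and proxy constructor kwargs exercised by the new user-agent test; the path, proxy address, and UA string below are placeholders.

ua = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"
scraper = SouperScraper(
    executable_path="./chromedriver",     # placeholder path
    user_agent=ua,                        # added to the Options object as --user-agent="<ua>"
    proxy="socks5://127.0.0.1:9050",      # placeholder; added as --proxy-server="<proxy>"
)
assert scraper.user_agent == ua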