Skip to content

Commit

Permalink
v3.4.0
Browse files Browse the repository at this point in the history
- Added --workers option to adjust the number of thread pool workers for the Thread Pool Scraper
- Use Firefox instead of Chrome
- Adjusted wait time for clicking pop-up ad while scrolling down to 1 second
- Adjusted wait time for clicking load more result button to 1 second
- Adjusted wait time for clicking pop-up ad to 2 seconds
- Adjusted scroll-down length to 2,000 pixels
- Use logging instead of loguru
- Adjusted scripts to use the same logger for all scrapers
- Added driver wait for clicking 'load more result' button
- Fixed 'check_if_current_date_has_passed' function bug for Month End Scraper
- Added more tests
- Added a check for the case where a past year is entered, for the Thread Pool and Month End scrapers
- Added a timezone parameter to the Thread Pool and Month End scrapers so that they check for past dates based on the entered timezone, mostly to fix timezone problems when testing with GitHub Actions
- Added a timezone parameter to 'check_if_current_date_has_passed', mostly to fix timezone problems when testing with GitHub Actions
- Adjusted log message
- Added an ElementClickInterceptedException handler for clicking the pop-up ad and the 'load more result' button
  • Loading branch information
sakan811 committed Jun 14, 2024
1 parent c2b8cc3 commit cbd8706
Showing 1 changed file with 39 additions and 28 deletions.
67 changes: 39 additions & 28 deletions japan_avg_hotel_price_finder/scrape.py
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ def __init__(self, details: Details):
self.hotel_data_dict = {'Hotel': [], 'Price': [], 'Review': []}
self.load_more_result_clicked = 0
self.pop_up_clicked = 0
self.obstructing_classes = ['a3f7e233ba', 'f0fbe41bfe.b290b28eaf']

def _click_load_more_result_button(self, driver: WebDriver) -> None:
"""
Expand All @@ -200,28 +201,33 @@ def _click_load_more_result_button(self, driver: WebDriver) -> None:
logger.error(f'The \'load more result\' button not found. Keep scrolling.')
except ElementClickInterceptedException as e:
logger.warning(e)
logger.warning("ElementClickInterceptedException: The button is obscured. Trying to handle the obstruction.")

logger.info("Identify the obstructing element")
try:
overlay = driver.find_element(By.CLASS_NAME, 'a3f7e233ba')
if overlay.is_displayed():
logger.info("Found an obstructing overlay, attempting to hide it.")
driver.execute_script("arguments[0].style.display='none';", overlay)
logger.info("Obstructing overlay hidden.")
except NoSuchElementException as e:
logger.warning(e)
logger.warning("No obstructing overlay found.")

logger.info("Retry clicking the button")
load_more_button = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, load_more_result_css_selector)))
logger.warning("ElementClickInterceptedException: The pop-up ad is obscured. "
"Trying to handle the obstruction.")

# List of possible obstructing class names
obstructing_classes = self.obstructing_classes

logger.info("Identifying the obstructing element(s)")

for class_name in obstructing_classes:
try:
overlay = driver.find_element(By.CLASS_NAME, class_name)
if overlay.is_displayed():
logger.info(f"Found an obstructing overlay with class '{class_name}', attempting to hide it.")
driver.execute_script("arguments[0].style.display='none';", overlay)
logger.info(f"Obstructing overlay with class '{class_name}' hidden.")
except NoSuchElementException:
logger.info(f"No obstructing overlay found with class '{class_name}'.")

logger.info("Retry clicking the pop-up ad")
load_more_button = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, ads_css_selector)))
load_more_button.click()
except Exception as e:
logger.error(e)
logger.error(f'Unexpected error occurred')
else:
self.load_more_result_clicked += 1
logger.debug(f'{load_more_result_css_selector} clicked successfully')
logger.debug(f'Load more result button clicked successfully')

def _find_box_elements(self, soup) -> bs4.ResultSet:
"""
Expand Down Expand Up @@ -377,18 +383,23 @@ def _click_pop_up_ad(self, wait: WebDriverWait, driver: WebDriver) -> None:
logger.error(f'Moving on')
except ElementClickInterceptedException as e:
logger.warning(e)
logger.warning("ElementClickInterceptedException: The pop-up ad is obscured. Trying to handle the obstruction.")

logger.info("Identify the obstructing element")
try:
overlay = driver.find_element(By.CLASS_NAME, 'a3f7e233ba')
if overlay.is_displayed():
logger.info("Found an obstructing overlay, attempting to hide it.")
driver.execute_script("arguments[0].style.display='none';", overlay)
logger.info("Obstructing overlay hidden.")
except NoSuchElementException as e:
logger.warning(e)
logger.warning("No obstructing overlay found.")
logger.warning("ElementClickInterceptedException: The pop-up ad is obscured. "
"Trying to handle the obstruction.")

# List of possible obstructing class names
obstructing_classes = self.obstructing_classes

logger.info("Identifying the obstructing element(s)")

for class_name in obstructing_classes:
try:
overlay = driver.find_element(By.CLASS_NAME, class_name)
if overlay.is_displayed():
logger.info(f"Found an obstructing overlay with class '{class_name}', attempting to hide it.")
driver.execute_script("arguments[0].style.display='none';", overlay)
logger.info(f"Obstructing overlay with class '{class_name}' hidden.")
except NoSuchElementException:
logger.info(f"No obstructing overlay found with class '{class_name}'.")

logger.info("Retry clicking the pop-up ad")
load_more_button = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, ads_css_selector)))
Expand Down

0 comments on commit cbd8706

Please sign in to comment.