Commit
v2.7.0
- Added --workers to adjust the number of thread-pool workers for the Thread Pool Scraper (see the sketch after this list)
- Switched from Chrome to Firefox
- Adjusted wait time for clicking pop-up ad to 2 seconds
- Adjusted scroll-down length from 2,000 to 4,000 pixels
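For the first two bullets (the --workers flag and the switch to Firefox), a minimal sketch of how they might be wired together is shown below. This is an illustration only, assuming argparse and concurrent.futures.ThreadPoolExecutor; the flag name --workers comes from the commit message, while the helper names, the default worker count, and the placeholder URLs are assumptions rather than the repository's actual code.

```python
import argparse
from concurrent.futures import ThreadPoolExecutor

from selenium import webdriver
from selenium.webdriver.firefox.options import Options


def create_driver() -> webdriver.Firefox:
    """Create a headless Firefox driver (this commit switches from Chrome to Firefox)."""
    options = Options()
    options.add_argument("--headless")
    return webdriver.Firefox(options=options)


def main() -> None:
    parser = argparse.ArgumentParser()
    # --workers controls the thread-pool size used by the Thread Pool Scraper;
    # the default of 5 is an assumption for this sketch.
    parser.add_argument("--workers", type=int, default=5,
                        help="Number of worker threads for the Thread Pool Scraper")
    args = parser.parse_args()

    urls = ["https://example.com/page1", "https://example.com/page2"]  # placeholder URLs

    def scrape(url: str) -> None:
        driver = create_driver()
        try:
            driver.get(url)
            # ... scraping logic would go here ...
        finally:
            driver.quit()

    with ThreadPoolExecutor(max_workers=args.workers) as executor:
        executor.map(scrape, urls)


if __name__ == "__main__":
    main()
```

With this shape, running the sketch with `--workers 8` would start up to eight scraping threads, each driving its own Firefox instance.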
sakan811 committed Jun 12, 2024
1 parent 8308eb7 commit b85f479
Showing 1 changed file with 3 additions and 5 deletions.
8 changes: 3 additions & 5 deletions japan_avg_hotel_price_finder/scrape.py
@@ -114,8 +114,6 @@ def click_pop_up_ad(wait: WebDriverWait, driver: WebDriver) -> None:
         logger.error(e)
         logger.error(f'{ads_css_selector} timed out')
         logger.error(f'Moving on')
-        # logger.error(f'Refresh the page.')
-        # driver.refresh()
     except Exception as e:
         logger.error(e)
         logger.error(f'{ads_css_selector} failed due to {e}')
@@ -163,7 +161,7 @@ def scroll_down_until_page_bottom(driver: WebDriver) -> None:
         logger.debug(f'{current_height = }')
 
         # Scroll down to the bottom
-        driver.execute_script("window.scrollBy(0, 2000);")
+        driver.execute_script("window.scrollBy(0, 4000);")
 
         # Get current height
         new_height = driver.execute_script("return window.scrollY")
@@ -177,7 +175,7 @@ def scroll_down_until_page_bottom(driver: WebDriver) -> None:
         # Click 'load more result' button if present
         click_load_more_result_button(driver)
 
-        wait = WebDriverWait(driver, 5)
+        wait = WebDriverWait(driver, 2)
         logger.info("Clicking pop-up ad in case it appears...")
         click_pop_up_ad(wait, driver)
 
@@ -315,7 +313,7 @@ def _scrape(self, url: str) -> dict:
 
         get_url_with_driver(driver, url)
 
-        wait = WebDriverWait(driver, 5)
+        wait = WebDriverWait(driver, 2)
 
         click_pop_up_ad(wait, driver)
 
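For context on how the two changed values interact (the 4,000-pixel scroll step and the 2-second pop-up wait), here is a minimal sketch of a scroll-until-bottom loop in the same spirit as the diff above. The loop structure, the `.ad-close` selector, and the ad-clicking details are assumptions for illustration, not the repository's actual implementation.

```python
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import TimeoutException


def scroll_down_until_page_bottom(driver: webdriver.Firefox) -> None:
    """Scroll in 4,000-pixel steps until the page stops moving (illustrative sketch)."""
    while True:
        current_height = driver.execute_script("return window.scrollY")

        # Scroll down by the step size introduced in this commit (was 2,000 pixels).
        driver.execute_script("window.scrollBy(0, 4000);")

        new_height = driver.execute_script("return window.scrollY")
        if new_height == current_height:
            # The scroll position did not change, so the bottom was reached.
            break

        # Give a possible pop-up ad at most 2 seconds (was 5) before moving on.
        wait = WebDriverWait(driver, 2)
        try:
            ad = wait.until(
                EC.element_to_be_clickable((By.CSS_SELECTOR, ".ad-close"))  # hypothetical selector
            )
            ad.click()
        except TimeoutException:
            pass  # no ad appeared; keep scrolling
```

The shorter 2-second wait keeps the loop from stalling on pages where no ad appears, while the larger scroll step reduces the number of iterations needed to reach the bottom.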