Skip to content

Commit

Permalink
v2.7.0
Browse files Browse the repository at this point in the history
- Added --workers to adjust pool thread workers for Thread Pool Scraper
- Use Firefox instead of Chrome
- Adjusted wait time for clicking the pop-up ad to 2 seconds
- Adjusted scroll-down length from 8,000 to 6,000 pixels
- Adjusted the loguru logger to write terminal log output to Standard Output instead of Standard Error
- Added InvalidSessionIdException and NoSuchWindowException for a Thread Pool Scraper
  • Loading branch information
sakan811 committed Jun 12, 2024
1 parent c47c12c commit c52e115
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 3 deletions.
2 changes: 1 addition & 1 deletion japan_avg_hotel_price_finder/scrape.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ def scroll_down_until_page_bottom(driver: WebDriver) -> None:
logger.debug(f'{current_height = }')

# Scroll down to the bottom
driver.execute_script("window.scrollBy(0, 8000);")
driver.execute_script("window.scrollBy(0, 6000);")

# Get current height
new_height = driver.execute_script("return window.scrollY")
Expand Down
7 changes: 5 additions & 2 deletions japan_avg_hotel_price_finder/thread_scrape.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

import pandas as pd
from loguru import logger
from selenium.common import InvalidSessionIdException
from selenium.common import InvalidSessionIdException, NoSuchWindowException

from japan_avg_hotel_price_finder.scrape_until_month_end import MonthEndBasicScraper
from japan_avg_hotel_price_finder.utils import check_if_current_date_has_passed
Expand Down Expand Up @@ -88,7 +88,10 @@ def scrape_each_date(day: int) -> None:
future.result()
except InvalidSessionIdException as e:
logger.error(e)
logger.error('Tried to run command without establishing a connection')
logger.error('Tried to run command without establishing a connection.')
except NoSuchWindowException as e:
logger.error(e)
logger.error('The browser window was closed already.')

# Concatenate all DataFrames in the 'results' list into a single DataFrame
df = pd.concat(results, ignore_index=True)
Expand Down

0 comments on commit c52e115

Please sign in to comment.