From bcee4b6c7155dd20670912676efc7274a5dfb0e9 Mon Sep 17 00:00:00 2001
From: Sakan
Date: Mon, 17 Jun 2024 17:57:04 +0700
Subject: [PATCH] v4.0.0

- Added --workers to adjust the number of thread pool workers for the Thread Pool Scraper
- Adjusted the default number of thread pool workers to 5
- Used Firefox instead of Chrome
- Adjusted the wait time for clicking the pop-up ad and the load more result button while scrolling down to 0.1 seconds
- Adjusted the WebDriverWait poll frequency to 0
- Adjusted the scroll-down length to 2,000 pixels
- Used logging instead of loguru
- Adjusted the scripts so that all scrapers share the same logger
- Added a driver wait for clicking the 'load more result' button
- Fixed a 'check_if_current_date_has_passed' bug in the Month End Scraper
- Added more tests
- Added logic to handle a past year being entered for the Thread Pool and Month End scrapers
- Added a timezone parameter to the Thread Pool and Month End scrapers so that they check past dates in the given timezone, mainly to fix timezone problems when testing with GitHub Actions
- Added a timezone parameter to 'check_if_current_date_has_passed', mainly to fix timezone problems when testing with GitHub Actions
- Adjusted log messages
- Added an ElementClickInterceptedException handler for clicking the pop-up ad and the load more result button
- Added a NoSuchWindowException handler for scrolling down the browser window
- Added a finally block to ensure that the driver is closed
- Handled the case where the HTML content is None
- Added a CSS selector list for clicking the load more result button
- Adjusted the data-saving process
- Removed the Month End Scraper
---
 japan_avg_hotel_price_finder/scrape.py | 10 +++++-----
 japan_avg_hotel_price_finder/utils.py  |  4 ++--
 tests/test_utils.py                    |  6 ++++--
 3 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/japan_avg_hotel_price_finder/scrape.py b/japan_avg_hotel_price_finder/scrape.py
index 7983435..b7d1410 100644
--- a/japan_avg_hotel_price_finder/scrape.py
+++ b/japan_avg_hotel_price_finder/scrape.py
@@ -337,12 +337,12 @@ def start_scraping_process(self, check_in: str, check_out: str) -> tuple[DataFra
 
         if self.num_load_more_result_clicked_list < 1:
             logger.warning("Load more result button is never clicked. "
-                           "The CSS selector for the load more result button might have a problem."
+                           "The CSS selector for the load more result button might have a problem. "
                            "Please update the CSS selector in '_click_load_more_result_button' function.")
 
-            if self.num_pop_up_clicked_list < 1:
-                logger.warning("Pop-up ad is never clicked. "
-                               "The CSS selector for the pop-up ad might have a problem."
-                               "Please update the CSS selector of the pop-up ad in '_click_pop_up_ad' function.")
+        if self.num_pop_up_clicked_list < 1:
+            logger.warning("Pop-up ad is never clicked. "
+                           "The CSS selector for the pop-up ad might have a problem. "
+                           "Please update the CSS selector of the pop-up ad in '_click_pop_up_ad' function.")
 
         logger.info('Return scraped data as a Pandas DataFrame')
         return df_filtered, city, check_in, check_out
diff --git a/japan_avg_hotel_price_finder/utils.py b/japan_avg_hotel_price_finder/utils.py
index 36fa3fe..85afe29 100644
--- a/japan_avg_hotel_price_finder/utils.py
+++ b/japan_avg_hotel_price_finder/utils.py
@@ -97,12 +97,12 @@ def check_db_if_all_date_was_scraped(db: str) -> None:
         scrape_missing_dates(db=db, missing_dates=missing_dates, to_sqlite=True)
 
 
-def check_csv_if_all_date_was_scraped() -> None:
+def check_csv_if_all_date_was_scraped(directory) -> None:
     """
     Check inside the CSV files directory if all dates of each month were scraped today.
+    :param directory: Path to the CSV files directory.
     :returns: None
     """
-    directory = 'scraped_hotel_data_csv'
     logger.info(f"Checking CSV files in the {directory} directory if all date was scraped today...")
     temp_db = 'temp_db.db'
     try:
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 897aafe..5abca9c 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -102,11 +102,13 @@ def test_check_if_all_date_was_scraped_csv() -> None:
         start_day=start_day, month=month, year=year, nights=nights, sqlite_name=sqlite_name
     )
 
+    directory = 'test_check_if_all_date_was_scraped_csv'
+
     thread_scrape = ThreadPoolScraper(hotel_stay)
     df, city, check_in, check_out = thread_scrape.thread_scrape(timezone=city_timezone, max_workers=5)
     save_scraped_data(dataframe=df, city=city, check_in=check_in,
-                      check_out=check_out, save_dir='test_check_if_all_date_was_scraped_csv')
-    check_csv_if_all_date_was_scraped()
+                      check_out=check_out, save_dir=directory)
+    check_csv_if_all_date_was_scraped(directory)
 
     with sqlite3.connect(sqlite_name) as conn:
         directory = 'test_check_if_all_date_was_scraped_csv'
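
For reviewers: below is a minimal sketch of the click pattern this changelog describes (a driver wait with poll_frequency=0, a CSS selector list, and an ElementClickInterceptedException handler). The helper name, selectors, timeout, and return value are illustrative assumptions, not the repository's actual code.

from selenium.common.exceptions import (ElementClickInterceptedException,
                                        NoSuchElementException,
                                        TimeoutException)
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait


def click_load_more_result_button(driver, css_selectors: list[str], timeout: int = 5) -> bool:
    """Try each CSS selector in turn; return True once a click succeeds."""
    # poll_frequency=0 re-checks the wait condition without sleeping between polls
    wait = WebDriverWait(driver, timeout, poll_frequency=0)
    for selector in css_selectors:
        try:
            button = wait.until(EC.element_to_be_clickable((By.CSS_SELECTOR, selector)))
            button.click()
            return True
        except (TimeoutException, NoSuchElementException):
            continue  # this selector matched nothing; try the next one in the list
        except ElementClickInterceptedException:
            continue  # another element (e.g. a pop-up ad) covered the button
    return False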
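
Likewise, a hedged sketch of what the timezone-aware 'check_if_current_date_has_passed' could look like; the exact signature and the use of the standard-library zoneinfo are assumptions.

from datetime import datetime
from zoneinfo import ZoneInfo


def check_if_current_date_has_passed(year: int, month: int, day: int,
                                     timezone: str = 'UTC') -> bool:
    """Return True if the given date is earlier than today in the given timezone."""
    today = datetime.now(ZoneInfo(timezone)).date()
    # compare dates only, so "today" itself does not count as passed
    return datetime(year, month, day).date() < today

For example, check_if_current_date_has_passed(2024, 6, 16, timezone='Asia/Tokyo') compares against today's date in Tokyo rather than the GitHub Actions runner's UTC clock, which is the timezone problem the bullets above refer to.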
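
Finally, the --workers flag could plausibly be wired to a ThreadPoolExecutor along these lines; the argparse wiring, the scrape_one_date placeholder, and the sample dates are assumptions for illustration only.

import argparse
from concurrent.futures import ThreadPoolExecutor

parser = argparse.ArgumentParser()
parser.add_argument('--workers', type=int, default=5,
                    help='number of thread pool workers (default: 5)')
args = parser.parse_args()


def scrape_one_date(date: str) -> None:
    ...  # placeholder for the per-date scraping job


dates = ['2024-06-17', '2024-06-18', '2024-06-19']
with ThreadPoolExecutor(max_workers=args.workers) as executor:
    # each date is scraped in its own worker thread
    list(executor.map(scrape_one_date, dates))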