diff --git a/japan_avg_hotel_price_finder/utils.py b/japan_avg_hotel_price_finder/utils.py
index 85afe29..35cf896 100644
--- a/japan_avg_hotel_price_finder/utils.py
+++ b/japan_avg_hotel_price_finder/utils.py
@@ -86,21 +86,23 @@ def find_missing_dates_in_db(sqlite_db: str) -> list:
     return missing_dates
 
 
-def check_db_if_all_date_was_scraped(db: str) -> None:
+def check_in_db_if_all_date_was_scraped(db: str, to_sqlite: bool = False) -> None:
     """
     Check inside the SQLite database if all dates of each month were scraped today.
     :param db: Path to the SQLite database.
+    :param to_sqlite: If True, load the data into the SQLite database, else save to CSV.
     :returns: None
     """
     logger.info(f"Checking in the SQLite database '{db}' if any date was not scraped today...")
     missing_dates = find_missing_dates_in_db(db)
-    scrape_missing_dates(db=db, missing_dates=missing_dates, to_sqlite=True)
+    scrape_missing_dates(db=db, missing_dates=missing_dates, to_sqlite=to_sqlite)
 
 
-def check_csv_if_all_date_was_scraped(directory) -> None:
+def check_in_csv_dir_if_all_date_was_scraped(directory: str = 'scraped_hotel_data_csv') -> None:
     """
     Check inside the CSV files directory if all dates of each month were scraped today.
     :param directory: Path to the CSV files directory.
+        Default is the 'scraped_hotel_data_csv' folder.
     :returns: None
     """
     logger.info(f"Checking CSV files in the {directory} directory if all date was scraped today...")
@@ -115,15 +117,15 @@ def check_csv_if_all_date_was_scraped(directory) -> None:
             with sqlite3.connect(temp_db) as con:
                 df.to_sql('HotelPrice', con, if_exists='replace', index=False)
 
-                missing_dates = find_missing_dates_in_db(temp_db)
-                scrape_missing_dates(missing_dates=missing_dates)
+                check_in_db_if_all_date_was_scraped(temp_db)
         else:
             logger.warning("No CSV files were found")
     except FileNotFoundError as e:
         logger.error(e)
         logger.error(f"{directory} folder not found.")
     except Exception as e:
-        logger.error(f"An unexpected error occurred: {e}")
+        logger.error(e)
+        logger.error("An unexpected error occurred")
 
     if os.path.exists(temp_db):
         try:
@@ -308,6 +310,8 @@ def save_scraped_data(
         save_dir='scraped_hotel_data_csv') -> None:
     """
     Save scraped data to CSV or SQLite database.
+    The CSV files directory is created automatically if it doesn't exist.
+    The default CSV files directory name depends on the default value of the 'save_dir' parameter.
     :param dataframe: Pandas DataFrame.
     :param details_dataclass: Details dataclass object. Only needed if saving to SQLite database.
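For reviewers, a minimal usage sketch of the two renamed helpers follows. The function names, signatures, and defaults come from this diff; the database filename is a hypothetical example. Note the behavioral change: the old `check_db_if_all_date_was_scraped` always passed `to_sqlite=True`, while the new signature defaults to `False`, so callers that rely on database loading must now pass the flag explicitly (as main.py does below).

```python
# Sketch only: illustrates the renamed helpers from this diff.
# 'hotel.db' is a hypothetical database path, not a name used by the project.
from japan_avg_hotel_price_finder.utils import (
    check_in_db_if_all_date_was_scraped,
    check_in_csv_dir_if_all_date_was_scraped,
)

# Re-scrape any missing dates and load them into the SQLite database.
check_in_db_if_all_date_was_scraped('hotel.db', to_sqlite=True)

# With the new default (to_sqlite=False), missing dates are saved to CSV instead.
check_in_db_if_all_date_was_scraped('hotel.db')

# Scan the default 'scraped_hotel_data_csv' directory; the CSVs are loaded into
# a temporary SQLite database, which is then checked via
# check_in_db_if_all_date_was_scraped(temp_db).
check_in_csv_dir_if_all_date_was_scraped()
```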
diff --git a/main.py b/main.py
index c6e681e..4015b6c 100644
--- a/main.py
+++ b/main.py
@@ -17,8 +17,8 @@
 from japan_avg_hotel_price_finder.configure_logging import configure_logging_with_file
 from japan_avg_hotel_price_finder.scrape import BasicScraper
 from japan_avg_hotel_price_finder.thread_scrape import ThreadPoolScraper
-from japan_avg_hotel_price_finder.utils import check_csv_if_all_date_was_scraped, check_db_if_all_date_was_scraped, \
-    save_scraped_data
+from japan_avg_hotel_price_finder.utils import check_in_db_if_all_date_was_scraped, \
+    save_scraped_data, check_in_csv_dir_if_all_date_was_scraped
 from set_details import Details
 
 logger = configure_logging_with_file('jp_hotel_data.log', 'jp_hotel_data')
@@ -53,23 +53,23 @@
             data_tuple = thread_scrape.thread_scrape(max_workers=workers)
             df = data_tuple[0]
             save_scraped_data(dataframe=df, details_dataclass=details, to_sqlite=to_sqlite)
-            check_db_if_all_date_was_scraped(details.sqlite_name)
+            check_in_db_if_all_date_was_scraped(details.sqlite_name, to_sqlite=to_sqlite)
         else:
             df, city, check_in, check_out = thread_scrape.thread_scrape(max_workers=workers)
             save_scraped_data(dataframe=df, city=city, check_in=check_in, check_out=check_out)
-            check_csv_if_all_date_was_scraped()
+            check_in_csv_dir_if_all_date_was_scraped()
     else:
         if to_sqlite:
             data_tuple = thread_scrape.thread_scrape()
             df = data_tuple[0]
             save_scraped_data(dataframe=df, details_dataclass=details, to_sqlite=to_sqlite)
-            check_db_if_all_date_was_scraped(details.sqlite_name)
+            check_in_db_if_all_date_was_scraped(details.sqlite_name, to_sqlite=to_sqlite)
         else:
             df, city, check_in, check_out = thread_scrape.thread_scrape()
             save_scraped_data(dataframe=df, city=city, check_in=check_in, check_out=check_out)
-            check_csv_if_all_date_was_scraped()
+            check_in_csv_dir_if_all_date_was_scraped()
 elif args.scraper:
     logger.info('Using basic scraper')
     check_in = details.check_in
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 5abca9c..ccad651 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -7,8 +7,8 @@
 from japan_avg_hotel_price_finder.thread_scrape import ThreadPoolScraper
 from japan_avg_hotel_price_finder.utils import check_if_current_date_has_passed, find_missing_dates, find_csv_files, \
-    convert_csv_to_df, get_count_of_date_by_mth_asof_today_query, check_csv_if_all_date_was_scraped, \
-    check_db_if_all_date_was_scraped, save_scraped_data
+    convert_csv_to_df, get_count_of_date_by_mth_asof_today_query, \
+    check_in_db_if_all_date_was_scraped, save_scraped_data, check_in_csv_dir_if_all_date_was_scraped
 from set_details import Details
@@ -108,7 +108,7 @@ def test_check_if_all_date_was_scraped_csv() -> None:
         df, city, check_in, check_out = thread_scrape.thread_scrape(timezone=city_timezone, max_workers=5)
         save_scraped_data(dataframe=df, city=city, check_in=check_in, check_out=check_out,
                           save_dir=directory)
-        check_csv_if_all_date_was_scraped(directory)
+        check_in_csv_dir_if_all_date_was_scraped(directory)
 
         with sqlite3.connect(sqlite_name) as conn:
             directory = 'test_check_if_all_date_was_scraped_csv'
@@ -160,7 +160,7 @@ def test_check_if_all_date_was_scraped() -> None:
         data_tuple = thread_scrape.thread_scrape(timezone=city_timezone, max_workers=5)
         df = data_tuple[0]
         save_scraped_data(dataframe=df, details_dataclass=hotel_stay, to_sqlite=True)
-        check_db_if_all_date_was_scraped(hotel_stay.sqlite_name)
+        check_in_db_if_all_date_was_scraped(hotel_stay.sqlite_name)
 
         with sqlite3.connect(sqlite_name) as conn:
             query = get_count_of_date_by_mth_asof_today_query()
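A hedged sketch of the post-rename verification flow exercised by the updated tests; the database name below is an illustrative assumption, and only the two imported helpers and their call shapes are taken from this diff.

```python
# Sketch only: mirrors the updated call flow in tests/test_utils.py, with assumed values.
import sqlite3

from japan_avg_hotel_price_finder.utils import (
    check_in_db_if_all_date_was_scraped,
    get_count_of_date_by_mth_asof_today_query,
)

sqlite_name = 'test_hotel_data.db'  # hypothetical test database name

# After save_scraped_data(..., to_sqlite=True), check date coverage; to_sqlite is
# left at its new default (False) here, matching the updated test call site.
check_in_db_if_all_date_was_scraped(sqlite_name)

with sqlite3.connect(sqlite_name) as conn:
    # The query helper is called with no arguments, as in the test context lines;
    # the tests then assert on the per-month date counts it returns.
    query = get_count_of_date_by_mth_asof_today_query()
    rows = conn.execute(query).fetchall()
```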