From decd5aa6d4d1adbbfa86ef4f28d4b0a945ea9e9c Mon Sep 17 00:00:00 2001 From: Sakan Date: Tue, 9 Jul 2024 00:27:35 +0700 Subject: [PATCH] v5.4.0 - Use async when doing requests - Added more tests --- check_missing_dates.py | 37 +++++++++++-------- .../migrate_to_sqlite.py | 2 + japan_avg_hotel_price_finder/utils.py | 15 ++++---- tests/test_utils/test_utils.py | 5 ++- 4 files changed, 35 insertions(+), 24 deletions(-) diff --git a/check_missing_dates.py b/check_missing_dates.py index c3099f0..8bea212 100644 --- a/check_missing_dates.py +++ b/check_missing_dates.py @@ -1,4 +1,5 @@ import argparse +import asyncio from japan_avg_hotel_price_finder.configure_logging import configure_logging_with_file from japan_avg_hotel_price_finder.utils import check_in_db_if_all_date_was_scraped, \ @@ -6,18 +7,24 @@ logger = configure_logging_with_file('jp_hotel_data.log', 'jp_hotel_data') -parser = argparse.ArgumentParser(description='Parser that control which kind of missing dates checkers to use.') -parser.add_argument('--check_db', type=str, default=False, help='Check missing dates in database') -parser.add_argument('--check_csv', type=str, default=False, help='Check missing dates in CSV file directory') - -args = parser.parse_args() - -if args.check_db: - db = args.check_db - check_in_db_if_all_date_was_scraped(db=db, to_sqlite=True) -elif args.check_csv: - directory = args.check_csv - directory = str(directory) - check_in_csv_dir_if_all_date_was_scraped(directory) -else: - logger.warning('Please use --check_db or --check_csv') \ No newline at end of file + +async def check_missing_dates_main(): + parser = argparse.ArgumentParser(description='Parser that control which kind of missing dates checkers to use.') + parser.add_argument('--check_db', type=str, default=False, help='Check missing dates in database') + parser.add_argument('--check_csv', type=str, default=False, help='Check missing dates in CSV file directory') + + args = parser.parse_args() + + if args.check_db: + db = args.check_db + await check_in_db_if_all_date_was_scraped(db=db, to_sqlite=True) + elif args.check_csv: + directory = args.check_csv + directory = str(directory) + await check_in_csv_dir_if_all_date_was_scraped(directory) + else: + logger.warning('Please use --check_db or --check_csv') + + +if __name__ == '__main__': + asyncio.run(check_missing_dates_main()) diff --git a/japan_avg_hotel_price_finder/migrate_to_sqlite.py b/japan_avg_hotel_price_finder/migrate_to_sqlite.py index 3294084..283e786 100644 --- a/japan_avg_hotel_price_finder/migrate_to_sqlite.py +++ b/japan_avg_hotel_price_finder/migrate_to_sqlite.py @@ -54,6 +54,8 @@ def migrate_data_to_sqlite(df_filtered: pd.DataFrame, details: Details) -> None: # Save the DataFrame to a table named 'HotelPrice' df_filtered.to_sql('HotelPrice', con=con, if_exists='append', index=False, dtype=hotel_price_dtype) + con.commit() + logger.info(f'Data has been saved to {db}') create_average_room_price_by_date_view(db) diff --git a/japan_avg_hotel_price_finder/utils.py b/japan_avg_hotel_price_finder/utils.py index e1619a0..70bd168 100644 --- a/japan_avg_hotel_price_finder/utils.py +++ b/japan_avg_hotel_price_finder/utils.py @@ -93,7 +93,7 @@ def find_missing_dates_in_db(sqlite_db: str) -> list: return missing_dates -def check_in_db_if_all_date_was_scraped(db: str, to_sqlite: bool = False) -> None: +async def check_in_db_if_all_date_was_scraped(db: str, to_sqlite: bool = False) -> None: """ Check inside the SQLite database if all dates of each month were scraped today. :param db: Path to the SQLite database. @@ -103,10 +103,10 @@ def check_in_db_if_all_date_was_scraped(db: str, to_sqlite: bool = False) -> Non """ logger.info(f"Checking in the SQLite database '{db}' if any date was not scraped today...") missing_dates = find_missing_dates_in_db(db) - scrape_missing_dates(db=db, missing_dates=missing_dates, to_sqlite=to_sqlite) + await scrape_missing_dates(db=db, missing_dates=missing_dates, to_sqlite=to_sqlite) -def check_in_csv_dir_if_all_date_was_scraped(directory: str = 'scraped_hotel_data_csv') -> None: +async def check_in_csv_dir_if_all_date_was_scraped(directory: str = 'scraped_hotel_data_csv') -> None: """ Check inside the CSV files directory if all dates of each month were scraped today. :param directory: Path to the CSV files directory. @@ -125,7 +125,7 @@ def check_in_csv_dir_if_all_date_was_scraped(directory: str = 'scraped_hotel_dat with sqlite3.connect(temp_db) as con: df.to_sql('HotelPrice', con, if_exists='replace', index=False) - check_in_db_if_all_date_was_scraped(temp_db) + await check_in_db_if_all_date_was_scraped(temp_db) else: logger.warning("No CSV files were found") except FileNotFoundError as e: @@ -145,7 +145,7 @@ def check_in_csv_dir_if_all_date_was_scraped(directory: str = 'scraped_hotel_dat logger.info("Truncate the HotelPrice table in the temporary database.") with sqlite3.connect(temp_db) as con: con.execute("DELETE FROM HotelPrice") - logger.warning("Please delete the temporary database manually after the web-scraping process finishes.") + logger.warning("Please delete the temporary database manually.") def get_count_of_date_by_mth_asof_today_query(): @@ -259,7 +259,7 @@ def find_dates_of_the_month_in_db(db: str, days_in_month, month, year) -> tuple: return dates_in_db, end_date, start_date -def scrape_missing_dates(db: str = None, missing_dates: list = None, to_sqlite: bool = False): +async def scrape_missing_dates(db: str = None, missing_dates: list = None, to_sqlite: bool = False): """ Scrape missing dates with BasicScraper. :param db: SQLite database path. @@ -268,9 +268,10 @@ def scrape_missing_dates(db: str = None, missing_dates: list = None, to_sqlite: Set to False as default. :return: None """ + logger.info("Scraping missing dates...") if missing_dates: for date in missing_dates: - scrape_with_basic_scraper(db, date, to_sqlite) + await scrape_with_basic_scraper(db, date, to_sqlite) else: logger.warning(f"Missing dates is None. No missing dates to scrape.") diff --git a/tests/test_utils/test_utils.py b/tests/test_utils/test_utils.py index 3f68b7a..a684db1 100644 --- a/tests/test_utils/test_utils.py +++ b/tests/test_utils/test_utils.py @@ -7,7 +7,8 @@ scrape_missing_dates -def test_scrape_missing_dates() -> None: +@pytest.mark.asyncio +async def test_scrape_missing_dates() -> None: db = 'test_scrape_missing_dates.db' today = datetime.datetime.today() @@ -26,7 +27,7 @@ def test_scrape_missing_dates() -> None: second_missing_date = f'{year}-{month_str}-11' third_missing_date = f'{year}-{month_str}-20' missing_dates = [first_missing_date, second_missing_date, third_missing_date] - scrape_missing_dates(db=db, missing_dates=missing_dates, to_sqlite=True) + await scrape_missing_dates(db=db, missing_dates=missing_dates, to_sqlite=True) with sqlite3.connect(db) as con: query = get_count_of_date_by_mth_asof_today_query()