-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathautomated_scraper.py
46 lines (35 loc) · 1.79 KB
/
automated_scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
import argparse
import asyncio
import calendar
import os
from japan_avg_hotel_price_finder.configure_logging import main_logger
from japan_avg_hotel_price_finder.whole_mth_graphql_scraper import WholeMonthGraphQLScraper
def _str_to_bool(value: str) -> bool:
    """Convert a command-line token such as ``'true'`` or ``'0'`` to a bool.

    ``type=bool`` must not be used with argparse: it calls ``bool()`` on the
    raw string, and every non-empty string (including ``'False'``) is truthy.

    :param value: raw text supplied on the command line.
    :return: the boolean the text stands for.
    :raises argparse.ArgumentTypeError: if the text is not a recognised boolean.
    """
    normalized = value.strip().lower()
    if normalized in {'true', 't', 'yes', 'y', '1'}:
        return True
    # The empty string stays False, as it was under ``bool('')``.
    if normalized in {'false', 'f', 'no', 'n', '0', ''}:
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got '{value}'")


# Initialize argument parser
parser = argparse.ArgumentParser(description='Parser that control which kind of scraper to use.')
parser.add_argument('--month', type=int, help='Month to scrape data for (1-12)')
# nargs='?' with const=True lets a bare `--japan` act as a flag while an
# explicit value (`--japan True` / `--japan False`) is still accepted.
parser.add_argument('--japan', type=_str_to_bool, nargs='?', const=True, default=False,
                    help='Whether to scrape hotels from all city in Japan')
args = parser.parse_args()
class AutomatedScraper(WholeMonthGraphQLScraper):
    """Whole-month scraper that saves the scraped data as a CSV file."""

    async def main(self) -> None:
        """Scrape the configured month and write the result to a CSV file.

        The file is written to the relative directory
        ``scraped_hotel_data_csv`` (created if missing) and is named
        ``<city>_hotel_data_<month name>_<year>.csv``.

        :raises OSError: if the output directory cannot be created.
        """
        # The result only needs to provide ``to_csv``; presumably it is a
        # pandas DataFrame -- confirm against ``scrape_whole_month``.
        df = await self.scrape_whole_month()

        month_name = calendar.month_name[self.month]

        path = 'scraped_hotel_data_csv'
        try:
            os.makedirs(path, exist_ok=True)
        except OSError as e:
            main_logger.error(f"Error creating directory '{path}': {e}")
            # Bare ``raise`` re-raises the caught exception with its errno,
            # filename and traceback intact; ``raise OSError`` replaced it
            # with an empty exception and lost that detail.
            raise

        csv_file_name = f'{self.city}_hotel_data_{month_name}_{self.year}.csv'
        csv_file_path = os.path.join(path, csv_file_name)
        df.to_csv(csv_file_path, index=False)
# Script entry point: scrape one month of Osaka hotel data and save it as CSV.
if __name__ == '__main__':
    if not args.month:
        main_logger.warning('Please specify month to scrape data with --month argument')
    elif not 1 <= args.month <= 12:
        # Reject the value before any scraping starts: the month is later used
        # to index ``calendar.month_name``, where a value above 12 raises
        # IndexError and a negative value silently selects the wrong month.
        main_logger.error(f'Invalid month {args.month}: --month must be between 1 and 12')
        raise SystemExit(2)
    else:
        # NOTE(review): the year is hard-coded; confirm whether it should
        # follow the current date or become a command-line option.
        year = 2025
        scraper = AutomatedScraper(year=year, month=args.month, start_day=1, check_in='',
                                   check_out='', group_adults=1, group_children=0, num_rooms=1, nights=1,
                                   selected_currency='USD', sqlite_name='', scrape_only_hotel=True,
                                   country='Japan', city='Osaka')

        main_logger.info(f'Setting month to scrape to {args.month} for {scraper.__class__.__name__}...')
        asyncio.run(scraper.main())