# Workflow file for the "Scrape" workflow, as shown for run "Scrape #196".

# Display name of the workflow in the Actions tab.
name: Scrape

# Triggers: a cron schedule plus a manual "Run workflow" dispatch.
on:
  schedule:
    # Minute 0, hour 0, every day (GitHub evaluates cron schedules in UTC).
    - cron: "0 0 * * *"
  workflow_dispatch:
# Workflow-level environment: exposed to every step of every job.
# Every value is read from a repository secret of the same name.
env:
  # NOTE(review): ACTIONS_ALLOW_UNSECURE_COMMANDS was dropped on purpose. It
  # re-enables the deprecated `set-env` / `add-path` workflow commands, which
  # allow anything printed to the log to inject environment variables or PATH
  # entries. None of the steps visible in this workflow depend on it.
  USER_AGENT: ${{ secrets.USER_AGENT }}
  # Presumably request-header values consumed by automated_scraper.py
  # (names suggest X-Booking-* headers) — confirm against the scraper.
  X_BOOKING_CONTEXT_ACTION_NAME: ${{ secrets.X_BOOKING_CONTEXT_ACTION_NAME }}
  X_BOOKING_CONTEXT_AID: ${{ secrets.X_BOOKING_CONTEXT_AID }}
  X_BOOKING_CSRF_TOKEN: ${{ secrets.X_BOOKING_CSRF_TOKEN }}
  X_BOOKING_ET_SERIALIZED_STATE: ${{ secrets.X_BOOKING_ET_SERIALIZED_STATE }}
  X_BOOKING_PAGEVIEW_ID: ${{ secrets.X_BOOKING_PAGEVIEW_ID }}
  X_BOOKING_SITE_TYPE_ID: ${{ secrets.X_BOOKING_SITE_TYPE_ID }}
  X_BOOKING_TOPIC: ${{ secrets.X_BOOKING_TOPIC }}
jobs:
  # Runs the scraper once per enabled month, in order, then uploads the
  # resulting CSV files to Google Cloud Storage. Steps run sequentially, so a
  # failing month stops the job before the upload.
  scrape-latest:
    runs-on: windows-latest
    # Least privilege for GITHUB_TOKEN: the steps below only read the repo.
    permissions:
      contents: read
    steps:
      - name: Checkout repo
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Quoted so the version stays a string rather than a float.
          python-version: '3.12'

      - name: Install requirements
        run: pip install -r requirements.txt

      # January through August are currently disabled; uncomment a step to
      # scrape that month again.
      # - name: Run Scraper For January
      #   run: python automated_scraper.py --month=1
      #
      # - name: Run Scraper For February
      #   run: python automated_scraper.py --month=2
      # - name: Run Scraper For March
      #   run: python automated_scraper.py --month=3
      # - name: Run Scraper For April
      #   run: python automated_scraper.py --month=4
      # - name: Run Scraper For May
      #   run: python automated_scraper.py --month=5
      # - name: Run Scraper For June
      #   run: python automated_scraper.py --month=6
      # - name: Run Scraper For July
      #   run: python automated_scraper.py --month=7
      # - name: Run Scraper For August
      #   run: python automated_scraper.py --month=8

      - name: Run Scraper For September
        run: python automated_scraper.py --month=9

      - name: Run Scraper For October
        run: python automated_scraper.py --month=10

      - name: Run Scraper For November
        run: python automated_scraper.py --month=11

      - name: Run Scraper For December
        run: python automated_scraper.py --month=12

      # Authenticates with the service-account key stored in GCP_CREDENTIALS
      # so the upload step below can use it.
      - name: Authenticate to Google Cloud
        id: 'auth'
        uses: 'google-github-actions/auth@v2'
        with:
          credentials_json: ${{ secrets.GCP_CREDENTIALS }}

      # Uploads files matching `glob` (evaluated relative to `path`) to the
      # `destination` bucket.
      - name: Upload scraped CSVs to Cloud Storage
        id: 'upload-file'
        uses: 'google-github-actions/upload-cloud-storage@v2'
        with:
          path: './'
          destination: 'weekly_osaka_hotel_data'
          glob: 'scraped_hotel_data_csv/*.csv'