-
Notifications
You must be signed in to change notification settings - Fork 0
56 lines (46 loc) · 1.4 KB
/
scrape.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# Scrapes data and publishes the result to a Google Sheets spreadsheet.
name: Daily update to Google Sheets

# Runs on every push and pull request that targets master, plus once a day
# on a schedule.
on:
  push:
    branches: [master]
  pull_request:
    branches: [master]
  schedule:
    # Daily at 00:00 UTC (GitHub evaluates workflow cron schedules in UTC).
    - cron: "0 0 * * *"
jobs:
  # Single job that runs the whole pipeline in order: scrape, upload to
  # Google Sheets, then resize the sheet columns.
  # NOTE(review): the job id only mentions the last stage; it is kept as-is
  # because renaming it could break status checks that reference the id.
  adjust-column-widths:
    runs-on: ubuntu-latest
    # Least privilege for GITHUB_TOKEN: the steps below only need to read the
    # repository. NOTE(review): assumes pipeline/scrape.sh never pushes to the
    # repository - confirm before merging.
    permissions:
      contents: read
    steps:
      # Fetch the repository so the pipeline/ scripts and requirements.txt
      # are available to the following steps.
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Quoted so the version is read as a string, not a float.
          python-version: "3.12"
          # Reuse pip's download cache between runs.
          cache: "pip"

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt pandas google-auth \
            google-auth-oauthlib google-auth-httplib2 google-api-python-client

      - name: Install Playwright browsers
        # --with-deps also installs the OS packages the browsers require.
        run: playwright install --with-deps chromium firefox webkit

      - name: Run scraping
        env:
          # Stream Python output to the job log immediately.
          PYTHONUNBUFFERED: "1"
        run: |
          chmod +x ./pipeline/scrape.sh
          ./pipeline/scrape.sh

      # NOTE(review): the two steps below also run for pull_request events.
      # Secrets are not passed to workflows triggered from forks, so they
      # would presumably fail there - confirm whether they should be skipped.
      - name: Upload to Google Sheets
        env:
          GCP_JSON: ${{ secrets.GCP_JSON }}
          GOOGLE_SHEETS_ID: ${{ secrets.GOOGLE_SHEETS_ID }}
          PYTHONUNBUFFERED: "1"
        run: python pipeline/upload_to_sheets.py

      - name: Adjust Column Widths
        env:
          GCP_JSON: ${{ secrets.GCP_JSON }}
          GOOGLE_SHEETS_ID: ${{ secrets.GOOGLE_SHEETS_ID }}
          PYTHONUNBUFFERED: "1"
        run: python pipeline/adjust_column_widths.py

      - name: Cleanup
        # always() makes this step run even when an earlier step failed.
        if: always()
        run: |
          rm -rf output