This repository has been archived by the owner on May 6, 2024. It is now read-only.
Run scrapers #85963
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow that scrapes the per-canton COVID-19 figures and commits the results.
name: Run scrapers
on:
  schedule:
    # Every 20 minutes, around the clock: the hour field is `*`, so there is
    # no time-of-day window. Use '*/20 5-21 * * *' instead if runs should be
    # limited to 05:00-21:40 UTC.
    - cron: '*/20 * * * *'
  # Allow the workflow to be started manually as well.
  workflow_dispatch:
jobs:
  # Runs one scraper per canton (matrix) and commits each changed CSV on its own.
  run_scraper:
    runs-on: ubuntu-20.04
    continue-on-error: false
    timeout-minutes: 10
    strategy:
      # A failing canton must not cancel the scrapers that are still running.
      fail-fast: false
      matrix:
        canton:
          # - AG
          # - AI  # currently no data available
          - AR
          - BE
          - BL
          - BS
          # - FR
          - GE
          - GL
          # - GR
          - JU
          - LU
          - NE
          - NW
          # - OW
          - SG
          - SH
          # - SO
          # - SZ
          - TG
          # - TI
          # - UR  # no more data available
          - VD
          - VS
          - ZG
          - ZH
          - FL
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python 3.7
        uses: actions/setup-python@v4
        with:
          # Quoted so the version stays a string instead of a YAML float.
          python-version: '3.7'
      - run: npm ci
      - name: Remove broken apt repos
        run: |
          for apt_file in $(grep -lr microsoft /etc/apt/sources.list.d/); do
            sudo rm "$apt_file"
          done
      - name: Install dependencies
        env:
          SCRAPER_KEY: ${{ matrix.canton }}
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          sudo apt update || true # do not fail if update does not work
          # -y: the runner is non-interactive, so a confirmation prompt would abort the install
          sudo apt-get install -y sqlite3
          sudo apt-get install -y poppler-utils
          # chromium is only installed for the GE scraper
          if [ "$SCRAPER_KEY" = "GE" ]; then
            sudo apt-get install -y chromium-browser
          fi
      - name: Scrape new data
        env:
          SCRAPER_KEY: ${{ matrix.canton }}
          # NOTE(review): the matrix above defines no `overwrite` key, so this
          # expands to an empty string - confirm that is intended.
          SCRAPER_OVERWRITE: ${{ matrix.overwrite }}
        run: |
          ./scrapers/run_scraper.sh
      - name: Check if there are changes in the repo
        id: changes
        run: |
          # `git diff --quiet` exits 0 when the working tree has no changes
          if git diff -w --no-ext-diff --quiet
          then
            echo "changed=0" >> "$GITHUB_OUTPUT"
          else
            echo "changed=1" >> "$GITHUB_OUTPUT"
          fi
      - name: Set commit message
        env:
          SCRAPER_KEY: ${{ matrix.canton }}
        run: |
          # The FL file name has no "Kanton_" infix
          if [ "$SCRAPER_KEY" = "FL" ]; then
            echo "commit_msg=Update COVID19_Fallzahlen_${SCRAPER_KEY}_total.csv from scraper" >> "$GITHUB_ENV"
          else
            echo "commit_msg=Update COVID19_Fallzahlen_Kanton_${SCRAPER_KEY}_total.csv from scraper" >> "$GITHUB_ENV"
          fi
      # Sleep here to prevent two workers from committing at the same time;
      # only sleep if we will try to commit.
      - name: Sleep randomly
        if: steps.changes.outputs.changed == 1
        # Random delay of 1-30 seconds
        run: sleep "$(( (RANDOM % 30) + 1 ))s"
      # Only try to commit if there are actually changes.
      - name: Commit and push to repo
        if: steps.changes.outputs.changed == 1
        uses: github-actions-x/commit@v2.9
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          push-branch: master
          name: GitHub Action Scraper
          email: scraper@open.zh.ch
          commit-message: ${{ env.commit_msg }}
          rebase: 'true'
      - name: Validate scraper output
        # A validation failure is reported via Slack below but does not fail the job.
        continue-on-error: true
        env:
          SCRAPER_KEY: ${{ matrix.canton }}
        run: |
          # Record "failed" first; it is only overwritten with "success" when
          # the validation script exits cleanly and the last line is reached.
          echo "validate_status=failed" >> "$GITHUB_ENV"
          ./scrapers/validate_scraper_output.sh
          echo "validate_status=success" >> "$GITHUB_ENV"
      - name: Get current unix timestamp
        if: always()
        id: date
        run: echo "ts=$(date +'%s')" >> "$GITHUB_OUTPUT"
      - name: Notify slack validation error
        if: ${{ env.validate_status == 'failed' }}
        env:
          SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
        uses: pullreminders/slack-action@master
        with:
          args: '{\"channel\":\"C013C0UUQ4S\", \"attachments\": [{\"fallback\": \"Job failed.\", \"color\": \"warning\", \"title\": \"CSV validation for ${{ matrix.canton }} failed\", \"title_link\": \"https://github.com/openZH/covid_19/actions/runs/${{ github.run_id }}?check_suite_focus=true\", \"text\": \":x: CSV validation after scraping failed\", \"footer\": \"<https://github.com/openZH/covid_19|openZH/covid_19>\", \"footer_icon\": \"https://github.com/abinoda/slack-action/raw/master/docs/app-icons/github-icon.png\", \"ts\": \"${{steps.date.outputs.ts}}\"}]}'
      - name: Notify slack failure
        if: ${{ failure() || cancelled() }}
        env:
          SLACK_BOT_TOKEN: ${{ secrets.SLACK_BOT_TOKEN }}
        uses: pullreminders/slack-action@master
        with:
          args: '{\"channel\":\"C013C0UUQ4S\", \"attachments\": [{\"fallback\": \"Job failed.\", \"color\": \"danger\", \"title\": \"Run Scrapers ${{ matrix.canton }}\", \"title_link\": \"https://github.com/openZH/covid_19/actions/runs/${{ github.run_id }}?check_suite_focus=true\", \"text\": \":x: Scraper failed\", \"footer\": \"<https://github.com/openZH/covid_19|openZH/covid_19>\", \"footer_icon\": \"https://github.com/abinoda/slack-action/raw/master/docs/app-icons/github-icon.png\", \"ts\": \"${{steps.date.outputs.ts}}\"}]}'

  # Merges the per-canton CSVs into the Swiss-wide files and commits them.
  merge_csvs:
    needs: run_scraper
    # Run even when some canton scrapers failed.
    if: always()
    runs-on: ubuntu-20.04
    timeout-minutes: 10
    steps:
      - uses: actions/checkout@v3
        with:
          # Check out the tip of master instead of the SHA that triggered the
          # run, so the commits just pushed by run_scraper are included.
          ref: master
      - name: Setup Ruby
        uses: ruby/setup-ruby@v1
        with:
          ruby-version: '2.6'
      - name: Print header line and then merge all files without header line
        run: ruby scripts/merge_canton_csvs.rb > COVID19_Fallzahlen_CH_total_v2.csv
      # Derive the v1 files from the v2 data
      - name: Create v1 files based on new structure
        run: |
          ./scripts/transform_all_new2old.sh
          ./scripts/new2oldcsv.py COVID19_Fallzahlen_CH_total_v2.csv > COVID19_Fallzahlen_CH_total.csv
      - name: Check if there are changes in the repo
        id: changes
        run: |
          # `git diff --quiet` exits 0 when the working tree has no changes
          if git diff -w --no-ext-diff --quiet
          then
            echo "changed=0" >> "$GITHUB_OUTPUT"
          else
            echo "changed=1" >> "$GITHUB_OUTPUT"
          fi
      # Commit to repo with updated file; only try to commit if there are
      # actually changes.
      - name: Commit and push to repo
        if: steps.changes.outputs.changed == 1
        uses: github-actions-x/commit@v2.9
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          push-branch: master
          name: GitHub Action Scraper
          email: scraper@open.zh.ch
          commit-message: Update COVID19_Fallzahlen_CH_total.csv
          rebase: 'true'

  # Refreshes the per-canton update dates in the README and commits the change.
  update_readme:
    needs: merge_csvs
    # Run even when an earlier job failed.
    if: always()
    runs-on: ubuntu-20.04
    timeout-minutes: 10
    steps:
      - uses: actions/checkout@v3
        with:
          # Check out the tip of master instead of the SHA that triggered the
          # run, so the commits pushed by the earlier jobs are included.
          ref: master
      - name: Update README with latest update dates from cantons
        run: ./scripts/update_dates_in_readme.sh
      - name: Check if there are changes in the repo
        id: changes
        run: |
          # `git diff --quiet` exits 0 when the working tree has no changes
          if git diff -w --no-ext-diff --quiet
          then
            echo "changed=0" >> "$GITHUB_OUTPUT"
          else
            echo "changed=1" >> "$GITHUB_OUTPUT"
          fi
      # Commit to repo with updated file; only try to commit if there are
      # actually changes.
      - name: Commit and push to repo
        if: steps.changes.outputs.changed == 1
        uses: github-actions-x/commit@v2.9
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          push-branch: master
          name: GitHub Action Scraper
          email: scraper@open.zh.ch
          commit-message: Update dates in README
          rebase: 'true'