# Workflow file for GitHub Actions run "Scrape #695" (workflow: Scrape).

# Scrape workflow: runs the download module, commits the refreshed files in
# ./data back to the repository, and publishes data/clean to GitHub Pages.
name: Scrape

on:
  # Allow manual runs from the Actions tab.
  workflow_dispatch:
  # Run automatically at minute 0 of hours 0 and 12 every day (GitHub
  # evaluates cron schedules in UTC).
  schedule:
    - cron: '0 0,12 * * *'

# The scrape job pushes commits, so the default token needs write access to
# repository contents.
permissions:
  contents: write

jobs:
  scrape:
    name: Scrape
    runs-on: ubuntu-latest
    steps:
      - id: checkout
        name: Checkout
        uses: actions/checkout@v4

      - uses: actions/setup-python@v5
        with:
          python-version: '3.11'
          cache: 'pipenv'

      # Install pipenv from PyPI rather than piping an unpinned script from a
      # remote branch straight into the interpreter.
      - name: Install pipenv
        run: python -m pip install --upgrade pipenv
        shell: bash

      - id: install-python-dependencies
        name: Install Python dependencies
        run: pipenv sync
        shell: bash

      - name: Run scrape command
        run: pipenv run python -m src.download --verbose
        shell: bash

      # Currently disabled steps, kept for reference.
      # - name: Run transform command
      #   run: pipenv run python -m src.transform --verbose
      #   shell: bash
      # - name: Run rss command
      #   run: pipenv run python -m src.rss --verbose
      #   shell: bash
      # - name: Run Mastodon command
      #   run: pipenv run python -m src.toot
      #   shell: bash
      #   env:
      #     MASTODON_CLIENT_KEY: ${{ secrets.MASTODON_CLIENT_KEY }}
      #     MASTODON_CLIENT_SECRET: ${{ secrets.MASTODON_CLIENT_SECRET }}
      #     MASTODON_ACCESS_TOKEN: ${{ secrets.MASTODON_ACCESS_TOKEN }}

      # Record when this run happened; the file is committed with the data.
      - name: Datestamp
        run: date > data/timestamp.txt
        shell: bash

      - name: Commit results
        run: |
          git config --global user.name "github-actions[bot]"
          git config --global user.email "actions@github.com"
          git config pull.rebase false
          git status
          git pull origin "$GITHUB_REF"
          git add ./data
          # Skip the commit when nothing is staged, but let a failed commit or
          # push fail the step instead of hiding it behind `|| true`.
          if git diff --cached --quiet; then
            echo "No changes to commit"
          else
            git commit -m "Scrape" --author="palewire <palewire@users.noreply.github.com>"
            git push
          fi
        shell: bash

      # Package data/clean as the artifact consumed by the deploy job.
      - name: Upload artifact
        uses: actions/upload-pages-artifact@v3
        with:
          path: data/clean

  deploy:
    name: Deploy
    runs-on: ubuntu-latest
    needs: scrape
    # Job-level permissions replace the workflow-level ones for this job.
    permissions:
      pages: write
      id-token: write
    environment:
      name: github-pages
      # Expose the published URL reported by the deployment step.
      url: ${{ steps.deployment.outputs.page_url }}
    steps:
      - id: deployment
        name: Deploy data to GitHub Pages
        uses: actions/deploy-pages@v4