calculate-times-auto-2024-zcta-40-weighted #812
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
---
name: calculate-times
# Run title is built from the dispatch inputs, e.g.
# calculate-times-auto-2024-zcta-40-weighted
run-name: calculate-times-${{ inputs.mode }}-${{ inputs.year }}-${{ inputs.geography }}-${{ inputs.state }}-${{ inputs.centroid_type }}
# Manual trigger only. Every input except override_chunks is a fixed-choice
# dropdown, so only override_chunks carries free-form text.
on:
  workflow_dispatch:
    inputs:
      # Input values match those in params.yaml
      mode:
        required: true
        description: Mode of travel
        default: 'auto'
        type: choice
        options:
          - auto
          - bicycle
          - pedestrian

      # Years are quoted so they are passed as strings, not integers
      year:
        required: true
        description: Census/OSM data year
        default: '2020'
        type: choice
        options:
          - '2020'
          - '2021'
          - '2022'
          - '2023'
          - '2024'

      geography:
        required: true
        description: Census data geography
        default: county
        type: choice
        options:
          - state
          - county
          - county_subdivision
          - tract
          - block_group
          - zcta

      # Two-digit state codes are quoted so the leading zeros are preserved
      state:
        required: true
        description: Target Census state
        default: '01'
        type: choice
        options:
          - '01'
          - '02'
          - '04'
          - '05'
          - '06'
          - '08'
          - '09'
          - '10'
          - '11'
          - '12'
          - '13'
          - '15'
          - '16'
          - '17'
          - '18'
          - '19'
          - '20'
          - '21'
          - '22'
          - '23'
          - '24'
          - '25'
          - '26'
          - '27'
          - '28'
          - '29'
          - '30'
          - '31'
          - '32'
          - '33'
          - '34'
          - '35'
          - '36'
          - '37'
          - '38'
          - '39'
          - '40'
          - '41'
          - '42'
          - '44'
          - '45'
          - '46'
          - '47'
          - '48'
          - '49'
          - '50'
          - '51'
          - '53'
          - '54'
          - '55'
          - '56'

      centroid_type:
        required: true
        description: Whether or not to use population-weighted locations
        default: weighted
        type: choice
        options:
          - weighted
          - unweighted

      # Optional; when empty, every chunk produced by setup-jobs is run
      override_chunks:
        required: false
        description: |
          Comma-separated chunks to run e.g. 0-5_000-100,6-11_000-100.
          Will run all chunks if null
        type: string
# Environment shared by every job and step in this workflow
env:
  AWS_DEFAULT_REGION: us-east-1
  # See: https://github.com/aws/aws-cli/issues/5262#issuecomment-705832151
  # Quoted because environment variables are always strings
  AWS_EC2_METADATA_DISABLED: 'true'
  PYTHONUNBUFFERED: "1"
jobs:
  # Using the location data, split the origins into N jobs (max 256)
  setup-jobs:
    runs-on: ubuntu-24.04
    outputs:
      # JSON array of chunk identifiers consumed by the run-job matrix
      chunks: ${{ steps.create-job-chunks.outputs.chunks }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Setup Cloudflare credentials
        uses: ./.github/actions/setup-cloudflare-s3
        with:
          CLOUDFLARE_S3_API_ACCESS_KEY_ID: ${{ secrets.CLOUDFLARE_S3_API_ACCESS_KEY_ID }}
          CLOUDFLARE_S3_API_SECRET_ACCESS_KEY: ${{ secrets.CLOUDFLARE_S3_API_SECRET_ACCESS_KEY }}

      # Persist the runner's UID/GID in the job environment for later steps
      - name: Fetch GitHub user and group ID
        shell: bash
        id: fetch-ids
        run: |
          echo "USER_ID=$(id -u)" >> "$GITHUB_ENV"
          echo "GROUP_ID=$(id -g)" >> "$GITHUB_ENV"

      - name: Build Dockerized dependencies
        uses: ./.github/actions/build-docker
        with:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      - name: Load Docker image
        run: |
          docker load --input /tmp/opentimes.tar
          docker image ls -a

      - name: Fetch locations data
        uses: ./.github/actions/fetch-locations

      - name: Install uv
        uses: astral-sh/setup-uv@v5
        with:
          enable-cache: true
          cache-suffix: "site-data"
          cache-dependency-glob: |
            pyproject.toml
            uv.lock

      - name: Install Python dependencies
        id: install-python-dependencies
        shell: bash
        run: |
          uv python install
          uv venv
          uv pip install ".[site,data]"

      # Writes the `chunks` step output exactly once: either the full list
      # generated by split_chunks.py or the validated user-supplied override
      - name: Create job chunks
        id: create-job-chunks
        working-directory: 'data'
        shell: bash
        env:
          # Free-form user input. Passed through the environment rather than
          # interpolated into the script so it cannot inject shell code
          OVERRIDE_CHUNKS: ${{ inputs.override_chunks }}
        run: |
          export USER_ID=${{ env.USER_ID }}
          export GROUP_ID=${{ env.GROUP_ID }}
          uv run ./src/split_chunks.py \
            --year ${{ inputs.year }} --geography ${{ inputs.geography }} \
            --state ${{ inputs.state }} > chunks.txt
          chunks_json=$(cat chunks.txt)
          chunks_parsed=($(jq -r '.[]' chunks.txt))

          # If override chunks are set, use those instead
          if [ -n "$OVERRIDE_CHUNKS" ]; then
            override_chunks_parsed=($(printf '%s\n' "$OVERRIDE_CHUNKS" | tr -d ' ' | tr ',' ' '))

            # Every override must be one of the generated chunks
            for chunk in "${override_chunks_parsed[@]}"; do
              if [[ ! " ${chunks_parsed[*]} " =~ " ${chunk} " ]]; then
                echo "Error: Override chunk ${chunk} is not in the chunks for this origin"
                echo "Chunks include: ${chunks_parsed[*]}"
                exit 1
              fi
            done

            chunks_json=$(printf '%s\n' "${override_chunks_parsed[@]}" | jq -c -R . | jq -c -s .)
            echo "Creating jobs for chunks: ${override_chunks_parsed[*]}"
          else
            echo "Creating jobs for chunks: ${chunks_parsed[*]}"
          fi

          # Append (never truncate) so other entries in the output file survive
          echo "chunks=$chunks_json" >> "$GITHUB_OUTPUT"

  # One matrix job per chunk emitted by setup-jobs
  run-job:
    runs-on: ubuntu-24.04
    needs: setup-jobs
    strategy:
      # Don't fail all chunks if one fails
      fail-fast: false
      matrix:
        chunk: ${{ fromJSON(needs.setup-jobs.outputs.chunks) }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Setup Cloudflare credentials
        uses: ./.github/actions/setup-cloudflare-s3
        with:
          CLOUDFLARE_S3_API_ACCESS_KEY_ID: ${{ secrets.CLOUDFLARE_S3_API_ACCESS_KEY_ID }}
          CLOUDFLARE_S3_API_SECRET_ACCESS_KEY: ${{ secrets.CLOUDFLARE_S3_API_SECRET_ACCESS_KEY }}

      - name: Fetch locations data
        uses: ./.github/actions/fetch-locations

      # Don't fetch tile data in setup-jobs because they're very large and
      # will churn the Actions cache. We want to wait to fetch it until jobs
      # have actually been picked up
      - name: Fetch Valhalla tile data
        uses: ./.github/actions/fetch-valhalla-tiles
        with:
          year: ${{ inputs.year }}
          state: ${{ inputs.state }}

      # Persist the runner's UID/GID in the job environment for later steps
      - name: Fetch GitHub user and group ID
        shell: bash
        id: fetch-ids
        run: |
          echo "USER_ID=$(id -u)" >> "$GITHUB_ENV"
          echo "GROUP_ID=$(id -g)" >> "$GITHUB_ENV"

      - name: Fetch Docker image
        uses: actions/download-artifact@v4
        with:
          name: opentimes-docker-${{ hashFiles('./data/Dockerfile', './pyproject.toml') }}
          path: /tmp

      - name: Load Docker image
        run: |
          docker load --input /tmp/opentimes.tar
          docker image ls -a

      - name: Install uv
        uses: astral-sh/setup-uv@v5
        with:
          enable-cache: true
          cache-suffix: "site-data"
          cache-dependency-glob: |
            pyproject.toml
            uv.lock

      - name: Install Python dependencies
        id: install-python-dependencies
        shell: bash
        run: |
          uv python install
          uv venv
          uv pip install ".[site,data]"

      # Hard-link the tile archive into ./build, unpack it there, then drop
      # the link
      - name: Extract tiles
        shell: bash
        working-directory: 'data'
        run: |
          tile_path="year=${{ inputs.year }}/geography=state/state=${{ inputs.state }}"
          ln ./intermediate/valhalla_tiles/"$tile_path"/valhalla_tiles.tar.zst ./build/
          tar -xf ./build/valhalla_tiles.tar.zst -C ./build
          rm -f ./build/valhalla_tiles.tar.zst

      # In rare cases the runner gets killed due to OoM errors. This bumps swap
      # to 90% of the space remaining on disk
      - name: Increase swapfile
        run: |
          space_left=$(df /dev/root -B 1 --output=avail | grep -v Avail)
          space_mult=0.9
          space_alloc=$(echo "${space_left}*${space_mult}" | bc)
          space_alloc_rnd=$(printf %.0f $(echo ${space_alloc}))
          sudo swapoff -a
          sudo fallocate -l ${space_alloc_rnd} /swapfile
          sudo chmod 600 /swapfile
          sudo mkswap /swapfile
          sudo swapon /swapfile
          sudo swapon --show

      # Start the two Valhalla services in the background, then compute
      # times for this matrix chunk and write the results to S3
      - name: Run job chunk
        shell: bash
        working-directory: 'data'
        run: |
          export USER_ID=${{ env.USER_ID }}
          export GROUP_ID=${{ env.GROUP_ID }}
          docker compose up --quiet-pull valhalla-run-fp valhalla-run-sp -d
          uv run ./src/calculate_times.py \
            --mode ${{ inputs.mode }} --year ${{ inputs.year }} \
            --geography ${{ inputs.geography }} --state ${{ inputs.state }} \
            --centroid-type ${{ inputs.centroid_type }} \
            --chunk ${{ matrix.chunk }} --write-to-s3

      # Clear the cache if we're one of the last running jobs. Using this instead
      # of a separate workflow step because the steps often come last in the job
      # queue and then won't run when many workflows are queued at the same time
      - name: Clear workflow cache
        if: always()
        continue-on-error: true
        shell: bash
        run: |
          endpoint='repos/${{ github.repository }}/actions/runs/${{ github.run_id }}/jobs'

          # With --paginate the jq filter runs once per page, so .total_count
          # is printed once per page; keep a single value. tail (not head)
          # reads all input, so gh never hits a closed pipe under pipefail
          total_jobs=$(gh api "$endpoint" --paginate -q '.total_count' | tail -n 1)

          # Emit one id per matching job so wc -l counts jobs, not JSON lines
          complete_jobs=$(gh api "$endpoint" --paginate \
            -q '.jobs[] | select(.status == "completed") | .id' | wc -l)
          in_progress_jobs=$(gh api "$endpoint" --paginate \
            -q '.jobs[] | select(.status == "in_progress") | .id' | wc -l)

          n_remaining=$((total_jobs - complete_jobs))
          all_statuses=$((complete_jobs + in_progress_jobs))
          echo "Total number of jobs: $total_jobs"
          echo "Number of jobs complete: $complete_jobs"
          echo "Number of jobs remaining: $n_remaining"
          echo "Number of jobs run/running: $all_statuses"

          # Only clear once nothing is still queued (every job is completed
          # or in progress) and fewer than 2 jobs have yet to complete
          if [ "$n_remaining" -lt 2 ] && [ "$total_jobs" -eq "$all_statuses" ]; then
            echo "Less than 2 jobs still running. Clearing workflow cache!"
            gh cache delete \
              valhalla-tiles-${{ inputs.year }}-${{ inputs.state }}-${{ hashFiles('./data/dvc.lock') }} || true
          fi
        env:
          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}