Skip to content

Commit

Permalink
Switch from Python bindings to client model for better parallelism (#7)
Browse files Browse the repository at this point in the history
* Test parallel execution

* Disable stdout sink

* Test using client/server model for Valhalla

* Add working client/server parallelism

* Pass error message to ValueError

* Update GHA with new server/client config

* Add missing uv subcommand

* Add uv venvs

* Replace DOCKER_INTERNAL_PATH

* Remove subshell

* Increase chunk sizes

* Limit threads to CPU count

* Tweak settings and batch size

* Bump batch size

* Cleanup log language
  • Loading branch information
dfsnow authored Dec 22, 2024
1 parent 8e7086e commit 7b0b59b
Show file tree
Hide file tree
Showing 19 changed files with 213 additions and 237 deletions.
2 changes: 1 addition & 1 deletion .github/actions/fetch-locations/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ runs:
shell: bash
working-directory: 'data'
run: |
dvc pull --no-run-cache create_destpoint_by_state \
uv run dvc pull --no-run-cache create_destpoint_by_state \
create_cenloc_national create_cenloc_by_state
- name: Cache save location input data
Expand Down
2 changes: 1 addition & 1 deletion .github/actions/fetch-valhalla-tiles/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ runs:
working-directory: 'data'
run: |
tile_path=year=${{ inputs.year }}/geography=state/state=${{ inputs.state }}
dvc pull --no-run-cache \
uv run dvc pull --no-run-cache \
./intermediate/valhalla_tiles/"$tile_path"/valhalla_tiles.tar.zst
- name: Cache save tile input data
Expand Down
14 changes: 5 additions & 9 deletions .github/actions/setup-dvc/action.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,19 +7,15 @@ runs:
using: composite
steps:
- name: Install uv
uses: astral-sh/setup-uv@v3
uses: astral-sh/setup-uv@v5
with:
enable-cache: true
cache-suffix: "dvc"

- name: Install Python
uses: actions/setup-python@v5
with:
python-version-file: .python-version

- name: Install DVC
id: install_python_deps
shell: bash
run: uv pip install "dvc[s3]"
env:
UV_SYSTEM_PYTHON: 1
run: |
uv python install
uv venv
uv pip install "dvc[s3]"
47 changes: 40 additions & 7 deletions .github/workflows/calculate-times.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,6 @@ env:
# See: https://github.com/aws/aws-cli/issues/5262#issuecomment-705832151
AWS_EC2_METADATA_DISABLED: true
PYTHONUNBUFFERED: "1"
UV_SYSTEM_PYTHON: 1

jobs:
# Using the location data, split the origins into N jobs (max 256)
Expand Down Expand Up @@ -156,20 +155,37 @@ jobs:
docker load --input /tmp/opentimes.tar
docker image ls -a
- name: Install uv
uses: astral-sh/setup-uv@v5
with:
enable-cache: true
cache-suffix: "site-data"
cache-dependency-glob: |
pyproject.toml
uv.lock
- name: Install Python dependencies
id: install-python-dependencies
shell: bash
run: |
uv python install
uv venv
uv pip install ".[site,data]"
- name: Fetch locations data
uses: ./.github/actions/fetch-locations

- name: Create job chunks
id: create-job-chunks
working-directory: 'data'
shell: bash
run: |
export USER_ID=${{ env.USER_ID }}
export GROUP_ID=${{ env.GROUP_ID }}
chunks=$(docker compose run --rm --quiet-pull \
--entrypoint=python valhalla-run /data/src/split_chunks.py \
uv run python ./src/split_chunks.py \
--year ${{ inputs.year }} --geography ${{ inputs.geography }} \
--state ${{ inputs.state }})
echo "chunks=$chunks" >> $GITHUB_OUTPUT
--state ${{ inputs.state }} > chunks.txt
echo "chunks=$(cat chunks.txt)" >> $GITHUB_OUTPUT
# If override chunks are set, use those instead
chunks_parsed=($(echo "$chunks" | jq -r '.[]'))
Expand Down Expand Up @@ -238,6 +254,23 @@ jobs:
docker load --input /tmp/opentimes.tar
docker image ls -a
- name: Install uv
uses: astral-sh/setup-uv@v5
with:
enable-cache: true
cache-suffix: "site-data"
cache-dependency-glob: |
pyproject.toml
uv.lock
- name: Install Python dependencies
id: install-python-dependencies
shell: bash
run: |
uv python install
uv venv
uv pip install ".[site,data]"
- name: Extract tiles
shell: bash
working-directory: 'data'
Expand All @@ -253,8 +286,8 @@ jobs:
run: |
export USER_ID=${{ env.USER_ID }}
export GROUP_ID=${{ env.GROUP_ID }}
docker compose run --rm --quiet-pull --entrypoint=python \
valhalla-run /data/src/calculate_times.py \
docker compose up --quiet-pull valhalla-run-fp valhalla-run-sp -d
uv run python ./src/calculate_times.py \
--mode ${{ inputs.mode }} --year ${{ inputs.year }} \
--geography ${{ inputs.geography }} --state ${{ inputs.state }} \
--centroid-type ${{ inputs.centroid_type }} \
Expand Down
14 changes: 5 additions & 9 deletions .github/workflows/create-public-files.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,6 @@ env:
# See: https://github.com/aws/aws-cli/issues/5262#issuecomment-705832151
AWS_EC2_METADATA_DISABLED: true
PYTHONUNBUFFERED: "1"
UV_SYSTEM_PYTHON: 1

jobs:
setup-jobs:
Expand Down Expand Up @@ -79,23 +78,20 @@ jobs:
uses: actions/checkout@v4

- name: Install uv
uses: astral-sh/setup-uv@v3
uses: astral-sh/setup-uv@v5
with:
enable-cache: true
cache-suffix: "site-data"
cache-dependency-glob: |
pyproject.toml
uv.lock
- name: Install Python
uses: actions/setup-python@v5
with:
python-version-file: .python-version

- name: Install Python dependencies
id: install-python-dependencies
shell: bash
run: uv pip install ".[site]" ".[data]"
run: |
uv python install
uv pip install ".[site,data]"
- name: Setup Cloudflare credentials
uses: ./.github/actions/setup-cloudflare-s3
Expand All @@ -110,7 +106,7 @@ jobs:
run: |
datasets_parsed=($(echo "${{ inputs.dataset }}" | tr -d ' ' | tr ',' ' '))
for dataset in "${datasets_parsed[@]}"; do
python ./src/create_public_files.py \
uv run python ./src/create_public_files.py \
--dataset "$dataset" --version ${{ inputs.version }} \
--mode ${{ matrix.mode }} --year ${{ matrix.year }} \
--geography ${{ matrix.geography }}
Expand Down
5 changes: 2 additions & 3 deletions .github/workflows/create-valhalla-tiles.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,6 @@ env:
# See: https://github.com/aws/aws-cli/issues/5262#issuecomment-705832151
AWS_EC2_METADATA_DISABLED: true
PYTHONUNBUFFERED: "1"
UV_SYSTEM_PYTHON: 1

jobs:
setup-jobs:
Expand Down Expand Up @@ -110,7 +109,7 @@ jobs:
shell: bash
working-directory: 'data'
run: |
dvc pull --no-run-cache \
uv run dvc pull --no-run-cache \
./intermediate/osmextract/year=${{ inputs.year }}/geography=state/state=${{ matrix.state }}/${{ matrix.state }}.osm.pbf
- name: Run job chunk
Expand All @@ -120,7 +119,7 @@ jobs:
# Disable elevation for Alaska (which requires 22GB of tiles)
BUILD_ELEVATION: ${{ matrix.state == '02' && 'False' || 'True' }}
run: |
dvc repro -s create_valhalla_tiles@${{ inputs.year }}-${{ matrix.state }}
uv dvc repro -s create_valhalla_tiles@${{ inputs.year }}-${{ matrix.state }}
- name: Write tile files to S3
shell: bash
Expand Down
15 changes: 6 additions & 9 deletions .github/workflows/pre-commit.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ name: pre-commit

env:
PYTHONUNBUFFERED: "1"
UV_SYSTEM_PYTHON: 1

jobs:
pre-commit:
Expand All @@ -17,19 +16,17 @@ jobs:
uses: actions/checkout@v4

- name: Install uv
uses: astral-sh/setup-uv@v3
uses: astral-sh/setup-uv@v5
with:
enable-cache: true
cache-suffix: "pre-commit"

- name: Install Python
uses: actions/setup-python@v5
with:
python-version-file: .python-version

- name: Install pre-commit
shell: bash
run: uv pip install pre-commit
run: |
uv python install
uv venv
uv pip install pre-commit
- name: Cache pre-commit environment
uses: actions/cache@v4
Expand All @@ -39,4 +36,4 @@ jobs:

- name: Run pre-commit
shell: bash
run: pre-commit run --show-diff-on-failure --color=always --all-files
run: uv run pre-commit run --show-diff-on-failure --color=always --all-files
11 changes: 1 addition & 10 deletions .github/workflows/pypi-publish.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ name: pypi-publish

env:
PYTHONUNBUFFERED: "1"
UV_SYSTEM_PYTHON: 1

jobs:
pypi-publish:
Expand All @@ -23,15 +22,7 @@ jobs:
uses: actions/checkout@v4

- name: Install uv
uses: astral-sh/setup-uv@v3

- name: Install Python
uses: actions/setup-python@v5
with:
python-version-file: .python-version

- name: Install Python dependencies
run: uv pip install .
uses: astral-sh/setup-uv@v5

- name: Build Python dist
run: uv build
Expand Down
15 changes: 6 additions & 9 deletions .github/workflows/update-data-site.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@ env:
# See: https://github.com/aws/aws-cli/issues/5262#issuecomment-705832151
AWS_EC2_METADATA_DISABLED: true
PYTHONUNBUFFERED: "1"
UV_SYSTEM_PYTHON: 1

jobs:
update-site:
Expand All @@ -25,23 +24,21 @@ jobs:
uses: actions/checkout@v4

- name: Install uv
uses: astral-sh/setup-uv@v3
uses: astral-sh/setup-uv@v5
with:
enable-cache: true
cache-suffix: "site-data"
cache-dependency-glob: |
pyproject.toml
uv.lock
- name: Install Python
uses: actions/setup-python@v5
with:
python-version-file: .python-version

- name: Install Python dependencies
id: install-python-dependencies
shell: bash
run: uv pip install ".[site]" ".[data]"
run: |
uv python install
uv venv
uv pip install ".[site,data]"
- name: Setup Cloudflare credentials
uses: ./.github/actions/setup-cloudflare-s3
Expand All @@ -57,7 +54,7 @@ jobs:
echo "::add-mask::${{ secrets.CLOUDFLARE_CACHE_API_TOKEN }}"
echo "::add-mask::${{ secrets.CLOUDFLARE_CACHE_ZONE_ID }}"
bucket=$(yq e '.s3.public_bucket' params.yaml)
python ./src/create_public_site.py
uv run python ./src/create_public_site.py
env:
CLOUDFLARE_CACHE_API_TOKEN: ${{ secrets.CLOUDFLARE_CACHE_API_TOKEN }}
CLOUDFLARE_CACHE_ZONE_ID: ${{ secrets.CLOUDFLARE_CACHE_ZONE_ID }}
20 changes: 5 additions & 15 deletions data/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,17 +1,7 @@
FROM ghcr.io/nilsnolde/docker-valhalla/valhalla:latest

SHELL ["/bin/bash", "-o", "pipefail", "-c"]
WORKDIR /custom_files
USER root

# Install Python package dependencies
RUN apt-get update && \
apt-get install -y --no-install-recommends \
osmium-tool=1.16.0-1build1 libzstd1=1.5.5+dfsg2-2build1.1 \
libudunits2-dev=2.2.28-7build1 libproj-dev=9.4.0-1build2 \
gdal-bin=3.8.4+dfsg-3ubuntu3 geos-bin=3.12.1-3build1 && \
rm -rf /var/lib/apt/lists/*

# Create a new valhalla user with correct ids
# https://jtreminio.com/blog/running-docker-containers-as-current-host-user
ARG USER_ID=59999
Expand All @@ -21,10 +11,10 @@ RUN userdel -f valhalla && \
if getent group ${GROUP_ID}; then groupdel $(getent group ${GROUP_ID} | cut -d: -f1); fi && \
groupadd -g ${GROUP_ID} valhalla && \
useradd -l -u ${USER_ID} -g valhalla valhalla && \
install -d -m 0755 -o valhalla -g valhalla /home/valhalla
install -d -m 0755 -o valhalla -g valhalla /home/valhalla && \
rm -rf /custom_files && mkdir -p /custom_files && \
chmod 0775 /custom_files && \
chown valhalla:valhalla /custom_files

# Install Python dependencies for opentimes work inside docker
COPY --from=ghcr.io/astral-sh/uv:0.5.0 /uv /uvx /bin/
COPY pyproject.toml uv.lock ./
RUN uv pip install --no-cache-dir --system --break-system-packages .[data]
WORKDIR /custom_files
USER valhalla
6 changes: 3 additions & 3 deletions data/params.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,14 @@ actions:

# The minimum number of origins to include in a job. Higher = fewer jobs
# that take longer. Lower = more jobs that finish quicker
origin_min_chunk_size: 500
origin_min_chunk_size: 900

# The max number of destination splits to create for a workflow
destination_n_chunks: 4

# The minimum number of destinations included in each job. For reference,
# most states have around 10K Census tract destinations
destination_min_chunk_size: 10000
destination_min_chunk_size: 20000

times:
# Travel times output version. Follows SemVer (kind of):
Expand All @@ -46,7 +46,7 @@ times:

# Maximum size of chunk of origins AND destinations to process in a single call to
# Valhalla. This is necessary because larger matrices will cause it to choke
max_split_size: 250
max_split_size: 150

# Coordinates are snapped to the OSM street network before time calculation.
# Setting this to true will use the snapped coordinates directly in the
Expand Down
Loading

0 comments on commit 7b0b59b

Please sign in to comment.