Skip to content

Fetch and ingest

Fetch and ingest #2097

name: Fetch and ingest
on:
schedule:
# Note times are in UTC, which is 1 or 2 hours behind CET depending on daylight savings.
#
# Note the actual runs might be late.
# Numerous people were confused, about that, including me:
# - https://github.community/t/scheduled-action-running-consistently-late/138025/11
# - https://github.com/github/docs/issues/3059
#
# Note, '*' is a special character in YAML, so you have to quote this string.
#
# Docs:
# - https://docs.github.com/en/actions/learn-github-actions/events-that-trigger-workflows#schedule
#
# Tool that deciphers this particular format of crontab string:
# - https://crontab.guru/
#
# Runs at 5pm UTC (1pm EDT/10am PDT) since curation by NCBI happens on the East Coast.
# We were running into invalid zip archive errors at 9am PDT, so hoping an hour
# delay will lower the error frequency
- cron: '0 17 * * *'
repository_dispatch:
types:
- fetch-and-ingest
# Manually triggered using GitHub's UI
workflow_dispatch:
inputs:
image:
description: 'Specific container image to use for build (will override the default of "nextstrain build")'
required: false
jobs:
fetch-and-ingest:
permissions:
id-token: write
uses: nextstrain/.github/.github/workflows/pathogen-repo-build.yaml@master
secrets: inherit
with:
runtime: aws-batch
env: |
NEXTSTRAIN_DOCKER_IMAGE: ${{ inputs.image }}
GITHUB_RUN_ID: ${{ github.run_id }}
SLACK_CHANNELS: ${{ vars.SLACK_CHANNELS }}
run: |
nextstrain build \
--detach \
--no-download \
--cpus 32 \
--memory 64gib \
--env GITHUB_RUN_ID \
--env SLACK_TOKEN \
--env SLACK_CHANNELS \
--env PAT_GITHUB_DISPATCH="$GH_TOKEN_NEXTSTRAIN_BOT_WORKFLOW_DISPATCH" \
ingest \
nextstrain_automation \
--configfiles build-configs/nextstrain-automation/config.yaml \