Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions .github/mergify.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,28 @@ pull_request_rules:
- -files~=^\.github/(actions|workflows)/.*\.ya?ml$
- -files~=^\.github/workflows/actionlint\.

# medium e2e workflow
# Satisfied when EITHER
#   * the PR touches at least one file that triggers the e2e workflow AND the
#     'e2e-medium-workflow-complete' check succeeded, OR
#   * the PR touches none of those files, so there is no check to wait for.
- or:
  - and:
    # note this should match the triggering criteria ('paths') of the workflow
    # that defines the 'e2e-medium-workflow-complete' job
    # ('e2e-nvidia-l40s-x4-sdk.yml')
    - check-success~=e2e-medium-workflow-complete
    - or:
      - files~=\.py$
      - files=pyproject.toml
      - files=tox.ini
      - files=requirements.txt
      - files=requirements-dev.txt
      - files=constraints-dev.txt
      - files~=^\.github/workflows/.*\.yml$  # Any workflow file
  - and:
    # Negated copy of the list above. Every entry must carry the leading '-':
    # without it this branch would only pass for a PR that touches ALL of
    # these files at once, and PRs touching none of them could never merge.
    - -files~=\.py$
    - -files=pyproject.toml
    - -files=tox.ini
    - -files=requirements.txt
    - -files=requirements-dev.txt
    - -files=constraints-dev.txt
    - -files~=^\.github/workflows/.*\.yml$  # Any workflow file

# code lint workflow
- or:
- and:
Expand Down
32 changes: 28 additions & 4 deletions .github/workflows/e2e-nvidia-l40s-x4-sdk.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,10 @@ name: E2E (NVIDIA L40S x4) SDK Test

on:
# only run on PRs that touch files matching these path glob patterns
pull_request:
pull_request_target:
branches:
- main
- release-*
paths:
# note this should match the merging criteria in 'mergify.yml'
- "**.py"
Expand All @@ -15,20 +16,29 @@ on:
- "requirements.txt"
- "requirements-dev.txt"
- "constraints-dev.txt"
- ".github/workflows/e2e-nvidia-l40s-x4-sdk.yaml" # This workflow
- ".github/workflows/*.yml" # This workflow
workflow_dispatch:
inputs:
pr_or_branch:
description: 'pull request number or branch name'
required: true
default: 'main'

# Runs are grouped by workflow name plus the event number (falling back to the
# ref when there is none); a newer run in the same group cancels the one
# already in progress.
concurrency:
  cancel-in-progress: true
  group: ${{ github.workflow }}-${{ github.event.number || github.ref }}

# Workflow-level environment variables.
env:
  TMPDIR: /home/tmp

# Every `run:` step uses bash unless it sets its own shell.
defaults:
  run:
    shell: bash

# Workflow-level token permissions: read-only access to repository contents.
permissions:
  contents: read

jobs:
start-large-ec2-runner:
runs-on: ubuntu-latest
Expand Down Expand Up @@ -97,8 +107,11 @@ jobs:
- start-large-ec2-runner
runs-on: ${{ needs.start-large-ec2-runner.outputs.label }}

permissions:
pull-requests: write

# It is important that this job has no write permissions and has
# no access to any secrets. This part (e2e-medium-test) is where we are running
# untrusted code from PRs.
permissions: {}

steps:
- name: Install Packages
Expand Down Expand Up @@ -308,3 +321,14 @@ jobs:
run: |
echo "::warning::Failed to upload Phase 2 loss graph to S3. This won't block the workflow, but you may want to investigate."
echo "Loss graph upload failed" >> "${GITHUB_STEP_SUMMARY}"

e2e-medium-workflow-complete:
  # Aggregated status job: calls the reusable status-checks workflow with the
  # job ids listed below.
  # "stop-medium-ec2-runner" is deliberately not listed, because a failed EC2
  # cleanup should not block PRs.
  # NOTE(review): the only runner job visible in this workflow is named
  # 'start-large-ec2-runner' - confirm that 'e2e-medium-test' (below) and
  # 'stop-medium-ec2-runner' (above) are the real job ids in this file.
  uses: ./.github/workflows/status-checks.yml
  permissions:
    checks: read
  with:
    job_ids: >-  # Space-separated job ids to wait on for status checks
      start-large-ec2-runner
      e2e-medium-test
60 changes: 60 additions & 0 deletions .github/workflows/status-checks.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# SPDX-License-Identifier: Apache-2.0
# yamllint disable rule:line-length

name: Status Checks Reusable Workflow

# Reusable workflow: it is only started through `workflow_call` from another
# workflow, which supplies the inputs below.
on:
  workflow_call:
    inputs:
      # The only mandatory input.
      job_ids:
        type: string
        required: true
        description: 'Space-separated job ids to wait on for status checks'
      # The three polling inputs are optional and are given in seconds.
      delay:
        type: number
        required: false
        default: 10
        description: 'Period in seconds to wait before first poll of GitHub Check Runs'
      interval:
        type: number
        required: false
        default: 10
        description: 'Interval or period in seconds between polling GitHub Check Runs'
      timeout:
        type: number
        required: false
        default: 3600
        description: 'Timeout in seconds to complete polling GitHub Check Runs'

# Workflow-level environment variables.
env:
  LC_ALL: en_US.UTF-8

# Every `run:` step uses bash unless it sets its own shell.
defaults:
  run:
    shell: bash

# Workflow-level token permissions: read-only access to checks.
permissions:
  checks: read

jobs:
  # Builds a match pattern from `inputs.job_ids` and hands it, together with
  # the polling inputs, to the wait-for-status-checks action.
  status-checks:
    runs-on: ubuntu-latest
    steps:
      - name: "Harden Runner"
        uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0  # v2.12.0
        with:
          egress-policy: audit  # TODO: change to 'egress-policy: block' after couple of runs

      - name: "Set status check variables"
        id: set_variables
        env:
          # Pass the input through the environment instead of expanding
          # `${{ ... }}` inside the script text, so the value can never be
          # interpreted as shell syntax (quotes, `$(...)`, backticks, ...).
          JOB_IDS: ${{ inputs.job_ids }}
        # Splits JOB_IDS on whitespace, drops empty fields and writes
        # `match_pattern=(id1|id2|...)` to the step outputs.
        run: |
          jq -nr '[$ARGS.positional[] | split("\\s"; null) | map(select(. != ""))] | flatten | join("|") | ("match_pattern=(" + . + ")")' --args "${JOB_IDS}" >> "$GITHUB_OUTPUT"

      - name: "Wait for status checks"
        uses: poseidon/wait-for-status-checks@899c768d191b56eef585c18f8558da19e1f3e707  # v0.6.0
        with:
          token: ${{ secrets.GITHUB_TOKEN }}
          match_pattern: ${{ steps.set_variables.outputs.match_pattern }}
          delay: ${{ inputs.delay }}
          interval: ${{ inputs.interval }}
          timeout: ${{ inputs.timeout }}
Loading