Fix XGLM loss computation (PyTorch and TensorFlow) #753
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: PR comment GitHub CI | |
on: | |
issue_comment: | |
types: | |
- created | |
branches-ignore: | |
- main | |
concurrency: | |
group: ${{ github.workflow }}-${{ github.event.issue.number }}-${{ startsWith(github.event.comment.body, 'run-slow') || startsWith(github.event.comment.body, 'run slow') || startsWith(github.event.comment.body, 'run_slow') }} | |
cancel-in-progress: true | |
permissions: read-all | |
env: | |
HF_HOME: /mnt/cache | |
TRANSFORMERS_IS_CI: yes | |
OMP_NUM_THREADS: 8 | |
MKL_NUM_THREADS: 8 | |
RUN_SLOW: yes | |
# For gated repositories, we still need to agree to share information on the Hub repo. page in order to get access. | |
# This token is created under the bot `hf-transformers-bot`. | |
HF_HUB_READ_TOKEN: ${{ secrets.HF_HUB_READ_TOKEN }} | |
SIGOPT_API_TOKEN: ${{ secrets.SIGOPT_API_TOKEN }} | |
TF_FORCE_GPU_ALLOW_GROWTH: true | |
RUN_PT_TF_CROSS_TESTS: 1 | |
CUDA_VISIBLE_DEVICES: 0,1 | |
jobs: | |
get-pr-number: | |
runs-on: ubuntu-22.04 | |
name: Get PR number | |
# For security: only allow team members to run | |
if: ${{ github.event.issue.state == 'open' && contains(fromJSON('["ydshieh", "ArthurZucker", "zucchini-nlp", "qubvel", "molbap", "gante", "LysandreJik", "Cyrilvallez", "Rocketknight1"]'), github.actor) && (startsWith(github.event.comment.body, 'run-slow') || startsWith(github.event.comment.body, 'run slow') || startsWith(github.event.comment.body, 'run_slow')) }} | |
outputs: | |
PR_NUMBER: ${{ steps.set_pr_number.outputs.PR_NUMBER }} | |
steps: | |
- name: Get PR number | |
shell: bash | |
run: | | |
if [[ "${{ github.event.issue.number }}" != "" && "${{ github.event.issue.pull_request }}" != "" ]]; then | |
echo "PR_NUMBER=${{ github.event.issue.number }}" >> $GITHUB_ENV | |
else | |
echo "PR_NUMBER=" >> $GITHUB_ENV | |
fi | |
- name: Check PR number | |
shell: bash | |
run: | | |
echo "${{ env.PR_NUMBER }}" | |
- name: Set PR number | |
id: set_pr_number | |
run: echo "PR_NUMBER=${{ env.PR_NUMBER }}" >> "$GITHUB_OUTPUT" | |
get-sha: | |
runs-on: ubuntu-22.04 | |
needs: get-pr-number | |
if: ${{ needs.get-pr-number.outputs.PR_NUMBER != ''}} | |
outputs: | |
PR_HEAD_SHA: ${{ steps.get_sha.outputs.PR_HEAD_SHA }} | |
PR_MERGE_SHA: ${{ steps.get_sha.outputs.PR_MERGE_SHA }} | |
steps: | |
- uses: actions/checkout@v4 | |
with: | |
fetch-depth: "0" | |
ref: "refs/pull/${{needs.get-pr-number.outputs.PR_NUMBER}}/merge" | |
- name: Get SHA (and verify timestamps against the issue comment date) | |
id: get_sha | |
env: | |
PR_NUMBER: ${{ needs.get-pr-number.outputs.PR_NUMBER }} | |
COMMENT_DATE: ${{ github.event.comment.created_at }} | |
run: | | |
git fetch origin refs/pull/$PR_NUMBER/head:refs/remotes/pull/$PR_NUMBER/head | |
git checkout refs/remotes/pull/$PR_NUMBER/head | |
echo "PR_HEAD_SHA: $(git log -1 --format=%H)" | |
echo "PR_HEAD_SHA=$(git log -1 --format=%H)" >> "$GITHUB_OUTPUT" | |
git fetch origin refs/pull/$PR_NUMBER/merge:refs/remotes/pull/$PR_NUMBER/merge | |
git checkout refs/remotes/pull/$PR_NUMBER/merge | |
echo "PR_MERGE_SHA: $(git log -1 --format=%H)" | |
echo "PR_MERGE_SHA=$(git log -1 --format=%H)" >> "$GITHUB_OUTPUT" | |
PR_MERGE_COMMIT_TIMESTAMP=$(git log -1 --date=unix --format=%cd) | |
echo "PR_MERGE_COMMIT_TIMESTAMP: $PR_MERGE_COMMIT_TIMESTAMP" | |
COMMENT_TIMESTAMP=$(date -d "${COMMENT_DATE}" +"%s") | |
echo "COMMENT_DATE: $COMMENT_DATE" | |
echo "COMMENT_TIMESTAMP: $COMMENT_TIMESTAMP" | |
if [ $COMMENT_TIMESTAMP -le $PR_MERGE_COMMIT_TIMESTAMP ]; then | |
echo "Last commit on the pull request is newer than the issue comment triggering this run! Abort!"; | |
exit -1; | |
fi | |
# use a python script to handle this complex logic | |
# case 1: `run-slow` (auto. infer with limited number of models, but in particular, new model) | |
# case 2: `run-slow model_1, model_2` | |
get-tests: | |
runs-on: ubuntu-22.04 | |
needs: [get-pr-number, get-sha] | |
if: ${{ needs.get-pr-number.outputs.PR_NUMBER != ''}} | |
outputs: | |
models: ${{ steps.models_to_run.outputs.models }} | |
steps: | |
- uses: actions/checkout@v4 | |
with: | |
fetch-depth: "0" | |
ref: "refs/pull/${{needs.get-pr-number.outputs.PR_NUMBER}}/merge" | |
- name: Verify merge commit SHA | |
env: | |
VERIFIED_PR_MERGE_SHA: ${{ needs.get-sha.outputs.PR_MERGE_SHA }} | |
run: | | |
PR_MERGE_SHA=$(git log -1 --format=%H) | |
if [ $PR_MERGE_SHA != $VERIFIED_PR_MERGE_SHA ]; then | |
echo "The merged commit SHA is not the same as the verified one! Security issue detected, abort the workflow!"; | |
exit -1; | |
fi | |
- name: Get models to test | |
env: | |
PR_COMMENT: ${{ github.event.comment.body }} | |
run: | | |
python -m pip install GitPython | |
python utils/pr_slow_ci_models.py --message "$PR_COMMENT" | tee output.txt | |
echo "models=$(tail -n 1 output.txt)" >> $GITHUB_ENV | |
- name: Show models to test | |
id: models_to_run | |
run: | | |
echo "${{ env.models }}" | |
echo "models=${{ env.models }}" >> $GITHUB_ENV | |
echo "models=${{ env.models }}" >> $GITHUB_OUTPUT | |
reply_to_comment: | |
name: Reply to the comment | |
if: ${{ needs.get-tests.outputs.models != '[]' }} | |
needs: [get-pr-number, get-tests] | |
permissions: | |
pull-requests: write | |
runs-on: ubuntu-22.04 | |
steps: | |
- name: Reply to the comment | |
env: | |
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} | |
MODELS: ${{ needs.get-tests.outputs.models }} | |
run: | | |
gh api \ | |
--method POST \ | |
-H "Accept: application/vnd.github+json" \ | |
-H "X-GitHub-Api-Version: 2022-11-28" \ | |
repos/${{ github.repository }}/issues/${{ needs.get-pr-number.outputs.PR_NUMBER }}/comments \ | |
-f "body=This comment contains run-slow, running the specified jobs: ${{ env.MODELS }} ..." | |
create_run: | |
name: Create run | |
if: ${{ needs.get-tests.outputs.models != '[]' }} | |
needs: [get-sha, get-tests, reply_to_comment] | |
permissions: | |
statuses: write | |
runs-on: ubuntu-22.04 | |
steps: | |
- name: Create Run | |
id: create_run | |
env: | |
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} | |
# Create a commit status (pending) for a run of this workflow. The status has to be updated later in `update_run_status`. | |
# See https://docs.github.com/en/rest/commits/statuses?apiVersion=2022-11-28#create-a-commit-status | |
GITHUB_RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} | |
run: | | |
gh api \ | |
--method POST \ | |
-H "Accept: application/vnd.github+json" \ | |
-H "X-GitHub-Api-Version: 2022-11-28" \ | |
repos/${{ github.repository }}/statuses/${{ needs.get-sha.outputs.PR_HEAD_SHA }} \ | |
-f "target_url=$GITHUB_RUN_URL" -f "state=pending" -f "description=Slow CI job" -f "context=pytest/custom-tests" | |
run_models_gpu: | |
name: Run all tests for the model | |
if: ${{ needs.get-tests.outputs.models != '[]' }} | |
needs: [get-pr-number, get-sha, get-tests, create_run] | |
strategy: | |
fail-fast: false | |
matrix: | |
folders: ${{ fromJson(needs.get-tests.outputs.models) }} | |
machine_type: [aws-g4dn-2xlarge-cache, aws-g4dn-12xlarge-cache] | |
runs-on: | |
group: '${{ matrix.machine_type }}' | |
container: | |
image: huggingface/transformers-all-latest-gpu | |
options: --gpus all --shm-size "16gb" --ipc host -v /mnt/cache/.cache/huggingface:/mnt/cache/ | |
steps: | |
- name: Echo input and matrix info | |
shell: bash | |
run: | | |
echo "${{ matrix.folders }}" | |
- name: Echo folder ${{ matrix.folders }} | |
shell: bash | |
# For folders like `models/bert`, set an env. var. (`matrix_folders`) to `models_bert`, which will be used to | |
# set the artifact folder names (because the character `/` is not allowed). | |
run: | | |
echo "${{ matrix.folders }}" | |
matrix_folders=${{ matrix.folders }} | |
matrix_folders=${matrix_folders/'models/'/'models_'} | |
echo "$matrix_folders" | |
echo "matrix_folders=$matrix_folders" >> $GITHUB_ENV | |
- name: Checkout to PR merge commit | |
working-directory: /transformers | |
run: | | |
git fetch origin refs/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge:refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge | |
git checkout refs/remotes/pull/${{ needs.get-pr-number.outputs.PR_NUMBER }}/merge | |
git log -1 --format=%H | |
- name: Verify merge commit SHA | |
env: | |
VERIFIED_PR_MERGE_SHA: ${{ needs.get-sha.outputs.PR_MERGE_SHA }} | |
working-directory: /transformers | |
run: | | |
PR_MERGE_SHA=$(git log -1 --format=%H) | |
if [ $PR_MERGE_SHA != $VERIFIED_PR_MERGE_SHA ]; then | |
echo "The merged commit SHA is not the same as the verified one! Security issue detected, abort the workflow!"; | |
exit -1; | |
fi | |
- name: Reinstall transformers in edit mode (remove the one installed during docker image build) | |
working-directory: /transformers | |
run: python3 -m pip uninstall -y transformers && python3 -m pip install -e . | |
- name: NVIDIA-SMI | |
run: | | |
nvidia-smi | |
- name: Set `machine_type` for report and artifact names | |
working-directory: /transformers | |
shell: bash | |
run: | | |
echo "${{ matrix.machine_type }}" | |
if [ "${{ matrix.machine_type }}" = "aws-g4dn-2xlarge-cache" ]; then | |
machine_type=single-gpu | |
elif [ "${{ matrix.machine_type }}" = "aws-g4dn-12xlarge-cache" ]; then | |
machine_type=multi-gpu | |
else | |
machine_type=${{ matrix.machine_type }} | |
fi | |
echo "$machine_type" | |
echo "machine_type=$machine_type" >> $GITHUB_ENV | |
- name: Environment | |
working-directory: /transformers | |
run: | | |
python3 utils/print_env.py | |
- name: Show installed libraries and their versions | |
working-directory: /transformers | |
run: pip freeze | |
- name: Run all tests on GPU | |
working-directory: /transformers | |
run: | | |
export CUDA_VISIBLE_DEVICES="$(python3 utils/set_cuda_devices_for_ci.py --test_folder ${{ matrix.folders }})" | |
echo $CUDA_VISIBLE_DEVICES | |
python3 -m pytest -v -rsfE --make-reports=${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports tests/${{ matrix.folders }} | |
- name: Failure short reports | |
if: ${{ failure() }} | |
continue-on-error: true | |
run: cat /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/failures_short.txt | |
- name: Make sure report directory exists | |
shell: bash | |
run: | | |
mkdir -p /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports | |
echo "hello" > /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports/hello.txt | |
echo "${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports" | |
- name: "Test suite reports artifacts: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports" | |
if: ${{ always() }} | |
uses: actions/upload-artifact@v4 | |
with: | |
name: ${{ env.machine_type }}_run_models_gpu_${{ env.matrix_folders }}_test_reports | |
path: /transformers/reports/${{ env.machine_type }}_run_models_gpu_${{ matrix.folders }}_test_reports | |
update_run_status: | |
name: Update Check Run Status | |
needs: [get-sha, create_run, run_models_gpu] | |
permissions: | |
statuses: write | |
if: ${{ always() && needs.create_run.result == 'success' }} | |
runs-on: ubuntu-22.04 | |
env: | |
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} | |
GITHUB_RUN_URL: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} | |
steps: | |
- name: Get `run_models_gpu` job status | |
run: | | |
echo "${{ needs.run_models_gpu.result }}" | |
if [ "${{ needs.run_models_gpu.result }}" = "cancelled" ]; then | |
echo "STATUS=failure" >> $GITHUB_ENV | |
elif [ "${{ needs.run_models_gpu.result }}" = "skipped" ]; then | |
echo "STATUS=success" >> $GITHUB_ENV | |
else | |
echo "STATUS=${{ needs.run_models_gpu.result }}" >> $GITHUB_ENV | |
fi | |
- name: Update PR commit statuses | |
run: | | |
echo "${{ needs.run_models_gpu.result }}" | |
echo "${{ env.STATUS }}" | |
gh api \ | |
--method POST \ | |
-H "Accept: application/vnd.github+json" \ | |
-H "X-GitHub-Api-Version: 2022-11-28" \ | |
repos/${{ github.repository }}/statuses/${{ needs.get-sha.outputs.PR_HEAD_SHA }} \ | |
-f "target_url=$GITHUB_RUN_URL" -f "state=${{ env.STATUS }}" -f "description=Slow CI job" -f "context=pytest/custom-tests" |