From 180689c4f8c2fe49614e315672a7e7dd22798311 Mon Sep 17 00:00:00 2001 From: Benson Ma Date: Tue, 13 Aug 2024 11:12:50 -0700 Subject: [PATCH] [fbgemm_gpu] Add GenAI-only variant release workflow - Add GenAI-only variant release workflow - Change publish_to_pypi argument order to ENV_NAME PYPI_TOKEN PACKAGE_FILEPATH, and update its usage text and the workflow call sites accordingly --- .github/scripts/fbgemm_gpu_test.bash | 1 + .github/scripts/utils_pip.bash | 18 +- .github/workflows/fbgemm_gpu_ci_cpu.yml | 2 +- .github/workflows/fbgemm_gpu_ci_cuda.yml | 2 +- .github/workflows/fbgemm_gpu_ci_genai.yml | 4 +- .github/workflows/fbgemm_gpu_release_cpu.yml | 4 +- .github/workflows/fbgemm_gpu_release_cuda.yml | 4 +- .../workflows/fbgemm_gpu_release_genai.yml | 196 ++++++++++++++++++ 8 files changed, 214 insertions(+), 17 deletions(-) create mode 100644 .github/workflows/fbgemm_gpu_release_genai.yml diff --git a/.github/scripts/fbgemm_gpu_test.bash b/.github/scripts/fbgemm_gpu_test.bash index c50351cb3c..83a7730fd2 100644 --- a/.github/scripts/fbgemm_gpu_test.bash +++ b/.github/scripts/fbgemm_gpu_test.bash @@ -265,6 +265,7 @@ test_all_fbgemm_gpu_modules () { # Determine the FBGEMM_GPU varaiant if needed if [ "$fbgemm_gpu_variant" == "" ]; then + echo "[TEST] FBGEMM_GPU variant not explicitly provided by user; will automatically determine from the FBGEMM_GPU installation ..." 
# shellcheck disable=SC2086 fbgemm_gpu_variant=$(conda run ${env_prefix} python -c "import fbgemm_gpu; print(fbgemm_gpu.__variant__)") echo "[TEST] Determined FBGEMM_GPU variant from installation: ${fbgemm_gpu_variant}" diff --git a/.github/scripts/utils_pip.bash b/.github/scripts/utils_pip.bash index 8522fa7476..4afcc826d9 100644 --- a/.github/scripts/utils_pip.bash +++ b/.github/scripts/utils_pip.bash @@ -293,14 +293,14 @@ download_from_pytorch_pip () { publish_to_pypi () { local env_name="$1" - local package_name="$2" - local pypi_token="$3" - if [ "$pypi_token" == "" ]; then - echo "Usage: ${FUNCNAME[0]} ENV_NAME PACKAGE_NAME PYPI_TOKEN" + local pypi_token="$2" + local package_filepath="$3" + if [ "$pypi_token" == "" ] || [ "$package_filepath" == "" ]; then + echo "Usage: ${FUNCNAME[0]} ENV_NAME PYPI_TOKEN PACKAGE_FILEPATH" echo "Example(s):" - echo " ${FUNCNAME[0]} build_env fbgemm_gpu_nightly-*.whl MY_TOKEN" + echo " ${FUNCNAME[0]} build_env MY_TOKEN fbgemm_gpu_nightly-*.whl" echo "" - echo "PYPI_TOKEN is missing!" + echo "Either PYPI_TOKEN and/or package filepath is missing!" return 1 else echo "################################################################################" @@ -322,7 +322,7 @@ publish_to_pypi () { (test_python_import_package "${env_name}" twine) || return 1 (test_python_import_package "${env_name}" OpenSSL) || return 1 - echo "[PUBLISH] Uploading package(s) to PyPI: ${package_name} ..." + echo "[PUBLISH] Uploading package(s) to PyPI: ${package_filepath} ..." # shellcheck disable=SC2086 conda run ${env_prefix} \ python -m twine upload \ @@ -330,8 +330,8 @@ publish_to_pypi () { --password "${pypi_token}" \ --skip-existing \ --verbose \ - "${package_name}" + "${package_filepath}" - echo "[PUBLISH] Successfully published package(s) to PyPI: ${package_name}" + echo "[PUBLISH] Successfully published package(s) to PyPI: ${package_filepath}" echo "[PUBLISH] NOTE: The publish command is a successful no-op if the wheel version already existed in PyPI; please double check!" 
} diff --git a/.github/workflows/fbgemm_gpu_ci_cpu.yml b/.github/workflows/fbgemm_gpu_ci_cpu.yml index 56f03b8998..1df50be75b 100644 --- a/.github/workflows/fbgemm_gpu_ci_cpu.yml +++ b/.github/workflows/fbgemm_gpu_ci_cpu.yml @@ -191,4 +191,4 @@ jobs: if: ${{ (github.event_name == 'schedule' || (github.event_name == 'workflow_dispatch' && github.event.inputs.publish_to_pypi == 'true')) && matrix.compiler == 'gcc' }} env: PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} - run: . $PRELUDE; publish_to_pypi $BUILD_ENV *.whl "$PYPI_TOKEN" + run: . $PRELUDE; publish_to_pypi $BUILD_ENV "$PYPI_TOKEN" *.whl diff --git a/.github/workflows/fbgemm_gpu_ci_cuda.yml b/.github/workflows/fbgemm_gpu_ci_cuda.yml index b9647cf699..45957afc7b 100644 --- a/.github/workflows/fbgemm_gpu_ci_cuda.yml +++ b/.github/workflows/fbgemm_gpu_ci_cuda.yml @@ -212,4 +212,4 @@ jobs: if: ${{ (github.event_name == 'schedule' && matrix.cuda-version == matrix.cuda-version-publish) || (github.event_name == 'workflow_dispatch' && github.event.inputs.publish_to_pypi == 'true' && matrix.cuda-version == matrix.cuda-version-publish) }} env: PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} - run: . $PRELUDE; publish_to_pypi $BUILD_ENV *.whl "$PYPI_TOKEN" + run: . 
$PRELUDE; publish_to_pypi $BUILD_ENV "$PYPI_TOKEN" *.whl diff --git a/.github/workflows/fbgemm_gpu_ci_genai.yml b/.github/workflows/fbgemm_gpu_ci_genai.yml index 95baa53fd6..7ce15ce519 100644 --- a/.github/workflows/fbgemm_gpu_ci_genai.yml +++ b/.github/workflows/fbgemm_gpu_ci_genai.yml @@ -56,7 +56,7 @@ jobs: env: PRELUDE: .github/scripts/setup_env.bash BUILD_ENV: build_binary - BUILD_VARIANT: cuda + BUILD_VARIANT: genai continue-on-error: true strategy: # Don't fast-fail all the other builds if one of the them fails @@ -212,4 +212,4 @@ jobs: if: ${{ (github.event_name == 'schedule' && matrix.cuda-version == matrix.cuda-version-publish) || (github.event_name == 'workflow_dispatch' && github.event.inputs.publish_to_pypi == 'true' && matrix.cuda-version == matrix.cuda-version-publish) }} env: PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} - run: . $PRELUDE; publish_to_pypi $BUILD_ENV *.whl "$PYPI_TOKEN" + run: . $PRELUDE; publish_to_pypi $BUILD_ENV "$PYPI_TOKEN" *.whl diff --git a/.github/workflows/fbgemm_gpu_release_cpu.yml b/.github/workflows/fbgemm_gpu_release_cpu.yml index d91359b6a1..1bb2038cd6 100644 --- a/.github/workflows/fbgemm_gpu_release_cpu.yml +++ b/.github/workflows/fbgemm_gpu_release_cpu.yml @@ -179,8 +179,8 @@ jobs: timeout-minutes: ${{ matrix.host-machine.timeout }} run: . $PRELUDE; test_all_fbgemm_gpu_modules $BUILD_ENV - - name: Push FBGEMM_GPU (CPU version) Binary to PYPI + - name: Push Wheel to PyPI if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.publish_to_pypi == 'true' }} env: PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} - run: . $PRELUDE; publish_to_pypi $BUILD_ENV *.whl "$PYPI_TOKEN" + run: . 
$PRELUDE; publish_to_pypi $BUILD_ENV "$PYPI_TOKEN" *.whl diff --git a/.github/workflows/fbgemm_gpu_release_cuda.yml b/.github/workflows/fbgemm_gpu_release_cuda.yml index b4436d0e2a..cbe5f9c1a4 100644 --- a/.github/workflows/fbgemm_gpu_release_cuda.yml +++ b/.github/workflows/fbgemm_gpu_release_cuda.yml @@ -189,8 +189,8 @@ jobs: timeout-minutes: 20 run: . $PRELUDE; test_all_fbgemm_gpu_modules $BUILD_ENV - - name: Push FBGEMM_GPU Binary to PYPI + - name: Push Wheel to PyPI if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.publish_to_pypi == 'true' && matrix.cuda-version == github.event.inputs.cuda_version }} env: PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} - run: . $PRELUDE; publish_to_pypi $BUILD_ENV *.whl "$PYPI_TOKEN" + run: . $PRELUDE; publish_to_pypi $BUILD_ENV "$PYPI_TOKEN" *.whl diff --git a/.github/workflows/fbgemm_gpu_release_genai.yml b/.github/workflows/fbgemm_gpu_release_genai.yml new file mode 100644 index 0000000000..14d744d620 --- /dev/null +++ b/.github/workflows/fbgemm_gpu_release_genai.yml @@ -0,0 +1,196 @@ +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# This source code is licensed under the BSD-style license found in the +# LICENSE file in the root directory of this source tree. + +# This workflow is used for building and publishing FBGEMM_GPU-GenAI release +# builds against PyTorch-CUDA Release to public PyPI. 
+name: FBGEMM_GPU-GenAI Release Build + +on: + # PR Trigger (enabled for regression checks and debugging) + # + pull_request: + branches: + - ^v([0-9]+)\.([0-9]+)\.([0-9]+)-release + + # Push Trigger (enable to catch errors coming out of multiple merges) + # + push: + branches: + - ^v([0-9]+)\.([0-9]+)\.([0-9]+)-release + + # Manual Trigger + # + workflow_dispatch: + inputs: + pytorch_channel: + description: Package Channel to Use for PyTorch Installation + type: choice + required: false + options: [ "nightly", "test", "release" ] + default: "test" + cuda_version: + description: CUDA Version to Use for Building Artifact + type: choice + required: false + options: [ "11.8.0", "12.1.1", "12.4.1" ] + default: "12.1.1" + publish_to_pypi: + description: Publish Artifact to PyPI + type: boolean + required: false + default: false + +concurrency: + # Cancel previous runs in the PR if a new commit is pushed + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + # Build on CPU hosts and upload to GHA + build_artifact: + runs-on: ${{ matrix.host-machine.instance }} + container: + image: amazonlinux:2023 + options: --user root + defaults: + run: + shell: bash + env: + PRELUDE: .github/scripts/setup_env.bash + BUILD_ENV: build_binary + BUILD_VARIANT: genai + continue-on-error: true + strategy: + # Don't fast-fail all the other builds if one of them fails + fail-fast: false + matrix: + host-machine: [ + { arch: x86, instance: "linux.24xlarge" }, + ] + python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ] + cuda-version: [ "11.8.0", "12.1.1", "12.4.1" ] + + steps: + - name: Setup Build Container + run: yum update -y; yum install -y binutils findutils git pciutils sudo tar wget which + + - name: Checkout the Repository + uses: actions/checkout@v4 + + - name: Display System Info + run: . $PRELUDE; print_system_info + + - name: Display GPU Info + run: . 
$PRELUDE; print_gpu_info + + - name: Setup Miniconda + run: . $PRELUDE; setup_miniconda $HOME/miniconda + + - name: Create Conda Environment + run: . $PRELUDE; create_conda_environment $BUILD_ENV ${{ matrix.python-version }} + + - name: Install C/C++ Compilers + run: . $PRELUDE; install_cxx_compiler $BUILD_ENV + + - name: Install Build Tools + run: . $PRELUDE; install_build_tools $BUILD_ENV + + - name: Install CUDA + run: . $PRELUDE; install_cuda $BUILD_ENV ${{ matrix.cuda-version }} + + - name: Install PyTorch Test + run: . $PRELUDE; install_pytorch_pip $BUILD_ENV ${{ github.event.inputs.pytorch_channel }} cuda/${{ matrix.cuda-version }} + + - name: Collect PyTorch Environment Info + if: ${{ success() || failure() }} + run: if . $PRELUDE && which conda; then collect_pytorch_env_info $BUILD_ENV; fi + + - name: Install cuDNN + run: . $PRELUDE; install_cudnn $BUILD_ENV "$(pwd)/build_only/cudnn" ${{ matrix.cuda-version }} + + - name: Prepare FBGEMM_GPU Build + run: . $PRELUDE; cd fbgemm_gpu; prepare_fbgemm_gpu_build $BUILD_ENV + + - name: Build FBGEMM_GPU + run: . 
$PRELUDE; cd fbgemm_gpu; build_fbgemm_gpu_package $BUILD_ENV release genai + + - name: Upload Built Wheel as GHA Artifact + uses: actions/upload-artifact@v3 + with: + name: fbgemm_gpu_release_genai_${{ matrix.host-machine.arch }}_${{ matrix.python-version }}_cu${{ matrix.cuda-version }}.whl + path: fbgemm_gpu/dist/*.whl + if-no-files-found: error + + + # Download the built artifact from GHA, test on GPU, and push to PyPI + test_and_publish_artifact: + runs-on: ${{ matrix.host-machine.instance }} + defaults: + run: + shell: bash + env: + PRELUDE: .github/scripts/setup_env.bash + BUILD_ENV: build_binary + BUILD_VARIANT: genai + ENFORCE_CUDA_DEVICE: 1 + strategy: + fail-fast: false + matrix: + host-machine: [ + { arch: x86, instance: "linux.g5.4xlarge.nvidia.gpu" }, + ] + python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ] + cuda-version: [ "11.8.0", "12.1.1", "12.4.1" ] + needs: build_artifact + + steps: + - name: Checkout the Repository + uses: actions/checkout@v3 + + - name: Download Wheel Artifact from GHA + uses: actions/download-artifact@v3 + with: + name: fbgemm_gpu_release_genai_${{ matrix.host-machine.arch }}_${{ matrix.python-version }}_cu${{ matrix.cuda-version }}.whl + + - name: Install NVIDIA Drivers and NVIDIA-Docker Runtime + uses: pytorch/test-infra/.github/actions/setup-nvidia@main + + - name: Display System Info + run: . $PRELUDE; print_system_info; print_ec2_info + + - name: Display GPU Info + run: . $PRELUDE; print_gpu_info + + - name: Setup Miniconda + run: . $PRELUDE; setup_miniconda $HOME/miniconda + + - name: Create Conda Environment + run: . $PRELUDE; create_conda_environment $BUILD_ENV ${{ matrix.python-version }} + + - name: Install CUDA + run: . $PRELUDE; install_cuda $BUILD_ENV ${{ matrix.cuda-version }} + + - name: Install PyTorch Test + run: . 
$PRELUDE; install_pytorch_pip $BUILD_ENV ${{ github.event.inputs.pytorch_channel }} cuda/${{ matrix.cuda-version }} + + - name: Collect PyTorch Environment Info + if: ${{ success() || failure() }} + run: if . $PRELUDE && which conda; then collect_pytorch_env_info $BUILD_ENV; fi + + - name: Prepare FBGEMM_GPU Build + run: . $PRELUDE; cd fbgemm_gpu; prepare_fbgemm_gpu_build $BUILD_ENV + + - name: Install FBGEMM_GPU + run: . $PRELUDE; install_fbgemm_gpu_wheel $BUILD_ENV *.whl + + - name: Test with PyTest + timeout-minutes: 20 + run: . $PRELUDE; test_all_fbgemm_gpu_modules $BUILD_ENV + + - name: Push Wheel to PyPI + if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.publish_to_pypi == 'true' && matrix.cuda-version == github.event.inputs.cuda_version }} + env: + PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} + run: . $PRELUDE; publish_to_pypi $BUILD_ENV "$PYPI_TOKEN" *.whl