From 180689c4f8c2fe49614e315672a7e7dd22798311 Mon Sep 17 00:00:00 2001
From: Benson Ma <bensonma415@meta.com>
Date: Tue, 13 Aug 2024 11:12:50 -0700
Subject: [PATCH] [fbgemm_gpu] Add GenAI-only variant release workflow

- Add GenAI-only variant release workflow
---
 .github/scripts/fbgemm_gpu_test.bash          |   1 +
 .github/scripts/utils_pip.bash                |  16 +-
 .github/workflows/fbgemm_gpu_ci_cpu.yml       |   2 +-
 .github/workflows/fbgemm_gpu_ci_cuda.yml      |   2 +-
 .github/workflows/fbgemm_gpu_ci_genai.yml     |   4 +-
 .github/workflows/fbgemm_gpu_release_cpu.yml  |   4 +-
 .github/workflows/fbgemm_gpu_release_cuda.yml |   4 +-
 .../workflows/fbgemm_gpu_release_genai.yml    | 196 ++++++++++++++++++
 8 files changed, 213 insertions(+), 16 deletions(-)
 create mode 100644 .github/workflows/fbgemm_gpu_release_genai.yml

diff --git a/.github/scripts/fbgemm_gpu_test.bash b/.github/scripts/fbgemm_gpu_test.bash
index c50351cb3c..83a7730fd2 100644
--- a/.github/scripts/fbgemm_gpu_test.bash
+++ b/.github/scripts/fbgemm_gpu_test.bash
@@ -265,6 +265,7 @@ test_all_fbgemm_gpu_modules () {
 
-  # Determine the FBGEMM_GPU varaiant if needed
+  # Determine the FBGEMM_GPU variant if needed
   if [ "$fbgemm_gpu_variant" == "" ]; then
+    echo "[TEST] FBGEMM_GPU variant not explicitly provided by user; will automatically determine from the FBGEMM_GPU installation ..."
     # shellcheck disable=SC2086
     fbgemm_gpu_variant=$(conda run ${env_prefix} python -c "import fbgemm_gpu; print(fbgemm_gpu.__variant__)")
     echo "[TEST] Determined FBGEMM_GPU variant from installation: ${fbgemm_gpu_variant}"
diff --git a/.github/scripts/utils_pip.bash b/.github/scripts/utils_pip.bash
index 8522fa7476..4afcc826d9 100644
--- a/.github/scripts/utils_pip.bash
+++ b/.github/scripts/utils_pip.bash
@@ -293,14 +293,14 @@ download_from_pytorch_pip () {
 
 publish_to_pypi () {
   local env_name="$1"
-  local package_name="$2"
-  local pypi_token="$3"
-  if [ "$pypi_token" == "" ]; then
+  local pypi_token="$2"
+  local package_filepath="$3"
+  if [ "$pypi_token" == "" ] || [ "$package_filepath" == "" ]; then
-    echo "Usage: ${FUNCNAME[0]} ENV_NAME PACKAGE_NAME PYPI_TOKEN"
+    echo "Usage: ${FUNCNAME[0]} ENV_NAME PYPI_TOKEN PACKAGE_FILEPATH"
     echo "Example(s):"
-    echo "    ${FUNCNAME[0]} build_env fbgemm_gpu_nightly-*.whl MY_TOKEN"
+    echo "    ${FUNCNAME[0]} build_env MY_TOKEN fbgemm_gpu_nightly-*.whl"
     echo ""
-    echo "PYPI_TOKEN is missing!"
+    echo "Either PYPI_TOKEN and/or package filepath is missing!"
     return 1
   else
     echo "################################################################################"
@@ -322,7 +322,7 @@ publish_to_pypi () {
   (test_python_import_package "${env_name}" twine) || return 1
   (test_python_import_package "${env_name}" OpenSSL) || return 1
 
-  echo "[PUBLISH] Uploading package(s) to PyPI: ${package_name} ..."
+  echo "[PUBLISH] Uploading package(s) to PyPI: ${package_filepath} ..."
   # shellcheck disable=SC2086
   conda run ${env_prefix} \
     python -m twine upload \
@@ -330,8 +330,8 @@ publish_to_pypi () {
       --password "${pypi_token}" \
       --skip-existing \
       --verbose \
-      "${package_name}"
+      "${package_filepath}"
 
-  echo "[PUBLISH] Successfully published package(s) to PyPI: ${package_name}"
+  echo "[PUBLISH] Successfully published package(s) to PyPI: ${package_filepath}"
   echo "[PUBLISH] NOTE: The publish command is a successful no-op if the wheel version already existed in PyPI; please double check!"
 }
diff --git a/.github/workflows/fbgemm_gpu_ci_cpu.yml b/.github/workflows/fbgemm_gpu_ci_cpu.yml
index 56f03b8998..1df50be75b 100644
--- a/.github/workflows/fbgemm_gpu_ci_cpu.yml
+++ b/.github/workflows/fbgemm_gpu_ci_cpu.yml
@@ -191,4 +191,4 @@ jobs:
       if: ${{ (github.event_name == 'schedule' || (github.event_name == 'workflow_dispatch' && github.event.inputs.publish_to_pypi == 'true')) && matrix.compiler == 'gcc' }}
       env:
         PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
-      run: . $PRELUDE; publish_to_pypi $BUILD_ENV *.whl "$PYPI_TOKEN"
+      run: . $PRELUDE; publish_to_pypi $BUILD_ENV "$PYPI_TOKEN" *.whl
diff --git a/.github/workflows/fbgemm_gpu_ci_cuda.yml b/.github/workflows/fbgemm_gpu_ci_cuda.yml
index b9647cf699..45957afc7b 100644
--- a/.github/workflows/fbgemm_gpu_ci_cuda.yml
+++ b/.github/workflows/fbgemm_gpu_ci_cuda.yml
@@ -212,4 +212,4 @@ jobs:
       if: ${{ (github.event_name == 'schedule' && matrix.cuda-version == matrix.cuda-version-publish) || (github.event_name == 'workflow_dispatch' && github.event.inputs.publish_to_pypi == 'true' && matrix.cuda-version == matrix.cuda-version-publish) }}
       env:
         PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
-      run: . $PRELUDE; publish_to_pypi $BUILD_ENV *.whl "$PYPI_TOKEN"
+      run: . $PRELUDE; publish_to_pypi $BUILD_ENV "$PYPI_TOKEN" *.whl
diff --git a/.github/workflows/fbgemm_gpu_ci_genai.yml b/.github/workflows/fbgemm_gpu_ci_genai.yml
index 95baa53fd6..7ce15ce519 100644
--- a/.github/workflows/fbgemm_gpu_ci_genai.yml
+++ b/.github/workflows/fbgemm_gpu_ci_genai.yml
@@ -56,7 +56,7 @@ jobs:
     env:
       PRELUDE: .github/scripts/setup_env.bash
       BUILD_ENV: build_binary
-      BUILD_VARIANT: cuda
+      BUILD_VARIANT: genai
     continue-on-error: true
     strategy:
       # Don't fast-fail all the other builds if one of the them fails
@@ -212,4 +212,4 @@ jobs:
       if: ${{ (github.event_name == 'schedule' && matrix.cuda-version == matrix.cuda-version-publish) || (github.event_name == 'workflow_dispatch' && github.event.inputs.publish_to_pypi == 'true' && matrix.cuda-version == matrix.cuda-version-publish) }}
       env:
         PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
-      run: . $PRELUDE; publish_to_pypi $BUILD_ENV *.whl "$PYPI_TOKEN"
+      run: . $PRELUDE; publish_to_pypi $BUILD_ENV "$PYPI_TOKEN" *.whl
diff --git a/.github/workflows/fbgemm_gpu_release_cpu.yml b/.github/workflows/fbgemm_gpu_release_cpu.yml
index d91359b6a1..1bb2038cd6 100644
--- a/.github/workflows/fbgemm_gpu_release_cpu.yml
+++ b/.github/workflows/fbgemm_gpu_release_cpu.yml
@@ -179,8 +179,8 @@ jobs:
       timeout-minutes: ${{ matrix.host-machine.timeout }}
       run: . $PRELUDE; test_all_fbgemm_gpu_modules $BUILD_ENV
 
-    - name: Push FBGEMM_GPU (CPU version) Binary to PYPI
+    - name: Push Wheel to PyPI
       if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.publish_to_pypi == 'true' }}
       env:
         PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
-      run: . $PRELUDE; publish_to_pypi $BUILD_ENV *.whl "$PYPI_TOKEN"
+      run: . $PRELUDE; publish_to_pypi $BUILD_ENV "$PYPI_TOKEN" *.whl
diff --git a/.github/workflows/fbgemm_gpu_release_cuda.yml b/.github/workflows/fbgemm_gpu_release_cuda.yml
index b4436d0e2a..cbe5f9c1a4 100644
--- a/.github/workflows/fbgemm_gpu_release_cuda.yml
+++ b/.github/workflows/fbgemm_gpu_release_cuda.yml
@@ -189,8 +189,8 @@ jobs:
       timeout-minutes: 20
       run: . $PRELUDE; test_all_fbgemm_gpu_modules $BUILD_ENV
 
-    - name: Push FBGEMM_GPU Binary to PYPI
+    - name: Push Wheel to PyPI
       if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.publish_to_pypi == 'true' && matrix.cuda-version == github.event.inputs.cuda_version }}
       env:
         PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
-      run: . $PRELUDE; publish_to_pypi $BUILD_ENV *.whl "$PYPI_TOKEN"
+      run: . $PRELUDE; publish_to_pypi $BUILD_ENV "$PYPI_TOKEN" *.whl
diff --git a/.github/workflows/fbgemm_gpu_release_genai.yml b/.github/workflows/fbgemm_gpu_release_genai.yml
new file mode 100644
index 0000000000..14d744d620
--- /dev/null
+++ b/.github/workflows/fbgemm_gpu_release_genai.yml
@@ -0,0 +1,196 @@
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# This workflow is used for building and publishing FBGEMM_GPU-GenAI release
+# builds against PyTorch-CUDA Release to public PyPI.
+name: FBGEMM_GPU-GenAI Release Build
+
+on:
+  # PR Trigger (enabled for regression checks and debugging)
+  # NOTE: Branch filters are glob patterns, not regexes (matches e.g. v0.8.0-release)
+  pull_request:
+    branches:
+      - 'v[0-9]+.[0-9]+.[0-9]+-release'
+
+  # Push Trigger (enable to catch errors coming out of multiple merges)
+  # NOTE: Branch filters are glob patterns, not regexes (matches e.g. v0.8.0-release)
+  push:
+    branches:
+      - 'v[0-9]+.[0-9]+.[0-9]+-release'
+
+  # Manual Trigger
+  # NOTE: The inputs below are empty on PR/push events; steps apply fallbacks where needed
+  workflow_dispatch:
+    inputs:
+      pytorch_channel:
+        description: Package Channel to Use for PyTorch Installation
+        type: choice
+        required: false
+        options: [ "nightly", "test", "release" ]
+        default: "test"
+      cuda_version:
+        description: CUDA Version to Use for Building Artifact
+        type: choice
+        required: false
+        options: [ "11.8.0", "12.1.1", "12.4.1" ]
+        default: "12.1.1"
+      publish_to_pypi:
+        description: Publish Artifact to PyPI
+        type: boolean
+        required: false
+        default: false
+
+concurrency:
+  # Cancel previous runs in the PR if a new commit is pushed
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  # Build on CPU hosts and upload to GHA
+  build_artifact:
+    runs-on: ${{ matrix.host-machine.instance }}
+    container:
+      image: amazonlinux:2023
+      options: --user root
+    defaults:
+      run:
+        shell: bash
+    env:
+      PRELUDE: .github/scripts/setup_env.bash
+      BUILD_ENV: build_binary
+      BUILD_VARIANT: genai
+    continue-on-error: true
+    strategy:
+      # Don't fast-fail all the other builds if one of them fails
+      fail-fast: false
+      matrix:
+        host-machine: [
+          { arch: x86, instance: "linux.24xlarge" },
+        ]
+        python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]
+        cuda-version: [ "11.8.0", "12.1.1", "12.4.1" ]
+
+    steps:
+    - name: Setup Build Container
+      run: yum update -y; yum install -y binutils findutils git pciutils sudo tar wget which
+
+    - name: Checkout the Repository
+      uses: actions/checkout@v4
+
+    - name: Display System Info
+      run: . $PRELUDE; print_system_info
+
+    - name: Display GPU Info
+      run: . $PRELUDE; print_gpu_info
+
+    - name: Setup Miniconda
+      run: . $PRELUDE; setup_miniconda $HOME/miniconda
+
+    - name: Create Conda Environment
+      run: . $PRELUDE; create_conda_environment $BUILD_ENV ${{ matrix.python-version }}
+
+    - name: Install C/C++ Compilers
+      run: . $PRELUDE; install_cxx_compiler $BUILD_ENV
+
+    - name: Install Build Tools
+      run: . $PRELUDE; install_build_tools $BUILD_ENV
+
+    - name: Install CUDA
+      run: . $PRELUDE; install_cuda $BUILD_ENV ${{ matrix.cuda-version }}
+
+    - name: Install PyTorch Test
+      run: . $PRELUDE; install_pytorch_pip $BUILD_ENV ${{ github.event.inputs.pytorch_channel || 'test' }} cuda/${{ matrix.cuda-version }}
+
+    - name: Collect PyTorch Environment Info
+      if: ${{ success() || failure() }}
+      run: if . $PRELUDE && which conda; then collect_pytorch_env_info $BUILD_ENV; fi
+
+    - name: Install cuDNN
+      run: . $PRELUDE; install_cudnn $BUILD_ENV "$(pwd)/build_only/cudnn" ${{ matrix.cuda-version }}
+
+    - name: Prepare FBGEMM_GPU Build
+      run: . $PRELUDE; cd fbgemm_gpu; prepare_fbgemm_gpu_build $BUILD_ENV
+
+    - name: Build FBGEMM_GPU
+      run: . $PRELUDE; cd fbgemm_gpu; build_fbgemm_gpu_package $BUILD_ENV release genai
+
+    - name: Upload Built Wheel as GHA Artifact
+      uses: actions/upload-artifact@v3
+      with:
+        name: fbgemm_gpu_release_genai_${{ matrix.host-machine.arch }}_${{ matrix.python-version }}_cu${{ matrix.cuda-version }}.whl
+        path: fbgemm_gpu/dist/*.whl
+        if-no-files-found: error
+
+
+  # Download the built artifact from GHA, test on GPU, and push to PyPI
+  test_and_publish_artifact:
+    runs-on: ${{ matrix.host-machine.instance }}
+    defaults:
+      run:
+        shell: bash
+    env:
+      PRELUDE: .github/scripts/setup_env.bash
+      BUILD_ENV: build_binary
+      BUILD_VARIANT: genai
+      ENFORCE_CUDA_DEVICE: 1
+    strategy:
+      fail-fast: false
+      matrix:
+        host-machine: [
+          { arch: x86, instance: "linux.g5.4xlarge.nvidia.gpu" },
+        ]
+        python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]
+        cuda-version: [ "11.8.0", "12.1.1", "12.4.1" ]
+    needs: build_artifact
+
+    steps:
+    - name: Checkout the Repository
+      uses: actions/checkout@v3
+
+    - name: Download Wheel Artifact from GHA
+      uses: actions/download-artifact@v3
+      with:
+        name: fbgemm_gpu_release_genai_${{ matrix.host-machine.arch }}_${{ matrix.python-version }}_cu${{ matrix.cuda-version }}.whl
+
+    - name: Install NVIDIA Drivers and NVIDIA-Docker Runtime
+      uses: pytorch/test-infra/.github/actions/setup-nvidia@main
+
+    - name: Display System Info
+      run: . $PRELUDE; print_system_info; print_ec2_info
+
+    - name: Display GPU Info
+      run: . $PRELUDE; print_gpu_info
+
+    - name: Setup Miniconda
+      run: . $PRELUDE; setup_miniconda $HOME/miniconda
+
+    - name: Create Conda Environment
+      run: . $PRELUDE; create_conda_environment $BUILD_ENV ${{ matrix.python-version }}
+
+    - name: Install CUDA
+      run: . $PRELUDE; install_cuda $BUILD_ENV ${{ matrix.cuda-version }}
+
+    - name: Install PyTorch Test
+      run: . $PRELUDE; install_pytorch_pip $BUILD_ENV ${{ github.event.inputs.pytorch_channel || 'test' }} cuda/${{ matrix.cuda-version }}
+
+    - name: Collect PyTorch Environment Info
+      if: ${{ success() || failure() }}
+      run: if . $PRELUDE && which conda; then collect_pytorch_env_info $BUILD_ENV; fi
+
+    - name: Prepare FBGEMM_GPU Build
+      run: . $PRELUDE; cd fbgemm_gpu; prepare_fbgemm_gpu_build $BUILD_ENV
+
+    - name: Install FBGEMM_GPU
+      run: . $PRELUDE; install_fbgemm_gpu_wheel $BUILD_ENV *.whl
+
+    - name: Test with PyTest
+      timeout-minutes: 20
+      run: . $PRELUDE; test_all_fbgemm_gpu_modules $BUILD_ENV
+
+    - name: Push Wheel to PyPI
+      if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.publish_to_pypi == 'true' && matrix.cuda-version == github.event.inputs.cuda_version }}
+      env:
+        PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
+      run: . $PRELUDE; publish_to_pypi $BUILD_ENV "$PYPI_TOKEN" *.whl