Skip to content

Commit

Permalink
2024-08-14 nightly release (3070f88)
Browse files Browse the repository at this point in the history
  • Loading branch information
pytorchbot committed Aug 14, 2024
1 parent d7dd14e commit 6f1270d
Show file tree
Hide file tree
Showing 28 changed files with 762 additions and 161 deletions.
1 change: 1 addition & 0 deletions .github/scripts/fbgemm_gpu_test.bash
Original file line number Diff line number Diff line change
Expand Up @@ -265,6 +265,7 @@ test_all_fbgemm_gpu_modules () {

# Determine the FBGEMM_GPU variant if needed
if [ "$fbgemm_gpu_variant" == "" ]; then
echo "[TEST] FBGEMM_GPU variant not explicitly provided by user; will automatically determine from the FBGEMM_GPU installation ..."
# shellcheck disable=SC2086
fbgemm_gpu_variant=$(conda run ${env_prefix} python -c "import fbgemm_gpu; print(fbgemm_gpu.__variant__)")
echo "[TEST] Determined FBGEMM_GPU variant from installation: ${fbgemm_gpu_variant}"
Expand Down
16 changes: 8 additions & 8 deletions .github/scripts/utils_pip.bash
Original file line number Diff line number Diff line change
Expand Up @@ -293,14 +293,14 @@ download_from_pytorch_pip () {

publish_to_pypi () {
local env_name="$1"
local package_name="$2"
local pypi_token="$3"
if [ "$pypi_token" == "" ]; then
local pypi_token="$2"
local package_filepath="$3"
if [ "$pypi_token" == "" ] || [ "$package_filepath" == "" ]; then
echo "Usage: ${FUNCNAME[0]} ENV_NAME PYPI_TOKEN PACKAGE_FILEPATH"
echo "Example(s):"
echo " ${FUNCNAME[0]} build_env fbgemm_gpu_nightly-*.whl MY_TOKEN"
echo " ${FUNCNAME[0]} build_env MY_TOKEN fbgemm_gpu_nightly-*.whl"
echo ""
echo "PYPI_TOKEN is missing!"
echo "PYPI_TOKEN and/or package filepath is missing!"
return 1
else
echo "################################################################################"
Expand All @@ -322,16 +322,16 @@ publish_to_pypi () {
(test_python_import_package "${env_name}" twine) || return 1
(test_python_import_package "${env_name}" OpenSSL) || return 1

echo "[PUBLISH] Uploading package(s) to PyPI: ${package_name} ..."
echo "[PUBLISH] Uploading package(s) to PyPI: ${package_filepath} ..."
# shellcheck disable=SC2086
conda run ${env_prefix} \
python -m twine upload \
--username __token__ \
--password "${pypi_token}" \
--skip-existing \
--verbose \
"${package_name}"
"${package_filepath}"

echo "[PUBLISH] Successfully published package(s) to PyPI: ${package_name}"
echo "[PUBLISH] Successfully published package(s) to PyPI: ${package_filepath}"
echo "[PUBLISH] NOTE: The publish command is a successful no-op if the wheel version already existed in PyPI; please double check!"
}
2 changes: 1 addition & 1 deletion .github/workflows/fbgemm_gpu_ci_cpu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -191,4 +191,4 @@ jobs:
if: ${{ (github.event_name == 'schedule' || (github.event_name == 'workflow_dispatch' && github.event.inputs.publish_to_pypi == 'true')) && matrix.compiler == 'gcc' }}
env:
PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
run: . $PRELUDE; publish_to_pypi $BUILD_ENV *.whl "$PYPI_TOKEN"
run: . $PRELUDE; publish_to_pypi $BUILD_ENV "$PYPI_TOKEN" *.whl
2 changes: 1 addition & 1 deletion .github/workflows/fbgemm_gpu_ci_cuda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -212,4 +212,4 @@ jobs:
if: ${{ (github.event_name == 'schedule' && matrix.cuda-version == matrix.cuda-version-publish) || (github.event_name == 'workflow_dispatch' && github.event.inputs.publish_to_pypi == 'true' && matrix.cuda-version == matrix.cuda-version-publish) }}
env:
PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
run: . $PRELUDE; publish_to_pypi $BUILD_ENV *.whl "$PYPI_TOKEN"
run: . $PRELUDE; publish_to_pypi $BUILD_ENV "$PYPI_TOKEN" *.whl
4 changes: 2 additions & 2 deletions .github/workflows/fbgemm_gpu_ci_genai.yml
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ jobs:
env:
PRELUDE: .github/scripts/setup_env.bash
BUILD_ENV: build_binary
BUILD_VARIANT: cuda
BUILD_VARIANT: genai
continue-on-error: true
strategy:
# Don't fast-fail all the other builds if one of the them fails
Expand Down Expand Up @@ -212,4 +212,4 @@ jobs:
if: ${{ (github.event_name == 'schedule' && matrix.cuda-version == matrix.cuda-version-publish) || (github.event_name == 'workflow_dispatch' && github.event.inputs.publish_to_pypi == 'true' && matrix.cuda-version == matrix.cuda-version-publish) }}
env:
PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
run: . $PRELUDE; publish_to_pypi $BUILD_ENV *.whl "$PYPI_TOKEN"
run: . $PRELUDE; publish_to_pypi $BUILD_ENV "$PYPI_TOKEN" *.whl
4 changes: 2 additions & 2 deletions .github/workflows/fbgemm_gpu_release_cpu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -179,8 +179,8 @@ jobs:
timeout-minutes: ${{ matrix.host-machine.timeout }}
run: . $PRELUDE; test_all_fbgemm_gpu_modules $BUILD_ENV

- name: Push FBGEMM_GPU (CPU version) Binary to PYPI
- name: Push Wheel to PyPI
if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.publish_to_pypi == 'true' }}
env:
PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
run: . $PRELUDE; publish_to_pypi $BUILD_ENV *.whl "$PYPI_TOKEN"
run: . $PRELUDE; publish_to_pypi $BUILD_ENV "$PYPI_TOKEN" *.whl
4 changes: 2 additions & 2 deletions .github/workflows/fbgemm_gpu_release_cuda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -189,8 +189,8 @@ jobs:
timeout-minutes: 20
run: . $PRELUDE; test_all_fbgemm_gpu_modules $BUILD_ENV

- name: Push FBGEMM_GPU Binary to PYPI
- name: Push Wheel to PyPI
if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.publish_to_pypi == 'true' && matrix.cuda-version == github.event.inputs.cuda_version }}
env:
PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
run: . $PRELUDE; publish_to_pypi $BUILD_ENV *.whl "$PYPI_TOKEN"
run: . $PRELUDE; publish_to_pypi $BUILD_ENV "$PYPI_TOKEN" *.whl
196 changes: 196 additions & 0 deletions .github/workflows/fbgemm_gpu_release_genai.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# This workflow is used for building and publishing FBGEMM_GPU-GenAI release
# builds against PyTorch-CUDA Release to public PyPI.
name: FBGEMM_GPU-GenAI Release Build

on:
# PR Trigger (enabled for regression checks and debugging)
#
pull_request:
branches:
- ^v([0-9]+)\.([0-9]+)\.([0-9]+)-release

# Push Trigger (enable to catch errors coming out of multiple merges)
#
push:
branches:
- ^v([0-9]+)\.([0-9]+)\.([0-9]+)-release

# Manual Trigger
#
workflow_dispatch:
inputs:
pytorch_channel:
description: Package Channel to Use for PyTorch Installation
type: choice
required: false
options: [ "nightly", "test", "release" ]
default: "test"
cuda_version:
description: CUDA Version to Use for Building Artifact
type: choice
required: false
options: [ "11.8.0", "12.1.1", "12.4.1" ]
default: "12.1.1"
publish_to_pypi:
description: Publish Artifact to PyPI
type: boolean
required: false
default: false

concurrency:
# Cancel previous runs in the PR if a new commit is pushed
group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
cancel-in-progress: true

jobs:
# Build on CPU hosts and upload to GHA
build_artifact:
runs-on: ${{ matrix.host-machine.instance }}
container:
image: amazonlinux:2023
options: --user root
defaults:
run:
shell: bash
env:
PRELUDE: .github/scripts/setup_env.bash
BUILD_ENV: build_binary
BUILD_VARIANT: genai
continue-on-error: true
strategy:
# Don't fast-fail all the other builds if one of the them fails
fail-fast: false
matrix:
host-machine: [
{ arch: x86, instance: "linux.24xlarge" },
]
python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]
cuda-version: [ "11.8.0", "12.1.1", "12.4.1" ]

steps:
- name: Setup Build Container
run: yum update -y; yum install -y binutils findutils git pciutils sudo tar wget which

- name: Checkout the Repository
uses: actions/checkout@v4

- name: Display System Info
run: . $PRELUDE; print_system_info

- name: Display GPU Info
run: . $PRELUDE; print_gpu_info

- name: Setup Miniconda
run: . $PRELUDE; setup_miniconda $HOME/miniconda

- name: Create Conda Environment
run: . $PRELUDE; create_conda_environment $BUILD_ENV ${{ matrix.python-version }}

- name: Install C/C++ Compilers
run: . $PRELUDE; install_cxx_compiler $BUILD_ENV

- name: Install Build Tools
run: . $PRELUDE; install_build_tools $BUILD_ENV

- name: Install CUDA
run: . $PRELUDE; install_cuda $BUILD_ENV ${{ matrix.cuda-version }}

- name: Install PyTorch Test
run: . $PRELUDE; install_pytorch_pip $BUILD_ENV ${{ github.event.inputs.pytorch_channel }} cuda/${{ matrix.cuda-version }}

- name: Collect PyTorch Environment Info
if: ${{ success() || failure() }}
run: if . $PRELUDE && which conda; then collect_pytorch_env_info $BUILD_ENV; fi

- name: Install cuDNN
run: . $PRELUDE; install_cudnn $BUILD_ENV "$(pwd)/build_only/cudnn" ${{ matrix.cuda-version }}

- name: Prepare FBGEMM_GPU Build
run: . $PRELUDE; cd fbgemm_gpu; prepare_fbgemm_gpu_build $BUILD_ENV

- name: Build FBGEMM_GPU
run: . $PRELUDE; cd fbgemm_gpu; build_fbgemm_gpu_package $BUILD_ENV release genai

- name: Upload Built Wheel as GHA Artifact
uses: actions/upload-artifact@v3
with:
name: fbgemm_gpu_release_genai_${{ matrix.host-machine.arch }}_${{ matrix.python-version }}_cu${{ matrix.cuda-version }}.whl
path: fbgemm_gpu/dist/*.whl
if-no-files-found: error


# Download the built artifact from GHA, test on GPU, and push to PyPI
test_and_publish_artifact:
runs-on: ${{ matrix.host-machine.instance }}
defaults:
run:
shell: bash
env:
PRELUDE: .github/scripts/setup_env.bash
BUILD_ENV: build_binary
BUILD_VARIANT: genai
ENFORCE_CUDA_DEVICE: 1
strategy:
fail-fast: false
matrix:
host-machine: [
{ arch: x86, instance: "linux.g5.4xlarge.nvidia.gpu" },
]
python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]
cuda-version: [ "11.8.0", "12.1.1", "12.4.1" ]
needs: build_artifact

steps:
- name: Checkout the Repository
uses: actions/checkout@v3

- name: Download Wheel Artifact from GHA
uses: actions/download-artifact@v3
with:
name: fbgemm_gpu_release_genai_${{ matrix.host-machine.arch }}_${{ matrix.python-version }}_cu${{ matrix.cuda-version }}.whl

- name: Install NVIDIA Drivers and NVIDIA-Docker Runtime
uses: pytorch/test-infra/.github/actions/setup-nvidia@main

- name: Display System Info
run: . $PRELUDE; print_system_info; print_ec2_info

- name: Display GPU Info
run: . $PRELUDE; print_gpu_info

- name: Setup Miniconda
run: . $PRELUDE; setup_miniconda $HOME/miniconda

- name: Create Conda Environment
run: . $PRELUDE; create_conda_environment $BUILD_ENV ${{ matrix.python-version }}

- name: Install CUDA
run: . $PRELUDE; install_cuda $BUILD_ENV ${{ matrix.cuda-version }}

- name: Install PyTorch Test
run: . $PRELUDE; install_pytorch_pip $BUILD_ENV ${{ github.event.inputs.pytorch_channel }} cuda/${{ matrix.cuda-version }}

- name: Collect PyTorch Environment Info
if: ${{ success() || failure() }}
run: if . $PRELUDE && which conda; then collect_pytorch_env_info $BUILD_ENV; fi

- name: Prepare FBGEMM_GPU Build
run: . $PRELUDE; cd fbgemm_gpu; prepare_fbgemm_gpu_build $BUILD_ENV

- name: Install FBGEMM_GPU
run: . $PRELUDE; install_fbgemm_gpu_wheel $BUILD_ENV *.whl

- name: Test with PyTest
timeout-minutes: 20
run: . $PRELUDE; test_all_fbgemm_gpu_modules $BUILD_ENV

- name: Push Wheel to PyPI
if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.publish_to_pypi == 'true' && matrix.cuda-version == github.event.inputs.cuda_version }}
env:
PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
run: . $PRELUDE; publish_to_pypi $BUILD_ENV "$PYPI_TOKEN" *.whl
45 changes: 38 additions & 7 deletions fbgemm_gpu/experimental/gemm/test/fp8_gemm_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ def _test_matmul_fp8_row(
fp8_fast_accum: bool,
use_bias: bool = False,
transpose_input: bool = False,
compile: bool = False,
) -> None:
M, N, K = shape
a = torch.randn(M, K, dtype=torch.bfloat16, device=device)
Expand All @@ -113,13 +114,42 @@ def _test_matmul_fp8_row(
torch.randn(N, dtype=torch.float32, device=device) if use_bias else None
)

# Quantize inputs.
a_fp8, a_scale = quantize_fp8_row(a)
b_fp8, b_scale = quantize_fp8_row(b)

result = matmul_fp8_row(
a_fp8, b_fp8, a_scale, b_scale, bias=bias, fp8_fast_accum=fp8_fast_accum
)
# Test that we can compile the full fp8 matmul operation.
if compile:

@torch.compile(fullgraph=True)
def _quantize_matmul_fp8(
a: torch.Tensor,
b: torch.Tensor,
bias: Optional[torch.Tensor],
fp8_fast_accum: bool,
) -> torch.Tensor:
a_fp8, a_scale = quantize_fp8_row(a)
b_fp8, b_scale = quantize_fp8_row(b)
return matmul_fp8_row(
a_fp8,
b_fp8,
a_scale,
b_scale,
bias=bias,
fp8_fast_accum=fp8_fast_accum,
)

result = _quantize_matmul_fp8(a, b, bias, fp8_fast_accum)
# Otherwise run normally.
else:
# Quantize inputs.
a_fp8, a_scale = quantize_fp8_row(a)
b_fp8, b_scale = quantize_fp8_row(b)

result = matmul_fp8_row(
a_fp8,
b_fp8,
a_scale,
b_scale,
bias=bias,
fp8_fast_accum=fp8_fast_accum,
)
self.assertTrue(result.shape == (M, N))

expected_result = a @ b.T
Expand All @@ -130,6 +160,7 @@ def _test_matmul_fp8_row(
)

_test_matmul_fp8_row((3, 4, 5), torch.device("cuda"), True)
_test_matmul_fp8_row((3, 4, 5), torch.device("cuda"), True, compile=True)
_test_matmul_fp8_row(
(5, 4, 5), torch.device("cuda"), True, transpose_input=True
)
Expand Down
Loading

0 comments on commit 6f1270d

Please sign in to comment.