Skip to content

Commit

Permalink
2024-08-13 nightly release (9f17b23)
Browse files Browse the repository at this point in the history
  • Loading branch information
pytorchbot committed Aug 13, 2024
1 parent dfc47f0 commit d7dd14e
Show file tree
Hide file tree
Showing 18 changed files with 1,367 additions and 1,037 deletions.
2 changes: 1 addition & 1 deletion .github/scripts/fbgemm_gpu_build.bash
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ prepare_fbgemm_gpu_build () {
(exec_with_retries 3 conda run --no-capture-output ${env_prefix} python -m pip install -r requirements.txt) || return 1

# BUILD_VARIANT is provided by the github workflow file
if [ "$BUILD_VARIANT" == "cuda" ]; then
if [ "$BUILD_VARIANT" == "cuda" ] || [ "$BUILD_VARIANT" == "genai" ]; then
(install_triton_pip "${env_name}") || return 1
fi

Expand Down
5 changes: 3 additions & 2 deletions .github/workflows/fbgemm_gpu_ci_cpu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,8 @@ jobs:
uses: actions/upload-artifact@v4
with:
name: fbgemm_gpu_nightly_cpu_${{ matrix.host-machine.arch }}_${{ matrix.compiler }}_py${{ matrix.python-version }}.whl
path: fbgemm_gpu/dist/fbgemm_gpu_nightly_cpu-*.whl
path: fbgemm_gpu/dist/*.whl
if-no-files-found: error


# Download the built artifact from GHA, test on GPU, and push to PyPI
Expand Down Expand Up @@ -190,4 +191,4 @@ jobs:
if: ${{ (github.event_name == 'schedule' || (github.event_name == 'workflow_dispatch' && github.event.inputs.publish_to_pypi == 'true')) && matrix.compiler == 'gcc' }}
env:
PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
run: . $PRELUDE; publish_to_pypi $BUILD_ENV fbgemm_gpu_nightly_cpu-*.whl "$PYPI_TOKEN"
run: . $PRELUDE; publish_to_pypi $BUILD_ENV *.whl "$PYPI_TOKEN"
5 changes: 3 additions & 2 deletions .github/workflows/fbgemm_gpu_ci_cuda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,8 @@ jobs:
uses: actions/upload-artifact@v3
with:
name: fbgemm_gpu_nightly_cuda_${{ matrix.host-machine.arch }}_${{ matrix.compiler }}_py${{ matrix.python-version }}_cu${{ matrix.cuda-version }}.whl
path: fbgemm_gpu/dist/fbgemm_gpu_nightly-*.whl
path: fbgemm_gpu/dist/*.whl
if-no-files-found: error


# Download the built artifact from GHA, test on GPU, and push to PyPI
Expand Down Expand Up @@ -211,4 +212,4 @@ jobs:
if: ${{ (github.event_name == 'schedule' && matrix.cuda-version == matrix.cuda-version-publish) || (github.event_name == 'workflow_dispatch' && github.event.inputs.publish_to_pypi == 'true' && matrix.cuda-version == matrix.cuda-version-publish) }}
env:
PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
run: . $PRELUDE; publish_to_pypi $BUILD_ENV fbgemm_gpu_nightly-*.whl "$PYPI_TOKEN"
run: . $PRELUDE; publish_to_pypi $BUILD_ENV *.whl "$PYPI_TOKEN"
215 changes: 215 additions & 0 deletions .github/workflows/fbgemm_gpu_ci_genai.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# CI for the FBGEMM_GPU-GenAI variant, plus nightly builds of
# FBGEMM_GPU-GenAI against PyTorch-CUDA Nightly.
name: FBGEMM_GPU-GenAI CI

on:
  # PR trigger (enabled for regression checks and debugging)
  #
  pull_request:
    branches:
      - main

  # Push trigger (enabled to catch errors coming out of multiple merges)
  #
  push:
    branches:
      - main

  # Cron trigger (UTC)
  #
  # Based on the Conda page for PyTorch-nightly, the GPU nightly releases
  # appear around 02:30 PST every day (roughly 2 hours after the CPU releases)
  #
  schedule:
    - cron: '45 12 * * *'

  # Manual trigger
  #
  workflow_dispatch:
    inputs:
      publish_to_pypi:
        description: Publish Artifact to PyPI
        type: boolean
        required: false
        default: false

concurrency:
  # Cancel previous runs in the PR if a new commit is pushed
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:
  # Build on CPU hosts and upload the wheel to GHA
  build_artifact:
    runs-on: ${{ matrix.host-machine.instance }}
    container:
      image: amazonlinux:2023
      options: --user root
    defaults:
      run:
        shell: bash
    env:
      PRELUDE: .github/scripts/setup_env.bash
      BUILD_ENV: build_binary
      BUILD_VARIANT: cuda
    continue-on-error: true
    strategy:
      # Don't fast-fail all the other builds if one of the them fails
      fail-fast: false
      matrix:
        host-machine: [
          { arch: x86, instance: "linux.24xlarge" },
        ]
        python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]
        cuda-version: [ "11.8.0", "12.1.1", "12.4.1" ]
        compiler: [ "gcc", "clang" ]

    steps:
      - name: Setup Build Container
        run: yum update -y; yum install -y binutils findutils git pciutils sudo tar wget which

      - name: Checkout the Repository
        uses: actions/checkout@v4
        with:
          submodules: true

      - name: Display System Info
        run: . $PRELUDE; print_system_info

      - name: Display GPU Info
        run: . $PRELUDE; print_gpu_info

      - name: Setup Miniconda
        run: . $PRELUDE; setup_miniconda $HOME/miniconda

      - name: Create Conda Environment
        run: . $PRELUDE; create_conda_environment $BUILD_ENV ${{ matrix.python-version }}

      - name: Install C/C++ Compilers
        run: . $PRELUDE; install_cxx_compiler $BUILD_ENV ${{ matrix.compiler }}

      - name: Install Build Tools
        run: . $PRELUDE; install_build_tools $BUILD_ENV

      - name: Install CUDA
        run: . $PRELUDE; install_cuda $BUILD_ENV ${{ matrix.cuda-version }}

      # Install via PIP to avoid defaulting to the CPU variant if the GPU variant of the day is not ready
      - name: Install PyTorch Nightly
        run: . $PRELUDE; install_pytorch_pip $BUILD_ENV nightly cuda/${{ matrix.cuda-version }}

      - name: Collect PyTorch Environment Info
        if: ${{ success() || failure() }}
        run: if . $PRELUDE && which conda; then collect_pytorch_env_info $BUILD_ENV; fi

      - name: Install cuDNN
        run: . $PRELUDE; install_cudnn $BUILD_ENV "$(pwd)/build_only/cudnn" ${{ matrix.cuda-version }}

      - name: Prepare FBGEMM_GPU Build
        run: . $PRELUDE; cd fbgemm_gpu; prepare_fbgemm_gpu_build $BUILD_ENV

      - name: Build FBGEMM_GPU Wheel
        run: . $PRELUDE; cd fbgemm_gpu; build_fbgemm_gpu_package $BUILD_ENV nightly genai

      - name: Upload Built Wheel as GHA Artifact
        # Cannot upgrade to actions/upload-artifact@v4 yet because GLIBC on the instance is too old
        uses: actions/upload-artifact@v3
        with:
          name: fbgemm_gpu_nightly_genai_${{ matrix.host-machine.arch }}_${{ matrix.compiler }}_py${{ matrix.python-version }}_cu${{ matrix.cuda-version }}.whl
          path: fbgemm_gpu/dist/*.whl
          if-no-files-found: error


  # Download the built artifact from GHA, test on GPU, and push to PyPI
  test_and_publish_artifact:
    # runs-on: linux.4xlarge.nvidia.gpu
    # Use available instance types - https://github.com/pytorch/test-infra/blob/main/.github/scale-config.yml
    runs-on: ${{ matrix.host-machine.instance }}
    defaults:
      run:
        shell: bash
    env:
      PRELUDE: .github/scripts/setup_env.bash
      BUILD_ENV: build_binary
      BUILD_VARIANT: genai
      ENFORCE_CUDA_DEVICE: 1
    strategy:
      fail-fast: false
      matrix:
        host-machine: [
          { arch: x86, instance: "linux.g5.4xlarge.nvidia.gpu" },
          # TODO: Enable when A100 machine queues are reasonably small enough for doing per-PR CI
          # https://hud.pytorch.org/metrics
          # { arch: x86, instance: "linux.gcp.a100" },
        ]
        python-version: [ "3.8", "3.9", "3.10", "3.11", "3.12" ]
        cuda-version: [ "11.8.0", "12.1.1", "12.4.1" ]
        # Specify exactly ONE CUDA version for artifact publish
        cuda-version-publish: [ "12.1.1" ]
        compiler: [ "gcc", "clang" ]
    needs: build_artifact

    steps:
      # Cannot upgrade to actions/checkout@v4 yet because GLIBC on the instance is too old
      - name: Checkout the Repository
        uses: actions/checkout@v3
        with:
          submodules: true

      - name: Download Wheel Artifact from GHA
        # Cannot upgrade to actions/download-artifact@v4 yet because GLIBC on the instance is too old
        uses: actions/download-artifact@v3
        with:
          name: fbgemm_gpu_nightly_genai_${{ matrix.host-machine.arch }}_${{ matrix.compiler }}_py${{ matrix.python-version }}_cu${{ matrix.cuda-version }}.whl

      # Use PyTorch test infrastructure action - https://github.com/pytorch/test-infra/blob/main/.github/actions/setup-nvidia/action.yml
      - name: Install NVIDIA Drivers and NVIDIA-Docker Runtime
        uses: pytorch/test-infra/.github/actions/setup-nvidia@main

      - name: Display System Info
        run: . $PRELUDE; print_system_info; print_ec2_info

      - name: Display GPU Info
        run: . $PRELUDE; print_gpu_info

      - name: Setup Miniconda
        run: . $PRELUDE; setup_miniconda $HOME/miniconda

      - name: Create Conda Environment
        run: . $PRELUDE; create_conda_environment $BUILD_ENV ${{ matrix.python-version }}

      - name: Install C/C++ Compilers for Updated LIBGCC
        # Install clang libraries to enable building and install triton
        run: . $PRELUDE; install_cxx_compiler $BUILD_ENV clang

      - name: Install CUDA
        run: . $PRELUDE; install_cuda $BUILD_ENV ${{ matrix.cuda-version }}

      # Install via PIP to avoid defaulting to the CPU variant if the GPU variant of the day is not ready
      - name: Install PyTorch Nightly
        run: . $PRELUDE; install_pytorch_pip $BUILD_ENV nightly cuda/${{ matrix.cuda-version }}

      - name: Collect PyTorch Environment Info
        if: ${{ success() || failure() }}
        run: if . $PRELUDE && which conda; then collect_pytorch_env_info $BUILD_ENV; fi

      - name: Prepare FBGEMM_GPU Build
        run: . $PRELUDE; cd fbgemm_gpu; prepare_fbgemm_gpu_build $BUILD_ENV

      - name: Install FBGEMM_GPU Wheel
        run: . $PRELUDE; install_fbgemm_gpu_wheel $BUILD_ENV *.whl

      - name: Test with PyTest
        timeout-minutes: 30
        run: . $PRELUDE; test_all_fbgemm_gpu_modules $BUILD_ENV

      - name: Push Wheel to PyPI
        if: ${{ (github.event_name == 'schedule' && matrix.cuda-version == matrix.cuda-version-publish) || (github.event_name == 'workflow_dispatch' && github.event.inputs.publish_to_pypi == 'true' && matrix.cuda-version == matrix.cuda-version-publish) }}
        env:
          PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
        run: . $PRELUDE; publish_to_pypi $BUILD_ENV *.whl "$PYPI_TOKEN"
3 changes: 2 additions & 1 deletion .github/workflows/fbgemm_gpu_ci_rocm.yml
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,8 @@ jobs:
uses: actions/upload-artifact@v4
with:
name: fbgemm_gpu_nightly_rocm_${{ matrix.host-machine.arch }}_${{ matrix.compiler }}_py${{ matrix.python-version }}_rocm${{ matrix.rocm-version }}.whl
path: fbgemm_gpu/dist/fbgemm_gpu_nightly_rocm-*.whl
path: fbgemm_gpu/dist/*.whl
if-no-files-found: error


# Download the built artifact from GHA, test on GPU, and push to PyPI
Expand Down
5 changes: 3 additions & 2 deletions .github/workflows/fbgemm_gpu_release_cpu.yml
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,8 @@ jobs:
uses: actions/upload-artifact@v4
with:
name: fbgemm_gpu_release_cpu_${{ matrix.host-machine.arch }}_${{ matrix.python-version }}.whl
path: fbgemm_gpu/dist/fbgemm_gpu_cpu-*.whl
path: fbgemm_gpu/dist/*.whl
if-no-files-found: error


# Download the built artifact from GHA, test on GPU, and push to PyPI
Expand Down Expand Up @@ -182,4 +183,4 @@ jobs:
if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.publish_to_pypi == 'true' }}
env:
PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
run: . $PRELUDE; publish_to_pypi $BUILD_ENV fbgemm_gpu_cpu-*.whl "$PYPI_TOKEN"
run: . $PRELUDE; publish_to_pypi $BUILD_ENV *.whl "$PYPI_TOKEN"
5 changes: 3 additions & 2 deletions .github/workflows/fbgemm_gpu_release_cuda.yml
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,8 @@ jobs:
uses: actions/upload-artifact@v3
with:
name: fbgemm_gpu_release_cuda_${{ matrix.host-machine.arch }}_${{ matrix.python-version }}_cu${{ matrix.cuda-version }}.whl
path: fbgemm_gpu/dist/fbgemm_gpu-*.whl
path: fbgemm_gpu/dist/*.whl
if-no-files-found: error


# Download the built artifact from GHA, test on GPU, and push to PyPI
Expand Down Expand Up @@ -192,4 +193,4 @@ jobs:
if: ${{ github.event_name == 'workflow_dispatch' && github.event.inputs.publish_to_pypi == 'true' && matrix.cuda-version == github.event.inputs.cuda_version }}
env:
PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }}
run: . $PRELUDE; publish_to_pypi $BUILD_ENV fbgemm_gpu-*.whl "$PYPI_TOKEN"
run: . $PRELUDE; publish_to_pypi $BUILD_ENV *.whl "$PYPI_TOKEN"
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
# General
.DS_Store
*~
.hypothesis/

# Byte-compiled / optimized / DLL files
__pycache__/
Expand Down
7 changes: 4 additions & 3 deletions fbgemm_gpu/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,10 @@ set(CMAKE_VERBOSE_MAKEFILE on)
# FBGEMM_GPU Build Options
################################################################################

option(FBGEMM_CPU_ONLY "Build FBGEMM_GPU without GPU support" OFF)
option(USE_ROCM "Build FBGEMM_GPU for ROCm" OFF)
option(FBGEMM_GENAI_ONLY "Build FBGEMM_GPU with GEN AI only support" OFF)
option(FBGEMM_CPU_ONLY "Build FBGEMM_GPU without GPU support" OFF)
option(USE_ROCM "Build FBGEMM_GPU for ROCm" OFF)
option(FBGEMM_GENAI_ONLY "Build FBGEMM_GPU with GEN AI only support" OFF)
option(USE_FB_ONLY "Build FBGEMM_GPU FB only operators" OFF)

if((NOT FBGEMM_CPU_ONLY) AND
((EXISTS "/opt/rocm/") OR (EXISTS $ENV{ROCM_PATH})) AND
Expand Down
11 changes: 11 additions & 0 deletions fbgemm_gpu/experimental/gen_ai/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,14 @@ if(USE_ROCM)
else()
set(quantize_ops_sources
src/quantize/cutlass_extensions.cu
src/quantize/cutlass_extensions/f8f8bf16.cu
src/quantize/cutlass_extensions/f8f8bf16_blockwise.cu
src/quantize/cutlass_extensions/f8f8bf16_cublas.cu
src/quantize/cutlass_extensions/f8f8bf16_rowwise.cu
src/quantize/cutlass_extensions/i8i8bf16.cu
src/quantize/cutlass_extensions/f8i4bf16_rowwise.cu
src/quantize/cutlass_extensions/i8i8bf16_dynamic.cu
src/quantize/cutlass_extensions/bf16i4bf16_rowwise.cu
src/quantize/quantize.cu
src/quantize/quantize.cpp)
endif()
Expand All @@ -58,6 +61,14 @@ set(experimental_gen_ai_cpp_source_files
${quantize_ops_sources}
${comm_ops_sources})

# Set the source file for FB only CPP
if(USE_FB_ONLY)
file(GLOB fb_only_ops_sources
fb/src/*/*.cu
fb/src/*/*.cpp)
list(APPEND experimental_gen_ai_cpp_source_files ${fb_only_ops_sources})
endif()

set_source_files_properties(${experimental_gen_ai_cpp_source_files}
PROPERTIES INCLUDE_DIRECTORIES
"${fbgemm_sources_include_directories}")
Expand Down
3 changes: 3 additions & 0 deletions fbgemm_gpu/experimental/gen_ai/gen_ai/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@
torch.ops.load_library(
os.path.join(os.path.dirname(__file__), "fbgemm_gpu_experimental_gen_ai_py.so")
)
torch.classes.load_library(
os.path.join(os.path.dirname(__file__), "fbgemm_gpu_experimental_gen_ai_py.so")
)
else:
torch.ops.load_library(
"//deeplearning/fbgemm/fbgemm_gpu/experimental/gen_ai:attention_ops"
Expand Down
Loading

0 comments on commit d7dd14e

Please sign in to comment.