Skip to content

Commit

Permalink
Release v1.13
Browse files Browse the repository at this point in the history
  • Loading branch information
ptrendx committed Dec 9, 2024
2 parents 7a7225c + ccd7a0c commit e5edd6c
Show file tree
Hide file tree
Showing 132 changed files with 12,494 additions and 4,209 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ jobs:
name: 'PaddlePaddle'
runs-on: ubuntu-latest
container:
image: nvcr.io/nvidia/paddlepaddle:24.07-py3
image: nvcr.io/nvidia/paddlepaddle:24.10-py3
options: --user root
steps:
- name: 'Checkout'
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@ jobs:
uses: actions/checkout@v3
- name: 'Install dependencies'
run: |
pip install sphinx==5.1.1 sphinx_rtd_theme==1.0.0 nbsphinx==0.8.10 IPython ipython_genutils==0.2.0 ipywidgets==8.0.2 astroid==2.15.7
pip install breathe==4.34.0 sphinx-autoapi==2.0.1
pip install sphinx==8.1.3 sphinx_rtd_theme==3.0.1 nbsphinx==0.9.5 IPython ipython_genutils==0.2.0 ipywidgets==8.0.2 astroid==3.3.2
pip install breathe==4.35.0 sphinx-autoapi==3.3.2
sudo apt-get install -y pandoc graphviz doxygen
export GIT_SHA=$(git show-ref --hash HEAD)
- name: 'Build docs'
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/trigger-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ jobs:
|| github.actor == 'pggPL'
|| github.actor == 'vasunvidia'
|| github.actor == 'erhoo82'
|| github.actor == 'kocchop'
)
steps:
- name: Check if comment is issued by authorized person
Expand Down
3 changes: 1 addition & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,7 @@ __pycache__
.hypothesis
.devcontainer.json
tests/cpp/build/
docs/_build
.ipynb_checkpoints
docs/doxygen
*.log
CMakeFiles/CMakeSystem.cmake
sdist/
Expand All @@ -40,3 +38,4 @@ dist/
downloads/
.pytest_cache/
compile_commands.json
.nfs
2 changes: 1 addition & 1 deletion build_tools/VERSION.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.12.0
1.13.0
2 changes: 1 addition & 1 deletion build_tools/paddle.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def setup_paddle_extension(
# Source files
csrc_source_files = Path(csrc_source_files)
sources = [
csrc_source_files / "extensions.cu",
csrc_source_files / "extensions.cpp",
csrc_source_files / "common.cpp",
csrc_source_files / "custom_ops.cu",
]
Expand Down
21 changes: 5 additions & 16 deletions build_tools/pytorch.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
from .utils import (
all_files_in_dir,
cuda_archs,
cuda_path,
cuda_version,
)

Expand All @@ -27,11 +26,8 @@ def setup_pytorch_extension(
csrc_source_files = Path(csrc_source_files)
extensions_dir = csrc_source_files / "extensions"
sources = [
csrc_source_files / "common.cu",
csrc_source_files / "common.cpp",
csrc_source_files / "ts_fp8_op.cpp",
csrc_source_files / "userbuffers" / "ipcsocket.cc",
csrc_source_files / "userbuffers" / "userbuffers.cu",
csrc_source_files / "userbuffers" / "userbuffers-host.cpp",
] + all_files_in_dir(extensions_dir)

# Header files
Expand Down Expand Up @@ -85,19 +81,14 @@ def setup_pytorch_extension(
continue # Already handled
nvcc_flags.extend(["-gencode", f"arch=compute_{arch},code=sm_{arch}"])

# Libraries
library_dirs = []
libraries = []
if bool(int(os.getenv("NVTE_UB_WITH_MPI", 0))):
if bool(int(os.getenv("NVTE_UB_WITH_MPI", "0"))):
assert (
os.getenv("MPI_HOME") is not None
), "MPI_HOME must be set when compiling with NVTE_UB_WITH_MPI=1"
mpi_home = Path(os.getenv("MPI_HOME"))
include_dirs.append(mpi_home / "include")
), "MPI_HOME=/path/to/mpi must be set when compiling with NVTE_UB_WITH_MPI=1!"
mpi_path = Path(os.getenv("MPI_HOME"))
include_dirs.append(mpi_path / "include")
cxx_flags.append("-DNVTE_UB_WITH_MPI")
nvcc_flags.append("-DNVTE_UB_WITH_MPI")
library_dirs.append(mpi_home / "lib")
libraries.append("mpi")

# Construct PyTorch CUDA extension
sources = [str(path) for path in sources]
Expand All @@ -112,6 +103,4 @@ def setup_pytorch_extension(
"cxx": cxx_flags,
"nvcc": nvcc_flags,
},
libraries=[str(lib) for lib in libraries],
library_dirs=[str(lib_dir) for lib_dir in library_dirs],
)
3 changes: 3 additions & 0 deletions docs/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
_build
doxygen
sphinx_rtd_theme
9 changes: 7 additions & 2 deletions docs/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,10 @@ help:

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
%: Makefile sphinx_rtd_theme
PYTHONPATH=sphinx_rtd_theme:$(PYTHONPATH) $(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

# Patch Sphinx RTD theme 3.0.1 to add version selector in sidebar
sphinx_rtd_theme:
git clone --depth=1 -b 3.0.1 --single-branch https://github.com/readthedocs/sphinx_rtd_theme.git
bash -c "cd sphinx_rtd_theme; git apply ../version_select.patch"
4 changes: 4 additions & 0 deletions docs/api/pytorch.rst
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,7 @@ pyTorch
.. autoapifunction:: transformer_engine.pytorch.moe_permute

.. autoapifunction:: transformer_engine.pytorch.moe_unpermute

.. autoapifunction:: transformer_engine.pytorch.initialize_ub

.. autoapifunction:: transformer_engine.pytorch.destroy_ub
74 changes: 27 additions & 47 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,38 +2,30 @@
#
# See LICENSE for license information.

import datetime
import os
import sys
import sphinx_rtd_theme
from sphinx.ext.autodoc.mock import mock
from sphinx.ext.autodoc import between, ClassDocumenter, AttributeDocumenter
from sphinx.util import inspect
from builtins import str
from enum import Enum
import re
import pathlib
import subprocess
from pathlib import Path
from datetime import date

te_path = os.path.dirname(os.path.realpath(__file__))
from builtins import str

with open(te_path + "/../build_tools/VERSION.txt", "r") as f:
te_version = f.readline().strip()
# Basic project info
project = "Transformer Engine"
author = "NVIDIA CORPORATION & AFFILIATES"

# Copyright statement
release_year = 2022

current_year = date.today().year
current_year = datetime.date.today().year
if current_year == release_year:
copyright_year = release_year
else:
copyright_year = str(release_year) + "-" + str(current_year)
copyright = f"{copyright_year}, NVIDIA CORPORATION & AFFILIATES. All rights reserved."

project = "Transformer Engine"
copyright = "{}, NVIDIA CORPORATION & AFFILIATES. All rights reserved.".format(copyright_year)
author = "NVIDIA CORPORATION & AFFILIATES"
# Transformer Engine root directory
root_path = pathlib.Path(__file__).resolve().parent.parent

# Git hash
git_sha = os.getenv("GIT_SHA")

if not git_sha:
try:
git_sha = (
Expand All @@ -44,31 +36,16 @@
)
except:
git_sha = "0000000"

git_sha = git_sha[:7] if len(git_sha) > 7 else git_sha

if "dev" in te_version:
version = str(te_version + "-" + git_sha)
# Version
with open(root_path / "build_tools" / "VERSION.txt", "r") as f:
_raw_version = f.readline().strip()
if "dev" in _raw_version:
version = str(_raw_version + "-" + git_sha)
else:
version = str(te_version)
release = te_version

# hack: version is used for html creation, so put the version picker
# link here as well:
option_on = " selected"
option_off = ""
release_opt = option_on
option_nr = 0
version = (
version
+ """<br/>
Version select: <select onChange="window.location.href = this.value" onFocus="this.selectedIndex = {0}">
<option value="https://docs.nvidia.com/deeplearning/transformer-engine/user-guide/index.html"{1}>Current release</option>
<option value="https://docs.nvidia.com/deeplearning/transformer-engine/documentation-archive.html">Older releases</option>
</select>""".format(
option_nr, release_opt
)
)
version = str(_raw_version)
release = _raw_version

# -- General configuration ---------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration
Expand All @@ -92,12 +69,10 @@

pygments_style = "sphinx"


# -- Options for HTML output -------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#options-for-html-output

html_theme = "sphinx_rtd_theme"
html_theme_path = [sphinx_rtd_theme.get_html_theme_path()]
html_static_path = ["_static"]
html_show_sphinx = False

Expand All @@ -106,7 +81,12 @@
"css/nvidia_footer.css",
]

html_theme_options = {"display_version": True, "collapse_navigation": False, "logo_only": False}
html_theme_options = {
"collapse_navigation": False,
"logo_only": False,
"version_selector": False,
"language_selector": False,
}

napoleon_custom_sections = [
("Parallelism parameters", "params_style"),
Expand All @@ -116,8 +96,8 @@
("FP8-related parameters", "params_style"),
]

breathe_projects = {"TransformerEngine": os.path.abspath("doxygen/xml/")}
breathe_projects = {"TransformerEngine": root_path / "docs" / "doxygen" / "xml"}
breathe_default_project = "TransformerEngine"

autoapi_generate_api_docs = False
autoapi_dirs = ["../transformer_engine"]
autoapi_dirs = [root_path / "transformer_engine"]
21 changes: 21 additions & 0 deletions docs/version_select.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
diff --git a/sphinx_rtd_theme/layout.html b/sphinx_rtd_theme/layout.html
index e6a38b1..579eaec 100644
--- a/sphinx_rtd_theme/layout.html
+++ b/sphinx_rtd_theme/layout.html
@@ -124,6 +124,16 @@
{%- endif %}
</a>

+ {# Show TE version and version selector #}
+ <div class="version">
+ {{ version }}
+ <br>
+ Version select: <select onChange="window.location.href = this.value" onFocus="this.selectedIndex = 0">
+ <option value="https://docs.nvidia.com/deeplearning/transformer-engine/user-guide/index.html" selected>Current release</option>
+ <option value="https://docs.nvidia.com/deeplearning/transformer-engine/documentation-archive.html">Older releases</option>
+ </select>
+ </div>
+
{%- if READTHEDOCS or DEBUG %}
{%- if theme_version_selector or theme_language_selector %}
<div class="switch-menus">
2 changes: 2 additions & 0 deletions qa/L0_jax_unittest/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,7 @@ pip install -r $TE_PATH/examples/jax/encoder/requirements.txt

pytest -c $TE_PATH/tests/jax/pytest.ini -v $TE_PATH/examples/jax/mnist

# Make the encoder tests run-to-run deterministic so that CI results are stable
export XLA_FLAGS="${XLA_FLAGS} --xla_gpu_deterministic_ops"
pytest -c $TE_PATH/tests/jax/pytest.ini -v $TE_PATH/examples/jax/encoder --ignore=$TE_PATH/examples/jax/encoder/test_multiprocessing_encoder.py
pytest -c $TE_PATH/tests/jax/pytest.ini -v $TE_PATH/examples/jax/encoder/test_multiprocessing_encoder.py
9 changes: 8 additions & 1 deletion qa/L1_jax_distributed_unittest/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,11 @@
set -xe

: ${TE_PATH:=/opt/transformerengine}
pytest -c $TE_PATH/tests/jax/pytest.ini -v $TE_PATH/tests/jax/test_distributed_*

# Skip ring attention tests since they need fixed environment vars
pytest -c $TE_PATH/tests/jax/pytest.ini -v $TE_PATH/tests/jax/test_distributed_* -k 'not test_context_parallel_ring_attn'

# Test ring attention with and without scan loop
NVTE_FUSED_RING_ATTENTION_USE_SCAN=0 pytest -c $TE_PATH/tests/jax/pytest.ini -v $TE_PATH/tests/jax/test_distributed_fused_attn.py -k test_context_parallel_ring_attn
NVTE_FUSED_RING_ATTENTION_USE_SCAN=1 XLA_FLAGS="--xla_experimental_ignore_channel_id" \
pytest -c $TE_PATH/tests/jax/pytest.ini -v $TE_PATH/tests/jax/test_distributed_fused_attn.py -k test_context_parallel_ring_attn
1 change: 1 addition & 0 deletions qa/L1_pytorch_distributed_unittest/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,5 @@ pip install pytest==8.2.1
pytest -v -s $TE_PATH/tests/pytorch/distributed/test_numerics.py
pytest -v -s $TE_PATH/tests/pytorch/distributed/test_comm_gemm_overlap.py
pytest -v -s $TE_PATH/tests/pytorch/distributed/test_fusible_ops.py
pytest -v -s $TE_PATH/tests/pytorch/distributed/test_fusible_ops_with_userbuffers.py
pytest -v -s $TE_PATH/tests/pytorch/fused_attn/test_fused_attn_with_cp.py
58 changes: 58 additions & 0 deletions qa/L1_pytorch_mcore_integration/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# See LICENSE for license information.

# Abort as soon as any command below exits with a non-zero status
set -e

# Paths
# Both may be overridden from the environment; the ":=" expansion only assigns
# the default when the variable is unset or empty.
: ${TE_PATH:=/opt/transformerengine}
: ${MCORE_PATH:=${TE_PATH}/qa/L1_pytorch_mcore_integration/Megatron-LM}

# Download Megatron-LM if needed
# The core_r0.9.0 branch is cloned into the parent directory of MCORE_PATH
# under the literal name "Megatron-LM".
# NOTE(review): if MCORE_PATH is overridden with a different basename, the
# clone will not land at MCORE_PATH -- confirm this is intended.
if [ ! -d "${MCORE_PATH}" ]; then
pushd $(dirname ${MCORE_PATH})
git clone -b core_r0.9.0 https://github.com/NVIDIA/Megatron-LM.git Megatron-LM
popd
fi

# Megatron-LM invocation
# The command is written one token per line for readability. The leading
# NAME=value lines become per-command environment assignments for the python
# process once the newlines are collapsed into spaces below.
# NOTE(review): --vocab-file / --merge-file point at fixed /data paths,
# presumably provided by the CI environment -- confirm.
COMMAND="
NVTE_TORCH_COMPILE=0
NVTE_ALLOW_NONDETERMINISTIC_ALGO=0
NVTE_FLASH_ATTN=1
NVTE_FWD_LAYERNORM_SM_MARGIN=0
NVTE_BWD_LAYERNORM_SM_MARGIN=0
CUDA_DEVICE_MAX_CONNECTIONS=1
NVTE_BIAS_GELU_NVFUSION=0
NVTE_BIAS_DROPOUT_FUSION=0
python
-m torch.distributed.launch
--use_env
--nnodes=1
--nproc_per_node=1
${MCORE_PATH}/pretrain_gpt.py
--tensor-model-parallel-size 1
--pipeline-model-parallel-size 1
--use-cpu-initialization
--num-layers 2
--hidden-size 128
--num-attention-heads 8
--seq-length 128
--max-position-embeddings 2048
--micro-batch-size 1
--global-batch-size 8
--train-iters 10
--eval-iters 10
--lr 1e-4
--mock-data
--vocab-file /data/gpt3/pile-cc1-cc2-shuf/bpe/gpt2-vocab.json
--merge-file /data/gpt3/pile-cc1-cc2-shuf/bpe/gpt2-merges.txt
--transformer-impl transformer_engine
--fp8-format hybrid
"
# Flatten the multi-line string into a single command line
COMMAND=$(echo "${COMMAND}" | tr '\n' ' ')

# Launch Megatron-LM
# Runs in a child bash; as the last command, its exit status is the script's.
bash -c "${COMMAND}"
2 changes: 1 addition & 1 deletion qa/L3_pytorch_FA_versions_test/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ pip install pytest==8.2.1
export MAX_JOBS=4

# Iterate over Flash Attention versions
FA_versions=(2.1.1 2.3.0 2.4.0.post1 2.4.1 2.5.7 2.6.3 3.0.0b1)
FA_versions=(2.1.1 2.3.0 2.4.1 2.5.7 2.6.3 3.0.0b1)
for fa_version in "${FA_versions[@]}"
do

Expand Down
9 changes: 8 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,13 +57,20 @@ def run(self):

def setup_common_extension() -> CMakeExtension:
"""Setup CMake extension for common library"""
cmake_flags = ["-DCMAKE_CUDA_ARCHITECTURES={}".format(cuda_archs())]
if bool(int(os.getenv("NVTE_UB_WITH_MPI", "0"))):
assert (
os.getenv("MPI_HOME") is not None
), "MPI_HOME must be set when compiling with NVTE_UB_WITH_MPI=1"
cmake_flags.append("-DNVTE_UB_WITH_MPI=ON")

# Project directory root
root_path = Path(__file__).resolve().parent

return CMakeExtension(
name="transformer_engine",
cmake_path=root_path / Path("transformer_engine/common"),
cmake_flags=["-DCMAKE_CUDA_ARCHITECTURES={}".format(cuda_archs())],
cmake_flags=cmake_flags,
)


Expand Down
Loading

0 comments on commit e5edd6c

Please sign in to comment.