Skip to content

Commit

Permalink
[fbgemm_gpu] Build script enhancements
Browse files Browse the repository at this point in the history
- Script enhancements for installing compiler toolchains (leftover from D60430228)
  • Loading branch information
q10 committed Aug 14, 2024
1 parent 0b41cce commit dd2c370
Show file tree
Hide file tree
Showing 5 changed files with 101 additions and 29 deletions.
20 changes: 14 additions & 6 deletions .github/scripts/utils_base.bash
Original file line number Diff line number Diff line change
Expand Up @@ -87,17 +87,25 @@ env_name_or_prefix () {
fi
}

append_to_library_path () {
local env=$1
local value="$2"
append_to_envvar () {
  # Append a value to a (colon-delimited) environment variable registered in
  # the given Conda environment, creating the variable if it does not exist.
  #
  # Args:
  #   $1 - Conda environment name (or prefix)
  #   $2 - name of the environment variable to append to (e.g. LD_LIBRARY_PATH)
  #   $3 - value to append
  local env_name="$1"
  local key="$2"
  local value="$3"

  # shellcheck disable=SC2155
  local env_prefix=$(env_name_or_prefix "${env_name}")

  echo "[ENV] Appending to ${key}: ${value} ..."
  # Fetch the variable's current value inside the Conda environment (empty if
  # unset).  NOTE: ${key} is quoted to avoid word-splitting of the name.
  # shellcheck disable=SC2155,SC2086
  local current_value=$(conda run ${env_prefix} printenv "${key}")
  # Append with a ':' separator only when a previous value already exists.
  # shellcheck disable=SC2086
  (print_exec conda env config vars set ${env_prefix} "${key}"="${current_value:+${current_value}:}${value}") || return 1
}

append_to_library_path () {
  # Append a directory to LD_LIBRARY_PATH in the given Conda environment.
  #
  # Args:
  #   $1 - Conda environment name (or prefix)
  #   $2 - directory path to append
  local target_env="$1"
  local lib_dir="$2"

  (append_to_envvar "${target_env}" LD_LIBRARY_PATH "${lib_dir}") || return 1
}

test_network_connection () {
Expand Down
46 changes: 39 additions & 7 deletions .github/scripts/utils_build.bash
Original file line number Diff line number Diff line change
Expand Up @@ -67,18 +67,45 @@ __conda_install_glibc () {
(exec_with_retries 3 conda install ${env_prefix} -c conda-forge -y "sysroot_linux-${archname}"=2.17) || return 1
}

__set_glibcxx_preload () {
  # Enumerate the Conda-installed libstdc++ libraries and register the first
  # one (in version-sorted order) in the environment's LD_PRELOAD.
  #
  # NOTE(review): reads ${env_name} from the caller's scope — confirm that all
  # callers set env_name before invoking this function.
  # shellcheck disable=SC2155
  local env_prefix=$(env_name_or_prefix "${env_name}")

  # shellcheck disable=SC2155,SC2086
  local conda_prefix=$(conda run ${env_prefix} printenv CONDA_PREFIX)

  echo "[TEST] Enumerating libstdc++.so files ..."
  # BUGFIX: collect the matches into a real array.  The original stored the
  # find output in a plain string, so ${all_libcxx_libs[0]} expanded to ALL
  # matched paths (newline-joined) rather than just the first one.
  local all_libcxx_libs=()
  mapfile -t all_libcxx_libs < <(find "${conda_prefix}/lib" -type f -name 'libstdc++.so*' -print | sort)

  local f
  for f in "${all_libcxx_libs[@]}"; do
    echo "$f"
    # Print the GLIBCXX symbol versions exported by each library
    objdump -TC "$f" | grep GLIBCXX_ | sed 's/.*GLIBCXX_\([.0-9]*\).*/GLIBCXX_\1/g' | sort -Vu | cat
    echo ""
  done

  # NOTE: This is needed to prevent FBGEMM_GPU from defaulting to loading the
  # system-provided libstdc++, which may be older than the Conda-installed
  # libstdc++ and thus might not support the GLIBCXX version required by
  # FBGEMM_GPU.  This phenomenon is known to at least occur in the Netlify
  # docs builds!
  echo "[TEST] Appending the Conda-installed libstdc++ to LD_PRELOAD ..."
  append_to_envvar "${env_name}" LD_PRELOAD "${all_libcxx_libs[0]}"
}

__conda_install_gcc () {
# Install gxx_linux-<arch> from conda-forge instead of from anaconda channel.
#
# NOTE: We install g++ 10.x instead of 11.x becaue 11.x builds binaries that
# reference GLIBCXX_3.4.29, which may not be available on systems with older
# versions of libstdc++.so.6 such as CentOS Stream 8 and Ubuntu 20.04

# shellcheck disable=SC2155
local env_prefix=$(env_name_or_prefix "${env_name}")

# NOTE: g++ 10.x is installed by default instead of 11.x+ because 11.x+ builds
# binaries that reference GLIBCXX_3.4.29, which may not be available on
# systems with older versions of libstdc++.so.6 such as CentOS Stream 8 and
# Ubuntu 20.04. However, if libfolly is used, GLIBCXX_3.4.30+ will be
# required, which will require 11.x+.
#
# shellcheck disable=SC2155
local gcc_version=10.4.0
local gcc_version="${GCC_VERSION:-10.4.0}"

echo "[INSTALL] Installing GCC (${gcc_version}, ${archname}) through Conda ..."
# shellcheck disable=SC2086
Expand All @@ -98,14 +125,19 @@ __conda_install_gcc () {
print_exec ln -sf "${cc_path}" "$(dirname "$cc_path")/gcc"
print_exec ln -sf "${cxx_path}" "$(dirname "$cxx_path")/c++"
print_exec ln -sf "${cxx_path}" "$(dirname "$cxx_path")/g++"

if [ "$SET_GLIBCXX_PRELOAD" == "1" ]; then
# Set libstdc++ preload options
__set_glibcxx_preload
fi
}

__conda_install_clang () {
# shellcheck disable=SC2155
local env_prefix=$(env_name_or_prefix "${env_name}")

# shellcheck disable=SC2155
local llvm_version=16.0.6
local llvm_version="${LLVM_VERSION:-16.0.6}"

echo "[INSTALL] Installing Clang (${llvm_version}, ${archname}) and relevant libraries through Conda ..."
# NOTE: libcxx from conda-forge is outdated for linux-aarch64, so we cannot
Expand Down Expand Up @@ -203,7 +235,7 @@ install_cxx_compiler () {
# https://forums.developer.nvidia.com/t/cuda-issues-with-clang-compiler/177589/8
__conda_install_gcc

# Install the C/C++ compiler
# Install Clang if needed
if [ "$compiler" == "clang" ]; then
# Existing symlinks to cc / c++ / gcc / g++ will be overridden
__conda_install_clang
Expand Down
17 changes: 14 additions & 3 deletions .github/scripts/utils_cuda.bash
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,10 @@ install_cuda () {
# shellcheck disable=SC2086
print_exec conda env config vars set ${env_prefix} NVML_LIB_PATH="${nvml_lib_path}"

local nvcc_prepend_flags=(
-allow-unsupported-compiler
)

if print_exec "conda run ${env_prefix} c++ --version | grep -i clang"; then
# Explicitly set whatever $CONDA_PREFIX/bin/c++ points to as the the host
# compiler, but set GNU libstdc++ (as opposed to Clang libc++) as the
Expand All @@ -90,14 +94,21 @@ install_cuda () {
# NOTE: There appears to be no ROCm equivalent for NVCC_PREPEND_FLAGS:
# https://github.com/ROCm/HIP/issues/931
#
echo "[BUILD] Explicitly setting Clang as the host compiler for NVCC: ${cxx_path}"
echo "[BUILD] Setting Clang as the NVCC host compiler: ${cxx_path}"

# shellcheck disable=SC2155,SC2086
local cxx_path=$(conda run ${env_prefix} which c++)
# shellcheck disable=SC2086
print_exec conda env config vars set ${env_prefix} NVCC_PREPEND_FLAGS=\"-Xcompiler -stdlib=libstdc++ -ccbin ${cxx_path} -allow-unsupported-compiler\"

nvcc_prepend_flags+=(
-Xcompiler -stdlib=libstdc++
-ccbin "${cxx_path}"
)
fi

echo "[BUILD] Setting prepend flags for NVCC ..."
# shellcheck disable=SC2086,SC2145
print_exec conda env config vars set ${env_prefix} NVCC_PREPEND_FLAGS=\""${nvcc_prepend_flags[@]}"\"

# https://stackoverflow.com/questions/27686382/how-can-i-dump-all-nvcc-preprocessor-defines
echo "[INFO] Printing out all preprocessor defines in nvcc ..."
# shellcheck disable=SC2086
Expand Down
43 changes: 30 additions & 13 deletions fbgemm_gpu/docs/src/fbgemm_gpu-development/BuildInstructions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,8 @@ Follow the instructions for setting up the Conda environment at
:ref:`fbgemm-gpu.build.setup.tools.install`.


Set Up for CUDA Build
---------------------
Set Up for CUDA / GenAI-Only Build
----------------------------------

The CUDA build of FBGEMM_GPU requires a recent version of ``nvcc`` **that
supports compute capability 3.5+**. Setting the machine up for CUDA builds of
Expand Down Expand Up @@ -280,7 +280,7 @@ toolchain **that supports C++20**:

.. code:: sh
# Use a recent version of LLVM+Clang
# Minimum LLVM+Clang version required for FBGEMM_GPU
llvm_version=16.0.6
# NOTE: libcxx from conda-forge is outdated for linux-aarch64, so we cannot
Expand Down Expand Up @@ -447,8 +447,8 @@ Verify the PyTorch-Triton installation with an ``import`` test:
# Ensure that the package loads properly
conda run -n ${env_name} python -c "import triton"
Build the FBGEMM_GPU Package
----------------------------
Other Pre-Build Setup
---------------------

.. _fbgemm-gpu.build.prepare:

Expand Down Expand Up @@ -517,7 +517,7 @@ Python platform name must first be properly set:
.. _fbgemm-gpu.build.process.cpu:

CPU-Only Build
~~~~~~~~~~~~~~
--------------

For CPU-only builds, the ``--cpu_only`` flag needs to be specified.

Expand Down Expand Up @@ -558,10 +558,13 @@ Note that this presumes the Clang toolchain is properly installed along with the
GCC toolchain, and is made available as ``${cxxprefix}/bin/cc`` and
``${cxxprefix}/bin/c++``.

To enable runtime debug features, such as device-side assertions in CUDA and
HIP, simply append the ``--debug`` flag when invoking ``setup.py``.

.. _fbgemm-gpu.build.process.cuda:

CUDA Build
~~~~~~~~~~
----------

Building FBGEMM_GPU for CUDA requires both NVML and cuDNN to be installed and
made available to the build through environment variables. The presence of a
Expand All @@ -583,6 +586,20 @@ toolchains have been properly installed.
# [OPTIONAL] Provide the CUB installation directory (applicable only to CUDA versions prior to 11.1)
export CUB_DIR=/path/to/cub
# [OPTIONAL] Allow NVCC to use host compilers that are newer than what NVCC officially supports
nvcc_prepend_flags=(
-allow-unsupported-compiler
)
# [OPTIONAL] If clang is the host compiler, set NVCC to use libstdc++ since libc++ is not supported
nvcc_prepend_flags+=(
-Xcompiler -stdlib=libstdc++
-ccbin "/path/to/clang++"
)
# [OPTIONAL] Set NVCC_PREPEND_FLAGS as needed
export NVCC_PREPEND_FLAGS="${nvcc_prepend_flags[@]}"
# Specify cuDNN header and library paths
export CUDNN_INCLUDE_DIR=/path/to/cudnn/include
export CUDNN_LIBRARY=/path/to/cudnn/lib
Expand Down Expand Up @@ -620,8 +637,8 @@ toolchains have been properly installed.
.. _fbgemm-gpu.build.process.genai:

Experimental-Only (GenAI) Build
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
GenAI-Only Build
----------------

By default, the CUDA build of FBGEMM_GPU includes all experimental modules that
are used for GenAI applications. The instructions for building just the
Expand Down Expand Up @@ -651,7 +668,7 @@ Note that currently, only CUDA is supported for the experimental modules.
.. _fbgemm-gpu.build.process.rocm:

ROCm Build
~~~~~~~~~~
----------

For ROCm builds, ``ROCM_PATH`` and ``PYTORCH_ROCM_ARCH`` need to be specified.
The presence of a ROCm device, however, is not required for building
Expand Down Expand Up @@ -688,13 +705,13 @@ presuming the toolchains have been properly installed.
-DCMAKE_CXX_FLAGS="-DTORCH_USE_HIP_DSA"
Post-Build Checks (For Developers)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
----------------------------------

After the build completes, it is useful to run some checks that verify
that the build is actually correct.

Undefined Symbols Check
^^^^^^^^^^^^^^^^^^^^^^^
~~~~~~~~~~~~~~~~~~~~~~~

Because FBGEMM_GPU contains a lot of Jinja and C++ template instantiations, it
is important to make sure that there are no undefined symbols that are
Expand All @@ -711,7 +728,7 @@ accidentally generated over the course of development:
nm -gDCu "${fbgemm_gpu_lib_path}" | sort
GLIBC Version Compatibility Check
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

It is also useful to verify that the version numbers of GLIBCXX
referenced as well as the availability of certain function symbols:
Expand Down
4 changes: 4 additions & 0 deletions netlify.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@
export BUILD_ENV=build_docs
. ../../.github/scripts/setup_env.bash
# Print system info
print_exec uname -a
print_exec ldd --version
# Set up Conda environment
setup_miniconda $HOME/miniconda
create_conda_environment $BUILD_ENV 3.12
Expand Down

0 comments on commit dd2c370

Please sign in to comment.