Skip to content

Commit

Permalink
[fbgemm_gpu] Build script enhancements
Browse files Browse the repository at this point in the history
- Script enhancements for installing compiler toolchains (leftover from D60430228)
  • Loading branch information
q10 committed Aug 14, 2024
1 parent 0b41cce commit dd2c370
Show file tree
Hide file tree
Showing 5 changed files with 101 additions and 29 deletions.
20 changes: 14 additions & 6 deletions .github/scripts/utils_base.bash
Original file line number Diff line number Diff line change
Expand Up @@ -87,17 +87,25 @@ env_name_or_prefix () {
fi
}

append_to_library_path () {
local env=$1
local value="$2"
append_to_envvar () {
  # Append a value to a (colon-delimited) environment variable registered in
  # the given Conda environment, creating the variable if it does not exist.
  #
  # Args:
  #   $1 - Conda environment name (or prefix)
  #   $2 - name of the environment variable to append to (e.g. LD_LIBRARY_PATH)
  #   $3 - value to append
  local env_name="$1"
  local key="$2"
  local value="$3"

  # shellcheck disable=SC2155
  local env_prefix=$(env_name_or_prefix "${env_name}")

  echo "[ENV] Appending to ${key}: ${value} ..."
  # Fetch the variable's current value inside the Conda environment (empty if
  # unset).  NOTE: ${key} is quoted to avoid word-splitting of the name.
  # shellcheck disable=SC2155,SC2086
  local current_value=$(conda run ${env_prefix} printenv "${key}")
  # Append with a ':' separator only when a previous value already exists.
  # shellcheck disable=SC2086
  (print_exec conda env config vars set ${env_prefix} "${key}"="${current_value:+${current_value}:}${value}") || return 1
}

append_to_library_path () {
  # Append a directory to LD_LIBRARY_PATH in the given Conda environment.
  #
  # Args:
  #   $1 - Conda environment name (or prefix)
  #   $2 - directory path to append
  local target_env="$1"
  local lib_dir="$2"

  (append_to_envvar "${target_env}" LD_LIBRARY_PATH "${lib_dir}") || return 1
}

test_network_connection () {
Expand Down
46 changes: 39 additions & 7 deletions .github/scripts/utils_build.bash
Original file line number Diff line number Diff line change
Expand Up @@ -67,18 +67,45 @@ __conda_install_glibc () {
(exec_with_retries 3 conda install ${env_prefix} -c conda-forge -y "sysroot_linux-${archname}"=2.17) || return 1
}

__set_glibcxx_preload () {
  # Enumerate the Conda-installed libstdc++ libraries and register the first
  # one (in version-sorted order) in the environment's LD_PRELOAD.
  #
  # NOTE(review): reads ${env_name} from the caller's scope — confirm that all
  # callers set env_name before invoking this function.
  # shellcheck disable=SC2155
  local env_prefix=$(env_name_or_prefix "${env_name}")

  # shellcheck disable=SC2155,SC2086
  local conda_prefix=$(conda run ${env_prefix} printenv CONDA_PREFIX)

  echo "[TEST] Enumerating libstdc++.so files ..."
  # BUGFIX: collect the matches into a real array.  The original stored the
  # find output in a plain string, so ${all_libcxx_libs[0]} expanded to ALL
  # matched paths (newline-joined) rather than just the first one.
  local all_libcxx_libs=()
  mapfile -t all_libcxx_libs < <(find "${conda_prefix}/lib" -type f -name 'libstdc++.so*' -print | sort)

  local f
  for f in "${all_libcxx_libs[@]}"; do
    echo "$f"
    # Print the GLIBCXX symbol versions exported by each library
    objdump -TC "$f" | grep GLIBCXX_ | sed 's/.*GLIBCXX_\([.0-9]*\).*/GLIBCXX_\1/g' | sort -Vu | cat
    echo ""
  done

  # NOTE: This is needed to prevent FBGEMM_GPU from defaulting to loading the
  # system-provided libstdc++, which may be older than the Conda-installed
  # libstdc++ and thus might not support the GLIBCXX version required by
  # FBGEMM_GPU.  This phenomenon is known to at least occur in the Netlify
  # docs builds!
  echo "[TEST] Appending the Conda-installed libstdc++ to LD_PRELOAD ..."
  append_to_envvar "${env_name}" LD_PRELOAD "${all_libcxx_libs[0]}"
}

__conda_install_gcc () {
# Install gxx_linux-<arch> from conda-forge instead of from anaconda channel.
#
# NOTE: We install g++ 10.x instead of 11.x becaue 11.x builds binaries that
# reference GLIBCXX_3.4.29, which may not be available on systems with older
# versions of libstdc++.so.6 such as CentOS Stream 8 and Ubuntu 20.04

# shellcheck disable=SC2155
local env_prefix=$(env_name_or_prefix "${env_name}")

# NOTE: g++ 10.x is installed by default instead of 11.x+ because 11.x+ builds
# binaries that reference GLIBCXX_3.4.29, which may not be available on
# systems with older versions of libstdc++.so.6 such as CentOS Stream 8 and
# Ubuntu 20.04. However, if libfolly is used, GLIBCXX_3.4.30+ will be
# required, which will require 11.x+.
#
# shellcheck disable=SC2155
local gcc_version=10.4.0
local gcc_version="${GCC_VERSION:-10.4.0}"

echo "[INSTALL] Installing GCC (${gcc_version}, ${archname}) through Conda ..."
# shellcheck disable=SC2086
Expand All @@ -98,14 +125,19 @@ __conda_install_gcc () {
print_exec ln -sf "${cc_path}" "$(dirname "$cc_path")/gcc"
print_exec ln -sf "${cxx_path}" "$(dirname "$cxx_path")/c++"
print_exec ln -sf "${cxx_path}" "$(dirname "$cxx_path")/g++"

if [ "$SET_GLIBCXX_PRELOAD" == "1" ]; then
# Set libstdc++ preload options
__set_glibcxx_preload
fi
}

__conda_install_clang () {
# shellcheck disable=SC2155
local env_prefix=$(env_name_or_prefix "${env_name}")

# shellcheck disable=SC2155
local llvm_version=16.0.6
local llvm_version="${LLVM_VERSION:-16.0.6}"

echo "[INSTALL] Installing Clang (${llvm_version}, ${archname}) and relevant libraries through Conda ..."
# NOTE: libcxx from conda-forge is outdated for linux-aarch64, so we cannot
Expand Down Expand Up @@ -203,7 +235,7 @@ install_cxx_compiler () {
# https://forums.developer.nvidia.com/t/cuda-issues-with-clang-compiler/177589/8
__conda_install_gcc

# Install the C/C++ compiler
# Install Clang if needed
if [ "$compiler" == "clang" ]; then
# Existing symlinks to cc / c++ / gcc / g++ will be overridden
__conda_install_clang
Expand Down
17 changes: 14 additions & 3 deletions .github/scripts/utils_cuda.bash
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,10 @@ install_cuda () {
# shellcheck disable=SC2086
print_exec conda env config vars set ${env_prefix} NVML_LIB_PATH="${nvml_lib_path}"

local nvcc_prepend_flags=(
-allow-unsupported-compiler
)

if print_exec "conda run ${env_prefix} c++ --version | grep -i clang"; then
# Explicitly set whatever $CONDA_PREFIX/bin/c++ points to as the the host
# compiler, but set GNU libstdc++ (as opposed to Clang libc++) as the
Expand All @@ -90,14 +94,21 @@ install_cuda () {
# NOTE: There appears to be no ROCm equivalent for NVCC_PREPEND_FLAGS:
# https://github.com/ROCm/HIP/issues/931
#
echo "[BUILD] Explicitly setting Clang as the host compiler for NVCC: ${cxx_path}"
echo "[BUILD] Setting Clang as the NVCC host compiler: ${cxx_path}"

# shellcheck disable=SC2155,SC2086
local cxx_path=$(conda run ${env_prefix} which c++)
# shellcheck disable=SC2086
print_exec conda env config vars set ${env_prefix} NVCC_PREPEND_FLAGS=\"-Xcompiler -stdlib=libstdc++ -ccbin ${cxx_path} -allow-unsupported-compiler\"

nvcc_prepend_flags+=(
-Xcompiler -stdlib=libstdc++
-ccbin "${cxx_path}"
)
fi

echo "[BUILD] Setting prepend flags for NVCC ..."
# shellcheck disable=SC2086,SC2145
print_exec conda env config vars set ${env_prefix} NVCC_PREPEND_FLAGS=\""${nvcc_prepend_flags[@]}"\"

# https://stackoverflow.com/questions/27686382/how-can-i-dump-all-nvcc-preprocessor-defines
echo "[INFO] Printing out all preprocessor defines in nvcc ..."
# shellcheck disable=SC2086
Expand Down
43 changes: 30 additions & 13 deletions fbgemm_gpu/docs/src/fbgemm_gpu-development/BuildInstructions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,8 @@ Follow the instructions for setting up the Conda environment at
:ref:`fbgemm-gpu.build.setup.tools.install`.


Set Up for CUDA Build
---------------------
Set Up for CUDA / GenAI-Only Build
----------------------------------

The CUDA build of FBGEMM_GPU requires a recent version of ``nvcc`` **that
supports compute capability 3.5+**. Setting the machine up for CUDA builds of
Expand Down Expand Up @@ -280,7 +280,7 @@ toolchain **that supports C++20**:

.. code:: sh
# Use a recent version of LLVM+Clang
# Minimum LLVM+Clang version required for FBGEMM_GPU
llvm_version=16.0.6
# NOTE: libcxx from conda-forge is outdated for linux-aarch64, so we cannot
Expand Down Expand Up @@ -447,8 +447,8 @@ Verify the PyTorch-Triton installation with an ``import`` test:
# Ensure that the package loads properly
conda run -n ${env_name} python -c "import triton"
Build the FBGEMM_GPU Package
----------------------------
Other Pre-Build Setup
---------------------

.. _fbgemm-gpu.build.prepare:

Expand Down Expand Up @@ -517,7 +517,7 @@ Python platform name must first be properly set:
.. _fbgemm-gpu.build.process.cpu:

CPU-Only Build
~~~~~~~~~~~~~~
--------------

For CPU-only builds, the ``--cpu_only`` flag needs to be specified.

Expand Down Expand Up @@ -558,10 +558,13 @@ Note that this presumes the Clang toolchain is properly installed along with the
GCC toolchain, and is made available as ``${cxxprefix}/bin/cc`` and
``${cxxprefix}/bin/c++``.

To enable runtime debug features, such as device-side assertions in CUDA and
HIP, simply append the ``--debug`` flag when invoking ``setup.py``.

.. _fbgemm-gpu.build.process.cuda:

CUDA Build
~~~~~~~~~~
----------

Building FBGEMM_GPU for CUDA requires both NVML and cuDNN to be installed and
made available to the build through environment variables. The presence of a
Expand All @@ -583,6 +586,20 @@ toolchains have been properly installed.
# [OPTIONAL] Provide the CUB installation directory (applicable only to CUDA versions prior to 11.1)
export CUB_DIR=/path/to/cub
# [OPTIONAL] Allow NVCC to use host compilers that are newer than what NVCC officially supports
nvcc_prepend_flags=(
-allow-unsupported-compiler
)
# [OPTIONAL] If clang is the host compiler, set NVCC to use libstdc++ since libc++ is not supported
nvcc_prepend_flags+=(
-Xcompiler -stdlib=libstdc++
-ccbin "/path/to/clang++"
)
# [OPTIONAL] Set NVCC_PREPEND_FLAGS as needed
export NVCC_PREPEND_FLAGS="${nvcc_prepend_flags[@]}"
# Specify cuDNN header and library paths
export CUDNN_INCLUDE_DIR=/path/to/cudnn/include
export CUDNN_LIBRARY=/path/to/cudnn/lib
Expand Down Expand Up @@ -620,8 +637,8 @@ toolchains have been properly installed.
.. _fbgemm-gpu.build.process.genai:

Experimental-Only (GenAI) Build
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
GenAI-Only Build
----------------

By default, the CUDA build of FBGEMM_GPU includes all experimental modules that
are used for GenAI applications. The instructions for building just the
Expand Down Expand Up @@ -651,7 +668,7 @@ Note that currently, only CUDA is supported for the experimental modules.
.. _fbgemm-gpu.build.process.rocm:

ROCm Build
~~~~~~~~~~
----------

For ROCm builds, ``ROCM_PATH`` and ``PYTORCH_ROCM_ARCH`` need to be specified.
The presence of a ROCm device, however, is not required for building
Expand Down Expand Up @@ -688,13 +705,13 @@ presuming the toolchains have been properly installed.
-DCMAKE_CXX_FLAGS="-DTORCH_USE_HIP_DSA"
Post-Build Checks (For Developers)
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
----------------------------------

After the build completes, it is useful to run some checks that verify
that the build is actually correct.

Undefined Symbols Check
^^^^^^^^^^^^^^^^^^^^^^^
~~~~~~~~~~~~~~~~~~~~~~~

Because FBGEMM_GPU contains a lot of Jinja and C++ template instantiations, it
is important to make sure that there are no undefined symbols that are
Expand All @@ -711,7 +728,7 @@ accidentally generated over the course of development:
nm -gDCu "${fbgemm_gpu_lib_path}" | sort
GLIBC Version Compatibility Check
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

It is also useful to verify that the version numbers of GLIBCXX
referenced as well as the availability of certain function symbols:
Expand Down
4 changes: 4 additions & 0 deletions netlify.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@
export BUILD_ENV=build_docs
. ../../.github/scripts/setup_env.bash
# Print system info
print_exec uname -a
print_exec ldd --version
# Set up Conda environment
setup_miniconda $HOME/miniconda
create_conda_environment $BUILD_ENV 3.12
Expand Down

0 comments on commit dd2c370

Please sign in to comment.