# Review preamble (descriptive text only; the patch content below is unchanged).
#
# Purpose: replace the ROCm find module with a ROCm SMI specific one and turn
# GPU support off automatically when no usable GPU stack is detected.
#
# Per-file summary:
#   CMakeLists.txt
#     - find_package(CUDAToolkit) / find_package(ROCmSMI) now run before the
#       "GPU support level" check message is printed.
#     - If neither CUDAToolkit_FOUND nor ROCmSMI_FOUND is set, QV_GPU_SUPPORT is
#       set to OFF and a STATUS message is emitted.
#   cmake/FindROCm.cmake (deleted) -> cmake/FindROCmSMI.cmake (new)
#     - Variables are renamed from ROCM_* to ROCmSMI_*.
#     - The OpenCL header lookup (ROCM_OPENCL_INCLUDE_DIR) is dropped.
#     - After the standard-args check, a try_compile() of
#       cmake/cmake-try-compile-rocm-smi.c is run on every configure (the
#       cached result is unset first); ROCmSMI_FOUND is set to FALSE when it
#       fails.
#     - An imported target named ROCmSMI replaces the imported target ROCm.
#   cmake/QVhwloc.cmake
#     - Conditions test ROCmSMI_FOUND instead of ROCM_FOUND.
#     - CPPFLAGS becomes "-I${ROCmSMI_INCLUDE_DIRS}".
#     - LDFLAGS becomes "${ROCmSMI_LIBRARIES}" (previously "-L${ROCM_HOME}/lib").
#     - The hwloc interface target links ROCmSMI instead of ROCm.
#   cmake/cmake-try-compile-rocm-smi.c (new)
#     - Minimal program that includes rocm_smi/rocm_smi.h and calls rsmi_init(0).
#   config.h.in, src/qvi-rsmi.cc
#     - The ROCM_FOUND macro is renamed to ROCmSMI_FOUND.
#   src/qvi-hwloc.cc
#     - discover_gpu_devices() returns QV_SUCCESS immediately when
#       QV_GPU_SUPPORT is not defined.
#
# NOTE(review): in this copy the hunk bodies appear collapsed onto a few long
#   lines (line breaks and indentation lost); compare against the original
#   commit before trying to apply it.
# NOTE(review): in FindROCmSMI.cmake the add_library(ROCmSMI SHARED IMPORTED
#   GLOBAL) call sits outside the if (ROCmSMI_FOUND) block, so the target is
#   created even when nothing was found, and there is no if(NOT TARGET ROCmSMI)
#   guard. Presumably a second find_package(ROCmSMI) in the same configure
#   would fail on the duplicate target name; consider guarding with
#   if (ROCmSMI_FOUND AND NOT TARGET ROCmSMI).
# NOTE(review): try_compile() locates its source through CMAKE_SOURCE_DIR; if
#   this project is ever consumed via add_subdirectory() that path would
#   presumably point at the parent project. CMAKE_CURRENT_LIST_DIR looks safer.
#   TODO confirm.
# NOTE(review): try_compile() passes the include path as
#   COMPILE_DEFINITIONS "-I..."; CMAKE_FLAGS "-DINCLUDE_DIRECTORIES=..." is the
#   documented way to hand include directories to try_compile. TODO confirm the
#   current form behaves with all generators in use.
# NOTE(review): QVI_HWLOC_LDFLAGS changes from a -L<dir> flag to a library file
#   path. Verify that hwloc's configure step still resolves the rsmi library
#   with that value.
# NOTE(review): rsmi_rc in the try-compile source is assigned but never read;
#   with warnings-as-errors in the C flags this could presumably make the probe
#   fail for the wrong reason. A (void)rsmi_rc; or returning a value derived
#   from it would avoid that.
# NOTE(review): ROCmSMI_INCLUDE_DIR is only hinted at <home>/rocm_smi/include;
#   confirm this layout matches the ROCm releases that need to be supported.
#
diff --git a/CMakeLists.txt b/CMakeLists.txt index 1128a453..1f59ea3c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -146,17 +146,21 @@ endif() ################################################################################ option(QV_GPU_SUPPORT "Toggle GPU support" ON) -message(CHECK_START "Determining desired GPU support level") if(QV_GPU_SUPPORT) - message(CHECK_PASS "enabled") -else() - message(CHECK_PASS "disabled") + # TODO(skg) Set minimum required version? + find_package(CUDAToolkit) + find_package(ROCmSMI) + if (NOT (CUDAToolkit_FOUND OR ROCmSMI_FOUND)) + set(QV_GPU_SUPPORT OFF) + message(STATUS "Turning off GPU support: no suitable support found") + endif() endif() +message(CHECK_START "Determining GPU support level") if(QV_GPU_SUPPORT) - # TODO(skg) Set minimum required version? - find_package(CUDAToolkit) - find_package(ROCm) + message(CHECK_PASS "enabled") +else() + message(CHECK_PASS "disabled") endif() # Specify default build type diff --git a/cmake/FindROCm.cmake b/cmake/FindROCm.cmake deleted file mode 100644 index 3bfb9aa9..00000000 --- a/cmake/FindROCm.cmake +++ /dev/null @@ -1,74 +0,0 @@ -# -# Copyright (c) 2022 Triad National Security, LLC -# All rights reserved. -# -# This file is part of the quo-vadis project. See the LICENSE file at the -# top-level directory of this distribution. 
-# - -find_path( - ROCM_HOME - NAMES - bin/rocm-smi - HINTS - /opt/rocm -) - -find_path( - ROCM_SMI_INCLUDE_DIR - NAMES - rocm_smi/rocm_smi.h - HINTS - "${ROCM_HOME}/rocm_smi/include" -) - -find_path( - ROCM_OPENCL_INCLUDE_DIR - NAMES - CL/cl.h - HINTS - "${ROCM_HOME}/opencl/include" -) - -find_library( - ROCM_LIBRARY - NAMES - rocm_smi64 - HINTS - "${ROCM_HOME}/lib" -) - -include(FindPackageHandleStandardArgs) -# Handle the QUIETLY and REQUIRED arguments and set ROCM_FOUND to TRUE if all -# listed variables are TRUE -find_package_handle_standard_args( - ROCm - DEFAULT_MSG - ROCM_HOME ROCM_SMI_INCLUDE_DIR ROCM_OPENCL_INCLUDE_DIR ROCM_LIBRARY -) - -mark_as_advanced( - ROCM_HOME - ROCM_SMI_INCLUDE_DIR - ROCM_OPENCL_INCLUDE_DIR - ROCM_LIBRARY -) - -set(ROCM_HOME "${ROCM_HOME}") -set(ROCM_LIBRARIES "${ROCM_LIBRARY}") -set(ROCM_INCLUDE_DIRS "${ROCM_SMI_INCLUDE_DIR}") -list(APPEND ROCM_INCLUDE_DIRS "${ROCM_OPENCL_INCLUDE_DIR}") - -add_library( - ROCm - SHARED IMPORTED GLOBAL -) - -set_target_properties( - ROCm - PROPERTIES - IMPORTED_LOCATION "${ROCM_LIBRARIES}" - INTERFACE_INCLUDE_DIRECTORIES "${ROCM_INCLUDE_DIRS}" -) - -# vim: ts=4 sts=4 sw=4 expandtab diff --git a/cmake/FindROCmSMI.cmake b/cmake/FindROCmSMI.cmake new file mode 100644 index 00000000..dfe74ab4 --- /dev/null +++ b/cmake/FindROCmSMI.cmake @@ -0,0 +1,86 @@ +# +# Copyright (c) 2022-2024 Triad National Security, LLC +# All rights reserved. +# +# This file is part of the quo-vadis project. See the LICENSE file at the +# top-level directory of this distribution. 
+# + +find_path( + ROCmSMI_HOME + NAMES + bin/rocm-smi + HINTS + /opt/rocm +) + +find_path( + ROCmSMI_INCLUDE_DIR + NAMES + rocm_smi/rocm_smi.h + HINTS + "${ROCmSMI_HOME}/rocm_smi/include" +) + +find_library( + ROCmSMI_LIBRARY + NAMES + rocm_smi64 + HINTS + "${ROCmSMI_HOME}/lib" +) + +include(FindPackageHandleStandardArgs) +# Handle the QUIETLY and REQUIRED arguments and set ROCmSMI_FOUND to TRUE if all +# listed variables are TRUE +find_package_handle_standard_args( + ROCmSMI + DEFAULT_MSG + ROCmSMI_HOME ROCmSMI_INCLUDE_DIR ROCmSMI_LIBRARY +) + +if (ROCmSMI_FOUND) + # Force test compile of code every time. + unset(QV_ROCmSMI_COMPILES CACHE) + # Some versions of ROCm SMI are broken, so test here. + try_compile( + QV_ROCmSMI_COMPILES + "${PROJECT_BINARY_DIR}/QVTryCompile" + SOURCES + "${CMAKE_SOURCE_DIR}/cmake/cmake-try-compile-rocm-smi.c" + COMPILE_DEFINITIONS + "-I${ROCmSMI_INCLUDE_DIR}" + LINK_LIBRARIES + "${ROCmSMI_LIBRARY}" + ) + if (QV_ROCmSMI_COMPILES) + message(STATUS "Found usable ROCm SMI: ${ROCmSMI_INCLUDE_DIR}") + else() + message(STATUS "Could not find a usable ROCm SMI") + set(ROCmSMI_FOUND FALSE) + endif() +endif() + +mark_as_advanced( + ROCmSMI_HOME + ROCmSMI_INCLUDE_DIR + ROCmSMI_LIBRARY +) + +set(ROCmSMI_HOME "${ROCmSMI_HOME}") +set(ROCmSMI_LIBRARIES "${ROCmSMI_LIBRARY}") +set(ROCmSMI_INCLUDE_DIRS "${ROCmSMI_INCLUDE_DIR}") + +add_library( + ROCmSMI + SHARED IMPORTED GLOBAL +) + +set_target_properties( + ROCmSMI + PROPERTIES + IMPORTED_LOCATION "${ROCmSMI_LIBRARIES}" + INTERFACE_INCLUDE_DIRECTORIES "${ROCmSMI_INCLUDE_DIRS}" +) + +# vim: ts=4 sts=4 sw=4 expandtab diff --git a/cmake/QVhwloc.cmake b/cmake/QVhwloc.cmake index d10027d7..afa0c86e 100644 --- a/cmake/QVhwloc.cmake +++ b/cmake/QVhwloc.cmake @@ -52,13 +52,10 @@ else() list(APPEND QVI_HWLOC_GPU_FLAGS "--enable-nvml=no") endif() -if(ROCM_FOUND AND QV_GPU_SUPPORT) +if(ROCmSMI_FOUND AND QV_GPU_SUPPORT) list(APPEND QVI_HWLOC_GPU_FLAGS "--enable-rsmi") - set( - QVI_HWLOC_CPPFLAGS - 
"-I${ROCM_SMI_INCLUDE_DIR} -I${ROCM_OPENCL_INCLUDE_DIR}" - ) - set(QVI_HWLOC_LDFLAGS "-L${ROCM_HOME}/lib") + set(QVI_HWLOC_CPPFLAGS "-I${ROCmSMI_INCLUDE_DIRS}") + set(QVI_HWLOC_LDFLAGS "${ROCmSMI_LIBRARIES}") list(APPEND QVI_HWLOC_CONFIG_VARS "CPPFLAGS=${QVI_HWLOC_CPPFLAGS}") list(APPEND QVI_HWLOC_CONFIG_VARS "LDFLAGS=${QVI_HWLOC_LDFLAGS}") set(PCIACCESS_NEEDED TRUE) @@ -140,11 +137,11 @@ if(CUDAToolkit_FOUND AND QV_GPU_SUPPORT) ) endif() -if(ROCM_FOUND AND QV_GPU_SUPPORT) +if(ROCmSMI_FOUND AND QV_GPU_SUPPORT) target_link_libraries( hwloc INTERFACE - ROCm + ROCmSMI ) endif() diff --git a/cmake/cmake-try-compile-rocm-smi.c b/cmake/cmake-try-compile-rocm-smi.c new file mode 100644 index 00000000..468ceb25 --- /dev/null +++ b/cmake/cmake-try-compile-rocm-smi.c @@ -0,0 +1,7 @@ +#include "rocm_smi/rocm_smi.h" + +int main(void) +{ + rsmi_status_t rsmi_rc = rsmi_init(0); + return 0; +} diff --git a/config.h.in b/config.h.in index 3bfb63e4..363feba5 100644 --- a/config.h.in +++ b/config.h.in @@ -67,7 +67,7 @@ #cmakedefine QV_GPU_SUPPORT #cmakedefine CUDAToolkit_FOUND -#cmakedefine ROCM_FOUND +#cmakedefine ROCmSMI_FOUND #endif diff --git a/src/qvi-hwloc.cc b/src/qvi-hwloc.cc index c6556657..ef57446a 100644 --- a/src/qvi-hwloc.cc +++ b/src/qvi-hwloc.cc @@ -496,6 +496,9 @@ static int discover_gpu_devices( qvi_hwloc_t *hwl ) { +#ifndef QV_GPU_SUPPORT + return QV_SUCCESS; +#endif // This will maintain a mapping of PCI bus ID to device pointers. qvi_hwloc_dev_map_t devmap; diff --git a/src/qvi-rsmi.cc b/src/qvi-rsmi.cc index 1591f87a..4c88c119 100644 --- a/src/qvi-rsmi.cc +++ b/src/qvi-rsmi.cc @@ -14,7 +14,7 @@ #include "qvi-rsmi.h" #include "qvi-hwloc.h" -#ifdef ROCM_FOUND +#ifdef ROCmSMI_FOUND #include "rocm_smi/rocm_smi.h" #include "hwloc/rsmi.h" #endif @@ -25,7 +25,7 @@ qvi_hwloc_rsmi_get_device_cpuset_by_device_id( uint32_t devid, hwloc_cpuset_t cpuset ) { -#ifndef ROCM_FOUND +#ifndef ROCmSMI_FOUND QVI_UNUSED(hwl); QVI_UNUSED(devid); QVI_UNUSED(cpuset);