Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update ROCm SMI cmake and some GPU code. #119

Merged
merged 1 commit into from
Apr 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 11 additions & 7 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -146,17 +146,21 @@ endif()
################################################################################
option(QV_GPU_SUPPORT "Toggle GPU support" ON)

message(CHECK_START "Determining desired GPU support level")
if(QV_GPU_SUPPORT)
message(CHECK_PASS "enabled")
else()
message(CHECK_PASS "disabled")
# TODO(skg) Set minimum required version?
find_package(CUDAToolkit)
find_package(ROCmSMI)
if (NOT (CUDAToolkit_FOUND OR ROCmSMI_FOUND))
set(QV_GPU_SUPPORT OFF)
message(STATUS "Turning off GPU support: no suitable support found")
endif()
endif()

message(CHECK_START "Determining GPU support level")
if(QV_GPU_SUPPORT)
# TODO(skg) Set minimum required version?
find_package(CUDAToolkit)
find_package(ROCm)
message(CHECK_PASS "enabled")
else()
message(CHECK_PASS "disabled")
endif()

# Specify default build type
Expand Down
74 changes: 0 additions & 74 deletions cmake/FindROCm.cmake

This file was deleted.

86 changes: 86 additions & 0 deletions cmake/FindROCmSMI.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
#
# Copyright (c) 2022-2024 Triad National Security, LLC
# All rights reserved.
#
# This file is part of the quo-vadis project. See the LICENSE file at the
# top-level directory of this distribution.
#

# Locate the ROCm installation prefix by looking for the rocm-smi executable.
# The ROCM_PATH environment variable, when set, is consulted in addition to the
# /opt/rocm default so that installations in other prefixes can be discovered.
find_path(
    ROCmSMI_HOME
    NAMES
        bin/rocm-smi
    HINTS
        ENV ROCM_PATH
        /opt/rocm
)

# Locate the directory that contains rocm_smi/rocm_smi.h. The original
# <prefix>/rocm_smi/include hint is tried first; <prefix>/include is a fallback
# for installations that place the header there instead.
find_path(
    ROCmSMI_INCLUDE_DIR
    NAMES
        rocm_smi/rocm_smi.h
    HINTS
        "${ROCmSMI_HOME}/rocm_smi/include"
        "${ROCmSMI_HOME}/include"
)

# Locate the ROCm SMI library itself, accepting either lib or lib64 under the
# discovered prefix.
find_library(
    ROCmSMI_LIBRARY
    NAMES
        rocm_smi64
    HINTS
        "${ROCmSMI_HOME}/lib"
        "${ROCmSMI_HOME}/lib64"
)

# Standard find-module epilogue: honors the QUIET and REQUIRED options given to
# find_package() and sets ROCmSMI_FOUND to TRUE only when every variable listed
# under REQUIRED_VARS holds a true value.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(
    ROCmSMI
    REQUIRED_VARS
        ROCmSMI_HOME
        ROCmSMI_INCLUDE_DIR
        ROCmSMI_LIBRARY
)

# Some versions of ROCm SMI are broken, so a successful file search is not
# enough: build a tiny program against the discovered header and library and
# downgrade ROCmSMI_FOUND to FALSE when that build fails.
if(ROCmSMI_FOUND)
    # Force test compile of code every time by dropping the cached result.
    unset(QV_ROCmSMI_COMPILES CACHE)
    # The probe source lives next to this module, so resolve it relative to
    # CMAKE_CURRENT_LIST_DIR; CMAKE_SOURCE_DIR would point at the wrong tree
    # when this project is consumed as a subproject. The include directory is
    # handed to the test project through INCLUDE_DIRECTORIES rather than as a
    # raw -I flag disguised as a compile definition.
    try_compile(
        QV_ROCmSMI_COMPILES
        "${PROJECT_BINARY_DIR}/QVTryCompile"
        SOURCES
            "${CMAKE_CURRENT_LIST_DIR}/cmake-try-compile-rocm-smi.c"
        CMAKE_FLAGS
            "-DINCLUDE_DIRECTORIES=${ROCmSMI_INCLUDE_DIR}"
        LINK_LIBRARIES
            "${ROCmSMI_LIBRARY}"
    )
    if(QV_ROCmSMI_COMPILES)
        message(STATUS "Found usable ROCm SMI: ${ROCmSMI_INCLUDE_DIR}")
    else()
        message(STATUS "Could not find a usable ROCm SMI")
        # Files were found but cannot be built against: report as not found.
        set(ROCmSMI_FOUND FALSE)
    endif()
endif()

# Mark the cache entries produced by the searches as advanced so they are
# hidden from the default cmake-gui/ccmake view.
mark_as_advanced(ROCmSMI_HOME ROCmSMI_INCLUDE_DIR ROCmSMI_LIBRARY)

# Publish the plural result variables derived from the singular search results.
set(ROCmSMI_INCLUDE_DIRS "${ROCmSMI_INCLUDE_DIR}")
set(ROCmSMI_LIBRARIES "${ROCmSMI_LIBRARY}")
# Re-export the installation prefix as a normal variable with its current value.
set(ROCmSMI_HOME "${ROCmSMI_HOME}")

# Expose ROCm SMI as a global imported target carrying the library location and
# its include directory. The target is only defined when the package was found
# (so it never points at a NOTFOUND location) and only once (so a repeated
# find_package(ROCmSMI) does not fail with a duplicate-target error).
if(ROCmSMI_FOUND AND NOT TARGET ROCmSMI)
    add_library(
        ROCmSMI
        SHARED IMPORTED GLOBAL
    )

    set_target_properties(
        ROCmSMI
        PROPERTIES
            IMPORTED_LOCATION "${ROCmSMI_LIBRARIES}"
            INTERFACE_INCLUDE_DIRECTORIES "${ROCmSMI_INCLUDE_DIRS}"
    )
endif()

# vim: ts=4 sts=4 sw=4 expandtab
13 changes: 5 additions & 8 deletions cmake/QVhwloc.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -52,13 +52,10 @@ else()
list(APPEND QVI_HWLOC_GPU_FLAGS "--enable-nvml=no")
endif()

if(ROCM_FOUND AND QV_GPU_SUPPORT)
if(ROCmSMI_FOUND AND QV_GPU_SUPPORT)
list(APPEND QVI_HWLOC_GPU_FLAGS "--enable-rsmi")
set(
QVI_HWLOC_CPPFLAGS
"-I${ROCM_SMI_INCLUDE_DIR} -I${ROCM_OPENCL_INCLUDE_DIR}"
)
set(QVI_HWLOC_LDFLAGS "-L${ROCM_HOME}/lib")
set(QVI_HWLOC_CPPFLAGS "-I${ROCmSMI_INCLUDE_DIRS}")
set(QVI_HWLOC_LDFLAGS "${ROCmSMI_LIBRARIES}")
list(APPEND QVI_HWLOC_CONFIG_VARS "CPPFLAGS=${QVI_HWLOC_CPPFLAGS}")
list(APPEND QVI_HWLOC_CONFIG_VARS "LDFLAGS=${QVI_HWLOC_LDFLAGS}")
set(PCIACCESS_NEEDED TRUE)
Expand Down Expand Up @@ -140,11 +137,11 @@ if(CUDAToolkit_FOUND AND QV_GPU_SUPPORT)
)
endif()

if(ROCM_FOUND AND QV_GPU_SUPPORT)
if(ROCmSMI_FOUND AND QV_GPU_SUPPORT)
target_link_libraries(
hwloc
INTERFACE
ROCm
ROCmSMI
)
endif()

Expand Down
7 changes: 7 additions & 0 deletions cmake/cmake-try-compile-rocm-smi.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#include "rocm_smi/rocm_smi.h"

/*
 * Minimal probe program: it only needs to compile and link against ROCm SMI,
 * which it does by referencing rsmi_status_t and calling rsmi_init().
 */
int main(void)
{
    rsmi_status_t rsmi_rc = rsmi_init(0);
    /*
     * Consume the result so that toolchains treating unused-variable warnings
     * as errors do not reject an otherwise valid probe.
     */
    (void)rsmi_rc;
    return 0;
}
2 changes: 1 addition & 1 deletion config.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@

#cmakedefine QV_GPU_SUPPORT
#cmakedefine CUDAToolkit_FOUND
#cmakedefine ROCM_FOUND
#cmakedefine ROCmSMI_FOUND

#endif

Expand Down
3 changes: 3 additions & 0 deletions src/qvi-hwloc.cc
Original file line number Diff line number Diff line change
Expand Up @@ -496,6 +496,9 @@ static int
discover_gpu_devices(
qvi_hwloc_t *hwl
) {
#ifndef QV_GPU_SUPPORT
return QV_SUCCESS;
#endif
// This will maintain a mapping of PCI bus ID to device pointers.
qvi_hwloc_dev_map_t devmap;

Expand Down
4 changes: 2 additions & 2 deletions src/qvi-rsmi.cc
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
#include "qvi-rsmi.h"
#include "qvi-hwloc.h"

#ifdef ROCM_FOUND
#ifdef ROCmSMI_FOUND
#include "rocm_smi/rocm_smi.h"
#include "hwloc/rsmi.h"
#endif
Expand All @@ -25,7 +25,7 @@ qvi_hwloc_rsmi_get_device_cpuset_by_device_id(
uint32_t devid,
hwloc_cpuset_t cpuset
) {
#ifndef ROCM_FOUND
#ifndef ROCmSMI_FOUND
QVI_UNUSED(hwl);
QVI_UNUSED(devid);
QVI_UNUSED(cpuset);
Expand Down
Loading