-
Notifications
You must be signed in to change notification settings - Fork 20
HIP version of asgard #400
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 35 commits
79ab7a9
f70d02b
0e65742
3daf645
e2491dc
4b94cc8
e816103
0d98648
0a78c9e
b397160
15a9d35
b0a81a1
0889a35
3d2f466
119cbb2
b9002c6
d08af1e
3dc539d
ef16b56
a745fbe
e591cd9
770a040
c318c89
cf7c04a
d5f34ca
8c69e57
49f45dc
8cd9e45
2de03a3
c5ed1fd
c79997b
91b0626
acf6305
37ebc25
8909eab
ef3c11b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
cmake_minimum_required (VERSION 3.19) | ||
cmake_minimum_required (VERSION 3.21) | ||
|
||
project (asgard | ||
VERSION 0.3.0 | ||
|
@@ -12,7 +12,7 @@ include (FetchContent) | |
find_package (Git) | ||
|
||
# Define a macro to register new projects. | ||
function (register_project name dir url default_tag) | ||
function (register_project name dir url default_tag make_avail) | ||
set (BUILD_TAG_${dir} ${default_tag} CACHE STRING "Name of the tag to checkout.") | ||
set (BUILD_REPO_${dir} ${url} CACHE STRING "URL of the repo to clone.") | ||
|
||
|
@@ -24,7 +24,9 @@ function (register_project name dir url default_tag) | |
SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/contrib/${dir} | ||
) | ||
|
||
FetchContent_MakeAvailable(${name}) | ||
if (${make_avail}) | ||
FetchContent_MakeAvailable(${name}) | ||
endif() | ||
endfunction () | ||
|
||
# Changes to the current version of kromult should proceed through a pull | ||
|
@@ -33,6 +35,7 @@ register_project (kronmult | |
KRONMULT | ||
https://github.com/project-asgard/kronmult.git | ||
f941819685bbd3026a85145dde286f593683c1f4 | ||
OFF | ||
) | ||
|
||
############################################################################### | ||
|
@@ -78,7 +81,7 @@ option (ASGARD_PROFILE_PERF "enable profiling support for using linux perf" "") | |
option (ASGARD_PROFILE_VALGRIND "enable profiling support for using valgrind" "") | ||
option (ASGARD_GRAPHVIZ_PATH "optional location of bin/ containing dot executable" "") | ||
option (ASGARD_IO_HIGHFIVE "Use the HighFive HDF5 header library for I/O" OFF) | ||
option (ASGARD_USE_CUDA "Optional CUDA support for asgard" OFF) | ||
option (ASGARD_USE_HIP "Optional HIP support for asgard" OFF) | ||
option (ASGARD_USE_OPENMP "Optional openMP support for asgard" ON) | ||
option (ASGARD_USE_MPI "Optional distributed computing support for asgard" OFF) | ||
include(CMakeDependentOption) | ||
|
@@ -136,30 +139,175 @@ if(ASGARD_USE_OPENMP) | |
endif() | ||
endif() | ||
|
||
if(ASGARD_USE_CUDA) | ||
find_package(CUDA 9.0 REQUIRED) # eventually want to remove this - how to set min version with enable_language? | ||
include_directories(${CUDA_INCLUDE_DIRS}) | ||
enable_language(CUDA) | ||
set (CMAKE_CUDA_STANDARD 14) | ||
set (CMAKE_CUDA_STANDARD_REQUIRED ON) | ||
endif() | ||
# convenience flags for which HIP platform has been setup | ||
set(ASGARD_PLATFORM_NVCC 0) | ||
set(ASGARD_PLATFORM_AMD 0) | ||
if(ASGARD_USE_HIP) | ||
# search for HIP and libraries | ||
if(NOT DEFINED HIP_PATH) | ||
if(NOT DEFINED ENV{HIP_PATH}) | ||
set(HIP_PATH "/opt/rocm/hip" CACHE PATH "Path to which HIP has been installed") | ||
else() | ||
set(HIP_PATH $ENV{HIP_PATH} CACHE PATH "Path to which HIP has been installed") | ||
endif() | ||
endif() | ||
|
||
# build component to interface with Ed's kronmult lib | ||
##TODO: link to kronmult as interface library | ||
add_library(kronmult_cuda SHARED src/device/kronmult_cuda.cpp) | ||
if(ASGARD_USE_CUDA) | ||
set_source_files_properties( src/device/kronmult_cuda.cpp PROPERTIES LANGUAGE CUDA ) # no .cu extension | ||
set_target_properties( kronmult_cuda PROPERTIES CUDA_ARCHITECTURES OFF) | ||
set_target_properties( kronmult_cuda PROPERTIES COMPILE_FLAGS "-arch sm_70 -g -lineinfo --ptxas-options=-O3") | ||
set_target_properties( kronmult_cuda PROPERTIES LINK_FLAGS "-Wl,-rpath,${CMAKE_BINARY_DIR}") | ||
# set HIP_CLANG_PATH for potential installs in non-standard locations (such as rocm with spack) | ||
if (NOT DEFINED HIP_CLANG_PATH) | ||
if(NOT DEFINED ENV{HIP_CLANG_PATH}) | ||
set(HIP_CLANG_PATH "${ROCM_PATH}/llvm/bin" CACHE PATH "Path to HIP clang binaries") | ||
else() | ||
set(HIP_CLANG_PATH $ENV{HIP_CLANG_PATH} CACHE PATH "Path to HIP clang binaries") | ||
endif() | ||
endif() | ||
|
||
# note: could probably grab this path directly using hipconfig? | ||
if (NOT DEFINED HIP_CLANG_INCLUDE_PATH) | ||
if(NOT DEFINED ENV{HIP_CLANG_INCLUDE_PATH}) | ||
# probably need a better way to get the compiler version.. this will cause non-existent paths for non-clang compilers | ||
set(HIP_CLANG_INCLUDE_PATH "${HIP_CLANG_PATH}/../lib/clang/${CMAKE_CXX_COMPILER_VERSION}/include" CACHE PATH "Path to HIP clang include directory") | ||
else() | ||
set(HIP_CLANG_INCLUDE_PATH $ENV{HIP_CLANG_INCLUDE_PATH} CACHE PATH "Path to HIP clang include directory") | ||
endif() | ||
endif() | ||
|
||
if(NOT DEFINED HIPBLAS_PATH) | ||
if(NOT DEFINED ENV{HIPBLAS_PATH}) | ||
set(HIPBLAS_PATH "${HIP_PATH}/../hipblas" CACHE PATH "Path to which HIPBLAS has been installed") | ||
else() | ||
set(HIPBLAS_PATH $ENV{HIPBLAS_PATH} CACHE PATH "Path to which HIPBLAS has been installed") | ||
endif() | ||
endif() | ||
|
||
# try to find hipconfig executable which can help detect platforms and include dirs | ||
find_program(ASGARD_HIPCONFIG_PATH hipconfig HINTS "${HIP_PATH}/bin") | ||
if(ASGARD_HIPCONFIG_PATH) | ||
execute_process(COMMAND ${ASGARD_HIPCONFIG_PATH} --platform OUTPUT_VARIABLE ASGARD_HIP_PLATFORM) | ||
elseif(DEFINED ENV{HIP_PLATFORM}) | ||
set(ASGARD_HIP_PLATFORM "$ENV{HIP_PLATFORM}") | ||
else() | ||
message(FATAL_ERROR "Could not determine HIP platform, make sure HIP_PLATFORM is set") | ||
endif() | ||
|
||
message(STATUS "HIP platform has been detected as ${ASGARD_HIP_PLATFORM}") | ||
# hip >= 4.2 is now using "amd" to identify platform | ||
if(ASGARD_HIP_PLATFORM STREQUAL "hcc" OR ASGARD_HIP_PLATFORM STREQUAL "amd") | ||
set(ASGARD_PLATFORM_AMD 1) | ||
# hip <= 4.1 uses "nvcc" to identify nvidia platforms, >= 4.2 uses "nvidia" | ||
elseif(ASGARD_HIP_PLATFORM STREQUAL "nvcc" OR ASGARD_HIP_PLATFORM STREQUAL "nvidia") | ||
set(ASGARD_PLATFORM_NVCC 1) | ||
endif() | ||
|
||
# double check for cuda path since HIP uses it internally | ||
if(ASGARD_PLATFORM_NVCC) | ||
if (NOT DEFINED ENV{CUDA_PATH}) | ||
find_path(ASGARD_HIP_DEFAULT_CUDA_PATH "cuda.h" PATH /usr/local/cuda/include) | ||
if (NOT ASGARD_HIP_DEFAULT_CUDA_PATH) | ||
message(FATAL_ERROR "Make sure the CUDA_PATH env is set to locate for HIP") | ||
endif() | ||
endif() | ||
message(STATUS "Found CUDA_PATH: $ENV{CUDA_PATH}") | ||
endif() | ||
|
||
# look for HIP cmake configs in different locations | ||
list(APPEND CMAKE_MODULE_PATH "${HIP_PATH}/cmake" "${ROCM_PATH}") | ||
if(ASGARD_PLATFORM_AMD) | ||
# note: causes issues on nvidia, but might be needed on amd platforms? | ||
list(APPEND CMAKE_PREFIX_PATH "${HIP_PATH}/lib/cmake" "${ROCM_PATH}") | ||
|
||
# output a warning if compiling for AMD without using amd-clang | ||
if(NOT CMAKE_CXX_COMPILER_ID MATCHES "Clang") | ||
message(WARNING "Compiling HIP for AMD without using AMD clang might not work. Use -DCMAKE_CXX_COMPILER=clang++") | ||
endif() | ||
endif() | ||
list(APPEND CMAKE_PREFIX_PATH "${HIPBLAS_PATH}/lib/cmake") | ||
|
||
set(HIP_VERBOSE_BUILD ON CACHE STRING "Verbose compilation for HIP") | ||
|
||
set(ASGARD_HIP_FLAGS "-std=c++14;-g" CACHE STRING "HIP compiler flags for both AMD and NVIDIA") | ||
set(GPU_ARCH "70" CACHE STRING "AMD/NVIDIA GPU architecture number (such as 906 or 70)") | ||
|
||
if(ASGARD_PLATFORM_AMD) | ||
set(AMDGPU_TARGETS "gfx${GPU_ARCH}" CACHE STRING "GPU target architectures to compile for" FORCE) | ||
set(GPU_TARGETS "gfx${GPU_ARCH}" CACHE STRING "GPU target architectures to compile for" FORCE) | ||
|
||
# need a much later version for AMD since ipiv=nullptr fix not in hipblas until >4.3.1 | ||
find_package(HIP 4.3.0 REQUIRED) | ||
find_package(hipblas 0.49 REQUIRED) | ||
else() | ||
find_package(HIP 4.0 REQUIRED) | ||
find_package(hipblas REQUIRED) | ||
endif() | ||
# Print some debug info about HIP configuration | ||
message(STATUS "HIP PLATFORM: ${HIP_PLATFORM}") | ||
message(STATUS "HIP COMPILER: ${HIP_COMPILER}") | ||
message(STATUS "HIP RUNTIME: ${HIP_RUNTIME}") | ||
message(STATUS "HIP Includes: ${HIP_INCLUDE_DIRS}") | ||
message(STATUS "HIP Libraries: ${HIP_LIBRARIES}") | ||
|
||
if(ASGARD_PLATFORM_NVCC) | ||
find_package(CUDA 9.0 REQUIRED) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we eliminate the old |
||
include_directories(${CUDA_INCLUDE_DIRS}) | ||
enable_language(CUDA) | ||
set (CMAKE_CUDA_STANDARD 14) | ||
set (CMAKE_CUDA_STANDARD_REQUIRED ON) | ||
add_compile_definitions(__HIP_PLATFORM_NVCC__ __HIP_PLATFORM_NVIDIA__) | ||
set(ASGARD_NVCC_FLAGS "${ASGARD_HIP_FLAGS}; -gencode arch=compute_${GPU_ARCH},code=compute_${GPU_ARCH} --ptxas-options=-O3 -lineinfo" CACHE STRING "Flags to pass to NVCC" FORCE) # nvcc specific options | ||
elseif(ASGARD_PLATFORM_AMD) | ||
#enable_language(HIP) # not yet added to latest cmake, but should be available in 3.21 | ||
# these compile definitions should be added automatically if using amd's clang, but | ||
# may not necessarily be added if compiling with gcc or others | ||
add_compile_definitions(__HIP_PLATFORM_HCC__ __HIP_PLATFORM_AMD__) | ||
set(ASGARD_AMD_FLAGS "${ASGARD_HIP_FLAGS}; --amdgpu-target=gfx${GPU_ARCH};-O3" CACHE STRING "Flags to pass to amd-clang for HIP" FORCE) # amdgpu specific options | ||
endif() | ||
|
||
if (hipBLAS_FOUND) | ||
message(STATUS "Found rocBLAS version ${rocBLAS_VERSION}: ${HIPBLAS_INCLUDE_DIRS}") | ||
endif() | ||
|
||
include_directories(SYSTEM ${HIP_INCLUDE_DIRS}) | ||
# assume this include path since HIP_INCLUDE_DIRS is not being set on nvidia platform | ||
include_directories(SYSTEM "${HIP_PATH}/include") | ||
include_directories(${HIPBLAS_INCLUDE_DIRS}) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we use |
||
|
||
# set source file language properties | ||
if(ASGARD_PLATFORM_AMD) | ||
#set_source_files_properties( src/device/kronmult_cuda.cpp PROPERTIES LANGUAGE HIP ) # should work after cmake 3.21 release? | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can we use this now that we require CMake 3.21? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, I believe this should work now but I am not able to test it at the moment since the AMD machine is still down. |
||
set_source_files_properties( src/device/kronmult_cuda.cpp PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1) | ||
elseif(ASGARD_PLATFORM_NVCC) | ||
set_source_files_properties( src/device/kronmult_cuda.cpp PROPERTIES LANGUAGE CUDA ) # no .cu extension | ||
endif() | ||
|
||
# Turn on GPU support in kronmult. | ||
set (USE_GPU ON CACHE BOOL "Turn on kronmult gpu support" FORCE) | ||
endif() | ||
|
||
# Fetch kronmult after configuring everything, but before adding libraries | ||
FetchContent_MakeAvailable(kronmult) | ||
|
||
if(ASGARD_USE_HIP) | ||
set(HIP_HIPCC_FLAGS "${HIP_HIPCC_FLAGS} ${ASGARD_HIP_FLAGS}" CACHE STRING "") | ||
set(HIP_CLANG_FLAGS "${HIP_CLANG_FLAGS} ${ASGARD_AMD_FLAGS}" CACHE STRING "") | ||
set(HIP_NVCC_FLAGS "${HIP_NVCC_FLAGS} ${ASGARD_NVCC_FLAGS}" CACHE STRING "") | ||
|
||
hip_add_library(kronmult_cuda SHARED src/device/kronmult_cuda.cpp | ||
HIPCC_OPTIONS "${ASGARD_HIP_FLAGS}" | ||
NVCC_OPTIONS "${ASGARD_NVCC_FLAGS}" | ||
CLANG_OPTIONS "${ASGARD_AMD_FLAGS}") | ||
if(ASGARD_PLATFORM_NVCC) | ||
set_target_properties( kronmult_cuda PROPERTIES CUDA_ARCHITECTURES OFF) | ||
set_target_properties( kronmult_cuda PROPERTIES COMPILE_FLAGS "-arch sm_${GPU_ARCH} -g -lineinfo --ptxas-options=-O3") | ||
endif() | ||
set_target_properties( kronmult_cuda PROPERTIES LINK_FLAGS "-Wl,-rpath,${CMAKE_BINARY_DIR}") | ||
else() | ||
# build component to interface with Ed's kronmult lib | ||
##TODO: link to kronmult as interface library | ||
add_library(kronmult_cuda SHARED src/device/kronmult_cuda.cpp) | ||
endif() | ||
|
||
if(ASGARD_USE_MKL) | ||
if(ASGARD_USE_CUDA) | ||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --compiler-options -fopenmp") | ||
if(ASGARD_USE_HIP AND ASGARD_PLATFORM_NVCC) | ||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --compiler-options -fopenmp") | ||
else() | ||
target_compile_options (kronmult_cuda PRIVATE "-fopenmp") # CMAKE doesn't handle MKL openmp link properly | ||
if(APPLE) # Need to link against the same openmp library as the MKL. | ||
|
@@ -239,9 +387,7 @@ if (build_hdf5) | |
add_dependencies (io hdf5-ext) | ||
endif () | ||
|
||
if (build_kron) | ||
add_dependencies (kronmult_cuda kronmult-ext) | ||
endif () | ||
add_dependencies (kronmult_cuda kron) | ||
|
||
if (ASGARD_USE_SCALAPACK) | ||
target_link_libraries (tensors PRIVATE scalapack_matrix_info cblacs_grid) | ||
|
@@ -281,7 +427,11 @@ if (ASGARD_USE_OPENMP AND NOT ASGARD_USE_MKL) | |
target_link_libraries(kronmult PRIVATE OpenMP::OpenMP_CXX) | ||
endif () | ||
|
||
target_link_libraries(kronmult_cuda PUBLIC kron) | ||
if (ASGARD_USE_HIP AND ASGARD_PLATFORM_AMD) | ||
target_link_libraries(kronmult_cuda PUBLIC kron hip::device) | ||
else () | ||
target_link_libraries(kronmult_cuda PRIVATE kron) | ||
endif() | ||
|
||
if (ASGARD_USE_OPENMP AND NOT ASGARD_USE_MKL) | ||
target_link_libraries(kronmult_cuda PRIVATE OpenMP::OpenMP_CXX) | ||
|
@@ -302,9 +452,13 @@ else () | |
target_link_libraries (lib_dispatch PRIVATE LINALG::LINALG) | ||
endif () | ||
|
||
if (ASGARD_USE_CUDA) | ||
target_link_libraries(lib_dispatch PRIVATE ${CUDA_LIBRARIES} | ||
${CUDA_CUBLAS_LIBRARIES}) | ||
if (ASGARD_USE_HIP) | ||
if(ASGARD_PLATFORM_AMD) | ||
target_link_libraries(lib_dispatch PRIVATE hip::device) | ||
elseif(ASGARD_PLATFORM_NVCC) | ||
target_link_libraries(lib_dispatch PRIVATE ${CUDA_LIBRARIES}) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does HIP not take care of linking against CUDA? |
||
endif() | ||
target_link_libraries(lib_dispatch PRIVATE roc::hipblas) | ||
endif() | ||
|
||
if (ASGARD_USE_OPENMP AND NOT ASGARD_USE_MKL) | ||
|
@@ -330,8 +484,13 @@ target_link_libraries (quadrature PRIVATE matlab_utilities tensors) | |
target_link_libraries (solver PRIVATE distribution fast_math lib_dispatch tensors) | ||
|
||
target_link_libraries (tensors PRIVATE lib_dispatch) | ||
if (ASGARD_USE_CUDA) | ||
target_link_libraries (tensors PRIVATE ${CUDA_LIBRARIES}) | ||
if (ASGARD_USE_HIP) | ||
if(ASGARD_PLATFORM_AMD) | ||
target_link_libraries(tensors PRIVATE hip::device) | ||
elseif(ASGARD_PLATFORM_NVCC) | ||
target_link_libraries(tensors PRIVATE ${CUDA_LIBRARIES}) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does HIP not take care of linking against CUDA? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I haven't been able to get it to work automatically, but I might be missing something. |
||
endif() | ||
target_link_libraries (tensors PRIVATE roc::hipblas) | ||
endif () | ||
if (ASGARD_USE_SCALAPACK) | ||
add_compile_definitions (ASGARD_USE_SCALAPACK) | ||
|
@@ -432,7 +591,7 @@ if (ASGARD_BUILD_TESTS) | |
target_link_libraries (${component}-tests PRIVATE ${component} MPI::MPI_CXX) | ||
if (${component} IN_LIST mpi_test_components) | ||
set(test_ranks "4") | ||
if (ASGARD_USE_CUDA) | ||
if (ASGARD_USE_HIP) | ||
set(test_ranks "1") | ||
endif () | ||
if (${ASGARD_TESTING_RANKS}) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -864,7 +864,11 @@ void test_batched_gemv(int const m, int const n, int const lda, | |
|
||
batched_gemv(a_batch, x_batch, y_batch, alpha, beta); | ||
|
||
P const tol_factor = 1e-17; | ||
P tol_factor = 1e-17; | ||
if constexpr (resrc == resource::device) | ||
{ | ||
tol_factor = 1e-7; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 😮 |
||
} | ||
for (int i = 0; i < num_batch; ++i) | ||
{ | ||
if constexpr (resrc == resource::host) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Using CUDA through the HIP API is not a great idea at the moment. The biggest issue comes from the use of math-libraries such as cuBlas and rocBlas, where they don't fully mirror or port the capabilities (especially true for sparse calls). Lesser problems (but problems never the less) come from availability and support across platforms, Nvidia based systems do not have universal support for HIP, also optimizations and performance.
HIP and CUDA can sit side by side in the code and have only one flipped on/off. All we need (usually) is to change the abstraction of memory allocation and data movement, as well as the kernels which seldom require any change, i.e., we can use the same kernels, just compile them differently.