From 2b54a56e392c3fa70a555bb7eaa314c8d2f9d851 Mon Sep 17 00:00:00 2001 From: Stanley Tsang Date: Mon, 6 Mar 2023 14:18:52 -0700 Subject: [PATCH] 5.5 cherry pick (#234) * HIP SDK spaces support, plus previous benchmark fixes (#225) * HIP SDK spaces support, plus previous benchmark fixes * Better fix for Windows * Update cmake_path function for rmake.py * Update copyright * Fix toolchain-windows.cmake for HIP SDK (#227) * HIP SDK spaces support, plus previous benchmark fixes * Better fix for Windows * Update cmake_path function for rmake.py * Update copyright * Cleanup toolchain-windows.cmake, compatibility with HIP SDK * Take python3 by default * Fix out of bounds access in test (#230) * fix invalid seed_value computation * Merge branch '156-test_hipcub_block_run_length_decode-cpp-out-of-bounds-memory-access' into 'develop_stream' Resolve "test_hipcub_block_run_length_decode.cpp out of bounds memory access" Closes #156 See merge request amd/libraries/hipCUB!138 --------- Co-authored-by: Robin Voetter * Restore test executables in tests package (#231) * Fix missing copyright header (#232) * Make googlebenchmark not a shared library anymore (#233) * Update changelog for 5.5 cherry picks --------- Co-authored-by: Nol Moonen Co-authored-by: Robin Voetter Co-authored-by: Lauren Wrubleski Co-authored-by: Cory Bloor --- CHANGELOG.md | 2 ++ benchmark/benchmark_utils.hpp | 11 ++++--- benchmark/cmdparser.hpp | 7 +++++ benchmark/common_benchmark_header.hpp | 1 + cmake/Dependencies.cmake | 8 +++-- examples/example_utils.hpp | 29 +++++++++++++++++ rmake.py | 14 +++++++-- test/CMakeLists.txt | 1 + .../test_hipcub_block_run_length_decode.cpp | 9 +++--- toolchain-windows.cmake | 31 ++++++------------- 10 files changed, 78 insertions(+), 35 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a0089d7c..82476a20 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,8 @@ See README.md on how to build the hipCUB documentation using Doxygen. ### Changed - CUB backend references CUB and Thrust version 1.17.2. - Improved benchmark coverage of `BlockScan` by adding `ExclusiveScan`, benchmark coverage of `BlockRadixSort` by adding `SortBlockedToStriped`, and benchmark coverage of `WarpScan` by adding `Broadcast`. +### Fixed +- Windows HIP SDK support ### Known Issues - `BlockRadixRankMatch` is currently broken under the rocPRIM backend. - `BlockRadixRankMatch` with a warp size that does not exactly divide the block size is broken under the CUB backend. diff --git a/benchmark/benchmark_utils.hpp b/benchmark/benchmark_utils.hpp index c7cfd47a..de250f0f 100644 --- a/benchmark/benchmark_utils.hpp +++ b/benchmark/benchmark_utils.hpp @@ -43,16 +43,17 @@ namespace benchmark_utils { - +const size_t default_max_random_size = 1024 * 1024; // get_random_data() generates only part of sequence and replicates it, // because benchmarks usually do not need "true" random sequence. template -inline auto get_random_data(size_t size, T min, T max, size_t max_random_size = 1024 * 1024) +inline auto get_random_data(size_t size, T min, T max, size_t max_random_size = default_max_random_size) -> typename std::enable_if::value, std::vector>::type { std::random_device rd; std::default_random_engine gen(rd()); - std::uniform_int_distribution distribution(min, max); + using distribution_type = typename std::conditional<(sizeof(T)==1), short, T>::type; + std::uniform_int_distribution distribution(min, max); std::vector data(size); std::generate( data.begin(), data.begin() + std::min(size, max_random_size), @@ -66,7 +67,7 @@ inline auto get_random_data(size_t size, T min, T max, size_t max_random_size = } template -inline auto get_random_data(size_t size, T min, T max, size_t max_random_size = 1024 * 1024) +inline auto get_random_data(size_t size, T min, T max, size_t max_random_size = default_max_random_size) -> typename std::enable_if::value, std::vector>::type { std::random_device rd; @@ -85,7 +86,7 @@ inline auto get_random_data(size_t size, T min, T max, size_t max_random_size = } template -inline std::vector get_random_data01(size_t size, float p, size_t max_random_size = 1024 * 1024) +inline std::vector get_random_data01(size_t size, float p, size_t max_random_size = default_max_random_size) { std::random_device rd; std::default_random_engine gen(rd()); diff --git a/benchmark/cmdparser.hpp b/benchmark/cmdparser.hpp index 5ffc24f6..6cf82788 100644 --- a/benchmark/cmdparser.hpp +++ b/benchmark/cmdparser.hpp @@ -185,6 +185,13 @@ namespace cli { return std::stoul(elements[0]); } + static unsigned long long parse(const std::vector& elements, const unsigned long long&) { + if (elements.size() != 1) + throw std::bad_cast(); + + return std::stoull(elements[0]); + } + static long parse(const std::vector& elements, const long&) { if (elements.size() != 1) throw std::bad_cast(); diff --git a/benchmark/common_benchmark_header.hpp b/benchmark/common_benchmark_header.hpp index 7327b3f6..6c1c70f0 100644 --- a/benchmark/common_benchmark_header.hpp +++ b/benchmark/common_benchmark_header.hpp @@ -31,6 +31,7 @@ #include #include #include +#include // Google Benchmark #include "benchmark/benchmark.h" diff --git a/cmake/Dependencies.cmake b/cmake/Dependencies.cmake index 2fb885c5..6dc01d49 100644 --- a/cmake/Dependencies.cmake +++ b/cmake/Dependencies.cmake @@ -182,7 +182,11 @@ if(BUILD_BENCHMARK) set(GOOGLEBENCHMARK_ROOT ${CMAKE_CURRENT_BINARY_DIR}/deps/googlebenchmark CACHE PATH "") if(NOT (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")) # hip-clang cannot compile googlebenchmark for some reason - set(COMPILER_OVERRIDE "-DCMAKE_CXX_COMPILER=g++") + if(WIN32) + set(COMPILER_OVERRIDE "-DCMAKE_CXX_COMPILER=cl") + else() + set(COMPILER_OVERRIDE "-DCMAKE_CXX_COMPILER=g++") + endif() endif() download_project( @@ -191,7 +195,7 @@ if(BUILD_BENCHMARK) GIT_TAG v1.6.1 GIT_SHALLOW TRUE INSTALL_DIR ${GOOGLEBENCHMARK_ROOT} - CMAKE_ARGS -DCMAKE_BUILD_TYPE=RELEASE -DBENCHMARK_ENABLE_TESTING=OFF -DBUILD_SHARED_LIBS=ON -DCMAKE_INSTALL_PREFIX= ${COMPILER_OVERRIDE} + CMAKE_ARGS -DCMAKE_BUILD_TYPE=RELEASE -DBENCHMARK_ENABLE_TESTING=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_INSTALL_PREFIX= -DCMAKE_CXX_STANDARD=14 ${COMPILER_OVERRIDE} LOG_DOWNLOAD TRUE LOG_CONFIGURE TRUE LOG_BUILD TRUE diff --git a/examples/example_utils.hpp b/examples/example_utils.hpp index 9bbc054e..a8216c70 100644 --- a/examples/example_utils.hpp +++ b/examples/example_utils.hpp @@ -1,3 +1,32 @@ +/****************************************************************************** + * Copyright (c) 2011, Duane Merrill. All rights reserved. + * Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. + * Modifications Copyright (c) 2021-2023, Advanced Micro Devices, Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + ******************************************************************************/ + #ifndef EXAMPLES_EXAMPLE_UTILS_HPP #define EXAMPLES_EXAMPLE_UTILS_HPP #include "mersenne.h" diff --git a/rmake.py b/rmake.py index 9874946f..9a1dce38 100644 --- a/rmake.py +++ b/rmake.py @@ -1,5 +1,5 @@ #!/usr/bin/python3 -"""Copyright 2020-2021 Advanced Micro Devices, Inc. +"""Copyright 2020-2023 Advanced Micro Devices, Inc. Manage build and installation""" import re @@ -72,6 +72,12 @@ def delete_dir(dir_path) : linux_path = pathlib.Path(dir_path).absolute() #print( linux_path ) run_cmd( "rm" , f"-rf {linux_path}") + +def cmake_path(os_path): + if OS_info["ID"] == "windows": + return os_path.replace("\\", "/") + else: + return os.path.realpath(os_path) def config_cmd(): global args @@ -85,7 +91,9 @@ def config_cmd(): cmake_platform_opts = [] if (OS_info["ID"] == 'windows'): # we don't have ROCM on windows but have hip, ROCM can be downloaded if required - rocm_path = os.getenv( 'ROCM_PATH', "C:/hipsdk/rocm-cmake-master") #C:/hip") # rocm/Utils/cmake-rocm4.2.0" + # CMAKE_PREFIX_PATH set to rocm_path and HIP_PATH set BY SDK Installer + raw_rocm_path = cmake_path(os.getenv('HIP_PATH', "C:/hip")) + rocm_path = f'"{raw_rocm_path}"' # guard against spaces in path cmake_executable = "cmake.exe" toolchain = os.path.join( src_path, "toolchain-windows.cmake" ) #set CPACK_PACKAGING_INSTALL_PREFIX= defined as blank as it is appended to end of path for archive creation @@ -143,7 +151,7 @@ def config_cmd(): cmake_options.append( f"-DROCM_DISABLE_LDCONFIG=ON" ) if args.build_clients: - cmake_options.append( f"-DBUILD_TEST=ON -DBUILD_DIR={build_dir}" ) + cmake_options.append( f"-DBUILD_TEST=ON -DBUILD_BENCHMARK=ON -DBUILD_DIR={build_dir}" ) cmake_options.append( f"-DAMDGPU_TARGETS={args.gpu_architecture}" ) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 9f327647..a084b5f2 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -43,6 +43,7 @@ function(add_relative_test test_name test_target) endif() file(RELATIVE_PATH rel_path "${CMAKE_CURRENT_BINARY_DIR}" "${EXE_PATH}/${EXE_NAME}") add_test(NAME "${test_name}" COMMAND "./${rel_path}") + rocm_install(TARGETS ${test_target} COMPONENT tests) file(APPEND "${INSTALL_TEST_FILE}" "add_test(${test_name} \"../${EXE_NAME}\")\n") endfunction() diff --git a/test/hipcub/test_hipcub_block_run_length_decode.cpp b/test/hipcub/test_hipcub_block_run_length_decode.cpp index 2c06843e..adfe98b1 100644 --- a/test/hipcub/test_hipcub_block_run_length_decode.cpp +++ b/test/hipcub/test_hipcub_block_run_length_decode.cpp @@ -1,6 +1,6 @@ // MIT License // -// Copyright (c) 2021-2022 Advanced Micro Devices, Inc. All rights reserved. +// Copyright (c) 2021-2023 Advanced Micro Devices, Inc. All rights reserved. // // Permission is hereby granted, free of charge, to any person obtaining a copy // of this software and associated documentation files (the "Software"), to deal @@ -101,7 +101,7 @@ void block_run_length_decode_kernel( const unsigned global_thread_idx = BlockSize * hipBlockIdx_x + hipThreadIdx_x; hipcub::LoadDirectBlocked(global_thread_idx, d_run_items, run_items); hipcub::LoadDirectBlocked(global_thread_idx, d_run_lengths, run_lengths); - + unsigned total_decoded_size{}; BlockRunLengthDecodeT block_run_length_decode( temp_storage, @@ -140,7 +140,8 @@ TYPED_TEST(HipcubBlockRunLengthDecodeTest, TestDecode) for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++) { - const unsigned seed_value = seed_index >= random_seeds_count ? seeds[seed_index] : rand(); + const unsigned int seed_value + = seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count]; SCOPED_TRACE(testing::Message() << "with seed= " << seed_value); const LengthT max_run_length = static_cast( @@ -164,7 +165,7 @@ TYPED_TEST(HipcubBlockRunLengthDecodeTest, TestDecode) std::uniform_int_distribution num_empty_runs_dist(1, 4); const size_t num_trailing_empty_runs = num_empty_runs_dist(prng); num_runs += num_trailing_empty_runs; - + const auto empty_run_items = test_utils::get_random_data( num_trailing_empty_runs, std::numeric_limits::min(), diff --git a/toolchain-windows.cmake b/toolchain-windows.cmake index 233e074c..b14405ca 100644 --- a/toolchain-windows.cmake +++ b/toolchain-windows.cmake @@ -3,7 +3,10 @@ # Ninja doesn't support platform #set(CMAKE_GENERATOR_PLATFORM x64) -if (DEFINED ENV{HIP_DIR}) +if (DEFINED ENV{HIP_PATH}) + file(TO_CMAKE_PATH "$ENV{HIP_PATH}" HIP_DIR) + set(rocm_bin "${HIP_DIR}/bin") +elseif (DEFINED ENV{HIP_DIR}) file(TO_CMAKE_PATH "$ENV{HIP_DIR}" HIP_DIR) set(rocm_bin "${HIP_DIR}/bin") else() @@ -11,30 +14,21 @@ else() set(rocm_bin "C:/hip/bin") endif() -#set(CMAKE_CXX_COMPILER "${rocm_bin}/hipcc.bat") -#set(CMAKE_C_COMPILER "${rocm_bin}/hipcc.bat") set(CMAKE_CXX_COMPILER "${rocm_bin}/clang++.exe") set(CMAKE_C_COMPILER "${rocm_bin}/clang.exe") -#set(CMAKE_CXX_LINKER "${rocm_bin}/hipcc.bat" ) - -# TODO remove, just to speed up slow cmake -set(CMAKE_C_COMPILER_WORKS 1) -set(CMAKE_CXX_COMPILER_WORKS 1) -# +if (NOT python) + set(python "python3") # take default for windows +endif() -#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -IC:/hip/include -IC:/hip/lib/clang/12.0.0 -DWIN32 -D_CRT_SECURE_NO_WARNINGS") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -I${HIP_DIR}/include -DWIN32 -D_CRT_SECURE_NO_WARNINGS") +# our usage flags +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DWIN32 -D_CRT_SECURE_NO_WARNINGS") # flags for clang direct use -#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 -fms-extensions -fms-compatibility") # -Wno-ignored-attributes to avoid warning: __declspec attribute 'dllexport' is not supported [-Wignored-attributes] which is used by msvc compiler set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 -fms-extensions -fms-compatibility -Wno-ignored-attributes") -# flags for clang direct use with hip -# -x hip causes linker error -#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -x hip -IC:/hip/include/hip -D__HIP_PLATFORM_AMD__ -D__HIP_ROCclr__ -DHIP_CLANG_HCC_COMPAT_MODE=1") -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -I${HIP_DIR}/include/hip -D__HIP_PLATFORM_AMD__ -D__HIP_ROCclr__ -DHIP_CLANG_HCC_COMPAT_MODE=1") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__HIP_PLATFORM_AMD__ -D__HIP_ROCclr__ -DHIP_CLANG_HCC_COMPAT_MODE=1") if (DEFINED ENV{VCPKG_PATH}) file(TO_CMAKE_PATH "$ENV{VCPKG_PATH}" VCPKG_PATH) @@ -42,8 +36,3 @@ else() set(VCPKG_PATH "C:/github/vcpkg") endif() include("${VCPKG_PATH}/scripts/buildsystems/vcpkg.cmake") -# set(GTEST_DIR "C:/rocm/Utils/GTestMSVC") -# set(GTEST_INCLUDE_DIR "${GTEST_DIR}/include") -# set(GTEST_LIBRARY "${GTEST_DIR}/lib/Release/gtest.lib") -# set(GTEST_MAIN_LIBRARY "${GTEST_DIR}/lib/Release/gtest_main.lib") -# set(GTEST_LIBRARIES "${GTEST_DIR}/lib/Release/gtest.lib;${GTEST_DIR}/lib/Release/gtest_main.lib")