Skip to content

Commit

Permalink
5.5 cherry pick (#234)
Browse files Browse the repository at this point in the history
* HIP SDK spaces support, plus previous benchmark fixes (#225)

* HIP SDK spaces support, plus previous benchmark fixes

* Better fix for Windows

* Update cmake_path function for rmake.py

* Update copyright

* Fix toolchain-windows.cmake for HIP SDK (#227)

* HIP SDK spaces support, plus previous benchmark fixes

* Better fix for Windows

* Update cmake_path function for rmake.py

* Update copyright

* Cleanup toolchain-windows.cmake, compatibility with HIP SDK

* Take python3 by default

* Fix out of bounds access in test (#230)

* fix invalid seed_value computation

* Merge branch '156-test_hipcub_block_run_length_decode-cpp-out-of-bounds-memory-access' into 'develop_stream'

Resolve "test_hipcub_block_run_length_decode.cpp out of bounds memory access"

Closes #156

See merge request amd/libraries/hipCUB!138

---------

Co-authored-by: Robin Voetter <robin@streamhpc.com>

* Restore test executables in tests package (#231)

* Fix missing copyright header (#232)

* Make googlebenchmark not a shared library anymore (#233)

* Update changelog for 5.5 cherry picks

---------

Co-authored-by: Nol Moonen <nolmoonen@hotmail.nl>
Co-authored-by: Robin Voetter <robin@streamhpc.com>
Co-authored-by: Lauren Wrubleski <Lauren.Wrubleski@amd.com>
Co-authored-by: Cory Bloor <Cordell.Bloor@amd.com>
  • Loading branch information
5 people authored Mar 6, 2023
1 parent ff5afa9 commit 2b54a56
Show file tree
Hide file tree
Showing 10 changed files with 78 additions and 35 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ See README.md on how to build the hipCUB documentation using Doxygen.
### Changed
- CUB backend references CUB and Thrust version 1.17.2.
- Improved benchmark coverage of `BlockScan` by adding `ExclusiveScan`, benchmark coverage of `BlockRadixSort` by adding `SortBlockedToStriped`, and benchmark coverage of `WarpScan` by adding `Broadcast`.
### Fixed
- Windows HIP SDK support
### Known Issues
- `BlockRadixRankMatch` is currently broken under the rocPRIM backend.
- `BlockRadixRankMatch` with a warp size that does not exactly divide the block size is broken under the CUB backend.
Expand Down
11 changes: 6 additions & 5 deletions benchmark/benchmark_utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,16 +43,17 @@

namespace benchmark_utils
{

const size_t default_max_random_size = 1024 * 1024;
// get_random_data() generates only part of sequence and replicates it,
// because benchmarks usually do not need "true" random sequence.
template<class T>
inline auto get_random_data(size_t size, T min, T max, size_t max_random_size = 1024 * 1024)
inline auto get_random_data(size_t size, T min, T max, size_t max_random_size = default_max_random_size)
-> typename std::enable_if<std::is_integral<T>::value, std::vector<T>>::type
{
std::random_device rd;
std::default_random_engine gen(rd());
std::uniform_int_distribution<T> distribution(min, max);
using distribution_type = typename std::conditional<(sizeof(T)==1), short, T>::type;
std::uniform_int_distribution<distribution_type> distribution(min, max);
std::vector<T> data(size);
std::generate(
data.begin(), data.begin() + std::min(size, max_random_size),
Expand All @@ -66,7 +67,7 @@ inline auto get_random_data(size_t size, T min, T max, size_t max_random_size =
}

template<class T>
inline auto get_random_data(size_t size, T min, T max, size_t max_random_size = 1024 * 1024)
inline auto get_random_data(size_t size, T min, T max, size_t max_random_size = default_max_random_size)
-> typename std::enable_if<std::is_floating_point<T>::value, std::vector<T>>::type
{
std::random_device rd;
Expand All @@ -85,7 +86,7 @@ inline auto get_random_data(size_t size, T min, T max, size_t max_random_size =
}

template<class T>
inline std::vector<T> get_random_data01(size_t size, float p, size_t max_random_size = 1024 * 1024)
inline std::vector<T> get_random_data01(size_t size, float p, size_t max_random_size = default_max_random_size)
{
std::random_device rd;
std::default_random_engine gen(rd());
Expand Down
7 changes: 7 additions & 0 deletions benchmark/cmdparser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,13 @@ namespace cli {
return std::stoul(elements[0]);
}

// Converts a single command-line token into an unsigned long long.
// The second (unnamed) parameter is unused in the body; it only
// distinguishes this overload by type.
// Throws std::bad_cast unless exactly one element was supplied; conversion
// itself is delegated to std::stoull.
static unsigned long long parse(const std::vector<std::string>& elements, const unsigned long long&) {
    const bool has_single_token = (elements.size() == 1);
    if (has_single_token) {
        const std::string& token = elements.front();
        return std::stoull(token);
    }

    throw std::bad_cast();
}

static long parse(const std::vector<std::string>& elements, const long&) {
if (elements.size() != 1)
throw std::bad_cast();
Expand Down
1 change: 1 addition & 0 deletions benchmark/common_benchmark_header.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include <limits>
#include <cmath>
#include <cstdlib>
#include <numeric>

// Google Benchmark
#include "benchmark/benchmark.h"
Expand Down
8 changes: 6 additions & 2 deletions cmake/Dependencies.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,11 @@ if(BUILD_BENCHMARK)
set(GOOGLEBENCHMARK_ROOT ${CMAKE_CURRENT_BINARY_DIR}/deps/googlebenchmark CACHE PATH "")
if(NOT (CMAKE_CXX_COMPILER_ID STREQUAL "GNU"))
# hip-clang cannot compile googlebenchmark for some reason
set(COMPILER_OVERRIDE "-DCMAKE_CXX_COMPILER=g++")
if(WIN32)
set(COMPILER_OVERRIDE "-DCMAKE_CXX_COMPILER=cl")
else()
set(COMPILER_OVERRIDE "-DCMAKE_CXX_COMPILER=g++")
endif()
endif()

download_project(
Expand All @@ -191,7 +195,7 @@ if(BUILD_BENCHMARK)
GIT_TAG v1.6.1
GIT_SHALLOW TRUE
INSTALL_DIR ${GOOGLEBENCHMARK_ROOT}
CMAKE_ARGS -DCMAKE_BUILD_TYPE=RELEASE -DBENCHMARK_ENABLE_TESTING=OFF -DBUILD_SHARED_LIBS=ON -DCMAKE_INSTALL_PREFIX=<INSTALL_DIR> ${COMPILER_OVERRIDE}
CMAKE_ARGS -DCMAKE_BUILD_TYPE=RELEASE -DBENCHMARK_ENABLE_TESTING=OFF -DBUILD_SHARED_LIBS=OFF -DCMAKE_INSTALL_PREFIX=<INSTALL_DIR> -DCMAKE_CXX_STANDARD=14 ${COMPILER_OVERRIDE}
LOG_DOWNLOAD TRUE
LOG_CONFIGURE TRUE
LOG_BUILD TRUE
Expand Down
29 changes: 29 additions & 0 deletions examples/example_utils.hpp
Original file line number Diff line number Diff line change
@@ -1,3 +1,32 @@
/******************************************************************************
* Copyright (c) 2011, Duane Merrill. All rights reserved.
* Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved.
* Modifications Copyright (c) 2021-2023, Advanced Micro Devices, Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE FOR ANY
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
******************************************************************************/

#ifndef EXAMPLES_EXAMPLE_UTILS_HPP
#define EXAMPLES_EXAMPLE_UTILS_HPP
#include "mersenne.h"
Expand Down
14 changes: 11 additions & 3 deletions rmake.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#!/usr/bin/python3
"""Copyright 2020-2021 Advanced Micro Devices, Inc.
"""Copyright 2020-2023 Advanced Micro Devices, Inc.
Manage build and installation"""

import re
Expand Down Expand Up @@ -72,6 +72,12 @@ def delete_dir(dir_path) :
linux_path = pathlib.Path(dir_path).absolute()
#print( linux_path )
run_cmd( "rm" , f"-rf {linux_path}")

def cmake_path(os_path):
    """Return os_path in the form this script hands to CMake.

    When OS_info["ID"] is "windows", every backslash is replaced with a
    forward slash and the path is otherwise left as given. On any other
    platform the path is resolved with os.path.realpath.
    """
    if OS_info["ID"] != "windows":
        return os.path.realpath(os_path)
    return os_path.replace("\\", "/")

def config_cmd():
global args
Expand All @@ -85,7 +91,9 @@ def config_cmd():
cmake_platform_opts = []
if (OS_info["ID"] == 'windows'):
# we don't have ROCM on windows but have hip, ROCM can be downloaded if required
rocm_path = os.getenv( 'ROCM_PATH', "C:/hipsdk/rocm-cmake-master") #C:/hip") # rocm/Utils/cmake-rocm4.2.0"
# CMAKE_PREFIX_PATH set to rocm_path and HIP_PATH set BY SDK Installer
raw_rocm_path = cmake_path(os.getenv('HIP_PATH', "C:/hip"))
rocm_path = f'"{raw_rocm_path}"' # guard against spaces in path
cmake_executable = "cmake.exe"
toolchain = os.path.join( src_path, "toolchain-windows.cmake" )
#set CPACK_PACKAGING_INSTALL_PREFIX= defined as blank as it is appended to end of path for archive creation
Expand Down Expand Up @@ -143,7 +151,7 @@ def config_cmd():
cmake_options.append( f"-DROCM_DISABLE_LDCONFIG=ON" )

if args.build_clients:
cmake_options.append( f"-DBUILD_TEST=ON -DBUILD_DIR={build_dir}" )
cmake_options.append( f"-DBUILD_TEST=ON -DBUILD_BENCHMARK=ON -DBUILD_DIR={build_dir}" )

cmake_options.append( f"-DAMDGPU_TARGETS={args.gpu_architecture}" )

Expand Down
1 change: 1 addition & 0 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ function(add_relative_test test_name test_target)
endif()
file(RELATIVE_PATH rel_path "${CMAKE_CURRENT_BINARY_DIR}" "${EXE_PATH}/${EXE_NAME}")
add_test(NAME "${test_name}" COMMAND "./${rel_path}")
rocm_install(TARGETS ${test_target} COMPONENT tests)
file(APPEND "${INSTALL_TEST_FILE}" "add_test(${test_name} \"../${EXE_NAME}\")\n")
endfunction()

Expand Down
9 changes: 5 additions & 4 deletions test/hipcub/test_hipcub_block_run_length_decode.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
// MIT License
//
// Copyright (c) 2021-2022 Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2021-2023 Advanced Micro Devices, Inc. All rights reserved.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -101,7 +101,7 @@ void block_run_length_decode_kernel(
const unsigned global_thread_idx = BlockSize * hipBlockIdx_x + hipThreadIdx_x;
hipcub::LoadDirectBlocked(global_thread_idx, d_run_items, run_items);
hipcub::LoadDirectBlocked(global_thread_idx, d_run_lengths, run_lengths);

unsigned total_decoded_size{};
BlockRunLengthDecodeT block_run_length_decode(
temp_storage,
Expand Down Expand Up @@ -140,7 +140,8 @@ TYPED_TEST(HipcubBlockRunLengthDecodeTest, TestDecode)

for (size_t seed_index = 0; seed_index < random_seeds_count + seed_size; seed_index++)
{
const unsigned seed_value = seed_index >= random_seeds_count ? seeds[seed_index] : rand();
const unsigned int seed_value
= seed_index < random_seeds_count ? rand() : seeds[seed_index - random_seeds_count];
SCOPED_TRACE(testing::Message() << "with seed= " << seed_value);

const LengthT max_run_length = static_cast<LengthT>(
Expand All @@ -164,7 +165,7 @@ TYPED_TEST(HipcubBlockRunLengthDecodeTest, TestDecode)
std::uniform_int_distribution<size_t> num_empty_runs_dist(1, 4);
const size_t num_trailing_empty_runs = num_empty_runs_dist(prng);
num_runs += num_trailing_empty_runs;

const auto empty_run_items = test_utils::get_random_data<ItemT>(
num_trailing_empty_runs,
std::numeric_limits<ItemT>::min(),
Expand Down
31 changes: 10 additions & 21 deletions toolchain-windows.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -3,47 +3,36 @@
# Ninja doesn't support platform
#set(CMAKE_GENERATOR_PLATFORM x64)

if (DEFINED ENV{HIP_DIR})
if (DEFINED ENV{HIP_PATH})
file(TO_CMAKE_PATH "$ENV{HIP_PATH}" HIP_DIR)
set(rocm_bin "${HIP_DIR}/bin")
elseif (DEFINED ENV{HIP_DIR})
file(TO_CMAKE_PATH "$ENV{HIP_DIR}" HIP_DIR)
set(rocm_bin "${HIP_DIR}/bin")
else()
set(HIP_DIR "C:/hip")
set(rocm_bin "C:/hip/bin")
endif()

#set(CMAKE_CXX_COMPILER "${rocm_bin}/hipcc.bat")
#set(CMAKE_C_COMPILER "${rocm_bin}/hipcc.bat")
set(CMAKE_CXX_COMPILER "${rocm_bin}/clang++.exe")
set(CMAKE_C_COMPILER "${rocm_bin}/clang.exe")

#set(CMAKE_CXX_LINKER "${rocm_bin}/hipcc.bat" )

# TODO remove, just to speed up slow cmake
set(CMAKE_C_COMPILER_WORKS 1)
set(CMAKE_CXX_COMPILER_WORKS 1)
#
if (NOT python)
set(python "python3") # take default for windows
endif()

#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -IC:/hip/include -IC:/hip/lib/clang/12.0.0 -DWIN32 -D_CRT_SECURE_NO_WARNINGS")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -I${HIP_DIR}/include -DWIN32 -D_CRT_SECURE_NO_WARNINGS")
# our usage flags
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DWIN32 -D_CRT_SECURE_NO_WARNINGS")

# flags for clang direct use
#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 -fms-extensions -fms-compatibility")
# -Wno-ignored-attributes to avoid warning: __declspec attribute 'dllexport' is not supported [-Wignored-attributes] which is used by msvc compiler
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 -fms-extensions -fms-compatibility -Wno-ignored-attributes")

# flags for clang direct use with hip
# -x hip causes linker error
#set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -x hip -IC:/hip/include/hip -D__HIP_PLATFORM_AMD__ -D__HIP_ROCclr__ -DHIP_CLANG_HCC_COMPAT_MODE=1")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -I${HIP_DIR}/include/hip -D__HIP_PLATFORM_AMD__ -D__HIP_ROCclr__ -DHIP_CLANG_HCC_COMPAT_MODE=1")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D__HIP_PLATFORM_AMD__ -D__HIP_ROCclr__ -DHIP_CLANG_HCC_COMPAT_MODE=1")

# Locate vcpkg and pull in its buildsystem integration script.
# Start from the default checkout location, then let the VCPKG_PATH
# environment variable (converted to CMake-style slashes) override it.
set(VCPKG_PATH "C:/github/vcpkg")
if (DEFINED ENV{VCPKG_PATH})
  file(TO_CMAKE_PATH "$ENV{VCPKG_PATH}" VCPKG_PATH)
endif()
include("${VCPKG_PATH}/scripts/buildsystems/vcpkg.cmake")
# set(GTEST_DIR "C:/rocm/Utils/GTestMSVC")
# set(GTEST_INCLUDE_DIR "${GTEST_DIR}/include")
# set(GTEST_LIBRARY "${GTEST_DIR}/lib/Release/gtest.lib")
# set(GTEST_MAIN_LIBRARY "${GTEST_DIR}/lib/Release/gtest_main.lib")
# set(GTEST_LIBRARIES "${GTEST_DIR}/lib/Release/gtest.lib;${GTEST_DIR}/lib/Release/gtest_main.lib")

0 comments on commit 2b54a56

Please sign in to comment.