diff --git a/CMakeLists.txt b/CMakeLists.txt index 35dbe56..5ffe1f5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3,6 +3,20 @@ cmake_minimum_required(VERSION 3.17) # TODO: find a workaround to be able to lower the version requirement project(hwmalloc VERSION 0.3.0 LANGUAGES CXX) +# ------------------------------------------------------------------------------ +# Policies +# ------------------------------------------------------------------------------ +function(set_policy POLICY_NAME POLICY_VALUE) + if(POLICY ${POLICY_NAME}) + cmake_policy(SET ${POLICY_NAME} ${POLICY_VALUE}) + endif() +endfunction() + +set_policy(CMP0028 NEW) # Double colon in target name means ALIAS or IMPORTED target +set_policy(CMP0060 NEW) # Link libraries by full path even in implicit directories +set_policy(CMP0074 NEW) # find_package uses XXX_ROOT vars using PackageName +set_policy(CMP0144 NEW) # find_package allows XXX_ROOT vars using PACKAGENAME Uppercase + # --------------------------------------------------------------------- # CMake setup, C++ version, build type, modules, etc # --------------------------------------------------------------------- diff --git a/include/hwmalloc/detail/pool.hpp b/include/hwmalloc/detail/pool.hpp index e75721d..85fda28 100644 --- a/include/hwmalloc/detail/pool.hpp +++ b/include/hwmalloc/detail/pool.hpp @@ -41,7 +41,8 @@ class pool else if (a.node != expected_numa_node) { numa().free(a); - throw std::runtime_error("could not allocate on requested numa node"); + throw std::runtime_error("could not allocate on requested numa node " + + std::to_string(expected_numa_node)); } return a; } @@ -72,7 +73,7 @@ class pool auto s = std::make_unique(this, hwmalloc::register_memory(*m_context, a.ptr, a.size), a, - hwmalloc::register_device_memory(*m_context, device_ptr, a.size), device_ptr, + hwmalloc::register_device_memory(*m_context, m_device_id, device_ptr, a.size), device_ptr, m_device_id, m_block_size, m_free_stack); m_segments[s.get()] = std::move(s); set_device_id(tmp); diff --git a/include/hwmalloc/detail/region_traits.hpp b/include/hwmalloc/detail/region_traits.hpp index d32d91f..8c0983a 100644 --- a/include/hwmalloc/detail/region_traits.hpp +++ b/include/hwmalloc/detail/region_traits.hpp @@ -39,7 +39,7 @@ struct region_traits #if HWMALLOC_ENABLE_DEVICE using device_region_type = - decltype(hwmalloc::register_device_memory(*((Context*)0), nullptr, 0u)); + decltype(hwmalloc::register_device_memory(*((Context*)0), int(0), nullptr, 0u)); static_assert(!std::is_copy_constructible::value, "device_region is copy constructible"); diff --git a/include/hwmalloc/detail/user_allocation.hpp b/include/hwmalloc/detail/user_allocation.hpp index 3c7a5a2..b9f30bd 100644 --- a/include/hwmalloc/detail/user_allocation.hpp +++ b/include/hwmalloc/detail/user_allocation.hpp @@ -73,19 +73,19 @@ struct user_allocation } #if HWMALLOC_ENABLE_DEVICE - user_allocation(Context* context, void* device_ptr, int /*device_id*/, std::size_t size) + user_allocation(Context* context, void* device_ptr, int device_id, std::size_t size) : m_host_allocation{std::malloc(size), true} , m_region{hwmalloc::register_memory(*context, m_host_allocation.m_ptr, size)} , m_device_region{std::make_unique( - hwmalloc::register_device_memory(*context, device_ptr, size))} + hwmalloc::register_device_memory(*context, device_id, device_ptr, size))} { } - user_allocation(Context* context, void* ptr, void* device_ptr, int /*device_id*/, std::size_t size) + user_allocation(Context* context, void* ptr, void* device_ptr, int device_id, std::size_t size) : m_host_allocation{ptr, false} , m_region{hwmalloc::register_memory(*context, ptr, size)} , m_device_region{std::make_unique( - hwmalloc::register_device_memory(*context, device_ptr, size))} + hwmalloc::register_device_memory(*context, device_id, device_ptr, size))} { } #endif diff --git a/include/hwmalloc/fancy_ptr/void_ptr.hpp b/include/hwmalloc/fancy_ptr/void_ptr.hpp index 98c887a..51cdd1d 100644 --- a/include/hwmalloc/fancy_ptr/void_ptr.hpp +++ b/include/hwmalloc/fancy_ptr/void_ptr.hpp @@ -70,9 +70,7 @@ class hw_void_ptr constexpr VoidPtr get() const noexcept { return m_data.m_ptr; } - auto handle() const noexcept { return m_data.m_handle; } - const auto& handle_ref() const noexcept { return m_data.m_handle; } - auto& handle_ref() noexcept { return m_data.m_handle; } + auto handle() const noexcept { return m_data.m_handle; } #if HWMALLOC_ENABLE_DEVICE constexpr VoidPtr device_ptr() const noexcept { return m_data.m_device_ptr; } diff --git a/include/hwmalloc/register_device.hpp b/include/hwmalloc/register_device.hpp index 44b4211..622081d 100644 --- a/include/hwmalloc/register_device.hpp +++ b/include/hwmalloc/register_device.hpp @@ -18,7 +18,7 @@ namespace detail // default implementation: call normal registration template constexpr auto -register_device_memory(Context&& c, void* ptr, std::size_t size) noexcept( +register_device_memory(Context&& c, int device_id, void* ptr, std::size_t size) noexcept( noexcept(hwmalloc::register_memory(std::forward(c), ptr, size))) -> decltype(hwmalloc::register_memory(std::forward(c), ptr, size)) { @@ -28,11 +28,11 @@ register_device_memory(Context&& c, void* ptr, std::size_t size) noexcept( struct register_device_fn { template - constexpr auto operator()(Context&& c, void* ptr, std::size_t size) const - noexcept(noexcept(register_device_memory(std::forward(c), ptr, size))) - -> decltype(register_device_memory(std::forward(c), ptr, size)) + constexpr auto operator()(Context&& c, int device_id, void* ptr, std::size_t size) const + noexcept(noexcept(register_device_memory(std::forward(c), device_id, ptr, size))) + -> decltype(register_device_memory(std::forward(c), device_id, ptr, size)) { - return register_device_memory(std::forward(c), ptr, size); + return register_device_memory(std::forward(c), device_id, ptr, size); } }; } // namespace detail diff --git a/src/device_cuda.cpp b/src/device_cuda.cpp index fb9544d..2724889 100644 --- a/src/device_cuda.cpp +++ b/src/device_cuda.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include @@ -48,8 +49,13 @@ device_malloc(std::size_t size) { void* ptr; HWMALLOC_CHECK_CUDA_RESULT(cudaMalloc(&ptr, size)); - HWMALLOC_LOG("allocating", size, "bytes using cudaMalloc on device", get_device_id(), ":", - (std::uintptr_t)ptr); + +#ifdef HWMALLOC_ENABLE_LOGGING + std::stringstream tmp; + tmp << std::right << "0x" << std::setfill('0') << std::setw(12) << std::noshowbase + << std::hex << reinterpret_cast(ptr); + HWMALLOC_LOG("allocating", size, "bytes using cudaMalloc on device", get_device_id(), ":", tmp.str()); +#endif return ptr; } @@ -72,6 +78,7 @@ memcpy_to_device(void* dst, void const* src, std::size_t count) HWMALLOC_CHECK_CUDA_RESULT(cudaEventRecord(done, stream)); HWMALLOC_CHECK_CUDA_RESULT(cudaEventSynchronize(done)); HWMALLOC_CHECK_CUDA_RESULT(cudaEventDestroy(done)); + HWMALLOC_CHECK_CUDA_RESULT(cudaStreamDestroy(stream)); } void @@ -86,6 +93,7 @@ memcpy_to_host(void* dst, void const* src, std::size_t count) HWMALLOC_CHECK_CUDA_RESULT(cudaEventRecord(done, stream)); HWMALLOC_CHECK_CUDA_RESULT(cudaEventSynchronize(done)); HWMALLOC_CHECK_CUDA_RESULT(cudaEventDestroy(done)); + HWMALLOC_CHECK_CUDA_RESULT(cudaStreamDestroy(stream)); } } // namespace hwmalloc diff --git a/src/numa.cpp b/src/numa.cpp index c5856fe..db43e85 100644 --- a/src/numa.cpp +++ b/src/numa.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #ifdef HWMALLOC_NUMA_THROWS @@ -134,8 +135,12 @@ numa_tools::allocate(size_type num_pages, index_type node) const noexcept auto ptr = numa_alloc_onnode(num_pages * page_size_, node); // fall back to malloc if (!ptr) return allocate_malloc(num_pages); - HWMALLOC_LOG("allocating", num_pages * page_size_, - "bytes using numa_alloc:", (std::uintptr_t)ptr); +#ifdef HWMALLOC_ENABLE_LOGGING + std::stringstream tmp; + tmp << std::right << "0x" << std::setfill('0') << std::setw(12) << std::noshowbase << std::hex + << reinterpret_cast(ptr); + HWMALLOC_LOG("allocating", num_pages * page_size_, "bytes using numa_alloc:", tmp.str()); +#endif return {ptr, num_pages * page_size_, node}; }