Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,20 @@ cmake_minimum_required(VERSION 3.17)
# TODO: find a workaround to be able to lower the version requirement
project(hwmalloc VERSION 0.3.0 LANGUAGES CXX)

# ------------------------------------------------------------------------------
# Policies
# ------------------------------------------------------------------------------
# set_policy(<policy-id> <value>)
#
# Forwards <value> to cmake_policy(SET) for <policy-id>, but only when the
# running CMake recognises that policy id; unknown ids are silently skipped.
# NOTE(review): per the cmake_policy documentation, a policy set inside a
# function propagates to the caller's enclosing policy scope, so the setting
# is expected to remain in effect after this function returns.
function(set_policy POLICY_NAME POLICY_VALUE)
  if(NOT POLICY ${POLICY_NAME})
    # This CMake version does not know the policy: nothing to set.
    return()
  endif()
  cmake_policy(SET ${POLICY_NAME} ${POLICY_VALUE})
endfunction()

# Request the NEW behaviour for each policy listed below:
#   CMP0028 - Double colon in target name means ALIAS or IMPORTED target
#   CMP0060 - Link libraries by full path even in implicit directories
#   CMP0074 - find_package uses XXX_ROOT vars using PackageName
#   CMP0144 - find_package allows XXX_ROOT vars using PACKAGENAME Uppercase
foreach(_hwmalloc_policy_id IN ITEMS CMP0028 CMP0060 CMP0074 CMP0144)
  set_policy(${_hwmalloc_policy_id} NEW)
endforeach()
# Do not leave the loop variable behind in directory scope.
unset(_hwmalloc_policy_id)

# ---------------------------------------------------------------------
# CMake setup, C++ version, build type, modules, etc
# ---------------------------------------------------------------------
Expand Down
5 changes: 3 additions & 2 deletions include/hwmalloc/detail/pool.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ class pool
else if (a.node != expected_numa_node)
{
numa().free(a);
throw std::runtime_error("could not allocate on requested numa node");
throw std::runtime_error("could not allocate on requested numa node "
+ std::to_string(expected_numa_node));
}
return a;
}
Expand Down Expand Up @@ -72,7 +73,7 @@ class pool

auto s = std::make_unique<segment_type>(this,
hwmalloc::register_memory(*m_context, a.ptr, a.size), a,
hwmalloc::register_device_memory(*m_context, device_ptr, a.size), device_ptr,
hwmalloc::register_device_memory(*m_context, m_device_id, device_ptr, a.size), device_ptr,
m_device_id, m_block_size, m_free_stack);
m_segments[s.get()] = std::move(s);
set_device_id(tmp);
Expand Down
2 changes: 1 addition & 1 deletion include/hwmalloc/detail/region_traits.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ struct region_traits

#if HWMALLOC_ENABLE_DEVICE
using device_region_type =
decltype(hwmalloc::register_device_memory(*((Context*)0), nullptr, 0u));
decltype(hwmalloc::register_device_memory(*((Context*)0), int(0), nullptr, 0u));

static_assert(!std::is_copy_constructible<device_region_type>::value,
"device_region is copy constructible");
Expand Down
8 changes: 4 additions & 4 deletions include/hwmalloc/detail/user_allocation.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,19 +73,19 @@ struct user_allocation
}

#if HWMALLOC_ENABLE_DEVICE
user_allocation(Context* context, void* device_ptr, int /*device_id*/, std::size_t size)
user_allocation(Context* context, void* device_ptr, int device_id, std::size_t size)
: m_host_allocation{std::malloc(size), true}
, m_region{hwmalloc::register_memory(*context, m_host_allocation.m_ptr, size)}
, m_device_region{std::make_unique<device_region_type>(
hwmalloc::register_device_memory(*context, device_ptr, size))}
hwmalloc::register_device_memory(*context, device_id, device_ptr, size))}
{
}

user_allocation(Context* context, void* ptr, void* device_ptr, int /*device_id*/, std::size_t size)
user_allocation(Context* context, void* ptr, void* device_ptr, int device_id, std::size_t size)
: m_host_allocation{ptr, false}
, m_region{hwmalloc::register_memory(*context, ptr, size)}
, m_device_region{std::make_unique<device_region_type>(
hwmalloc::register_device_memory(*context, device_ptr, size))}
hwmalloc::register_device_memory(*context, device_id, device_ptr, size))}
{
}
#endif
Expand Down
4 changes: 1 addition & 3 deletions include/hwmalloc/fancy_ptr/void_ptr.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,7 @@ class hw_void_ptr

constexpr VoidPtr get() const noexcept { return m_data.m_ptr; }

auto handle() const noexcept { return m_data.m_handle; }
const auto& handle_ref() const noexcept { return m_data.m_handle; }
auto& handle_ref() noexcept { return m_data.m_handle; }
auto handle() const noexcept { return m_data.m_handle; }

#if HWMALLOC_ENABLE_DEVICE
constexpr VoidPtr device_ptr() const noexcept { return m_data.m_device_ptr; }
Expand Down
10 changes: 5 additions & 5 deletions include/hwmalloc/register_device.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ namespace detail
// default implementation: call normal registration
template<class Context>
constexpr auto
register_device_memory(Context&& c, void* ptr, std::size_t size) noexcept(
register_device_memory(Context&& c, int device_id, void* ptr, std::size_t size) noexcept(
noexcept(hwmalloc::register_memory(std::forward<Context>(c), ptr, size)))
-> decltype(hwmalloc::register_memory(std::forward<Context>(c), ptr, size))
{
Expand All @@ -28,11 +28,11 @@ register_device_memory(Context&& c, void* ptr, std::size_t size) noexcept(
struct register_device_fn
{
template<typename Context>
constexpr auto operator()(Context&& c, void* ptr, std::size_t size) const
noexcept(noexcept(register_device_memory(std::forward<Context>(c), ptr, size)))
-> decltype(register_device_memory(std::forward<Context>(c), ptr, size))
constexpr auto operator()(Context&& c, int device_id, void* ptr, std::size_t size) const
noexcept(noexcept(register_device_memory(std::forward<Context>(c), device_id, ptr, size)))
-> decltype(register_device_memory(std::forward<Context>(c), device_id, ptr, size))
{
return register_device_memory(std::forward<Context>(c), ptr, size);
return register_device_memory(std::forward<Context>(c), device_id, ptr, size);
}
};
} // namespace detail
Expand Down
12 changes: 10 additions & 2 deletions src/device_cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <hwmalloc/device.hpp>
#include <hwmalloc/log.hpp>
#include <cstdint>
#include <iomanip>
#include <cuda_runtime.h>
#include <stdexcept>
#include <string>
Expand Down Expand Up @@ -48,8 +49,13 @@ device_malloc(std::size_t size)
{
void* ptr;
HWMALLOC_CHECK_CUDA_RESULT(cudaMalloc(&ptr, size));
HWMALLOC_LOG("allocating", size, "bytes using cudaMalloc on device", get_device_id(), ":",
(std::uintptr_t)ptr);

#ifdef HWMALLOC_ENABLE_LOGGING
std::stringstream tmp;
tmp << std::right << "0x" << std::setfill('0') << std::setw(12) << std::noshowbase
<< std::hex << reinterpret_cast<uintptr_t>(ptr);
HWMALLOC_LOG("allocating", size, "bytes using cudaMalloc on device", get_device_id(), ":", tmp.str());
#endif
return ptr;
}

Expand All @@ -72,6 +78,7 @@ memcpy_to_device(void* dst, void const* src, std::size_t count)
HWMALLOC_CHECK_CUDA_RESULT(cudaEventRecord(done, stream));
HWMALLOC_CHECK_CUDA_RESULT(cudaEventSynchronize(done));
HWMALLOC_CHECK_CUDA_RESULT(cudaEventDestroy(done));
HWMALLOC_CHECK_CUDA_RESULT(cudaStreamDestroy(stream));
}

void
Expand All @@ -86,6 +93,7 @@ memcpy_to_host(void* dst, void const* src, std::size_t count)
HWMALLOC_CHECK_CUDA_RESULT(cudaEventRecord(done, stream));
HWMALLOC_CHECK_CUDA_RESULT(cudaEventSynchronize(done));
HWMALLOC_CHECK_CUDA_RESULT(cudaEventDestroy(done));
HWMALLOC_CHECK_CUDA_RESULT(cudaStreamDestroy(stream));
}

} // namespace hwmalloc
9 changes: 7 additions & 2 deletions src/numa.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include <algorithm>
#include <cstdlib>
#include <cstdint>
#include <iomanip>
#include <sys/sysinfo.h>

#ifdef HWMALLOC_NUMA_THROWS
Expand Down Expand Up @@ -134,8 +135,12 @@ numa_tools::allocate(size_type num_pages, index_type node) const noexcept
auto ptr = numa_alloc_onnode(num_pages * page_size_, node);
// fall back to malloc
if (!ptr) return allocate_malloc(num_pages);
HWMALLOC_LOG("allocating", num_pages * page_size_,
"bytes using numa_alloc:", (std::uintptr_t)ptr);
#ifdef HWMALLOC_ENABLE_LOGGING
std::stringstream tmp;
tmp << std::right << "0x" << std::setfill('0') << std::setw(12) << std::noshowbase << std::hex
<< reinterpret_cast<uintptr_t>(ptr);
HWMALLOC_LOG("allocating", num_pages * page_size_, "bytes using numa_alloc:", tmp.str());
#endif
return {ptr, num_pages * page_size_, node};
}

Expand Down
Loading