Skip to content

Commit

Permalink
Format files
Browse files Browse the repository at this point in the history
  • Loading branch information
msimberg committed Sep 6, 2024
1 parent b9df191 commit a3d6493
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 13 deletions.
8 changes: 6 additions & 2 deletions include/dlaf/memory/memory_chunk.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,15 @@ namespace memory {

namespace internal {
/// Returns a reference to an umpire::Allocator.
/// NOTE(review): judging by the name this is the host allocator prepared by
/// initializeUmpireHostAllocator() - confirm against the definition.
umpire::Allocator& getUmpireHostAllocator();
/// Sets up the host memory pool.
///
/// In the definition (src/memory/memory_chunk.cpp), when DLAF_WITH_GPU is defined
/// @p initial_bytes, @p next_bytes and @p alignment_bytes are forwarded to an
/// umpire::strategy::QuickPool built on the "PINNED" resource, and the two ratios
/// are turned into the pool's coalescing heuristic. Without DLAF_WITH_GPU all
/// arguments are unused.
///
/// @param initial_bytes               forwarded to QuickPool (first pool size argument)
/// @param next_bytes                  forwarded to QuickPool (second pool size argument)
/// @param alignment_bytes             forwarded to QuickPool (alignment argument)
/// @param coalesce_free_ratio         fraction of the pool's actual size that must be
///                                    releasable before coalescing is requested
/// @param coalesce_reallocation_ratio fraction of the pool's actual size returned by the
///                                    heuristic when coalescing is requested
void initializeUmpireHostAllocator(std::size_t initial_bytes, std::size_t next_bytes, std::size_t alignment_bytes, double coalesce_free_ratio, double coalesce_reallocation_ratio);
// Same declaration as the line above, wrapped over several lines.
void initializeUmpireHostAllocator(std::size_t initial_bytes, std::size_t next_bytes,
std::size_t alignment_bytes, double coalesce_free_ratio,
double coalesce_reallocation_ratio);
/// Counterpart of initializeUmpireHostAllocator(); its definition has an empty body.
void finalizeUmpireHostAllocator();

#ifdef DLAF_WITH_GPU
/// Sets up the device memory pool (only declared when DLAF_WITH_GPU is defined).
///
/// In the definition (src/memory/memory_chunk.cpp) @p initial_bytes, @p next_bytes
/// and @p alignment_bytes are forwarded to an umpire::strategy::QuickPool named
/// "DLAF_DEVICE_pool" built on the "DEVICE" resource, and the two ratios are turned
/// into the pool's coalescing heuristic.
///
/// @param initial_bytes               forwarded to QuickPool (first pool size argument)
/// @param next_bytes                  forwarded to QuickPool (second pool size argument)
/// @param alignment_bytes             forwarded to QuickPool (alignment argument)
/// @param coalesce_free_ratio         fraction of the pool's actual size that must be
///                                    releasable before coalescing is requested
/// @param coalesce_reallocation_ratio fraction of the pool's actual size returned by the
///                                    heuristic when coalescing is requested
void initializeUmpireDeviceAllocator(std::size_t initial_bytes, std::size_t next_bytes, std::size_t alignment_bytes, double coalesce_free_ratio, double coalesce_reallocation_ratio);
// Same declaration as the line above, wrapped over several lines.
void initializeUmpireDeviceAllocator(std::size_t initial_bytes, std::size_t next_bytes,
std::size_t alignment_bytes, double coalesce_free_ratio,
double coalesce_reallocation_ratio);
/// Counterpart of initializeUmpireDeviceAllocator().
/// NOTE(review): the definition is not visible here - confirm what it releases, if anything.
void finalizeUmpireDeviceAllocator();
/// Returns a reference to an umpire::Allocator.
/// NOTE(review): judging by the name this is the device allocator prepared by
/// initializeUmpireDeviceAllocator() - confirm against the definition.
umpire::Allocator& getUmpireDeviceAllocator();
#endif
Expand Down
12 changes: 10 additions & 2 deletions src/init.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,10 @@ struct Init {
template <>
struct Init<Backend::MC> {
/// Initializes the MC backend by setting up the Umpire host allocator.
///
/// All five pool parameters (initial size, next size, alignment and the two
/// coalescing ratios) are taken from the `umpire_host_memory_pool_*` fields of
/// @p cfg. The allocator is initialized exactly once per call of this function.
///
/// @param cfg configuration holding the host memory pool settings
static void initialize(const configuration& cfg) {
  memory::internal::initializeUmpireHostAllocator(
      cfg.umpire_host_memory_pool_initial_bytes,
      cfg.umpire_host_memory_pool_next_bytes,
      cfg.umpire_host_memory_pool_alignment_bytes,
      cfg.umpire_host_memory_pool_coalescing_free_ratio,
      cfg.umpire_host_memory_pool_coalescing_reallocation_ratio);
}

static void finalize() {
Expand Down Expand Up @@ -104,7 +107,10 @@ template <>
struct Init<Backend::GPU> {
static void initialize(const configuration& cfg) {
const int device = 0;
memory::internal::initializeUmpireDeviceAllocator(cfg.umpire_device_memory_pool_initial_bytes, cfg.umpire_device_memory_pool_initial_bytes, cfg.umpire_device_memory_pool_alignment_bytes, cfg.umpire_host_memory_pool_coalescing_free_ratio, cfg.umpire_host_memory_pool_coalescing_reallocation_ratio);
memory::internal::initializeUmpireDeviceAllocator(
cfg.umpire_device_memory_pool_initial_bytes, cfg.umpire_device_memory_pool_initial_bytes,
cfg.umpire_device_memory_pool_alignment_bytes, cfg.umpire_host_memory_pool_coalescing_free_ratio,
cfg.umpire_host_memory_pool_coalescing_reallocation_ratio);
initializeGpuPool(device, cfg.num_np_gpu_streams_per_thread, cfg.num_hp_gpu_streams_per_thread);
pika::cuda::experimental::detail::register_polling(pika::resource::get_thread_pool("default"));
}
Expand Down Expand Up @@ -195,6 +201,7 @@ void updateConfigurationValue(const pika::program_options::variables_map& vm, T&
}

void updateConfiguration(const pika::program_options::variables_map& vm, configuration& cfg) {
// clang-format off
updateConfigurationValue(vm, cfg.num_np_gpu_streams_per_thread, "NUM_NP_GPU_STREAMS_PER_THREAD", "num-np-gpu-streams-per-thread");
updateConfigurationValue(vm, cfg.num_hp_gpu_streams_per_thread, "NUM_HP_GPU_STREAMS_PER_THREAD", "num-hp-gpu-streams-per-thread");
updateConfigurationValue(vm, cfg.umpire_host_memory_pool_initial_bytes, "UMPIRE_HOST_MEMORY_POOL_INITIAL_BYTES", "umpire-host-memory-pool-initial-bytes");
Expand All @@ -207,6 +214,7 @@ void updateConfiguration(const pika::program_options::variables_map& vm, configu
updateConfigurationValue(vm, cfg.umpire_device_memory_pool_alignment_bytes, "UMPIRE_DEVICE_MEMORY_POOL_ALIGNMENT_BYTES", "umpire-device-memory-pool-alignment-bytes");
updateConfigurationValue(vm, cfg.umpire_device_memory_pool_coalescing_free_ratio, "UMPIRE_DEVICE_MEMORY_POOL_COALESCING_FREE_RATIO", "umpire-device-memory-pool-coalescing-free-ratio");
updateConfigurationValue(vm, cfg.umpire_device_memory_pool_coalescing_reallocation_ratio, "UMPIRE_DEVICE_MEMORY_POOL_COALESCING_REALLOCATION_RATIO", "umpire-device-memory-pool-coalescing-reallocation-ratio");
// clang-format on
cfg.mpi_pool = (pika::resource::pool_exists("mpi")) ? "mpi" : "default";

// Warn if not using MPI pool without --dlaf:no-mpi-pool
Expand Down
26 changes: 17 additions & 9 deletions src/memory/memory_chunk.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,18 +60,22 @@ using CoalesceHeuristicType = umpire::strategy::PoolCoalesceHeuristic<PoolType>;
// this can lead to strange reallocations, so we simply avoid that case. Two or
// more blocks are always coalesced to one block, so no reallocation will
// happen immediately after coalescing two or more blocks.
static CoalesceHeuristicType get_coalesce_heuristic(double coalesce_free_ratio,
                                                    double coalesce_reallocation_ratio) {
  // The returned callable is stored by the pool, so both ratios are captured by value.
  return [coalesce_free_ratio, coalesce_reallocation_ratio](const PoolType& pool) -> std::size_t {
    // Minimum releasable size (as a fraction of the pool's actual size) required
    // before coalescing is requested.
    const auto threshold = static_cast<std::size_t>(coalesce_free_ratio * pool.getActualSize());
    const bool enough_blocks = pool.getReleasableBlocks() >= 2;

    if (enough_blocks && pool.getReleasableSize() >= threshold) {
      // Request coalescing; the returned value is a fraction of the actual pool size.
      return static_cast<std::size_t>(coalesce_reallocation_ratio * pool.getActualSize());
    }

    // Zero means: do not coalesce.
    return 0;
  };
}

void initializeUmpireHostAllocator(std::size_t initial_bytes, std::size_t next_bytes, std::size_t alignment_bytes, double coalesce_free_ratio, double coalesce_reallocation_ratio) {
void initializeUmpireHostAllocator(std::size_t initial_bytes, std::size_t next_bytes,
std::size_t alignment_bytes, double coalesce_free_ratio,
double coalesce_reallocation_ratio) {
#ifdef DLAF_WITH_GPU
static bool initialized = false;

Expand All @@ -80,9 +84,9 @@ void initializeUmpireHostAllocator(std::size_t initial_bytes, std::size_t next_b
if (!initialized) {
auto host_allocator = umpire::ResourceManager::getInstance().getAllocator("PINNED");
auto pooled_host_allocator =
umpire::ResourceManager::getInstance().makeAllocator<umpire::strategy::QuickPool>("DLAF_PINNED_pool",
host_allocator,
initial_bytes, next_bytes, alignment_bytes, get_coalesce_heuristic(coalesce_free_ratio, coalesce_reallocation_ratio));
umpire::ResourceManager::getInstance().makeAllocator<umpire::strategy::QuickPool>(
"DLAF_PINNED_pool", host_allocator, initial_bytes, next_bytes, alignment_bytes,
get_coalesce_heuristic(coalesce_free_ratio, coalesce_reallocation_ratio));
auto thread_safe_pooled_host_allocator =
umpire::ResourceManager::getInstance().makeAllocator<umpire::strategy::ThreadSafeAllocator>(
"DLAF_PINNED_thread_safe_pool", pooled_host_allocator);
Expand All @@ -92,14 +96,17 @@ void initializeUmpireHostAllocator(std::size_t initial_bytes, std::size_t next_b
initialized = true;
}
#else
dlaf::internal::silenceUnusedWarningFor(initial_bytes, next_bytes, alignment_bytes, coalesce_free_ratio, coalesce_reallocation_ratio);
dlaf::internal::silenceUnusedWarningFor(initial_bytes, next_bytes, alignment_bytes,
coalesce_free_ratio, coalesce_reallocation_ratio);
#endif
}

/// Finalization hook for the host allocator. The body is empty, so calling it
/// has no effect.
/// NOTE(review): presumably empty because Umpire pools cannot be released (see
/// the comment in initializeUmpireDeviceAllocator) - confirm.
void finalizeUmpireHostAllocator() {
  // Nothing to do.
}

#ifdef DLAF_WITH_GPU
void initializeUmpireDeviceAllocator(std::size_t initial_bytes, std::size_t next_bytes, std::size_t alignment_bytes, double coalesce_free_ratio, double coalesce_reallocation_ratio) {
void initializeUmpireDeviceAllocator(std::size_t initial_bytes, std::size_t next_bytes,
std::size_t alignment_bytes, double coalesce_free_ratio,
double coalesce_reallocation_ratio) {
static bool initialized = false;

// Umpire pools cannot be released, so we keep the pools around even when
Expand All @@ -108,7 +115,8 @@ void initializeUmpireDeviceAllocator(std::size_t initial_bytes, std::size_t next
auto device_allocator = umpire::ResourceManager::getInstance().getAllocator("DEVICE");
auto pooled_device_allocator =
umpire::ResourceManager::getInstance().makeAllocator<umpire::strategy::QuickPool>(
"DLAF_DEVICE_pool", device_allocator, initial_bytes, next_bytes, alignment_bytes, get_coalesce_heuristic(coalesce_free_ratio, coalesce_reallocation_ratio));
"DLAF_DEVICE_pool", device_allocator, initial_bytes, next_bytes, alignment_bytes,
get_coalesce_heuristic(coalesce_free_ratio, coalesce_reallocation_ratio));
auto thread_safe_pooled_device_allocator =
umpire::ResourceManager::getInstance().makeAllocator<umpire::strategy::ThreadSafeAllocator>(
"DLAF_DEVICE_thread_safe_pool", pooled_device_allocator);
Expand Down

0 comments on commit a3d6493

Please sign in to comment.