Skip to content

Commit

Permalink
Merge pull request #1245 from nrspruit/enable_relaxed_alloc
Browse files Browse the repository at this point in the history
[L0] Only Override max allocation limits given env
  • Loading branch information
kbenzie authored Jan 17, 2024
2 parents 8007b22 + 0ebaca6 commit 5d58871
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 20 deletions.
26 changes: 11 additions & 15 deletions source/adapters/level_zero/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

#include "device.hpp"
#include "ur_level_zero.hpp"
#include "ur_util.hpp"
#include <algorithm>
#include <climits>
#include <optional>
Expand Down Expand Up @@ -268,9 +269,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(
return ReturnValue(uint32_t{64});
}
case UR_DEVICE_INFO_MAX_MEM_ALLOC_SIZE:
// if not optimized for 32-bit access, return total memory size.
// otherwise, return only maximum allocatable size.
if (Device->useOptimized32bitAccess() == 0) {
// If the user has opted in to relaxed allocation limits, i.e. wants to make
// allocations larger than the device normally permits, report the total
// global memory size as the limit instead of the device's maxMemAllocSize.
if (Device->useRelaxedAllocationLimits()) {
return ReturnValue(uint64_t{calculateGlobalMemSize(Device)});
} else {
return ReturnValue(uint64_t{Device->ZeDeviceProperties->maxMemAllocSize});
Expand Down Expand Up @@ -923,20 +925,14 @@ ur_device_handle_t_::useImmediateCommandLists() {
}
}

int32_t ur_device_handle_t_::useOptimized32bitAccess() {
static const int32_t Optimize32bitAccessMode = [this] {
// If device is Intel(R) Data Center GPU Max,
// use default provided by L0 driver.
// TODO: Use IP versioning to select based on range of devices
if (this->isPVC())
return -1;
const char *UrRet = std::getenv("UR_L0_USE_OPTIMIZED_32BIT_ACCESS");
if (!UrRet)
return 0;
return std::atoi(UrRet);
bool ur_device_handle_t_::useRelaxedAllocationLimits() {
static const bool EnableRelaxedAllocationLimits = [] {
auto UrRet = ur_getenv("UR_L0_ENABLE_RELAXED_ALLOCATION_LIMITS");
const bool RetVal = UrRet ? std::stoi(*UrRet) : 0;
return RetVal;
}();

return Optimize32bitAccessMode;
return EnableRelaxedAllocationLimits;
}

ur_result_t ur_device_handle_t_::initialize(int SubSubDeviceOrdinal,
Expand Down
2 changes: 1 addition & 1 deletion source/adapters/level_zero/device.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ struct ur_device_handle_t_ : _ur_object {
// provide support for only one, like for Intel(R)
// Data Center GPU Max, for which L0 driver only
// supports stateless.
int32_t useOptimized32bitAccess();
bool useRelaxedAllocationLimits();

bool isSubDevice() { return RootDevice != nullptr; }

Expand Down
4 changes: 2 additions & 2 deletions source/adapters/level_zero/program.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramBuildExp(
ZeBuildOptions += pOptions;
}

if (phDevices[0]->useOptimized32bitAccess() == 0) {
if (phDevices[0]->useRelaxedAllocationLimits()) {
ZeBuildOptions += " -ze-opt-greater-than-4GB-buffer-required";
}

Expand Down Expand Up @@ -256,7 +256,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urProgramCompile(
// ze-opt-greater-than-4GB-buffer-required to disable
// stateful optimizations and be able to use larger than
// 4GB allocations on these kernels.
if (Context->Devices[0]->useOptimized32bitAccess() == 0) {
if (Context->Devices[0]->useRelaxedAllocationLimits()) {
Program->BuildFlags += " -ze-opt-greater-than-4GB-buffer-required";
}
}
Expand Down
4 changes: 2 additions & 2 deletions source/adapters/level_zero/usm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -178,11 +178,11 @@ static ur_result_t USMDeviceAllocImpl(void **ResultPtr,
ZeDesc.flags = 0;
ZeDesc.ordinal = 0;

if (Device->useOptimized32bitAccess() == 0 &&
ZeStruct<ze_relaxed_allocation_limits_exp_desc_t> RelaxedDesc;
if (Device->useRelaxedAllocationLimits() &&
(Size > Device->ZeDeviceProperties->maxMemAllocSize)) {
// Tell Level-Zero to accept Size > maxMemAllocSize if
// large allocations are used.
ZeStruct<ze_relaxed_allocation_limits_exp_desc_t> RelaxedDesc;
RelaxedDesc.flags = ZE_RELAXED_ALLOCATION_LIMITS_EXP_FLAG_MAX_SIZE;
ZeDesc.pNext = &RelaxedDesc;
}
Expand Down

0 comments on commit 5d58871

Please sign in to comment.