Skip to content

Commit

Permalink
Merge branch 'main' into fix_usm_allocation
Browse files Browse the repository at this point in the history
  • Loading branch information
lbushi25 authored May 2, 2024
2 parents 47e59a8 + f3fb858 commit 23694f8
Show file tree
Hide file tree
Showing 36 changed files with 715 additions and 365 deletions.
12 changes: 9 additions & 3 deletions .github/CODEOWNERS
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,21 @@

# Level Zero adapter
source/adapters/level_zero @oneapi-src/unified-runtime-level-zero-write
test/adapters/level_zero @oneapi-src/unified-runtime-level-zero-write

# CUDA and HIP adapters
source/adapters/cuda @oneapi-src/unified-runtime-cuda-write
test/adapters/cuda @oneapi-src/unified-runtime-cuda-write
source/adapters/hip @oneapi-src/unified-runtime-hip-write
test/adapters/hip @oneapi-src/unified-runtime-hip-write

# OpenCL adapter
source/adapters/opencl @oneapi-src/unified-runtime-opencl-write
test/adapters/opencl @oneapi-src/unified-runtime-opencl-write

# Native CPU adapter
source/adapters/native_cpu @oneapi-src/unified-runtime-native-cpu-write
test/adapters/native_cpu @oneapi-src/unified-runtime-native-cpu-write

# Command-buffer experimental feature
source/adapters/**/command_buffer.* @oneapi-src/unified-runtime-command-buffer-write
Expand All @@ -20,6 +25,7 @@ scripts/core/exp-command-buffer.yml @oneapi-src/unified-runtime-command-buff
test/conformance/exp_command_buffer** @oneapi-src/unified-runtime-command-buffer-write

# Bindless Images experimental feature
scripts/core/EXP-BINDLESS-IMAGES.rst @oneapi-src/unified-runtime-bindless-images-write
scripts/core/exp-bindless-images.yml @oneapi-src/unified-runtime-bindless-images-write
source/adapters/**/image.* @oneapi-src/unified-runtime-bindless-images-write
source/adapters/**/image.* @oneapi-src/unified-runtime-bindless-images-write
scripts/core/EXP-BINDLESS-IMAGES.rst @oneapi-src/unified-runtime-bindless-images-write
scripts/core/exp-bindless-images.yml @oneapi-src/unified-runtime-bindless-images-write
test/conformance/exp_bindless_images** @oneapi-src/unified-runtime-bindless-images-write
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ option(UR_BUILD_EXAMPLE_CODEGEN "Build the codegen example." OFF)
option(VAL_USE_LIBBACKTRACE_BACKTRACE "enable libbacktrace validation backtrace for linux" OFF)
option(UR_ENABLE_ASSERTIONS "Enable assertions for all build types" OFF)
set(UR_DPCXX "" CACHE FILEPATH "Path of the DPC++ compiler executable")
set(UR_DPCXX_BUILD_FLAGS "" CACHE STRING "Build flags to pass to DPC++ when compiling device programs")
set(UR_SYCL_LIBRARY_DIR "" CACHE PATH
"Path of the SYCL runtime library directory")
set(UR_CONFORMANCE_TARGET_TRIPLES "" CACHE STRING
Expand Down
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ List of options provided by CMake:
| UR_HIP_PLATFORM | Build HIP adapter for AMD or NVIDIA platform | AMD/NVIDIA | AMD |
| UR_ENABLE_COMGR | Enable comgr lib usage | AMD/NVIDIA | AMD |
| UR_DPCXX | Path of the DPC++ compiler executable to build CTS device binaries | File path | `""` |
| UR_DPCXX_BUILD_FLAGS | Build flags to pass to DPC++ when compiling device programs | Space-separated options list | `""` |
| UR_SYCL_LIBRARY_DIR | Path of the SYCL runtime library directory to build CTS device binaries | Directory path | `""` |
| UR_HIP_ROCM_DIR | Path of the default ROCm HIP installation | Directory path | `/opt/rocm` |
| UR_HIP_INCLUDE_DIR | Path of the ROCm HIP include directory | Directory path | `${UR_HIP_ROCM_DIR}/include` |
Expand Down
404 changes: 208 additions & 196 deletions include/ur_api.h

Large diffs are not rendered by default.

90 changes: 90 additions & 0 deletions include/ur_print.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2553,6 +2553,24 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_device_info_t value) {
case UR_DEVICE_INFO_CUBEMAP_SEAMLESS_FILTERING_SUPPORT_EXP:
os << "UR_DEVICE_INFO_CUBEMAP_SEAMLESS_FILTERING_SUPPORT_EXP";
break;
case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM_EXP:
os << "UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM_EXP";
break;
case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_EXP:
os << "UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_EXP";
break;
case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM_EXP:
os << "UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM_EXP";
break;
case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP:
os << "UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP";
break;
case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP:
os << "UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP";
break;
case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP:
os << "UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP";
break;
default:
os << "unknown enumerator";
break;
Expand Down Expand Up @@ -4190,6 +4208,78 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_device_info

os << ")";
} break;
case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM_EXP: {
const ur_bool_t *tptr = (const ur_bool_t *)ptr;
if (sizeof(ur_bool_t) > size) {
os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")";
return UR_RESULT_ERROR_INVALID_SIZE;
}
os << (const void *)(tptr) << " (";

os << *tptr;

os << ")";
} break;
case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_EXP: {
const ur_bool_t *tptr = (const ur_bool_t *)ptr;
if (sizeof(ur_bool_t) > size) {
os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")";
return UR_RESULT_ERROR_INVALID_SIZE;
}
os << (const void *)(tptr) << " (";

os << *tptr;

os << ")";
} break;
case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM_EXP: {
const ur_bool_t *tptr = (const ur_bool_t *)ptr;
if (sizeof(ur_bool_t) > size) {
os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")";
return UR_RESULT_ERROR_INVALID_SIZE;
}
os << (const void *)(tptr) << " (";

os << *tptr;

os << ")";
} break;
case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP: {
const ur_bool_t *tptr = (const ur_bool_t *)ptr;
if (sizeof(ur_bool_t) > size) {
os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")";
return UR_RESULT_ERROR_INVALID_SIZE;
}
os << (const void *)(tptr) << " (";

os << *tptr;

os << ")";
} break;
case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP: {
const ur_bool_t *tptr = (const ur_bool_t *)ptr;
if (sizeof(ur_bool_t) > size) {
os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")";
return UR_RESULT_ERROR_INVALID_SIZE;
}
os << (const void *)(tptr) << " (";

os << *tptr;

os << ")";
} break;
case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP: {
const ur_bool_t *tptr = (const ur_bool_t *)ptr;
if (sizeof(ur_bool_t) > size) {
os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")";
return UR_RESULT_ERROR_INVALID_SIZE;
}
os << (const void *)(tptr) << " (";

os << *tptr;

os << ")";
} break;
default:
os << "unknown enumerator";
return UR_RESULT_ERROR_INVALID_ENUMERATION;
Expand Down
8 changes: 8 additions & 0 deletions scripts/core/EXP-BINDLESS-IMAGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,12 @@ Enums
* ${X}_DEVICE_INFO_INTEROP_SEMAPHORE_EXPORT_SUPPORT_EXP
* ${X}_DEVICE_INFO_CUBEMAP_SUPPORT_EXP
* ${X}_DEVICE_INFO_CUBEMAP_SEAMLESS_FILTERING_SUPPORT_EXP
* ${X}_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM_EXP
* ${X}_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_EXP
* ${X}_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM_EXP
* ${X}_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP
* ${X}_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP
* ${X}_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP

* ${x}_command_t
* ${X}_COMMAND_INTEROP_SEMAPHORE_WAIT_EXP
Expand Down Expand Up @@ -198,6 +204,8 @@ Changelog
+------------------------------------------------------------------------+
| 10.0 | Added cubemap image type, sampling properties, and device |
| | queries. |
+----------+-------------------------------------------------------------+
| 11.0 | Added device queries for sampled image fetch capabilities. |
+----------+-------------------------------------------------------------+

Contributors
Expand Down
18 changes: 18 additions & 0 deletions scripts/core/exp-bindless-images.yml
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,24 @@ etors:
- name: CUBEMAP_SEAMLESS_FILTERING_SUPPORT_EXP
value: "0x2011"
desc: "[$x_bool_t] returns true if the device supports sampling cubemapped images across face boundaries"
- name: BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM_EXP
value: "0x2012"
desc: "[$x_bool_t] returns true if the device is capable of fetching USM backed 1D sampled image data."
- name: BINDLESS_SAMPLED_IMAGE_FETCH_1D_EXP
value: "0x2013"
desc: "[$x_bool_t] returns true if the device is capable of fetching non-USM backed 1D sampled image data."
- name: BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM_EXP
value: "0x2014"
desc: "[$x_bool_t] returns true if the device is capable of fetching USM backed 2D sampled image data."
- name: BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP
value: "0x2015"
desc: "[$x_bool_t] returns true if the device is capable of fetching non-USM backed 2D sampled image data."
- name: BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP
value: "0x2016"
desc: "[$x_bool_t] returns true if the device is capable of fetching USM backed 3D sampled image data."
- name: BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP
value: "0x2017"
desc: "[$x_bool_t] returns true if the device is capable of fetching non-USM backed 3D sampled image data."
--- #--------------------------------------------------------------------------
type: enum
extend: true
Expand Down
24 changes: 24 additions & 0 deletions source/adapters/cuda/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -926,6 +926,30 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
// CUDA supports cubemap seamless filtering.
return ReturnValue(true);
}
case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_USM_EXP: {
// CUDA does support fetching 1D USM sampled image data.
return ReturnValue(true);
}
case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_1D_EXP: {
// CUDA does not support fetching 1D non-USM sampled image data.
return ReturnValue(false);
}
case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_USM_EXP: {
// CUDA does support fetching 2D USM sampled image data.
return ReturnValue(true);
}
case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_2D_EXP: {
// CUDA does support fetching 2D non-USM sampled image data.
return ReturnValue(true);
}
case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_USM_EXP: {
// CUDA does not support 3D USM sampled textures
return ReturnValue(false);
}
case UR_DEVICE_INFO_BINDLESS_SAMPLED_IMAGE_FETCH_3D_EXP: {
// CUDA does support fetching 3D non-USM sampled image data.
return ReturnValue(true);
}
case UR_DEVICE_INFO_DEVICE_ID: {
int Value = 0;
UR_CHECK_ERROR(cuDeviceGetAttribute(
Expand Down
8 changes: 5 additions & 3 deletions source/adapters/level_zero/command_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1048,8 +1048,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
// Create command-list to execute before `CommandListPtr` and will signal
// when `EventWaitList` dependencies are complete.
ur_command_list_ptr_t WaitCommandList{};
UR_CALL(Queue->Context->getAvailableCommandList(Queue, WaitCommandList,
false, false));
UR_CALL(Queue->Context->getAvailableCommandList(
Queue, WaitCommandList, false, NumEventsInWaitList, EventWaitList,
false));

ZE2UR_CALL(zeCommandListAppendBarrier,
(WaitCommandList->first, CommandBuffer->WaitEvent->ZeEvent,
Expand Down Expand Up @@ -1086,7 +1087,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
// Create a command-list to signal RetEvent on completion
ur_command_list_ptr_t SignalCommandList{};
UR_CALL(Queue->Context->getAvailableCommandList(Queue, SignalCommandList,
false, false));
false, NumEventsInWaitList,
EventWaitList, false));
// Reset the wait-event for the UR command-buffer that is signaled when its
// submission dependencies have been satisfied.
ZE2UR_CALL(zeCommandListAppendEventReset,
Expand Down
13 changes: 13 additions & 0 deletions source/adapters/level_zero/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,19 @@ static const uint32_t UrL0QueueSyncNonBlocking = [] {
return L0QueueSyncLockingModeValue;
}();

// Selects whether the L0 adapter creates signal events for commands on
// integrated GPU devices. The value is taken from the environment variable
// UR_L0_OOQ_INTEGRATED_SIGNAL_EVENT (parsed with std::atoi) and falls back to
// 1 when the variable is not set.
static const uint32_t UrL0OutOfOrderIntegratedSignalEvent = [] {
  const char *EnvStr = std::getenv("UR_L0_OOQ_INTEGRATED_SIGNAL_EVENT");
  if (!EnvStr) {
    // Variable absent: keep the default.
    return uint32_t{1};
  }
  return static_cast<uint32_t>(std::atoi(EnvStr));
}();

// This class encapsulates actions taken along with a call to Level Zero API.
class ZeCall {
private:
Expand Down
17 changes: 13 additions & 4 deletions source/adapters/level_zero/context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -645,7 +645,8 @@ static const size_t CmdListsCleanupThreshold = [] {
// Retrieve an available command list to be used in a PI call.
ur_result_t ur_context_handle_t_::getAvailableCommandList(
ur_queue_handle_t Queue, ur_command_list_ptr_t &CommandList,
bool UseCopyEngine, bool AllowBatching,
bool UseCopyEngine, uint32_t NumEventsInWaitList,
const ur_event_handle_t *EventWaitList, bool AllowBatching,
ze_command_queue_handle_t *ForcedCmdQueue) {
// Immediate commandlists have been pre-allocated and are always available.
if (Queue->UsingImmCmdLists) {
Expand Down Expand Up @@ -677,9 +678,17 @@ ur_result_t ur_context_handle_t_::getAvailableCommandList(
// for this queue.
if (Queue->hasOpenCommandList(UseCopyEngine)) {
if (AllowBatching) {
CommandList = CommandBatch.OpenCommandList;
UR_CALL(Queue->insertStartBarrierIfDiscardEventsMode(CommandList));
return UR_RESULT_SUCCESS;
bool batchingAllowed = true;
if (!UrL0OutOfOrderIntegratedSignalEvent &&
Queue->Device->isIntegrated()) {
batchingAllowed = eventCanBeBatched(Queue, UseCopyEngine,
NumEventsInWaitList, EventWaitList);
}
if (batchingAllowed) {
CommandList = CommandBatch.OpenCommandList;
UR_CALL(Queue->insertStartBarrierIfDiscardEventsMode(CommandList));
return UR_RESULT_SUCCESS;
}
}
// If this command isn't allowed to be batched or doesn't match the forced
// command queue, then we need to go ahead and execute what is already in
Expand Down
10 changes: 5 additions & 5 deletions source/adapters/level_zero/context.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -292,11 +292,11 @@ struct ur_context_handle_t_ : _ur_object {
// When using immediate commandlists, retrieves an immediate command list
// for executing on this device. Immediate commandlists are created only
// once for each SYCL Queue and after that they are reused.
ur_result_t
getAvailableCommandList(ur_queue_handle_t Queue,
ur_command_list_ptr_t &CommandList,
bool UseCopyEngine, bool AllowBatching = false,
ze_command_queue_handle_t *ForcedCmdQueue = nullptr);
// NumEventsInWaitList / EventWaitList: the wait list supplied by the caller
// for the command this list is requested for. The implementation passes them
// to eventCanBeBatched() when batching is allowed, signal events are disabled
// via UR_L0_OOQ_INTEGRATED_SIGNAL_EVENT and the queue's device is integrated,
// to decide whether the currently open batch may be reused.
// AllowBatching defaults to false and ForcedCmdQueue to nullptr.
ur_result_t getAvailableCommandList(
ur_queue_handle_t Queue, ur_command_list_ptr_t &CommandList,
bool UseCopyEngine, uint32_t NumEventsInWaitList,
const ur_event_handle_t *EventWaitList, bool AllowBatching = false,
ze_command_queue_handle_t *ForcedCmdQueue = nullptr);

// Checks if Device is covered by this context.
// For that the Device or its root devices need to be in the context.
Expand Down
4 changes: 4 additions & 0 deletions source/adapters/level_zero/device.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,10 @@ struct ur_device_handle_t_ : _ur_object {
(ZeDeviceProperties->deviceId & 0xff0) == 0xb60;
}

// Returns true when the device's Level Zero property flags include
// ZE_DEVICE_PROPERTY_FLAG_INTEGRATED.
bool isIntegrated() {
  return (ZeDeviceProperties->flags & ZE_DEVICE_PROPERTY_FLAG_INTEGRATED) != 0;
}

// Does this device represent a single compute slice?
bool isCCS() const {
return QueueGroup[ur_device_handle_t_::queue_group_info_t::Compute]
Expand Down
Loading

0 comments on commit 23694f8

Please sign in to comment.