Skip to content

Commit

Permalink
[SYCL][UR][CUDA][HIP] Add support for command-buffer kernel updates (intel#15287)
Browse files Browse the repository at this point in the history

Updates the call to urCommandBufferAppendKernelLaunchExp to use the new
UR parameters.

Corresponding UR PR:
oneapi-src/unified-runtime#1924

---------

Co-authored-by: Aaron Greig <aaron.greig@codeplay.com>
  • Loading branch information
fabiomestre and aarongreig authored Oct 1, 2024
1 parent 489d95e commit 48f0e93
Show file tree
Hide file tree
Showing 5 changed files with 36 additions and 13 deletions.
12 changes: 6 additions & 6 deletions sycl/cmake/modules/FetchUnifiedRuntime.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -117,13 +117,13 @@ if(SYCL_UR_USE_FETCH_CONTENT)
endfunction()

set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git")
# commit 22962057df1b9d538e08088a7b75d9d8e7c29f90 (HEAD, origin/main, origin/HEAD)
# Merge: e824ddc2 f0a1c433
# commit 532a4ecb72da4876cef61a4ae4d638e27ad609d5
# Merge: 22962057 d944ff33
# Author: aarongreig <aaron.greig@codeplay.com>
# Date: Fri Sep 27 16:54:04 2024 +0100
# Merge pull request #2017 from nrspruit/new_sysman_init
# [L0] Use zesInit for SysMan API usage
set(UNIFIED_RUNTIME_TAG 22962057df1b9d538e08088a7b75d9d8e7c29f90)
# Date: Mon Sep 30 10:43:10 2024 +0100
# Merge pull request #1924 from Bensuo/fabio/cmd_buffer_kernel_update
# Add support for command-buffer kernel updates
set(UNIFIED_RUNTIME_TAG 532a4ecb72da4876cef61a4ae4d638e27ad609d5)

set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES")
# Due to the use of dependentloadflag and no installer for UMF and hwloc we need
Expand Down
17 changes: 13 additions & 4 deletions sycl/source/detail/device_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -704,17 +704,26 @@ bool device_impl::has(aspect Aspect) const {
return CallSuccessful && Result != nullptr;
}
case aspect::ext_oneapi_graph: {
bool SupportsCommandBufferUpdate = false;
ur_device_command_buffer_update_capability_flags_t UpdateCapabilities;
bool CallSuccessful =
getAdapter()->call_nocheck<UrApiKind::urDeviceGetInfo>(
MDevice, UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP,
sizeof(SupportsCommandBufferUpdate), &SupportsCommandBufferUpdate,
MDevice, UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP,
sizeof(UpdateCapabilities), &UpdateCapabilities,
nullptr) == UR_RESULT_SUCCESS;
if (!CallSuccessful) {
return false;
}

return has(aspect::ext_oneapi_limited_graph) && SupportsCommandBufferUpdate;
/* The kernel handle update capability is not yet required for the
* ext_oneapi_graph aspect */
ur_device_command_buffer_update_capability_flags_t RequiredCapabilities =
UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS |
UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE |
UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE |
UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET;

return has(aspect::ext_oneapi_limited_graph) &&
(UpdateCapabilities & RequiredCapabilities) == RequiredCapabilities;
}
case aspect::ext_oneapi_limited_graph: {
bool SupportsCommandBuffers = false;
Expand Down
1 change: 1 addition & 0 deletions sycl/source/detail/graph_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1474,6 +1474,7 @@ void exec_graph_impl::updateImpl(std::shared_ptr<node_impl> Node) {
}
}

UpdateDesc.hNewKernel = UrKernel;
UpdateDesc.numNewMemObjArgs = MemobjDescs.size();
UpdateDesc.pNewMemObjArgList = MemobjDescs.data();
UpdateDesc.numNewPointerArgs = PtrDescs.size();
Expand Down
2 changes: 1 addition & 1 deletion sycl/source/detail/scheduler/commands.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2538,7 +2538,7 @@ ur_result_t enqueueImpCommandBufferKernel(
ur_result_t Res =
Adapter->call_nocheck<UrApiKind::urCommandBufferAppendKernelLaunchExp>(
CommandBuffer, UrKernel, NDRDesc.Dims, &NDRDesc.GlobalOffset[0],
&NDRDesc.GlobalSize[0], LocalSize, SyncPoints.size(),
&NDRDesc.GlobalSize[0], LocalSize, 0, nullptr, SyncPoints.size(),
SyncPoints.size() ? SyncPoints.data() : nullptr, OutSyncPoint,
OutCommand);

Expand Down
17 changes: 15 additions & 2 deletions sycl/unittests/helpers/UrMock.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -200,14 +200,27 @@ inline ur_result_t mock_urDeviceGetInfo(void *pParams) {
case UR_DEVICE_INFO_AVAILABLE:
case UR_DEVICE_INFO_LINKER_AVAILABLE:
case UR_DEVICE_INFO_COMPILER_AVAILABLE:
case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP:
case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: {
case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: {
if (*params->ppPropValue)
*static_cast<ur_bool_t *>(*params->ppPropValue) = true;
if (*params->ppPropSizeRet)
**params->ppPropSizeRet = sizeof(true);
return UR_RESULT_SUCCESS;
}
case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP: {
if (*params->ppPropValue)
*static_cast<ur_device_command_buffer_update_capability_flags_t *>(
*params->ppPropValue) =
UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS |
UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE |
UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE |
UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET |
UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_HANDLE;
if (*params->ppPropSizeRet)
**params->ppPropSizeRet =
sizeof(ur_device_command_buffer_update_capability_flags_t);
return UR_RESULT_SUCCESS;
}
// This mock GPU device has no sub-devices
case UR_DEVICE_INFO_SUPPORTED_PARTITIONS: {
if (*params->ppPropSizeRet) {
Expand Down

0 comments on commit 48f0e93

Please sign in to comment.