Skip to content

Commit 1f1f181

Browse files
committed
[L0] Add support for in-order lists using L0 driver
Signed-off-by: Raiyan Latif <raiyan.latif@intel.com>
1 parent a2757b2 commit 1f1f181

File tree

5 files changed

+75
-17
lines changed

5 files changed

+75
-17
lines changed

source/adapters/level_zero/device.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1029,6 +1029,22 @@ bool ur_device_handle_t_::useRelaxedAllocationLimits() {
10291029
return EnableRelaxedAllocationLimits;
10301030
}
10311031

1032+
// Reports whether in-order queues should use the L0 driver's native in-order
// list implementation instead of the adapter's own implementation.
//
// The choice is controlled by the UR_L0_USE_DRIVER_INORDER_LISTS environment
// variable: when it is unset, or set to a value that std::atoi parses as 0,
// the feature is disabled; any other integer value enables it. The variable
// is read once, on the first call, and the result is reused afterwards.
bool ur_device_handle_t_::useDriverInOrderLists() {
  static const bool UseDriverInOrderLists = [] {
    // No unconditional override here: an early `return true` would make the
    // environment lookup below unreachable and force the feature on.
    const char *UrRet = std::getenv("UR_L0_USE_DRIVER_INORDER_LISTS");
    if (!UrRet)
      return false;
    return std::atoi(UrRet) != 0;
  }();

  return UseDriverInOrderLists;
}
1047+
10321048
ur_result_t ur_device_handle_t_::initialize(int SubSubDeviceOrdinal,
10331049
int SubSubDeviceIndex) {
10341050
// Maintain various device properties cache.

source/adapters/level_zero/device.hpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,9 @@ struct ur_device_handle_t_ : _ur_object {
143143
// Read env settings to select immediate commandlist mode.
144144
ImmCmdlistMode useImmediateCommandLists();
145145

146+
// Returns whether the adapter uses the L0 driver's implementation of
// in-order lists instead of the adapter's own implementation.
147+
bool useDriverInOrderLists();
148+
146149
// Returns whether immediate command lists are used on this device.
147150
ImmCmdlistMode ImmCommandListUsed{};
148151

source/adapters/level_zero/event.cpp

Lines changed: 46 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,19 @@ static const bool UseMultipleCmdlistBarriers = [] {
4343
return std::atoi(UseMultipleCmdlistBarriersFlag) > 0;
4444
}();
4545

46+
// Returns true when the wait list is empty, or when every event in it
// reports `Queue` as its UrQueue; returns false as soon as an event
// belonging to a different queue is found.
//
// Queue               - queue the events are compared against.
// NumEventsInWaitList - number of entries in EventWaitList.
// EventWaitList       - events to inspect; not dereferenced when the count
//                       is zero.
bool WaitListEmptyOrAllEventsFromSameQueue(
    ur_queue_handle_t Queue, uint32_t NumEventsInWaitList,
    const ur_event_handle_t *EventWaitList) {
  uint32_t Idx = 0;
  while (Idx < NumEventsInWaitList) {
    const bool FromOtherQueue = (Queue != EventWaitList[Idx]->UrQueue);
    if (FromOtherQueue) {
      return false;
    }
    ++Idx;
  }

  // Either the list was empty or no foreign event was encountered.
  return true;
}
58+
4659
UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWait(
4760
ur_queue_handle_t Queue, ///< [in] handle of the queue object
4861
uint32_t NumEventsInWaitList, ///< [in] size of the event wait list
@@ -206,21 +219,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier(
206219
bool IsInternal = OutEvent == nullptr;
207220
ur_event_handle_t *Event = OutEvent ? OutEvent : &InternalEvent;
208221

209-
auto WaitListEmptyOrAllEventsFromSameQueue = [Queue, NumEventsInWaitList,
210-
EventWaitList]() {
211-
if (!NumEventsInWaitList)
212-
return true;
213-
214-
for (uint32_t I = 0; I < NumEventsInWaitList; ++I)
215-
if (Queue != EventWaitList[I]->UrQueue)
216-
return false;
217-
218-
return true;
219-
};
220-
221222
// For in-order queue and wait-list which is empty or has events from
222223
// the same queue just use the last command event as the barrier event.
223-
if (Queue->isInOrderQueue() && WaitListEmptyOrAllEventsFromSameQueue() &&
224+
if (Queue->isInOrderQueue() &&
225+
WaitListEmptyOrAllEventsFromSameQueue(Queue, NumEventsInWaitList,
226+
EventWaitList) &&
224227
Queue->LastCommandEvent && !Queue->LastCommandEvent->IsDiscarded) {
225228
UR_CALL(urEventRetain(Queue->LastCommandEvent));
226229
*Event = Queue->LastCommandEvent;
@@ -1189,6 +1192,26 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList(
11891192
CurQueue->LastCommandEvent && CurQueue->LastCommandEvent->IsDiscarded)
11901193
IncludeLastCommandEvent = false;
11911194

1195+
// If we are using L0 native implementation for handling in-order queues,
1196+
// then we don't need to add the last enqueued event into the waitlist, as
1197+
// the native driver implementation will already ensure in-order semantics.
1198+
// The only exception is when a different immediate command list was last used on
1199+
// the same UR Queue.
1200+
if (CurQueue->Device->useDriverInOrderLists() && CurQueue->isInOrderQueue()) {
1201+
if (CurQueue->UsingImmCmdLists) {
1202+
auto QueueGroup = CurQueue->getQueueGroup(UseCopyEngine);
1203+
uint32_t QueueGroupOrdinal, QueueIndex;
1204+
auto NextIndex = QueueGroup.getQueueIndex(&QueueGroupOrdinal, &QueueIndex,
1205+
/*QueryOnly */ true);
1206+
auto NextImmCmdList = QueueGroup.ImmCmdLists[NextIndex];
1207+
IncludeLastCommandEvent &=
1208+
CurQueue->LastUsedCommandList != CurQueue->CommandListMap.end() &&
1209+
NextImmCmdList != CurQueue->LastUsedCommandList;
1210+
} else {
1211+
IncludeLastCommandEvent = false;
1212+
}
1213+
}
1214+
11921215
try {
11931216
uint32_t TmpListLength = 0;
11941217

@@ -1205,6 +1228,16 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList(
12051228
this->UrEventList = new ur_event_handle_t[EventListLength];
12061229
}
12071230

1231+
// For an in-order queue whose wait-list is empty or has events only from
1232+
// the same queue, we don't need to wait on any additional events.
1233+
if (CurQueue->Device->useDriverInOrderLists() &&
1234+
CurQueue->isInOrderQueue() &&
1235+
WaitListEmptyOrAllEventsFromSameQueue(CurQueue, EventListLength,
1236+
EventList)) {
1237+
this->Length = TmpListLength;
1238+
return UR_RESULT_SUCCESS;
1239+
}
1240+
12081241
if (EventListLength > 0) {
12091242
for (uint32_t I = 0; I < EventListLength; I++) {
12101243
{

source/adapters/level_zero/kernel.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -214,8 +214,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
214214
// the code can do a urKernelRelease on this kernel.
215215
(*Event)->CommandData = (void *)Kernel;
216216

217-
// Increment the reference count of the Kernel and indicate that the Kernel is
218-
// in use. Once the event has been signalled, the code in
217+
// Increment the reference count of the Kernel and indicate that the Kernel
218+
// is in use. Once the event has been signalled, the code in
219219
// CleanupCompletedEvent(Event) will do a urKernelRelease to update the
220220
// reference count on the kernel, using the kernel saved in CommandData.
221221
UR_CALL(urKernelRetain(Kernel));

source/adapters/level_zero/queue.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1893,6 +1893,10 @@ ur_result_t ur_queue_handle_t_::createCommandList(
18931893
ZeStruct<ze_command_list_desc_t> ZeCommandListDesc;
18941894
ZeCommandListDesc.commandQueueGroupOrdinal = QueueGroupOrdinal;
18951895

1896+
if (Device->useDriverInOrderLists() && isInOrderQueue()) {
1897+
ZeCommandListDesc.flags = ZE_COMMAND_LIST_FLAG_IN_ORDER;
1898+
}
1899+
18961900
ZE2UR_CALL(zeCommandListCreate, (Context->ZeContext, Device->ZeDevice,
18971901
&ZeCommandListDesc, &ZeCommandList));
18981902

@@ -2006,8 +2010,10 @@ ur_command_list_ptr_t &ur_queue_handle_t_::ur_queue_group_t::getImmCmdList() {
20062010
Priority = "High";
20072011
}
20082012

2009-
// Evaluate performance of explicit usage for "0" index.
2010-
if (QueueIndex != 0) {
2013+
if (Queue->Device->useDriverInOrderLists() && Queue->isInOrderQueue()) {
2014+
ZeCommandQueueDesc.flags = ZE_COMMAND_QUEUE_FLAG_IN_ORDER;
2015+
} else if (QueueIndex != 0) {
2016+
// Evaluate performance of explicit usage for "0" index.
20112017
ZeCommandQueueDesc.flags = ZE_COMMAND_QUEUE_FLAG_EXPLICIT_ONLY;
20122018
}
20132019

0 commit comments

Comments
 (0)