@@ -43,6 +43,20 @@ static const bool UseMultipleCmdlistBarriers = [] {
43
43
return std::atoi (UseMultipleCmdlistBarriersFlag) > 0 ;
44
44
}();
45
45
46
+ bool WaitListEmptyOrAllEventsFromSameQueue (
47
+ ur_queue_handle_t Queue, uint32_t NumEventsInWaitList,
48
+ const ur_event_handle_t *EventWaitList) {
49
+ if (!NumEventsInWaitList)
50
+ return true ;
51
+
52
+ for (uint32_t i = 0 ; i < NumEventsInWaitList; ++i) {
53
+ if (Queue != EventWaitList[i]->UrQueue )
54
+ return false ;
55
+ }
56
+
57
+ return true ;
58
+ }
59
+
46
60
UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWait (
47
61
ur_queue_handle_t Queue, // /< [in] handle of the queue object
48
62
uint32_t NumEventsInWaitList, // /< [in] size of the event wait list
@@ -206,21 +220,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier(
206
220
bool IsInternal = OutEvent == nullptr ;
207
221
ur_event_handle_t *Event = OutEvent ? OutEvent : &InternalEvent;
208
222
209
- auto WaitListEmptyOrAllEventsFromSameQueue = [Queue, NumEventsInWaitList,
210
- EventWaitList]() {
211
- if (!NumEventsInWaitList)
212
- return true ;
213
-
214
- for (uint32_t I = 0 ; I < NumEventsInWaitList; ++I)
215
- if (Queue != EventWaitList[I]->UrQueue )
216
- return false ;
217
-
218
- return true ;
219
- };
220
-
221
223
// For in-order queue and wait-list which is empty or has events from
222
224
// the same queue just use the last command event as the barrier event.
223
- if (Queue->isInOrderQueue () && WaitListEmptyOrAllEventsFromSameQueue () &&
225
+ if (Queue->isInOrderQueue () &&
226
+ WaitListEmptyOrAllEventsFromSameQueue (Queue, NumEventsInWaitList,
227
+ EventWaitList) &&
224
228
Queue->LastCommandEvent && !Queue->LastCommandEvent ->IsDiscarded ) {
225
229
UR_CALL (urEventRetain (Queue->LastCommandEvent ));
226
230
*Event = Queue->LastCommandEvent ;
@@ -1189,6 +1193,23 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList(
1189
1193
CurQueue->LastCommandEvent && CurQueue->LastCommandEvent ->IsDiscarded )
1190
1194
IncludeLastCommandEvent = false ;
1191
1195
1196
+ // If we are using L0 native implementation for handling in-order queues,
1197
+ // then we don't need to add the last enqueued event into the waitlist, as
1198
+ // the native driver implementation will already ensure in-order semantics.
1199
+ // The only exception is when a different immediate command list was last
1200
+ // used on the same UR Queue.
1201
+ if (CurQueue->Device ->useDriverInOrderLists () && CurQueue->isInOrderQueue () &&
1202
+ CurQueue->UsingImmCmdLists ) {
1203
+ auto QueueGroup = CurQueue->getQueueGroup (UseCopyEngine);
1204
+ uint32_t QueueGroupOrdinal, QueueIndex;
1205
+ auto NextIndex = QueueGroup.getQueueIndex (&QueueGroupOrdinal, &QueueIndex,
1206
+ /* QueryOnly */ true );
1207
+ auto NextImmCmdList = QueueGroup.ImmCmdLists [NextIndex];
1208
+ IncludeLastCommandEvent &=
1209
+ CurQueue->LastUsedCommandList != CurQueue->CommandListMap .end () &&
1210
+ NextImmCmdList != CurQueue->LastUsedCommandList ;
1211
+ }
1212
+
1192
1213
try {
1193
1214
uint32_t TmpListLength = 0 ;
1194
1215
@@ -1205,6 +1226,16 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList(
1205
1226
this ->UrEventList = new ur_event_handle_t [EventListLength];
1206
1227
}
1207
1228
1229
+ // For an in-order queue whose wait-list is empty or has events only from
1230
+ // the same queue, we don't need to wait on any additional events.
1231
+ if (CurQueue->Device ->useDriverInOrderLists () &&
1232
+ CurQueue->isInOrderQueue () &&
1233
+ WaitListEmptyOrAllEventsFromSameQueue (CurQueue, EventListLength,
1234
+ EventList)) {
1235
+ this ->Length = TmpListLength;
1236
+ return UR_RESULT_SUCCESS;
1237
+ }
1238
+
1208
1239
if (EventListLength > 0 ) {
1209
1240
for (uint32_t I = 0 ; I < EventListLength; I++) {
1210
1241
{
0 commit comments