@@ -43,6 +43,19 @@ static const bool UseMultipleCmdlistBarriers = [] {
43
43
return std::atoi (UseMultipleCmdlistBarriersFlag) > 0 ;
44
44
}();
45
45
46
+ bool WaitListEmptyOrAllEventsFromSameQueue (
47
+ ur_queue_handle_t Queue, uint32_t NumEventsInWaitList,
48
+ const ur_event_handle_t *EventWaitList) {
49
+ if (!NumEventsInWaitList)
50
+ return true ;
51
+
52
+ for (uint32_t i = 0 ; i < NumEventsInWaitList; ++i)
53
+ if (Queue != EventWaitList[i]->UrQueue )
54
+ return false ;
55
+
56
+ return true ;
57
+ }
58
+
46
59
UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWait (
47
60
ur_queue_handle_t Queue, // /< [in] handle of the queue object
48
61
uint32_t NumEventsInWaitList, // /< [in] size of the event wait list
@@ -206,21 +219,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWaitWithBarrier(
206
219
bool IsInternal = OutEvent == nullptr ;
207
220
ur_event_handle_t *Event = OutEvent ? OutEvent : &InternalEvent;
208
221
209
- auto WaitListEmptyOrAllEventsFromSameQueue = [Queue, NumEventsInWaitList,
210
- EventWaitList]() {
211
- if (!NumEventsInWaitList)
212
- return true ;
213
-
214
- for (uint32_t I = 0 ; I < NumEventsInWaitList; ++I)
215
- if (Queue != EventWaitList[I]->UrQueue )
216
- return false ;
217
-
218
- return true ;
219
- };
220
-
221
222
// For in-order queue and wait-list which is empty or has events from
222
223
// the same queue just use the last command event as the barrier event.
223
- if (Queue->isInOrderQueue () && WaitListEmptyOrAllEventsFromSameQueue () &&
224
+ if (Queue->isInOrderQueue () &&
225
+ WaitListEmptyOrAllEventsFromSameQueue (Queue, NumEventsInWaitList,
226
+ EventWaitList) &&
224
227
Queue->LastCommandEvent && !Queue->LastCommandEvent ->IsDiscarded ) {
225
228
UR_CALL (urEventRetain (Queue->LastCommandEvent ));
226
229
*Event = Queue->LastCommandEvent ;
@@ -1189,6 +1192,26 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList(
1189
1192
CurQueue->LastCommandEvent && CurQueue->LastCommandEvent ->IsDiscarded )
1190
1193
IncludeLastCommandEvent = false ;
1191
1194
1195
+ // If we are using L0 native implementation for handling in-order queues,
1196
+ // then we don't need to add the last enqueued event into the waitlist, as
1197
+ // the native driver implementation will already ensure in-order semantics.
1198
+ // The only exception is when a different immediate command list was last used on
1199
+ // the same UR Queue.
1200
+ if (CurQueue->Device ->useDriverInOrderLists () && CurQueue->isInOrderQueue ()) {
1201
+ if (CurQueue->UsingImmCmdLists ) {
1202
+ auto QueueGroup = CurQueue->getQueueGroup (UseCopyEngine);
1203
+ uint32_t QueueGroupOrdinal, QueueIndex;
1204
+ auto NextIndex = QueueGroup.getQueueIndex (&QueueGroupOrdinal, &QueueIndex,
1205
+ /* QueryOnly */ true );
1206
+ auto NextImmCmdList = QueueGroup.ImmCmdLists [NextIndex];
1207
+ IncludeLastCommandEvent &=
1208
+ CurQueue->LastUsedCommandList != CurQueue->CommandListMap .end () &&
1209
+ NextImmCmdList != CurQueue->LastUsedCommandList ;
1210
+ } else {
1211
+ IncludeLastCommandEvent = false ;
1212
+ }
1213
+ }
1214
+
1192
1215
try {
1193
1216
uint32_t TmpListLength = 0 ;
1194
1217
@@ -1205,6 +1228,16 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList(
1205
1228
this ->UrEventList = new ur_event_handle_t [EventListLength];
1206
1229
}
1207
1230
1231
+ // For an in-order queue whose wait-list is empty or contains only events from
1232
+ // the same queue, we don't need to wait on any additional events.
1233
+ if (CurQueue->Device ->useDriverInOrderLists () &&
1234
+ CurQueue->isInOrderQueue () &&
1235
+ WaitListEmptyOrAllEventsFromSameQueue (CurQueue, EventListLength,
1236
+ EventList)) {
1237
+ this ->Length = TmpListLength;
1238
+ return UR_RESULT_SUCCESS;
1239
+ }
1240
+
1208
1241
if (EventListLength > 0 ) {
1209
1242
for (uint32_t I = 0 ; I < EventListLength; I++) {
1210
1243
{
0 commit comments