Skip to content

Commit 92a0250

Browse files
raiyanla authored and nrspruit committed
[L0] Allocate event pools efficiently in multi-device scenarios
Signed-off-by: Raiyan Latif <raiyan.latif@intel.com>
1 parent 47102cb commit 92a0250

File tree

9 files changed

+249
-80
lines changed

9 files changed

+249
-80
lines changed

source/adapters/level_zero/command_buffer.cpp

Lines changed: 19 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -293,7 +293,8 @@ static ur_result_t enqueueCommandBufferMemCopyHelper(
293293
SyncPointWaitList, ZeEventList));
294294

295295
ur_event_handle_t LaunchEvent;
296-
UR_CALL(EventCreate(CommandBuffer->Context, nullptr, false, &LaunchEvent));
296+
UR_CALL(
297+
EventCreate(CommandBuffer->Context, nullptr, false, false, &LaunchEvent));
297298
LaunchEvent->CommandType = CommandType;
298299

299300
// Get sync point and register the event with it.
@@ -358,7 +359,8 @@ static ur_result_t enqueueCommandBufferMemCopyRectHelper(
358359
SyncPointWaitList, ZeEventList));
359360

360361
ur_event_handle_t LaunchEvent;
361-
UR_CALL(EventCreate(CommandBuffer->Context, nullptr, false, &LaunchEvent));
362+
UR_CALL(
363+
EventCreate(CommandBuffer->Context, nullptr, false, false, &LaunchEvent));
362364
LaunchEvent->CommandType = CommandType;
363365

364366
// Get sync point and register the event with it.
@@ -401,7 +403,8 @@ static ur_result_t enqueueCommandBufferFillHelper(
401403
SyncPointWaitList, ZeEventList));
402404

403405
ur_event_handle_t LaunchEvent;
404-
UR_CALL(EventCreate(CommandBuffer->Context, nullptr, true, &LaunchEvent));
406+
UR_CALL(
407+
EventCreate(CommandBuffer->Context, nullptr, false, true, &LaunchEvent));
405408
LaunchEvent->CommandType = CommandType;
406409

407410
// Get sync point and register the event with it.
@@ -453,8 +456,10 @@ urCommandBufferCreateExp(ur_context_handle_t Context, ur_device_handle_t Device,
453456
// Create signal & wait events to be used in the command-list for sync
454457
// on command-buffer enqueue.
455458
auto RetCommandBuffer = *CommandBuffer;
456-
UR_CALL(EventCreate(Context, nullptr, false, &RetCommandBuffer->SignalEvent));
457-
UR_CALL(EventCreate(Context, nullptr, false, &RetCommandBuffer->WaitEvent));
459+
UR_CALL(EventCreate(Context, nullptr, false, false,
460+
&RetCommandBuffer->SignalEvent));
461+
UR_CALL(EventCreate(Context, nullptr, false, false,
462+
&RetCommandBuffer->WaitEvent));
458463

459464
// Add prefix commands
460465
ZE2UR_CALL(zeCommandListAppendEventReset,
@@ -550,7 +555,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp(
550555
UR_CALL(getEventsFromSyncPoints(CommandBuffer, NumSyncPointsInWaitList,
551556
SyncPointWaitList, ZeEventList));
552557
ur_event_handle_t LaunchEvent;
553-
UR_CALL(EventCreate(CommandBuffer->Context, nullptr, false, &LaunchEvent));
558+
UR_CALL(
559+
EventCreate(CommandBuffer->Context, nullptr, false, false, &LaunchEvent));
554560
LaunchEvent->CommandType = UR_COMMAND_KERNEL_LAUNCH;
555561

556562
// Get sync point and register the event with it.
@@ -732,7 +738,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMPrefetchExp(
732738
}
733739

734740
ur_event_handle_t LaunchEvent;
735-
UR_CALL(EventCreate(CommandBuffer->Context, nullptr, true, &LaunchEvent));
741+
UR_CALL(
742+
EventCreate(CommandBuffer->Context, nullptr, false, true, &LaunchEvent));
736743
LaunchEvent->CommandType = UR_COMMAND_USM_PREFETCH;
737744

738745
// Get sync point and register the event with it.
@@ -795,7 +802,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp(
795802
}
796803

797804
ur_event_handle_t LaunchEvent;
798-
UR_CALL(EventCreate(CommandBuffer->Context, nullptr, true, &LaunchEvent));
805+
UR_CALL(
806+
EventCreate(CommandBuffer->Context, nullptr, false, true, &LaunchEvent));
799807
LaunchEvent->CommandType = UR_COMMAND_USM_ADVISE;
800808

801809
// Get sync point and register the event with it.
@@ -933,9 +941,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
933941
(SignalCommandList->first, CommandBuffer->WaitEvent->ZeEvent));
934942

935943
if (Event) {
936-
UR_CALL(createEventAndAssociateQueue(Queue, &RetEvent,
937-
UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP,
938-
SignalCommandList, false, true));
944+
UR_CALL(createEventAndAssociateQueue(
945+
Queue, &RetEvent, UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP,
946+
SignalCommandList, false, false, true));
939947

940948
if ((Queue->Properties & UR_QUEUE_FLAG_PROFILING_ENABLE)) {
941949
// Multiple submissions of a command buffer implies that we need to save

source/adapters/level_zero/context.cpp

Lines changed: 34 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -471,12 +471,17 @@ static const uint32_t MaxNumEventsPerPool = [] {
471471

472472
ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool(
473473
ze_event_pool_handle_t &Pool, size_t &Index, bool HostVisible,
474-
bool ProfilingEnabled) {
474+
bool ProfilingEnabled, ur_device_handle_t Device) {
475475
// Lock while updating event pool machinery.
476476
std::scoped_lock<ur_mutex> Lock(ZeEventPoolCacheMutex);
477477

478+
ze_device_handle_t ZeDevice = nullptr;
479+
480+
if (Device) {
481+
ZeDevice = Device->ZeDevice;
482+
}
478483
std::list<ze_event_pool_handle_t> *ZePoolCache =
479-
getZeEventPoolCache(HostVisible, ProfilingEnabled);
484+
getZeEventPoolCache(HostVisible, ProfilingEnabled, ZeDevice);
480485

481486
if (!ZePoolCache->empty()) {
482487
if (NumEventsAvailableInEventPool[ZePoolCache->front()] == 0) {
@@ -511,9 +516,14 @@ ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool(
511516
urPrint("ze_event_pool_desc_t flags set to: %d\n", ZeEventPoolDesc.flags);
512517

513518
std::vector<ze_device_handle_t> ZeDevices;
514-
std::for_each(
515-
Devices.begin(), Devices.end(),
516-
[&](const ur_device_handle_t &D) { ZeDevices.push_back(D->ZeDevice); });
519+
if (ZeDevice) {
520+
ZeDevices.push_back(ZeDevice);
521+
} else {
522+
std::for_each(Devices.begin(), Devices.end(),
523+
[&](const ur_device_handle_t &D) {
524+
ZeDevices.push_back(D->ZeDevice);
525+
});
526+
}
517527

518528
ZE2UR_CALL(zeEventPoolCreate, (ZeContext, &ZeEventPoolDesc,
519529
ZeDevices.size(), &ZeDevices[0], ZePool));
@@ -528,11 +538,10 @@ ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool(
528538
return UR_RESULT_SUCCESS;
529539
}
530540

531-
ur_event_handle_t
532-
ur_context_handle_t_::getEventFromContextCache(bool HostVisible,
533-
bool WithProfiling) {
541+
ur_event_handle_t ur_context_handle_t_::getEventFromContextCache(
542+
bool HostVisible, bool WithProfiling, ur_device_handle_t Device) {
534543
std::scoped_lock<ur_mutex> Lock(EventCacheMutex);
535-
auto Cache = getEventCache(HostVisible, WithProfiling);
544+
auto Cache = getEventCache(HostVisible, WithProfiling, Device);
536545
if (Cache->empty())
537546
return nullptr;
538547

@@ -546,8 +555,14 @@ ur_context_handle_t_::getEventFromContextCache(bool HostVisible,
546555

547556
void ur_context_handle_t_::addEventToContextCache(ur_event_handle_t Event) {
548557
std::scoped_lock<ur_mutex> Lock(EventCacheMutex);
549-
auto Cache =
550-
getEventCache(Event->isHostVisible(), Event->isProfilingEnabled());
558+
ur_device_handle_t Device = nullptr;
559+
560+
if (!Event->IsMultiDevice && Event->UrQueue) {
561+
Device = Event->UrQueue->Device;
562+
}
563+
564+
auto Cache = getEventCache(Event->isHostVisible(),
565+
Event->isProfilingEnabled(), Device);
551566
Cache->emplace_back(Event);
552567
}
553568

@@ -562,8 +577,14 @@ ur_context_handle_t_::decrementUnreleasedEventsInPool(ur_event_handle_t Event) {
562577
return UR_RESULT_SUCCESS;
563578
}
564579

565-
std::list<ze_event_pool_handle_t> *ZePoolCache =
566-
getZeEventPoolCache(Event->isHostVisible(), Event->isProfilingEnabled());
580+
ze_device_handle_t ZeDevice = nullptr;
581+
582+
if (!Event->IsMultiDevice && Event->UrQueue) {
583+
ZeDevice = Event->UrQueue->Device->ZeDevice;
584+
}
585+
586+
std::list<ze_event_pool_handle_t> *ZePoolCache = getZeEventPoolCache(
587+
Event->isHostVisible(), Event->isProfilingEnabled(), ZeDevice);
567588

568589
// Put the empty pool to the cache of the pools.
569590
if (NumEventsUnreleasedInEventPool[Event->ZeEventPool] == 0)

source/adapters/level_zero/context.hpp

Lines changed: 66 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,9 @@ struct ur_context_handle_t_ : _ur_object {
142142
//
143143
// Cache of event pools to which host-visible events are added to.
144144
std::vector<std::list<ze_event_pool_handle_t>> ZeEventPoolCache{4};
145+
std::vector<std::unordered_map<ze_device_handle_t,
146+
std::list<ze_event_pool_handle_t> *>>
147+
ZeEventPoolCacheDeviceMap{4};
145148

146149
// This map will be used to determine if a pool is full or not
147150
// by storing number of empty slots available in the pool.
@@ -163,6 +166,9 @@ struct ur_context_handle_t_ : _ur_object {
163166

164167
// Caches for events.
165168
std::vector<std::list<ur_event_handle_t>> EventCaches{4};
169+
std::vector<
170+
std::unordered_map<ur_device_handle_t, std::list<ur_event_handle_t> *>>
171+
EventCachesDeviceMap{4};
166172

167173
// Initialize the PI context.
168174
ur_result_t initialize();
@@ -188,20 +194,46 @@ struct ur_context_handle_t_ : _ur_object {
188194
// slot for an event with profiling capabilities.
189195
ur_result_t getFreeSlotInExistingOrNewPool(ze_event_pool_handle_t &, size_t &,
190196
bool HostVisible,
191-
bool ProfilingEnabled);
197+
bool ProfilingEnabled,
198+
ur_device_handle_t Device);
192199

193200
// Get ur_event_handle_t from cache.
194201
ur_event_handle_t getEventFromContextCache(bool HostVisible,
195-
bool WithProfiling);
202+
bool WithProfiling,
203+
ur_device_handle_t Device);
196204

197205
// Add ur_event_handle_t to cache.
198206
void addEventToContextCache(ur_event_handle_t);
199207

200-
auto getZeEventPoolCache(bool HostVisible, bool WithProfiling) {
201-
if (HostVisible)
202-
return WithProfiling ? &ZeEventPoolCache[0] : &ZeEventPoolCache[1];
203-
else
204-
return WithProfiling ? &ZeEventPoolCache[2] : &ZeEventPoolCache[3];
208+
// Select the list of cached Level Zero event pools for the given host
// visibility, profiling mode and (optionally) a single device.
//
// - ZeDevice == nullptr: returns the address of one of the four fixed slots
//   ZeEventPoolCache[0..3] (0/1 = host-visible with/without profiling,
//   2/3 = not host-visible with/without profiling).
// - ZeDevice != nullptr: looks the device up in the matching
//   ZeEventPoolCacheDeviceMap[0..3]. On first use for that device a new empty
//   list is appended to ZeEventPoolCache and its address is stored in the map;
//   the stored address is then returned.
//
// NOTE(review): ZeEventPoolCache is declared as a std::vector, and this
// function stores/returns addresses of its elements. A later emplace_back()
// that reallocates the vector invalidates every address already stored in
// ZeEventPoolCacheDeviceMap (and any pointer a caller still holds, including
// pointers to the fixed slots 0..3). Consider storing indices in the map, or
// using a container with stable element addresses — TODO confirm and fix
// together with the member declaration.
auto getZeEventPoolCache(bool HostVisible, bool WithProfiling,
209+
ze_device_handle_t ZeDevice) {
210+
if (HostVisible) {
211+
if (ZeDevice) {
212+
auto ZeEventPoolCacheMap = WithProfiling
213+
? &ZeEventPoolCacheDeviceMap[0]
214+
: &ZeEventPoolCacheDeviceMap[1];
215+
if (ZeEventPoolCacheMap->find(ZeDevice) == ZeEventPoolCacheMap->end()) {
216+
// First request for this device: grow the vector by one empty list.
// NOTE(review): this may reallocate and move all existing lists.
ZeEventPoolCache.emplace_back();
217+
(*ZeEventPoolCacheMap)[ZeDevice] = &ZeEventPoolCache.back();
218+
}
219+
return (*ZeEventPoolCacheMap)[ZeDevice];
220+
} else {
221+
// No device given: use the shared host-visible slots.
return WithProfiling ? &ZeEventPoolCache[0] : &ZeEventPoolCache[1];
222+
}
223+
} else {
224+
if (ZeDevice) {
225+
auto ZeEventPoolCacheMap = WithProfiling
226+
? &ZeEventPoolCacheDeviceMap[2]
227+
: &ZeEventPoolCacheDeviceMap[3];
228+
if (ZeEventPoolCacheMap->find(ZeDevice) == ZeEventPoolCacheMap->end()) {
229+
// Same append-and-record step as the host-visible branch above; the same
// reallocation hazard applies.
ZeEventPoolCache.emplace_back();
230+
(*ZeEventPoolCacheMap)[ZeDevice] = &ZeEventPoolCache.back();
231+
}
232+
return (*ZeEventPoolCacheMap)[ZeDevice];
233+
} else {
234+
// No device given: use the shared non-host-visible slots.
return WithProfiling ? &ZeEventPoolCache[2] : &ZeEventPoolCache[3];
235+
}
236+
}
205237
}
206238

207239
// Decrement number of events living in the pool upon event destroy
@@ -240,11 +272,33 @@ struct ur_context_handle_t_ : _ur_object {
240272

241273
private:
242274
// Get the cache of events for a provided scope and profiling mode.
243-
auto getEventCache(bool HostVisible, bool WithProfiling) {
244-
if (HostVisible)
245-
return WithProfiling ? &EventCaches[0] : &EventCaches[1];
246-
else
247-
return WithProfiling ? &EventCaches[2] : &EventCaches[3];
275+
// Select the list of cached ur_event_handle_t objects for the given host
// visibility, profiling mode and (optionally) a single device.
//
// - Device == nullptr: returns the address of one of the four fixed slots
//   EventCaches[0..3] (0/1 = host-visible with/without profiling,
//   2/3 = not host-visible with/without profiling).
// - Device != nullptr: looks the device up in the matching
//   EventCachesDeviceMap[0..3]. On first use for that device a new empty list
//   is appended to EventCaches and its address is stored in the map; the
//   stored address is then returned.
//
// NOTE(review): EventCaches is declared as a std::vector, and this function
// stores/returns addresses of its elements. A later emplace_back() that
// reallocates the vector invalidates every address already stored in
// EventCachesDeviceMap (and any pointer a caller still holds). Consider
// storing indices in the map, or using a container with stable element
// addresses — TODO confirm and fix together with the member declaration.
auto getEventCache(bool HostVisible, bool WithProfiling,
276+
ur_device_handle_t Device) {
277+
if (HostVisible) {
278+
if (Device) {
279+
auto EventCachesMap =
280+
WithProfiling ? &EventCachesDeviceMap[0] : &EventCachesDeviceMap[1];
281+
if (EventCachesMap->find(Device) == EventCachesMap->end()) {
282+
// First request for this device: grow the vector by one empty list.
// NOTE(review): this may reallocate and move all existing lists.
EventCaches.emplace_back();
283+
(*EventCachesMap)[Device] = &EventCaches.back();
284+
}
285+
return (*EventCachesMap)[Device];
286+
} else {
287+
// No device given: use the shared host-visible slots.
return WithProfiling ? &EventCaches[0] : &EventCaches[1];
288+
}
289+
} else {
290+
if (Device) {
291+
auto EventCachesMap =
292+
WithProfiling ? &EventCachesDeviceMap[2] : &EventCachesDeviceMap[3];
293+
if (EventCachesMap->find(Device) == EventCachesMap->end()) {
294+
// Same append-and-record step as the host-visible branch above; the same
// reallocation hazard applies.
EventCaches.emplace_back();
295+
(*EventCachesMap)[Device] = &EventCaches.back();
296+
}
297+
return (*EventCachesMap)[Device];
298+
} else {
299+
// No device given: use the shared non-host-visible slots.
return WithProfiling ? &EventCaches[2] : &EventCaches[3];
300+
}
301+
}
248302
}
249303
};
250304

0 commit comments

Comments
 (0)