Skip to content

Commit

Permalink
[UR] Draft for adding support for counter-based events
Browse files Browse the repository at this point in the history
Draft of the counter-based events implementation. As of now, only the
creation of events, command lists, and command queues/pools is implemented.

Signed-off-by: Zhang, Winston <winston.zhang@intel.com>
  • Loading branch information
winstonzhang-intel committed Feb 28, 2024
1 parent 4814e71 commit 9e3024c
Show file tree
Hide file tree
Showing 8 changed files with 89 additions and 33 deletions.
7 changes: 4 additions & 3 deletions source/adapters/level_zero/command_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -942,9 +942,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
(SignalCommandList->first, CommandBuffer->WaitEvent->ZeEvent));

if (Event) {
UR_CALL(createEventAndAssociateQueue(
Queue, &RetEvent, UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP,
SignalCommandList, false, false, true));
UR_CALL(createEventAndAssociateQueue(Queue, &RetEvent,
UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP,
SignalCommandList, false, false, true,
Queue->usingCounterBasedEvents()));

if ((Queue->Properties & UR_QUEUE_FLAG_PROFILING_ENABLE)) {
// Multiple submissions of a command buffer implies that we need to save
Expand Down
13 changes: 12 additions & 1 deletion source/adapters/level_zero/context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -468,7 +468,8 @@ static const uint32_t MaxNumEventsPerPool = [] {

ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool(
ze_event_pool_handle_t &Pool, size_t &Index, bool HostVisible,
bool ProfilingEnabled, ur_device_handle_t Device) {
bool ProfilingEnabled, ur_device_handle_t Device,
std::optional<bool> CounterBasedEventEnabled) {
// Lock while updating event pool machinery.
std::scoped_lock<ur_mutex> Lock(ZeEventPoolCacheMutex);

Expand Down Expand Up @@ -510,6 +511,16 @@ ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool(
ZeEventPoolDesc.flags |= ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
if (ProfilingEnabled)
ZeEventPoolDesc.flags |= ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
if (CounterBasedEventEnabled.has_value() &&
CounterBasedEventEnabled.value()) {
ZeEventPoolDesc.flags |= ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
ze_event_pool_counter_based_exp_desc_t counterBasedExt = {
ZE_STRUCTURE_TYPE_COUNTER_BASED_EVENT_POOL_EXP_DESC};
counterBasedExt.flags |=
ZE_EVENT_POOL_COUNTER_BASED_EXP_FLAG_IMMEDIATE |
ZE_EVENT_POOL_COUNTER_BASED_EXP_FLAG_NON_IMMEDIATE;
ZeEventPoolDesc.pNext = &counterBasedExt;
}
urPrint("ze_event_pool_desc_t flags set to: %d\n", ZeEventPoolDesc.flags);

std::vector<ze_device_handle_t> ZeDevices;
Expand Down
8 changes: 4 additions & 4 deletions source/adapters/level_zero/context.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -192,10 +192,10 @@ struct ur_context_handle_t_ : _ur_object {
// pool then create new one. The HostVisible parameter tells if we need a
// slot for a host-visible event. The ProfilingEnabled tells if we need a
// slot for an event with profiling capabilities.
ur_result_t getFreeSlotInExistingOrNewPool(ze_event_pool_handle_t &, size_t &,
bool HostVisible,
bool ProfilingEnabled,
ur_device_handle_t Device);
ur_result_t getFreeSlotInExistingOrNewPool(
ze_event_pool_handle_t &, size_t &, bool HostVisible,
bool ProfilingEnabled, ur_device_handle_t Device,
std::optional<bool> CounterBasedEventEnabled = std::nullopt);

// Get ur_event_handle_t from cache.
ur_event_handle_t getEventFromContextCache(bool HostVisible,
Expand Down
15 changes: 10 additions & 5 deletions source/adapters/level_zero/event.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -606,7 +606,7 @@ ur_result_t ur_event_handle_t_::getOrCreateHostVisibleEvent(
UR_CALL(createEventAndAssociateQueue(
UrQueue, &HostVisibleEvent, UR_EXT_COMMAND_TYPE_USER, CommandList,
/* IsInternal */ false, /* IsMultiDevice */ false,
/* HostVisible */ true));
/* HostVisible */ true, UrQueue->usingCounterBasedEvents()));

ZE2UR_CALL(zeCommandListAppendWaitOnEvents,
(CommandList->first, 1, &ZeEvent));
Expand Down Expand Up @@ -1049,7 +1049,8 @@ ur_result_t CleanupCompletedEvent(ur_event_handle_t Event, bool QueueLocked,
//
ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue,
bool IsMultiDevice, bool HostVisible,
ur_event_handle_t *RetEvent) {
ur_event_handle_t *RetEvent,
std::optional<bool> CounterBasedEventEnabled) {

bool ProfilingEnabled = !Queue || Queue->isProfilingEnabled();

Expand All @@ -1071,14 +1072,18 @@ ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue,
size_t Index = 0;

if (auto Res = Context->getFreeSlotInExistingOrNewPool(
ZeEventPool, Index, HostVisible, ProfilingEnabled, Device))
ZeEventPool, Index, HostVisible, ProfilingEnabled, Device,
CounterBasedEventEnabled.has_value()
? CounterBasedEventEnabled.value()
: false))
return Res;

ZeStruct<ze_event_desc_t> ZeEventDesc;
ZeEventDesc.index = Index;
ZeEventDesc.wait = 0;

if (HostVisible) {
if (HostVisible || (CounterBasedEventEnabled.has_value() &&
CounterBasedEventEnabled.value())) {
ZeEventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
} else {
//
Expand Down Expand Up @@ -1287,7 +1292,7 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList(

UR_CALL(createEventAndAssociateQueue(
Queue, &MultiDeviceEvent, EventList[I]->CommandType, CommandList,
IsInternal, IsMultiDevice));
IsInternal, IsMultiDevice, Queue->usingCounterBasedEvents()));
MultiDeviceZeEvent = MultiDeviceEvent->ZeEvent;
const auto &ZeCommandList = CommandList->first;
EventList[I]->RefCount.increment();
Expand Down
7 changes: 4 additions & 3 deletions source/adapters/level_zero/event.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,10 @@

extern "C" {
ur_result_t urEventReleaseInternal(ur_event_handle_t Event);
ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue,
bool IsMultiDevice, bool HostVisible,
ur_event_handle_t *RetEvent);
ur_result_t
EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue,
bool IsMultiDevice, bool HostVisible, ur_event_handle_t *RetEvent,
std::optional<bool> CounterBasedEventEnabled = std::nullopt);
} // extern "C"

// This is an experimental option that allows to disable caching of events in
Expand Down
3 changes: 2 additions & 1 deletion source/adapters/level_zero/kernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -206,7 +206,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueKernelLaunch(
ur_event_handle_t *Event = OutEvent ? OutEvent : &InternalEvent;

UR_CALL(createEventAndAssociateQueue(Queue, Event, UR_COMMAND_KERNEL_LAUNCH,
CommandList, IsInternal, false));
CommandList, IsInternal, false,
Queue->usingCounterBasedEvents()));
ZeEvent = (*Event)->ZeEvent;
(*Event)->WaitList = TmpWaitList;

Expand Down
64 changes: 49 additions & 15 deletions source/adapters/level_zero/queue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1127,7 +1127,9 @@ ur_queue_handle_t_::executeCommandList(ur_command_list_ptr_t CommandList,
reinterpret_cast<ur_queue_handle_t>(this), &HostVisibleEvent,
UR_EXT_COMMAND_TYPE_USER, CommandList,
/* IsInternal */ false, /* IsMultiDevice */ true,
/* HostVisible */ true);
/* HostVisible */ true,
(reinterpret_cast<ur_queue_handle_t>(this))
->usingCounterBasedEvents());
if (Res)
return Res;

Expand Down Expand Up @@ -1364,6 +1366,28 @@ bool ur_queue_handle_t_::isInOrderQueue() const {
0);
}

bool ur_queue_handle_t_::usingCounterBasedEvents() const {
if (!this->isInOrderQueue())
return false;

static const bool UseDriverCounterBasedEvents = [this] {
const char *UrRet = std::getenv("UR_L0_USE_DRIVER_COUNTER_BASED_EVENTS");
if (!UrRet)
return false;
return std::atoi(UrRet) != 0;
}();

bool usingInOrderList = true;
for (auto &&It = this->CommandListMap.begin();
It != this->CommandListMap.end(); ++It) {
if (It->second.ZeQueueDesc.flags != ZE_COMMAND_QUEUE_FLAG_IN_ORDER) {
usingInOrderList = false;
break;
}
}
return UseDriverCounterBasedEvents && usingInOrderList;
}

// Helper function to perform the necessary cleanup of the events from reset cmd
// list.
ur_result_t CleanupEventListFromResetCmdList(
Expand Down Expand Up @@ -1498,12 +1522,11 @@ ur_event_handle_t ur_queue_handle_t_::getEventFromQueueCache(bool IsMultiDevice,
// visible pool.
// \param HostVisible tells if the event must be created in the
// host-visible pool. If not set then this function will decide.
ur_result_t createEventAndAssociateQueue(ur_queue_handle_t Queue,
ur_event_handle_t *Event,
ur_command_t CommandType,
ur_command_list_ptr_t CommandList,
bool IsInternal, bool IsMultiDevice,
std::optional<bool> HostVisible) {
ur_result_t createEventAndAssociateQueue(
ur_queue_handle_t Queue, ur_event_handle_t *Event, ur_command_t CommandType,
ur_command_list_ptr_t CommandList, bool IsInternal, bool IsMultiDevice,
std::optional<bool> HostVisible,
std::optional<bool> usingCounterBasedEvents) {

if (!HostVisible.has_value()) {
// Internal/discarded events do not need host-scope visibility.
Expand All @@ -1516,8 +1539,10 @@ ur_result_t createEventAndAssociateQueue(ur_queue_handle_t Queue,
: nullptr;

if (*Event == nullptr)
UR_CALL(EventCreate(Queue->Context, Queue, IsMultiDevice,
HostVisible.value(), Event));
UR_CALL(EventCreate(
Queue->Context, Queue, IsMultiDevice, HostVisible.value(), Event,
usingCounterBasedEvents.has_value() ? usingCounterBasedEvents.value()
: false));

(*Event)->UrQueue = Queue;
(*Event)->CommandType = CommandType;
Expand Down Expand Up @@ -1805,6 +1830,9 @@ ur_queue_handle_t_::ur_queue_group_t::getZeQueue(uint32_t *QueueGroupOrdinal) {
ZeCommandQueueDesc.ordinal = *QueueGroupOrdinal;
ZeCommandQueueDesc.index = QueueIndex;
ZeCommandQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS;
if (Queue->usingCounterBasedEvents()) {
ZeCommandQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_SYNCHRONOUS;
}
const char *Priority = "Normal";
if (Queue->isPriorityLow()) {
ZeCommandQueueDesc.priority = ZE_COMMAND_QUEUE_PRIORITY_PRIORITY_LOW;
Expand Down Expand Up @@ -1859,27 +1887,33 @@ ur_result_t ur_queue_handle_t_::createCommandList(
ze_command_list_handle_t ZeCommandList;

uint32_t QueueGroupOrdinal;
ZeStruct<ze_command_list_desc_t> ZeCommandListDesc;
if (usingCounterBasedEvents()) {
ZeCommandListDesc.flags = ZE_COMMAND_LIST_FLAG_IN_ORDER;
ZeCommandListDesc.stype = ZE_STRUCTURE_TYPE_COMMAND_LIST_DESC;
}
auto &QGroup = getQueueGroup(UseCopyEngine);
auto &ZeCommandQueue =
ForcedCmdQueue ? *ForcedCmdQueue : QGroup.getZeQueue(&QueueGroupOrdinal);
if (ForcedCmdQueue)
QueueGroupOrdinal = QGroup.getCmdQueueOrdinal(ZeCommandQueue);

ZeStruct<ze_command_list_desc_t> ZeCommandListDesc;
ZeCommandListDesc.commandQueueGroupOrdinal = QueueGroupOrdinal;

ZE2UR_CALL(zeCommandListCreate, (Context->ZeContext, Device->ZeDevice,
&ZeCommandListDesc, &ZeCommandList));

ZE2UR_CALL(zeFenceCreate, (ZeCommandQueue, &ZeFenceDesc, &ZeFence));
if (!usingCounterBasedEvents()) {
ZE2UR_CALL(zeFenceCreate, (ZeCommandQueue, &ZeFenceDesc, &ZeFence));
}
ZeStruct<ze_command_queue_desc_t> ZeQueueDesc;
ZeQueueDesc.ordinal = QueueGroupOrdinal;
std::tie(CommandList, std::ignore) = CommandListMap.insert(
std::pair<ze_command_list_handle_t, ur_command_list_info_t>(
ZeCommandList, {ZeFence, false, false, ZeCommandQueue, ZeQueueDesc}));

UR_CALL(insertStartBarrierIfDiscardEventsMode(CommandList));
UR_CALL(insertActiveBarriers(CommandList, UseCopyEngine));
if (!usingCounterBasedEvents()) {
UR_CALL(insertStartBarrierIfDiscardEventsMode(CommandList));
UR_CALL(insertActiveBarriers(CommandList, UseCopyEngine));
}
return UR_RESULT_SUCCESS;
}

Expand Down
5 changes: 4 additions & 1 deletion source/adapters/level_zero/queue.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,8 @@ struct ur_queue_handle_t_ : _ur_object {
// Returns true if the queue is an in-order queue.
bool isInOrderQueue() const;

bool usingCounterBasedEvents() const;

// Returns true if the queue has discard events property.
bool isDiscardEvents() const;

Expand Down Expand Up @@ -543,7 +545,8 @@ struct ur_queue_handle_t_ : _ur_object {
ur_result_t createEventAndAssociateQueue(
ur_queue_handle_t Queue, ur_event_handle_t *Event, ur_command_t CommandType,
ur_command_list_ptr_t CommandList, bool IsInternal, bool IsMultiDevice,
std::optional<bool> HostVisible = std::nullopt);
std::optional<bool> HostVisible = std::nullopt,
std::optional<bool> usingCounterBasedEvents = std::nullopt);

// Helper function to perform the necessary cleanup of the events from reset cmd
// list.
Expand Down

0 comments on commit 9e3024c

Please sign in to comment.