From 260e7e1999c52db4a398652781612b8dac212a41 Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Thu, 1 Feb 2024 14:22:16 +0000 Subject: [PATCH 01/12] [EXP][CMDBUF] Support for Node Profiling Adds a new entry point urEventGetSyncPointProfilingInfoExp. This function queries the profiling information of a sync-point. It takes the handle of the event returned from the graph submission and the sync-point associated with the node from which we want to obtain the profiling information. Implements node profiling support. --- include/ur_api.h | 48 ++++++++++ include/ur_ddi.h | 38 ++++++++ include/ur_print.h | 10 ++ include/ur_print.hpp | 45 +++++++++ scripts/core/EXP-COMMAND-BUFFER.rst | 21 +++++ scripts/core/exp-command-buffer.yml | 35 +++++++ scripts/core/registry.yml | 3 + source/adapters/adapter.def.in | 1 + source/adapters/adapter.map.in | 1 + source/adapters/cuda/command_buffer.cpp | 13 +++ source/adapters/hip/command_buffer.cpp | 13 +++ source/adapters/level_zero/command_buffer.cpp | 92 +++++++++++++++++++ .../level_zero/ur_interface_loader.cpp | 14 +++ source/adapters/native_cpu/command_buffer.cpp | 6 ++ source/adapters/null/ur_nullddi.cpp | 65 +++++++++++++ source/adapters/opencl/command_buffer.cpp | 13 +++ source/loader/layers/tracing/ur_trcddi.cpp | 80 ++++++++++++++++ source/loader/layers/validation/ur_valddi.cpp | 85 +++++++++++++++++ source/loader/ur_ldrddi.cpp | 91 ++++++++++++++++++ source/loader/ur_libapi.cpp | 48 ++++++++++ source/loader/ur_libddi.cpp | 5 + source/loader/ur_print.cpp | 8 ++ source/ur_api.cpp | 40 ++++++++ 23 files changed, 775 insertions(+) diff --git a/include/ur_api.h b/include/ur_api.h index 442c364e0c..86b93452c4 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -215,6 +215,7 @@ typedef enum ur_function_t { UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_ADVISE_EXP = 213, ///< Enumerator for ::urCommandBufferAppendUSMAdviseExp UR_FUNCTION_ENQUEUE_COOPERATIVE_KERNEL_LAUNCH_EXP = 214, ///< Enumerator for 
::urEnqueueCooperativeKernelLaunchExp UR_FUNCTION_KERNEL_SUGGEST_MAX_COOPERATIVE_GROUP_COUNT_EXP = 215, ///< Enumerator for ::urKernelSuggestMaxCooperativeGroupCountExp + UR_FUNCTION_EVENT_GET_SYNC_POINT_PROFILING_INFO_EXP = 218, ///< Enumerator for ::urEventGetSyncPointProfilingInfoExp /// @cond UR_FUNCTION_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -8341,6 +8342,40 @@ urCommandBufferEnqueueExp( ///< command-buffer execution instance. ); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Get profiling information for the sync point execution associated with +/// an event object +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hEvent` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_PROFILING_INFO_COMMAND_COMPLETE < propName` +/// - ::UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE +/// + If `hEvent`s associated queue was not created with `UR_QUEUE_FLAG_PROFILING_ENABLE`. +/// - ::UR_RESULT_ERROR_INVALID_VALUE +/// + `pPropValue && propSize == 0` +/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP - "If `syncPoint` is not referencing a command enqueued in this CommandBuffer. Typically, `syncPoint` > number of commands" +UR_APIEXPORT ur_result_t UR_APICALL +urEventGetSyncPointProfilingInfoExp( + ur_event_handle_t hEvent, ///< [in] handle of the event object + ur_exp_command_buffer_sync_point_t syncPoint, ///< [in] Sync point referencing the node (i.e. 
command) from which we want + ///< to get profile information + ur_profiling_info_t propName, ///< [in] the name of the profiling property to query + size_t propSize, ///< [in] size in bytes of the profiling property value + void *pPropValue, ///< [out][optional][typename(propName, propSize)] value of the profiling + ///< property + size_t *pPropSizeRet ///< [out][optional] pointer to the actual size in bytes returned in + ///< propValue +); + #if !defined(__GNUC__) #pragma endregion #endif @@ -9026,6 +9061,19 @@ typedef struct ur_event_set_callback_params_t { void **ppUserData; } ur_event_set_callback_params_t; +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function parameters for urEventGetSyncPointProfilingInfoExp +/// @details Each entry is a pointer to the parameter passed to the function; +/// allowing the callback the ability to modify the parameter's value +typedef struct ur_event_get_sync_point_profiling_info_exp_params_t { + ur_event_handle_t *phEvent; + ur_exp_command_buffer_sync_point_t *psyncPoint; + ur_profiling_info_t *ppropName; + size_t *ppropSize; + void **ppPropValue; + size_t **ppPropSizeRet; +} ur_event_get_sync_point_profiling_info_exp_params_t; + /////////////////////////////////////////////////////////////////////////////// /// @brief Function parameters for urProgramCreateWithIL /// @details Each entry is a pointer to the parameter passed to the function; diff --git a/include/ur_ddi.h b/include/ur_ddi.h index 77f2f35f70..6ecbdc5998 100644 --- a/include/ur_ddi.h +++ b/include/ur_ddi.h @@ -269,6 +269,43 @@ typedef ur_result_t(UR_APICALL *ur_pfnGetEventProcAddrTable_t)( ur_api_version_t, ur_event_dditable_t *); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urEventGetSyncPointProfilingInfoExp +typedef ur_result_t(UR_APICALL *ur_pfnEventGetSyncPointProfilingInfoExp_t)( + ur_event_handle_t, + ur_exp_command_buffer_sync_point_t, + 
ur_profiling_info_t, + size_t, + void *, + size_t *); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Table of EventExp functions pointers +typedef struct ur_event_exp_dditable_t { + ur_pfnEventGetSyncPointProfilingInfoExp_t pfnGetSyncPointProfilingInfoExp; +} ur_event_exp_dditable_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's EventExp table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION +UR_DLLEXPORT ur_result_t UR_APICALL +urGetEventExpProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + ur_event_exp_dditable_t *pDdiTable ///< [in,out] pointer to table of DDI function pointers +); + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Function-pointer for urGetEventExpProcAddrTable +typedef ur_result_t(UR_APICALL *ur_pfnGetEventExpProcAddrTable_t)( + ur_api_version_t, + ur_event_exp_dditable_t *); + /////////////////////////////////////////////////////////////////////////////// /// @brief Function-pointer for urProgramCreateWithIL typedef ur_result_t(UR_APICALL *ur_pfnProgramCreateWithIL_t)( @@ -2306,6 +2343,7 @@ typedef struct ur_dditable_t { ur_platform_dditable_t Platform; ur_context_dditable_t Context; ur_event_dditable_t Event; + ur_event_exp_dditable_t EventExp; ur_program_dditable_t Program; ur_program_exp_dditable_t ProgramExp; ur_kernel_dditable_t Kernel; diff --git a/include/ur_print.h b/include/ur_print.h index 9edf7554d9..cf84365491 100644 --- a/include/ur_print.h +++ b/include/ur_print.h @@ -1448,6 +1448,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintEventCreateWithNativeHandleParams(con /// - `buff_size < out_size` UR_APIEXPORT ur_result_t UR_APICALL 
urPrintEventSetCallbackParams(const struct ur_event_set_callback_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_event_get_sync_point_profiling_info_exp_params_t params struct +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// - `NULL == buffer` +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintEventGetSyncPointProfilingInfoExpParams(const struct ur_event_get_sync_point_profiling_info_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size); + /////////////////////////////////////////////////////////////////////////////// /// @brief Print ur_program_create_with_il_params_t params struct /// @returns diff --git a/include/ur_print.hpp b/include/ur_print.hpp index 6b27b2a443..788087a6a4 100644 --- a/include/ur_print.hpp +++ b/include/ur_print.hpp @@ -879,6 +879,9 @@ inline std::ostream &operator<<(std::ostream &os, ur_function_t value) { case UR_FUNCTION_KERNEL_SUGGEST_MAX_COOPERATIVE_GROUP_COUNT_EXP: os << "UR_FUNCTION_KERNEL_SUGGEST_MAX_COOPERATIVE_GROUP_COUNT_EXP"; break; + case UR_FUNCTION_EVENT_GET_SYNC_POINT_PROFILING_INFO_EXP: + os << "UR_FUNCTION_EVENT_GET_SYNC_POINT_PROFILING_INFO_EXP"; + break; default: os << "unknown enumerator"; break; @@ -9888,6 +9891,45 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct return os; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_event_get_sync_point_profiling_info_exp_params_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_event_get_sync_point_profiling_info_exp_params_t *params) { + + os << ".hEvent = "; + + ur::details::printPtr(os, + *(params->phEvent)); + + os << ", "; + os << ".syncPoint = 
"; + + os << *(params->psyncPoint); + + os << ", "; + os << ".propName = "; + + os << *(params->ppropName); + + os << ", "; + os << ".propSize = "; + + os << *(params->ppropSize); + + os << ", "; + os << ".pPropValue = "; + ur::details::printTagged(os, *(params->ppPropValue), *(params->ppropName), *(params->ppropSize)); + + os << ", "; + os << ".pPropSizeRet = "; + + ur::details::printPtr(os, + *(params->ppPropSizeRet)); + + return os; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Print operator for the ur_program_create_with_il_params_t type /// @returns @@ -15965,6 +16007,9 @@ inline ur_result_t UR_APICALL printFunctionParams(std::ostream &os, ur_function_ case UR_FUNCTION_EVENT_SET_CALLBACK: { os << (const struct ur_event_set_callback_params_t *)params; } break; + case UR_FUNCTION_EVENT_GET_SYNC_POINT_PROFILING_INFO_EXP: { + os << (const struct ur_event_get_sync_point_profiling_info_exp_params_t *)params; + } break; case UR_FUNCTION_PROGRAM_CREATE_WITH_IL: { os << (const struct ur_program_create_with_il_params_t *)params; } break; diff --git a/scripts/core/EXP-COMMAND-BUFFER.rst b/scripts/core/EXP-COMMAND-BUFFER.rst index a6a32a66a1..43bbc73b79 100644 --- a/scripts/core/EXP-COMMAND-BUFFER.rst +++ b/scripts/core/EXP-COMMAND-BUFFER.rst @@ -132,6 +132,23 @@ were obtained from. 
pLocalWorkSize, 1, &syncPoint, nullptr); + // Finalize the CommandBuffer to launch it + ${x}CommandBufferFinalizeExp(hCommandBuffer); + + // Execute the CommandBuffer and obtain the event associated to this + // execution + ${x}_event_handle_t event; + ${x}CommandBufferEnqueueExp(hCommandBuffer, hQueue, 0, nullptr, + &event); + + // Get SyncPoint profiling information + ${x}_profiling_info_t propName = ${X}_PROFILING_INFO_COMMAND_START; + size_t propSize = sizeof(uint64_t); + uint64_t propValue = 0; + size_t propSizeRet = 0; + ${x}EventGetSyncPointProfilingInfoExp(event, syncPoint, propName, propSize, + &propValue, &propSizeRet); + Enqueueing Command-Buffers -------------------------------------------------------------------------------- @@ -211,6 +228,7 @@ Functions * ${x}CommandBufferAppendUSMPrefetchExp * ${x}CommandBufferAppendUSMAdviseExp * ${x}CommandBufferEnqueueExp +* ${x}EventGetSyncPointProfilingInfoExp Changelog -------------------------------------------------------------------------------- @@ -227,6 +245,9 @@ Changelog | 1.3 | Add function definitions for Prefetch and Advise | | | commands | +-----------+-------------------------------------------------------+ +| 1.4 | Add function definitions for getting sync point | +| | profiling information | ++-----------+-------------------------------------------------------+ Contributors -------------------------------------------------------------------------------- diff --git a/scripts/core/exp-command-buffer.yml b/scripts/core/exp-command-buffer.yml index 7d1b686aab..3aeec3f6b4 100644 --- a/scripts/core/exp-command-buffer.yml +++ b/scripts/core/exp-command-buffer.yml @@ -704,3 +704,38 @@ returns: - "If event objects in phEventWaitList are not valid events." 
- $X_RESULT_ERROR_OUT_OF_HOST_MEMORY - $X_RESULT_ERROR_OUT_OF_RESOURCES +--- #-------------------------------------------------------------------------- +type: function +desc: "Get profiling information for the sync point execution associated with an event object" +class: $xEvent +name: GetSyncPointProfilingInfoExp +ordinal: "0" +params: + - type: $x_event_handle_t + name: hEvent + desc: "[in] handle of the event object" + - type: $x_exp_command_buffer_sync_point_t + name: syncPoint + desc: "[in] Sync point referencing the node (i.e. command) from which we want to get profile information" + - type: $x_profiling_info_t + name: propName + desc: "[in] the name of the profiling property to query" + - type: size_t + name: propSize + desc: "[in] size in bytes of the profiling property value" + - type: void* + name: pPropValue + desc: "[out][optional][typename(propName, propSize)] value of the profiling property" + - type: size_t* + name: pPropSizeRet + desc: "[out][optional] pointer to the actual size in bytes returned in propValue" +returns: + - $X_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE: + - "If `hEvent`s associated queue was not created with `UR_QUEUE_FLAG_PROFILING_ENABLE`." + - $X_RESULT_ERROR_INVALID_VALUE: + - "`pPropValue && propSize == 0`" + - $X_RESULT_ERROR_INVALID_EVENT + - $X_RESULT_ERROR_OUT_OF_RESOURCES + - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY + - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP: + - "If `syncPoint` is not referencing a command enqueued in this CommandBuffer. 
Typically, `syncPoint` > number of commands" diff --git a/scripts/core/registry.yml b/scripts/core/registry.yml index 6195cd4980..f84edd7d7d 100644 --- a/scripts/core/registry.yml +++ b/scripts/core/registry.yml @@ -559,6 +559,9 @@ etors: - name: KERNEL_SUGGEST_MAX_COOPERATIVE_GROUP_COUNT_EXP desc: Enumerator for $xKernelSuggestMaxCooperativeGroupCountExp value: '215' +- name: EVENT_GET_SYNC_POINT_PROFILING_INFO_EXP + desc: Enumerator for $xEventGetSyncPointProfilingInfoExp + value: '218' --- type: enum desc: Defines structure types diff --git a/source/adapters/adapter.def.in b/source/adapters/adapter.def.in index 3c18c78bd1..b5bdefab66 100644 --- a/source/adapters/adapter.def.in +++ b/source/adapters/adapter.def.in @@ -7,6 +7,7 @@ EXPORTS urGetEnqueueProcAddrTable urGetEnqueueExpProcAddrTable urGetEventProcAddrTable + urGetEventExpProcAddrTable urGetKernelProcAddrTable urGetKernelExpProcAddrTable urGetMemProcAddrTable diff --git a/source/adapters/adapter.map.in b/source/adapters/adapter.map.in index bb08ae7d88..be74473b21 100644 --- a/source/adapters/adapter.map.in +++ b/source/adapters/adapter.map.in @@ -7,6 +7,7 @@ urGetEnqueueProcAddrTable; urGetEnqueueExpProcAddrTable; urGetEventProcAddrTable; + urGetEventExpProcAddrTable; urGetKernelProcAddrTable; urGetKernelExpProcAddrTable; urGetMemProcAddrTable; diff --git a/source/adapters/cuda/command_buffer.cpp b/source/adapters/cuda/command_buffer.cpp index a65530a1f1..7da742931b 100644 --- a/source/adapters/cuda/command_buffer.cpp +++ b/source/adapters/cuda/command_buffer.cpp @@ -762,3 +762,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( return Result; } + +UR_APIEXPORT ur_result_t UR_APICALL urEventGetSyncPointProfilingInfoExp( + ur_event_handle_t Event, ur_exp_command_buffer_sync_point_t SyncPoint, + ur_profiling_info_t PropName, size_t PropValueSize, void *PropValue, + size_t *PropValueSizeRet) { + (void)Event; + (void)SyncPoint; + (void)PropName; + (void)PropValueSize; + (void)PropValue; + 
(void)PropValueSizeRet; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/source/adapters/hip/command_buffer.cpp b/source/adapters/hip/command_buffer.cpp index 54a6fa2f4e..930d5dbe41 100644 --- a/source/adapters/hip/command_buffer.cpp +++ b/source/adapters/hip/command_buffer.cpp @@ -162,3 +162,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( "implemented for HIP adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } + +UR_APIEXPORT ur_result_t UR_APICALL urEventGetSyncPointProfilingInfoExp( + ur_event_handle_t Event, ur_exp_command_buffer_sync_point_t SyncPoint, + ur_profiling_info_t PropName, size_t PropValueSize, void *PropValue, + size_t *PropValueSizeRet) { + (void)Event; + (void)SyncPoint; + (void)PropName; + (void)PropValueSize; + (void)PropValue; + (void)PropValueSizeRet; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index bbe49cb705..23dbfac3bc 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -986,3 +986,95 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( return UR_RESULT_SUCCESS; } + +UR_APIEXPORT ur_result_t UR_APICALL urEventGetSyncPointProfilingInfoExp( + ur_event_handle_t Event, ur_exp_command_buffer_sync_point_t SyncPoint, + ur_profiling_info_t PropName, size_t PropValueSize, void *PropValue, + size_t *PropValueSizeRet) { + std::shared_lock EventLock(Event->Mutex); + + if (Event->UrQueue && + (Event->UrQueue->Properties & UR_QUEUE_FLAG_PROFILING_ENABLE) == 0) { + return UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE; + } + + ur_device_handle_t Device = + Event->UrQueue ? 
Event->UrQueue->Device : Event->Context->Devices[0]; + + uint64_t ZeTimerResolution = Device->ZeDeviceProperties->timerResolution; + const uint64_t TimestampMaxValue = + ((1ULL << Device->ZeDeviceProperties->kernelTimestampValidBits) - 1ULL); + + UrReturnHelper ReturnValue(PropValueSize, PropValue, PropValueSizeRet); + + ze_kernel_timestamp_result_t tsResult; + + // Node profiling info is stored in the CommandData field of the event + // returned from graph submission. + // The timing info of each command corresponding to a node is stored using + // `zeCommandListAppendQueryKernelTimestamps`. This command stores the timing + // info of each command/node in the same order as the sync-points that were + // assigned to this node when it was enqueued. Consequently, `SyncPoint` + // corresponds to the index of the memory slot containing the timestamps + // corresponding to this specific node (zero-based; assumes NumEvents slots). + if (Event->CommandType == UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP) { + if (Event->CommandData) { + command_buffer_profiling_t *ProfilingsPtr; + switch (PropName) { + case UR_PROFILING_INFO_COMMAND_START: { + ProfilingsPtr = + static_cast<command_buffer_profiling_t *>(Event->CommandData); + uint64_t Index = static_cast<uint64_t>(SyncPoint); + + if (Index >= ProfilingsPtr->NumEvents) { + return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP; + } + + uint64_t StartTime = + ProfilingsPtr->Timestamps[Index].global.kernelStart; + uint64_t ContextStartTime = + (StartTime & TimestampMaxValue) * ZeTimerResolution; + return ReturnValue(ContextStartTime); + } + case UR_PROFILING_INFO_COMMAND_END: { + ProfilingsPtr = + static_cast<command_buffer_profiling_t *>(Event->CommandData); + uint64_t Index = static_cast<uint64_t>(SyncPoint); + + if (Index >= ProfilingsPtr->NumEvents) { + return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP; + } + + uint64_t EndTime = ProfilingsPtr->Timestamps[Index].global.kernelEnd; + uint64_t LastStart = + ProfilingsPtr->Timestamps[Index].global.kernelStart; + uint64_t ContextStartTime = (LastStart & TimestampMaxValue); + uint64_t 
ContextEndTime = (EndTime & TimestampMaxValue); + + // + // Handle a possible wrap-around (the underlying HW counter is < + // 64-bit). Note, it will not report correct time if there were multiple + // wrap arounds, and the longer term plan is to enlarge the capacity of + // the HW timestamps. + // + if (ContextEndTime <= ContextStartTime) { + ContextEndTime += TimestampMaxValue; + } + ContextEndTime *= ZeTimerResolution; + return ReturnValue(ContextEndTime); + } + default: + urPrint( + "urEventGetSyncPointProfilingInfoExp: not supported ParamName\n"); + return UR_RESULT_ERROR_INVALID_VALUE; + } + } else { + return UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE; + } + } else { + urPrint("urEventGetSyncPointProfilingInfoExp: not supported Event type\n"); + return UR_RESULT_ERROR_INVALID_VALUE; + } + + return UR_RESULT_SUCCESS; +} diff --git a/source/adapters/level_zero/ur_interface_loader.cpp b/source/adapters/level_zero/ur_interface_loader.cpp index 74d0706b31..50d7cf585e 100644 --- a/source/adapters/level_zero/ur_interface_loader.cpp +++ b/source/adapters/level_zero/ur_interface_loader.cpp @@ -345,6 +345,20 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( return retVal; } +UR_DLLEXPORT ur_result_t UR_APICALL urGetEventExpProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + ur_event_exp_dditable_t + *pDdiTable ///< [in,out] pointer to table of DDI function pointers +) { + auto retVal = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != retVal) { + return retVal; + } + pDdiTable->pfnGetSyncPointProfilingInfoExp = + urEventGetSyncPointProfilingInfoExp; + return retVal; +} + UR_DLLEXPORT ur_result_t UR_APICALL urGetUsmP2PExpProcAddrTable( ur_api_version_t version, ur_usm_p2p_exp_dditable_t *pDdiTable) { auto retVal = validateProcInputs(version, pDdiTable); diff --git a/source/adapters/native_cpu/command_buffer.cpp b/source/adapters/native_cpu/command_buffer.cpp index 50b38c9d52..88f22d9865 100644 --- 
a/source/adapters/native_cpu/command_buffer.cpp +++ b/source/adapters/native_cpu/command_buffer.cpp @@ -162,3 +162,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( ur_exp_command_buffer_sync_point_t *) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } + +UR_APIEXPORT ur_result_t UR_APICALL urEventGetSyncPointProfilingInfoExp( + ur_event_handle_t, ur_exp_command_buffer_sync_point_t, ur_profiling_info_t, + size_t, void *, size_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/source/adapters/null/ur_nullddi.cpp b/source/adapters/null/ur_nullddi.cpp index 464aa59d54..380ec6b9e9 100644 --- a/source/adapters/null/ur_nullddi.cpp +++ b/source/adapters/null/ur_nullddi.cpp @@ -5232,6 +5232,40 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEventGetSyncPointProfilingInfoExp +__urdlllocal ur_result_t UR_APICALL urEventGetSyncPointProfilingInfoExp( + ur_event_handle_t hEvent, ///< [in] handle of the event object + ur_exp_command_buffer_sync_point_t + syncPoint, ///< [in] Sync point referencing the node (i.e. 
command) from which we want + ///< to get profile information + ur_profiling_info_t + propName, ///< [in] the name of the profiling property to query + size_t propSize, ///< [in] size in bytes of the profiling property value + void * + pPropValue, ///< [out][optional][typename(propName, propSize)] value of the profiling + ///< property + size_t * + pPropSizeRet ///< [out][optional] pointer to the actual size in bytes returned in + ///< propValue + ) try { + ur_result_t result = UR_RESULT_SUCCESS; + + // if the driver has created a custom function, then call it instead of using the generic path + auto pfnGetSyncPointProfilingInfoExp = + d_context.urDdiTable.EventExp.pfnGetSyncPointProfilingInfoExp; + if (nullptr != pfnGetSyncPointProfilingInfoExp) { + result = pfnGetSyncPointProfilingInfoExp( + hEvent, syncPoint, propName, propSize, pPropValue, pPropSizeRet); + } else { + // generic implementation + } + + return result; +} catch (...) { + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueCooperativeKernelLaunchExp __urdlllocal ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( @@ -5901,6 +5935,37 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEventProcAddrTable( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's EventExp table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION +UR_DLLEXPORT ur_result_t UR_APICALL urGetEventExpProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + ur_event_exp_dditable_t + *pDdiTable ///< [in,out] pointer to table of DDI function pointers + ) try { + if (nullptr == pDdiTable) { + return 
UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (driver::d_context.version < version) { + return UR_RESULT_ERROR_UNSUPPORTED_VERSION; + } + + ur_result_t result = UR_RESULT_SUCCESS; + + pDdiTable->pfnGetSyncPointProfilingInfoExp = + driver::urEventGetSyncPointProfilingInfoExp; + + return result; +} catch (...) { + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Exported function for filling application's Kernel table /// with current process' addresses diff --git a/source/adapters/opencl/command_buffer.cpp b/source/adapters/opencl/command_buffer.cpp index 74cdd8a03d..97b819f500 100644 --- a/source/adapters/opencl/command_buffer.cpp +++ b/source/adapters/opencl/command_buffer.cpp @@ -356,3 +356,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( return UR_RESULT_SUCCESS; } + +UR_APIEXPORT ur_result_t UR_APICALL urEventGetSyncPointProfilingInfoExp( + ur_event_handle_t Event, ur_exp_command_buffer_sync_point_t SyncPoint, + ur_profiling_info_t PropName, size_t PropValueSize, void *PropValue, + size_t *PropValueSizeRet) { + (void)Event; + (void)SyncPoint; + (void)PropName; + (void)PropValueSize; + (void)PropValue; + (void)PropValueSizeRet; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/source/loader/layers/tracing/ur_trcddi.cpp b/source/loader/layers/tracing/ur_trcddi.cpp index 5867d295ae..edf20adbce 100644 --- a/source/loader/layers/tracing/ur_trcddi.cpp +++ b/source/loader/layers/tracing/ur_trcddi.cpp @@ -5809,6 +5809,46 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEventGetSyncPointProfilingInfoExp +__urdlllocal ur_result_t UR_APICALL urEventGetSyncPointProfilingInfoExp( + ur_event_handle_t hEvent, ///< [in] handle of the event object + ur_exp_command_buffer_sync_point_t + syncPoint, 
///< [in] Sync point referencing the node (i.e. command) from which we want + ///< to get profile information + ur_profiling_info_t + propName, ///< [in] the name of the profiling property to query + size_t propSize, ///< [in] size in bytes of the profiling property value + void * + pPropValue, ///< [out][optional][typename(propName, propSize)] value of the profiling + ///< property + size_t * + pPropSizeRet ///< [out][optional] pointer to the actual size in bytes returned in + ///< propValue +) { + auto pfnGetSyncPointProfilingInfoExp = + context.urDdiTable.EventExp.pfnGetSyncPointProfilingInfoExp; + + if (nullptr == pfnGetSyncPointProfilingInfoExp) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + ur_event_get_sync_point_profiling_info_exp_params_t params = { + &hEvent, &syncPoint, &propName, &propSize, &pPropValue, &pPropSizeRet}; + uint64_t instance = context.notify_begin( + UR_FUNCTION_EVENT_GET_SYNC_POINT_PROFILING_INFO_EXP, + "urEventGetSyncPointProfilingInfoExp", &params); + + ur_result_t result = pfnGetSyncPointProfilingInfoExp( + hEvent, syncPoint, propName, propSize, pPropValue, pPropSizeRet); + + context.notify_end(UR_FUNCTION_EVENT_GET_SYNC_POINT_PROFILING_INFO_EXP, + "urEventGetSyncPointProfilingInfoExp", &params, &result, + instance); + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueCooperativeKernelLaunchExp __urdlllocal ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( @@ -6648,6 +6688,41 @@ __urdlllocal ur_result_t UR_APICALL urGetEventProcAddrTable( return result; } /////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's EventExp table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION +__urdlllocal ur_result_t UR_APICALL
urGetEventExpProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + ur_event_exp_dditable_t + *pDdiTable ///< [in,out] pointer to table of DDI function pointers +) { + auto &dditable = ur_tracing_layer::context.urDdiTable.EventExp; + + if (nullptr == pDdiTable) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (UR_MAJOR_VERSION(ur_tracing_layer::context.version) != + UR_MAJOR_VERSION(version) || + UR_MINOR_VERSION(ur_tracing_layer::context.version) > + UR_MINOR_VERSION(version)) { + return UR_RESULT_ERROR_UNSUPPORTED_VERSION; + } + + ur_result_t result = UR_RESULT_SUCCESS; + + dditable.pfnGetSyncPointProfilingInfoExp = + pDdiTable->pfnGetSyncPointProfilingInfoExp; + pDdiTable->pfnGetSyncPointProfilingInfoExp = + ur_tracing_layer::urEventGetSyncPointProfilingInfoExp; + + return result; +} +/////////////////////////////////////////////////////////////////////////////// /// @brief Exported function for filling application's Kernel table /// with current process' addresses /// @@ -7426,6 +7501,11 @@ ur_result_t context_t::init(ur_dditable_t *dditable, UR_API_VERSION_CURRENT, &dditable->Event); } + if (UR_RESULT_SUCCESS == result) { + result = ur_tracing_layer::urGetEventExpProcAddrTable( + UR_API_VERSION_CURRENT, &dditable->EventExp); + } + if (UR_RESULT_SUCCESS == result) { result = ur_tracing_layer::urGetKernelProcAddrTable( UR_API_VERSION_CURRENT, &dditable->Kernel); diff --git a/source/loader/layers/validation/ur_valddi.cpp b/source/loader/layers/validation/ur_valddi.cpp index db59ca3b11..3c24d47546 100644 --- a/source/loader/layers/validation/ur_valddi.cpp +++ b/source/loader/layers/validation/ur_valddi.cpp @@ -7622,6 +7622,50 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEventGetSyncPointProfilingInfoExp +__urdlllocal ur_result_t UR_APICALL urEventGetSyncPointProfilingInfoExp( + 
ur_event_handle_t hEvent, ///< [in] handle of the event object + ur_exp_command_buffer_sync_point_t + syncPoint, ///< [in] Sync point referencing the node (i.e. command) from which we want + ///< to get profile information + ur_profiling_info_t + propName, ///< [in] the name of the profiling property to query + size_t propSize, ///< [in] size in bytes of the profiling property value + void * + pPropValue, ///< [out][optional][typename(propName, propSize)] value of the profiling + ///< property + size_t * + pPropSizeRet ///< [out][optional] pointer to the actual size in bytes returned in + ///< propValue +) { + auto pfnGetSyncPointProfilingInfoExp = + context.urDdiTable.EventExp.pfnGetSyncPointProfilingInfoExp; + + if (nullptr == pfnGetSyncPointProfilingInfoExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (context.enableParameterValidation) { + if (NULL == hEvent) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + if (UR_PROFILING_INFO_COMMAND_COMPLETE < propName) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + + if (pPropValue && propSize == 0) { + return UR_RESULT_ERROR_INVALID_VALUE; + } + } + + ur_result_t result = pfnGetSyncPointProfilingInfoExp( + hEvent, syncPoint, propName, propSize, pPropValue, pPropSizeRet); + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueCooperativeKernelLaunchExp __urdlllocal ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( @@ -8529,6 +8573,42 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEventProcAddrTable( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's EventExp table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION +UR_DLLEXPORT ur_result_t UR_APICALL urGetEventExpProcAddrTable( + 
ur_api_version_t version, ///< [in] API version requested + ur_event_exp_dditable_t + *pDdiTable ///< [in,out] pointer to table of DDI function pointers +) { + auto &dditable = ur_validation_layer::context.urDdiTable.EventExp; + + if (nullptr == pDdiTable) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (UR_MAJOR_VERSION(ur_validation_layer::context.version) != + UR_MAJOR_VERSION(version) || + UR_MINOR_VERSION(ur_validation_layer::context.version) > + UR_MINOR_VERSION(version)) { + return UR_RESULT_ERROR_UNSUPPORTED_VERSION; + } + + ur_result_t result = UR_RESULT_SUCCESS; + + dditable.pfnGetSyncPointProfilingInfoExp = + pDdiTable->pfnGetSyncPointProfilingInfoExp; + pDdiTable->pfnGetSyncPointProfilingInfoExp = + ur_validation_layer::urEventGetSyncPointProfilingInfoExp; + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Exported function for filling application's Kernel table /// with current process' addresses @@ -9337,6 +9417,11 @@ ur_result_t context_t::init(ur_dditable_t *dditable, UR_API_VERSION_CURRENT, &dditable->Event); } + if (UR_RESULT_SUCCESS == result) { + result = ur_validation_layer::urGetEventExpProcAddrTable( + UR_API_VERSION_CURRENT, &dditable->EventExp); + } + if (UR_RESULT_SUCCESS == result) { result = ur_validation_layer::urGetKernelProcAddrTable( UR_API_VERSION_CURRENT, &dditable->Kernel); diff --git a/source/loader/ur_ldrddi.cpp b/source/loader/ur_ldrddi.cpp index a3a4ccaaa0..c10f8019f1 100644 --- a/source/loader/ur_ldrddi.cpp +++ b/source/loader/ur_ldrddi.cpp @@ -7259,6 +7259,43 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEventGetSyncPointProfilingInfoExp +__urdlllocal ur_result_t UR_APICALL urEventGetSyncPointProfilingInfoExp( + ur_event_handle_t hEvent, ///< [in] handle of the event object + 
ur_exp_command_buffer_sync_point_t + syncPoint, ///< [in] Sync point referencing the node (i.e. command) from which we want + ///< to get profile information + ur_profiling_info_t + propName, ///< [in] the name of the profiling property to query + size_t propSize, ///< [in] size in bytes of the profiling property value + void * + pPropValue, ///< [out][optional][typename(propName, propSize)] value of the profiling + ///< property + size_t * + pPropSizeRet ///< [out][optional] pointer to the actual size in bytes returned in + ///< propValue +) { + ur_result_t result = UR_RESULT_SUCCESS; + + // extract platform's function pointer table + auto dditable = reinterpret_cast<ur_event_object_t *>(hEvent)->dditable; + auto pfnGetSyncPointProfilingInfoExp = + dditable->ur.EventExp.pfnGetSyncPointProfilingInfoExp; + if (nullptr == pfnGetSyncPointProfilingInfoExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + // convert loader handle to platform handle + hEvent = reinterpret_cast<ur_event_object_t *>(hEvent)->handle; + + // forward to device-platform + result = pfnGetSyncPointProfilingInfoExp( + hEvent, syncPoint, propName, propSize, pPropValue, pPropSizeRet); + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueCooperativeKernelLaunchExp __urdlllocal ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( @@ -8144,6 +8181,60 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEventProcAddrTable( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's EventExp table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION +UR_DLLEXPORT ur_result_t UR_APICALL urGetEventExpProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + ur_event_exp_dditable_t +
*pDdiTable ///< [in,out] pointer to table of DDI function pointers +) { + if (nullptr == pDdiTable) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (ur_loader::context->version < version) { + return UR_RESULT_ERROR_UNSUPPORTED_VERSION; + } + + ur_result_t result = UR_RESULT_SUCCESS; + + // Load the device-platform DDI tables + for (auto &platform : ur_loader::context->platforms) { + if (platform.initStatus != UR_RESULT_SUCCESS) { + continue; + } + auto getTable = reinterpret_cast<ur_pfnGetEventExpProcAddrTable_t>( + ur_loader::LibLoader::getFunctionPtr(platform.handle.get(), + "urGetEventExpProcAddrTable")); + if (!getTable) { + continue; + } + platform.initStatus = getTable(version, &platform.dditable.ur.EventExp); + } + + if (UR_RESULT_SUCCESS == result) { + if (ur_loader::context->platforms.size() != 1 || + ur_loader::context->forceIntercept) { + // return pointers to loader's DDIs + pDdiTable->pfnGetSyncPointProfilingInfoExp = + ur_loader::urEventGetSyncPointProfilingInfoExp; + } else { + // return pointers directly to platform's DDIs + *pDdiTable = + ur_loader::context->platforms.front().dditable.ur.EventExp; + } + } + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Exported function for filling application's Kernel table /// with current process' addresses diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index cd4a70c91e..391529ffd7 100644 --- a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -7839,6 +7839,54 @@ ur_result_t UR_APICALL urCommandBufferEnqueueExp( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Get profiling information for the sync point execution associated with +/// an event object +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// -
::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hEvent` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_PROFILING_INFO_COMMAND_COMPLETE < propName` +/// - ::UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE +/// + If `hEvent`s associated queue was not created with `UR_QUEUE_FLAG_PROFILING_ENABLE`. +/// - ::UR_RESULT_ERROR_INVALID_VALUE +/// + `pPropValue && propSize == 0` +/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP - "If `syncPoint` is not referencing a command enqueued in this CommandBuffer. Typically, `syncPoint` > number of commands" +ur_result_t UR_APICALL urEventGetSyncPointProfilingInfoExp( + ur_event_handle_t hEvent, ///< [in] handle of the event object + ur_exp_command_buffer_sync_point_t + syncPoint, ///< [in] Sync point referencing the node (i.e. command) from which we want + ///< to get profile information + ur_profiling_info_t + propName, ///< [in] the name of the profiling property to query + size_t propSize, ///< [in] size in bytes of the profiling property value + void * + pPropValue, ///< [out][optional][typename(propName, propSize)] value of the profiling + ///< property + size_t * + pPropSizeRet ///< [out][optional] pointer to the actual size in bytes returned in + ///< propValue + ) try { + auto pfnGetSyncPointProfilingInfoExp = + ur_lib::context->urDdiTable.EventExp.pfnGetSyncPointProfilingInfoExp; + if (nullptr == pfnGetSyncPointProfilingInfoExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + return pfnGetSyncPointProfilingInfoExp(hEvent, syncPoint, propName, + propSize, pPropValue, pPropSizeRet); +} catch (...) 
{ + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Enqueue a command to execute a cooperative kernel /// diff --git a/source/loader/ur_libddi.cpp b/source/loader/ur_libddi.cpp index bf28e09a71..ea471d3ed7 100644 --- a/source/loader/ur_libddi.cpp +++ b/source/loader/ur_libddi.cpp @@ -55,6 +55,11 @@ __urdlllocal ur_result_t context_t::urLoaderInit() { urGetEventProcAddrTable(UR_API_VERSION_CURRENT, &urDdiTable.Event); } + if (UR_RESULT_SUCCESS == result) { + result = urGetEventExpProcAddrTable(UR_API_VERSION_CURRENT, + &urDdiTable.EventExp); + } + if (UR_RESULT_SUCCESS == result) { result = urGetKernelProcAddrTable(UR_API_VERSION_CURRENT, &urDdiTable.Kernel); diff --git a/source/loader/ur_print.cpp b/source/loader/ur_print.cpp index 87660ee2cc..af8a53abf5 100644 --- a/source/loader/ur_print.cpp +++ b/source/loader/ur_print.cpp @@ -1616,6 +1616,14 @@ ur_result_t urPrintEventSetCallbackParams( return str_copy(&ss, buffer, buff_size, out_size); } +ur_result_t urPrintEventGetSyncPointProfilingInfoExpParams( + const struct ur_event_get_sync_point_profiling_info_exp_params_t *params, + char *buffer, const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << params; + return str_copy(&ss, buffer, buff_size, out_size); +} + ur_result_t urPrintKernelCreateParams(const struct ur_kernel_create_params_t *params, char *buffer, const size_t buff_size, diff --git a/source/ur_api.cpp b/source/ur_api.cpp index 26f24aba08..4cff513d08 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -6624,6 +6624,46 @@ ur_result_t UR_APICALL urCommandBufferEnqueueExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Get profiling information for the sync point execution associated with +/// an event object +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - 
::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hEvent` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_PROFILING_INFO_COMMAND_COMPLETE < propName` +/// - ::UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE +/// + If `hEvent`s associated queue was not created with `UR_QUEUE_FLAG_PROFILING_ENABLE`. +/// - ::UR_RESULT_ERROR_INVALID_VALUE +/// + `pPropValue && propSize == 0` +/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP - "If `syncPoint` is not referencing a command enqueued in this CommandBuffer. Typically, `syncPoint` > number of commands" +ur_result_t UR_APICALL urEventGetSyncPointProfilingInfoExp( + ur_event_handle_t hEvent, ///< [in] handle of the event object + ur_exp_command_buffer_sync_point_t + syncPoint, ///< [in] Sync point referencing the node (i.e. command) from which we want + ///< to get profile information + ur_profiling_info_t + propName, ///< [in] the name of the profiling property to query + size_t propSize, ///< [in] size in bytes of the profiling property value + void * + pPropValue, ///< [out][optional][typename(propName, propSize)] value of the profiling + ///< property + size_t * + pPropSizeRet ///< [out][optional] pointer to the actual size in bytes returned in + ///< propValue +) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Enqueue a command to execute a cooperative kernel /// From 2353f0c0d8c6b27b13213536a2f5011d7177da6b Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Fri, 2 Feb 2024 10:23:58 +0000 Subject: [PATCH 02/12] removes unused variable. 
--- source/adapters/level_zero/command_buffer.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index 23dbfac3bc..eb7ff3dff4 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -1007,8 +1007,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetSyncPointProfilingInfoExp( UrReturnHelper ReturnValue(PropValueSize, PropValue, PropValueSizeRet); - ze_kernel_timestamp_result_t tsResult; - // Node profiling info is stored in the CommandData field of the event // returned from graph submission. // The timing info of each command corresponding to a node is stored using @@ -1024,7 +1022,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetSyncPointProfilingInfoExp( case UR_PROFILING_INFO_COMMAND_START: { ProfilingsPtr = static_cast(Event->CommandData); - uint64_t Index = static_cast(SyncPoint); + const uint64_t Index = static_cast(SyncPoint); if (Index > ProfilingsPtr->NumEvents) { return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP; @@ -1039,7 +1037,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetSyncPointProfilingInfoExp( case UR_PROFILING_INFO_COMMAND_END: { ProfilingsPtr = static_cast(Event->CommandData); - uint64_t Index = static_cast(SyncPoint); + const uint64_t Index = static_cast(SyncPoint); if (Index > ProfilingsPtr->NumEvents) { return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP; From 965695a3b75a1d1d086c80758ee6d0b44a8dcb67 Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Fri, 2 Feb 2024 10:43:09 +0000 Subject: [PATCH 03/12] Fixes naming issue --- source/adapters/level_zero/ur_interface_loader.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/source/adapters/level_zero/ur_interface_loader.cpp b/source/adapters/level_zero/ur_interface_loader.cpp index 50d7cf585e..45bc6b1664 100644 --- a/source/adapters/level_zero/ur_interface_loader.cpp 
+++ b/source/adapters/level_zero/ur_interface_loader.cpp @@ -355,7 +355,7 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEventExpProcAddrTable( return retVal; } pDdiTable->pfnGetSyncPointProfilingInfoExp = - urEventGetSyncProfilingProfilingInfoExp; + urEventGetSyncPointProfilingInfoExp; return retVal; } From 27450f3a4a2c85907ec78f464c8e2d0f43d01a97 Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Fri, 2 Feb 2024 11:43:57 +0000 Subject: [PATCH 04/12] Removes const in casting --- source/adapters/level_zero/command_buffer.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index eb7ff3dff4..0160344a84 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -1022,7 +1022,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetSyncPointProfilingInfoExp( case UR_PROFILING_INFO_COMMAND_START: { ProfilingsPtr = static_cast<command_buffer_profiling_t *>(Event->CommandData); - const uint64_t Index = static_cast<const uint64_t>(SyncPoint); + const uint64_t Index = static_cast<uint64_t>(SyncPoint); if (Index > ProfilingsPtr->NumEvents) { return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP; @@ -1037,7 +1037,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetSyncPointProfilingInfoExp( case UR_PROFILING_INFO_COMMAND_END: { ProfilingsPtr = static_cast<command_buffer_profiling_t *>(Event->CommandData); - const uint64_t Index = static_cast<const uint64_t>(SyncPoint); + const uint64_t Index = static_cast<uint64_t>(SyncPoint); if (Index > ProfilingsPtr->NumEvents) { return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP; From 27bdbe25f8bb6d79177d42cf0e4d6a485b00bdca Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Fri, 2 Feb 2024 14:38:56 +0000 Subject: [PATCH 05/12] Adds missing declarations --- source/adapters/cuda/ur_interface_loader.cpp | 14 ++++++++++++++ source/adapters/hip/ur_interface_loader.cpp | 14 ++++++++++++++ source/adapters/native_cpu/ur_interface_loader.cpp | 14 ++++++++++++++
source/adapters/opencl/ur_interface_loader.cpp | 14 ++++++++++++++ 4 files changed, 56 insertions(+) diff --git a/source/adapters/cuda/ur_interface_loader.cpp b/source/adapters/cuda/ur_interface_loader.cpp index f31ffe6d87..acad886b8a 100644 --- a/source/adapters/cuda/ur_interface_loader.cpp +++ b/source/adapters/cuda/ur_interface_loader.cpp @@ -298,6 +298,20 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( return retVal; } +UR_DLLEXPORT ur_result_t UR_APICALL urGetEventExpProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + ur_event_exp_dditable_t + *pDdiTable ///< [in,out] pointer to table of DDI function pointers +) { + auto retVal = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != retVal) { + return retVal; + } + pDdiTable->pfnGetSyncPointProfilingInfoExp = + urEventGetSyncPointProfilingInfoExp; + return retVal; +} + UR_DLLEXPORT ur_result_t UR_APICALL urGetUsmP2PExpProcAddrTable( ur_api_version_t version, ur_usm_p2p_exp_dditable_t *pDdiTable) { auto retVal = validateProcInputs(version, pDdiTable); diff --git a/source/adapters/hip/ur_interface_loader.cpp b/source/adapters/hip/ur_interface_loader.cpp index 7707e78425..0cd3f48c9e 100644 --- a/source/adapters/hip/ur_interface_loader.cpp +++ b/source/adapters/hip/ur_interface_loader.cpp @@ -295,6 +295,20 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( return retVal; } +UR_DLLEXPORT ur_result_t UR_APICALL urGetEventExpProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + ur_event_exp_dditable_t + *pDdiTable ///< [in,out] pointer to table of DDI function pointers +) { + auto retVal = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != retVal) { + return retVal; + } + pDdiTable->pfnGetSyncPointProfilingInfoExp = + urEventGetSyncPointProfilingInfoExp; + return retVal; +} + UR_DLLEXPORT ur_result_t UR_APICALL urGetUsmP2PExpProcAddrTable( ur_api_version_t version, ur_usm_p2p_exp_dditable_t 
*pDdiTable) { auto retVal = validateProcInputs(version, pDdiTable); diff --git a/source/adapters/native_cpu/ur_interface_loader.cpp b/source/adapters/native_cpu/ur_interface_loader.cpp index 9408101927..7559b5717e 100644 --- a/source/adapters/native_cpu/ur_interface_loader.cpp +++ b/source/adapters/native_cpu/ur_interface_loader.cpp @@ -287,6 +287,20 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( return retVal; } +UR_DLLEXPORT ur_result_t UR_APICALL urGetEventExpProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + ur_event_exp_dditable_t + *pDdiTable ///< [in,out] pointer to table of DDI function pointers +) { + auto retVal = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != retVal) { + return retVal; + } + pDdiTable->pfnGetSyncPointProfilingInfoExp = + urEventGetSyncPointProfilingInfoExp; + return retVal; +} + UR_DLLEXPORT ur_result_t UR_APICALL urGetUsmP2PExpProcAddrTable( ur_api_version_t version, ur_usm_p2p_exp_dditable_t *pDdiTable) { auto retVal = validateProcInputs(version, pDdiTable); diff --git a/source/adapters/opencl/ur_interface_loader.cpp b/source/adapters/opencl/ur_interface_loader.cpp index ac2c33475b..df63dc6f50 100644 --- a/source/adapters/opencl/ur_interface_loader.cpp +++ b/source/adapters/opencl/ur_interface_loader.cpp @@ -305,6 +305,20 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( return retVal; } +UR_DLLEXPORT ur_result_t UR_APICALL urGetEventExpProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + ur_event_exp_dditable_t + *pDdiTable ///< [in,out] pointer to table of DDI function pointers +) { + auto retVal = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != retVal) { + return retVal; + } + pDdiTable->pfnGetSyncPointProfilingInfoExp = + urEventGetSyncPointProfilingInfoExp; + return retVal; +} + UR_DLLEXPORT ur_result_t UR_APICALL urGetUsmP2PExpProcAddrTable( ur_api_version_t version, 
ur_usm_p2p_exp_dditable_t *pDdiTable) { auto retVal = validateProcInputs(version, pDdiTable); From cb295ca927e301586a359225ab95113e24ec6742 Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Thu, 8 Feb 2024 11:45:34 +0000 Subject: [PATCH 06/12] Update scripts/core/exp-command-buffer.yml Co-authored-by: Ewan Crawford --- scripts/core/exp-command-buffer.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/core/exp-command-buffer.yml b/scripts/core/exp-command-buffer.yml index 3aeec3f6b4..ec8ead7ff1 100644 --- a/scripts/core/exp-command-buffer.yml +++ b/scripts/core/exp-command-buffer.yml @@ -731,7 +731,7 @@ params: desc: "[out][optional] pointer to the actual size in bytes returned in propValue" returns: - $X_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE: - - "If `hEvent`s associated queue was not created with `UR_QUEUE_FLAG_PROFILING_ENABLE`." + - "If `hEvent`s associated queue was not created with `$X_QUEUE_FLAG_PROFILING_ENABLE`." - $X_RESULT_ERROR_INVALID_VALUE: - "`pPropValue && propSize == 0`" - $X_RESULT_ERROR_INVALID_EVENT From 7b67e788d4e5255846b7bf7c3f5a6345d2b7bcff Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Thu, 8 Feb 2024 11:45:41 +0000 Subject: [PATCH 07/12] Update scripts/core/exp-command-buffer.yml Co-authored-by: Ewan Crawford --- scripts/core/exp-command-buffer.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/core/exp-command-buffer.yml b/scripts/core/exp-command-buffer.yml index ec8ead7ff1..366eb86290 100644 --- a/scripts/core/exp-command-buffer.yml +++ b/scripts/core/exp-command-buffer.yml @@ -738,4 +738,4 @@ returns: - $X_RESULT_ERROR_OUT_OF_RESOURCES - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP - - "If `syncPoint` is not referencing a command enqueued in this CommandBuffer. 
Typically, `syncPoint` > number of commands" + - "If `syncPoint` does not reference a command in the command-buffer submission associated with `hEvent.`" From ced8cae7455cb8e3f50eca03ddbcea9b7bc98fc3 Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Thu, 8 Feb 2024 12:05:43 +0000 Subject: [PATCH 08/12] Adds generated files + updates variable names + format --- include/ur_api.h | 4 +- source/adapters/cuda/command_buffer.cpp | 4 +- source/adapters/hip/command_buffer.cpp | 4 +- source/adapters/level_zero/command_buffer.cpp | 110 +++++++++--------- source/adapters/opencl/command_buffer.cpp | 4 +- source/loader/ur_libapi.cpp | 4 +- source/ur_api.cpp | 4 +- 7 files changed, 65 insertions(+), 69 deletions(-) diff --git a/include/ur_api.h b/include/ur_api.h index 86b93452c4..0606bbce19 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -8356,13 +8356,13 @@ urCommandBufferEnqueueExp( /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION /// + `::UR_PROFILING_INFO_COMMAND_COMPLETE < propName` /// - ::UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE -/// + If `hEvent`s associated queue was not created with `UR_QUEUE_FLAG_PROFILING_ENABLE`. +/// + If `hEvent`s associated queue was not created with `::UR_QUEUE_FLAG_PROFILING_ENABLE`. /// - ::UR_RESULT_ERROR_INVALID_VALUE /// + `pPropValue && propSize == 0` /// - ::UR_RESULT_ERROR_INVALID_EVENT /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY -/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP - "If `syncPoint` is not referencing a command enqueued in this CommandBuffer. 
Typically, `syncPoint` > number of commands" +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP - "If `syncPoint` does not reference a command in the command-buffer submission associated with `hEvent.`" UR_APIEXPORT ur_result_t UR_APICALL urEventGetSyncPointProfilingInfoExp( ur_event_handle_t hEvent, ///< [in] handle of the event object diff --git a/source/adapters/cuda/command_buffer.cpp b/source/adapters/cuda/command_buffer.cpp index 7da742931b..15105bf341 100644 --- a/source/adapters/cuda/command_buffer.cpp +++ b/source/adapters/cuda/command_buffer.cpp @@ -764,10 +764,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( } UR_APIEXPORT ur_result_t UR_APICALL urEventGetSyncPointProfilingInfoExp( - ur_event_handle_t Event, ur_exp_command_buffer_sync_point_t SyncPoint, + ur_event_handle_t hEvent, ur_exp_command_buffer_sync_point_t SyncPoint, ur_profiling_info_t PropName, size_t PropValueSize, void *PropValue, size_t *PropValueSizeRet) { - (void)Event; + (void)hEvent; (void)SyncPoint; (void)PropName; (void)PropValueSize; diff --git a/source/adapters/hip/command_buffer.cpp b/source/adapters/hip/command_buffer.cpp index 930d5dbe41..a9ea61395b 100644 --- a/source/adapters/hip/command_buffer.cpp +++ b/source/adapters/hip/command_buffer.cpp @@ -164,10 +164,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( } UR_APIEXPORT ur_result_t UR_APICALL urEventGetSyncPointProfilingInfoExp( - ur_event_handle_t Event, ur_exp_command_buffer_sync_point_t SyncPoint, + ur_event_handle_t hEvent, ur_exp_command_buffer_sync_point_t SyncPoint, ur_profiling_info_t PropName, size_t PropValueSize, void *PropValue, size_t *PropValueSizeRet) { - (void)Event; + (void)hEvent; (void)SyncPoint; (void)PropName; (void)PropValueSize; diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index 0160344a84..e7caa327b7 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ 
b/source/adapters/level_zero/command_buffer.cpp @@ -988,18 +988,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( } UR_APIEXPORT ur_result_t UR_APICALL urEventGetSyncPointProfilingInfoExp( - ur_event_handle_t Event, ur_exp_command_buffer_sync_point_t SyncPoint, + ur_event_handle_t hEvent, ur_exp_command_buffer_sync_point_t SyncPoint, ur_profiling_info_t PropName, size_t PropValueSize, void *PropValue, size_t *PropValueSizeRet) { - std::shared_lock EventLock(Event->Mutex); + std::shared_lock EventLock(hEvent->Mutex); - if (Event->UrQueue && - (Event->UrQueue->Properties & UR_QUEUE_FLAG_PROFILING_ENABLE) == 0) { + if (hEvent->UrQueue && + (hEvent->UrQueue->Properties & UR_QUEUE_FLAG_PROFILING_ENABLE) == 0) { return UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE; } ur_device_handle_t Device = - Event->UrQueue ? Event->UrQueue->Device : Event->Context->Devices[0]; + hEvent->UrQueue ? hEvent->UrQueue->Device : hEvent->Context->Devices[0]; uint64_t ZeTimerResolution = Device->ZeDeviceProperties->timerResolution; const uint64_t TimestampMaxValue = @@ -1015,62 +1015,58 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetSyncPointProfilingInfoExp( // assigned to this node when it was enqueued. Consequently, `SyncPoint` // corresponds to the index of the memory slot containing the timestamps // corresponding to this specific node. 
- if (Event->CommandType == UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP) { - if (Event->CommandData) { - command_buffer_profiling_t *ProfilingsPtr; - switch (PropName) { - case UR_PROFILING_INFO_COMMAND_START: { - ProfilingsPtr = - static_cast(Event->CommandData); - const uint64_t Index = static_cast(SyncPoint); - - if (Index > ProfilingsPtr->NumEvents) { - return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP; - } + if (hEvent->CommandType != UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP) { + urPrint("urEventGetSyncPointProfilingInfoExp: not supported Event type\n"); + return UR_RESULT_ERROR_INVALID_VALUE; + } + if (!hEvent->CommandData) { + return UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE; + } - uint64_t StartTime = - ProfilingsPtr->Timestamps[Index].global.kernelStart; - uint64_t ContextStartTime = - (StartTime & TimestampMaxValue) * ZeTimerResolution; - return ReturnValue(ContextStartTime); - } - case UR_PROFILING_INFO_COMMAND_END: { - ProfilingsPtr = - static_cast(Event->CommandData); - const uint64_t Index = static_cast(SyncPoint); + command_buffer_profiling_t *ProfilingsPtr; + switch (PropName) { + case UR_PROFILING_INFO_COMMAND_START: { + ProfilingsPtr = + static_cast(hEvent->CommandData); + const uint64_t Index = static_cast(SyncPoint); - if (Index > ProfilingsPtr->NumEvents) { - return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP; - } + if (Index > ProfilingsPtr->NumEvents) { + return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP; + } - uint64_t EndTime = ProfilingsPtr->Timestamps[Index].global.kernelEnd; - uint64_t LastStart = - ProfilingsPtr->Timestamps[Index].global.kernelStart; - uint64_t ContextStartTime = (LastStart & TimestampMaxValue); - uint64_t ContextEndTime = (EndTime & TimestampMaxValue); - - // - // Handle a possible wrap-around (the underlying HW counter is < - // 64-bit). 
Note, it will not report correct time if there were multiple - // wrap arounds, and the longer term plan is to enlarge the capacity of - // the HW timestamps. - // - if (ContextEndTime <= ContextStartTime) { - ContextEndTime += TimestampMaxValue; - } - ContextEndTime *= ZeTimerResolution; - return ReturnValue(ContextEndTime); - } - default: - urPrint( - "urEventGetSyncPointProfilingInfoExp: not supported ParamName\n"); - return UR_RESULT_ERROR_INVALID_VALUE; - } - } else { - return UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE; + uint64_t StartTime = ProfilingsPtr->Timestamps[Index].global.kernelStart; + uint64_t ContextStartTime = + (StartTime & TimestampMaxValue) * ZeTimerResolution; + return ReturnValue(ContextStartTime); + } + case UR_PROFILING_INFO_COMMAND_END: { + ProfilingsPtr = + static_cast(hEvent->CommandData); + const uint64_t Index = static_cast(SyncPoint); + + if (Index > ProfilingsPtr->NumEvents) { + return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP; } - } else { - urPrint("urEventGetSyncPointProfilingInfoExp: not supported Event type\n"); + + uint64_t EndTime = ProfilingsPtr->Timestamps[Index].global.kernelEnd; + uint64_t LastStart = ProfilingsPtr->Timestamps[Index].global.kernelStart; + uint64_t ContextStartTime = (LastStart & TimestampMaxValue); + uint64_t ContextEndTime = (EndTime & TimestampMaxValue); + + // + // Handle a possible wrap-around (the underlying HW counter is < + // 64-bit). Note, it will not report correct time if there were multiple + // wrap arounds, and the longer term plan is to enlarge the capacity of + // the HW timestamps. 
+ // + if (ContextEndTime <= ContextStartTime) { + ContextEndTime += TimestampMaxValue; + } + ContextEndTime *= ZeTimerResolution; + return ReturnValue(ContextEndTime); + } + default: + urPrint("urEventGetSyncPointProfilingInfoExp: not supported ParamName\n"); return UR_RESULT_ERROR_INVALID_VALUE; } diff --git a/source/adapters/opencl/command_buffer.cpp b/source/adapters/opencl/command_buffer.cpp index 97b819f500..34b176d308 100644 --- a/source/adapters/opencl/command_buffer.cpp +++ b/source/adapters/opencl/command_buffer.cpp @@ -358,10 +358,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( } UR_APIEXPORT ur_result_t UR_APICALL urEventGetSyncPointProfilingInfoExp( - ur_event_handle_t Event, ur_exp_command_buffer_sync_point_t SyncPoint, + ur_event_handle_t hEvent, ur_exp_command_buffer_sync_point_t SyncPoint, ur_profiling_info_t PropName, size_t PropValueSize, void *PropValue, size_t *PropValueSizeRet) { - (void)Event; + (void)hEvent; (void)SyncPoint; (void)PropName; (void)PropValueSize; diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index 391529ffd7..250e497aac 100644 --- a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -7853,13 +7853,13 @@ ur_result_t UR_APICALL urCommandBufferEnqueueExp( /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION /// + `::UR_PROFILING_INFO_COMMAND_COMPLETE < propName` /// - ::UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE -/// + If `hEvent`s associated queue was not created with `UR_QUEUE_FLAG_PROFILING_ENABLE`. +/// + If `hEvent`s associated queue was not created with `::UR_QUEUE_FLAG_PROFILING_ENABLE`. /// - ::UR_RESULT_ERROR_INVALID_VALUE /// + `pPropValue && propSize == 0` /// - ::UR_RESULT_ERROR_INVALID_EVENT /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY -/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP - "If `syncPoint` is not referencing a command enqueued in this CommandBuffer. 
Typically, `syncPoint` > number of commands" +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP - "If `syncPoint` does not reference a command in the command-buffer submission associated with `hEvent.`" ur_result_t UR_APICALL urEventGetSyncPointProfilingInfoExp( ur_event_handle_t hEvent, ///< [in] handle of the event object ur_exp_command_buffer_sync_point_t diff --git a/source/ur_api.cpp b/source/ur_api.cpp index 4cff513d08..41e16c9913 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -6638,13 +6638,13 @@ ur_result_t UR_APICALL urCommandBufferEnqueueExp( /// - ::UR_RESULT_ERROR_INVALID_ENUMERATION /// + `::UR_PROFILING_INFO_COMMAND_COMPLETE < propName` /// - ::UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE -/// + If `hEvent`s associated queue was not created with `UR_QUEUE_FLAG_PROFILING_ENABLE`. +/// + If `hEvent`s associated queue was not created with `::UR_QUEUE_FLAG_PROFILING_ENABLE`. /// - ::UR_RESULT_ERROR_INVALID_VALUE /// + `pPropValue && propSize == 0` /// - ::UR_RESULT_ERROR_INVALID_EVENT /// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES /// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY -/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP - "If `syncPoint` is not referencing a command enqueued in this CommandBuffer. 
Typically, `syncPoint` > number of commands" +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP - "If `syncPoint` does not reference a command in the command-buffer submission associated with `hEvent.`" ur_result_t UR_APICALL urEventGetSyncPointProfilingInfoExp( ur_event_handle_t hEvent, ///< [in] handle of the event object ur_exp_command_buffer_sync_point_t From 7d89bae0fe681f5c27a51473293163f395b0a3b1 Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Fri, 9 Feb 2024 17:30:41 +0000 Subject: [PATCH 09/12] Updates parameter names --- source/adapters/cuda/command_buffer.cpp | 16 ++++++++-------- source/adapters/hip/command_buffer.cpp | 16 ++++++++-------- source/adapters/level_zero/command_buffer.cpp | 16 ++++++++-------- source/adapters/opencl/command_buffer.cpp | 16 ++++++++-------- 4 files changed, 32 insertions(+), 32 deletions(-) diff --git a/source/adapters/cuda/command_buffer.cpp b/source/adapters/cuda/command_buffer.cpp index 15105bf341..5cf7c81a5e 100644 --- a/source/adapters/cuda/command_buffer.cpp +++ b/source/adapters/cuda/command_buffer.cpp @@ -764,14 +764,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( } UR_APIEXPORT ur_result_t UR_APICALL urEventGetSyncPointProfilingInfoExp( - ur_event_handle_t hEvent, ur_exp_command_buffer_sync_point_t SyncPoint, - ur_profiling_info_t PropName, size_t PropValueSize, void *PropValue, - size_t *PropValueSizeRet) { + ur_event_handle_t hEvent, ur_exp_command_buffer_sync_point_t syncPoint, + ur_profiling_info_t propName, size_t propSize, void *pPropValue, + size_t *pPropSizeRet) { (void)hEvent; - (void)SyncPoint; - (void)PropName; - (void)PropValueSize; - (void)PropValue; - (void)PropValueSizeRet; + (void)syncPoint; + (void)propName; + (void)propSize; + (void)pPropValue; + (void)pPropValueSizeRet; return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } diff --git a/source/adapters/hip/command_buffer.cpp b/source/adapters/hip/command_buffer.cpp index a9ea61395b..c28514ed93 100644 --- 
a/source/adapters/hip/command_buffer.cpp +++ b/source/adapters/hip/command_buffer.cpp @@ -164,14 +164,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( } UR_APIEXPORT ur_result_t UR_APICALL urEventGetSyncPointProfilingInfoExp( - ur_event_handle_t hEvent, ur_exp_command_buffer_sync_point_t SyncPoint, - ur_profiling_info_t PropName, size_t PropValueSize, void *PropValue, - size_t *PropValueSizeRet) { + ur_event_handle_t hEvent, ur_exp_command_buffer_sync_point_t syncPoint, + ur_profiling_info_t propName, size_t propSize, void *pPropValue, + size_t *pPropSizeRet) { (void)hEvent; - (void)SyncPoint; - (void)PropName; - (void)PropValueSize; - (void)PropValue; - (void)PropValueSizeRet; + (void)syncPoint; + (void)propName; + (void)propSize; + (void)pPropValue; + (void)pPropValueSizeRet; return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index e7caa327b7..305184056c 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -988,9 +988,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( } UR_APIEXPORT ur_result_t UR_APICALL urEventGetSyncPointProfilingInfoExp( - ur_event_handle_t hEvent, ur_exp_command_buffer_sync_point_t SyncPoint, - ur_profiling_info_t PropName, size_t PropValueSize, void *PropValue, - size_t *PropValueSizeRet) { + ur_event_handle_t hEvent, ur_exp_command_buffer_sync_point_t syncPoint, + ur_profiling_info_t propName, size_t propSize, void *pPropValue, + size_t *pPropSizeRet) { std::shared_lock EventLock(hEvent->Mutex); if (hEvent->UrQueue && @@ -1005,14 +1005,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetSyncPointProfilingInfoExp( const uint64_t TimestampMaxValue = ((1ULL << Device->ZeDeviceProperties->kernelTimestampValidBits) - 1ULL); - UrReturnHelper ReturnValue(PropValueSize, PropValue, PropValueSizeRet); + UrReturnHelper ReturnValue(propSize, pPropValue, 
pPropSizeRet); // Node profiling info is stored in the CommandData field of the event // returned from graph submission. // The timing info of each command corresponding to a node is stored using // `zeCommandListAppendQueryKernelTimestamps`. This command stores the timing // info of each command/node in the same order as the sync-points that were - // assigned to this node when it was enqueued. Consequently, `SyncPoint` + // assigned to this node when it was enqueued. Consequently, `syncPoint` // corresponds to the index of the memory slot containing the timestamps // corresponding to this specific node. if (hEvent->CommandType != UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP) { @@ -1024,11 +1024,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetSyncPointProfilingInfoExp( } command_buffer_profiling_t *ProfilingsPtr; - switch (PropName) { + switch (propName) { case UR_PROFILING_INFO_COMMAND_START: { ProfilingsPtr = static_cast(hEvent->CommandData); - const uint64_t Index = static_cast(SyncPoint); + const uint64_t Index = static_cast(syncPoint); if (Index > ProfilingsPtr->NumEvents) { return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP; @@ -1042,7 +1042,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetSyncPointProfilingInfoExp( case UR_PROFILING_INFO_COMMAND_END: { ProfilingsPtr = static_cast(hEvent->CommandData); - const uint64_t Index = static_cast(SyncPoint); + const uint64_t Index = static_cast(syncPoint); if (Index > ProfilingsPtr->NumEvents) { return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP; diff --git a/source/adapters/opencl/command_buffer.cpp b/source/adapters/opencl/command_buffer.cpp index 34b176d308..8b0eb3beee 100644 --- a/source/adapters/opencl/command_buffer.cpp +++ b/source/adapters/opencl/command_buffer.cpp @@ -358,14 +358,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( } UR_APIEXPORT ur_result_t UR_APICALL urEventGetSyncPointProfilingInfoExp( - ur_event_handle_t hEvent, ur_exp_command_buffer_sync_point_t 
SyncPoint, - ur_profiling_info_t PropName, size_t PropValueSize, void *PropValue, - size_t *PropValueSizeRet) { + ur_event_handle_t hEvent, ur_exp_command_buffer_sync_point_t syncPoint, + ur_profiling_info_t propName, size_t propSize, void *pPropValue, + size_t *pPropSizeRet) { (void)hEvent; - (void)SyncPoint; - (void)PropName; - (void)PropValueSize; - (void)PropValue; - (void)PropValueSizeRet; + (void)syncPoint; + (void)propName; + (void)propSize; + (void)pPropValue; + (void)pPropValueSizeRet; return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } From 5905073231b6b41e132771c7edfa9a113f6855db Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Tue, 13 Feb 2024 09:51:53 +0000 Subject: [PATCH 10/12] typos --- source/adapters/cuda/command_buffer.cpp | 2 +- source/adapters/hip/command_buffer.cpp | 2 +- source/adapters/opencl/command_buffer.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/source/adapters/cuda/command_buffer.cpp b/source/adapters/cuda/command_buffer.cpp index 5cf7c81a5e..2478e2c1ca 100644 --- a/source/adapters/cuda/command_buffer.cpp +++ b/source/adapters/cuda/command_buffer.cpp @@ -772,6 +772,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetSyncPointProfilingInfoExp( (void)propName; (void)propSize; (void)pPropValue; - (void)pPropValueSizeRet; + (void)pPropSizeRet; return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } diff --git a/source/adapters/hip/command_buffer.cpp b/source/adapters/hip/command_buffer.cpp index c28514ed93..8635fe744a 100644 --- a/source/adapters/hip/command_buffer.cpp +++ b/source/adapters/hip/command_buffer.cpp @@ -172,6 +172,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetSyncPointProfilingInfoExp( (void)propName; (void)propSize; (void)pPropValue; - (void)pPropValueSizeRet; + (void)pPropSizeRet; return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } diff --git a/source/adapters/opencl/command_buffer.cpp b/source/adapters/opencl/command_buffer.cpp index 8b0eb3beee..dc48988a53 100644 --- 
a/source/adapters/opencl/command_buffer.cpp +++ b/source/adapters/opencl/command_buffer.cpp @@ -366,6 +366,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetSyncPointProfilingInfoExp( (void)propName; (void)propSize; (void)pPropValue; - (void)pPropValueSizeRet; + (void)pPropSizeRet; return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } From 306bd8206f2184f10ef9a573e0ebf66274dbb62e Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Tue, 13 Feb 2024 10:11:43 +0000 Subject: [PATCH 11/12] Updates generated files --- source/loader/layers/validation/ur_valddi.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/source/loader/layers/validation/ur_valddi.cpp b/source/loader/layers/validation/ur_valddi.cpp index ff817127c9..4b95b07957 100644 --- a/source/loader/layers/validation/ur_valddi.cpp +++ b/source/loader/layers/validation/ur_valddi.cpp @@ -8593,6 +8593,11 @@ __urdlllocal ur_result_t UR_APICALL urEventGetSyncPointProfilingInfoExp( } } + if (context.enableLifetimeValidation && + !refCountContext.isReferenceValid(hEvent)) { + refCountContext.logInvalidReference(hEvent); + } + ur_result_t result = pfnGetSyncPointProfilingInfoExp( hEvent, syncPoint, propName, propSize, pPropValue, pPropSizeRet); From fdb228d9ac66128cf4d05bbb45315e93c1bf3db5 Mon Sep 17 00:00:00 2001 From: Maxime France-Pillois Date: Wed, 14 Feb 2024 09:22:38 +0000 Subject: [PATCH 12/12] updates generated files --- source/adapters/hip/command_buffer.cpp | 1 - source/adapters/native_cpu/command_buffer.cpp | 1 - 2 files changed, 2 deletions(-) diff --git a/source/adapters/hip/command_buffer.cpp b/source/adapters/hip/command_buffer.cpp index d0eb655013..6c1400887c 100644 --- a/source/adapters/hip/command_buffer.cpp +++ b/source/adapters/hip/command_buffer.cpp @@ -204,4 +204,3 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetSyncPointProfilingInfoExp( (void)pPropSizeRet; return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } - diff --git a/source/adapters/native_cpu/command_buffer.cpp 
b/source/adapters/native_cpu/command_buffer.cpp index 7f008e677d..d1de90a150 100644 --- a/source/adapters/native_cpu/command_buffer.cpp +++ b/source/adapters/native_cpu/command_buffer.cpp @@ -197,4 +197,3 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventGetSyncPointProfilingInfoExp( size_t, void *, size_t *) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } -