Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[EXP][CMDBUF] Support for Node Profiling #1309

Open
wants to merge 17 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions include/ur_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@ typedef enum ur_function_t {
UR_FUNCTION_COMMAND_BUFFER_APPEND_USM_ADVISE_EXP = 213, ///< Enumerator for ::urCommandBufferAppendUSMAdviseExp
UR_FUNCTION_ENQUEUE_COOPERATIVE_KERNEL_LAUNCH_EXP = 214, ///< Enumerator for ::urEnqueueCooperativeKernelLaunchExp
UR_FUNCTION_KERNEL_SUGGEST_MAX_COOPERATIVE_GROUP_COUNT_EXP = 215, ///< Enumerator for ::urKernelSuggestMaxCooperativeGroupCountExp
UR_FUNCTION_EVENT_GET_SYNC_POINT_PROFILING_INFO_EXP = 218, ///< Enumerator for ::urEventGetSyncPointProfilingInfoExp
/// @cond
UR_FUNCTION_FORCE_UINT32 = 0x7fffffff
/// @endcond
Expand Down Expand Up @@ -8341,6 +8342,40 @@ urCommandBufferEnqueueExp(
///< command-buffer execution instance.
);

///////////////////////////////////////////////////////////////////////////////
/// @brief Get profiling information for the sync point execution associated with
/// an event object
///
/// @returns
/// - ::UR_RESULT_SUCCESS
/// - ::UR_RESULT_ERROR_UNINITIALIZED
/// - ::UR_RESULT_ERROR_DEVICE_LOST
/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC
/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
/// + `NULL == hEvent`
/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION
/// + `::UR_PROFILING_INFO_COMMAND_COMPLETE < propName`
/// - ::UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE
/// + If `hEvent`s associated queue was not created with `::UR_QUEUE_FLAG_PROFILING_ENABLE`.
/// - ::UR_RESULT_ERROR_INVALID_VALUE
/// + `pPropValue && propSize == 0`
/// - ::UR_RESULT_ERROR_INVALID_EVENT
/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES
/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY
/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP
/// + If `syncPoint` does not reference a command in the command-buffer submission associated with `hEvent`.
UR_APIEXPORT ur_result_t UR_APICALL
urEventGetSyncPointProfilingInfoExp(
ur_event_handle_t hEvent, ///< [in] handle of the event object
ur_exp_command_buffer_sync_point_t syncPoint, ///< [in] Sync point referencing the node (i.e. command) from which we want
///< to get profile information
ur_profiling_info_t propName, ///< [in] the name of the profiling property to query
size_t propSize, ///< [in] size in bytes of the profiling property value
void *pPropValue, ///< [out][optional][typename(propName, propSize)] value of the profiling
///< property
size_t *pPropSizeRet ///< [out][optional] pointer to the actual size in bytes returned in
///< propValue
);

#if !defined(__GNUC__)
#pragma endregion
#endif
Expand Down Expand Up @@ -9026,6 +9061,19 @@ typedef struct ur_event_set_callback_params_t {
void **ppUserData;
} ur_event_set_callback_params_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Function parameters for urEventGetSyncPointProfilingInfoExp
/// @details Each entry is a pointer to the parameter passed to the function;
/// allowing the callback the ability to modify the parameter's value.
/// Members appear in the same order as the function's parameters.
typedef struct ur_event_get_sync_point_profiling_info_exp_params_t {
ur_event_handle_t *phEvent; ///< pointer to the `hEvent` argument
ur_exp_command_buffer_sync_point_t *psyncPoint; ///< pointer to the `syncPoint` argument
ur_profiling_info_t *ppropName; ///< pointer to the `propName` argument
size_t *ppropSize; ///< pointer to the `propSize` argument
void **ppPropValue; ///< pointer to the `pPropValue` argument
size_t **ppPropSizeRet; ///< pointer to the `pPropSizeRet` argument
} ur_event_get_sync_point_profiling_info_exp_params_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Function parameters for urProgramCreateWithIL
/// @details Each entry is a pointer to the parameter passed to the function;
Expand Down
38 changes: 38 additions & 0 deletions include/ur_ddi.h
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,43 @@ typedef ur_result_t(UR_APICALL *ur_pfnGetEventProcAddrTable_t)(
ur_api_version_t,
ur_event_dditable_t *);

///////////////////////////////////////////////////////////////////////////////
/// @brief Function-pointer for urEventGetSyncPointProfilingInfoExp
/// @details Parameter types and order mirror the
/// urEventGetSyncPointProfilingInfoExp declaration.
typedef ur_result_t(UR_APICALL *ur_pfnEventGetSyncPointProfilingInfoExp_t)(
ur_event_handle_t, ///< hEvent
ur_exp_command_buffer_sync_point_t, ///< syncPoint
ur_profiling_info_t, ///< propName
size_t, ///< propSize
void *, ///< pPropValue
size_t *); ///< pPropSizeRet

///////////////////////////////////////////////////////////////////////////////
/// @brief Table of EventExp functions pointers
/// @details Holds one entry per experimental Event entry point; at present
/// that is only urEventGetSyncPointProfilingInfoExp.
typedef struct ur_event_exp_dditable_t {
ur_pfnEventGetSyncPointProfilingInfoExp_t pfnGetSyncPointProfilingInfoExp; ///< urEventGetSyncPointProfilingInfoExp
} ur_event_exp_dditable_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Exported function for filling application's EventExp table
/// with current process' addresses
///
/// @details The table currently has a single member,
/// `pfnGetSyncPointProfilingInfoExp`.
///
/// @returns
/// - ::UR_RESULT_SUCCESS
/// - ::UR_RESULT_ERROR_UNINITIALIZED
/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION
UR_DLLEXPORT ur_result_t UR_APICALL
urGetEventExpProcAddrTable(
ur_api_version_t version, ///< [in] API version requested
ur_event_exp_dditable_t *pDdiTable ///< [in,out] pointer to table of DDI function pointers
);

///////////////////////////////////////////////////////////////////////////////
/// @brief Function-pointer for urGetEventExpProcAddrTable
/// @details Takes the requested API version and the EventExp table to fill,
/// in that order, matching urGetEventExpProcAddrTable.
typedef ur_result_t(UR_APICALL *ur_pfnGetEventExpProcAddrTable_t)(
ur_api_version_t,
ur_event_exp_dditable_t *);

///////////////////////////////////////////////////////////////////////////////
/// @brief Function-pointer for urProgramCreateWithIL
typedef ur_result_t(UR_APICALL *ur_pfnProgramCreateWithIL_t)(
Expand Down Expand Up @@ -2306,6 +2343,7 @@ typedef struct ur_dditable_t {
ur_platform_dditable_t Platform;
ur_context_dditable_t Context;
ur_event_dditable_t Event;
ur_event_exp_dditable_t EventExp;
ur_program_dditable_t Program;
ur_program_exp_dditable_t ProgramExp;
ur_kernel_dditable_t Kernel;
Expand Down
8 changes: 8 additions & 0 deletions include/ur_print.h
Original file line number Diff line number Diff line change
Expand Up @@ -1162,6 +1162,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintEventCreateWithNativeHandleParams(con
/// - `buff_size < out_size`
UR_APIEXPORT ur_result_t UR_APICALL urPrintEventSetCallbackParams(const struct ur_event_set_callback_params_t *params, char *buffer, const size_t buff_size, size_t *out_size);

///////////////////////////////////////////////////////////////////////////////
/// @brief Print ur_event_get_sync_point_profiling_info_exp_params_t struct
/// @details NOTE(review): presumably `out_size` receives the size needed for
/// the printed text, which is why `buff_size < out_size` is reported as an
/// invalid size - confirm against the implementation.
/// @returns
/// - ::UR_RESULT_SUCCESS
/// - ::UR_RESULT_ERROR_INVALID_SIZE
/// - `buff_size < out_size`
UR_APIEXPORT ur_result_t UR_APICALL urPrintEventGetSyncPointProfilingInfoExpParams(const struct ur_event_get_sync_point_profiling_info_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size);

///////////////////////////////////////////////////////////////////////////////
/// @brief Print ur_program_create_with_il_params_t struct
/// @returns
Expand Down
45 changes: 45 additions & 0 deletions include/ur_print.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -879,6 +879,9 @@ inline std::ostream &operator<<(std::ostream &os, ur_function_t value) {
case UR_FUNCTION_KERNEL_SUGGEST_MAX_COOPERATIVE_GROUP_COUNT_EXP:
os << "UR_FUNCTION_KERNEL_SUGGEST_MAX_COOPERATIVE_GROUP_COUNT_EXP";
break;
case UR_FUNCTION_EVENT_GET_SYNC_POINT_PROFILING_INFO_EXP:
os << "UR_FUNCTION_EVENT_GET_SYNC_POINT_PROFILING_INFO_EXP";
break;
default:
os << "unknown enumerator";
break;
Expand Down Expand Up @@ -9888,6 +9891,45 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct
return os;
}

///////////////////////////////////////////////////////////////////////////////
/// @brief Print operator for the ur_event_get_sync_point_profiling_info_exp_params_t type
/// @details Writes every parameter as `.name = value`, separated by ", ", in
///     the order the parameters are declared in the struct.
/// @returns
///     std::ostream &
inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_event_get_sync_point_profiling_info_exp_params_t *params) {
    // The event handle goes through the pointer printer.
    os << ".hEvent = ";
    ur::details::printPtr(os, *(params->phEvent));

    // These three are streamed directly with their own operator<<.
    os << ", " << ".syncPoint = " << *(params->psyncPoint);
    os << ", " << ".propName = " << *(params->ppropName);
    os << ", " << ".propSize = " << *(params->ppropSize);

    // printTagged receives the property name and size alongside the value.
    os << ", " << ".pPropValue = ";
    ur::details::printTagged(os, *(params->ppPropValue), *(params->ppropName), *(params->ppropSize));

    os << ", " << ".pPropSizeRet = ";
    ur::details::printPtr(os, *(params->ppPropSizeRet));

    return os;
}

///////////////////////////////////////////////////////////////////////////////
/// @brief Print operator for the ur_program_create_with_il_params_t type
/// @returns
Expand Down Expand Up @@ -15965,6 +16007,9 @@ inline ur_result_t UR_APICALL printFunctionParams(std::ostream &os, ur_function_
case UR_FUNCTION_EVENT_SET_CALLBACK: {
os << (const struct ur_event_set_callback_params_t *)params;
} break;
case UR_FUNCTION_EVENT_GET_SYNC_POINT_PROFILING_INFO_EXP: {
os << (const struct ur_event_get_sync_point_profiling_info_exp_params_t *)params;
} break;
case UR_FUNCTION_PROGRAM_CREATE_WITH_IL: {
os << (const struct ur_program_create_with_il_params_t *)params;
} break;
Expand Down
21 changes: 21 additions & 0 deletions scripts/core/EXP-COMMAND-BUFFER.rst
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,23 @@ were obtained from.
pLocalWorkSize, 1, &syncPoint,
nullptr);

// Finalize the CommandBuffer to launch it
${x}CommandBufferFinalizeExp(hCommandBuffer);

// Execute the CommandBuffer and obtain the event associated to this
// execution
${x}_event_handle_t event;
${x}CommandBufferEnqueueExp(hCommandBuffer, hQueue, 0, nullptr,
&event);

// Get SyncPoint profiling information: query the device timestamp at which
// the command referenced by syncPoint started executing
uint64_t startTime = 0;
${x}EventGetSyncPointProfilingInfoExp(event, syncPoint,
${X}_PROFILING_INFO_COMMAND_START,
sizeof(startTime), &startTime, nullptr);

Enqueueing Command-Buffers
--------------------------------------------------------------------------------

Expand Down Expand Up @@ -211,6 +228,7 @@ Functions
* ${x}CommandBufferAppendUSMPrefetchExp
* ${x}CommandBufferAppendUSMAdviseExp
* ${x}CommandBufferEnqueueExp
* ${x}EventGetSyncPointProfilingInfoExp

Changelog
--------------------------------------------------------------------------------
Expand All @@ -227,6 +245,9 @@ Changelog
| 1.3 | Add function definitions for Prefetch and Advise |
| | commands |
+-----------+-------------------------------------------------------+
| 1.4 | Add function definitions for getting sync point |
| | profiling information |
+-----------+-------------------------------------------------------+

Contributors
--------------------------------------------------------------------------------
Expand Down
35 changes: 35 additions & 0 deletions scripts/core/exp-command-buffer.yml
Original file line number Diff line number Diff line change
Expand Up @@ -704,3 +704,38 @@ returns:
- "If event objects in phEventWaitList are not valid events."
- $X_RESULT_ERROR_OUT_OF_HOST_MEMORY
- $X_RESULT_ERROR_OUT_OF_RESOURCES
--- #--------------------------------------------------------------------------
type: function
desc: "Get profiling information for the sync point execution associated with an event object"
class: $xEvent
name: GetSyncPointProfilingInfoExp
ordinal: "0"
params:
    - type: $x_event_handle_t
      name: hEvent
      desc: "[in] handle of the event object"
    - type: $x_exp_command_buffer_sync_point_t
      name: syncPoint
      desc: "[in] Sync point referencing the node (i.e. command) from which we want to get profile information"
    - type: $x_profiling_info_t
      name: propName
      desc: "[in] the name of the profiling property to query"
    - type: size_t
      name: propSize
      desc: "[in] size in bytes of the profiling property value"
    - type: void*
      name: pPropValue
      desc: "[out][optional][typename(propName, propSize)] value of the profiling property"
    - type: size_t*
      name: pPropSizeRet
      desc: "[out][optional] pointer to the actual size in bytes returned in propValue"
returns:
    - $X_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE:
        - "If `hEvent`s associated queue was not created with `$X_QUEUE_FLAG_PROFILING_ENABLE`."
    - $X_RESULT_ERROR_INVALID_VALUE:
        - "`pPropValue && propSize == 0`"
    - $X_RESULT_ERROR_INVALID_EVENT
    - $X_RESULT_ERROR_OUT_OF_RESOURCES
    - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY
    # The trailing ':' is required: without it the condition below is folded
    # into the error code as one scalar and rendered on the same line in the
    # generated header.
    - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP:
        - "If `syncPoint` does not reference a command in the command-buffer submission associated with `hEvent`."
3 changes: 3 additions & 0 deletions scripts/core/registry.yml
Original file line number Diff line number Diff line change
Expand Up @@ -559,6 +559,9 @@ etors:
- name: KERNEL_SUGGEST_MAX_COOPERATIVE_GROUP_COUNT_EXP
desc: Enumerator for $xKernelSuggestMaxCooperativeGroupCountExp
value: '215'
- name: EVENT_GET_SYNC_POINT_PROFILING_INFO_EXP
desc: Enumerator for $xEventGetSyncPointProfilingInfoExp
value: '218'
---
type: enum
desc: Defines structure types
Expand Down
1 change: 1 addition & 0 deletions source/adapters/adapter.def.in
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ EXPORTS
urGetEnqueueProcAddrTable
urGetEnqueueExpProcAddrTable
urGetEventProcAddrTable
urGetEventExpProcAddrTable
urGetKernelProcAddrTable
urGetKernelExpProcAddrTable
urGetMemProcAddrTable
Expand Down
1 change: 1 addition & 0 deletions source/adapters/adapter.map.in
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
urGetEnqueueProcAddrTable;
urGetEnqueueExpProcAddrTable;
urGetEventProcAddrTable;
urGetEventExpProcAddrTable;
urGetKernelProcAddrTable;
urGetKernelExpProcAddrTable;
urGetMemProcAddrTable;
Expand Down
13 changes: 13 additions & 0 deletions source/adapters/cuda/command_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -762,3 +762,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(

return Result;
}

/// Sync-point profiling entry point for the CUDA adapter.
///
/// No argument is read; the call always returns
/// UR_RESULT_ERROR_UNSUPPORTED_FEATURE.
UR_APIEXPORT ur_result_t UR_APICALL urEventGetSyncPointProfilingInfoExp(
    [[maybe_unused]] ur_event_handle_t hEvent,
    [[maybe_unused]] ur_exp_command_buffer_sync_point_t SyncPoint,
    [[maybe_unused]] ur_profiling_info_t PropName,
    [[maybe_unused]] size_t PropValueSize, [[maybe_unused]] void *PropValue,
    [[maybe_unused]] size_t *PropValueSizeRet) {
  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
14 changes: 14 additions & 0 deletions source/adapters/cuda/ur_interface_loader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,20 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable(
return retVal;
}

UR_DLLEXPORT ur_result_t UR_APICALL urGetEventExpProcAddrTable(
ur_api_version_t version, ///< [in] API version requested
ur_event_exp_dditable_t
*pDdiTable ///< [in,out] pointer to table of DDI function pointers
) {
auto retVal = validateProcInputs(version, pDdiTable);
if (UR_RESULT_SUCCESS != retVal) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: Prefer regular lhs_val == const_rhs_val, i.e. retVal != UR_RESULT_SUCCESS over the Yoda notation, for readability reasons. This is not a blocker.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm a little confused by your comments (this one and following), because all the conditions in these files use Yoda notation.

return retVal;
}
pDdiTable->pfnGetSyncPointProfilingInfoExp =
urEventGetSyncPointProfilingInfoExp;
return retVal;
}

UR_DLLEXPORT ur_result_t UR_APICALL urGetUsmP2PExpProcAddrTable(
ur_api_version_t version, ur_usm_p2p_exp_dditable_t *pDdiTable) {
auto retVal = validateProcInputs(version, pDdiTable);
Expand Down
13 changes: 13 additions & 0 deletions source/adapters/hip/command_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,3 +162,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
"implemented for HIP adapter.");
return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}

/// Sync-point profiling entry point for the HIP adapter.
///
/// No argument is read; the call always returns
/// UR_RESULT_ERROR_UNSUPPORTED_FEATURE.
UR_APIEXPORT ur_result_t UR_APICALL urEventGetSyncPointProfilingInfoExp(
    [[maybe_unused]] ur_event_handle_t hEvent,
    [[maybe_unused]] ur_exp_command_buffer_sync_point_t SyncPoint,
    [[maybe_unused]] ur_profiling_info_t PropName,
    [[maybe_unused]] size_t PropValueSize, [[maybe_unused]] void *PropValue,
    [[maybe_unused]] size_t *PropValueSizeRet) {
  return UR_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
14 changes: 14 additions & 0 deletions source/adapters/hip/ur_interface_loader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,20 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable(
return retVal;
}

UR_DLLEXPORT ur_result_t UR_APICALL urGetEventExpProcAddrTable(
ur_api_version_t version, ///< [in] API version requested
ur_event_exp_dditable_t
*pDdiTable ///< [in,out] pointer to table of DDI function pointers
) {
auto retVal = validateProcInputs(version, pDdiTable);
if (UR_RESULT_SUCCESS != retVal) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as the other case wrt the Yoda notation used for the comparison.

return retVal;
}
pDdiTable->pfnGetSyncPointProfilingInfoExp =
urEventGetSyncPointProfilingInfoExp;
return retVal;
}

UR_DLLEXPORT ur_result_t UR_APICALL urGetUsmP2PExpProcAddrTable(
ur_api_version_t version, ur_usm_p2p_exp_dditable_t *pDdiTable) {
auto retVal = validateProcInputs(version, pDdiTable);
Expand Down
Loading
Loading