///////////////////////////////////////////////////////////////////////////////
/// @brief Get profiling information for the sync point execution associated with
///        an event object
///
/// @returns
///     - ::UR_RESULT_SUCCESS
///     - ::UR_RESULT_ERROR_UNINITIALIZED
///     - ::UR_RESULT_ERROR_DEVICE_LOST
///     - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC
///     - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
///         + `NULL == hEvent`
///     - ::UR_RESULT_ERROR_INVALID_ENUMERATION
///         + `::UR_PROFILING_INFO_COMMAND_COMPLETE < propName`
///     - ::UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE
///         + If `hEvent`'s associated queue was not created with `UR_QUEUE_FLAG_PROFILING_ENABLE`.
///     - ::UR_RESULT_ERROR_INVALID_VALUE
///         + `pPropValue && propSize == 0`
///     - ::UR_RESULT_ERROR_INVALID_EVENT
///     - ::UR_RESULT_ERROR_OUT_OF_RESOURCES
///     - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY
///     - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP
///         + If `syncPoint` is not referencing a command enqueued in this CommandBuffer.
///           Typically, `syncPoint` > number of commands
UR_APIEXPORT ur_result_t UR_APICALL
urEventGetSyncPointProfilingInfoExp(
    ur_event_handle_t hEvent,                     ///< [in] handle of the event object
    ur_exp_command_buffer_sync_point_t syncPoint, ///< [in] Sync point referencing the node (i.e. command) from which we want
                                                  ///< to get profile information
    ur_profiling_info_t propName,                 ///< [in] the name of the profiling property to query
    size_t propSize,                              ///< [in] size in bytes of the profiling property value
    void *pPropValue,                             ///< [out][optional][typename(propName, propSize)] value of the profiling
                                                  ///< property
    size_t *pPropSizeRet                          ///< [out][optional] pointer to the actual size in bytes returned in
                                                  ///< pPropValue
);

///////////////////////////////////////////////////////////////////////////////
/// @brief Function parameters for urEventGetSyncPointProfilingInfoExp
/// @details Each entry is a pointer to the parameter passed to the function;
///     allowing the callback the ability to modify the parameter's value
typedef struct ur_event_get_sync_point_profiling_info_exp_params_t {
    ur_event_handle_t *phEvent;
    ur_exp_command_buffer_sync_point_t *psyncPoint;
    ur_profiling_info_t *ppropName;
    size_t *ppropSize;
    void **ppPropValue;
    size_t **ppPropSizeRet;
} ur_event_get_sync_point_profiling_info_exp_params_t;
///////////////////////////////////////////////////////////////////////////////
/// @brief Function-pointer for urEventGetSyncPointProfilingInfoExp
/// @details Argument order matches the entry point: event handle, sync point,
///     property name, property size, property value buffer, returned size.
typedef ur_result_t(UR_APICALL *ur_pfnEventGetSyncPointProfilingInfoExp_t)(
    ur_event_handle_t,
    ur_exp_command_buffer_sync_point_t,
    ur_profiling_info_t,
    size_t,
    void *,
    size_t *);

///////////////////////////////////////////////////////////////////////////////
/// @brief Table of EventExp function pointers
typedef struct ur_event_exp_dditable_t {
    ur_pfnEventGetSyncPointProfilingInfoExp_t pfnGetSyncPointProfilingInfoExp;
} ur_event_exp_dditable_t;

///////////////////////////////////////////////////////////////////////////////
/// @brief Exported function for filling application's EventExp table
///        with current process' addresses
///
/// @returns
///     - ::UR_RESULT_SUCCESS
///     - ::UR_RESULT_ERROR_UNINITIALIZED
///     - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
///     - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION
UR_DLLEXPORT ur_result_t UR_APICALL
urGetEventExpProcAddrTable(
    ur_api_version_t version,          ///< [in] API version requested
    ur_event_exp_dditable_t *pDdiTable ///< [in,out] pointer to table of DDI function pointers
);

///////////////////////////////////////////////////////////////////////////////
/// @brief Function-pointer for urGetEventExpProcAddrTable
typedef ur_result_t(UR_APICALL *ur_pfnGetEventExpProcAddrTable_t)(
    ur_api_version_t,
    ur_event_exp_dditable_t *);
///////////////////////////////////////////////////////////////////////////////
/// @brief Print ur_event_get_sync_point_profiling_info_exp_params_t params struct
/// @returns
///     - ::UR_RESULT_SUCCESS
///     - ::UR_RESULT_ERROR_INVALID_NULL_POINTER
///         - `NULL == buffer`
///     - ::UR_RESULT_ERROR_INVALID_SIZE
///         - `buff_size < out_size`
// NOTE(review): `out_size` presumably receives the number of bytes the printed
// form needs, as for the sibling urPrint*Params functions -- confirm against
// their implementation.
UR_APIEXPORT ur_result_t UR_APICALL urPrintEventGetSyncPointProfilingInfoExpParams(const struct ur_event_get_sync_point_profiling_info_exp_params_t *params, char *buffer, const size_t buff_size, size_t *out_size);
std::ostream & +inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_event_get_sync_point_profiling_info_exp_params_t *params) { + + os << ".hEvent = "; + + ur::details::printPtr(os, + *(params->phEvent)); + + os << ", "; + os << ".syncPoint = "; + + os << *(params->psyncPoint); + + os << ", "; + os << ".propName = "; + + os << *(params->ppropName); + + os << ", "; + os << ".propSize = "; + + os << *(params->ppropSize); + + os << ", "; + os << ".pPropValue = "; + ur::details::printTagged(os, *(params->ppPropValue), *(params->ppropName), *(params->ppropSize)); + + os << ", "; + os << ".pPropSizeRet = "; + + ur::details::printPtr(os, + *(params->ppPropSizeRet)); + + return os; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Print operator for the ur_program_create_with_il_params_t type /// @returns @@ -15959,6 +16001,9 @@ inline ur_result_t UR_APICALL printFunctionParams(std::ostream &os, ur_function_ case UR_FUNCTION_EVENT_SET_CALLBACK: { os << (const struct ur_event_set_callback_params_t *)params; } break; + case UR_FUNCTION_EVENT_GET_SYNC_POINT_PROFILING_INFO_EXP: { + os << (const struct ur_event_get_sync_point_profiling_info_exp_params_t *)params; + } break; case UR_FUNCTION_PROGRAM_CREATE_WITH_IL: { os << (const struct ur_program_create_with_il_params_t *)params; } break; diff --git a/scripts/core/CONTRIB.rst b/scripts/core/CONTRIB.rst index cf2c8e870b..f1d986c93d 100644 --- a/scripts/core/CONTRIB.rst +++ b/scripts/core/CONTRIB.rst @@ -205,6 +205,29 @@ arguments: * Pointer to handle arguments, such as out parameters, are prefixed with ``ph`` i.e. ``phQueue``. +Limitations +----------- + +There are some limitations on the patterns our spec generator can handle. These +limitations are due to convenience of implementation rather than design: if +they are preventing you from implementing a feature please open an issue and we +will be happy to try and accommodate your use case. 
Otherwise beware of the +following: + +* A function parameter or struct member which is a struct type that has any of + the following members in its type definition must not have the ``[range]`` + tag: + + * An object handle with the ``[range]`` tag + + * A struct type with the ``[range]`` tag that has an object handle member + +* A struct member which is a pointer to a struct type must not have the + ``[optional]`` tag if that struct (or any of its members, recursively) has + an object handle member in its definition. + +* A struct member which is an object handle must not have the ``[out]`` tag. + Forks and Pull Requests ======================= diff --git a/scripts/core/EXP-COMMAND-BUFFER.rst b/scripts/core/EXP-COMMAND-BUFFER.rst index a6a32a66a1..43bbc73b79 100644 --- a/scripts/core/EXP-COMMAND-BUFFER.rst +++ b/scripts/core/EXP-COMMAND-BUFFER.rst @@ -132,6 +132,23 @@ were obtained from. pLocalWorkSize, 1, &syncPoint, nullptr); + // Finalize the CommandBuffer to launch it + ${x}CommandBufferFinalizeExp(hCommandBuffer); + + // Execute the CommandBuffer and obtain the event associated with this + // execution + ${x}_event_handle_t event; + ${x}CommandBufferEnqueueExp(hCommandBuffer, hQueue, 0, nullptr, + &event); + + // Get the start timestamp of the command referenced by syncPoint + ${x}_profiling_info_t propName = ${X}_PROFILING_INFO_COMMAND_START; + uint64_t propValue = 0; + size_t propSize = sizeof(propValue); + size_t propSizeRet = 0; + ${x}EventGetSyncPointProfilingInfoExp(event, syncPoint, propName, propSize, + &propValue, &propSizeRet); + Enqueueing Command-Buffers -------------------------------------------------------------------------------- @@ -211,6 +228,7 @@ Functions * ${x}CommandBufferAppendUSMPrefetchExp * ${x}CommandBufferAppendUSMAdviseExp * ${x}CommandBufferEnqueueExp +* ${x}EventGetSyncPointProfilingInfoExp Changelog -------------------------------------------------------------------------------- @@ -227,6 +245,9 @@ Changelog | 1.3 | Add function definitions for Prefetch and Advise | | | commands | 
+-----------+-------------------------------------------------------+ +| 1.4 | Add function definitions for getting sync point | +| | profiling information | ++-----------+-------------------------------------------------------+ Contributors -------------------------------------------------------------------------------- diff --git a/scripts/core/exp-command-buffer.yml b/scripts/core/exp-command-buffer.yml index 7d1b686aab..3aeec3f6b4 100644 --- a/scripts/core/exp-command-buffer.yml +++ b/scripts/core/exp-command-buffer.yml @@ -704,3 +704,38 @@ returns: - "If event objects in phEventWaitList are not valid events." - $X_RESULT_ERROR_OUT_OF_HOST_MEMORY - $X_RESULT_ERROR_OUT_OF_RESOURCES +--- #-------------------------------------------------------------------------- +type: function +desc: "Get profiling information for the sync point execution associated with an event object" +class: $xEvent +name: GetSyncPointProfilingInfoExp +ordinal: "0" +params: + - type: $x_event_handle_t + name: hEvent + desc: "[in] handle of the event object" + - type: $x_exp_command_buffer_sync_point_t + name: syncPoint + desc: "[in] Sync point referencing the node (i.e. command) from which we want to get profile information" + - type: $x_profiling_info_t + name: propName + desc: "[in] the name of the profiling property to query" + - type: size_t + name: propSize + desc: "[in] size in bytes of the profiling property value" + - type: void* + name: pPropValue + desc: "[out][optional][typename(propName, propSize)] value of the profiling property" + - type: size_t* + name: pPropSizeRet + desc: "[out][optional] pointer to the actual size in bytes returned in propValue" +returns: + - $X_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE: + - "If `hEvent`s associated queue was not created with `UR_QUEUE_FLAG_PROFILING_ENABLE`." 
def __validate_struct_range_members(name, members, meta):
    """
    Reject struct layouts the generator cannot handle inside a ``[range]``
    of structs.

    name    -- name of the outermost range parameter/member; it is passed
               unchanged through the recursion so error messages always
               point at the entity the user wrote.
    members -- list of member dicts (each with at least 'type', 'name' and
               'desc') of the struct type being checked.
    meta    -- parsed spec metadata; meta['struct'] is used to resolve
               struct type names to their member lists.

    Raises Exception when a member is a range of object handles, or a range
    of structs that (directly or through nested structs) contains an object
    handle.
    """

    def has_handle(members, meta):
        # True when any member, at any nesting depth, is an object handle.
        # Every sibling is inspected: a handle-free nested struct must not
        # end the search before later members have been looked at.
        for m in members:
            if type_traits.is_handle(m['type']):
                return True
            if type_traits.is_struct(m['type'], meta) and has_handle(
                    type_traits.get_struct_members(m['type'], meta), meta):
                return True
        return False

    for m in members:
        if param_traits.is_range(m) and type_traits.is_handle(m['type']):
            raise Exception(
                f"struct range {name} must not contain range of object handles {m['name']}"
            )
        if type_traits.is_struct(m['type'], meta):
            member_members = type_traits.get_struct_members(
                m['type'], meta)
            # We can't handle a range of structs with handles within a range of structs
            if param_traits.is_range(m) and has_handle(
                    member_members, meta):
                raise Exception(
                    f"struct range {m['name']} is already within struct range {name}, and must not contain an object handle"
                )
            # We keep passing the original name so we can report it in
            # exception messages.
            __validate_struct_range_members(name, member_members, meta)
raise Exception("'function' requires the following sequence of mappings: {`params`}") @@ -347,6 +382,11 @@ def __validate_params(d, tags): if not has_queue: raise Exception(prefix+"bounds must only be used on entry points which take a `hQueue` parameter") + if type_traits.is_struct(item['type'], + meta) and param_traits.is_range(item): + members = type_traits.get_struct_members(item['type'], meta) + __validate_struct_range_members(item['name'], members, meta) + ver = __validate_version(item, prefix=prefix, base_version=d_ver) if ver < max_ver: raise Exception(prefix+"'version' must be increasing: %s"%item['version']) @@ -421,7 +461,7 @@ def __validate_union_tag(d): __validate_union_tag(d) __validate_type(d, 'name', tags) __validate_base(d) - __validate_members(d, tags) + __validate_members(d, tags, meta) __validate_details(d) __validate_ordinal(d) __validate_version(d) @@ -435,7 +475,7 @@ def __validate_union_tag(d): else: __validate_name(d, 'name', tags, case='camel') - __validate_params(d, tags) + __validate_params(d, tags, meta) __validate_details(d) __validate_ordinal(d) __validate_version(d) @@ -893,7 +933,7 @@ def parse(section, version, tags, meta, ref): for i, d in enumerate(docs): d = _preprocess(d) - if not _validate_doc(f, d, tags, line_nums[i]): + if not _validate_doc(f, d, tags, line_nums[i], meta): continue d = _filter_version(d, float(version)) diff --git a/scripts/templates/helper.py b/scripts/templates/helper.py index ecfcbbb8b8..39fef9baac 100644 --- a/scripts/templates/helper.py +++ b/scripts/templates/helper.py @@ -231,7 +231,7 @@ def is_array(cls, name): return True if re.match(cls.RE_ARRAY, name) else False except: return False - + @classmethod def get_array_length(cls, name): if not cls.is_array(name): @@ -239,7 +239,7 @@ def get_array_length(cls, name): match = re.match(cls.RE_ARRAY, name) return match.groups()[1] - + @classmethod def get_array_element_type(cls, name): if not cls.is_array(name): @@ -248,6 +248,14 @@ def 
@staticmethod
def get_struct_members(type_name, meta):
    # Look the type up by its bare name (const/pointer decoration removed)
    # in the parsed struct table and hand back its member list.
    bare_type = _remove_const_ptr(type_name)
    known_structs = meta['struct']
    if bare_type not in known_structs:
        raise Exception(
            f"Cannot return members of non-struct type {bare_type}")
    return known_structs[bare_type]['members']
"""
Private:
    Takes a list of struct members and recursively searches for class handles.
    Returns a list of class handles with access chains to reach them (e.g.
    "struct_a->struct_b.handle"). Also handles ranges of class handles and
    ranges of structs with class handle members, although the latter only works
    to one level of recursion i.e. a range of structs with a range of structs
    with a handle member will not work.

    parent is the access chain accumulated so far (empty at the top level) and
    is_struct_range is True while walking the members of a range of structs.
"""
def get_struct_handle_members(namespace,
                              tags,
                              meta,
                              members,
                              parent='',
                              is_struct_range=False):
    handle_members = []
    for m in members:
        if type_traits.is_class_handle(m['type'], meta):
            m_tname = _remove_const_ptr(subt(namespace, tags, m['type']))
            m_objname = re.sub(r"(\w+)_handle_t", r"\1_object_t", m_tname)
            # We can deal with a range of handles, but not if it's in a range of structs
            if param_traits.is_range(m) and not is_struct_range:
                handle_members.append({
                    'parent': parent,
                    'name': m['name'],
                    'obj_name': m_objname,
                    'type': m_tname,
                    'range_start': param_traits.range_start(m),
                    'range_end': param_traits.range_end(m)
                })
            else:
                handle_members.append({
                    'parent': parent,
                    'name': m['name'],
                    'obj_name': m_objname,
                    'optional': param_traits.is_optional(m)
                })
        elif type_traits.is_struct(m['type'], meta):
            member_struct_members = type_traits.get_struct_members(
                m['type'], meta)
            if param_traits.is_range(m):
                # If we've hit a range of structs we need to start a new recursion looking
                # for handle members. We do not support range within range, so skip that
                if is_struct_range:
                    continue
                range_handle_members = get_struct_handle_members(
                    namespace, tags, meta, member_struct_members, '', True)
                if range_handle_members:
                    handle_members.append({
                        'parent': parent,
                        'name': m['name'],
                        'type': subt(namespace, tags, _remove_const_ptr(m['type'])),
                        'range_start': param_traits.range_start(m),
                        'range_end': param_traits.range_end(m),
                        'handle_members': range_handle_members
                    })
            else:
                # If it's just a struct we can keep recursing in search of handles
                m_is_pointer = type_traits.is_pointer(m['type'])
                new_parent_deref = '->' if m_is_pointer else '.'
                # Extend the chain built so far; starting again from this
                # member's name would lose the outer structs for handles
                # nested two or more levels deep.
                new_parent = parent + m['name'] + new_parent_deref
                handle_members += get_struct_handle_members(
                    namespace, tags, meta, member_struct_members, new_parent,
                    is_struct_range)

    return handle_members
"""
Public:
    Removes every dereference operator ('.' and '->') from a string.

    Layer templates use this to derive a local variable name from an access
    chain: copying pMyStruct->member.hObject into a local needs a name that is
    distinct from any other copied parameter, and stripping the operators
    yields pMyStructmemberhObject.
"""
def strip_deref(string_to_strip):
    # Dots are removed first, then arrows; the order is kept as-is.
    return string_to_strip.replace('.', '').replace('->', '')


"""
Public:
    Walks the [in] struct parameters of a function object and returns one
    entry per parameter whose struct type contains object handle members,
    i.e. the structs the loader has to convert.
"""
def get_object_handle_structs_to_convert(namespace, tags, obj, meta):
    to_convert = []

    for param in _filter_param_list(obj['params'], ["[in]"]):
        if not type_traits.is_struct(param['type'], meta):
            continue

        struct_members = type_traits.get_struct_members(param['type'], meta)
        handles = get_struct_handle_members(namespace, tags, meta,
                                            struct_members)
        if not handles:
            continue

        param_name = subt(namespace, tags, param['name'])
        param_type = _remove_const_ptr(subt(namespace, tags, param['type']))
        to_convert.append({
            'name': param_name,
            'type': param_type,
            'optional': param_traits.is_optional(param),
            'members': handles
        })

    return to_convert
urGetEnqueueExpProcAddrTable; urGetEventProcAddrTable; + urGetEventExpProcAddrTable; urGetKernelProcAddrTable; urGetKernelExpProcAddrTable; urGetMemProcAddrTable; diff --git a/source/adapters/cuda/command_buffer.cpp b/source/adapters/cuda/command_buffer.cpp index a65530a1f1..7da742931b 100644 --- a/source/adapters/cuda/command_buffer.cpp +++ b/source/adapters/cuda/command_buffer.cpp @@ -762,3 +762,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( return Result; } + +UR_APIEXPORT ur_result_t UR_APICALL urEventGetSyncPointProfilingInfoExp( + ur_event_handle_t Event, ur_exp_command_buffer_sync_point_t SyncPoint, + ur_profiling_info_t PropName, size_t PropValueSize, void *PropValue, + size_t *PropValueSizeRet) { + (void)Event; + (void)SyncPoint; + (void)PropName; + (void)PropValueSize; + (void)PropValue; + (void)PropValueSizeRet; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/source/adapters/hip/command_buffer.cpp b/source/adapters/hip/command_buffer.cpp index 54a6fa2f4e..930d5dbe41 100644 --- a/source/adapters/hip/command_buffer.cpp +++ b/source/adapters/hip/command_buffer.cpp @@ -162,3 +162,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( "implemented for HIP adapter."); return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } + +UR_APIEXPORT ur_result_t UR_APICALL urEventGetSyncPointProfilingInfoExp( + ur_event_handle_t Event, ur_exp_command_buffer_sync_point_t SyncPoint, + ur_profiling_info_t PropName, size_t PropValueSize, void *PropValue, + size_t *PropValueSizeRet) { + (void)Event; + (void)SyncPoint; + (void)PropName; + (void)PropValueSize; + (void)PropValue; + (void)PropValueSizeRet; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index bbe49cb705..23dbfac3bc 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -986,3 +986,95 @@ UR_APIEXPORT ur_result_t 
UR_APICALL urCommandBufferEnqueueExp( return UR_RESULT_SUCCESS; } + +UR_APIEXPORT ur_result_t UR_APICALL urEventGetSyncPointProfilingInfoExp( + ur_event_handle_t Event, ur_exp_command_buffer_sync_point_t SyncPoint, + ur_profiling_info_t PropName, size_t PropValueSize, void *PropValue, + size_t *PropValueSizeRet) { + std::shared_lock EventLock(Event->Mutex); + + if (Event->UrQueue && + (Event->UrQueue->Properties & UR_QUEUE_FLAG_PROFILING_ENABLE) == 0) { + return UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE; + } + + ur_device_handle_t Device = + Event->UrQueue ? Event->UrQueue->Device : Event->Context->Devices[0]; + + uint64_t ZeTimerResolution = Device->ZeDeviceProperties->timerResolution; + const uint64_t TimestampMaxValue = + ((1ULL << Device->ZeDeviceProperties->kernelTimestampValidBits) - 1ULL); + + UrReturnHelper ReturnValue(PropValueSize, PropValue, PropValueSizeRet); + + ze_kernel_timestamp_result_t tsResult; + + // Node profiling info is stored in the CommandData field of the event + // returned from graph submission. + // The timing info of each command corresponding to a node is stored using + // `zeCommandListAppendQueryKernelTimestamps`. This command stores the timing + // info of each command/node in the same order as the sync-points that were + // assigned to this node when it was enqueued. Consequently, `SyncPoint` + // corresponds to the index of the memory slot containing the timestamps + // corresponding to this specific node. 
+ if (Event->CommandType == UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP) { + if (Event->CommandData) { + command_buffer_profiling_t *ProfilingsPtr; + switch (PropName) { + case UR_PROFILING_INFO_COMMAND_START: { + ProfilingsPtr = + static_cast(Event->CommandData); + uint64_t Index = static_cast(SyncPoint); + + if (Index > ProfilingsPtr->NumEvents) { + return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP; + } + + uint64_t StartTime = + ProfilingsPtr->Timestamps[Index].global.kernelStart; + uint64_t ContextStartTime = + (StartTime & TimestampMaxValue) * ZeTimerResolution; + return ReturnValue(ContextStartTime); + } + case UR_PROFILING_INFO_COMMAND_END: { + ProfilingsPtr = + static_cast(Event->CommandData); + uint64_t Index = static_cast(SyncPoint); + + if (Index > ProfilingsPtr->NumEvents) { + return UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP; + } + + uint64_t EndTime = ProfilingsPtr->Timestamps[Index].global.kernelEnd; + uint64_t LastStart = + ProfilingsPtr->Timestamps[Index].global.kernelStart; + uint64_t ContextStartTime = (LastStart & TimestampMaxValue); + uint64_t ContextEndTime = (EndTime & TimestampMaxValue); + + // + // Handle a possible wrap-around (the underlying HW counter is < + // 64-bit). Note, it will not report correct time if there were multiple + // wrap arounds, and the longer term plan is to enlarge the capacity of + // the HW timestamps. 
+ // + if (ContextEndTime <= ContextStartTime) { + ContextEndTime += TimestampMaxValue; + } + ContextEndTime *= ZeTimerResolution; + return ReturnValue(ContextEndTime); + } + default: + urPrint( + "urEventGetSyncPointProfilingInfoExp: not supported ParamName\n"); + return UR_RESULT_ERROR_INVALID_VALUE; + } + } else { + return UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE; + } + } else { + urPrint("urEventGetSyncPointProfilingInfoExp: not supported Event type\n"); + return UR_RESULT_ERROR_INVALID_VALUE; + } + + return UR_RESULT_SUCCESS; +} diff --git a/source/adapters/level_zero/ur_interface_loader.cpp b/source/adapters/level_zero/ur_interface_loader.cpp index 74d0706b31..50d7cf585e 100644 --- a/source/adapters/level_zero/ur_interface_loader.cpp +++ b/source/adapters/level_zero/ur_interface_loader.cpp @@ -345,6 +345,20 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetCommandBufferExpProcAddrTable( return retVal; } +UR_DLLEXPORT ur_result_t UR_APICALL urGetEventExpProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + ur_event_exp_dditable_t + *pDdiTable ///< [in,out] pointer to table of DDI function pointers +) { + auto retVal = validateProcInputs(version, pDdiTable); + if (UR_RESULT_SUCCESS != retVal) { + return retVal; + } + pDdiTable->pfnGetSyncPointProfilingInfoExp = + urEventGetSyncProfilingProfilingInfoExp; + return retVal; +} + UR_DLLEXPORT ur_result_t UR_APICALL urGetUsmP2PExpProcAddrTable( ur_api_version_t version, ur_usm_p2p_exp_dditable_t *pDdiTable) { auto retVal = validateProcInputs(version, pDdiTable); diff --git a/source/adapters/native_cpu/command_buffer.cpp b/source/adapters/native_cpu/command_buffer.cpp index 50b38c9d52..88f22d9865 100644 --- a/source/adapters/native_cpu/command_buffer.cpp +++ b/source/adapters/native_cpu/command_buffer.cpp @@ -162,3 +162,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferAppendUSMAdviseExp( ur_exp_command_buffer_sync_point_t *) { return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } + +UR_APIEXPORT 
ur_result_t UR_APICALL urEventGetSyncPointProfilingInfoExp( + ur_event_handle_t, ur_exp_command_buffer_sync_point_t, ur_profiling_info_t, + size_t, void *, size_t *) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/source/adapters/null/ur_nullddi.cpp b/source/adapters/null/ur_nullddi.cpp index d6887ee12f..f7be764f3e 100644 --- a/source/adapters/null/ur_nullddi.cpp +++ b/source/adapters/null/ur_nullddi.cpp @@ -5230,6 +5230,40 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEventGetSyncPointProfilingInfoExp +__urdlllocal ur_result_t UR_APICALL urEventGetSyncPointProfilingInfoExp( + ur_event_handle_t hEvent, ///< [in] handle of the event object + ur_exp_command_buffer_sync_point_t + syncPoint, ///< [in] Sync point referencing the node (i.e. command) from which we want + ///< to get profile information + ur_profiling_info_t + propName, ///< [in] the name of the profiling property to query + size_t propSize, ///< [in] size in bytes of the profiling property value + void * + pPropValue, ///< [out][optional][typename(propName, propSize)] value of the profiling + ///< property + size_t * + pPropSizeRet ///< [out][optional] pointer to the actual size in bytes returned in + ///< propValue + ) try { + ur_result_t result = UR_RESULT_SUCCESS; + + // if the driver has created a custom function, then call it instead of using the generic path + auto pfnGetSyncPointProfilingInfoExp = + d_context.urDdiTable.EventExp.pfnGetSyncPointProfilingInfoExp; + if (nullptr != pfnGetSyncPointProfilingInfoExp) { + result = pfnGetSyncPointProfilingInfoExp( + hEvent, syncPoint, propName, propSize, pPropValue, pPropSizeRet); + } else { + // generic implementation + } + + return result; +} catch (...) 
{ + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueCooperativeKernelLaunchExp __urdlllocal ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( @@ -5899,6 +5933,37 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEventProcAddrTable( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's EventExp table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION +UR_DLLEXPORT ur_result_t UR_APICALL urGetEventExpProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + ur_event_exp_dditable_t + *pDdiTable ///< [in,out] pointer to table of DDI function pointers + ) try { + if (nullptr == pDdiTable) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (driver::d_context.version < version) { + return UR_RESULT_ERROR_UNSUPPORTED_VERSION; + } + + ur_result_t result = UR_RESULT_SUCCESS; + + pDdiTable->pfnGetSyncPointProfilingInfoExp = + driver::urEventGetSyncPointProfilingInfoExp; + + return result; +} catch (...) 
{ + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Exported function for filling application's Kernel table /// with current process' addresses diff --git a/source/adapters/opencl/command_buffer.cpp b/source/adapters/opencl/command_buffer.cpp index 74cdd8a03d..97b819f500 100644 --- a/source/adapters/opencl/command_buffer.cpp +++ b/source/adapters/opencl/command_buffer.cpp @@ -356,3 +356,16 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp( return UR_RESULT_SUCCESS; } + +UR_APIEXPORT ur_result_t UR_APICALL urEventGetSyncPointProfilingInfoExp( + ur_event_handle_t Event, ur_exp_command_buffer_sync_point_t SyncPoint, + ur_profiling_info_t PropName, size_t PropValueSize, void *PropValue, + size_t *PropValueSizeRet) { + (void)Event; + (void)SyncPoint; + (void)PropName; + (void)PropValueSize; + (void)PropValue; + (void)PropValueSizeRet; + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; +} diff --git a/source/loader/layers/tracing/ur_trcddi.cpp b/source/loader/layers/tracing/ur_trcddi.cpp index 402b64d638..7493813123 100644 --- a/source/loader/layers/tracing/ur_trcddi.cpp +++ b/source/loader/layers/tracing/ur_trcddi.cpp @@ -5807,6 +5807,46 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEventGetSyncPointProfilingInfoExp +__urdlllocal ur_result_t UR_APICALL urEventGetSyncPointProfilingInfoExp( + ur_event_handle_t hEvent, ///< [in] handle of the event object + ur_exp_command_buffer_sync_point_t + syncPoint, ///< [in] Sync point referencing the node (i.e. 
command) from which we want + ///< to get profile information + ur_profiling_info_t + propName, ///< [in] the name of the profiling property to query + size_t propSize, ///< [in] size in bytes of the profiling property value + void * + pPropValue, ///< [out][optional][typename(propName, propSize)] value of the profiling + ///< property + size_t * + pPropSizeRet ///< [out][optional] pointer to the actual size in bytes returned in + ///< propValue +) { + auto pfnGetSyncPointProfilingInfoExp = + context.urDdiTable.EventExp.pfnGetSyncPointProfilingInfoExp; + + if (nullptr == pfnGetSyncPointProfilingInfoExp) { + return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; + } + + ur_event_get_sync_point_profiling_info_exp_params_t params = { + &hEvent, &syncPoint, &propName, &propSize, &pPropValue, &pPropSizeRet}; + uint64_t instance = context.notify_begin( + UR_FUNCTION_EVENT_GET_SYNC_POINT_PROFILING_INFO_EXP, + "urEventGetSyncPointProfilingInfoExp", &params); + + ur_result_t result = pfnGetSyncPointProfilingInfoExp( + hEvent, syncPoint, propName, propSize, pPropValue, pPropSizeRet); + + context.notify_end(UR_FUNCTION_EVENT_GET_SYNC_POINT_PROFILING_INFO_EXP, + "urEventGetSyncPointProfilingInfoExp", &params, &result, + instance); + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueCooperativeKernelLaunchExp __urdlllocal ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( @@ -6646,6 +6686,41 @@ __urdlllocal ur_result_t UR_APICALL urGetEventProcAddrTable( return result; } /////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's EventExp table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION +__urdlllocal ur_result_t UR_APICALL urGetEventExpProcAddrTable( + ur_api_version_t version, ///< [in] API
version requested + ur_event_exp_dditable_t + *pDdiTable ///< [in,out] pointer to table of DDI function pointers +) { + auto &dditable = ur_tracing_layer::context.urDdiTable.EventExp; + + if (nullptr == pDdiTable) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (UR_MAJOR_VERSION(ur_tracing_layer::context.version) != + UR_MAJOR_VERSION(version) || + UR_MINOR_VERSION(ur_tracing_layer::context.version) > + UR_MINOR_VERSION(version)) { + return UR_RESULT_ERROR_UNSUPPORTED_VERSION; + } + + ur_result_t result = UR_RESULT_SUCCESS; + + dditable.pfnGetSyncPointProfilingInfoExp = + pDdiTable->pfnGetSyncPointProfilingInfoExp; + pDdiTable->pfnGetSyncPointProfilingInfoExp = + ur_tracing_layer::urEventGetSyncPointProfilingInfoExp; + + return result; +} +/////////////////////////////////////////////////////////////////////////////// /// @brief Exported function for filling application's Kernel table /// with current process' addresses /// @@ -7424,6 +7499,11 @@ ur_result_t context_t::init(ur_dditable_t *dditable, UR_API_VERSION_CURRENT, &dditable->Event); } + if (UR_RESULT_SUCCESS == result) { + result = ur_tracing_layer::urGetEventExpProcAddrTable( + UR_API_VERSION_CURRENT, &dditable->EventExp); + } + if (UR_RESULT_SUCCESS == result) { result = ur_tracing_layer::urGetKernelProcAddrTable( UR_API_VERSION_CURRENT, &dditable->Kernel); diff --git a/source/loader/layers/validation/ur_valddi.cpp b/source/loader/layers/validation/ur_valddi.cpp index 72e225028c..57d42d807b 100644 --- a/source/loader/layers/validation/ur_valddi.cpp +++ b/source/loader/layers/validation/ur_valddi.cpp @@ -7616,6 +7616,50 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEventGetSyncPointProfilingInfoExp +__urdlllocal ur_result_t UR_APICALL urEventGetSyncPointProfilingInfoExp( + ur_event_handle_t hEvent, ///< [in] handle of the event object + 
ur_exp_command_buffer_sync_point_t + syncPoint, ///< [in] Sync point referencing the node (i.e. command) from which we want + ///< to get profile information + ur_profiling_info_t + propName, ///< [in] the name of the profiling property to query + size_t propSize, ///< [in] size in bytes of the profiling property value + void * + pPropValue, ///< [out][optional][typename(propName, propSize)] value of the profiling + ///< property + size_t * + pPropSizeRet ///< [out][optional] pointer to the actual size in bytes returned in + ///< propValue +) { + auto pfnGetSyncPointProfilingInfoExp = + context.urDdiTable.EventExp.pfnGetSyncPointProfilingInfoExp; + + if (nullptr == pfnGetSyncPointProfilingInfoExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + if (context.enableParameterValidation) { + if (NULL == hEvent) { + return UR_RESULT_ERROR_INVALID_NULL_HANDLE; + } + + if (UR_PROFILING_INFO_COMMAND_COMPLETE < propName) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + + if (pPropValue && propSize == 0) { + return UR_RESULT_ERROR_INVALID_VALUE; + } + } + + ur_result_t result = pfnGetSyncPointProfilingInfoExp( + hEvent, syncPoint, propName, propSize, pPropValue, pPropSizeRet); + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueCooperativeKernelLaunchExp __urdlllocal ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( @@ -8523,6 +8567,42 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEventProcAddrTable( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's EventExp table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION +UR_DLLEXPORT ur_result_t UR_APICALL urGetEventExpProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + 
ur_event_exp_dditable_t + *pDdiTable ///< [in,out] pointer to table of DDI function pointers +) { + auto &dditable = ur_validation_layer::context.urDdiTable.EventExp; + + if (nullptr == pDdiTable) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (UR_MAJOR_VERSION(ur_validation_layer::context.version) != + UR_MAJOR_VERSION(version) || + UR_MINOR_VERSION(ur_validation_layer::context.version) > + UR_MINOR_VERSION(version)) { + return UR_RESULT_ERROR_UNSUPPORTED_VERSION; + } + + ur_result_t result = UR_RESULT_SUCCESS; + + dditable.pfnGetSyncPointProfilingInfoExp = + pDdiTable->pfnGetSyncPointProfilingInfoExp; + pDdiTable->pfnGetSyncPointProfilingInfoExp = + ur_validation_layer::urEventGetSyncPointProfilingInfoExp; + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Exported function for filling application's Kernel table /// with current process' addresses @@ -9331,6 +9411,11 @@ ur_result_t context_t::init(ur_dditable_t *dditable, UR_API_VERSION_CURRENT, &dditable->Event); } + if (UR_RESULT_SUCCESS == result) { + result = ur_validation_layer::urGetEventExpProcAddrTable( + UR_API_VERSION_CURRENT, &dditable->EventExp); + } + if (UR_RESULT_SUCCESS == result) { result = ur_validation_layer::urGetKernelProcAddrTable( UR_API_VERSION_CURRENT, &dditable->Kernel); diff --git a/source/loader/ur_ldrddi.cpp b/source/loader/ur_ldrddi.cpp index 201315272f..a3afff4410 100644 --- a/source/loader/ur_ldrddi.cpp +++ b/source/loader/ur_ldrddi.cpp @@ -7254,6 +7254,43 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferEnqueueExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEventGetSyncPointProfilingInfoExp +__urdlllocal ur_result_t UR_APICALL urEventGetSyncPointProfilingInfoExp( + ur_event_handle_t hEvent, ///< [in] handle of the event object + ur_exp_command_buffer_sync_point_t + syncPoint, ///< [in] Sync point referencing 
the node (i.e. command) from which we want + ///< to get profile information + ur_profiling_info_t + propName, ///< [in] the name of the profiling property to query + size_t propSize, ///< [in] size in bytes of the profiling property value + void * + pPropValue, ///< [out][optional][typename(propName, propSize)] value of the profiling + ///< property + size_t * + pPropSizeRet ///< [out][optional] pointer to the actual size in bytes returned in + ///< propValue +) { + ur_result_t result = UR_RESULT_SUCCESS; + + // extract platform's function pointer table + auto dditable = reinterpret_cast<ur_event_object_t *>(hEvent)->dditable; + auto pfnGetSyncPointProfilingInfoExp = + dditable->ur.EventExp.pfnGetSyncPointProfilingInfoExp; + if (nullptr == pfnGetSyncPointProfilingInfoExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + // convert loader handle to platform handle + hEvent = reinterpret_cast<ur_event_object_t *>(hEvent)->handle; + + // forward to device-platform + result = pfnGetSyncPointProfilingInfoExp( + hEvent, syncPoint, propName, propSize, pPropValue, pPropSizeRet); + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Intercept function for urEnqueueCooperativeKernelLaunchExp __urdlllocal ur_result_t UR_APICALL urEnqueueCooperativeKernelLaunchExp( @@ -8139,6 +8176,60 @@ UR_DLLEXPORT ur_result_t UR_APICALL urGetEventProcAddrTable( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's EventExp table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +/// - ::UR_RESULT_ERROR_UNSUPPORTED_VERSION +UR_DLLEXPORT ur_result_t UR_APICALL urGetEventExpProcAddrTable( + ur_api_version_t version, ///< [in] API version requested + ur_event_exp_dditable_t + *pDdiTable ///< [in,out] pointer to table of DDI function pointers +) { + if (nullptr
== pDdiTable) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + if (ur_loader::context->version < version) { + return UR_RESULT_ERROR_UNSUPPORTED_VERSION; + } + + ur_result_t result = UR_RESULT_SUCCESS; + + // Load the device-platform DDI tables + for (auto &platform : ur_loader::context->platforms) { + if (platform.initStatus != UR_RESULT_SUCCESS) { + continue; + } + auto getTable = reinterpret_cast<ur_pfnGetEventExpProcAddrTable_t>( + ur_loader::LibLoader::getFunctionPtr(platform.handle.get(), + "urGetEventExpProcAddrTable")); + if (!getTable) { + continue; + } + platform.initStatus = getTable(version, &platform.dditable.ur.EventExp); + } + + if (UR_RESULT_SUCCESS == result) { + if (ur_loader::context->platforms.size() != 1 || + ur_loader::context->forceIntercept) { + // return pointers to loader's DDIs + pDdiTable->pfnGetSyncPointProfilingInfoExp = + ur_loader::urEventGetSyncPointProfilingInfoExp; + } else { + // return pointers directly to platform's DDIs + *pDdiTable = + ur_loader::context->platforms.front().dditable.ur.EventExp; + } + } + + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Exported function for filling application's Kernel table /// with current process' addresses diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index 0a69fcd1e2..bc7cbf7ab8 100644 --- a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -7836,6 +7836,54 @@ ur_result_t UR_APICALL urCommandBufferEnqueueExp( return exceptionToResult(std::current_exception()); } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Get profiling information for the sync point execution associated with +/// an event object +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - ::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hEvent` +/// -
::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_PROFILING_INFO_COMMAND_COMPLETE < propName` +/// - ::UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE +/// + If `hEvent`s associated queue was not created with `UR_QUEUE_FLAG_PROFILING_ENABLE`. +/// - ::UR_RESULT_ERROR_INVALID_VALUE +/// + `pPropValue && propSize == 0` +/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP - "If `syncPoint` is not referencing a command enqueued in this CommandBuffer. Typically, `syncPoint` > number of commands" +ur_result_t UR_APICALL urEventGetSyncPointProfilingInfoExp( + ur_event_handle_t hEvent, ///< [in] handle of the event object + ur_exp_command_buffer_sync_point_t + syncPoint, ///< [in] Sync point referencing the node (i.e. command) from which we want + ///< to get profile information + ur_profiling_info_t + propName, ///< [in] the name of the profiling property to query + size_t propSize, ///< [in] size in bytes of the profiling property value + void * + pPropValue, ///< [out][optional][typename(propName, propSize)] value of the profiling + ///< property + size_t * + pPropSizeRet ///< [out][optional] pointer to the actual size in bytes returned in + ///< propValue + ) try { + auto pfnGetSyncPointProfilingInfoExp = + ur_lib::context->urDdiTable.EventExp.pfnGetSyncPointProfilingInfoExp; + if (nullptr == pfnGetSyncPointProfilingInfoExp) { + return UR_RESULT_ERROR_UNINITIALIZED; + } + + return pfnGetSyncPointProfilingInfoExp(hEvent, syncPoint, propName, + propSize, pPropValue, pPropSizeRet); +} catch (...) 
{ + return exceptionToResult(std::current_exception()); +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Enqueue a command to execute a cooperative kernel /// diff --git a/source/loader/ur_libddi.cpp b/source/loader/ur_libddi.cpp index bf28e09a71..ea471d3ed7 100644 --- a/source/loader/ur_libddi.cpp +++ b/source/loader/ur_libddi.cpp @@ -55,6 +55,11 @@ __urdlllocal ur_result_t context_t::urLoaderInit() { urGetEventProcAddrTable(UR_API_VERSION_CURRENT, &urDdiTable.Event); } + if (UR_RESULT_SUCCESS == result) { + result = urGetEventExpProcAddrTable(UR_API_VERSION_CURRENT, + &urDdiTable.EventExp); + } + if (UR_RESULT_SUCCESS == result) { result = urGetKernelProcAddrTable(UR_API_VERSION_CURRENT, &urDdiTable.Kernel); diff --git a/source/loader/ur_print.cpp b/source/loader/ur_print.cpp index 87660ee2cc..af8a53abf5 100644 --- a/source/loader/ur_print.cpp +++ b/source/loader/ur_print.cpp @@ -1616,6 +1616,14 @@ ur_result_t urPrintEventSetCallbackParams( return str_copy(&ss, buffer, buff_size, out_size); } +ur_result_t urPrintEventGetSyncPointProfilingInfoExpParams( + const struct ur_event_get_sync_point_profiling_info_exp_params_t *params, + char *buffer, const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << params; + return str_copy(&ss, buffer, buff_size, out_size); +} + ur_result_t urPrintKernelCreateParams(const struct ur_kernel_create_params_t *params, char *buffer, const size_t buff_size, diff --git a/source/ur_api.cpp b/source/ur_api.cpp index 2bcc229f29..59cecbb144 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -6621,6 +6621,46 @@ ur_result_t UR_APICALL urCommandBufferEnqueueExp( return result; } +/////////////////////////////////////////////////////////////////////////////// +/// @brief Get profiling information for the sync point execution associated with +/// an event object +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_UNINITIALIZED +/// - 
::UR_RESULT_ERROR_DEVICE_LOST +/// - ::UR_RESULT_ERROR_ADAPTER_SPECIFIC +/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE +/// + `NULL == hEvent` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_PROFILING_INFO_COMMAND_COMPLETE < propName` +/// - ::UR_RESULT_ERROR_PROFILING_INFO_NOT_AVAILABLE +/// + If `hEvent`s associated queue was not created with `UR_QUEUE_FLAG_PROFILING_ENABLE`. +/// - ::UR_RESULT_ERROR_INVALID_VALUE +/// + `pPropValue && propSize == 0` +/// - ::UR_RESULT_ERROR_INVALID_EVENT +/// - ::UR_RESULT_ERROR_OUT_OF_RESOURCES +/// - ::UR_RESULT_ERROR_OUT_OF_HOST_MEMORY +/// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP - "If `syncPoint` is not referencing a command enqueued in this CommandBuffer. Typically, `syncPoint` > number of commands" +ur_result_t UR_APICALL urEventGetSyncPointProfilingInfoExp( + ur_event_handle_t hEvent, ///< [in] handle of the event object + ur_exp_command_buffer_sync_point_t + syncPoint, ///< [in] Sync point referencing the node (i.e. command) from which we want + ///< to get profile information + ur_profiling_info_t + propName, ///< [in] the name of the profiling property to query + size_t propSize, ///< [in] size in bytes of the profiling property value + void * + pPropValue, ///< [out][optional][typename(propName, propSize)] value of the profiling + ///< property + size_t * + pPropSizeRet ///< [out][optional] pointer to the actual size in bytes returned in + ///< propValue +) { + ur_result_t result = UR_RESULT_SUCCESS; + return result; +} + /////////////////////////////////////////////////////////////////////////////// /// @brief Enqueue a command to execute a cooperative kernel ///