From 666b9961085de3f4bcfac6a1b412a0965525c8d7 Mon Sep 17 00:00:00 2001
From: Laurent Morichetti
Date: Mon, 15 Aug 2022 12:00:42 -0700
Subject: [PATCH] SWDEV-351980 - Move activity_ to the ProfilingInfo

The activity_ is only instantiated if profiling is enabled.

Remove the HIP private global record ID. Instead, use the correlation
ID stored in the hip_api_data_t by the profiler while the last HIP
function is in scope.

For NDRange and Copy commands, store the kernel name and byte size
(respectively) in the record.

General cleanups to improve the code's readability.
(cherry picked from commit 3e2681b1d5a32f1e9e24805aed3606dd1fe774a9)

SWDEV-351980 - Enable profiling for commands reporting activities

Profiling should be enabled for any command reporting activities as the
activity record captures the profilingInfo's start and end timestamps.

Since IS_PROFILER_ON is only used to determine whether API tracing is
enabled, there is no need to expose it globally; it should be a
property of the activity_prof::CallbacksTable.
(cherry picked from commit 6853a1164412ab999e1494ac6bb0b73f505edda1)

SWDEV-351980 - Consolidate registration tables in the roctracer library

Remove the activity_prof::CallbacksTable. The table was redundant with
the information already stored in the roctracer library. Instead use a
single callback into the roctracer library to query whether the
activity is enabled, and to report it.
(cherry picked from commit b73e1751b8cdf3c409014733ab2eb67a09ad03a1) Change-Id: If452a42113547fae4426eabe692b677e6fc415d5 --- device/device.hpp | 2 - device/rocm/rocvirtual.cpp | 3 +- device/rocm/rocvirtual.hpp | 5 - platform/activity.cpp | 169 ++++++++++++++++++++++++-------- platform/activity.hpp | 193 +++++++++---------------------------- platform/command.cpp | 13 +-- platform/command.hpp | 7 +- platform/prof_protocol.h | 15 +-- utils/flags.cpp | 1 - utils/flags.hpp | 1 - 10 files changed, 197 insertions(+), 212 deletions(-) diff --git a/device/device.hpp b/device/device.hpp index 72d9c8ce7..a3da76b15 100644 --- a/device/device.hpp +++ b/device/device.hpp @@ -1246,8 +1246,6 @@ class VirtualDevice : public amd::HeapObject { virtual void submitStreamOperation(amd::StreamOperationCommand& cmd) { ShouldNotReachHere(); } virtual void submitVirtualMap(amd::VirtualMapCommand& cmd) { ShouldNotReachHere(); } - virtual void profilerAttach(bool enable) = 0; - virtual address allocKernelArguments(size_t size, size_t alignment) { return nullptr; } //! 
Get the blit manager object diff --git a/device/rocm/rocvirtual.cpp b/device/rocm/rocvirtual.cpp index 8c2af93be..570d97fb5 100644 --- a/device/rocm/rocvirtual.cpp +++ b/device/rocm/rocvirtual.cpp @@ -25,6 +25,7 @@ #include "device/rocm/rocmemory.hpp" #include "device/rocm/rocblit.hpp" #include "device/rocm/roccounters.hpp" +#include "platform/activity.hpp" #include "platform/kernel.hpp" #include "platform/context.hpp" #include "platform/command.hpp" @@ -167,7 +168,7 @@ bool HsaAmdSignalHandler(hsa_signal_value_t value, void* arg) { return false; } - if (ts->gpu()->isProfilerAttached()) { + if (activity_prof::IsEnabled(OP_ID_DISPATCH)) { amd::Command* head = ts->getParsedCommand(); if (head == nullptr) { head = ts->command().GetBatchHead(); diff --git a/device/rocm/rocvirtual.hpp b/device/rocm/rocvirtual.hpp index 307d9150e..7e9c34544 100644 --- a/device/rocm/rocvirtual.hpp +++ b/device/rocm/rocvirtual.hpp @@ -396,10 +396,6 @@ class VirtualGPU : public device::VirtualDevice { Timestamp* timestamp() const { return timestamp_; } - void profilerAttach(bool enable = false) { profilerAttached_ = enable; } - - bool isProfilerAttached() const { return profilerAttached_; } - //! Indicates the status of the callback handler. The callback would process the commands //! 
and would collect profiling data, update refcounts bool isHandlerPending() const { return barriers_.IsHandlerPending(); } @@ -483,7 +479,6 @@ class VirtualGPU : public device::VirtualDevice { uint32_t cooperative_ : 1; //!< Cooperative launch is enabled uint32_t addSystemScope_ : 1; //!< Insert a system scope to the next aql uint32_t tracking_created_ : 1; //!< Enabled if tracking object was properly initialized - uint32_t profilerAttached_ : 1; //!< Indicates if profiler is attached uint32_t retainExternalSignals_ : 1; //!< Indicate to retain external signal array }; uint32_t state_; diff --git a/platform/activity.cpp b/platform/activity.cpp index 2e19ef273..2242ee276 100644 --- a/platform/activity.cpp +++ b/platform/activity.cpp @@ -19,47 +19,134 @@ THE SOFTWARE. */ #include "platform/activity.hpp" +#include "platform/command.hpp" +#include "platform/commandqueue.hpp" -ACTIVITY_PROF_INSTANCES(); - -#define CASE_STRING(X, C) case X: case_string = #C ;break; - -const char* getOclCommandKindString(uint32_t op) { - const char* case_string; - - switch(static_cast(op)) { - CASE_STRING(0, InternalMarker) - CASE_STRING(CL_COMMAND_MARKER, Marker) - CASE_STRING(CL_COMMAND_NDRANGE_KERNEL, KernelExecution) - CASE_STRING(CL_COMMAND_READ_BUFFER, CopyDeviceToHost) - CASE_STRING(CL_COMMAND_WRITE_BUFFER, CopyHostToDevice) - CASE_STRING(CL_COMMAND_COPY_BUFFER, CopyDeviceToDevice) - CASE_STRING(CL_COMMAND_READ_BUFFER_RECT, CopyDeviceToHost2D) - CASE_STRING(CL_COMMAND_WRITE_BUFFER_RECT, CopyHostToDevice2D) - CASE_STRING(CL_COMMAND_COPY_BUFFER_RECT, CopyDeviceToDevice2D) - CASE_STRING(CL_COMMAND_FILL_BUFFER, FillBuffer) - CASE_STRING(CL_COMMAND_TASK, Task) - CASE_STRING(CL_COMMAND_NATIVE_KERNEL, NativeKernel) - CASE_STRING(CL_COMMAND_READ_IMAGE, ReadImage) - CASE_STRING(CL_COMMAND_WRITE_IMAGE, WriteImage) - CASE_STRING(CL_COMMAND_COPY_IMAGE, CopyImage) - CASE_STRING(CL_COMMAND_COPY_IMAGE_TO_BUFFER, CopyImageToBuffer) - CASE_STRING(CL_COMMAND_COPY_BUFFER_TO_IMAGE, CopyBufferToImage) - 
CASE_STRING(CL_COMMAND_MAP_BUFFER, MapBuffer) - CASE_STRING(CL_COMMAND_MAP_IMAGE, MapImage) - CASE_STRING(CL_COMMAND_UNMAP_MEM_OBJECT, UnmapMemObject) - CASE_STRING(CL_COMMAND_ACQUIRE_GL_OBJECTS, AcquireGLObjects) - CASE_STRING(CL_COMMAND_RELEASE_GL_OBJECTS, ReleaseGLObjects) - CASE_STRING(CL_COMMAND_USER, User) - CASE_STRING(CL_COMMAND_BARRIER, Barrier) - CASE_STRING(CL_COMMAND_MIGRATE_MEM_OBJECTS, MigrateMemObjects) - CASE_STRING(CL_COMMAND_FILL_IMAGE, FillImage) - CASE_STRING(CL_COMMAND_SVM_FREE, SvmFree) - CASE_STRING(CL_COMMAND_SVM_MEMCPY, SvmMemcpy) - CASE_STRING(CL_COMMAND_SVM_MEMFILL, SvmMemFill) - CASE_STRING(CL_COMMAND_SVM_MAP, SvmMap) - CASE_STRING(CL_COMMAND_SVM_UNMAP, SvmUnmap) - default: case_string = "Unknown command type"; +#include + +namespace activity_prof { + +decltype(report_activity) report_activity{nullptr}; + +#if USE_PROF_API + +#if defined(__linux__) +__thread activity_correlation_id_t correlation_id __attribute__((tls_model("initial-exec"))) = 0; +#elif defined(_WIN32) +__declspec(thread) activity_correlation_id_t correlation_id = 0; +#endif // defined(_WIN32) + +static inline size_t linearSize(const amd::Coord3D& size3d) { + size_t size = size3d[0]; + if (size3d[1] != 0) size *= size3d[1]; + if (size3d[2] != 0) size *= size3d[2]; + return size; +} + +bool IsEnabled(OpId operation_id) { + if (operation_id < OP_ID_NUMBER) + if (auto report = report_activity.load(std::memory_order_relaxed)) + return report(ACTIVITY_DOMAIN_HIP_OPS, operation_id, nullptr) == 0; + return false; +} + +void ReportActivity(const amd::Command& command) { + assert(command.profilingInfo().enabled_ && "profiling must be enabled for this command"); + auto operation_id = OperationId(command.type()); + if (operation_id >= OP_ID_NUMBER) + // This command does not translate into a profiler activity (dispatch, memcopy, etc...), there + // is nothing to report to the profiler. 
+ return; + + auto function = report_activity.load(std::memory_order_relaxed); + if (!function) return; + + const auto* queue = command.queue(); + assert(queue != nullptr); + + activity_record_t record{ + ACTIVITY_DOMAIN_HIP_OPS, // activity domain + command.type(), // activity kind + operation_id, // operation id + command.profilingInfo().correlation_id_, // activity correlation id + command.profilingInfo().start_, // begin timestamp, ns + command.profilingInfo().end_, // end timestamp, ns + {{ + static_cast(queue->device().index()), // device id + queue->vdev()->index() // queue id + }}, + {} // copied data size for memcpy, or kernel name for dispatch + }; + + switch (command.type()) { + case CL_COMMAND_NDRANGE_KERNEL: + record.kernel_name = + static_cast(command).kernel().name().c_str(); + break; + case CL_COMMAND_READ_BUFFER: + case CL_COMMAND_READ_BUFFER_RECT: + record.bytes = linearSize(static_cast(command).size()); + break; + case CL_COMMAND_WRITE_BUFFER: + case CL_COMMAND_WRITE_BUFFER_RECT: + record.bytes = linearSize(static_cast(command).size()); + break; + case CL_COMMAND_COPY_BUFFER: + case CL_COMMAND_COPY_BUFFER_RECT: + record.bytes = linearSize(static_cast(command).size()); + break; + default: + break; + } + + function(ACTIVITY_DOMAIN_HIP_OPS, operation_id, &record); +} + +#endif // USE_PROF_API + +} // namespace activity_prof + +#define CASE_STRING(X, C) \ + case X: \ + return #C + +const char* getOclCommandKindString(cl_command_type commandType) { + switch (commandType) { + CASE_STRING(0, InternalMarker); + CASE_STRING(CL_COMMAND_MARKER, Marker); + CASE_STRING(CL_COMMAND_NDRANGE_KERNEL, KernelExecution); + CASE_STRING(CL_COMMAND_READ_BUFFER, CopyDeviceToHost); + CASE_STRING(CL_COMMAND_WRITE_BUFFER, CopyHostToDevice); + CASE_STRING(CL_COMMAND_COPY_BUFFER, CopyDeviceToDevice); + CASE_STRING(CL_COMMAND_READ_BUFFER_RECT, CopyDeviceToHost2D); + CASE_STRING(CL_COMMAND_WRITE_BUFFER_RECT, CopyHostToDevice2D); + CASE_STRING(CL_COMMAND_COPY_BUFFER_RECT, 
CopyDeviceToDevice2D); + CASE_STRING(CL_COMMAND_FILL_BUFFER, FillBuffer); + CASE_STRING(CL_COMMAND_TASK, Task); + CASE_STRING(CL_COMMAND_NATIVE_KERNEL, NativeKernel); + CASE_STRING(CL_COMMAND_READ_IMAGE, ReadImage); + CASE_STRING(CL_COMMAND_WRITE_IMAGE, WriteImage); + CASE_STRING(CL_COMMAND_COPY_IMAGE, CopyImage); + CASE_STRING(CL_COMMAND_COPY_IMAGE_TO_BUFFER, CopyImageToBuffer); + CASE_STRING(CL_COMMAND_COPY_BUFFER_TO_IMAGE, CopyBufferToImage); + CASE_STRING(CL_COMMAND_MAP_BUFFER, MapBuffer); + CASE_STRING(CL_COMMAND_MAP_IMAGE, MapImage); + CASE_STRING(CL_COMMAND_UNMAP_MEM_OBJECT, UnmapMemObject); + CASE_STRING(CL_COMMAND_ACQUIRE_GL_OBJECTS, AcquireGLObjects); + CASE_STRING(CL_COMMAND_RELEASE_GL_OBJECTS, ReleaseGLObjects); + CASE_STRING(CL_COMMAND_USER, User); + CASE_STRING(CL_COMMAND_BARRIER, Barrier); + CASE_STRING(CL_COMMAND_MIGRATE_MEM_OBJECTS, MigrateMemObjects); + CASE_STRING(CL_COMMAND_FILL_IMAGE, FillImage); + CASE_STRING(CL_COMMAND_SVM_FREE, SvmFree); + CASE_STRING(CL_COMMAND_SVM_MEMCPY, SvmMemcpy); + CASE_STRING(CL_COMMAND_SVM_MEMFILL, SvmMemFill); + CASE_STRING(CL_COMMAND_SVM_MAP, SvmMap); + CASE_STRING(CL_COMMAND_SVM_UNMAP, SvmUnmap); + CASE_STRING(ROCCLR_COMMAND_STREAM_WAIT_VALUE, StreamWait); + CASE_STRING(ROCCLR_COMMAND_STREAM_WRITE_VALUE, StreamWrite); + default: + break; }; - return case_string; + return "Unknown command kind"; }; diff --git a/platform/activity.hpp b/platform/activity.hpp index bcda9e93e..e6256807a 100644 --- a/platform/activity.hpp +++ b/platform/activity.hpp @@ -20,170 +20,71 @@ #pragma once -#include "thread/monitor.hpp" +#include "top.hpp" #include +#include #include -#include +#include +#include + +namespace amd { +class Command; +} // namespace amd #define USE_PROF_API 1 #if USE_PROF_API + enum OpId { OP_ID_DISPATCH = 0, OP_ID_COPY = 1, OP_ID_BARRIER = 2, OP_ID_NUMBER = 3 }; #include "prof_protocol.h" -// Statically allocated table of callbacks and global unique ID of each operation -#define ACTIVITY_PROF_INSTANCES() \ - 
namespace activity_prof { \ - CallbacksTable::table_t CallbacksTable::table_{}; \ - std::atomic ActivityProf::globe_record_id_(0); \ - } // activity_prof - namespace activity_prof { -typedef activity_correlation_id_t record_id_t; -typedef activity_op_t op_id_t; -typedef uint32_t command_id_t; - -typedef activity_id_callback_t id_callback_fun_t; -typedef activity_async_callback_t callback_fun_t; -typedef void* callback_arg_t; - -// Activity callbacks table -class CallbacksTable { - public: - struct table_t { - id_callback_fun_t id_callback; - callback_fun_t op_callback; - callback_arg_t arg; - std::atomic enabled[OP_ID_NUMBER]; - }; - - // Initialize record id callback and activity callback - static void init(const id_callback_fun_t& id_callback, const callback_fun_t& op_callback, - const callback_arg_t& arg) { - table_.id_callback = id_callback; - table_.op_callback = op_callback; - table_.arg = arg; - } - - static bool SetEnabled(const op_id_t& op_id, const bool& enable) { - bool ret = true; - if (op_id < OP_ID_NUMBER) { - table_.enabled[op_id].store(enable, std::memory_order_release); - } else { - ret = false; - } - return ret; - } - - static bool IsEnabled(const op_id_t& op_id) { - return table_.enabled[op_id].load(std::memory_order_acquire); - } - static id_callback_fun_t get_id_callback() { return table_.id_callback; } - static callback_fun_t get_op_callback() { return table_.op_callback; } - static callback_arg_t get_arg() { return table_.arg; } - - private: - static table_t table_; -}; - -// Activity profile class -class ActivityProf { - public: - // Domain ID - static constexpr int ACTIVITY_DOMAIN_ID = ACTIVITY_DOMAIN_HIP_VDI; - - ActivityProf() : command_id_(0), queue_id_(0), device_id_(0), record_id_(0), enabled_(false) {} - - // Initialization - void Initialize(const command_id_t command_id, const uint32_t queue_id, - const uint32_t device_id) { - activity_op_t op_id = (command_id == CL_COMMAND_NDRANGE_KERNEL) ? 
OP_ID_DISPATCH : OP_ID_COPY; - enabled_ = CallbacksTable::IsEnabled(op_id); - if (IsEnabled()) { - command_id_ = command_id; - queue_id_ = queue_id; - device_id_ = device_id; - record_id_ = globe_record_id_.fetch_add(1, std::memory_order_relaxed); - (CallbacksTable::get_id_callback())(record_id_); - } +extern std::atomic + report_activity; + +#if defined(__linux__) +extern __thread activity_correlation_id_t correlation_id __attribute__((tls_model("initial-exec"))); +#elif defined(_WIN32) +extern __declspec(thread) activity_correlation_id_t correlation_id; +#endif // defined(_WIN32) + +constexpr OpId OperationId(cl_command_type commandType) { + switch (commandType) { + case CL_COMMAND_NDRANGE_KERNEL: + return OP_ID_DISPATCH; + case CL_COMMAND_READ_BUFFER: + case CL_COMMAND_READ_BUFFER_RECT: + case CL_COMMAND_WRITE_BUFFER: + case CL_COMMAND_WRITE_BUFFER_RECT: + case CL_COMMAND_COPY_BUFFER: + case CL_COMMAND_COPY_BUFFER_RECT: + case CL_COMMAND_FILL_BUFFER: + case CL_COMMAND_READ_IMAGE: + case CL_COMMAND_WRITE_IMAGE: + case CL_COMMAND_COPY_IMAGE: + case CL_COMMAND_FILL_IMAGE: + case CL_COMMAND_COPY_BUFFER_TO_IMAGE: + case CL_COMMAND_COPY_IMAGE_TO_BUFFER: + return OP_ID_COPY; + case CL_COMMAND_MARKER: + return OP_ID_BARRIER; + default: + return OP_ID_NUMBER; } +} - template inline void ReportEventTimestamps(T& obj, const size_t bytes = 0) { - if (IsEnabled()) { - uint64_t start = obj.profilingInfo().start_; - uint64_t end = obj.profilingInfo().end_; - callback(obj.type(), start, end, bytes); - } - } - - bool IsEnabled() const { return enabled_; } - - private: - // Activity callback routine - void callback(const command_id_t command_id, - const uint64_t begin_ts, const uint64_t end_ts, const size_t bytes) { - activity_op_t op_id = (command_id == CL_COMMAND_NDRANGE_KERNEL) ? 
OP_ID_DISPATCH : OP_ID_COPY; - activity_record_t record { - ACTIVITY_DOMAIN_ID, // domain id - (activity_kind_t)command_id, // activity kind - op_id, // operation id - record_id_, // activity correlation id - begin_ts, // begin timestamp, ns - end_ts, // end timestamp, ns - { - { - static_cast(device_id_), // device id - queue_id_ // queue id - } - }, - bytes // copied data size, for memcpy - }; - (CallbacksTable::get_op_callback())(op_id, &record, CallbacksTable::get_arg()); - } - - command_id_t command_id_; //!< Command ID, executed on the queue - uint32_t queue_id_; //!< Queue ID, associated with this command - uint32_t device_id_; //!< Device ID, associated with this command - record_id_t record_id_; //!< Uniqueue execution ID(counter) of this command - bool enabled_; //!< Activity profiling is enabled - - // Global record ID - static std::atomic globe_record_id_; //!< GLobal counter of all executed commands -}; +bool IsEnabled(OpId operation_id); +void ReportActivity(const amd::Command& command); } // namespace activity_prof -#else -#define ACTIVITY_PROF_INSTANCES() - -namespace activity_prof { -typedef uint32_t op_id_t; -typedef uint32_t command_id_t; - -typedef void* id_callback_fun_t; -typedef void* callback_fun_t; -typedef void* callback_arg_t; - -struct CallbacksTable { - static void init(const id_callback_fun_t& id_callback, const callback_fun_t& op_callback, - const callback_arg_t& arg) {} - static bool SetEnabled(const op_id_t& op_id, const bool& enable) { return false; } -}; - -class ActivityProf { - public: - ActivityProf() {} - inline void Initialize(const command_id_t command_id, const uint32_t queue_id, - const uint32_t device_id) {} - template inline void ReportEventTimestamps(T& obj, const size_t bytes = 0) {} - inline bool IsEnabled() { return false; } -}; +#else // !USE_PROF_API -} // namespace activity_prof +static inline void ReportActivity(const amd::Command& command) {} -#endif +#endif // !USE_PROF_API -const char* 
getOclCommandKindString(uint32_t op); +const char* getOclCommandKindString(cl_command_type kind); diff --git a/platform/command.cpp b/platform/command.cpp index 2a6729d33..26336b318 100644 --- a/platform/command.cpp +++ b/platform/command.cpp @@ -26,6 +26,7 @@ * \date October 2008 */ +#include "platform/activity.hpp" #include "platform/command.hpp" #include "platform/commandqueue.hpp" #include "device/device.hpp" @@ -43,14 +44,13 @@ namespace amd { // ================================================================================================ -Event::Event(HostQueue& queue) +Event::Event(HostQueue& queue, bool profilingEnabled) : callbacks_(NULL), status_(CL_INT_MAX), hw_event_(nullptr), notify_event_(nullptr), device_(&queue.device()), - profilingInfo_(IS_PROFILER_ON || queue.properties().test(CL_QUEUE_PROFILING_ENABLE) || - Agent::shouldPostEventEvents()), + profilingInfo_(profilingEnabled), event_scope_(Device::kCacheStateInvalid) { notified_.clear(); } @@ -162,7 +162,8 @@ bool Event::setStatus(int32_t status, uint64_t timeStamp) { releaseResources(); } - activity_.ReportEventTimestamps(command()); + if (profilingInfo().enabled_) activity_prof::ReportActivity(command()); + // Broadcast all the waiters. 
if (referenceCount() > 1) { signal(); @@ -311,7 +312,8 @@ const Event::EventWaitList Event::nullWaitList(0); // ================================================================================================ Command::Command(HostQueue& queue, cl_command_type type, const EventWaitList& eventWaitList, uint32_t commandWaitBits, const Event* waitingEvent) - : Event(queue), + : Event(queue, activity_prof::IsEnabled(activity_prof::OperationId(type)) || + queue.properties().test(CL_QUEUE_PROFILING_ENABLE) || Agent::shouldPostEventEvents()), queue_(&queue), next_(nullptr), type_(type), @@ -323,7 +325,6 @@ Command::Command(HostQueue& queue, cl_command_type type, for (const auto &event: eventWaitList) { event->retain(); } - if (type != 0) activity_.Initialize(type, queue.vdev()->index(), queue.device().index()); } // ================================================================================================ diff --git a/platform/command.hpp b/platform/command.hpp index 72888a750..651955735 100644 --- a/platform/command.hpp +++ b/platform/command.hpp @@ -108,6 +108,7 @@ class Event : public RuntimeObject { if (enabled) { clear(); callback_ = nullptr; + correlation_id_ = activity_prof::correlation_id; } } @@ -118,6 +119,7 @@ class Event : public RuntimeObject { bool enabled_; //!< Profiling enabled for the wave limiter uint32_t waves_; //!< The number of waves used in a dispatch ProfilingCallback* callback_; + uint64_t correlation_id_; bool marker_ts_; //!< TS marker void clear() { @@ -137,13 +139,11 @@ class Event : public RuntimeObject { } } profilingInfo_; - activity_prof::ActivityProf activity_; //!< Activity profiling - //! Construct a new event. Event(); //! Construct a new event associated to the given command \a queue. - Event(HostQueue& queue); + Event(HostQueue& queue, bool profilingEnabled = false); //! Destroy the event. 
virtual ~Event(); @@ -164,6 +164,7 @@ class Event : public RuntimeObject { profilingInfo_.enabled_ = true; profilingInfo_.clear(); profilingInfo_.callback_ = nullptr; + profilingInfo_.correlation_id_ = activity_prof::correlation_id; } public: diff --git a/platform/prof_protocol.h b/platform/prof_protocol.h index ae23fa5ab..5471ab064 100644 --- a/platform/prof_protocol.h +++ b/platform/prof_protocol.h @@ -27,9 +27,10 @@ typedef enum { ACTIVITY_DOMAIN_HSA_API = 0, // HSA API domain ACTIVITY_DOMAIN_HSA_OPS = 1, // HSA async activity domain - ACTIVITY_DOMAIN_HCC_OPS = 2, // HCC async activity domain + ACTIVITY_DOMAIN_HIP_OPS = 2, // HIP async activity domain + ACTIVITY_DOMAIN_HCC_OPS = ACTIVITY_DOMAIN_HIP_OPS, // HCC async activity domain + ACTIVITY_DOMAIN_HIP_VDI = ACTIVITY_DOMAIN_HIP_OPS, // HIP VDI domain ACTIVITY_DOMAIN_HIP_API = 3, // HIP API domain - ACTIVITY_DOMAIN_HIP_VDI = ACTIVITY_DOMAIN_HCC_OPS, // HIP VDI domain ACTIVITY_DOMAIN_KFD_API = 4, // KFD API domain ACTIVITY_DOMAIN_EXT_API = 5, // External ID domain ACTIVITY_DOMAIN_ROCTX = 6, // ROCTX domain @@ -42,7 +43,7 @@ typedef enum { ACTIVITY_EXT_OP_EXTERN_ID = 1 } activity_ext_op_t; -// API calback type +// API callback type typedef void (*activity_rtapi_callback_t)(uint32_t domain, uint32_t cid, const void* data, void* arg); typedef uint32_t activity_kind_t; typedef uint32_t activity_op_t; @@ -78,13 +79,15 @@ struct activity_record_t { activity_correlation_id_t external_id; // external correlatino id }; }; + union { size_t bytes; // data size bytes + const char* kernel_name; + }; }; // Activity sync calback type -typedef void* (*activity_sync_callback_t)(uint32_t cid, activity_record_t* record, const void* data, void* arg); +typedef void (*activity_sync_callback_t)(uint32_t cid, activity_record_t* record, const void* data, void* arg); // Activity async calback type -typedef void (*activity_id_callback_t)(activity_correlation_id_t id); -typedef void (*activity_async_callback_t)(uint32_t op, void* record, 
void* arg); +typedef void (*activity_async_callback_t)(uint32_t op, activity_record_t* record, void* arg); #endif // INC_EXT_PROF_PROTOCOL_H_ diff --git a/utils/flags.cpp b/utils/flags.cpp index 616743d53..d97182f13 100644 --- a/utils/flags.cpp +++ b/utils/flags.cpp @@ -82,7 +82,6 @@ namespace amd { #endif // __APPLE__ bool IS_HIP = false; -std::atomic_bool IS_PROFILER_ON(false); #if !defined(_WIN32) && defined(WITH_PAL_DEVICE) bool IS_LEGACY = true; diff --git a/utils/flags.hpp b/utils/flags.hpp index a1927eb0b..94ee1957b 100644 --- a/utils/flags.hpp +++ b/utils/flags.hpp @@ -280,7 +280,6 @@ release(bool, GPU_STREAMOPS_CP_WAIT, false, \ namespace amd { extern bool IS_HIP; -extern std::atomic_bool IS_PROFILER_ON; extern bool IS_LEGACY;