Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor workload tracking and message generation #72

Merged
merged 11 commits into from
Feb 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,6 @@
[submodule "source_third_party/gtest"]
path = source_third_party/gtest
url = https://github.com/google/googletest
[submodule "source_third_party/protopuf"]
path = source_third_party/protopuf
url = https://github.com/PragmaTwice/protopuf.git
5 changes: 4 additions & 1 deletion .mypy.ini
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
[mypy]
exclude = lglpy/timeline/protos/.*\.py
exclude = lglpy/timeline/protos/
ignore_missing_imports = True
disable_error_code = annotation-unchecked

[mypy-lglpy.timeline.data.raw_trace]
disable_error_code = attr-defined

[mypy-lglpy.comms.service_gpu_timeline]
disable_error_code = attr-defined

[mypy-google.*]
ignore_missing_imports = True
2 changes: 1 addition & 1 deletion .pycodestyle.ini
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
[pycodestyle]
exclude = lglpy/timeline/protos
ignore = E402,E126,E127
ignore = E402,E126,E127,W503
max-line-length = 80
19 changes: 19 additions & 0 deletions docs/updating_protobuf_files.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Updating the generated protobuf (de)serialization code

This project uses protobufs for (de)serialization of certain data:

* In the raw GPU timeline messages sent from `layer_gpu_timeline` to the host.
* In the Perfetto data collected from the device.

Python decoders for those protocols are pre-generated and stored in the sources
under `lglpy/timeline/protos`.

To regenerate or update the timeline protocol files use:

```sh
protoc -I layer_gpu_timeline/ \
    --python_out=lglpy/timeline/protos/layer_driver/ \
    layer_gpu_timeline/timeline.proto
```

- - -

_Copyright © 2025, Arm Limited and contributors._
4 changes: 4 additions & 0 deletions layer_gpu_timeline/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ set(LGL_CONFIG_LOG 1)
include(../source_common/compiler_helper.cmake)
include(../cmake/clang-tools.cmake)

# TPIP
set(BUILD_TESTS OFF)
add_subdirectory(../source_third_party/protopuf "source_third_party/protopuf")

# Build steps
add_subdirectory(../source_common/comms source_common/comms)
add_subdirectory(../source_common/framework source_common/framework)
Expand Down
9 changes: 6 additions & 3 deletions layer_gpu_timeline/source/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ add_library(
layer_device_functions_render_pass.cpp
layer_device_functions_trace_rays.cpp
layer_device_functions_transfer.cpp
timeline_comms.cpp)
timeline_comms.cpp
timeline_protobuf_encoder.cpp)

target_include_directories(
${VK_LAYER} PRIVATE
Expand All @@ -64,7 +65,8 @@ target_include_directories(
target_include_directories(
${VK_LAYER} SYSTEM PRIVATE
../../source_third_party/
../../source_third_party/khronos/vulkan/include/)
../../source_third_party/khronos/vulkan/include/
../../source_third_party/protopuf/include/)

lgl_set_build_options(${VK_LAYER})

Expand All @@ -73,7 +75,8 @@ target_link_libraries(
lib_layer_comms
lib_layer_framework
lib_layer_trackers
$<$<PLATFORM_ID:Android>:log>)
$<$<PLATFORM_ID:Android>:log>
protopuf)

if (CMAKE_BUILD_TYPE STREQUAL "Release")
add_custom_command(
Expand Down
19 changes: 2 additions & 17 deletions layer_gpu_timeline/source/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,18 +28,13 @@
#include "comms/comms_module.hpp"
#include "framework/utils.hpp"
#include "instance.hpp"
#include "timeline_protobuf_encoder.hpp"

#include <array>
#include <fstream>
#include <iostream>
#include <vector>

#include <nlohmann/json.hpp>
#include <sys/stat.h>
#include <unistd.h>

using json = nlohmann::json;

/**
* @brief The dispatch lookup for all of the created Vulkan devices.
*/
Expand Down Expand Up @@ -125,15 +120,5 @@ Device::Device(Instance* _instance,

pid_t processPID = getpid();

json deviceMetadata {
{"type", "device"},
{"pid", static_cast<uint32_t>(processPID)},
{"device", reinterpret_cast<uintptr_t>(device)},
{"deviceName", name},
{"driverMajor", major},
{"driverMinor", minor},
{"driverPatch", patch},
};

commsWrapper->txMessage(deviceMetadata.dump());
TimelineProtobufEncoder::emitMetadata(*this, processPID, major, minor, patch, std::move(name));
}
11 changes: 2 additions & 9 deletions layer_gpu_timeline/source/device.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,18 +131,11 @@ class Device
~Device() = default;

/**
* @brief Callback for sending messages on frame boundary.
* @brief Callback for sending some message for the device.
*
* @param message The message to send.
*/
void onFrame(const std::string& message) { commsWrapper->txMessage(message); }

/**
* @brief Callback for sending messages on workload submit to a queue.
*
* @param message The message to send.
*/
void onWorkloadSubmit(const std::string& message) { commsWrapper->txMessage(message); }
void txMessage(Comms::MessageData&& message) { commsWrapper->txMessage(std::move(message)); }

/**
* @brief Get the cumulative stats for this device.
Expand Down
66 changes: 22 additions & 44 deletions layer_gpu_timeline/source/layer_device_functions_queue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,13 @@

#include "device.hpp"
#include "framework/device_dispatch_table.hpp"
#include "utils/misc.hpp"
#include "timeline_protobuf_encoder.hpp"
#include "trackers/queue.hpp"

#include <mutex>

#include <nlohmann/json.hpp>
#include <time.h>

using json = nlohmann::json;

using namespace std::placeholders;

extern std::mutex g_vulkanLock;

/**
Expand Down Expand Up @@ -66,34 +62,26 @@ static uint64_t getClockMonotonicRaw()
/**
* @brief Emit the queue submit time metadata.
*
* @param queue The queue being submitted to.
* @param callback The data emit callback.
* @param queue The queue being submitted to.
* @param workloadVisitor The data emit callback.
*/
static void emitQueueMetadata(VkDevice device, VkQueue queue, std::function<void(const std::string&)> callback)
static void emitQueueMetadata(VkQueue queue, TimelineProtobufEncoder& workloadVisitor)
{
// Write the queue submit metadata
json submitMetadata {
{"type", "submit"},
{"device", reinterpret_cast<uintptr_t>(device)},
{"queue", reinterpret_cast<uintptr_t>(queue)},
{"timestamp", getClockMonotonicRaw()},
};

callback(submitMetadata.dump());
workloadVisitor.emitSubmit(queue, getClockMonotonicRaw());
}

/**
* @brief Emit the command buffer submit time metadata.
*
* @param layer The layer context.
* @param queue The queue being submitted to.
* @param commandBuffer The command buffer being submitted.
* @param callback The data emit callback.
* @param layer The layer context.
* @param queue The queue being submitted to.
* @param commandBuffer The command buffer being submitted.
* @param workloadVisitor The data emit callback.
*/
static void emitCommandBufferMetadata(Device& layer,
VkQueue queue,
VkCommandBuffer commandBuffer,
std::function<void(const std::string&)> callback)
Tracker::SubmitCommandWorkloadVisitor& workloadVisitor)
{
// Fetch layer proxies for this workload
auto& tracker = layer.getStateTracker();
Expand All @@ -102,7 +90,7 @@ static void emitCommandBufferMetadata(Device& layer,

// Play the layer command stream into the queue
const auto& LCS = trackCB.getSubmitCommandStream();
trackQueue.runSubmitCommandStream(LCS, callback);
trackQueue.runSubmitCommandStream(LCS, workloadVisitor);
}

/* See Vulkan API for documentation. */
Expand All @@ -120,14 +108,7 @@ VKAPI_ATTR VkResult VKAPI_CALL layer_vkQueuePresentKHR<user_tag>(VkQueue queue,

// This is run with the lock held to ensure that all queue submit
// messages are sent sequentially to the host tool
json frame {
{"type", "frame"},
{"device", reinterpret_cast<uintptr_t>(layer->device)},
{"fid", tracker.totalStats.getFrameCount()},
{"timestamp", getClockMonotonicRaw()},
};

layer->onFrame(frame.dump());
TimelineProtobufEncoder::emitFrame(*layer, tracker.totalStats.getFrameCount(), getClockMonotonicRaw());

// Release the lock to call into the driver
lock.unlock();
Expand All @@ -145,13 +126,12 @@ VKAPI_ATTR VkResult VKAPI_CALL
std::unique_lock<std::mutex> lock {g_vulkanLock};
auto* layer = Device::retrieve(queue);

auto onSubmit = std::bind(&Device::onWorkloadSubmit, layer, _1);

// This is run with the lock held to ensure that all queue submit
// messages are sent sequentially and contiguously to the host tool
TimelineProtobufEncoder workloadVisitor {*layer};

// Add queue-level metadata
emitQueueMetadata(layer->device, queue, onSubmit);
emitQueueMetadata(queue, workloadVisitor);

// Add per-command buffer metadata
for (uint32_t i = 0; i < submitCount; i++)
Expand All @@ -160,7 +140,7 @@ VKAPI_ATTR VkResult VKAPI_CALL
for (uint32_t j = 0; j < submit.commandBufferCount; j++)
{
VkCommandBuffer commandBuffer = submit.pCommandBuffers[j];
emitCommandBufferMetadata(*layer, queue, commandBuffer, onSubmit);
emitCommandBufferMetadata(*layer, queue, commandBuffer, workloadVisitor);
}
}

Expand All @@ -180,13 +160,12 @@ VKAPI_ATTR VkResult VKAPI_CALL
std::unique_lock<std::mutex> lock {g_vulkanLock};
auto* layer = Device::retrieve(queue);

auto onSubmit = std::bind(&Device::onWorkloadSubmit, layer, _1);

// This is run with the lock held to ensure that all queue submit
// messages are sent sequentially and contiguously to the host tool
TimelineProtobufEncoder workloadVisitor {*layer};

// Add queue-level metadata
emitQueueMetadata(layer->device, queue, onSubmit);
emitQueueMetadata(queue, workloadVisitor);

// Add per-command buffer metadata
for (uint32_t i = 0; i < submitCount; i++)
Expand All @@ -195,7 +174,7 @@ VKAPI_ATTR VkResult VKAPI_CALL
for (uint32_t j = 0; j < submit.commandBufferInfoCount; j++)
{
VkCommandBuffer commandBuffer = submit.pCommandBufferInfos[j].commandBuffer;
emitCommandBufferMetadata(*layer, queue, commandBuffer, onSubmit);
emitCommandBufferMetadata(*layer, queue, commandBuffer, workloadVisitor);
}
}

Expand All @@ -215,13 +194,12 @@ VKAPI_ATTR VkResult VKAPI_CALL
std::unique_lock<std::mutex> lock {g_vulkanLock};
auto* layer = Device::retrieve(queue);

auto onSubmit = std::bind(&Device::onWorkloadSubmit, layer, _1);

// This is run with the lock held to ensure that all queue submit
// messages are sent sequentially and contiguously to the host tool
TimelineProtobufEncoder workloadVisitor {*layer};

// Add queue-level metadata
emitQueueMetadata(layer->device, queue, onSubmit);
emitQueueMetadata(queue, workloadVisitor);

// Add per-command buffer metadata
for (uint32_t i = 0; i < submitCount; i++)
Expand All @@ -230,7 +208,7 @@ VKAPI_ATTR VkResult VKAPI_CALL
for (uint32_t j = 0; j < submit.commandBufferInfoCount; j++)
{
VkCommandBuffer commandBuffer = submit.pCommandBufferInfos[j].commandBuffer;
emitCommandBufferMetadata(*layer, queue, commandBuffer, onSubmit);
emitCommandBufferMetadata(*layer, queue, commandBuffer, workloadVisitor);
}
}

Expand Down
Loading