From 9b0a2e2c405d41fdfc49a1846e32c618717d0597 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Samuel=20=C5=BD=C3=BAbor?= Date: Tue, 3 Feb 2026 15:46:53 +0100 Subject: [PATCH 1/4] gpu: rework command buffers --- src/core/CMakeLists.txt | 3 + src/core/emulation_context.cpp | 31 +- src/core/horizon/display/driver.cpp | 10 +- src/core/horizon/display/driver.hpp | 6 +- src/core/horizon/display/layer.cpp | 13 +- src/core/horizon/display/layer.hpp | 7 +- src/core/hw/tegra_x1/gpu/engines/2d.cpp | 16 +- src/core/hw/tegra_x1/gpu/engines/2d.hpp | 8 +- src/core/hw/tegra_x1/gpu/engines/3d.cpp | 144 ++++---- src/core/hw/tegra_x1/gpu/engines/3d.hpp | 49 ++- src/core/hw/tegra_x1/gpu/engines/compute.hpp | 2 +- src/core/hw/tegra_x1/gpu/engines/copy.cpp | 8 +- src/core/hw/tegra_x1/gpu/engines/copy.hpp | 6 +- .../hw/tegra_x1/gpu/engines/engine_base.hpp | 8 +- src/core/hw/tegra_x1/gpu/engines/inline.hpp | 2 +- .../hw/tegra_x1/gpu/engines/inline_base.cpp | 8 +- .../hw/tegra_x1/gpu/engines/inline_base.hpp | 14 +- src/core/hw/tegra_x1/gpu/gpu.cpp | 9 +- src/core/hw/tegra_x1/gpu/gpu.hpp | 16 +- .../hw/tegra_x1/gpu/macro/driver_base.cpp | 12 +- .../hw/tegra_x1/gpu/macro/driver_base.hpp | 11 +- .../tegra_x1/gpu/macro/interpreter/driver.cpp | 14 +- .../tegra_x1/gpu/macro/interpreter/driver.hpp | 4 +- src/core/hw/tegra_x1/gpu/pfifo.cpp | 43 ++- src/core/hw/tegra_x1/gpu/pfifo.hpp | 16 +- .../hw/tegra_x1/gpu/renderer/buffer_base.hpp | 15 +- .../hw/tegra_x1/gpu/renderer/buffer_cache.cpp | 11 +- .../hw/tegra_x1/gpu/renderer/buffer_cache.hpp | 5 +- .../hw/tegra_x1/gpu/renderer/buffer_view.hpp | 11 +- .../tegra_x1/gpu/renderer/command_buffer.hpp | 12 + .../hw/tegra_x1/gpu/renderer/index_cache.cpp | 5 +- .../hw/tegra_x1/gpu/renderer/index_cache.hpp | 3 +- .../hw/tegra_x1/gpu/renderer/metal/buffer.cpp | 18 +- .../hw/tegra_x1/gpu/renderer/metal/buffer.hpp | 9 +- .../gpu/renderer/metal/command_buffer.cpp | 189 ++++++++++ .../gpu/renderer/metal/command_buffer.hpp | 87 +++++ .../tegra_x1/gpu/renderer/metal/renderer.cpp | 331 +++++------------- .../tegra_x1/gpu/renderer/metal/renderer.hpp | 124 ++----- .../gpu/renderer/metal/surface_compositor.cpp | 45 +-- .../gpu/renderer/metal/surface_compositor.hpp | 15 +- .../tegra_x1/gpu/renderer/metal/texture.cpp | 29 +- .../tegra_x1/gpu/renderer/metal/texture.hpp | 16 +- .../tegra_x1/gpu/renderer/renderer_base.hpp | 18 +- .../gpu/renderer/surface_compositor.hpp | 5 +- .../hw/tegra_x1/gpu/renderer/texture_base.hpp | 16 +- .../tegra_x1/gpu/renderer/texture_cache.cpp | 43 ++- .../tegra_x1/gpu/renderer/texture_cache.hpp | 17 +- 47 files changed, 801 insertions(+), 683 deletions(-) create mode 100644 src/core/hw/tegra_x1/gpu/renderer/command_buffer.hpp create mode 100644 src/core/hw/tegra_x1/gpu/renderer/metal/command_buffer.cpp create mode 100644 src/core/hw/tegra_x1/gpu/renderer/metal/command_buffer.hpp diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 850e49ec..ebd5be1b 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -548,6 +548,7 @@ add_library(hydra-core hw/tegra_x1/gpu/renderer/shader_decompiler/codegen/lang/msl/emitter.hpp hw/tegra_x1/gpu/renderer/const.cpp hw/tegra_x1/gpu/renderer/const.hpp + hw/tegra_x1/gpu/renderer/command_buffer.hpp hw/tegra_x1/gpu/renderer/surface_compositor.hpp hw/tegra_x1/gpu/renderer/buffer_base.hpp hw/tegra_x1/gpu/renderer/buffer_view.hpp @@ -577,6 +578,8 @@ add_library(hydra-core hw/tegra_x1/gpu/renderer/metal/maxwell_to_mtl.cpp hw/tegra_x1/gpu/renderer/metal/maxwell_to_mtl.hpp hw/tegra_x1/gpu/renderer/metal/impl.cpp + hw/tegra_x1/gpu/renderer/metal/command_buffer.cpp + hw/tegra_x1/gpu/renderer/metal/command_buffer.hpp hw/tegra_x1/gpu/renderer/metal/surface_compositor.cpp hw/tegra_x1/gpu/renderer/metal/surface_compositor.hpp hw/tegra_x1/gpu/renderer/metal/buffer.cpp diff --git a/src/core/emulation_context.cpp b/src/core/emulation_context.cpp index a01348ec..4286a8ef 100644 --- a/src/core/emulation_context.cpp +++ b/src/core/emulation_context.cpp @@ -25,6 +25,7 @@ #include "core/hw/tegra_x1/cpu/mmu.hpp" #include "core/hw/tegra_x1/cpu/thread.hpp" #include "core/hw/tegra_x1/gpu/renderer/buffer_base.hpp" +#include "core/hw/tegra_x1/gpu/renderer/command_buffer.hpp" #include "core/hw/tegra_x1/gpu/renderer/surface_compositor.hpp" #include "core/hw/tegra_x1/gpu/renderer/texture_base.hpp" #include "core/input/device_manager.hpp" @@ -445,11 +446,15 @@ void EmulationContext::ProgressFrame(u32 width, u32 height, accumulated_dt += layer->GetAccumulatedDT(); } + // Command buffer + auto command_buffer = gpu->GetRenderer().CreateCommandBuffer(); + // Acquire present textures - bool acquired = os->GetDisplayDriver().AcquirePresentTextures(); + bool acquired = + os->GetDisplayDriver().AcquirePresentTextures(command_buffer); // Render pass - os->GetDisplayDriver().Present(compositor, width, height); + os->GetDisplayDriver().Present(command_buffer, compositor, width, height); if (loading) { if (acquired) { @@ -490,9 +495,9 @@ void EmulationContext::ProgressFrame(u32 width, u32 height, int2 size = {(i32)nintendo_logo->GetDescriptor().width, (i32)nintendo_logo->GetDescriptor().height}; int2 dst_offset = {32, 32}; - compositor->DrawTexture(nintendo_logo, IntRect2D({0, 0}, size), - IntRect2D(dst_offset, size), true, - opacity); + compositor->DrawTexture( + command_buffer, nintendo_logo, IntRect2D({0, 0}, size), + IntRect2D(dst_offset, size), true, opacity); } // Startup movie @@ -510,9 +515,9 @@ void EmulationContext::ProgressFrame(u32 width, u32 height, (i32)frame->GetDescriptor().height}; int2 dst_offset = {(i32)width - size.x() - 32, (i32)height - size.y() - 32}; - compositor->DrawTexture(frame, IntRect2D({0, 0}, size), - IntRect2D(dst_offset, size), true, - opacity); + compositor->DrawTexture( + command_buffer, frame, IntRect2D({0, 0}, size), + IntRect2D(dst_offset, size), true, opacity); } } } else { @@ -533,6 +538,9 @@ void EmulationContext::ProgressFrame(u32 width, u32 height, } } + compositor->Present(command_buffer); + + delete command_buffer; delete compositor; // Signal V-Sync @@ -575,12 +583,11 @@ void EmulationContext::TakeScreenshot() { } // Copy to a buffer - RENDERER_INSTANCE.LockMutex(); + auto command_buffer = RENDERER_INSTANCE.CreateCommandBuffer(); auto buffer = RENDERER_INSTANCE.AllocateTemporaryBuffer( static_cast(rect.size.y() * rect.size.x() * 4)); - buffer->CopyFrom(texture, rect.origin, rect.size); - RENDERER_INSTANCE.EndCommandBuffer(); - RENDERER_INSTANCE.UnlockMutex(); + buffer->CopyFrom(command_buffer, texture, rect.origin, rect.size); + delete command_buffer; // TODO: wait for the command buffer to finish diff --git a/src/core/horizon/display/driver.cpp b/src/core/horizon/display/driver.cpp index 16bbb52f..13f182b5 100644 --- a/src/core/horizon/display/driver.cpp +++ b/src/core/horizon/display/driver.cpp @@ -6,7 +6,8 @@ namespace hydra::horizon::display { Driver::Driver() { display_pool.Add(new Display()); } -bool Driver::AcquirePresentTextures() { +bool Driver::AcquirePresentTextures( + hw::tegra_x1::gpu::renderer::ICommandBuffer* command_buffer) { bool acquired = false; { std::lock_guard lock(layer_mutex); @@ -14,7 +15,8 @@ bool Driver::AcquirePresentTextures() { layer_id++) { if (!layer_pool.IsValid(layer_id)) continue; - acquired |= layer_pool.Get(layer_id)->AcquirePresentTexture(); + acquired |= + layer_pool.Get(layer_id)->AcquirePresentTexture(command_buffer); } } @@ -22,6 +24,7 @@ bool Driver::AcquirePresentTextures() { } void Driver::Present( + hw::tegra_x1::gpu::renderer::ICommandBuffer* command_buffer, hw::tegra_x1::gpu::renderer::ISurfaceCompositor* compositor, u32 width, u32 height) { std::lock_guard lock(layer_mutex); @@ -67,7 +70,8 @@ void Driver::Present( // Present for (u32 i = 0; i < sorted_layers.size(); i++) - sorted_layers[i]->Present(compositor, dst_rect, dst_scale, i != 0); + sorted_layers[i]->Present(command_buffer, compositor, dst_rect, + dst_scale, i != 0); } void Driver::SignalVSync() { diff --git a/src/core/horizon/display/driver.hpp b/src/core/horizon/display/driver.hpp index 5773c556..3ab2cdf9 100644 --- a/src/core/horizon/display/driver.hpp +++ b/src/core/horizon/display/driver.hpp @@ -61,8 +61,10 @@ class Driver { } // Presenting - bool AcquirePresentTextures(); - void Present(hw::tegra_x1::gpu::renderer::ISurfaceCompositor* compositor, + bool AcquirePresentTextures( + hw::tegra_x1::gpu::renderer::ICommandBuffer* command_buffer); + void Present(hw::tegra_x1::gpu::renderer::ICommandBuffer* command_buffer, + hw::tegra_x1::gpu::renderer::ISurfaceCompositor* compositor, u32 width, u32 height); void SignalVSync(); diff --git a/src/core/horizon/display/layer.cpp b/src/core/horizon/display/layer.cpp index cf6c4759..ca92b97a 100644 --- a/src/core/horizon/display/layer.cpp +++ b/src/core/horizon/display/layer.cpp @@ -8,7 +8,8 @@ namespace hydra::horizon::display { -bool Layer::AcquirePresentTexture() { +bool Layer::AcquirePresentTexture( + hw::tegra_x1::gpu::renderer::ICommandBuffer* command_buffer) { // Get the buffer to present auto& binder = OS_INSTANCE.GetDisplayDriver().GetBinder(binder_id); @@ -19,8 +20,8 @@ bool Layer::AcquirePresentTexture() { const auto& buffer = binder.GetBuffer(slot); // Texture - present_texture = - GPU_INSTANCE.GetTexture(process->GetMmu(), buffer.nv_buffer); + present_texture = GPU_INSTANCE.GetTexture(command_buffer, process->GetMmu(), + buffer.nv_buffer); // Rect src_rect = {}; @@ -58,7 +59,8 @@ bool Layer::AcquirePresentTexture() { return true; } -void Layer::Present(hw::tegra_x1::gpu::renderer::ISurfaceCompositor* compositor, +void Layer::Present(hw::tegra_x1::gpu::renderer::ICommandBuffer* command_buffer, + hw::tegra_x1::gpu::renderer::ISurfaceCompositor* compositor, FloatRect2D dst_rect, f32 dst_scale, bool transparent) { if (!present_texture) return; @@ -68,7 +70,8 @@ void Layer::Present(hw::tegra_x1::gpu::renderer::ISurfaceCompositor* compositor, dst_rect.size = float2(size) * dst_scale; // Draw - compositor->DrawTexture(present_texture, src_rect, dst_rect, transparent); + compositor->DrawTexture(command_buffer, present_texture, src_rect, dst_rect, + transparent); } AccumulatedTime Layer::GetAccumulatedDT() { diff --git a/src/core/horizon/display/layer.hpp b/src/core/horizon/display/layer.hpp index 88a903ab..9d1efcac 100644 --- a/src/core/horizon/display/layer.hpp +++ b/src/core/horizon/display/layer.hpp @@ -4,6 +4,7 @@ namespace hydra::hw::tegra_x1::gpu::renderer { class TextureBase; +class ICommandBuffer; class ISurfaceCompositor; } // namespace hydra::hw::tegra_x1::gpu::renderer @@ -21,8 +22,10 @@ class Layer { void Open() {} void Close() {} - bool AcquirePresentTexture(); - void Present(hw::tegra_x1::gpu::renderer::ISurfaceCompositor* compositor, + bool AcquirePresentTexture( + hw::tegra_x1::gpu::renderer::ICommandBuffer* command_buffer); + void Present(hw::tegra_x1::gpu::renderer::ICommandBuffer* command_buffer, + hw::tegra_x1::gpu::renderer::ISurfaceCompositor* compositor, FloatRect2D dst_rect, f32 dst_scale, bool transparent); // Time diff --git a/src/core/hw/tegra_x1/gpu/engines/2d.cpp b/src/core/hw/tegra_x1/gpu/engines/2d.cpp index f183a193..363e22ea 100644 --- a/src/core/hw/tegra_x1/gpu/engines/2d.cpp +++ b/src/core/hw/tegra_x1/gpu/engines/2d.cpp @@ -10,14 +10,13 @@ DEFINE_METHOD_TABLE(TwoD, 0x237, 1, Copy, u32) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-parameter" -void TwoD::Copy(GMmu& gmmu, const u32 index, - const u32 pixels_from_memory_src_y0_int) { +void TwoD::Copy(const u32 index, const u32 pixels_from_memory_src_y0_int) { auto& pixels = regs.pixels_from_memory; pixels.src_y0.integer = pixels_from_memory_src_y0_int; // TODO: can these also not be textures? - auto src = GetTexture(gmmu, regs.src, renderer::TextureUsage::Read); - auto dst = GetTexture(gmmu, regs.dst, renderer::TextureUsage::Write); + auto src = GetTexture(regs.src, renderer::TextureUsage::Read); + auto dst = GetTexture(regs.dst, renderer::TextureUsage::Write); const auto dudx = static_cast(pixels.dudx); const auto dvdy = static_cast(pixels.dvdy); @@ -28,7 +27,7 @@ void TwoD::Copy(GMmu& gmmu, const u32 index, const auto src_width = static_cast(pixels.dst_width * dudx); const auto src_height = static_cast(pixels.dst_height * dvdy); - dst->BlitFrom(src, + dst->BlitFrom(tls_crnt_command_buffer, src, {static_cast(src_x0), static_cast(src_y0), static_cast(regs.src.layer)}, {src_width, src_height, 1}, @@ -40,10 +39,10 @@ void TwoD::Copy(GMmu& gmmu, const u32 index, #pragma GCC diagnostic pop -renderer::TextureBase* TwoD::GetTexture(GMmu& gmmu, const Texture2DInfo& info, +renderer::TextureBase* TwoD::GetTexture(const Texture2DInfo& info, renderer::TextureUsage usage) { const renderer::TextureDescriptor descriptor( - gmmu.UnmapAddr(info.addr), renderer::TextureType::_2D, + tls_crnt_gmmu->UnmapAddr(info.addr), renderer::TextureType::_2D, renderer::to_texture_format(info.format), NvKind::Pitch, // TODO: correct? u32(info.width), u32(info.height), 1, @@ -53,7 +52,8 @@ renderer::TextureBase* TwoD::GetTexture(GMmu& gmmu, const Texture2DInfo& info, renderer::to_texture_format(info.format), info.width) // HACK ); - return RENDERER_INSTANCE.GetTextureCache().Find(descriptor, usage); + return RENDERER_INSTANCE.GetTextureCache().Find(tls_crnt_command_buffer, + descriptor, usage); } } // namespace hydra::hw::tegra_x1::gpu::engines diff --git a/src/core/hw/tegra_x1/gpu/engines/2d.hpp b/src/core/hw/tegra_x1/gpu/engines/2d.hpp index df55f186..ba735711 100644 --- a/src/core/hw/tegra_x1/gpu/engines/2d.hpp +++ b/src/core/hw/tegra_x1/gpu/engines/2d.hpp @@ -71,16 +71,14 @@ struct Regs2D { class TwoD : public EngineWithRegsBase { public: - void Method(GMmu& gmmu, u32 method, u32 arg) override; + void Method(u32 method, u32 arg) override; private: // Commands - void Copy(GMmu& gmmu, const u32 index, - const u32 pixels_from_memory_src_y0_int); + void Copy(const u32 index, const u32 pixels_from_memory_src_y0_int); // Helpers - static renderer::TextureBase* GetTexture(GMmu& gmmu, - const Texture2DInfo& info, + static renderer::TextureBase* GetTexture(const Texture2DInfo& info, renderer::TextureUsage usage); }; diff --git a/src/core/hw/tegra_x1/gpu/engines/3d.cpp b/src/core/hw/tegra_x1/gpu/engines/3d.cpp index 0c35f470..06107f2e 100644 --- a/src/core/hw/tegra_x1/gpu/engines/3d.cpp +++ b/src/core/hw/tegra_x1/gpu/engines/3d.cpp @@ -252,7 +252,7 @@ ThreeD::~ThreeD() { SINGLETON_UNSET_INSTANCE(); } -void ThreeD::FlushMacro(GMmu& gmmu) { macro_driver->Execute(gmmu); } +void ThreeD::FlushMacro() { macro_driver->Execute(); } void ThreeD::Macro(u32 method, u32 arg) { u32 index = (method - MACRO_METHODS_REGION) >> 1; @@ -270,33 +270,30 @@ void ThreeD::Macro(u32 method, u32 arg) { #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-parameter" -void ThreeD::LoadMmeInstructionRamPointer(GMmu& gmmu, const u32 index, - const u32 ptr) { +void ThreeD::LoadMmeInstructionRamPointer(const u32 index, const u32 ptr) { macro_driver->LoadInstructionRamPointer(ptr); } -void ThreeD::LoadMmeInstructionRam(GMmu& gmmu, const u32 index, - const u32 data) { +void ThreeD::LoadMmeInstructionRam(const u32 index, const u32 data) { macro_driver->LoadInstructionRam(data); } -void ThreeD::LoadMmeStartAddressRamPointer(GMmu& gmmu, const u32 index, - const u32 ptr) { +void ThreeD::LoadMmeStartAddressRamPointer(const u32 index, const u32 ptr) { macro_driver->LoadStartAddressRamPointer(ptr); } -void ThreeD::LoadMmeStartAddressRam(GMmu& gmmu, const u32 index, - const u32 data) { +void ThreeD::LoadMmeStartAddressRam(const u32 index, const u32 data) { macro_driver->LoadStartAddressRam(data); } -void ThreeD::DrawVertexArray(GMmu& gmmu, const u32 index, u32 count) { - if (!DrawInternal(gmmu)) +void ThreeD::DrawVertexArray(const u32 index, u32 count) { + if (!DrawInternal()) return; auto index_type = IndexType::None; auto primitive_type = regs.begin.primitive_type; const auto index_buffer = RENDERER_INSTANCE.GetIndexCache().Decode( + tls_crnt_command_buffer, {.type = index_type, .primitive_type = primitive_type, .count = count}, index_type, primitive_type, count); @@ -309,23 +306,25 @@ void ThreeD::DrawVertexArray(GMmu& gmmu, const u32 index, u32 count) { // Vertex start is set as vertex base instead, as start is now index // start // TODO: instance count - RENDERER_INSTANCE.DrawIndexed(primitive_type, 0, count, - regs.vertex_array_start, + RENDERER_INSTANCE.DrawIndexed(tls_crnt_command_buffer, primitive_type, + 0, count, regs.vertex_array_start, regs.base_instance, 1); } else { // Draw // TODO: instance count - RENDERER_INSTANCE.Draw(primitive_type, regs.vertex_array_start, count, + RENDERER_INSTANCE.Draw(tls_crnt_command_buffer, primitive_type, + regs.vertex_array_start, count, regs.base_instance, 1); } } -void ThreeD::DrawVertexElements(GMmu& gmmu, const u32 index, u32 count) { - if (!DrawInternal(gmmu)) +void ThreeD::DrawVertexElements(const u32 index, u32 count) { + if (!DrawInternal()) return; // Index buffer - gpu_vaddr_t index_buffer_ptr = gmmu.UnmapAddr(regs.index_buffer_addr); + gpu_vaddr_t index_buffer_ptr = + tls_crnt_gmmu->UnmapAddr(regs.index_buffer_addr); // TODO: uncomment? u32 index_buffer_size = count * get_index_type_size( @@ -337,6 +336,7 @@ void ThreeD::DrawVertexElements(GMmu& gmmu, const u32 index, u32 count) { auto index_type = regs.index_type; auto primitive_type = regs.begin.primitive_type; const auto index_buffer = RENDERER_INSTANCE.GetIndexCache().Decode( + tls_crnt_command_buffer, {.type = index_type, .primitive_type = primitive_type, .count = count, @@ -349,13 +349,12 @@ void ThreeD::DrawVertexElements(GMmu& gmmu, const u32 index, u32 count) { // Draw // TODO: instance count - RENDERER_INSTANCE.DrawIndexed(primitive_type, regs.vertex_elements_start, - count, regs.base_vertex, regs.base_instance, - 1); + RENDERER_INSTANCE.DrawIndexed(tls_crnt_command_buffer, primitive_type, + regs.vertex_elements_start, count, + regs.base_vertex, regs.base_instance, 1); } -void ThreeD::ClearBuffer(GMmu& gmmu, const u32 index, - const ClearBufferData data) { +void ThreeD::ClearBuffer(const u32 index, const ClearBufferData data) { LOG_DEBUG(Gpu, "Depth: {}, stencil: {}, color mask: 0x{:x}, target id: {}, " "layer id: {}", @@ -366,39 +365,42 @@ void ThreeD::ClearBuffer(GMmu& gmmu, const u32 index, // TODO: implement // Regular clear - RENDERER_INSTANCE.BindRenderPass(GetRenderPass(gmmu)); + RENDERER_INSTANCE.BindRenderPass(GetRenderPass()); if (data.color_mask != 0x0) - RENDERER_INSTANCE.ClearColor(data.target_id, data.layer_id, - data.color_mask, regs.clear_color); + RENDERER_INSTANCE.ClearColor(tls_crnt_command_buffer, data.target_id, + data.layer_id, data.color_mask, + regs.clear_color); if (data.depth) - RENDERER_INSTANCE.ClearDepth(data.layer_id, regs.clear_depth); + RENDERER_INSTANCE.ClearDepth(tls_crnt_command_buffer, data.layer_id, + regs.clear_depth); if (data.stencil) - RENDERER_INSTANCE.ClearStencil(data.layer_id, regs.clear_stencil); + RENDERER_INSTANCE.ClearStencil(tls_crnt_command_buffer, data.layer_id, + regs.clear_stencil); } -void ThreeD::SetReportSemaphore(GMmu& gmmu, const u32 index, const u32 data) { +void ThreeD::SetReportSemaphore(const u32 index, const u32 data) { ONCE(LOG_FUNC_STUBBED(Engines)); - const uptr ptr = gmmu.UnmapAddr(regs.report_semaphore_addr); + const uptr ptr = tls_crnt_gmmu->UnmapAddr(regs.report_semaphore_addr); // HACK *reinterpret_cast(ptr) = regs.report_semaphore_payload; } -void ThreeD::FirmwareCall4(GMmu& gmmu, const u32 index, const u32 data) { +void ThreeD::FirmwareCall4(const u32 index, const u32 data) { ONCE(LOG_FUNC_STUBBED(Engines)); // TODO: find out what this does regs.mme_scratch[0] = 0x1; } -void ThreeD::LoadConstBuffer(GMmu& gmmu, const u32 index, const u32 data) { +void ThreeD::LoadConstBuffer(const u32 index, const u32 data) { const uptr const_buffer_gpu_addr = u64(regs.const_buffer_selector); const uptr gpu_addr = const_buffer_gpu_addr + regs.load_const_buffer_offset; - const auto ptr = gmmu.UnmapAddr(gpu_addr); + const auto ptr = tls_crnt_gmmu->UnmapAddr(gpu_addr); *reinterpret_cast(ptr) = data; regs.load_const_buffer_offset += sizeof(u32); @@ -408,7 +410,7 @@ void ThreeD::LoadConstBuffer(GMmu& gmmu, const u32 index, const u32 data) { RENDERER_INSTANCE.InvalidateMemory(Range::FromSize(ptr, sizeof(u32))); } -void ThreeD::BindGroup(GMmu& gmmu, const u32 index, const u32 data) { +void ThreeD::BindGroup(const u32 index, const u32 data) { const auto shader_stage_index = index / 0x8; const auto group = index % 0x8; @@ -421,7 +423,7 @@ void ThreeD::BindGroup(GMmu& gmmu, const u32 index, const u32 data) { bool valid = data & 0x1; if (valid) { const uptr const_buffer_gpu_ptr = - gmmu.UnmapAddr(regs.const_buffer_selector); + tls_crnt_gmmu->UnmapAddr(regs.const_buffer_selector); const auto range = Range::FromSize( const_buffer_gpu_ptr, regs.const_buffer_selector_size); @@ -440,7 +442,7 @@ void ThreeD::BindGroup(GMmu& gmmu, const u32 index, const u32 data) { #pragma GCC diagnostic pop renderer::TextureBase* -ThreeD::GetColorTargetTexture(GMmu& gmmu, u32 render_target_index) const { +ThreeD::GetColorTargetTexture(u32 render_target_index) const { const auto& render_target = regs.color_targets[render_target_index]; const auto gpu_addr = u64(render_target.addr); @@ -455,7 +457,7 @@ ThreeD::GetColorTargetTexture(GMmu& gmmu, u32 render_target_index) const { const u32 width_hint = regs.screen_scissor.horizontal.x + regs.screen_scissor.horizontal.width; const renderer::TextureDescriptor descriptor( - gmmu.UnmapAddr(gpu_addr), renderer::TextureType::_2D, format, + tls_crnt_gmmu->UnmapAddr(gpu_addr), renderer::TextureType::_2D, format, NvKind::Pitch, // TODO: correct? GetMinimumWidth(render_target.width, format, width_hint, render_target.tile_mode.is_linear), @@ -464,10 +466,10 @@ ThreeD::GetColorTargetTexture(GMmu& gmmu, u32 render_target_index) const { get_texture_format_stride(format, render_target.width)); return RENDERER_INSTANCE.GetTextureCache().Find( - descriptor, renderer::TextureUsage::Write); + tls_crnt_command_buffer, descriptor, renderer::TextureUsage::Write); } -renderer::TextureBase* ThreeD::GetDepthStencilTargetTexture(GMmu& gmmu) const { +renderer::TextureBase* ThreeD::GetDepthStencilTargetTexture() const { const auto gpu_addr = u64(regs.depth_target_addr); if (gpu_addr == 0x0) { // TODO: is this really an error? @@ -479,7 +481,7 @@ renderer::TextureBase* ThreeD::GetDepthStencilTargetTexture(GMmu& gmmu) const { const u32 width_hint = regs.screen_scissor.horizontal.x + regs.screen_scissor.horizontal.width; const renderer::TextureDescriptor descriptor( - gmmu.UnmapAddr(gpu_addr), renderer::TextureType::_2D, format, + tls_crnt_gmmu->UnmapAddr(gpu_addr), renderer::TextureType::_2D, format, NvKind::Pitch, // TODO: correct? GetMinimumWidth(regs.depth_target_width, format, width_hint, false), regs.depth_target_height, 1, @@ -487,25 +489,24 @@ renderer::TextureBase* ThreeD::GetDepthStencilTargetTexture(GMmu& gmmu) const { get_texture_format_stride(format, regs.depth_target_width)); return RENDERER_INSTANCE.GetTextureCache().Find( - descriptor, renderer::TextureUsage::Write); + tls_crnt_command_buffer, descriptor, renderer::TextureUsage::Write); } -renderer::RenderPassBase* ThreeD::GetRenderPass(GMmu& gmmu) const { +renderer::RenderPassBase* ThreeD::GetRenderPass() const { renderer::RenderPassDescriptor descriptor{}; // Color targets for (u32 i = 0; i < regs.color_target_control.count; i++) { descriptor.color_targets[i] = { - .texture = GetColorTargetTexture( - gmmu, regs.color_target_control.GetMap(i)), + .texture = + GetColorTargetTexture(regs.color_target_control.GetMap(i)), }; } // Depth stencil target descriptor.depth_stencil_target = { - .texture = - (regs.depth_target_enabled ? GetDepthStencilTargetTexture(gmmu) - : nullptr), + .texture = (regs.depth_target_enabled ? GetDepthStencilTargetTexture() + : nullptr), }; return RENDERER_INSTANCE.GetRenderPassCache().Find(descriptor); @@ -594,13 +595,13 @@ renderer::ShaderBase* ThreeD::GetShaderUnchecked(ShaderStage stage) const { return active_shaders[u32(to_renderer_shader_type(stage))]; } -renderer::ShaderBase* ThreeD::GetShader(GMmu& gmmu, ShaderStage stage) { +renderer::ShaderBase* ThreeD::GetShader(ShaderStage stage) { const auto& program = regs.shader_programs[usize(stage)]; if (!program.config.enable) return nullptr; uptr gpu_addr = u64(regs.shader_program_region) + program.offset; - uptr ptr = gmmu.UnmapAddr(gpu_addr); + uptr ptr = tls_crnt_gmmu->UnmapAddr(gpu_addr); renderer::GuestShaderDescriptor descriptor{ .stage = stage, @@ -629,15 +630,15 @@ renderer::ShaderBase* ThreeD::GetShader(GMmu& gmmu, ShaderStage stage) { return active_shader; } -renderer::PipelineBase* ThreeD::GetPipeline(GMmu& gmmu) { +renderer::PipelineBase* ThreeD::GetPipeline() { renderer::PipelineDescriptor descriptor{}; // Shaders // TODO: add all shaders descriptor.shaders[u32(renderer::ShaderType::Vertex)] = - GetShader(gmmu, ShaderStage::VertexB); + GetShader(ShaderStage::VertexB); descriptor.shaders[u32(renderer::ShaderType::Fragment)] = - GetShader(gmmu, ShaderStage::Fragment); + GetShader(ShaderStage::Fragment); // Vertex state @@ -718,8 +719,7 @@ renderer::PipelineBase* ThreeD::GetPipeline(GMmu& gmmu) { return RENDERER_INSTANCE.GetPipelineCache().Find(descriptor); } -renderer::BufferView ThreeD::GetVertexBuffer(GMmu& gmmu, - u32 vertex_array_index) const { +renderer::BufferView ThreeD::GetVertexBuffer(u32 vertex_array_index) const { const auto& vertex_array = regs.vertex_arrays[vertex_array_index]; // HACK @@ -728,15 +728,15 @@ renderer::BufferView ThreeD::GetVertexBuffer(GMmu& gmmu, return renderer::BufferView(); } - const auto ptr = gmmu.UnmapAddr(vertex_array.addr); + const auto ptr = tls_crnt_gmmu->UnmapAddr(vertex_array.addr); const auto size = u64(regs.vertex_array_limits[vertex_array_index]) + 1 - u64(vertex_array.addr); return RENDERER_INSTANCE.GetBufferCache().Get( - Range::FromSize(ptr, size)); + tls_crnt_command_buffer, Range::FromSize(ptr, size)); } renderer::TextureBase* -ThreeD::GetTexture(GMmu& gmmu, const TextureImageControl& tic) const { +ThreeD::GetTexture(const TextureImageControl& tic) const { // HACK if (tic.hdr_version == TicHdrVersion::_1DBuffer) { LOG_ERROR(Engines, "1D buffer"); @@ -773,8 +773,8 @@ ThreeD::GetTexture(GMmu& gmmu, const TextureImageControl& tic) const { } const renderer::TextureDescriptor descriptor( - gmmu.UnmapAddr(gpu_addr), ToTextureType(tic.texture_type), format, kind, - static_cast(tic.width_minus_one + 1), + tls_crnt_gmmu->UnmapAddr(gpu_addr), ToTextureType(tic.texture_type), + format, kind, static_cast(tic.width_minus_one + 1), static_cast(tic.height_minus_one + 1), static_cast(tic.depth_minus_one + 1), tic.tile_height_gobs_log2, // TODO: correct? @@ -784,7 +784,7 @@ ThreeD::GetTexture(GMmu& gmmu, const TextureImageControl& tic) const { tic.format_word.swizzle_z, tic.format_word.swizzle_w)); return RENDERER_INSTANCE.GetTextureCache().Find( - descriptor, renderer::TextureUsage::Read); + tls_crnt_command_buffer, descriptor, renderer::TextureUsage::Read); } renderer::SamplerBase* @@ -810,8 +810,7 @@ ThreeD::GetSampler(const TextureSamplerControl& tsc) const { } void ThreeD::ConfigureShaderStage( - GMmu& gmmu, const ShaderStage stage, - const TextureImageControl* tex_header_pool, + const ShaderStage stage, const TextureImageControl* tex_header_pool, const TextureSamplerControl* tex_sampler_pool) { const auto shader_type = to_renderer_shader_type(stage); const u32 stage_index = static_cast(stage) - @@ -834,7 +833,8 @@ void ThreeD::ConfigureShaderStage( continue; } - const auto buffer = RENDERER_INSTANCE.GetBufferCache().Get(range); + const auto buffer = RENDERER_INSTANCE.GetBufferCache().Get( + tls_crnt_command_buffer, range); RENDERER_INSTANCE.BindUniformBuffer(buffer, shader_type, index); } @@ -853,7 +853,7 @@ void ThreeD::ConfigureShaderStage( // Image const auto image_handle = get_image_handle(texture_handle); const auto& tic = tex_header_pool[image_handle]; - const auto texture = GetTexture(gmmu, tic); + const auto texture = GetTexture(tic); // Sampler const auto sampler_handle = get_sampler_handle(texture_handle); @@ -867,9 +867,9 @@ void ThreeD::ConfigureShaderStage( } } -bool ThreeD::DrawInternal(GMmu& gmmu) { +bool ThreeD::DrawInternal() { // Flush tracked pages - gmmu.GetMmu()->FlushTrackedPages(); + tls_crnt_gmmu->GetMmu()->FlushTrackedPages(); // State if (!regs.shader_programs[(u32)ShaderStage::VertexB].config.enable) { @@ -877,14 +877,14 @@ bool ThreeD::DrawInternal(GMmu& gmmu) { return false; } - RENDERER_INSTANCE.BindRenderPass(GetRenderPass(gmmu)); + RENDERER_INSTANCE.BindRenderPass(GetRenderPass()); for (u32 i = 0; i < VIEWPORT_COUNT; i++) { RENDERER_INSTANCE.SetViewport(i, GetViewport(i)); RENDERER_INSTANCE.SetScissor(i, GetScissor(i)); } - RENDERER_INSTANCE.BindPipeline(GetPipeline(gmmu)); + RENDERER_INSTANCE.BindPipeline(GetPipeline()); for (u32 i = 0; i < VERTEX_ARRAY_COUNT; i++) { const auto& vertex_array = regs.vertex_arrays[i]; @@ -895,7 +895,7 @@ bool ThreeD::DrawInternal(GMmu& gmmu) { continue; } - const auto buffer = GetVertexBuffer(gmmu, i); + const auto buffer = GetVertexBuffer(i); RENDERER_INSTANCE.BindVertexBuffer(buffer, i); } @@ -905,14 +905,14 @@ bool ThreeD::DrawInternal(GMmu& gmmu) { // TODO: remove the condition if (tex_header_pool_gpu_addr != 0x0 && tex_sampler_pool_gpu_addr != 0x0) { const auto tex_header_pool = reinterpret_cast( - gmmu.UnmapAddr(tex_header_pool_gpu_addr)); + tls_crnt_gmmu->UnmapAddr(tex_header_pool_gpu_addr)); const auto tex_sampler_pool = reinterpret_cast( - gmmu.UnmapAddr(tex_sampler_pool_gpu_addr)); + tls_crnt_gmmu->UnmapAddr(tex_sampler_pool_gpu_addr)); // TODO: configure all stages - ConfigureShaderStage(gmmu, ShaderStage::VertexB, tex_header_pool, + ConfigureShaderStage(ShaderStage::VertexB, tex_header_pool, tex_sampler_pool); - ConfigureShaderStage(gmmu, ShaderStage::Fragment, tex_header_pool, + ConfigureShaderStage(ShaderStage::Fragment, tex_header_pool, tex_sampler_pool); } diff --git a/src/core/hw/tegra_x1/gpu/engines/3d.hpp b/src/core/hw/tegra_x1/gpu/engines/3d.hpp index 26413347..5c6b4684 100644 --- a/src/core/hw/tegra_x1/gpu/engines/3d.hpp +++ b/src/core/hw/tegra_x1/gpu/engines/3d.hpp @@ -558,9 +558,9 @@ class ThreeD : public EngineWithRegsBase, public InlineBase { ThreeD(); ~ThreeD() override; - void Method(GMmu& gmmu, u32 method, u32 arg) override; + void Method(u32 method, u32 arg) override; - void FlushMacro(GMmu& gmmu) override; + void FlushMacro() override; // Getters const Regs3D& GetRegs() const { return regs; } @@ -583,15 +583,13 @@ class ThreeD : public EngineWithRegsBase, public InlineBase { // Methods DEFINE_INLINE_ENGINE_METHODS; - void LoadMmeInstructionRamPointer(GMmu& gmmu, const u32 index, - const u32 ptr); - void LoadMmeInstructionRam(GMmu& gmmu, const u32 index, const u32 data); - void LoadMmeStartAddressRamPointer(GMmu& gmmu, const u32 index, - const u32 ptr); - void LoadMmeStartAddressRam(GMmu& gmmu, const u32 index, const u32 data); + void LoadMmeInstructionRamPointer(const u32 index, const u32 ptr); + void LoadMmeInstructionRam(const u32 index, const u32 data); + void LoadMmeStartAddressRamPointer(const u32 index, const u32 ptr); + void LoadMmeStartAddressRam(const u32 index, const u32 data); - void DrawVertexArray(GMmu& gmmu, const u32 index, u32 count); - void DrawVertexElements(GMmu& gmmu, const u32 index, u32 count); + void DrawVertexArray(const u32 index, u32 count); + void DrawVertexElements(const u32 index, u32 count); struct ClearBufferData { bool depth : 1; @@ -601,37 +599,34 @@ class ThreeD : public EngineWithRegsBase, public InlineBase { u32 layer_id : 11; }; - void ClearBuffer(GMmu& gmmu, const u32 index, const ClearBufferData data); + void ClearBuffer(const u32 index, const ClearBufferData data); // HACK - void SetReportSemaphore(GMmu& gmmu, const u32 index, const u32 data); + void SetReportSemaphore(const u32 index, const u32 data); - void FirmwareCall4(GMmu& gmmu, const u32 index, const u32 data); + void FirmwareCall4(const u32 index, const u32 data); - void LoadConstBuffer(GMmu& gmmu, const u32 index, const u32 data); - void BindGroup(GMmu& gmmu, const u32 index, const u32 data); + void LoadConstBuffer(const u32 index, const u32 data); + void BindGroup(const u32 index, const u32 data); // Helpers - renderer::TextureBase* GetColorTargetTexture(GMmu& gmmu, - u32 render_target_index) const; - renderer::TextureBase* GetDepthStencilTargetTexture(GMmu& gmmu) const; - renderer::RenderPassBase* GetRenderPass(GMmu& gmmu) const; + renderer::TextureBase* GetColorTargetTexture(u32 render_target_index) const; + renderer::TextureBase* GetDepthStencilTargetTexture() const; + renderer::RenderPassBase* GetRenderPass() const; renderer::Viewport GetViewport(u32 index); renderer::Scissor GetScissor(u32 index); renderer::ShaderBase* GetShaderUnchecked(ShaderStage stage) const; - renderer::ShaderBase* GetShader(GMmu& gmmu, ShaderStage stage); - renderer::PipelineBase* GetPipeline(GMmu& gmmu); - renderer::BufferView GetVertexBuffer(GMmu& gmmu, - u32 vertex_array_index) const; - renderer::TextureBase* GetTexture(GMmu& gmmu, - const TextureImageControl& tic) const; + renderer::ShaderBase* GetShader(ShaderStage stage); + renderer::PipelineBase* GetPipeline(); + renderer::BufferView GetVertexBuffer(u32 vertex_array_index) const; + renderer::TextureBase* GetTexture(const TextureImageControl& tic) const; renderer::SamplerBase* GetSampler(const TextureSamplerControl& tsc) const; - void ConfigureShaderStage(GMmu& gmmu, const ShaderStage stage, + void ConfigureShaderStage(const ShaderStage stage, const TextureImageControl* tex_header_pool, const TextureSamplerControl* tex_sampler_pool); - bool DrawInternal(GMmu& gmmu); + bool DrawInternal(); }; } // namespace hydra::hw::tegra_x1::gpu::engines diff --git a/src/core/hw/tegra_x1/gpu/engines/compute.hpp b/src/core/hw/tegra_x1/gpu/engines/compute.hpp index f21ef3d0..c708932f 100644 --- a/src/core/hw/tegra_x1/gpu/engines/compute.hpp +++ b/src/core/hw/tegra_x1/gpu/engines/compute.hpp @@ -12,7 +12,7 @@ struct RegsCompute { class Compute : public EngineWithRegsBase, public InlineBase { public: - void Method(GMmu& gmmu, u32 method, u32 arg) override; + void Method(u32 method, u32 arg) override; private: // Methods diff --git a/src/core/hw/tegra_x1/gpu/engines/copy.cpp b/src/core/hw/tegra_x1/gpu/engines/copy.cpp index 4cc8f8dc..95ec80b6 100644 --- a/src/core/hw/tegra_x1/gpu/engines/copy.cpp +++ b/src/core/hw/tegra_x1/gpu/engines/copy.cpp @@ -12,7 +12,7 @@ DEFINE_METHOD_TABLE(Copy, 0xc0, 1, LaunchDMA, LaunchDMAData) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-parameter" -void Copy::LaunchDMA(GMmu& gmmu, const u32 index, const LaunchDMAData data) { +void Copy::LaunchDMA(const u32 index, const LaunchDMAData data) { // TODO: implement component remapping // HACK u32 src_stride = regs.src.stride; @@ -37,8 +37,8 @@ void Copy::LaunchDMA(GMmu& gmmu, const u32 index, const LaunchDMAData data) { } } - const auto src_ptr = gmmu.UnmapAddr(regs.offset_in); - const auto dst_ptr = gmmu.UnmapAddr(regs.offset_out); + const auto src_ptr = tls_crnt_gmmu->UnmapAddr(regs.offset_in); + const auto dst_ptr = tls_crnt_gmmu->UnmapAddr(regs.offset_out); if (data.src_memory_layout == MemoryLayout::Pitch) { if (data.dst_memory_layout == MemoryLayout::Pitch) { for (u32 i = 0; i < regs.line_count; i++) @@ -80,7 +80,7 @@ void Copy::LaunchDMA(GMmu& gmmu, const u32 index, const LaunchDMAData data) { #pragma GCC diagnostic pop /* -renderer::BufferBase* Copy::GetBuffer(GMmu& gmmu, const Iova addr, const usize +renderer::BufferBase* Copy::GetBuffer(const Iova addr, const usize size) { const renderer::BufferDescriptor descriptor{ .ptr = gmmu.UnmapAddr(addr), .size = size, }; diff --git a/src/core/hw/tegra_x1/gpu/engines/copy.hpp b/src/core/hw/tegra_x1/gpu/engines/copy.hpp index 94d13977..be48efbc 100644 --- a/src/core/hw/tegra_x1/gpu/engines/copy.hpp +++ b/src/core/hw/tegra_x1/gpu/engines/copy.hpp @@ -118,7 +118,7 @@ struct RegsCopy { class Copy : public EngineWithRegsBase { public: - void Method(GMmu& gmmu, u32 method, u32 arg) override; + void Method(u32 method, u32 arg) override; private: // Commands @@ -140,10 +140,10 @@ class Copy : public EngineWithRegsBase { BypassL2 bypass_l2 : 1; }; - void LaunchDMA(GMmu& gmmu, const u32 index, const LaunchDMAData data); + void LaunchDMA(const u32 index, const LaunchDMAData data); // Helpers - // static renderer::BufferBase* GetBuffer(GMmu& gmmu, const Iova addr, + // static renderer::BufferBase* GetBuffer(const Iova addr, // const usize size); // static renderer::TextureBase* GetTexture(const u32 gpu_addr_lo, // const u32 gpu_addr_hi, diff --git a/src/core/hw/tegra_x1/gpu/engines/engine_base.hpp b/src/core/hw/tegra_x1/gpu/engines/engine_base.hpp index ff1a5b02..e41f7c44 100644 --- a/src/core/hw/tegra_x1/gpu/engines/engine_base.hpp +++ b/src/core/hw/tegra_x1/gpu/engines/engine_base.hpp @@ -4,11 +4,11 @@ #define METHOD_CASE(method_begin, method_count, func, arg_type) \ case method_begin ...(method_begin + method_count - 1): \ - func(gmmu, method - method_begin, std::bit_cast(arg)); \ + func(method - method_begin, std::bit_cast(arg)); \ break; #define DEFINE_METHOD_TABLE(type, ...) \ - void type::Method(GMmu& gmmu, u32 method, u32 arg) { \ + void type::Method(u32 method, u32 arg) { \ if (method >= MACRO_METHODS_REGION) { \ Macro(method, arg); \ return; \ @@ -36,9 +36,9 @@ class EngineBase { virtual ~EngineBase() = default; - virtual void Method(GMmu& gmmu, u32 method, u32 arg) = 0; + virtual void Method(u32 method, u32 arg) = 0; - virtual void FlushMacro([[maybe_unused]] GMmu& gmmu) { + virtual void FlushMacro() { LOG_ERROR(Engines, "This engine does not support macros"); throw Error::MacrosNotSupported; } diff --git a/src/core/hw/tegra_x1/gpu/engines/inline.hpp b/src/core/hw/tegra_x1/gpu/engines/inline.hpp index c1929bc8..7d5471af 100644 --- a/src/core/hw/tegra_x1/gpu/engines/inline.hpp +++ b/src/core/hw/tegra_x1/gpu/engines/inline.hpp @@ -10,7 +10,7 @@ struct RegsInline_ { class Inline : public EngineWithRegsBase, public InlineBase { public: - void Method(GMmu& gmmu, u32 method, u32 arg) override; + void Method(u32 method, u32 arg) override; private: // Methods diff --git a/src/core/hw/tegra_x1/gpu/engines/inline_base.cpp b/src/core/hw/tegra_x1/gpu/engines/inline_base.cpp index ff7f664b..5c1b935c 100644 --- a/src/core/hw/tegra_x1/gpu/engines/inline_base.cpp +++ b/src/core/hw/tegra_x1/gpu/engines/inline_base.cpp @@ -8,13 +8,13 @@ namespace hydra::hw::tegra_x1::gpu::engines { #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-parameter" -void InlineBase::LaunchDMAImpl(GMmu& gmmu, RegsInline& regs, const u32 index, +void InlineBase::LaunchDMAImpl(RegsInline& regs, const u32 index, const u32 data) { LOG_FUNC_WITH_ARGS_STUBBED(Engines, "index: {}, data: {:#x}", index, data); } -void InlineBase::LoadInlineDataImpl(GMmu& gmmu, RegsInline& regs, - const u32 index, const u32 data) { +void InlineBase::LoadInlineDataImpl(RegsInline& regs, const u32 index, + const u32 data) { inline_data.push_back(data); // TODO: correct? if (inline_data.size() * sizeof(u32) == @@ -23,7 +23,7 @@ void InlineBase::LoadInlineDataImpl(GMmu& gmmu, RegsInline& regs, // TODO: determine what type of copy this is based on launch DMA args // Buffer to buffer - uptr dst_ptr = gmmu.UnmapAddr(regs.offset_out); + uptr dst_ptr = tls_crnt_gmmu->UnmapAddr(regs.offset_out); // TODO: do a Gpu copy instead? memcpy(reinterpret_cast(dst_ptr), inline_data.data(), inline_data.size() * sizeof(u32)); diff --git a/src/core/hw/tegra_x1/gpu/engines/inline_base.hpp b/src/core/hw/tegra_x1/gpu/engines/inline_base.hpp index ff3e3d0d..6fda725f 100644 --- a/src/core/hw/tegra_x1/gpu/engines/inline_base.hpp +++ b/src/core/hw/tegra_x1/gpu/engines/inline_base.hpp @@ -5,11 +5,11 @@ #define INLINE_ENGINE_TABLE \ 0x6c, 1, LaunchDMA, u32, 0x6d, 1, LoadInlineData, u32 #define DEFINE_INLINE_ENGINE_METHODS \ - void LaunchDMA(GMmu& gmmu, const u32 index, const u32 data) { \ - LaunchDMAImpl(gmmu, regs.regs_inline, index, data); \ + void LaunchDMA(const u32 index, const u32 data) { \ + LaunchDMAImpl(regs.regs_inline, index, data); \ } \ - void LoadInlineData(GMmu& gmmu, const u32 index, const u32 data) { \ - LoadInlineDataImpl(gmmu, regs.regs_inline, index, data); \ + void LoadInlineData(const u32 index, const u32 data) { \ + LoadInlineDataImpl(regs.regs_inline, index, data); \ } namespace hydra::hw::tegra_x1::gpu::engines { @@ -36,10 +36,8 @@ struct RegsInline { class InlineBase { protected: // Commands - void LaunchDMAImpl(GMmu& gmmu, RegsInline& regs, const u32 index, - const u32 data); - void LoadInlineDataImpl(GMmu& gmmu, RegsInline& regs, const u32 index, - const u32 data); + void LaunchDMAImpl(RegsInline& regs, const u32 index, const u32 data); + void LoadInlineDataImpl(RegsInline& regs, const u32 index, const u32 data); private: std::vector inline_data; diff --git a/src/core/hw/tegra_x1/gpu/gpu.cpp b/src/core/hw/tegra_x1/gpu/gpu.cpp index b4d955d2..5e36111e 100644 --- a/src/core/hw/tegra_x1/gpu/gpu.cpp +++ b/src/core/hw/tegra_x1/gpu/gpu.cpp @@ -37,7 +37,7 @@ Gpu::~Gpu() { SINGLETON_UNSET_INSTANCE(); } -void Gpu::SubchannelMethod(GMmu& gmmu, u32 subchannel, u32 method, u32 arg) { +void Gpu::SubchannelMethod(u32 subchannel, u32 method, u32 arg) { if (method == 0x0) { // SetEngine ASSERT_DEBUG(subchannel <= SUBCHANNEL_COUNT, Gpu, "Invalid subchannel {}", subchannel); @@ -76,10 +76,11 @@ void Gpu::SubchannelMethod(GMmu& gmmu, u32 subchannel, u32 method, u32 arg) { return; } - GetEngineAtSubchannel(subchannel)->Method(gmmu, method, arg); + GetEngineAtSubchannel(subchannel)->Method(method, arg); } -renderer::TextureBase* Gpu::GetTexture(cpu::IMmu* mmu, +renderer::TextureBase* Gpu::GetTexture(renderer::ICommandBuffer* command_buffer, + cpu::IMmu* mmu, const NvGraphicsBuffer& buff) { LOG_DEBUG(Gpu, "Map id: {}, width: {}, " @@ -94,7 +95,7 @@ renderer::TextureBase* Gpu::GetTexture(cpu::IMmu* mmu, buff.planes[0].kind, buff.planes[0].width, buff.planes[0].height, 1, buff.planes[0].block_height_log2, buff.planes[0].pitch); - return renderer->GetTextureCache().Find(descriptor, + return renderer->GetTextureCache().Find(command_buffer, descriptor, renderer::TextureUsage::Present); } diff --git a/src/core/hw/tegra_x1/gpu/gpu.hpp b/src/core/hw/tegra_x1/gpu/gpu.hpp index 3b86d548..a1a17f7f 100644 --- a/src/core/hw/tegra_x1/gpu/gpu.hpp +++ b/src/core/hw/tegra_x1/gpu/gpu.hpp @@ -18,6 +18,10 @@ class IMmu; namespace hydra::hw::tegra_x1::gpu { +namespace renderer { +class ICommandBuffer; +} + struct MemoryMap { uptr addr = 0; usize size; @@ -28,6 +32,9 @@ struct MemoryMap { constexpr usize SUBCHANNEL_COUNT = 5; // TODO: correct? +inline thread_local GMmu* tls_crnt_gmmu = nullptr; +inline thread_local renderer::ICommandBuffer* tls_crnt_command_buffer = nullptr; + class Gpu { public: static Gpu& GetInstance(); @@ -80,14 +87,15 @@ class Gpu { return engine; } - void SubchannelMethod(GMmu& gmmu, u32 subchannel, u32 method, u32 arg); + void SubchannelMethod(u32 subchannel, u32 method, u32 arg); - void SubchannelFlushMacro(GMmu& gmmu, u32 subchannel) { - GetEngineAtSubchannel(subchannel)->FlushMacro(gmmu); + void SubchannelFlushMacro(u32 subchannel) { + GetEngineAtSubchannel(subchannel)->FlushMacro(); } // Texture - renderer::TextureBase* GetTexture(cpu::IMmu* mmu, + renderer::TextureBase* GetTexture(renderer::ICommandBuffer* command_buffer, + cpu::IMmu* mmu, const NvGraphicsBuffer& buff); // Getters diff --git a/src/core/hw/tegra_x1/gpu/macro/driver_base.cpp b/src/core/hw/tegra_x1/gpu/macro/driver_base.cpp index 730b9971..ba2eb2c3 100644 --- a/src/core/hw/tegra_x1/gpu/macro/driver_base.cpp +++ b/src/core/hw/tegra_x1/gpu/macro/driver_base.cpp @@ -4,8 +4,8 @@ namespace hydra::hw::tegra_x1::gpu::macro { -void DriverBase::Execute(GMmu& gmmu) { - ExecuteImpl(gmmu, start_address_ram[index], param1); +void DriverBase::Execute() { + ExecuteImpl(start_address_ram[index], param1); // TODO: what should happen when there are still parameters in the queue? if (!param_queue.empty()) { @@ -31,7 +31,7 @@ void DriverBase::LoadStartAddressRam(u32 data) { start_address_ram[start_address_ram_ptr++] = data; } -bool DriverBase::ParseInstruction(GMmu& gmmu, u32 pc) { +bool DriverBase::ParseInstruction(u32 pc) { u32 instruction = instruction_ram[pc]; // LOG_DEBUG(Macro, "PC: 0x{:08x}, instruction: 0x{:08x}", pc, instruction); @@ -110,7 +110,7 @@ bool DriverBase::ParseInstruction(GMmu& gmmu, u32 pc) { ResultOperation result_op = static_cast(GET_DATA_U32(4, 3)); u8 rD = GET_REG(8); - InstResult(gmmu, result_op, rD, value); + InstResult(result_op, rD, value); } // Check if exit @@ -128,8 +128,8 @@ void DriverBase::SetMethod(u32 value) { increment = (value >> 12) & 0x3f; } -void DriverBase::Send(GMmu& gmmu, u32 arg) { - engine_3d->Method(gmmu, method, arg); +void DriverBase::Send(u32 arg) { + engine_3d->Method(method, arg); method += increment; } diff --git a/src/core/hw/tegra_x1/gpu/macro/driver_base.hpp b/src/core/hw/tegra_x1/gpu/macro/driver_base.hpp index 2613710e..00e1babc 100644 --- a/src/core/hw/tegra_x1/gpu/macro/driver_base.hpp +++ b/src/core/hw/tegra_x1/gpu/macro/driver_base.hpp @@ -22,7 +22,7 @@ class DriverBase { DriverBase(engines::ThreeD* engine_3d_) : engine_3d{engine_3d_} {} virtual ~DriverBase() = default; - void Execute(GMmu& gmmu); + void Execute(); void LoadInstructionRamPointer(u32 ptr); void LoadInstructionRam(u32 data); @@ -34,7 +34,7 @@ class DriverBase { void LoadParam(u32 data) { param_queue.push(data); } protected: - virtual void ExecuteImpl(GMmu& gmmu, u32 pc, u32 param1) = 0; + virtual void ExecuteImpl(u32 pc, u32 param1) = 0; virtual u32 InstAlu(AluOperation op, u8 rA, u8 rB) = 0; virtual u32 InstAddImmediate(u8 rA, i32 imm) = 0; @@ -44,10 +44,9 @@ class DriverBase { virtual u32 InstRead(u8 rA, u32 imm) = 0; virtual void InstBranch(BranchCondition cond, u8 rA, i32 imm, bool& branched) = 0; - virtual void InstResult(GMmu& gmmu, ResultOperation op, u8 rD, - u32 value) = 0; + virtual void InstResult(ResultOperation op, u8 rD, u32 value) = 0; - bool ParseInstruction(GMmu& gmmu, u32 pc); + bool ParseInstruction(u32 pc); u32 FetchParam() { ASSERT_DEBUG(!param_queue.empty(), Macro, "Parameter queue is empty"); @@ -60,7 +59,7 @@ class DriverBase { u32 Get3DReg(u32 reg_3d); void SetMethod(u32 value); - void Send(GMmu& gmmu, u32 arg); + void Send(u32 arg); private: engines::ThreeD* engine_3d; diff --git a/src/core/hw/tegra_x1/gpu/macro/interpreter/driver.cpp b/src/core/hw/tegra_x1/gpu/macro/interpreter/driver.cpp index 98727bd1..8843b466 100644 --- a/src/core/hw/tegra_x1/gpu/macro/interpreter/driver.cpp +++ b/src/core/hw/tegra_x1/gpu/macro/interpreter/driver.cpp @@ -2,12 +2,12 @@ namespace hydra::hw::tegra_x1::gpu::macro::interpreter { -void Driver::ExecuteImpl(GMmu& gmmu, u32 pc_, u32 param1) { +void Driver::ExecuteImpl(u32 pc_, u32 param1) { pc = pc_; SetRegU32(1, param1); while (true) { - if (ParseInstruction(gmmu, pc)) + if (ParseInstruction(pc)) break; if (pc == branch_after) { @@ -130,7 +130,7 @@ void Driver::InstBranch(BranchCondition cond, u8 rA, i32 imm, bool& branched) { } } -void Driver::InstResult(GMmu& gmmu, ResultOperation op, u8 rD, u32 value) { +void Driver::InstResult(ResultOperation op, u8 rD, u32 value) { LOG_DEBUG(Macro, "result op: {}, r{}, value: 0x{:08x}", op, rD, value); switch (op) { @@ -146,11 +146,11 @@ void Driver::InstResult(GMmu& gmmu, ResultOperation op, u8 rD, u32 value) { break; case ResultOperation::FetchAndSend: SetRegU32(rD, FetchParam()); - Send(gmmu, value); + Send(value); break; case ResultOperation::MoveAndSend: SetRegU32(rD, value); - Send(gmmu, value); + Send(value); break; case ResultOperation::FetchAndSetMethod: SetRegU32(rD, FetchParam()); @@ -159,12 +159,12 @@ void Driver::InstResult(GMmu& gmmu, ResultOperation op, u8 rD, u32 value) { case ResultOperation::MoveAndSetMethodFetchAndSend: SetRegU32(rD, value); SetMethod(value); - Send(gmmu, FetchParam()); + Send(FetchParam()); break; case ResultOperation::MoveAndSetMethodSend: SetRegU32(rD, value); SetMethod(value); - Send(gmmu, (value >> 12) & 0x3f); + Send((value >> 12) & 0x3f); break; } } diff --git a/src/core/hw/tegra_x1/gpu/macro/interpreter/driver.hpp b/src/core/hw/tegra_x1/gpu/macro/interpreter/driver.hpp index cf677d69..89da8b02 100644 --- a/src/core/hw/tegra_x1/gpu/macro/interpreter/driver.hpp +++ b/src/core/hw/tegra_x1/gpu/macro/interpreter/driver.hpp @@ -9,7 +9,7 @@ class Driver : public DriverBase { Driver(engines::ThreeD* engine_3d) : DriverBase(engine_3d) {} protected: - void ExecuteImpl(GMmu& gmmu, u32 pc_, u32 param1) override; + void ExecuteImpl(u32 pc_, u32 param1) override; u32 InstAlu(AluOperation op, u8 rA, u8 rB) override; u32 InstAddImmediate(u8 rA, i32 imm) override; @@ -19,7 +19,7 @@ class Driver : public DriverBase { u32 InstRead(u8 rA, u32 imm) override; void InstBranch(BranchCondition cond, u8 rA, i32 imm, bool& branched) override; - void InstResult(GMmu& gmmu, ResultOperation op, u8 rD, u32 value) override; + void InstResult(ResultOperation op, u8 rD, u32 value) override; private: u32 pc; diff --git a/src/core/hw/tegra_x1/gpu/pfifo.cpp b/src/core/hw/tegra_x1/gpu/pfifo.cpp index b784f4fd..1a65c3d5 100644 --- a/src/core/hw/tegra_x1/gpu/pfifo.cpp +++ b/src/core/hw/tegra_x1/gpu/pfifo.cpp @@ -2,6 +2,7 @@ #include "core/hw/tegra_x1/gpu/const.hpp" #include "core/hw/tegra_x1/gpu/gpu.hpp" +#include "core/hw/tegra_x1/gpu/renderer/command_buffer.hpp" namespace hydra::hw::tegra_x1::gpu { @@ -57,6 +58,14 @@ struct CommandHeader { SecondaryOpcode secondary_opcode : 3; }; +template +T Read(uptr& gpu_addr) { + T word = tls_crnt_gmmu->Load(gpu_addr); + gpu_addr += sizeof(T); + + return word; +} + } // namespace void Pfifo::SubmitEntries(GMmu& gmmu, const std::vector& entries, @@ -66,14 +75,18 @@ void Pfifo::SubmitEntries(GMmu& gmmu, const std::vector& entries, LOG_DEBUG(Gpu, "Flags: {}", flags); RENDERER_INSTANCE.LockMutex(); + tls_crnt_gmmu = &gmmu; + tls_crnt_command_buffer = RENDERER_INSTANCE.CreateCommandBuffer(); for (const auto& entry : entries) { - SubmitEntry(gmmu, entry); + SubmitEntry(entry); } - RENDERER_INSTANCE.EndCommandBuffer(); + delete tls_crnt_command_buffer; + tls_crnt_command_buffer = nullptr; + tls_crnt_gmmu = nullptr; RENDERER_INSTANCE.UnlockMutex(); } -void Pfifo::SubmitEntry(GMmu& gmmu, const GpfifoEntry entry) { +void Pfifo::SubmitEntry(const GpfifoEntry entry) { LOG_DEBUG( Gpu, "Gpfifo entry (addr lo: {:#x}, addr hi: {:#x}, size: {:#x}, allow " @@ -88,7 +101,7 @@ void Pfifo::SubmitEntry(GMmu& gmmu, const GpfifoEntry entry) { while (gpu_addr < end) { try { - if (!SubmitCommand(gmmu, gpu_addr)) + if (!SubmitCommand(gpu_addr)) break; } catch (Gpu::GetEngineAtSubchannelError error) { break; @@ -98,8 +111,8 @@ void Pfifo::SubmitEntry(GMmu& gmmu, const GpfifoEntry entry) { } } -bool Pfifo::SubmitCommand(GMmu& gmmu, uptr& gpu_addr) { - const auto header = Read(gmmu, gpu_addr); +bool Pfifo::SubmitCommand(uptr& gpu_addr) { + const auto header = Read(gpu_addr); LOG_DEBUG( Gpu, "Method: {:#x}, subchannel: {}, arg: {:#x}, secondary opcode: {}", header.method, header.subchannel, header.arg, header.secondary_opcode); @@ -123,7 +136,7 @@ bool Pfifo::SubmitCommand(GMmu& gmmu, uptr& gpu_addr) { } case SecondaryOpcode::IncMethod: for (u32 i = 0; i < header.arg; i++) - ProcessMethodArg(gmmu, header.subchannel, gpu_addr, offset, true); + ProcessMethodArg(header.subchannel, gpu_addr, offset, true); break; case SecondaryOpcode::Grp2UseTert: { const auto tert = static_cast(header.arg & 0x3); @@ -136,15 +149,15 @@ bool Pfifo::SubmitCommand(GMmu& gmmu, uptr& gpu_addr) { } case SecondaryOpcode::NonIncMethod: for (u32 i = 0; i < header.arg; i++) - ProcessMethodArg(gmmu, header.subchannel, gpu_addr, offset, false); + ProcessMethodArg(header.subchannel, gpu_addr, offset, false); break; case SecondaryOpcode::ImmDataMethod: - Gpu::GetInstance().SubchannelMethod(gmmu, header.subchannel, offset, + Gpu::GetInstance().SubchannelMethod(header.subchannel, offset, header.arg); break; case SecondaryOpcode::OneInc: for (u32 i = 0; i < header.arg; i++) - ProcessMethodArg(gmmu, header.subchannel, gpu_addr, offset, i == 0); + ProcessMethodArg(header.subchannel, gpu_addr, offset, i == 0); break; default: LOG_NOT_IMPLEMENTED(Gpu, "Secondary opcode {}", @@ -155,15 +168,15 @@ bool Pfifo::SubmitCommand(GMmu& gmmu, uptr& gpu_addr) { // TODO: is it okay to prefetch the parameters and then execute the // macro? if (header.method >= MACRO_METHODS_REGION) - Gpu::GetInstance().SubchannelFlushMacro(gmmu, header.subchannel); + Gpu::GetInstance().SubchannelFlushMacro(header.subchannel); return true; } -void Pfifo::ProcessMethodArg(GMmu& gmmu, u32 subchannel, uptr& gpu_addr, - u32& method, bool increment) { - u32 arg = Read(gmmu, gpu_addr); - Gpu::GetInstance().SubchannelMethod(gmmu, subchannel, method, arg); +void Pfifo::ProcessMethodArg(u32 subchannel, uptr& gpu_addr, u32& method, + bool increment) { + u32 arg = Read(gpu_addr); + Gpu::GetInstance().SubchannelMethod(subchannel, method, arg); if (increment) method++; } diff --git a/src/core/hw/tegra_x1/gpu/pfifo.hpp b/src/core/hw/tegra_x1/gpu/pfifo.hpp index 44e56532..0e10de00 100644 --- a/src/core/hw/tegra_x1/gpu/pfifo.hpp +++ b/src/core/hw/tegra_x1/gpu/pfifo.hpp @@ -18,20 +18,12 @@ class Pfifo { GpfifoFlags flags); private: - void SubmitEntry(GMmu& gmmu, const GpfifoEntry entry); - bool SubmitCommand(GMmu& gmmu, uptr& gpu_addr); // TODO: return void + void SubmitEntry(const GpfifoEntry entry); + bool SubmitCommand(uptr& gpu_addr); // TODO: return void // Helpers - template - T Read(GMmu& gmmu, uptr& gpu_addr) { - T word = gmmu.Load(gpu_addr); - gpu_addr += sizeof(T); - - return word; - } - - void ProcessMethodArg(GMmu& gmmu, u32 subchannel, uptr& gpu_addr, - u32& method, bool increment); + void ProcessMethodArg(u32 subchannel, uptr& gpu_addr, u32& method, + bool increment); }; } // namespace hydra::hw::tegra_x1::gpu diff --git a/src/core/hw/tegra_x1/gpu/renderer/buffer_base.hpp b/src/core/hw/tegra_x1/gpu/renderer/buffer_base.hpp index 1fcc934a..e3e1822c 100644 --- a/src/core/hw/tegra_x1/gpu/renderer/buffer_base.hpp +++ b/src/core/hw/tegra_x1/gpu/renderer/buffer_base.hpp @@ -4,6 +4,7 @@ namespace hydra::hw::tegra_x1::gpu::renderer { +class ICommandBuffer; class TextureBase; class BufferBase { @@ -20,22 +21,24 @@ class BufferBase { size_ = size - dst_offset; CopyFromImpl(data, dst_offset, size_); } - void CopyFrom(BufferBase* src, u64 dst_offset = 0, u64 src_offset = 0, + void CopyFrom(ICommandBuffer* command_buffer, BufferBase* src, + u64 dst_offset = 0, u64 src_offset = 0, u64 size_ = invalid()) { if (size_ == invalid()) size_ = std::min(src->GetSize() - src_offset, size - dst_offset); - CopyFromImpl(src, dst_offset, src_offset, size_); + CopyFromImpl(command_buffer, src, dst_offset, src_offset, size_); } - virtual void CopyFrom(TextureBase* src, const uint3 src_origin, - const uint3 src_size, u64 dst_offset = 0) = 0; + virtual void CopyFrom(ICommandBuffer* command_buffer, TextureBase* src, + const uint3 src_origin, const uint3 src_size, + u64 dst_offset = 0) = 0; protected: u64 size; // Copying virtual void CopyFromImpl(const uptr data, u64 dst_offset, u64 size) = 0; - virtual void CopyFromImpl(BufferBase* src, u64 dst_offset, u64 src_offset, - u64 size) = 0; + virtual void CopyFromImpl(ICommandBuffer* command_buffer, BufferBase* src, + u64 dst_offset, u64 src_offset, u64 size) = 0; public: GETTER(size, GetSize); diff --git a/src/core/hw/tegra_x1/gpu/renderer/buffer_cache.cpp b/src/core/hw/tegra_x1/gpu/renderer/buffer_cache.cpp index aea6c28c..64e5c6fb 100644 --- a/src/core/hw/tegra_x1/gpu/renderer/buffer_cache.cpp +++ b/src/core/hw/tegra_x1/gpu/renderer/buffer_cache.cpp @@ -10,20 +10,20 @@ BufferCache::~BufferCache() { delete entry.second.buffer; } -BufferView BufferCache::Get(Range range) { +BufferView BufferCache::Get(ICommandBuffer* command_buffer, Range range) { auto& entry = Find(range); if (entry.buffer) { // Check for memory invalidation if (entry.invalidation_range.has_value() && entry.invalidation_range->Intersects(range)) { const auto invalidation_range = entry.invalidation_range.value(); - UpdateRange(entry, invalidation_range); + UpdateRange(command_buffer, entry, invalidation_range); entry.invalidation_range = std::nullopt; } } else { // Create new buffer entry.buffer = RENDERER_INSTANCE.CreateBuffer(entry.range.GetSize()); - UpdateRange(entry, entry.range); + UpdateRange(command_buffer, entry, entry.range); } return BufferView(entry.buffer, range.GetBegin() - entry.range.GetBegin(), @@ -53,7 +53,8 @@ void BufferCache::InvalidateMemory(Range range) { } } -void BufferCache::UpdateRange(BufferEntry& entry, Range range) { +void BufferCache::UpdateRange(ICommandBuffer* command_buffer, + BufferEntry& entry, Range range) { if (entry.inline_copy) { // Do an inline update if possible entry.buffer->CopyFrom(range.GetBegin(), @@ -65,7 +66,7 @@ void BufferCache::UpdateRange(BufferEntry& entry, Range range) { auto tmp_buffer = RENDERER_INSTANCE.AllocateTemporaryBuffer(range.GetSize()); tmp_buffer->CopyFrom(range.GetBegin()); - entry.buffer->CopyFrom(tmp_buffer, + entry.buffer->CopyFrom(command_buffer, tmp_buffer, range.GetBegin() - entry.range.GetBegin(), 0, range.GetSize()); RENDERER_INSTANCE.FreeTemporaryBuffer(tmp_buffer); diff --git a/src/core/hw/tegra_x1/gpu/renderer/buffer_cache.hpp b/src/core/hw/tegra_x1/gpu/renderer/buffer_cache.hpp index 466b11ff..f3dc30f9 100644 --- a/src/core/hw/tegra_x1/gpu/renderer/buffer_cache.hpp +++ b/src/core/hw/tegra_x1/gpu/renderer/buffer_cache.hpp @@ -21,7 +21,7 @@ class BufferCache { public: ~BufferCache(); - BufferView Get(Range range); + BufferView Get(ICommandBuffer* command_buffer, Range range); void InvalidateMemory(Range range); @@ -29,7 +29,8 @@ class BufferCache { std::map entries; // Helpers - static void UpdateRange(BufferEntry& entry, Range range); + static void UpdateRange(ICommandBuffer* command_buffer, BufferEntry& entry, + Range range); BufferEntry& Find(Range range); }; diff --git a/src/core/hw/tegra_x1/gpu/renderer/buffer_view.hpp b/src/core/hw/tegra_x1/gpu/renderer/buffer_view.hpp index 13c559f4..0a8fc35f 100644 --- a/src/core/hw/tegra_x1/gpu/renderer/buffer_view.hpp +++ b/src/core/hw/tegra_x1/gpu/renderer/buffer_view.hpp @@ -23,14 +23,15 @@ struct BufferView { size_ = size - offset; base->CopyFrom(data, offset, size_); } - void CopyFrom(const BufferView& src, u64 size_ = invalid()) { + void CopyFrom(ICommandBuffer* command_buffer, const BufferView& src, + u64 size_ = invalid()) { if (size_ == invalid()) size_ = std::min(src.size - src.offset, size - offset); - base->CopyFrom(src.base, offset, src.offset, size_); + base->CopyFrom(command_buffer, src.base, offset, src.offset, size_); } - void CopyFrom(TextureBase* src, const uint3 src_origin, - const uint3 src_size) { - base->CopyFrom(src, src_origin, src_size, offset); + void CopyFrom(ICommandBuffer* command_buffer, TextureBase* src, + const uint3 src_origin, const uint3 src_size) { + base->CopyFrom(command_buffer, src, src_origin, src_size, offset); } protected: diff --git a/src/core/hw/tegra_x1/gpu/renderer/command_buffer.hpp b/src/core/hw/tegra_x1/gpu/renderer/command_buffer.hpp new file mode 100644 index 00000000..db4c83d0 --- /dev/null +++ b/src/core/hw/tegra_x1/gpu/renderer/command_buffer.hpp @@ -0,0 +1,12 @@ +#pragma once + +#include "core/hw/tegra_x1/gpu/renderer/const.hpp" + +namespace hydra::hw::tegra_x1::gpu::renderer { + +class ICommandBuffer { + public: + virtual ~ICommandBuffer() = default; +}; + +} // namespace hydra::hw::tegra_x1::gpu::renderer diff --git a/src/core/hw/tegra_x1/gpu/renderer/index_cache.cpp b/src/core/hw/tegra_x1/gpu/renderer/index_cache.cpp index 31537825..a17423dd 100644 --- a/src/core/hw/tegra_x1/gpu/renderer/index_cache.cpp +++ b/src/core/hw/tegra_x1/gpu/renderer/index_cache.cpp @@ -96,7 +96,8 @@ IndexCache::~IndexCache() { RENDERER_INSTANCE.FreeTemporaryBuffer(index_buffer); } -BufferView IndexCache::Decode(const IndexDescriptor& descriptor, +BufferView IndexCache::Decode(ICommandBuffer* command_buffer, + const IndexDescriptor& descriptor, engines::IndexType& out_type, engines::PrimitiveType& out_primitive_type, u32& out_count) { @@ -118,7 +119,7 @@ BufferView IndexCache::Decode(const IndexDescriptor& descriptor, } else { \ if (descriptor.mem_range) \ return RENDERER_INSTANCE.GetBufferCache().Get( \ - *descriptor.mem_range); \ + command_buffer, *descriptor.mem_range); \ else \ return BufferView(); \ } \ diff --git a/src/core/hw/tegra_x1/gpu/renderer/index_cache.hpp b/src/core/hw/tegra_x1/gpu/renderer/index_cache.hpp index 923aad75..d3a211b2 100644 --- a/src/core/hw/tegra_x1/gpu/renderer/index_cache.hpp +++ b/src/core/hw/tegra_x1/gpu/renderer/index_cache.hpp @@ -18,7 +18,8 @@ class IndexCache { public: ~IndexCache(); - BufferView Decode(const IndexDescriptor& descriptor, + BufferView Decode(ICommandBuffer* command_buffer, + const IndexDescriptor& descriptor, engines::IndexType& out_type, engines::PrimitiveType& out_primitive_type, u32& out_count); diff --git a/src/core/hw/tegra_x1/gpu/renderer/metal/buffer.cpp b/src/core/hw/tegra_x1/gpu/renderer/metal/buffer.cpp index e4674f08..b3922725 100644 --- a/src/core/hw/tegra_x1/gpu/renderer/metal/buffer.cpp +++ b/src/core/hw/tegra_x1/gpu/renderer/metal/buffer.cpp @@ -1,5 +1,6 @@ #include "core/hw/tegra_x1/gpu/renderer/metal/buffer.hpp" +#include "core/hw/tegra_x1/gpu/renderer/metal/command_buffer.hpp" #include "core/hw/tegra_x1/gpu/renderer/metal/renderer.hpp" #include "core/hw/tegra_x1/gpu/renderer/metal/texture.hpp" @@ -15,11 +16,14 @@ Buffer::Buffer(MTL::Buffer* buffer_) Buffer::~Buffer() { buffer->release(); } -void Buffer::CopyFrom(TextureBase* src, const uint3 src_origin, - const uint3 src_size, u64 dst_offset) { +void Buffer::CopyFrom(ICommandBuffer* command_buffer, TextureBase* src, + const uint3 src_origin, const uint3 src_size, + u64 dst_offset) { + const auto command_buffer_impl = + static_cast(command_buffer); auto src_impl = static_cast(src); - auto blit_encoder = METAL_RENDERER_INSTANCE.GetBlitCommandEncoder(); + auto blit_encoder = command_buffer_impl->GetBlitCommandEncoder(); // TODO: bytes per image // TODO: calculate the stride for the Metal pixel format blit_encoder->copyFromTexture( @@ -37,11 +41,13 @@ void Buffer::CopyFromImpl(const uptr data, u64 dst_offset, u64 size_) { size_); } -void Buffer::CopyFromImpl(BufferBase* src, u64 dst_offset, u64 src_offset, - u64 size_) { +void Buffer::CopyFromImpl(ICommandBuffer* command_buffer, BufferBase* src, + u64 dst_offset, u64 src_offset, u64 size_) { + const auto command_buffer_impl = + static_cast(command_buffer); auto src_impl = static_cast(src); - auto blit_encoder = METAL_RENDERER_INSTANCE.GetBlitCommandEncoder(); + auto blit_encoder = command_buffer_impl->GetBlitCommandEncoder(); blit_encoder->copyFromBuffer(src_impl->GetBuffer(), src_offset, buffer, dst_offset, size_); } diff --git a/src/core/hw/tegra_x1/gpu/renderer/metal/buffer.hpp b/src/core/hw/tegra_x1/gpu/renderer/metal/buffer.hpp index 1a5143ab..231106ce 100644 --- a/src/core/hw/tegra_x1/gpu/renderer/metal/buffer.hpp +++ b/src/core/hw/tegra_x1/gpu/renderer/metal/buffer.hpp @@ -16,16 +16,17 @@ class Buffer final : public BufferBase { } // Copying - void CopyFrom(TextureBase* src, const uint3 src_origin, - const uint3 src_size, u64 dst_offset) override; + void CopyFrom(ICommandBuffer* command_buffer, TextureBase* src, + const uint3 src_origin, const uint3 src_size, + u64 dst_offset) override; private: MTL::Buffer* buffer; // Copying void CopyFromImpl(const uptr data, u64 dst_offset, u64 size_) override; - void CopyFromImpl(BufferBase* src, u64 dst_offset, u64 src_offset, - u64 size_) override; + void CopyFromImpl(ICommandBuffer* command_buffer, BufferBase* src, + u64 dst_offset, u64 src_offset, u64 size_) override; public: GETTER(buffer, GetBuffer); diff --git a/src/core/hw/tegra_x1/gpu/renderer/metal/command_buffer.cpp b/src/core/hw/tegra_x1/gpu/renderer/metal/command_buffer.cpp new file mode 100644 index 00000000..f27bf071 --- /dev/null +++ b/src/core/hw/tegra_x1/gpu/renderer/metal/command_buffer.cpp @@ -0,0 +1,189 @@ +#include "core/hw/tegra_x1/gpu/renderer/metal/command_buffer.hpp" + +namespace hydra::hw::tegra_x1::gpu::renderer::metal { + +CommandBuffer::CommandBuffer(MTL::CommandQueue* command_queue) { + TMP_AUTORELEASE_POOL_BEGIN(); + command_buffer = command_queue->commandBuffer()->retain(); + TMP_AUTORELEASE_POOL_END(); +} + +CommandBuffer::~CommandBuffer() { + EndEncoding(); + command_buffer->commit(); + command_buffer->release(); +} + +MTL::RenderCommandEncoder* CommandBuffer::GetRenderCommandEncoder( + MTL::RenderPassDescriptor* render_pass_descriptor) { + if (render_pass_descriptor == encoder_state.render_pass) + return GetRenderCommandEncoderUnchecked(); + + encoder_state.render_pass = render_pass_descriptor; + encoder_state.render = {}; + + return CreateRenderCommandEncoder(render_pass_descriptor); +} + +MTL::RenderCommandEncoder* CommandBuffer::CreateRenderCommandEncoder( + MTL::RenderPassDescriptor* render_pass_descriptor) { + EndEncoding(); + + TMP_AUTORELEASE_POOL_BEGIN(); + command_encoder = + command_buffer->renderCommandEncoder(render_pass_descriptor)->retain(); + TMP_AUTORELEASE_POOL_END(); + + encoder_type = EncoderType::Render; + encoder_state.render_pass = render_pass_descriptor; + + return GetRenderCommandEncoderUnchecked(); +} + +MTL::BlitCommandEncoder* CommandBuffer::GetBlitCommandEncoder() { + if (encoder_type == EncoderType::Blit) + return GetBlitCommandEncoderUnchecked(); + + EndEncoding(); + + TMP_AUTORELEASE_POOL_BEGIN(); + command_encoder = command_buffer->blitCommandEncoder()->retain(); + TMP_AUTORELEASE_POOL_END(); + + encoder_type = EncoderType::Blit; + + return GetBlitCommandEncoderUnchecked(); +} + +void CommandBuffer::EndEncoding() { + if (encoder_type == EncoderType::None) + return; + + command_encoder->endEncoding(); + command_encoder->release(); + command_encoder = nullptr; + encoder_type = EncoderType::None; + + // Reset the render pass + encoder_state.render_pass = nullptr; +} + +void CommandBuffer::SetRenderPipelineState(MTL::RenderPipelineState* pipeline) { + auto& bound_pipeline = encoder_state.render.pipeline; + if (pipeline == bound_pipeline) + return; + + GetRenderCommandEncoderUnchecked()->setRenderPipelineState(pipeline); + bound_pipeline = pipeline; +} + +void CommandBuffer::SetDepthStencilState( + MTL::DepthStencilState* depth_stencil_state) { + auto& bound_depth_stencil_state = encoder_state.render.depth_stencil_state; + if (depth_stencil_state == bound_depth_stencil_state) + return; + + GetRenderCommandEncoderUnchecked()->setDepthStencilState( + depth_stencil_state); + bound_depth_stencil_state = depth_stencil_state; +} + +void CommandBuffer::SetCullMode(MTL::CullMode cull_mode) { + auto& bound_cull_mode = encoder_state.render.cull_mode; + if (cull_mode == bound_cull_mode) + return; + + GetRenderCommandEncoderUnchecked()->setCullMode(cull_mode); + bound_cull_mode = cull_mode; +} + +void CommandBuffer::SetFrontFaceWinding(MTL::Winding front_face_winding) { + auto& bound_front_face_winding = encoder_state.render.front_face_winding; + if (front_face_winding == bound_front_face_winding) + return; + + GetRenderCommandEncoderUnchecked()->setFrontFacingWinding( + front_face_winding); + bound_front_face_winding = front_face_winding; +} + +void CommandBuffer::SetBuffer(MTL::Buffer* buffer, u64 offset, + ShaderType shader_type, u32 index) { + ASSERT_DEBUG(index < BUFFER_COUNT, MetalRenderer, "Invalid buffer index {}", + index); + + auto& bound_buffer = + encoder_state.render.buffers[static_cast(shader_type)][index]; + if (buffer == bound_buffer.buffer && offset == bound_buffer.offset) + return; + + // TODO: fast path for offset only change + + switch (shader_type) { + case ShaderType::Vertex: + GetRenderCommandEncoderUnchecked()->setVertexBuffer(buffer, offset, + index); + break; + case ShaderType::Fragment: + GetRenderCommandEncoderUnchecked()->setFragmentBuffer(buffer, offset, + index); + break; + default: + LOG_ERROR(MetalRenderer, "Invalid shader type {}", shader_type); + break; + } + bound_buffer.buffer = buffer; + bound_buffer.offset = offset; +} + +void CommandBuffer::SetTexture(MTL::Texture* texture, ShaderType shader_type, + u32 index) { + ASSERT_DEBUG(index < TEXTURE_COUNT, MetalRenderer, + "Invalid texture index {}", index); + + auto& bound_texture = + encoder_state.render.textures[static_cast(shader_type)][index]; + if (texture == bound_texture) + return; + + switch (shader_type) { + case ShaderType::Vertex: + GetRenderCommandEncoderUnchecked()->setVertexTexture(texture, index); + break; + case ShaderType::Fragment: + GetRenderCommandEncoderUnchecked()->setFragmentTexture(texture, index); + break; + default: + LOG_ERROR(MetalRenderer, "Invalid shader type {}", shader_type); + break; + } + bound_texture = texture; +} + +void CommandBuffer::SetSampler(MTL::SamplerState* sampler, + ShaderType shader_type, u32 index) { + ASSERT_DEBUG(index < TEXTURE_COUNT, MetalRenderer, + "Invalid texture index {}", index); + + auto& bound_sampler = + encoder_state.render.samplers[static_cast(shader_type)][index]; + if (sampler == bound_sampler) + return; + + switch (shader_type) { + case ShaderType::Vertex: + GetRenderCommandEncoderUnchecked()->setVertexSamplerState(sampler, + index); + break; + case ShaderType::Fragment: + GetRenderCommandEncoderUnchecked()->setFragmentSamplerState(sampler, + index); + break; + default: + LOG_ERROR(MetalRenderer, "Invalid shader type {}", shader_type); + break; + } + bound_sampler = sampler; +} + +} // namespace hydra::hw::tegra_x1::gpu::renderer::metal diff --git a/src/core/hw/tegra_x1/gpu/renderer/metal/command_buffer.hpp b/src/core/hw/tegra_x1/gpu/renderer/metal/command_buffer.hpp new file mode 100644 index 00000000..cea18461 --- /dev/null +++ b/src/core/hw/tegra_x1/gpu/renderer/metal/command_buffer.hpp @@ -0,0 +1,87 @@ +#pragma once + +#include "core/hw/tegra_x1/gpu/renderer/command_buffer.hpp" +#include "core/hw/tegra_x1/gpu/renderer/metal/const.hpp" + +namespace hydra::hw::tegra_x1::gpu::renderer::metal { + +enum class EncoderType { + None, + Render, + Compute, + Blit, +}; + +struct MtlBufferState { + MTL::Buffer* buffer{nullptr}; + u64 offset{0}; +}; + +struct EncoderRenderState { + MTL::RenderPipelineState* pipeline{nullptr}; + MTL::DepthStencilState* depth_stencil_state{nullptr}; + MTL::CullMode cull_mode{MTL::CullModeNone}; + MTL::Winding front_face_winding{MTL::WindingClockwise}; + std::array, + usize(ShaderType::Count)> + buffers{}; + std::array, + usize(ShaderType::Count)> + textures{}; + std::array, + usize(ShaderType::Count)> + samplers{}; +}; + +struct EncoderState { + MTL::RenderPassDescriptor* render_pass{nullptr}; + EncoderRenderState render{}; +}; + +class CommandBuffer final : public ICommandBuffer { + public: + CommandBuffer(MTL::CommandQueue* command_queue); + ~CommandBuffer() override; + + MTL::RenderCommandEncoder* GetRenderCommandEncoderUnchecked() { + ASSERT_DEBUG(encoder_type == EncoderType::Render, MetalRenderer, + "Render command encoder not active"); + return static_cast(command_encoder); + } + MTL::RenderCommandEncoder* + GetRenderCommandEncoder(MTL::RenderPassDescriptor* render_pass_descriptor); + MTL::RenderCommandEncoder* CreateRenderCommandEncoder( + MTL::RenderPassDescriptor* render_pass_descriptor); + + MTL::BlitCommandEncoder* GetBlitCommandEncoderUnchecked() { + ASSERT_DEBUG(encoder_type == EncoderType::Blit, MetalRenderer, + "Blit command encoder not active"); + return static_cast(command_encoder); + } + MTL::BlitCommandEncoder* GetBlitCommandEncoder(); + + void EndEncoding(); + + // Encoder state setting + void SetRenderPipelineState(MTL::RenderPipelineState* pipeline); + void SetDepthStencilState(MTL::DepthStencilState* depth_stencil_state); + void SetCullMode(MTL::CullMode cull_mode); + void SetFrontFaceWinding(MTL::Winding front_face_winding); + void SetBuffer(MTL::Buffer* buffer, u64 offset, ShaderType shader_type, + u32 index); + void SetTexture(MTL::Texture* texture, ShaderType shader_type, u32 index); + void SetSampler(MTL::SamplerState* sampler, ShaderType shader_type, + u32 index); + + private: + MTL::CommandBuffer* command_buffer{nullptr}; + MTL::CommandEncoder* command_encoder{nullptr}; + EncoderType encoder_type{EncoderType::None}; + + EncoderState encoder_state{}; + + public: + GETTER(command_buffer, GetCommandBuffer); +}; + +} // namespace hydra::hw::tegra_x1::gpu::renderer::metal diff --git a/src/core/hw/tegra_x1/gpu/renderer/metal/renderer.cpp b/src/core/hw/tegra_x1/gpu/renderer/metal/renderer.cpp index a1d44f16..05c16b8f 100644 --- a/src/core/hw/tegra_x1/gpu/renderer/metal/renderer.cpp +++ b/src/core/hw/tegra_x1/gpu/renderer/metal/renderer.cpp @@ -3,6 +3,7 @@ #include "common/config.hpp" #include "core/hw/tegra_x1/gpu/engines/3d.hpp" #include "core/hw/tegra_x1/gpu/renderer/metal/buffer.hpp" +#include "core/hw/tegra_x1/gpu/renderer/metal/command_buffer.hpp" #include "core/hw/tegra_x1/gpu/renderer/metal/const.hpp" #include "core/hw/tegra_x1/gpu/renderer/metal/maxwell_to_mtl.hpp" #include "core/hw/tegra_x1/gpu/renderer/metal/pipeline.hpp" @@ -138,7 +139,7 @@ ISurfaceCompositor* Renderer::AcquireNextSurface() { if (!drawable) return nullptr; - return new SurfaceCompositor(drawable, command_queue); + return new SurfaceCompositor(drawable); } BufferBase* Renderer::CreateBuffer(u64 size) { return new Buffer(size); } @@ -164,7 +165,9 @@ SamplerBase* Renderer::CreateSampler(const SamplerDescriptor& descriptor) { return new Sampler(descriptor); } -void Renderer::EndCommandBuffer() { CommitCommandBuffer(); } +ICommandBuffer* Renderer::CreateCommandBuffer() { + return new CommandBuffer(command_queue); +} RenderPassBase* Renderer::CreateRenderPass(const RenderPassDescriptor& descriptor) { @@ -175,8 +178,10 @@ void Renderer::BindRenderPass(const RenderPassBase* render_pass) { state.render_pass = static_cast(render_pass); } -void Renderer::ClearColor(u32 render_target_id, u32 layer, u8 mask, - const uint4 color) { +void Renderer::ClearColor(ICommandBuffer* command_buffer, u32 render_target_id, + u32 layer, u8 mask, const uint4 color) { + const auto command_buffer_impl = + static_cast(command_buffer); auto texture = static_cast(state.render_pass->GetDescriptor() .color_targets[render_target_id] .texture); @@ -192,10 +197,11 @@ void Renderer::ClearColor(u32 render_target_id, u32 layer, u8 mask, ASSERT_DEBUG(layer == 0, MetalRenderer, "Layered clears (layer: {}) not implemented", layer); - auto encoder = GetRenderCommandEncoder(); + auto encoder = GetRenderCommandEncoder(command_buffer_impl); - SetRenderPipelineState(clear_color_pipeline_cache->Find( - {texture->GetPixelFormat(), render_target_id, mask})); + command_buffer_impl->SetRenderPipelineState( + clear_color_pipeline_cache->Find( + {texture->GetPixelFormat(), render_target_id, mask})); // TODO: set viewport and scissor encoder->setVertexBytes(&render_target_id, sizeof(render_target_id), 0); encoder->setFragmentBytes(&color, sizeof(color), 0); @@ -203,7 +209,10 @@ void Renderer::ClearColor(u32 render_target_id, u32 layer, u8 mask, NS::UInteger(3)); } -void Renderer::ClearDepth(u32 layer, const float value) { +void Renderer::ClearDepth(ICommandBuffer* command_buffer, u32 layer, + const float value) { + const auto command_buffer_impl = + static_cast(command_buffer); auto texture = static_cast( state.render_pass->GetDescriptor().depth_stencil_target.texture); @@ -221,11 +230,12 @@ void Renderer::ClearDepth(u32 layer, const float value) { return; } - auto encoder = GetRenderCommandEncoder(); + auto encoder = GetRenderCommandEncoder(command_buffer_impl); - SetRenderPipelineState( + command_buffer_impl->SetRenderPipelineState( clear_depth_pipeline_cache->Find(texture->GetPixelFormat())); - SetDepthStencilState(depth_stencil_state_always_and_write); + command_buffer_impl->SetDepthStencilState( + depth_stencil_state_always_and_write); // TODO: set viewport and scissor struct { u32 layer_id; @@ -236,7 +246,9 @@ void Renderer::ClearDepth(u32 layer, const float value) { NS::UInteger(3)); } -void Renderer::ClearStencil(u32 layer, const u32 value) { +void Renderer::ClearStencil(ICommandBuffer* command_buffer, u32 layer, + const u32 value) { + (void)command_buffer; ONCE(LOG_FUNC_WITH_ARGS_NOT_IMPLEMENTED( MetalRenderer, "layer: {}, value: {:#x}", layer, value)); } @@ -306,33 +318,41 @@ void Renderer::UnbindTextures(ShaderType shader_type) { state.textures[u32(shader_type)] = {}; } -void Renderer::Draw(const engines::PrimitiveType primitive_type, +void Renderer::Draw(ICommandBuffer* command_buffer, + const engines::PrimitiveType primitive_type, const u32 start, const u32 count, const u32 base_instance, const u32 instance_count) { + const auto command_buffer_impl = + static_cast(command_buffer); + // Check for errors if (!CanDraw()) return; - BindDrawState(); + BindDrawState(command_buffer_impl); - auto encoder = GetRenderCommandEncoderUnchecked(); + auto encoder = command_buffer_impl->GetRenderCommandEncoderUnchecked(); // Draw encoder->drawPrimitives(to_mtl_primitive_type(primitive_type), start, count, instance_count, base_instance); } -void Renderer::DrawIndexed(const engines::PrimitiveType primitive_type, +void Renderer::DrawIndexed(ICommandBuffer* command_buffer, + const engines::PrimitiveType primitive_type, const u32 start, const u32 count, const u32 base_vertex, const u32 base_instance, const u32 instance_count) { + const auto command_buffer_impl = + static_cast(command_buffer); + // Check for errors if (!CanDraw()) return; - BindDrawState(); + BindDrawState(command_buffer_impl); - auto encoder = GetRenderCommandEncoderUnchecked(); + auto encoder = command_buffer_impl->GetRenderCommandEncoderUnchecked(); // Draw auto index_buffer_mtl = @@ -348,175 +368,28 @@ void Renderer::DrawIndexed(const engines::PrimitiveType primitive_type, instance_count, base_vertex, base_instance); } -void Renderer::EnsureCommandBuffer() { - if (!command_buffer) { - TMP_AUTORELEASE_POOL_BEGIN(); - command_buffer = command_queue->commandBuffer()->retain(); - TMP_AUTORELEASE_POOL_END(); - } +MTL::RenderCommandEncoder* +Renderer::GetRenderCommandEncoder(CommandBuffer* command_buffer) { + return command_buffer->GetRenderCommandEncoder( + state.render_pass->GetRenderPassDescriptor()); } -MTL::RenderCommandEncoder* Renderer::GetRenderCommandEncoder() { - auto mtl_render_pass = state.render_pass->GetRenderPassDescriptor(); - if (mtl_render_pass == encoder_state.render_pass) - return GetRenderCommandEncoderUnchecked(); - - encoder_state.render_pass = mtl_render_pass; - encoder_state.render = {}; - - // Reset bindings - encoder_state.render.buffers = {}; - encoder_state.render.textures = {}; - encoder_state.render.samplers = {}; - - return CreateRenderCommandEncoder(mtl_render_pass); +void Renderer::SetRenderPipelineState(CommandBuffer* command_buffer) { + command_buffer->SetRenderPipelineState(state.pipeline->GetPipeline()); } -MTL::RenderCommandEncoder* Renderer::CreateRenderCommandEncoder( - MTL::RenderPassDescriptor* render_pass_descriptor) { - EnsureCommandBuffer(); - EndEncoding(); - - TMP_AUTORELEASE_POOL_BEGIN(); - command_encoder = - command_buffer->renderCommandEncoder(render_pass_descriptor)->retain(); - TMP_AUTORELEASE_POOL_END(); - - encoder_type = EncoderType::Render; - encoder_state.render_pass = render_pass_descriptor; - - // HACK: bind null textures - for (u32 i = 0; i < TEXTURE_COUNT; i++) { - GetRenderCommandEncoderUnchecked()->setVertexTexture(null_texture, i); - GetRenderCommandEncoderUnchecked()->setFragmentTexture(null_texture, i); - } - - return GetRenderCommandEncoderUnchecked(); -} - -MTL::BlitCommandEncoder* Renderer::GetBlitCommandEncoder() { - if (encoder_type == EncoderType::Blit) - return GetBlitCommandEncoderUnchecked(); - - EnsureCommandBuffer(); - EndEncoding(); - - TMP_AUTORELEASE_POOL_BEGIN(); - command_encoder = command_buffer->blitCommandEncoder()->retain(); - TMP_AUTORELEASE_POOL_END(); - - encoder_type = EncoderType::Blit; - - return GetBlitCommandEncoderUnchecked(); -} - -void Renderer::EndEncoding() { - if (encoder_type == EncoderType::None) - return; - - command_encoder->endEncoding(); - command_encoder->release(); - command_encoder = nullptr; - encoder_type = EncoderType::None; - - // Reset the render pass - encoder_state.render_pass = nullptr; -} - -void Renderer::SetRenderPipelineState(MTL::RenderPipelineState* pipeline) { - auto& bound_pipeline = encoder_state.render.pipeline; - if (pipeline == bound_pipeline) - return; - - GetRenderCommandEncoderUnchecked()->setRenderPipelineState(pipeline); - bound_pipeline = pipeline; -} - -void Renderer::SetRenderPipelineState() { - SetRenderPipelineState(state.pipeline->GetPipeline()); -} - -void Renderer::SetDepthStencilState( - MTL::DepthStencilState* depth_stencil_state) { - auto& bound_depth_stencil_state = encoder_state.render.depth_stencil_state; - if (depth_stencil_state == bound_depth_stencil_state) - return; - - GetRenderCommandEncoderUnchecked()->setDepthStencilState( - depth_stencil_state); - bound_depth_stencil_state = depth_stencil_state; -} - -void Renderer::SetDepthStencilState() { +void Renderer::SetDepthStencilState(CommandBuffer* command_buffer) { DepthStencilStateDescriptor descriptor{ .depth_test_enabled = static_cast(REGS_3D.depth_test_enabled), .depth_write_enabled = static_cast(REGS_3D.depth_write_enabled), .depth_compare_op = REGS_3D.depth_compare_op, }; - SetDepthStencilState(depth_stencil_state_cache->Find(descriptor)); -} - -void Renderer::SetCullMode(MTL::CullMode cull_mode) { - auto& bound_cull_mode = encoder_state.render.cull_mode; - if (cull_mode == bound_cull_mode) - return; - - GetRenderCommandEncoderUnchecked()->setCullMode(cull_mode); - bound_cull_mode = cull_mode; -} - -void Renderer::SetFrontFaceWinding(MTL::Winding front_face_winding) { - auto& bound_front_face_winding = encoder_state.render.front_face_winding; - if (front_face_winding == bound_front_face_winding) - return; - - GetRenderCommandEncoderUnchecked()->setFrontFacingWinding( - front_face_winding); - bound_front_face_winding = front_face_winding; + command_buffer->SetDepthStencilState( + depth_stencil_state_cache->Find(descriptor)); } -void Renderer::SetCullState() { - /* - if (REGS_3D.cull_face_enabled) { - SetCullMode(ToMtlCullMode(REGS_3D.cull_face_mode)); - SetFrontFaceWinding(ToMtlWinding(REGS_3D.front_face_winding)); - } else { - SetCullMode(MTL::CullModeNone); - } - */ -} - -void Renderer::SetBuffer(MTL::Buffer* buffer, u64 offset, - ShaderType shader_type, u32 index) { - ASSERT_DEBUG(index < BUFFER_COUNT, MetalRenderer, "Invalid buffer index {}", - index); - - auto& bound_buffer = - encoder_state.render.buffers[static_cast(shader_type)][index]; - if (buffer == bound_buffer.buffer && offset == bound_buffer.offset) - return; - - // TODO: fast path for offset only change - - switch (shader_type) { - case ShaderType::Vertex: - GetRenderCommandEncoderUnchecked()->setVertexBuffer(buffer, offset, - index); - break; - case ShaderType::Fragment: - GetRenderCommandEncoderUnchecked()->setFragmentBuffer(buffer, offset, - index); - break; - default: - LOG_ERROR(MetalRenderer, "Invalid shader type {}", shader_type); - break; - } - bound_buffer.buffer = buffer; - bound_buffer.offset = offset; -} - -void Renderer::SetVertexBuffer(u32 index) { +void Renderer::SetVertexBuffer(CommandBuffer* command_buffer, u32 index) { ASSERT_DEBUG(index < VERTEX_ARRAY_COUNT, MetalRenderer, "Invalid vertex buffer index {}", index); @@ -524,12 +397,13 @@ void Renderer::SetVertexBuffer(u32 index) { if (!buffer.GetBase()) return; - SetBuffer(static_cast(buffer.GetBase())->GetBuffer(), - buffer.GetOffset(), ShaderType::Vertex, - GetVertexBufferIndex(index)); + command_buffer->SetBuffer( + static_cast(buffer.GetBase())->GetBuffer(), buffer.GetOffset(), + ShaderType::Vertex, GetVertexBufferIndex(index)); } -void Renderer::SetUniformBuffer(ShaderType shader_type, u32 index) { +void Renderer::SetUniformBuffer(CommandBuffer* command_buffer, + ShaderType shader_type, u32 index) { // TODO: get the index from resource mapping ASSERT_DEBUG(index < CONST_BUFFER_BINDING_COUNT, MetalRenderer, @@ -540,73 +414,27 @@ void Renderer::SetUniformBuffer(ShaderType shader_type, u32 index) { if (!buffer.GetBase()) return; - SetBuffer(static_cast(buffer.GetBase())->GetBuffer(), - buffer.GetOffset(), shader_type, index); + command_buffer->SetBuffer( + static_cast(buffer.GetBase())->GetBuffer(), buffer.GetOffset(), + shader_type, index); } -void Renderer::SetTexture(MTL::Texture* texture, ShaderType shader_type, +void Renderer::SetTexture(CommandBuffer* command_buffer, ShaderType shader_type, u32 index) { - ASSERT_DEBUG(index < TEXTURE_COUNT, MetalRenderer, - "Invalid texture index {}", index); - - auto& bound_texture = - encoder_state.render.textures[static_cast(shader_type)][index]; - if (texture == bound_texture) - return; - - switch (shader_type) { - case ShaderType::Vertex: - GetRenderCommandEncoderUnchecked()->setVertexTexture(texture, index); - break; - case ShaderType::Fragment: - GetRenderCommandEncoderUnchecked()->setFragmentTexture(texture, index); - break; - default: - LOG_ERROR(MetalRenderer, "Invalid shader type {}", shader_type); - break; - } - bound_texture = texture; -} - -void Renderer::SetSampler(MTL::SamplerState* sampler, ShaderType shader_type, - u32 index) { - ASSERT_DEBUG(index < TEXTURE_COUNT, MetalRenderer, - "Invalid texture index {}", index); - - auto& bound_sampler = - encoder_state.render.samplers[static_cast(shader_type)][index]; - if (sampler == bound_sampler) - return; - - switch (shader_type) { - case ShaderType::Vertex: - GetRenderCommandEncoderUnchecked()->setVertexSamplerState(sampler, - index); - break; - case ShaderType::Fragment: - GetRenderCommandEncoderUnchecked()->setFragmentSamplerState(sampler, - index); - break; - default: - LOG_ERROR(MetalRenderer, "Invalid shader type {}", shader_type); - break; - } - bound_sampler = sampler; -} - -void Renderer::SetTexture(ShaderType shader_type, u32 index) { const auto texture = state.textures[u32(shader_type)][index]; if (texture.texture) - SetTexture(texture.texture->GetTexture(), shader_type, index); + command_buffer->SetTexture(texture.texture->GetTexture(), shader_type, + index); if (texture.sampler) - SetSampler(texture.sampler->GetSampler(), shader_type, index); + command_buffer->SetSampler(texture.sampler->GetSampler(), shader_type, + index); } // TODO: what about 3D textures? -void Renderer::BlitTexture(MTL::Texture* src, const float3 src_origin, - const usize3 src_size, MTL::Texture* dst, - const u32 dst_layer, const float3 dst_origin, - const usize3 dst_size) { +void Renderer::BlitTexture(CommandBuffer* command_buffer, MTL::Texture* src, + const float3 src_origin, const usize3 src_size, + MTL::Texture* dst, const u32 dst_layer, + const float3 dst_origin, const usize3 dst_size) { // Render pass auto render_pass_descriptor = MTL::RenderPassDescriptor::alloc()->init(); auto color_attachment = @@ -617,7 +445,8 @@ void Renderer::BlitTexture(MTL::Texture* src, const float3 src_origin, // texture color_attachment->setStoreAction(MTL::StoreActionStore); - auto encoder = CreateRenderCommandEncoder(render_pass_descriptor); + auto encoder = + command_buffer->CreateRenderCommandEncoder(render_pass_descriptor); render_pass_descriptor->release(); // Draw @@ -695,8 +524,6 @@ void Renderer::BeginCapture() { } void Renderer::EndCapture() { - CommitCommandBuffer(); - auto captureManager = MTL::CaptureManager::sharedCaptureManager(); captureManager->stopCapture(); } @@ -710,13 +537,21 @@ bool Renderer::CanDraw() { return true; } -void Renderer::BindDrawState() { - auto encoder = GetRenderCommandEncoder(); +void Renderer::BindDrawState(CommandBuffer* command_buffer) { + auto encoder = GetRenderCommandEncoder(command_buffer); // States - SetRenderPipelineState(); - SetDepthStencilState(); - SetCullState(); + SetRenderPipelineState(command_buffer); + SetDepthStencilState(command_buffer); + + /* + if (REGS_3D.cull_face_enabled) { + SetCullMode(ToMtlCullMode(REGS_3D.cull_face_mode)); + SetFrontFaceWinding(ToMtlWinding(REGS_3D.front_face_winding)); + } else { + SetCullMode(MTL::CullModeNone); + } + */ // Viewport and scissor MTL::Viewport viewports[VIEWPORT_COUNT]; @@ -739,17 +574,17 @@ void Renderer::BindDrawState() { // Resources for (u32 i = 0; i < VERTEX_ARRAY_COUNT; i++) - SetVertexBuffer(i); + SetVertexBuffer(command_buffer, i); for (u32 shader_type = 0; shader_type < usize(ShaderType::Count); shader_type++) { for (u32 i = 0; i < CONST_BUFFER_BINDING_COUNT; i++) - SetUniformBuffer(ShaderType(shader_type), i); + SetUniformBuffer(command_buffer, ShaderType(shader_type), i); } // TODO: storage buffers for (u32 shader_type = 0; shader_type < usize(ShaderType::Count); shader_type++) { for (u32 i = 0; i < TEXTURE_COUNT; i++) - SetTexture(ShaderType(shader_type), i); + SetTexture(command_buffer, ShaderType(shader_type), i); } } diff --git a/src/core/hw/tegra_x1/gpu/renderer/metal/renderer.hpp b/src/core/hw/tegra_x1/gpu/renderer/metal/renderer.hpp index 532405b8..fbd714c8 100644 --- a/src/core/hw/tegra_x1/gpu/renderer/metal/renderer.hpp +++ b/src/core/hw/tegra_x1/gpu/renderer/metal/renderer.hpp @@ -12,19 +12,13 @@ namespace hydra::hw::tegra_x1::gpu::renderer::metal { +class CommandBuffer; class Buffer; class Texture; class Sampler; class RenderPass; class Pipeline; -enum class EncoderType { - None, - Render, - Compute, - Blit, -}; - struct CombinedTextureSampler { const Texture* texture{nullptr}; const Sampler* sampler{nullptr}; @@ -47,32 +41,6 @@ struct State { // TODO: images }; -struct MtlBufferState { - MTL::Buffer* buffer{nullptr}; - u64 offset{0}; -}; - -struct EncoderRenderState { - MTL::RenderPipelineState* pipeline{nullptr}; - MTL::DepthStencilState* depth_stencil_state{nullptr}; - MTL::CullMode cull_mode{MTL::CullModeNone}; - MTL::Winding front_face_winding{MTL::WindingClockwise}; - std::array, - usize(ShaderType::Count)> - buffers{}; - std::array, - usize(ShaderType::Count)> - textures{}; - std::array, - usize(ShaderType::Count)> - samplers{}; -}; - -struct EncoderState { - MTL::RenderPassDescriptor* render_pass{nullptr}; - EncoderRenderState render{}; -}; - class Renderer : public RendererBase { public: static Renderer& GetInstance(); @@ -96,7 +64,7 @@ class Renderer : public RendererBase { SamplerBase* CreateSampler(const SamplerDescriptor& descriptor) override; // Command buffer - void EndCommandBuffer() override; + ICommandBuffer* CreateCommandBuffer() override; // Render pass RenderPassBase* @@ -104,10 +72,12 @@ class Renderer : public RendererBase { void BindRenderPass(const RenderPassBase* render_pass) override; // Clear - void ClearColor(u32 render_target_id, u32 layer, u8 mask, - const uint4 color) override; - void ClearDepth(u32 layer, const float value) override; - void ClearStencil(u32 layer, const u32 value) override; + void ClearColor(ICommandBuffer* command_buffer, u32 render_target_id, + u32 layer, u8 mask, const uint4 color) override; + void ClearDepth(ICommandBuffer* command_buffer, u32 layer, + const float value) override; + void ClearStencil(ICommandBuffer* command_buffer, u32 layer, + const u32 value) override; // Viewport and scissor void SetViewport(u32 index, const Viewport& viewport) override; @@ -134,71 +104,33 @@ class Renderer : public RendererBase { void UnbindTextures(ShaderType shader_type) override; // Draw - void Draw(const engines::PrimitiveType primitive_type, const u32 start, + void Draw(ICommandBuffer* command_buffer, + const engines::PrimitiveType primitive_type, const u32 start, const u32 count, const u32 base_instance, const u32 instance_count) override; - void DrawIndexed(const engines::PrimitiveType primitive_type, + void DrawIndexed(ICommandBuffer* command_buffer, + const engines::PrimitiveType primitive_type, const u32 start, const u32 count, const u32 base_vertex, const u32 base_instance, const u32 instance_count) override; // Helpers - - // Command buffer - void EnsureCommandBuffer(); - - void CommitCommandBuffer() { - if (command_buffer) { - EndEncoding(); - - command_buffer->commit(); - // HACK: wait until completed so as to avoid sync issues - command_buffer->waitUntilCompleted(); - command_buffer->release(); - command_buffer = nullptr; - } - } - - MTL::RenderCommandEncoder* GetRenderCommandEncoderUnchecked() { - ASSERT_DEBUG(encoder_type == EncoderType::Render, MetalRenderer, - "Render command encoder not active"); - return static_cast(command_encoder); - } - MTL::RenderCommandEncoder* GetRenderCommandEncoder(); - MTL::RenderCommandEncoder* CreateRenderCommandEncoder( - MTL::RenderPassDescriptor* render_pass_descriptor); - - MTL::BlitCommandEncoder* GetBlitCommandEncoderUnchecked() { - ASSERT_DEBUG(encoder_type == EncoderType::Blit, MetalRenderer, - "Blit command encoder not active"); - return static_cast(command_encoder); - } - MTL::BlitCommandEncoder* GetBlitCommandEncoder(); - - void EndEncoding(); + MTL::RenderCommandEncoder* + GetRenderCommandEncoder(CommandBuffer* command_buffer); // Encoder state setting - void SetRenderPipelineState(MTL::RenderPipelineState* pipeline); - void SetRenderPipelineState(); - void SetDepthStencilState(MTL::DepthStencilState* depth_stencil_state); - void SetDepthStencilState(); - void SetCullMode(MTL::CullMode cull_mode); - void SetFrontFaceWinding(MTL::Winding front_face_winding); - void SetCullState(); - void SetBuffer(MTL::Buffer* buffer, u64 offset, ShaderType shader_type, - u32 index); - void SetVertexBuffer(u32 index); - void SetUniformBuffer(ShaderType shader_type, u32 index); - void SetTexture(MTL::Texture* texture, ShaderType shader_type, u32 index); - void SetSampler(MTL::SamplerState* sampler, ShaderType shader_type, + void SetRenderPipelineState(CommandBuffer* command_buffer); + void SetDepthStencilState(CommandBuffer* command_buffer); + void SetVertexBuffer(CommandBuffer* command_buffer, u32 index); + void SetUniformBuffer(CommandBuffer* command_buffer, ShaderType shader_type, + u32 index); + void SetTexture(CommandBuffer* command_buffer, ShaderType shader_type, u32 index); - void SetTexture(ShaderType shader_type, u32 index); - // Other - void BlitTexture(MTL::Texture* src, const float3 src_origin, - const usize3 src_size, MTL::Texture* dst, - const u32 dst_layer, const float3 dst_origin, - const usize3 dst_size); + void BlitTexture(CommandBuffer* command_buffer, MTL::Texture* src, + const float3 src_origin, const usize3 src_size, + MTL::Texture* dst, const u32 dst_layer, + const float3 dst_origin, const usize3 dst_size); protected: // Capture @@ -230,21 +162,15 @@ class Renderer : public RendererBase { // Null MTL::Texture* null_texture; - // Command buffer - MTL::CommandBuffer* command_buffer{nullptr}; - MTL::CommandEncoder* command_encoder{nullptr}; - EncoderType encoder_type{EncoderType::None}; - // State State state; [[maybe_unused]] u32 padding[0x100]; // HACK: for some reason, writing to some fields of the // encoder_state corrupts the state - EncoderState encoder_state; // Helpers bool CanDraw(); - void BindDrawState(); + void BindDrawState(CommandBuffer* command_buffer); public: GETTER(device, GetDevice); diff --git a/src/core/hw/tegra_x1/gpu/renderer/metal/surface_compositor.cpp b/src/core/hw/tegra_x1/gpu/renderer/metal/surface_compositor.cpp index ad70b0ad..b3663c6f 100644 --- a/src/core/hw/tegra_x1/gpu/renderer/metal/surface_compositor.cpp +++ b/src/core/hw/tegra_x1/gpu/renderer/metal/surface_compositor.cpp @@ -1,52 +1,37 @@ #include "core/hw/tegra_x1/gpu/renderer/metal/surface_compositor.hpp" #include "core/hw/tegra_x1/gpu/renderer/metal/blit_pipeline_cache.hpp" +#include "core/hw/tegra_x1/gpu/renderer/metal/command_buffer.hpp" #include "core/hw/tegra_x1/gpu/renderer/metal/renderer.hpp" #include "core/hw/tegra_x1/gpu/renderer/metal/texture.hpp" namespace hydra::hw::tegra_x1::gpu::renderer::metal { -SurfaceCompositor::SurfaceCompositor(CA::MetalDrawable* drawable_, - MTL::CommandQueue* command_queue) +SurfaceCompositor::SurfaceCompositor(CA::MetalDrawable* drawable_) : drawable{drawable_} { - TMP_AUTORELEASE_POOL_BEGIN(); - - // Command buffer - command_buffer = command_queue->commandBuffer()->retain(); - - // Encoder - NS_STACK_SCOPED auto render_pass_descriptor = - MTL::RenderPassDescriptor::alloc()->init(); + // Render pass + render_pass_descriptor = MTL::RenderPassDescriptor::alloc()->init(); auto color_attachment = render_pass_descriptor->colorAttachments()->object(0); color_attachment->setTexture(drawable->texture()); color_attachment->setLoadAction(MTL::LoadActionClear); color_attachment->setClearColor(MTL::ClearColor::Make(0.0, 0.0, 0.0, 1.0)); color_attachment->setStoreAction(MTL::StoreActionStore); - - encoder = - command_buffer->renderCommandEncoder(render_pass_descriptor)->retain(); - - TMP_AUTORELEASE_POOL_END(); } -SurfaceCompositor::~SurfaceCompositor() { - // Encoder - encoder->endEncoding(); - encoder->release(); - - // Command buffer - command_buffer->presentDrawable(drawable); - command_buffer->commit(); - command_buffer->release(); -} +SurfaceCompositor::~SurfaceCompositor() { render_pass_descriptor->release(); } -void SurfaceCompositor::DrawTexture(const TextureBase* texture, +void SurfaceCompositor::DrawTexture(ICommandBuffer* command_buffer, + const TextureBase* texture, const FloatRect2D src_rect, const FloatRect2D dst_rect, bool transparent, f32 opacity) { + auto command_buffer_impl = static_cast(command_buffer); auto texture_impl = static_cast(texture); + auto encoder = + command_buffer_impl->GetRenderCommandEncoder(render_pass_descriptor); + // Draw encoder->setRenderPipelineState( METAL_RENDERER_INSTANCE.GetBlitPipelineCache()->Find( @@ -77,4 +62,12 @@ void SurfaceCompositor::DrawTexture(const TextureBase* texture, NS::UInteger(3)); } +void SurfaceCompositor::Present(ICommandBuffer* command_buffer) { + auto command_buffer_impl = static_cast(command_buffer); + + command_buffer_impl->GetRenderCommandEncoder(render_pass_descriptor); + command_buffer_impl->EndEncoding(); + command_buffer_impl->GetCommandBuffer()->presentDrawable(drawable); +} + } // namespace hydra::hw::tegra_x1::gpu::renderer::metal diff --git a/src/core/hw/tegra_x1/gpu/renderer/metal/surface_compositor.hpp b/src/core/hw/tegra_x1/gpu/renderer/metal/surface_compositor.hpp index 5b569d65..33894ae3 100644 --- a/src/core/hw/tegra_x1/gpu/renderer/metal/surface_compositor.hpp +++ b/src/core/hw/tegra_x1/gpu/renderer/metal/surface_compositor.hpp @@ -5,20 +5,21 @@ namespace hydra::hw::tegra_x1::gpu::renderer::metal { +class CommandBuffer; + class SurfaceCompositor final : public ISurfaceCompositor { public: - SurfaceCompositor(CA::MetalDrawable* drawable_, - MTL::CommandQueue* command_queue); + SurfaceCompositor(CA::MetalDrawable* drawable_); ~SurfaceCompositor() override; - void DrawTexture(const TextureBase* texture, const FloatRect2D src_rect, - const FloatRect2D dst_rect, bool transparent, - f32 opacity) override; + void DrawTexture(ICommandBuffer* command_buffer, const TextureBase* texture, + const FloatRect2D src_rect, const FloatRect2D dst_rect, + bool transparent, f32 opacity) override; + void Present(ICommandBuffer* command_buffer) override; private: CA::MetalDrawable* drawable; - MTL::CommandBuffer* command_buffer; - MTL::RenderCommandEncoder* encoder; + MTL::RenderPassDescriptor* render_pass_descriptor; }; } // namespace hydra::hw::tegra_x1::gpu::renderer::metal diff --git a/src/core/hw/tegra_x1/gpu/renderer/metal/texture.cpp b/src/core/hw/tegra_x1/gpu/renderer/metal/texture.cpp index 5ef13fb7..4c9f79b5 100644 --- a/src/core/hw/tegra_x1/gpu/renderer/metal/texture.cpp +++ b/src/core/hw/tegra_x1/gpu/renderer/metal/texture.cpp @@ -1,6 +1,7 @@ #include "core/hw/tegra_x1/gpu/renderer/metal/texture.hpp" #include "core/hw/tegra_x1/gpu/renderer/metal/buffer.hpp" +#include "core/hw/tegra_x1/gpu/renderer/metal/command_buffer.hpp" #include "core/hw/tegra_x1/gpu/renderer/metal/maxwell_to_mtl.hpp" #include "core/hw/tegra_x1/gpu/renderer/metal/renderer.hpp" @@ -89,11 +90,13 @@ void Texture::CopyFrom(const uptr data) { 0, reinterpret_cast(data), descriptor.stride); } -void Texture::CopyFrom(const BufferBase* src, const usize src_stride, - uint3 dst_origin, usize3 size) { +void Texture::CopyFrom(ICommandBuffer* command_buffer, const BufferBase* src, + const usize src_stride, uint3 dst_origin, usize3 size) { + const auto command_buffer_impl = + static_cast(command_buffer); const auto mtl_src = static_cast(src)->GetBuffer(); - auto encoder = METAL_RENDERER_INSTANCE.GetBlitCommandEncoder(); + auto encoder = command_buffer_impl->GetBlitCommandEncoder(); u32 dst_layer = 0; u32 layer_count = 1; @@ -115,11 +118,13 @@ void Texture::CopyFrom(const BufferBase* src, const usize src_stride, } } -void Texture::CopyFrom(const TextureBase* src, uint3 src_origin, - uint3 dst_origin, usize3 size) { +void Texture::CopyFrom(ICommandBuffer* command_buffer, const TextureBase* src, + uint3 src_origin, uint3 dst_origin, usize3 size) { + const auto command_buffer_impl = + static_cast(command_buffer); const auto mtl_src = static_cast(src)->GetTexture(); - auto encoder = METAL_RENDERER_INSTANCE.GetBlitCommandEncoder(); + auto encoder = command_buffer_impl->GetBlitCommandEncoder(); u32 src_layer = 0; u32 dst_layer = 0; @@ -149,12 +154,14 @@ void Texture::CopyFrom(const TextureBase* src, uint3 src_origin, } } -void Texture::BlitFrom(const TextureBase* src, const float3 src_origin, - const usize3 src_size, const float3 dst_origin, - const usize3 dst_size) { +void Texture::BlitFrom(ICommandBuffer* command_buffer, const TextureBase* src, + const float3 src_origin, const usize3 src_size, + const float3 dst_origin, const usize3 dst_size) { + const auto command_buffer_impl = + static_cast(command_buffer); METAL_RENDERER_INSTANCE.BlitTexture( - static_cast(src)->GetTexture(), src_origin, src_size, - texture, 0, dst_origin, dst_size); + command_buffer_impl, static_cast(src)->GetTexture(), + src_origin, src_size, texture, 0, dst_origin, dst_size); } MTL::Texture* Texture::CreateViewImpl(TextureFormat format, diff --git a/src/core/hw/tegra_x1/gpu/renderer/metal/texture.hpp b/src/core/hw/tegra_x1/gpu/renderer/metal/texture.hpp index a6fe035e..9056fdf6 100644 --- a/src/core/hw/tegra_x1/gpu/renderer/metal/texture.hpp +++ b/src/core/hw/tegra_x1/gpu/renderer/metal/texture.hpp @@ -15,15 +15,17 @@ class Texture final : public TextureBase { // Copying void CopyFrom(const uptr data) override; - void CopyFrom(const BufferBase* src, const usize src_stride, - const uint3 dst_origin, const usize3 size) override; - void CopyFrom(const TextureBase* src, const uint3 src_origin, - const uint3 dst_origin, const usize3 size) override; + void CopyFrom(ICommandBuffer* command_buffer, const BufferBase* src, + const usize src_stride, const uint3 dst_origin, + const usize3 size) override; + void CopyFrom(ICommandBuffer* command_buffer, const TextureBase* src, + const uint3 src_origin, const uint3 dst_origin, + const usize3 size) override; // Blitting - void BlitFrom(const TextureBase* src, const float3 src_origin, - const usize3 src_size, const float3 dst_origin, - const usize3 dst_size) override; + void BlitFrom(ICommandBuffer* command_buffer, const TextureBase* src, + const float3 src_origin, const usize3 src_size, + const float3 dst_origin, const usize3 dst_size) override; private: bool owns_base{false}; diff --git a/src/core/hw/tegra_x1/gpu/renderer/renderer_base.hpp b/src/core/hw/tegra_x1/gpu/renderer/renderer_base.hpp index d24f44f3..5c2cbaf9 100644 --- a/src/core/hw/tegra_x1/gpu/renderer/renderer_base.hpp +++ b/src/core/hw/tegra_x1/gpu/renderer/renderer_base.hpp @@ -11,6 +11,7 @@ namespace hydra::hw::tegra_x1::gpu::renderer { +class ICommandBuffer; class ISurfaceCompositor; class TextureBase; class SamplerBase; @@ -61,7 +62,7 @@ class RendererBase { virtual SamplerBase* CreateSampler(const SamplerDescriptor& descriptor) = 0; // Command buffer - virtual void EndCommandBuffer() = 0; + virtual ICommandBuffer* CreateCommandBuffer() = 0; // Render pass virtual RenderPassBase* @@ -69,10 +70,13 @@ class RendererBase { virtual void BindRenderPass(const RenderPassBase* render_pass) = 0; // Clear - virtual void ClearColor(u32 render_target_id, u32 layer, u8 mask, + virtual void ClearColor(ICommandBuffer* command_buffer, + u32 render_target_id, u32 layer, u8 mask, const uint4 color) = 0; - virtual void ClearDepth(u32 layer, const float value) = 0; - virtual void ClearStencil(u32 layer, const u32 value) = 0; + virtual void ClearDepth(ICommandBuffer* command_buffer, u32 layer, + const float value) = 0; + virtual void ClearStencil(ICommandBuffer* command_buffer, u32 layer, + const u32 value) = 0; // Viewport and scissor virtual void SetViewport(u32 index, const Viewport& viewport) = 0; @@ -102,10 +106,12 @@ class RendererBase { virtual void UnbindTextures(ShaderType shader_type) = 0; // Draw - virtual void Draw(const engines::PrimitiveType primitive_type, + virtual void Draw(ICommandBuffer* command_buffer, + const engines::PrimitiveType primitive_type, const u32 start, const u32 count, const u32 base_instance, const u32 instance_count) = 0; - virtual void DrawIndexed(const engines::PrimitiveType primitive_type, + virtual void DrawIndexed(ICommandBuffer* command_buffer, + const engines::PrimitiveType primitive_type, const u32 start, const u32 count, const u32 base_vertex, const u32 base_instance, const u32 instance_count) = 0; diff --git a/src/core/hw/tegra_x1/gpu/renderer/surface_compositor.hpp b/src/core/hw/tegra_x1/gpu/renderer/surface_compositor.hpp index 9bde3335..77efb005 100644 --- a/src/core/hw/tegra_x1/gpu/renderer/surface_compositor.hpp +++ b/src/core/hw/tegra_x1/gpu/renderer/surface_compositor.hpp @@ -4,16 +4,19 @@ namespace hydra::hw::tegra_x1::gpu::renderer { +class ICommandBuffer; class TextureBase; class ISurfaceCompositor { public: virtual ~ISurfaceCompositor() = default; - virtual void DrawTexture(const TextureBase* texture, + virtual void DrawTexture(ICommandBuffer* command_buffer, + const TextureBase* texture, const FloatRect2D src_rect, const FloatRect2D dst_rect, bool transparent, f32 opacity = 1.0f) = 0; + virtual void Present(ICommandBuffer* command_buffer) = 0; }; } // namespace hydra::hw::tegra_x1::gpu::renderer diff --git a/src/core/hw/tegra_x1/gpu/renderer/texture_base.hpp b/src/core/hw/tegra_x1/gpu/renderer/texture_base.hpp index ea5aa8a3..bc47fabf 100644 --- a/src/core/hw/tegra_x1/gpu/renderer/texture_base.hpp +++ b/src/core/hw/tegra_x1/gpu/renderer/texture_base.hpp @@ -4,6 +4,7 @@ namespace hydra::hw::tegra_x1::gpu::renderer { +class ICommandBuffer; class BufferBase; class TextureBase { @@ -17,17 +18,20 @@ class TextureBase { // Copying virtual void CopyFrom(const uptr data) = 0; - virtual void CopyFrom(const BufferBase* src, const usize src_stride, - const uint3 dst_origin, const usize3 size) = 0; - void CopyFrom(const BufferBase* src) { - CopyFrom(src, descriptor.stride, uint3({0, 0, 0}), + virtual void CopyFrom(ICommandBuffer* command_buffer, const BufferBase* src, + const usize src_stride, const uint3 dst_origin, + const usize3 size) = 0; + void CopyFrom(ICommandBuffer* command_buffer, const BufferBase* src) { + CopyFrom(command_buffer, src, descriptor.stride, uint3({0, 0, 0}), usize3({descriptor.width, descriptor.height, 1})); } - virtual void CopyFrom(const TextureBase* src, const uint3 src_origin, + virtual void CopyFrom(ICommandBuffer* command_buffer, + const TextureBase* src, const uint3 src_origin, const uint3 dst_origin, const usize3 size) = 0; // Blitting - virtual void BlitFrom(const TextureBase* src, const float3 src_origin, + virtual void BlitFrom(ICommandBuffer* command_buffer, + const TextureBase* src, const float3 src_origin, const usize3 src_size, const float3 dst_origin, const usize3 dst_size) = 0; diff --git a/src/core/hw/tegra_x1/gpu/renderer/texture_cache.cpp b/src/core/hw/tegra_x1/gpu/renderer/texture_cache.cpp index d1e06681..cf68f8cd 100644 --- a/src/core/hw/tegra_x1/gpu/renderer/texture_cache.cpp +++ b/src/core/hw/tegra_x1/gpu/renderer/texture_cache.cpp @@ -19,7 +19,8 @@ TextureCache::~TextureCache() { } } -TextureBase* TextureCache::Find(const TextureDescriptor& descriptor, +TextureBase* TextureCache::Find(ICommandBuffer* command_buffer, + const TextureDescriptor& descriptor, TextureUsage usage) { const auto range = descriptor.GetRange(); @@ -30,7 +31,7 @@ TextureBase* TextureCache::Find(const TextureDescriptor& descriptor, auto& prev_mem = prev->second; if (prev_mem.range.GetEnd() >= range.GetEnd()) { // Fully contained - return AddToMemory(prev_mem, descriptor, usage); + return AddToMemory(command_buffer, prev_mem, descriptor, usage); } } @@ -58,7 +59,8 @@ TextureBase* TextureCache::Find(const TextureDescriptor& descriptor, // Insert merged interval auto inserted = entries.emplace(mem.range.GetBegin(), mem); - return AddToMemory(inserted.first->second, descriptor, usage); + return AddToMemory(command_buffer, inserted.first->second, descriptor, + usage); } void TextureCache::InvalidateMemory(Range range) { @@ -103,7 +105,8 @@ TextureMem TextureCache::MergeMemories(const TextureMem& a, return res; } -TextureBase* TextureCache::AddToMemory(TextureMem& mem, +TextureBase* TextureCache::AddToMemory(ICommandBuffer* command_buffer, + TextureMem& mem, const TextureDescriptor& descriptor, TextureUsage usage) { const auto range = descriptor.GetRange(); @@ -114,7 +117,7 @@ TextureBase* TextureCache::AddToMemory(TextureMem& mem, if (!sparse_tex_opt.has_value()) { auto& sparse_tex = mem.cache.Add(descriptor.GetHash()); auto& group = sparse_tex.cache.Add(descriptor.ptr); - return GetTexture(group, mem, descriptor, usage); + return GetTexture(command_buffer, group, mem, descriptor, usage); } auto& sparse_tex = **sparse_tex_opt; @@ -124,7 +127,7 @@ TextureBase* TextureCache::AddToMemory(TextureMem& mem, if (group_opt) { auto& group = **group_opt; if (group.base->GetDescriptor().GetRange().Contains(range)) - return GetTexture(group, mem, descriptor, usage); + return GetTexture(command_buffer, group, mem, descriptor, usage); else sparse_tex.cache.Remove(descriptor.ptr); } @@ -147,7 +150,7 @@ TextureBase* TextureCache::AddToMemory(TextureMem& mem, // HACK: create a new texture auto& group = sparse_tex.cache.Add(descriptor.ptr); - return GetTexture(group, mem, descriptor, usage); + return GetTexture(command_buffer, group, mem, descriptor, usage); /* // Create a new entry and merge it with others @@ -223,13 +226,14 @@ TextureBase* TextureCache::AddToMemory(TextureMem& mem, */ } -TextureBase* TextureCache::GetTexture(TextureGroup& group, TextureMem& mem, +TextureBase* TextureCache::GetTexture(ICommandBuffer* command_buffer, + TextureGroup& group, TextureMem& mem, const TextureDescriptor& descriptor, TextureUsage usage) { if (!group.base) - Create(descriptor, group); + Create(command_buffer, descriptor, group); - Update(group, mem, usage); + Update(command_buffer, group, mem, usage); // If the formats match and swizzle is the default swizzle, // return base @@ -257,17 +261,18 @@ TextureCache::GetTextureView(TextureGroup& group, return view; } -void TextureCache::Create(const TextureDescriptor& descriptor, +void TextureCache::Create(ICommandBuffer* command_buffer, + const TextureDescriptor& descriptor, TextureGroup& group) { auto desc = descriptor; desc.swizzle_channels = get_texture_format_default_swizzle_channels(desc.format); group.base = RENDERER_INSTANCE.CreateTexture(desc); - DecodeTexture(group); + DecodeTexture(command_buffer, group); } -void TextureCache::Update(TextureGroup& group, TextureMem& mem, - TextureUsage usage) { +void TextureCache::Update(ICommandBuffer* command_buffer, TextureGroup& group, + TextureMem& mem, TextureUsage usage) { bool sync = false; if (group.update_timestamp < mem.info.modified_timestamp) { // If modified by the guest @@ -311,7 +316,8 @@ void TextureCache::Update(TextureGroup& group, TextureMem& mem, static_cast(copy_range.GetSize() / layer_size); // TODO: make sure the formats match - base->CopyFrom(other_base, uint3({0, 0, src_layer}), + base->CopyFrom(command_buffer, other_base, + uint3({0, 0, src_layer}), uint3({0, 0, dst_layer}), usize3({descriptor.width, descriptor.height, layer_count})); @@ -331,7 +337,7 @@ void TextureCache::Update(TextureGroup& group, TextureMem& mem, } if (sync) - DecodeTexture(group); + DecodeTexture(command_buffer, group); if (usage == TextureUsage::Read) mem.info.MarkRead(); @@ -356,14 +362,15 @@ u32 TextureCache::GetDataHash(const TextureBase* texture) { return hash.ToHashCode(); } -void TextureCache::DecodeTexture(TextureGroup& group) { +void TextureCache::DecodeTexture(ICommandBuffer* command_buffer, + TextureGroup& group) { const auto& descriptor = group.base->GetDescriptor(); // Align the height to 16 bytes (TODO: why 16?) auto tmp_buffer = RENDERER_INSTANCE.AllocateTemporaryBuffer( descriptor.depth * align(descriptor.height, 16u) * descriptor.stride); texture_decoder.Decode(descriptor, (u8*)tmp_buffer->GetPtr()); - group.base->CopyFrom(tmp_buffer); + group.base->CopyFrom(command_buffer, tmp_buffer); RENDERER_INSTANCE.FreeTemporaryBuffer(tmp_buffer); } diff --git a/src/core/hw/tegra_x1/gpu/renderer/texture_cache.hpp b/src/core/hw/tegra_x1/gpu/renderer/texture_cache.hpp index a20a9f7d..83b9d5d4 100644 --- a/src/core/hw/tegra_x1/gpu/renderer/texture_cache.hpp +++ b/src/core/hw/tegra_x1/gpu/renderer/texture_cache.hpp @@ -8,6 +8,7 @@ class IMmu; namespace hydra::hw::tegra_x1::gpu::renderer { +class ICommandBuffer; class TextureBase; typedef std::chrono::steady_clock TextureCacheClock; @@ -47,7 +48,8 @@ class TextureCache { public: ~TextureCache(); - TextureBase* Find(const TextureDescriptor& descriptor, TextureUsage usage); + TextureBase* Find(ICommandBuffer* command_buffer, + const TextureDescriptor& descriptor, TextureUsage usage); void InvalidateMemory(Range range); @@ -57,20 +59,23 @@ class TextureCache { std::map entries; TextureMem MergeMemories(const TextureMem& a, const TextureMem& b); - TextureBase* AddToMemory(TextureMem& mem, + TextureBase* AddToMemory(ICommandBuffer* command_buffer, TextureMem& mem, const TextureDescriptor& descriptor, TextureUsage usage); - TextureBase* GetTexture(TextureGroup& group, TextureMem& mem, + TextureBase* GetTexture(ICommandBuffer* command_buffer, TextureGroup& group, + TextureMem& mem, const TextureDescriptor& descriptor, TextureUsage usage); TextureBase* GetTextureView(TextureGroup& group, const TextureViewDescriptor& descriptor); - void Create(const TextureDescriptor& descriptor, TextureGroup& group); - void Update(TextureGroup& group, TextureMem& mem, TextureUsage usage); + void Create(ICommandBuffer* command_buffer, + const TextureDescriptor& descriptor, TextureGroup& group); + void Update(ICommandBuffer* command_buffer, TextureGroup& group, + TextureMem& mem, TextureUsage usage); // Helpers u32 GetDataHash(const TextureBase* texture); - void DecodeTexture(TextureGroup& group); + void DecodeTexture(ICommandBuffer* command_buffer, TextureGroup& group); // TODO: encode texture }; From 9032b1ad035821457093de53fc13254d6273aed2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Samuel=20=C5=BD=C3=BAbor?= Date: Tue, 3 Feb 2026 19:04:27 +0100 Subject: [PATCH 2/4] gpu: simplify mutexes --- src/core/emulation_context.cpp | 4 ---- src/core/horizon/display/binder.cpp | 2 -- src/core/hw/tegra_x1/gpu/engines/3d.cpp | 9 ++++++++- src/core/hw/tegra_x1/gpu/gpu.cpp | 2 ++ src/core/hw/tegra_x1/gpu/pfifo.cpp | 2 -- src/core/hw/tegra_x1/gpu/renderer/renderer_base.hpp | 6 ------ src/core/hw/tegra_x1/gpu/renderer/texture_cache.hpp | 4 ++++ 7 files changed, 14 insertions(+), 15 deletions(-) diff --git a/src/core/emulation_context.cpp b/src/core/emulation_context.cpp index 4286a8ef..f4571f12 100644 --- a/src/core/emulation_context.cpp +++ b/src/core/emulation_context.cpp @@ -605,18 +605,14 @@ void EmulationContext::TakeScreenshot() { stbi_flip_vertically_on_write(false); // Free the buffer - RENDERER_INSTANCE.LockMutex(); RENDERER_INSTANCE.FreeTemporaryBuffer(buffer); - RENDERER_INSTANCE.UnlockMutex(); }); thread.detach(); } void EmulationContext::CaptureGpuFrame() { // TODO: allow multiple frames - gpu->GetRenderer().LockMutex(); gpu->GetRenderer().CaptureFrames(1); - gpu->GetRenderer().UnlockMutex(); } void EmulationContext::TryApplyPatch(horizon::kernel::Process* process, diff --git a/src/core/horizon/display/binder.cpp b/src/core/horizon/display/binder.cpp index 7cff3fd6..1827a4d5 100644 --- a/src/core/horizon/display/binder.cpp +++ b/src/core/horizon/display/binder.cpp @@ -58,9 +58,7 @@ void Binder::QueueBuffer(i32 slot, const BqBufferInput& input) { // Debug // TODO: only do this for the main process - RENDERER_INSTANCE.LockMutex(); RENDERER_INSTANCE.NotifyDebugFrameBoundary(); - RENDERER_INSTANCE.UnlockMutex(); } i32 Binder::ConsumeBuffer(BqBufferInput& out_input) { diff --git a/src/core/hw/tegra_x1/gpu/engines/3d.cpp b/src/core/hw/tegra_x1/gpu/engines/3d.cpp index 06107f2e..d9e8d71d 100644 --- a/src/core/hw/tegra_x1/gpu/engines/3d.cpp +++ b/src/core/hw/tegra_x1/gpu/engines/3d.cpp @@ -365,7 +365,11 @@ void ThreeD::ClearBuffer(const u32 index, const ClearBufferData data) { // TODO: implement // Regular clear - RENDERER_INSTANCE.BindRenderPass(GetRenderPass()); + { + std::lock_guard texture_cache_lock( + RENDERER_INSTANCE.GetTextureCache().GetMutex()); + RENDERER_INSTANCE.BindRenderPass(GetRenderPass()); + } if (data.color_mask != 0x0) RENDERER_INSTANCE.ClearColor(tls_crnt_command_buffer, data.target_id, @@ -868,6 +872,9 @@ void ThreeD::ConfigureShaderStage( } bool ThreeD::DrawInternal() { + std::lock_guard texture_cache_lock( + RENDERER_INSTANCE.GetTextureCache().GetMutex()); + // Flush tracked pages tls_crnt_gmmu->GetMmu()->FlushTrackedPages(); diff --git a/src/core/hw/tegra_x1/gpu/gpu.cpp b/src/core/hw/tegra_x1/gpu/gpu.cpp index 5e36111e..beb75657 100644 --- a/src/core/hw/tegra_x1/gpu/gpu.cpp +++ b/src/core/hw/tegra_x1/gpu/gpu.cpp @@ -82,6 +82,8 @@ void Gpu::SubchannelMethod(u32 subchannel, u32 method, u32 arg) { renderer::TextureBase* Gpu::GetTexture(renderer::ICommandBuffer* command_buffer, cpu::IMmu* mmu, const NvGraphicsBuffer& buff) { + std::lock_guard texture_cache_lock(renderer->GetTextureCache().GetMutex()); + LOG_DEBUG(Gpu, "Map id: {}, width: {}, " "height: {}", diff --git a/src/core/hw/tegra_x1/gpu/pfifo.cpp b/src/core/hw/tegra_x1/gpu/pfifo.cpp index 1a65c3d5..baef6624 100644 --- a/src/core/hw/tegra_x1/gpu/pfifo.cpp +++ b/src/core/hw/tegra_x1/gpu/pfifo.cpp @@ -74,7 +74,6 @@ void Pfifo::SubmitEntries(GMmu& gmmu, const std::vector& entries, (void)flags; LOG_DEBUG(Gpu, "Flags: {}", flags); - RENDERER_INSTANCE.LockMutex(); tls_crnt_gmmu = &gmmu; tls_crnt_command_buffer = RENDERER_INSTANCE.CreateCommandBuffer(); for (const auto& entry : entries) { @@ -83,7 +82,6 @@ void Pfifo::SubmitEntries(GMmu& gmmu, const std::vector& entries, delete tls_crnt_command_buffer; tls_crnt_command_buffer = nullptr; tls_crnt_gmmu = nullptr; - RENDERER_INSTANCE.UnlockMutex(); } void Pfifo::SubmitEntry(const GpfifoEntry entry) { diff --git a/src/core/hw/tegra_x1/gpu/renderer/renderer_base.hpp b/src/core/hw/tegra_x1/gpu/renderer/renderer_base.hpp index 5c2cbaf9..d2f70e2e 100644 --- a/src/core/hw/tegra_x1/gpu/renderer/renderer_base.hpp +++ b/src/core/hw/tegra_x1/gpu/renderer/renderer_base.hpp @@ -42,10 +42,6 @@ class RendererBase { // TODO: shader cache } - // Mutex - void LockMutex() { mutex.lock(); } - void UnlockMutex() { mutex.unlock(); } - // Surface virtual void SetSurface(void* surface) = 0; virtual ISurfaceCompositor* AcquireNextSurface() = 0; @@ -140,8 +136,6 @@ class RendererBase { virtual void EndCapture() = 0; private: - std::mutex mutex; - // Caches BufferCache buffer_cache; TextureCache texture_cache; diff --git a/src/core/hw/tegra_x1/gpu/renderer/texture_cache.hpp b/src/core/hw/tegra_x1/gpu/renderer/texture_cache.hpp index 83b9d5d4..3d079ab3 100644 --- a/src/core/hw/tegra_x1/gpu/renderer/texture_cache.hpp +++ b/src/core/hw/tegra_x1/gpu/renderer/texture_cache.hpp @@ -54,6 +54,7 @@ class TextureCache { void InvalidateMemory(Range range); private: + std::mutex mutex; TextureDecoder texture_decoder; std::map entries; @@ -77,6 +78,9 @@ class TextureCache { u32 GetDataHash(const TextureBase* texture); void DecodeTexture(ICommandBuffer* command_buffer, TextureGroup& group); // TODO: encode texture + + public: + REF_GETTER(mutex, GetMutex); }; } // namespace hydra::hw::tegra_x1::gpu::renderer From b41da22ca26ac1b5fae3d69117b9ae935a8e7b18 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Samuel=20=C5=BD=C3=BAbor?= Date: Tue, 3 Feb 2026 20:06:52 +0100 Subject: [PATCH 3/4] gpu: process commands on a separate thread --- .../services/nvdrv/ioctl/nvhost_gpu.cpp | 26 ++++++---- src/core/hw/tegra_x1/gpu/engines/3d.cpp | 2 +- src/core/hw/tegra_x1/gpu/pfifo.cpp | 50 +++++++++++++++---- src/core/hw/tegra_x1/gpu/pfifo.hpp | 22 +++++++- 4 files changed, 77 insertions(+), 23 deletions(-) diff --git a/src/core/horizon/services/nvdrv/ioctl/nvhost_gpu.cpp b/src/core/horizon/services/nvdrv/ioctl/nvhost_gpu.cpp index 06972d01..84e04d73 100644 --- a/src/core/horizon/services/nvdrv/ioctl/nvhost_gpu.cpp +++ b/src/core/horizon/services/nvdrv/ioctl/nvhost_gpu.cpp @@ -48,8 +48,7 @@ NvResult NvHostGpu::SubmitGpfifo( GPU_INSTANCE.GetPfifo().SubmitEntries( *process->GetGMmu(), - std::vector(entries, - entries + num_entries), + std::span(entries, num_entries), inout_flags_and_detailed_error); // HACK @@ -59,21 +58,25 @@ NvResult NvHostGpu::SubmitGpfifo( } NvResult NvHostGpu::AllocObjCtx(u32 class_num, u32 flags, u64* out_obj_id) { - LOG_FUNC_WITH_ARGS_STUBBED(Services, "class number: {}, flags: {:#x}", class_num, flags); + LOG_FUNC_WITH_ARGS_STUBBED(Services, "class number: {}, flags: {:#x}", + class_num, flags); // HACK *out_obj_id = 0; return NvResult::Success; } -NvResult NvHostGpu::ZCullBind(gpu_vaddr_t addr, u32 mode, [[maybe_unused]] u32 reserved) { - LOG_FUNC_WITH_ARGS_STUBBED(Services, "address: {:#x}, mode: {}", addr, mode); +NvResult NvHostGpu::ZCullBind(gpu_vaddr_t addr, u32 mode, + [[maybe_unused]] u32 reserved) { + LOG_FUNC_WITH_ARGS_STUBBED(Services, "address: {:#x}, mode: {}", addr, + mode); return NvResult::Success; } NvResult NvHostGpu::SetErrorNotifier(u64 offset, u64 size, u32 mem, [[maybe_unused]] u32 reserved) { - LOG_FUNC_WITH_ARGS_STUBBED(Services, "offset: {:#x}, size: {:#x}, memory: {}", offset, size, mem); + LOG_FUNC_WITH_ARGS_STUBBED( + Services, "offset: {:#x}, size: {:#x}, memory: {}", offset, size, mem); return NvResult::Success; } @@ -89,10 +92,13 @@ NvResult NvHostGpu::GetErrorNotification(u64* out_timestamp, u32* out_info32, return NvResult::Success; } -NvResult NvHostGpu::AllocGpfifoEX(u32 num_entries, u32 num_jobs, u32 flags, - hw::tegra_x1::gpu::Fence* out_fence, - [[maybe_unused]] std::array reserved) { - LOG_FUNC_WITH_ARGS_STUBBED(Services, "number of entries: {}, num_jobs: {}, flags: {:#x}", num_entries, num_jobs, flags); +NvResult +NvHostGpu::AllocGpfifoEX(u32 num_entries, u32 num_jobs, u32 flags, + hw::tegra_x1::gpu::Fence* out_fence, + [[maybe_unused]] std::array reserved) { + LOG_FUNC_WITH_ARGS_STUBBED( + Services, "number of entries: {}, num_jobs: {}, flags: {:#x}", + num_entries, num_jobs, flags); // HACK *out_fence = {}; diff --git a/src/core/hw/tegra_x1/gpu/engines/3d.cpp b/src/core/hw/tegra_x1/gpu/engines/3d.cpp index d9e8d71d..a1606815 100644 --- a/src/core/hw/tegra_x1/gpu/engines/3d.cpp +++ b/src/core/hw/tegra_x1/gpu/engines/3d.cpp @@ -390,7 +390,7 @@ void ThreeD::SetReportSemaphore(const u32 index, const u32 data) { const uptr ptr = tls_crnt_gmmu->UnmapAddr(regs.report_semaphore_addr); - // HACK + // TODO: correct? *reinterpret_cast(ptr) = regs.report_semaphore_payload; } diff --git a/src/core/hw/tegra_x1/gpu/pfifo.cpp b/src/core/hw/tegra_x1/gpu/pfifo.cpp index baef6624..87332d0e 100644 --- a/src/core/hw/tegra_x1/gpu/pfifo.cpp +++ b/src/core/hw/tegra_x1/gpu/pfifo.cpp @@ -68,20 +68,50 @@ T Read(uptr& gpu_addr) { } // namespace -void Pfifo::SubmitEntries(GMmu& gmmu, const std::vector& entries, +Pfifo::Pfifo() : thread(&Pfifo::ThreadFunc, this) {} + +Pfifo::~Pfifo() { + stop = true; + cond_var.notify_all(); + thread.join(); +} + +void Pfifo::SubmitEntries(GMmu& gmmu, std::span entries, GpfifoFlags flags) { - // TODO: flags - (void)flags; + std::lock_guard lock(mutex); LOG_DEBUG(Gpu, "Flags: {}", flags); - tls_crnt_gmmu = &gmmu; - tls_crnt_command_buffer = RENDERER_INSTANCE.CreateCommandBuffer(); - for (const auto& entry : entries) { - SubmitEntry(entry); + entry_lists.emplace( + gmmu, std::vector(entries.begin(), entries.end()), flags); +} + +void Pfifo::ThreadFunc() { + std::unique_lock lock(mutex); + while (true) { + cond_var.wait(lock); + if (stop) + return; + + // Process entry lists + while (!entry_lists.empty()) { + const auto entry_list = entry_lists.front(); + entry_lists.pop(); + + lock.unlock(); + + // Entries + // TODO: flags + tls_crnt_gmmu = &entry_list.gmmu; + tls_crnt_command_buffer = RENDERER_INSTANCE.CreateCommandBuffer(); + for (const auto& entry : entry_list.entries) + SubmitEntry(entry); + delete tls_crnt_command_buffer; + tls_crnt_command_buffer = nullptr; + tls_crnt_gmmu = nullptr; + + lock.lock(); + } } - delete tls_crnt_command_buffer; - tls_crnt_command_buffer = nullptr; - tls_crnt_gmmu = nullptr; } void Pfifo::SubmitEntry(const GpfifoEntry entry) { diff --git a/src/core/hw/tegra_x1/gpu/pfifo.hpp b/src/core/hw/tegra_x1/gpu/pfifo.hpp index 0e10de00..eb81f439 100644 --- a/src/core/hw/tegra_x1/gpu/pfifo.hpp +++ b/src/core/hw/tegra_x1/gpu/pfifo.hpp @@ -11,13 +11,31 @@ namespace hydra::hw::tegra_x1::gpu { class GMmu; +struct GpfifoEntryList { + GMmu& gmmu; + std::vector entries; + GpfifoFlags flags; +}; + class Pfifo { public: - // TODO: use std::span instead - void SubmitEntries(GMmu& gmmu, const std::vector& entries, + Pfifo(); + ~Pfifo(); + + void SubmitEntries(GMmu& gmmu, std::span entries, GpfifoFlags flags); private: + std::mutex mutex; + std::condition_variable cond_var; + + std::queue entry_lists; + bool stop{false}; + + std::thread thread; // TODO: jthread + + void ThreadFunc(); + void SubmitEntry(const GpfifoEntry entry); bool SubmitCommand(uptr& gpu_addr); // TODO: return void From 2e42ef6039e0268b95836ca3d51fa73e9d8719c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Samuel=20=C5=BD=C3=BAbor?= Date: Wed, 4 Feb 2026 18:46:09 +0100 Subject: [PATCH 4/4] gpu: fix pfifo deadlock --- src/core/hw/tegra_x1/gpu/pfifo.cpp | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/src/core/hw/tegra_x1/gpu/pfifo.cpp b/src/core/hw/tegra_x1/gpu/pfifo.cpp index 87332d0e..9c773e5a 100644 --- a/src/core/hw/tegra_x1/gpu/pfifo.cpp +++ b/src/core/hw/tegra_x1/gpu/pfifo.cpp @@ -1,5 +1,6 @@ #include "core/hw/tegra_x1/gpu/pfifo.hpp" +#include "core/debugger/debugger_manager.hpp" #include "core/hw/tegra_x1/gpu/const.hpp" #include "core/hw/tegra_x1/gpu/gpu.hpp" #include "core/hw/tegra_x1/gpu/renderer/command_buffer.hpp" @@ -78,19 +79,27 @@ Pfifo::~Pfifo() { void Pfifo::SubmitEntries(GMmu& gmmu, std::span entries, GpfifoFlags flags) { - std::lock_guard lock(mutex); LOG_DEBUG(Gpu, "Flags: {}", flags); - entry_lists.emplace( - gmmu, std::vector(entries.begin(), entries.end()), flags); + { + std::lock_guard lock(mutex); + entry_lists.emplace( + gmmu, std::vector(entries.begin(), entries.end()), + flags); + } + + cond_var.notify_all(); } void Pfifo::ThreadFunc() { + DEBUGGER_MANAGER_INSTANCE.GetDebuggerForCurrentProcess().RegisterThisThread( + "GPU thread"); + std::unique_lock lock(mutex); while (true) { cond_var.wait(lock); if (stop) - return; + break; // Process entry lists while (!entry_lists.empty()) { @@ -112,6 +121,9 @@ void Pfifo::ThreadFunc() { lock.lock(); } } + + DEBUGGER_MANAGER_INSTANCE.GetDebuggerForCurrentProcess() + .UnregisterThisThread(); } void Pfifo::SubmitEntry(const GpfifoEntry entry) {