diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index cc3a8424..cd2cde24 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -3,6 +3,7 @@ add_library(hydra-common macros.hpp type_aliases.hpp types.hpp + range.hpp traits.hpp functions.hpp atomic.hpp @@ -15,6 +16,7 @@ add_library(hydra-common pool.hpp static_pool.hpp dynamic_pool.hpp + small_cache.hpp filesystem.hpp log.cpp log.hpp diff --git a/src/common/common.hpp b/src/common/common.hpp index 42ef7fa0..9ea5a9f4 100644 --- a/src/common/common.hpp +++ b/src/common/common.hpp @@ -25,6 +25,8 @@ #include "common/objc.hpp" #include "common/optional_helper.hpp" #include "common/platform.hpp" +#include "common/range.hpp" +#include "common/small_cache.hpp" #include "common/static_pool.hpp" #include "common/string.hpp" #include "common/time.hpp" diff --git a/src/common/config.cpp b/src/common/config.cpp index 7c44bdef..6b5adea1 100644 --- a/src/common/config.cpp +++ b/src/common/config.cpp @@ -137,6 +137,7 @@ void Config::LoadDefaults() { log_fs_access = GetDefaultLogFsAccess(); debug_logging = GetDefaultDebugLogging(); process_args = GetDefaultProcessArgs(); + recover_from_segfault = GetDefaultRecoverFromSegfault(); gdb_enabled = GetDefaultGdbEnabled(); gdb_port = GetDefaultGdbPort(); gdb_wait_for_client = GetDefaultGdbWaitForClient(); @@ -218,6 +219,7 @@ void Config::Serialize() { debug["log_fs_access"] = log_fs_access; debug["debug_logging"] = debug_logging; debug["process_args"] = process_args; + debug["recover_from_segfault"] = recover_from_segfault; debug["gdb_enabled"] = gdb_enabled; debug["gdb_port"] = gdb_port; debug["gdb_wait_for_client"] = gdb_wait_for_client; @@ -303,6 +305,8 @@ void Config::Deserialize() { GetDefaultDebugLogging()); process_args = toml::find_or>( debug, "process_args", GetDefaultProcessArgs()); + recover_from_segfault = toml::find_or( + debug, "recover_from_segfault", GetDefaultRecoverFromSegfault()); gdb_enabled = toml::find_or(debug, "gdb_enabled", 
GetDefaultGdbEnabled()); gdb_port = toml::find_or(debug, "gdb_port", GetDefaultGdbPort()); @@ -365,6 +369,7 @@ void Config::Log() { LOG_INFO(Other, "Log FS access: {}", log_fs_access); LOG_INFO(Other, "Debug logging: {}", debug_logging); LOG_INFO(Other, "Process arguments: {}", process_args); + LOG_INFO(Other, "Recover from segfault: {}", recover_from_segfault); LOG_INFO(Other, "GDB enabled: {}", gdb_enabled); LOG_INFO(Other, "GDB port: {}", gdb_port); LOG_INFO(Other, "GDB wait for client: {}", gdb_wait_for_client); diff --git a/src/common/config.hpp b/src/common/config.hpp index f0461ff1..7d6feeb7 100644 --- a/src/common/config.hpp +++ b/src/common/config.hpp @@ -107,6 +107,7 @@ class Config { bool log_fs_access; bool debug_logging; std::vector process_args; + bool recover_from_segfault; bool gdb_enabled; u16 gdb_port; bool gdb_wait_for_client; @@ -154,6 +155,7 @@ class Config { bool GetDefaultLogFsAccess() const { return false; } bool GetDefaultDebugLogging() const { return false; } std::vector GetDefaultProcessArgs() const { return {}; } + bool GetDefaultRecoverFromSegfault() const { return false; } bool GetDefaultGdbEnabled() const { return false; } u16 GetDefaultGdbPort() const { return 1234; } bool GetDefaultGdbWaitForClient() const { return false; } @@ -179,6 +181,7 @@ class Config { REF_GETTER(log_fs_access, GetLogFsAccess); REF_GETTER(debug_logging, GetDebugLogging); REF_GETTER(process_args, GetProcessArgs); + REF_GETTER(recover_from_segfault, GetRecoverFromSegfault); REF_GETTER(gdb_enabled, GetGdbEnabled); REF_GETTER(gdb_port, GetGdbPort); REF_GETTER(gdb_wait_for_client, GetGdbWaitForClient); diff --git a/src/common/functions.hpp b/src/common/functions.hpp index 2547f5eb..bfe01b15 100644 --- a/src/common/functions.hpp +++ b/src/common/functions.hpp @@ -63,7 +63,7 @@ T align(T v, T alignment) { template bool is_aligned(T v, T alignment) { - return (v & (alignment - 1)) == 0x0; + return (v % alignment) == 0x0; } template @@ -136,14 +136,6 @@ inline 
std::string u64_to_str(u64 value) { return std::string(str, std::min(strlen(str), size_t(8))); } -template -void push_unique(std::vector& vec, T value) { - auto it = std::find_if(vec.begin(), vec.end(), - [&](const T v) { return v == value; }); - if (it == vec.end()) - vec.push_back(value); -} - inline std::string demangle(const char* mangled_name) { i32 status; std::unique_ptr result{ diff --git a/src/common/io/sparse_stream.hpp b/src/common/io/sparse_stream.hpp index 0657638c..eb3ac9b1 100644 --- a/src/common/io/sparse_stream.hpp +++ b/src/common/io/sparse_stream.hpp @@ -1,11 +1,12 @@ #pragma once #include "common/io/stream.hpp" +#include "common/range.hpp" namespace hydra::io { struct SparseStreamEntry { - range range; + Range range; IStream* stream; }; @@ -38,9 +39,9 @@ class SparseStream : public IStream { const auto entry = GetEntry(seek); const auto max_read_size = std::min( - entry.range.end - seek, static_cast(buffer.size())); + entry.range.GetEnd() - seek, static_cast(buffer.size())); if (entry.stream) { - entry.stream->SeekTo(seek - entry.range.begin); + entry.stream->SeekTo(seek - entry.range.GetBegin()); entry.stream->ReadRaw(buffer.subspan(0, max_read_size)); } else { std::fill(buffer.begin(), @@ -59,9 +60,9 @@ class SparseStream : public IStream { const auto entry = GetEntry(seek); const auto max_write_size = std::min( - entry.range.end - seek, static_cast(buffer.size())); + entry.range.GetEnd() - seek, static_cast(buffer.size())); if (entry.stream) { - entry.stream->SeekTo(seek - entry.range.begin); + entry.stream->SeekTo(seek - entry.range.GetBegin()); entry.stream->WriteRaw(buffer.subspan(0, max_write_size)); } @@ -92,12 +93,12 @@ class SparseStream : public IStream { auto next_it = std::upper_bound(entries.begin(), entries.end(), offset, [](u64 offset, const SparseStreamEntry& entry) { - return offset < entry.range.begin; + return offset < entry.range.GetBegin(); }); // If the offset is before the first entry, return an empty entry if (next_it == 
entries.begin()) - return {.stream = nullptr, .range = {0, next_it->range.begin}}; + return {.stream = nullptr, .range = {0, next_it->range.GetBegin()}}; auto it = std::prev(next_it); @@ -105,10 +106,10 @@ class SparseStream : public IStream { if (!it->range.Contains(offset)) { if (next_it == entries.end()) return {.stream = nullptr, - .range = {it->range.end, size - offset}}; + .range = {it->range.GetEnd(), size - offset}}; return {.stream = nullptr, - .range = {it->range.end, next_it->range.begin}}; + .range = {it->range.GetEnd(), next_it->range.GetBegin()}}; } // Cache the entry and return it diff --git a/src/common/log.hpp b/src/common/log.hpp index 6100fba2..3eb2c332 100644 --- a/src/common/log.hpp +++ b/src/common/log.hpp @@ -65,7 +65,8 @@ #define ASSERT_ALIGNMENT(value, alignment, c, name) \ ASSERT(is_aligned(value, alignment), c, \ - name " must be {}-byte aligned", alignment) + name " must be {:#x}-byte aligned (value: {:#x})", alignment, \ + value) #ifdef HYDRA_DEBUG #define ASSERT_DEBUG(condition, c, ...) 
ASSERT(condition, c, __VA_ARGS__) diff --git a/src/common/macros.hpp b/src/common/macros.hpp index 0314975e..ff0ea439 100644 --- a/src/common/macros.hpp +++ b/src/common/macros.hpp @@ -104,6 +104,8 @@ decltype(member)& name() { return member; } #define CONST_REF_GETTER(member, name) \ const decltype(member)& name() const { return member; } +#define CONSTEXPR_GETTER(member, name) \ + constexpr decltype(member) name() const { return member; } #define SETTER(member, name) \ void name(const decltype(member) member##_) { member = member##_; } @@ -131,6 +133,9 @@ setter_name) \ CONST_REF_GETTER(member, getter_name) \ CONST_REF_SETTER(member, setter_name) +#define CONSTEXPR_GETTER_AND_SETTER(member, getter_name, setter_name) \ + CONSTEXPR_GETTER(member, getter_name) \ + SETTER(member, setter_name) #define PARENS () diff --git a/src/common/range.hpp b/src/common/range.hpp new file mode 100644 index 00000000..5f423b8d --- /dev/null +++ b/src/common/range.hpp @@ -0,0 +1,89 @@ +#pragma once + +#include "common/functions.hpp" +#include "common/log.hpp" +#include "common/macros.hpp" +#include "common/type_aliases.hpp" + +namespace hydra { + +template +class Range { + public: + static constexpr Range FromSize(T begin_, T size) { + return Range(begin_, begin_ + size); + } + + constexpr Range() : begin{0}, end{0} {} + constexpr Range(T begin_, T end_) : begin{begin_}, end{end_} {} + + bool operator==(const Range& other) const { + return begin == other.begin && end == other.end; + } + + void operator+=(T offset) { + begin += offset; + end += offset; + } + + void operator-=(T offset) { + begin -= offset; + end -= offset; + } + + // Size + constexpr T GetSize() const { return end - begin; } + constexpr void SetSize(T size) { end = begin + size; } + + // Intersection + bool Contains(T value) const { return value >= begin && value < end; } + bool Contains(const Range& other) const { + return other.begin >= begin && other.end <= end; + } + + bool Intersects(const Range& other) const { + 
return begin < other.end && end > other.begin; + } + + // Combining + Range ClampedTo(const Range& bounds) const { + return Range(std::max(begin, bounds.begin), + std::min(end, bounds.end)); + } + + Range Union(const Range& other) const { + return Range(std::min(begin, other.begin), std::max(end, other.end)); + } + + private: + T begin; + T end; + + public: + CONSTEXPR_GETTER_AND_SETTER(begin, GetBegin, SetBegin); + CONSTEXPR_GETTER_AND_SETTER(end, GetEnd, SetEnd); +}; + +} // namespace hydra + +template +struct fmt::formatter> : formatter { + fmt::formatter value_formatter; + + constexpr auto parse(fmt::format_parse_context& ctx) { + return value_formatter.parse(ctx); + } + + template + auto format(const hydra::Range& range, FormatContext& ctx) const { + auto out = ctx.out(); + + *out++ = '<'; + out = value_formatter.format(range.GetBegin(), ctx); + out = fmt::format_to(out, "..."); + out = value_formatter.format(range.GetEnd(), ctx); + *out++ = ')'; + + return out; + } +}; diff --git a/src/common/small_cache.hpp b/src/common/small_cache.hpp new file mode 100644 index 00000000..c1a59eb6 --- /dev/null +++ b/src/common/small_cache.hpp @@ -0,0 +1,175 @@ +#pragma once + +#include +#include + +#include "common/functions.hpp" +#include "common/log.hpp" +#include "common/macros.hpp" +#include "common/type_aliases.hpp" + +namespace hydra { + +template +class SmallCache { + public: + // Iterator + class iterator { + friend class SmallCache; + + public: + using map_iter = typename std::map::iterator; + + iterator(SmallCache* cache_, usize fast_index_) + : cache{cache_}, fast_index{fast_index_} { + advance_fast(); + } + + iterator(SmallCache* cache, map_iter slow_it) + : cache(cache), fast_index(fast_cache_size), slow_it(slow_it) {} + + std::pair operator*() const { + if (fast_index < fast_cache_size) { + auto& e = cache->fast_cache[fast_index]; + return {e.key, e.value}; + } + return {slow_it->first, slow_it->second}; + } + + iterator& operator++() { + if (fast_index < 
fast_cache_size) { + ++fast_index; + advance_fast(); + } else { + ++slow_it; + } + return *this; + } + + bool operator==(const iterator& other) const { + return cache == other.cache && fast_index == other.fast_index && + (fast_index < fast_cache_size || slow_it == other.slow_it); + } + + bool operator!=(const iterator& other) const { + return !(*this == other); + } + + private: + void advance_fast() { + while (fast_index < fast_cache_size && + !cache->fast_cache[fast_index].occupied) { + ++fast_index; + } + + if (fast_index >= fast_cache_size) { + slow_it = cache->slow_cache.begin(); + } + } + + SmallCache* cache; + usize fast_index; + map_iter slow_it; + }; + + iterator begin() { return iterator(this, 0); } + iterator end() { return iterator(this, slow_cache.end()); } + + void Clear() { + fast_cache.fill({}); + slow_cache.clear(); + } + + enum class AddError { + AlreadyPresent, + }; + + T& Add(KeyT key, const T& value = {}) { + // Insert into fast cache if possible + for (auto& entry : fast_cache) { + if (!entry.occupied) { + entry.occupied = true; + entry.key = key; + entry.value = value; + return entry.value; + } else { + ASSERT_THROWING(entry.key != key, Common, + AddError::AlreadyPresent, + "Entry already present"); + } + } + + // Fallback to slow cache + auto res = slow_cache.emplace(key, value); + ASSERT_THROWING(res.second, Common, AddError::AlreadyPresent, + "Entry already present"); + return res.first->second; + } + + iterator Remove(iterator it) { + // Fast cache + if (it.fast_index < fast_cache_size) { + fast_cache[it.fast_index].occupied = false; + + // Advance to the next element + iterator next = it; + ++next; + return next; + } + + // Slow cache + if (it.slow_it != slow_cache.end()) { + auto next_slow = std::next(it.slow_it); + slow_cache.erase(it.slow_it); + return iterator(this, next_slow); + } + + return end(); + } + + void Remove(KeyT key) { Remove(FindIter(key)); } + + iterator FindIter(KeyT key) { + // Fast cache + for (u32 i = 0; i < 
fast_cache_size; i++) { + if (fast_cache[i].occupied && fast_cache[i].key == key) + return iterator(this, i); + } + + // Slow cache + auto it = slow_cache.find(key); + if (it != slow_cache.end()) { + return iterator(this, it); + } + + return end(); + } + + std::optional Find(KeyT key) { + const auto it = FindIter(key); + if (it == end()) + return std::nullopt; + + return &(*it).second; + } + + T& FindOrAdd(KeyT key) { + const auto opt = Find(key); + if (opt.has_value()) + return **opt; + + return Add(key); + } + + private: + struct FastCacheEntry { + bool occupied{false}; + KeyT key; + T value; + }; + + std::array fast_cache; + std::map slow_cache; +}; + +} // namespace hydra diff --git a/src/common/types.hpp b/src/common/types.hpp index 1e0cbb54..7093d944 100644 --- a/src/common/types.hpp +++ b/src/common/types.hpp @@ -10,49 +10,6 @@ namespace hydra { -template -struct range { - public: - T begin; - T end; - - static constexpr range FromSize(T begin, T size) { - return range(begin, begin + size); - } - - constexpr range() : begin{0}, end{0} {} - constexpr range(T begin_) : begin{begin_}, end{invalid()} {} - constexpr range(T begin_, T end_) : begin{begin_}, end{end_} {} - - bool operator==(const range& other) const { - return begin == other.begin && end == other.end; - } - - void Shift(T offset) { - begin += offset; - end += offset; - } - void ShiftLeft(T offset) { - begin -= offset; - end -= offset; - } - - bool Contains(const T other) const { return other >= begin && other < end; } - bool Contains(const range& other) const { - return other.begin >= begin && other.end <= end; - } - - bool Intersects(const range& other) const { - return begin < other.end && end > other.begin; - } - - T GetSize() const { return end - begin; } - - public: - GETTER(begin, GetBegin); - GETTER(end, GetEnd); -}; - struct sized_ptr { public: sized_ptr() : ptr{0x0}, size{0} {} @@ -373,50 +330,6 @@ class strong_number_typedef { using strong_number_typedef::strong_number_typedef; \ } 
-template -class small_cache { - public: - T& Find(KeyT key) { - // Check fast cache - for (auto& entry : fast_cache) { - if (entry.key == key) { - return entry.value; - } - } - - // Check slow cache - auto it = slow_cache.find(key); - if (it != slow_cache.end()) { - return it->second; - } - - // Not found - - // Attempt to add to fast cache - for (auto& entry : fast_cache) { - if (entry.key == KeyT{}) { - entry.key = key; - entry.value = T{}; - return entry.value; - } - } - - // Add to slow cache as a fallback - slow_cache[key] = T{}; - - return slow_cache[key]; - } - - private: - struct FastCacheEntry { - KeyT key; - T value; - }; - - std::array fast_cache; - std::map slow_cache; -}; - template class CacheBase { public: @@ -463,16 +376,6 @@ struct fmt::formatter> : formatter { } }; -// TODO: rework -template -struct fmt::formatter> : formatter { - template - auto format(hydra::range value, FormatContext& ctx) const { - return formatter::format( - fmt::format("<{}...{})", value.begin, value.end), ctx); - } -}; - template struct fmt::formatter> : formatter { template diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 29218f4c..ebd5be1b 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -490,6 +490,7 @@ add_library(hydra-core hw/tegra_x1/gpu/renderer/shader_decompiler/const.hpp hw/tegra_x1/gpu/renderer/shader_decompiler/decompiler.cpp hw/tegra_x1/gpu/renderer/shader_decompiler/decompiler.hpp + hw/tegra_x1/gpu/renderer/shader_decompiler/ir/type.cpp hw/tegra_x1/gpu/renderer/shader_decompiler/ir/type.hpp hw/tegra_x1/gpu/renderer/shader_decompiler/ir/value.hpp hw/tegra_x1/gpu/renderer/shader_decompiler/ir/instruction.hpp @@ -547,7 +548,10 @@ add_library(hydra-core hw/tegra_x1/gpu/renderer/shader_decompiler/codegen/lang/msl/emitter.hpp hw/tegra_x1/gpu/renderer/const.cpp hw/tegra_x1/gpu/renderer/const.hpp + hw/tegra_x1/gpu/renderer/command_buffer.hpp + hw/tegra_x1/gpu/renderer/surface_compositor.hpp 
hw/tegra_x1/gpu/renderer/buffer_base.hpp + hw/tegra_x1/gpu/renderer/buffer_view.hpp hw/tegra_x1/gpu/renderer/buffer_cache.cpp hw/tegra_x1/gpu/renderer/buffer_cache.hpp hw/tegra_x1/gpu/renderer/texture_base.hpp @@ -574,6 +578,10 @@ add_library(hydra-core hw/tegra_x1/gpu/renderer/metal/maxwell_to_mtl.cpp hw/tegra_x1/gpu/renderer/metal/maxwell_to_mtl.hpp hw/tegra_x1/gpu/renderer/metal/impl.cpp + hw/tegra_x1/gpu/renderer/metal/command_buffer.cpp + hw/tegra_x1/gpu/renderer/metal/command_buffer.hpp + hw/tegra_x1/gpu/renderer/metal/surface_compositor.cpp + hw/tegra_x1/gpu/renderer/metal/surface_compositor.hpp hw/tegra_x1/gpu/renderer/metal/buffer.cpp hw/tegra_x1/gpu/renderer/metal/buffer.hpp hw/tegra_x1/gpu/renderer/metal/texture.cpp diff --git a/src/core/c_api.cpp b/src/core/c_api.cpp index 03575063..f6d000f1 100644 --- a/src/core/c_api.cpp +++ b/src/core/c_api.cpp @@ -290,6 +290,10 @@ HYDRA_EXPORT void* hydra_config_get_process_args() { return &hydra::CONFIG_INSTANCE.GetProcessArgs(); } +HYDRA_EXPORT bool* hydra_config_get_recover_from_segfault() { + return &hydra::CONFIG_INSTANCE.GetRecoverFromSegfault(); +} + HYDRA_EXPORT bool* hydra_config_get_gdb_enabled() { return &hydra::CONFIG_INSTANCE.GetGdbEnabled(); } diff --git a/src/core/c_api.h b/src/core/c_api.h index 7dfc2631..5cfab562 100644 --- a/src/core/c_api.h +++ b/src/core/c_api.h @@ -194,6 +194,7 @@ uint32_t* hydra_config_get_log_output(); bool* hydra_config_get_log_fs_access(); bool* hydra_config_get_debug_logging(); void* hydra_config_get_process_args(); +bool* hydra_config_get_recover_from_segfault(); bool* hydra_config_get_gdb_enabled(); uint16_t* hydra_config_get_gdb_port(); bool* hydra_config_get_gdb_wait_for_client(); diff --git a/src/core/debugger/debugger.hpp b/src/core/debugger/debugger.hpp index bb16bb54..540467af 100644 --- a/src/core/debugger/debugger.hpp +++ b/src/core/debugger/debugger.hpp @@ -104,7 +104,7 @@ class Thread { struct Symbol { std::string name; - range guest_mem_range; + Range 
guest_mem_range; }; class SymbolTable { diff --git a/src/core/debugger/gdb_server.cpp b/src/core/debugger/gdb_server.cpp index 8e74bfef..28906da7 100644 --- a/src/core/debugger/gdb_server.cpp +++ b/src/core/debugger/gdb_server.cpp @@ -510,7 +510,7 @@ void GdbServer::HandleQuery(std::string_view command) { // TODO: number_to_hex? output += fmt::format( R"()", - symbol.name, symbol.guest_mem_range.begin); + symbol.name, symbol.guest_mem_range.GetBegin()); } output += ""; SendPacket(PageFromBuffer(output, command.substr(21))); @@ -603,7 +603,7 @@ void GdbServer::HandleInsertBreakpoint(std::string_view command) { const auto mmu = debugger.process->GetMmu(); replaced_instructions.insert({addr, mmu->Read(addr)}); mmu->Write(addr, BRK); - NotifyMemoryChanged(range(addr, 4)); + NotifyMemoryChanged(Range(addr, 4)); } SendPacket(GDB_OK); @@ -641,7 +641,7 @@ void GdbServer::HandleRemoveBreakpoint(std::string_view command) { ASSERT(it != replaced_instructions.end(), Debugger, "Breakpoint not found at address {:#x}", addr); mmu->Write(addr, it->second); - NotifyMemoryChanged(range(addr, 4)); + NotifyMemoryChanged(Range(addr, 4)); replaced_instructions.erase(it); } @@ -685,8 +685,8 @@ void GdbServer::HandleGetExecutables() { std::string path = fmt::format("{}/{}.elf", dir_path, module_.name); // Output - output += - fmt::format("\"{}\":{:#x}", path, module_.guest_mem_range.begin); + output += fmt::format("\"{}\":{:#x}", path, + module_.guest_mem_range.GetBegin()); if (i < debugger.GetModuleTable().GetSymbols().size() - 1) output += ";"; @@ -795,7 +795,7 @@ void GdbServer::NotifySupervisorPausedImpl(horizon::kernel::GuestThread* thread, SendPacket(GetThreadStatus(thread, signal)); } -void GdbServer::NotifyMemoryChanged(range mem_range) { +void GdbServer::NotifyMemoryChanged(Range mem_range) { for (const auto& [_, thread] : debugger.threads) thread.guest_thread->GetThread()->NotifyMemoryChanged(mem_range); } diff --git a/src/core/debugger/gdb_server.hpp 
b/src/core/debugger/gdb_server.hpp index 3a37870d..83141c62 100644 --- a/src/core/debugger/gdb_server.hpp +++ b/src/core/debugger/gdb_server.hpp @@ -73,7 +73,7 @@ class GdbServer { void NotifySupervisorPausedImpl(horizon::kernel::GuestThread* thread, Signal signal); - void NotifyMemoryChanged(range mem_range); + void NotifyMemoryChanged(Range mem_range); }; } // namespace hydra::debugger diff --git a/src/core/emulation_context.cpp b/src/core/emulation_context.cpp index e747d6cb..f4571f12 100644 --- a/src/core/emulation_context.cpp +++ b/src/core/emulation_context.cpp @@ -25,6 +25,8 @@ #include "core/hw/tegra_x1/cpu/mmu.hpp" #include "core/hw/tegra_x1/cpu/thread.hpp" #include "core/hw/tegra_x1/gpu/renderer/buffer_base.hpp" +#include "core/hw/tegra_x1/gpu/renderer/command_buffer.hpp" +#include "core/hw/tegra_x1/gpu/renderer/surface_compositor.hpp" #include "core/hw/tegra_x1/gpu/renderer/texture_base.hpp" #include "core/input/device_manager.hpp" @@ -273,8 +275,9 @@ void EmulationContext::LoadAndStart(horizon::loader::LoaderBase* loader) { u32 width, height; if (auto data = loader->LoadNintendoLogo(width, height)) { hw::tegra_x1::gpu::renderer::TextureDescriptor descriptor( - 0x0, hw::tegra_x1::gpu::renderer::TextureFormat::RGBA8Unorm, - hw::tegra_x1::gpu::NvKind::Generic_16BX2, width, height, 0x0, + 0x0, hw::tegra_x1::gpu::renderer::TextureType::_2D, + hw::tegra_x1::gpu::renderer::TextureFormat::RGBA8Unorm, + hw::tegra_x1::gpu::NvKind::Generic_16BX2, width, height, 1, 0x0, width * 4); nintendo_logo = gpu->GetRenderer().CreateTexture(descriptor); nintendo_logo->CopyFrom(reinterpret_cast(data)); @@ -287,8 +290,9 @@ void EmulationContext::LoadAndStart(horizon::loader::LoaderBase* loader) { if (auto data = loader->LoadStartupMovie(startup_movie_delays, width, height, frame_count)) { hw::tegra_x1::gpu::renderer::TextureDescriptor descriptor( - 0x0, hw::tegra_x1::gpu::renderer::TextureFormat::RGBA8Unorm, - hw::tegra_x1::gpu::NvKind::Generic_16BX2, width, height, 0x0, + 0x0, 
hw::tegra_x1::gpu::renderer::TextureType::_2D, + hw::tegra_x1::gpu::renderer::TextureFormat::RGBA8Unorm, + hw::tegra_x1::gpu::NvKind::Generic_16BX2, width, height, 1, 0x0, width * 4); startup_movie.reserve(frame_count); for (u32 i = 0; i < frame_count; i++) { @@ -431,12 +435,8 @@ void EmulationContext::ProgressFrame(u32 width, u32 height, // Present // Acquire surface - auto& renderer = gpu->GetRenderer(); - // NOTE: this waits for a surface to be available. We don't lock the mutex, - // as that would block all other rendering for a long time. The mutex also - // doesn't need to be locked, as all surface related operations are done on - // this thread. - if (!renderer.AcquireNextSurface()) + auto compositor = gpu->GetRenderer().AcquireNextSurface(); + if (!compositor) return; // Delta time @@ -446,80 +446,81 @@ void EmulationContext::ProgressFrame(u32 width, u32 height, accumulated_dt += layer->GetAccumulatedDT(); } - renderer.LockMutex(); + // Command buffer + auto command_buffer = gpu->GetRenderer().CreateCommandBuffer(); // Acquire present textures - bool acquired = os->GetDisplayDriver().AcquirePresentTextures(); + bool acquired = + os->GetDisplayDriver().AcquirePresentTextures(command_buffer); // Render pass - renderer.BeginSurfaceRenderPass(); - os->GetDisplayDriver().Present(width, height); - - if (acquired && loading) { - // TODO: till when should the loading screen be shown? 
- // Stop the loading screen on the first present - loading = false; - - // Free loading assets - if (nintendo_logo) { - delete nintendo_logo; - nintendo_logo = nullptr; - } - if (!startup_movie.empty()) { - for (auto frame : startup_movie) - delete frame; - startup_movie.clear(); - startup_movie.shrink_to_fit(); - startup_movie_delays.clear(); - startup_movie_delays.shrink_to_fit(); - } - } else if (loading) { - const auto crnt_time = clock_t::now(); - - // Display loading screen - - // Fade in - f32 opacity = 1.0f; - if (crnt_time < startup_movie_fade_in_time) - opacity = - 1.0f - - std::chrono::duration_cast>( - startup_movie_fade_in_time - crnt_time) / - std::chrono::duration_cast>( - STARTUP_MOVIE_FADE_IN_DURATION); - - // Nintendo logo - if (nintendo_logo) { - int2 size = {(i32)nintendo_logo->GetDescriptor().width, - (i32)nintendo_logo->GetDescriptor().height}; - int2 dst_offset = {32, 32}; - renderer.DrawTextureToSurface( - nintendo_logo, IntRect2D({0, 0}, size), - IntRect2D(dst_offset, size), true, opacity); - } + os->GetDisplayDriver().Present(command_buffer, compositor, width, height); + + if (loading) { + if (acquired) { + // TODO: till when should the loading screen be shown? 
+ // Stop the loading screen on the first present + loading = false; + + // Free loading assets + if (nintendo_logo) { + delete nintendo_logo; + nintendo_logo = nullptr; + } + if (!startup_movie.empty()) { + for (auto frame : startup_movie) + delete frame; + startup_movie.clear(); + startup_movie.shrink_to_fit(); + startup_movie_delays.clear(); + startup_movie_delays.shrink_to_fit(); + } + } else { + const auto crnt_time = clock_t::now(); - // Startup movie - if (!startup_movie.empty()) { - // Progress frame - while (crnt_time > next_startup_movie_frame_time) { - startup_movie_frame = - (startup_movie_frame + 1) % startup_movie.size(); - next_startup_movie_frame_time += - startup_movie_delays[startup_movie_frame]; + // Display loading screen + + // Fade in + f32 opacity = 1.0f; + if (crnt_time < startup_movie_fade_in_time) + opacity = + 1.0f - + std::chrono::duration_cast>( + startup_movie_fade_in_time - crnt_time) / + std::chrono::duration_cast>( + STARTUP_MOVIE_FADE_IN_DURATION); + + // Nintendo logo + if (nintendo_logo) { + int2 size = {(i32)nintendo_logo->GetDescriptor().width, + (i32)nintendo_logo->GetDescriptor().height}; + int2 dst_offset = {32, 32}; + compositor->DrawTexture( + command_buffer, nintendo_logo, IntRect2D({0, 0}, size), + IntRect2D(dst_offset, size), true, opacity); } - auto frame = startup_movie[startup_movie_frame]; - int2 size = {(i32)frame->GetDescriptor().width, - (i32)frame->GetDescriptor().height}; - int2 dst_offset = {(i32)width - size.x() - 32, - (i32)height - size.y() - 32}; - renderer.DrawTextureToSurface(frame, IntRect2D({0, 0}, size), - IntRect2D(dst_offset, size), true, - opacity); + // Startup movie + if (!startup_movie.empty()) { + // Progress frame + while (crnt_time > next_startup_movie_frame_time) { + startup_movie_frame = + (startup_movie_frame + 1) % startup_movie.size(); + next_startup_movie_frame_time += + startup_movie_delays[startup_movie_frame]; + } + + auto frame = startup_movie[startup_movie_frame]; + int2 size = 
{(i32)frame->GetDescriptor().width, + (i32)frame->GetDescriptor().height}; + int2 dst_offset = {(i32)width - size.x() - 32, + (i32)height - size.y() - 32}; + compositor->DrawTexture( + command_buffer, frame, IntRect2D({0, 0}, size), + IntRect2D(dst_offset, size), true, opacity); + } } - } - - if (!loading) { + } else { // Delta time const auto now = clock_t::now(); const auto time_since_last_dt_averaging = now - last_dt_averaging_time; @@ -537,10 +538,10 @@ void EmulationContext::ProgressFrame(u32 width, u32 height, } } - renderer.EndSurfaceRenderPass(); - renderer.PresentSurface(); - renderer.EndCommandBuffer(); - renderer.UnlockMutex(); + compositor->Present(command_buffer); + + delete command_buffer; + delete compositor; // Signal V-Sync os->GetDisplayDriver().SignalVSync(); @@ -582,12 +583,11 @@ void EmulationContext::TakeScreenshot() { } // Copy to a buffer - RENDERER_INSTANCE.LockMutex(); + auto command_buffer = RENDERER_INSTANCE.CreateCommandBuffer(); auto buffer = RENDERER_INSTANCE.AllocateTemporaryBuffer( static_cast(rect.size.y() * rect.size.x() * 4)); - buffer->CopyFrom(texture, rect.origin, rect.size); - RENDERER_INSTANCE.EndCommandBuffer(); - RENDERER_INSTANCE.UnlockMutex(); + buffer->CopyFrom(command_buffer, texture, rect.origin, rect.size); + delete command_buffer; // TODO: wait for the command buffer to finish @@ -600,22 +600,19 @@ void EmulationContext::TakeScreenshot() { stbi_flip_vertically_on_write(flip_y); if (!stbi_write_jpg(filename.c_str(), rect.size.x(), rect.size.y(), 4, - (void*)buffer->GetDescriptor().ptr, 100)) + (void*)buffer->GetPtr(), 100)) LOG_ERROR(Other, "Failed to save screenshot to {}", filename); stbi_flip_vertically_on_write(false); // Free the buffer - RENDERER_INSTANCE.LockMutex(); RENDERER_INSTANCE.FreeTemporaryBuffer(buffer); - RENDERER_INSTANCE.UnlockMutex(); }); thread.detach(); } void EmulationContext::CaptureGpuFrame() { - gpu->GetRenderer().LockMutex(); - gpu->GetRenderer().CaptureFrame(); - 
gpu->GetRenderer().UnlockMutex(); + // TODO: allow multiple frames + gpu->GetRenderer().CaptureFrames(1); } void EmulationContext::TryApplyPatch(horizon::kernel::Process* process, diff --git a/src/core/horizon/applets/controller/applet.cpp b/src/core/horizon/applets/controller/applet.cpp index 470615ef..c34a782d 100644 --- a/src/core/horizon/applets/controller/applet.cpp +++ b/src/core/horizon/applets/controller/applet.cpp @@ -6,8 +6,16 @@ result_t Applet::Run() { const auto arg_private = PopInData(); // Mode - ASSERT(arg_private.mode == Mode::ShowControllerSupport, Applets, - "Unimplemented mode {}", arg_private.mode); + if (arg_private.mode != Mode::ShowControllerSupport) { + LOG_WARN(Applets, "Unimplemented mode {}", arg_private.mode); + + // Dummy response + PushOutData(ResultInfoInternal{ + .info = {}, + .result = RESULT_SUCCESS, + }); + return RESULT_SUCCESS; + } // Run with the correct args switch (arg_private.controller_support_arg_size) { diff --git a/src/core/horizon/display/binder.cpp b/src/core/horizon/display/binder.cpp index df85cd69..1827a4d5 100644 --- a/src/core/horizon/display/binder.cpp +++ b/src/core/horizon/display/binder.cpp @@ -1,5 +1,7 @@ #include "core/horizon/display/binder.hpp" +#include "core/hw/tegra_x1/gpu/gpu.hpp" + namespace hydra::horizon::display { void Binder::AddBuffer(i32 slot, const GraphicBuffer& buff) { @@ -41,16 +43,22 @@ i32 Binder::GetAvailableSlot() { } void Binder::QueueBuffer(i32 slot, const BqBufferInput& input) { - std::lock_guard lock(queue_mutex); - queued_buffers.push({slot, input}); - buffers[slot].queued = true; + { + std::lock_guard lock(queue_mutex); + queued_buffers.push({slot, input}); + buffers[slot].queued = true; - // Time - const auto now = clock_t::now(); - accumulated_dt += now - last_queue_time; - last_queue_time = now; + // Time + const auto now = clock_t::now(); + accumulated_dt += now - last_queue_time; + last_queue_time = now; - queue_cv.notify_all(); + queue_cv.notify_all(); + } + + // Debug + // 
TODO: only do this for the main process + RENDERER_INSTANCE.NotifyDebugFrameBoundary(); } i32 Binder::ConsumeBuffer(BqBufferInput& out_input) { diff --git a/src/core/horizon/display/driver.cpp b/src/core/horizon/display/driver.cpp index 6ea2c851..13f182b5 100644 --- a/src/core/horizon/display/driver.cpp +++ b/src/core/horizon/display/driver.cpp @@ -6,7 +6,8 @@ namespace hydra::horizon::display { Driver::Driver() { display_pool.Add(new Display()); } -bool Driver::AcquirePresentTextures() { +bool Driver::AcquirePresentTextures( + hw::tegra_x1::gpu::renderer::ICommandBuffer* command_buffer) { bool acquired = false; { std::lock_guard lock(layer_mutex); @@ -14,14 +15,18 @@ bool Driver::AcquirePresentTextures() { layer_id++) { if (!layer_pool.IsValid(layer_id)) continue; - acquired |= layer_pool.Get(layer_id)->AcquirePresentTexture(); + acquired |= + layer_pool.Get(layer_id)->AcquirePresentTexture(command_buffer); } } return acquired; } -void Driver::Present(u32 width, u32 height) { +void Driver::Present( + hw::tegra_x1::gpu::renderer::ICommandBuffer* command_buffer, + hw::tegra_x1::gpu::renderer::ISurfaceCompositor* compositor, u32 width, + u32 height) { std::lock_guard lock(layer_mutex); std::vector sorted_layers; for (u32 layer_id = 1; layer_id < layer_pool.GetCapacity() + 1; @@ -65,7 +70,8 @@ void Driver::Present(u32 width, u32 height) { // Present for (u32 i = 0; i < sorted_layers.size(); i++) - sorted_layers[i]->Present(dst_rect, dst_scale, i != 0); + sorted_layers[i]->Present(command_buffer, compositor, dst_rect, + dst_scale, i != 0); } void Driver::SignalVSync() { diff --git a/src/core/horizon/display/driver.hpp b/src/core/horizon/display/driver.hpp index 33a1fcd6..3ab2cdf9 100644 --- a/src/core/horizon/display/driver.hpp +++ b/src/core/horizon/display/driver.hpp @@ -61,8 +61,11 @@ class Driver { } // Presenting - bool AcquirePresentTextures(); - void Present(u32 width, u32 height); + bool AcquirePresentTextures( + hw::tegra_x1::gpu::renderer::ICommandBuffer* 
command_buffer); + void Present(hw::tegra_x1::gpu::renderer::ICommandBuffer* command_buffer, + hw::tegra_x1::gpu::renderer::ISurfaceCompositor* compositor, + u32 width, u32 height); void SignalVSync(); Layer* GetFirstLayerForProcess(kernel::Process* process); diff --git a/src/core/horizon/display/layer.cpp b/src/core/horizon/display/layer.cpp index 5e297896..ca92b97a 100644 --- a/src/core/horizon/display/layer.cpp +++ b/src/core/horizon/display/layer.cpp @@ -3,11 +3,13 @@ #include "core/horizon/kernel/process.hpp" #include "core/horizon/os.hpp" #include "core/hw/tegra_x1/gpu/gpu.hpp" +#include "core/hw/tegra_x1/gpu/renderer/surface_compositor.hpp" #include "core/hw/tegra_x1/gpu/renderer/texture_base.hpp" namespace hydra::horizon::display { -bool Layer::AcquirePresentTexture() { +bool Layer::AcquirePresentTexture( + hw::tegra_x1::gpu::renderer::ICommandBuffer* command_buffer) { // Get the buffer to present auto& binder = OS_INSTANCE.GetDisplayDriver().GetBinder(binder_id); @@ -18,8 +20,8 @@ bool Layer::AcquirePresentTexture() { const auto& buffer = binder.GetBuffer(slot); // Texture - present_texture = - GPU_INSTANCE.GetTexture(process->GetMmu(), buffer.nv_buffer); + present_texture = GPU_INSTANCE.GetTexture(command_buffer, process->GetMmu(), + buffer.nv_buffer); // Rect src_rect = {}; @@ -57,7 +59,9 @@ bool Layer::AcquirePresentTexture() { return true; } -void Layer::Present(FloatRect2D dst_rect, f32 dst_scale, bool transparent) { +void Layer::Present(hw::tegra_x1::gpu::renderer::ICommandBuffer* command_buffer, + hw::tegra_x1::gpu::renderer::ISurfaceCompositor* compositor, + FloatRect2D dst_rect, f32 dst_scale, bool transparent) { if (!present_texture) return; @@ -66,8 +70,8 @@ void Layer::Present(FloatRect2D dst_rect, f32 dst_scale, bool transparent) { dst_rect.size = float2(size) * dst_scale; // Draw - RENDERER_INSTANCE.DrawTextureToSurface(present_texture, src_rect, dst_rect, - transparent); + compositor->DrawTexture(command_buffer, present_texture, src_rect, 
dst_rect, + transparent); } AccumulatedTime Layer::GetAccumulatedDT() { diff --git a/src/core/horizon/display/layer.hpp b/src/core/horizon/display/layer.hpp index 3361aba4..9d1efcac 100644 --- a/src/core/horizon/display/layer.hpp +++ b/src/core/horizon/display/layer.hpp @@ -4,7 +4,9 @@ namespace hydra::hw::tegra_x1::gpu::renderer { class TextureBase; -} +class ICommandBuffer; +class ISurfaceCompositor; +} // namespace hydra::hw::tegra_x1::gpu::renderer namespace hydra::horizon::display { @@ -20,8 +22,11 @@ class Layer { void Open() {} void Close() {} - bool AcquirePresentTexture(); - void Present(FloatRect2D dst_rect, f32 dst_scale, bool transparent); + bool AcquirePresentTexture( + hw::tegra_x1::gpu::renderer::ICommandBuffer* command_buffer); + void Present(hw::tegra_x1::gpu::renderer::ICommandBuffer* command_buffer, + hw::tegra_x1::gpu::renderer::ISurfaceCompositor* compositor, + FloatRect2D dst_rect, f32 dst_scale, bool transparent); // Time AccumulatedTime GetAccumulatedDT(); diff --git a/src/core/horizon/filesystem/sparse_file.hpp b/src/core/horizon/filesystem/sparse_file.hpp index 192eb5c0..d99b5667 100644 --- a/src/core/horizon/filesystem/sparse_file.hpp +++ b/src/core/horizon/filesystem/sparse_file.hpp @@ -85,7 +85,7 @@ class SparseFile : public IFile { streams.reserve(entries.size()); for (const auto& entry : entries) { streams.push_back( - {range(entry.offset, entry.offset + entry.file->GetSize()), + {Range(entry.offset, entry.offset + entry.file->GetSize()), entry.file->Open(flags)}); } diff --git a/src/core/horizon/kernel/applet_resource.hpp b/src/core/horizon/kernel/applet_resource.hpp index 3d3fc5db..5c85379d 100644 --- a/src/core/horizon/kernel/applet_resource.hpp +++ b/src/core/horizon/kernel/applet_resource.hpp @@ -84,7 +84,7 @@ class AppletResourcePool { } private: - ResourceArray resources = {std::nullopt}; + ResourceArray resources{}; // Helpers std::optional& GetResourceOpt(AppletResourceUserId aruid) { diff --git 
a/src/core/horizon/kernel/const.hpp b/src/core/horizon/kernel/const.hpp index beedba8e..07ec59a9 100644 --- a/src/core/horizon/kernel/const.hpp +++ b/src/core/horizon/kernel/const.hpp @@ -5,14 +5,14 @@ namespace hydra::horizon::kernel { constexpr handle_id_t CURRENT_PROCESS_PSEUDO_HANDLE = 0xffff8001; constexpr handle_id_t CURRENT_THREAD_PSEUDO_HANDLE = 0xffff8000; -constexpr range ADDRESS_SPACE = - range(0x10000000, 0x200000000); -constexpr range STACK_REGION = range(0x10000000, 0x20000000); -constexpr range TLS_REGION = range(0x20000000, 0x30000000); -constexpr range ALIAS_REGION = range(0x30000000, 0x40000000); -constexpr range EXECUTABLE_REGION = - range(0x40000000, 0x80000000); -constexpr range HEAP_REGION = range(0x100000000, 0x200000000); +constexpr Range ADDRESS_SPACE = + Range(0x10000000, 0x200000000); +constexpr Range STACK_REGION = Range(0x10000000, 0x20000000); +constexpr Range TLS_REGION = Range(0x20000000, 0x30000000); +constexpr Range ALIAS_REGION = Range(0x30000000, 0x40000000); +constexpr Range EXECUTABLE_REGION = + Range(0x40000000, 0x80000000); +constexpr Range HEAP_REGION = Range(0x100000000, 0x200000000); constexpr usize HEAP_MEM_ALIGNMENT = 0x200000; diff --git a/src/core/horizon/kernel/kernel.cpp b/src/core/horizon/kernel/kernel.cpp index fd1527f2..5caeba5b 100644 --- a/src/core/horizon/kernel/kernel.cpp +++ b/src/core/horizon/kernel/kernel.cpp @@ -366,15 +366,16 @@ result_t Kernel::SetHeapSize(Process* crnt_process, usize size, auto& heap_mem = crnt_process->GetHeapMemory(); if (!heap_mem) { heap_mem = CPU_INSTANCE.AllocateMemory(size); - crnt_process->GetMmu()->Map(HEAP_REGION.begin, heap_mem, + crnt_process->GetMmu()->Map(HEAP_REGION.GetBegin(), heap_mem, {MemoryType::Normal_1_0_0, MemoryAttribute::None, - MemoryPermission::ReadWriteExecute}); + MemoryPermission::ReadWrite}); } else { - crnt_process->GetMmu()->ResizeHeap(heap_mem, HEAP_REGION.begin, size); + crnt_process->GetMmu()->ResizeHeap(heap_mem, HEAP_REGION.GetBegin(), + size); } - 
out_base = HEAP_REGION.begin; + out_base = HEAP_REGION.GetBegin(); return RESULT_SUCCESS; } @@ -403,7 +404,8 @@ result_t Kernel::SetMemoryAttribute(Process* crnt_process, vaddr_t addr, "{}, value: {})", addr, size, mask, value); - crnt_process->GetMmu()->SetMemoryAttribute(addr, size, mask, value); + crnt_process->GetMmu()->SetMemoryAttribute( + Range::FromSize(addr, size), mask, value); return RESULT_SUCCESS; } @@ -415,7 +417,8 @@ result_t Kernel::MapMemory(Process* crnt_process, uptr dst_addr, uptr src_addr, "0x{:08x})", dst_addr, src_addr, size); - crnt_process->GetMmu()->Map(dst_addr, src_addr, size); + crnt_process->GetMmu()->Map(dst_addr, + Range::FromSize(src_addr, size)); return RESULT_SUCCESS; } @@ -431,7 +434,7 @@ result_t Kernel::UnmapMemory(Process* crnt_process, uptr dst_addr, // TODO: verify that src_addr is the same as the one used in MapMemory? (void)src_addr; - crnt_process->GetMmu()->Unmap(dst_addr, size); + crnt_process->GetMmu()->Unmap(Range::FromSize(dst_addr, size)); return RESULT_SUCCESS; } @@ -585,7 +588,7 @@ result_t Kernel::MapSharedMemory(Process* crnt_process, SharedMemory* shmem, "0x{:08x}, perm: {})", shmem->GetDebugName(), addr, size, perm); - shmem->MapToRange(crnt_process->GetMmu(), range(addr, uptr(addr + size)), + shmem->MapToRange(crnt_process->GetMmu(), Range(addr, uptr(addr + size)), perm); return RESULT_SUCCESS; @@ -600,7 +603,7 @@ result_t Kernel::UnmapSharedMemory(Process* crnt_process, SharedMemory* shmem, "0x{:08x})", shmem->GetDebugName(), addr, size); - crnt_process->GetMmu()->Unmap(addr, size); + crnt_process->GetMmu()->Unmap(Range::FromSize(addr, size)); return RESULT_SUCCESS; } @@ -976,13 +979,13 @@ result_t Kernel::GetInfo(Process* crnt_process, InfoType info_type, out_info = 0xf; return RESULT_SUCCESS; case InfoType::AliasRegionAddress: - out_info = ALIAS_REGION.begin; + out_info = ALIAS_REGION.GetBegin(); return RESULT_SUCCESS; case InfoType::AliasRegionSize: out_info = ALIAS_REGION.GetSize(); return 
RESULT_SUCCESS; case InfoType::HeapRegionAddress: - out_info = HEAP_REGION.begin; + out_info = HEAP_REGION.GetBegin(); return RESULT_SUCCESS; case InfoType::HeapRegionSize: out_info = HEAP_REGION.GetSize(); @@ -1013,13 +1016,13 @@ result_t Kernel::GetInfo(Process* crnt_process, InfoType info_type, out_info = crnt_process->GetRandomEntropy()[info_sub_type]; return RESULT_SUCCESS; case InfoType::AslrRegionAddress: - out_info = ADDRESS_SPACE.begin; + out_info = ADDRESS_SPACE.GetBegin(); return RESULT_SUCCESS; case InfoType::AslrRegionSize: out_info = ADDRESS_SPACE.GetSize(); return RESULT_SUCCESS; case InfoType::StackRegionAddress: - out_info = STACK_REGION.begin; + out_info = STACK_REGION.GetBegin(); return RESULT_SUCCESS; case InfoType::StackRegionSize: out_info = STACK_REGION.GetSize(); @@ -1078,7 +1081,7 @@ result_t Kernel::MapPhysicalMemory(Process* crnt_process, vaddr_t addr, if (!is_aligned(size, hw::tegra_x1::cpu::GUEST_PAGE_SIZE)) return MAKE_RESULT(Svc, 101); // Invalid size - if (!ALIAS_REGION.Contains(range::FromSize(addr, size))) + if (!ALIAS_REGION.Contains(Range::FromSize(addr, size))) return MAKE_RESULT(Svc, 110); // Invalid memory region auto mem = CPU_INSTANCE.AllocateMemory(size); @@ -1344,7 +1347,8 @@ result_t Kernel::MapProcessMemory(Process* crnt_process, vaddr_t dst_addr, // TODO: correct? 
const auto ptr = process->GetMmu()->UnmapAddr(src_addr); - crnt_process->GetMmu()->Map(dst_addr, ptr, size, {}); // TODO: state + crnt_process->GetMmu()->Map(dst_addr, Range::FromSize(ptr, size), + {}); // TODO: state return RESULT_SUCCESS; } @@ -1356,7 +1360,7 @@ result_t Kernel::MapProcessCodeMemory(Process* process, vaddr_t dst_addr, "src_addr: 0x{:08x}, size: {})", process->GetDebugName(), dst_addr, src_addr, size); - process->GetMmu()->Map(dst_addr, src_addr, size); + process->GetMmu()->Map(dst_addr, Range::FromSize(src_addr, size)); return RESULT_SUCCESS; } @@ -1371,7 +1375,7 @@ result_t Kernel::UnmapProcessCodeMemory(Process* process, vaddr_t dst_addr, // TODO: verify that src_addr is the same as the one used in MapMemory? (void)src_addr; - process->GetMmu()->Unmap(dst_addr, size); + process->GetMmu()->Unmap(Range::FromSize(dst_addr, size)); return RESULT_SUCCESS; } diff --git a/src/core/horizon/kernel/process.cpp b/src/core/horizon/kernel/process.cpp index eb1ae8f6..af62552c 100644 --- a/src/core/horizon/kernel/process.cpp +++ b/src/core/horizon/kernel/process.cpp @@ -11,7 +11,7 @@ namespace hydra::horizon::kernel { Process::Process(const std::string_view debug_name) : SynchronizationObject(false, debug_name), mmu{CPU_INSTANCE.CreateMmu()}, - gmmu{new hw::tegra_x1::gpu::GMmu()} { + gmmu{new hw::tegra_x1::gpu::GMmu(mmu)} { // TODO: use title ID and name as debugger name? DEBUGGER_MANAGER_INSTANCE.AttachDebugger( this, @@ -29,14 +29,9 @@ Process::~Process() { DEBUGGER_MANAGER_INSTANCE.DetachDebugger(this); } -uptr Process::CreateMemory(range region, usize size, MemoryType type, - MemoryPermission perm, bool add_guard_page, - vaddr_t& out_base) { - out_base = mmu->FindFreeMemory( - region, - size + (add_guard_page ? 
hw::tegra_x1::cpu::GUEST_PAGE_SIZE : 0x0)); - if (add_guard_page && out_base != region.begin) - out_base += hw::tegra_x1::cpu::GUEST_PAGE_SIZE; +uptr Process::CreateMemory(Range region, usize size, MemoryType type, + MemoryPermission perm, vaddr_t& out_base) { + out_base = mmu->FindFreeMemory(region, size); ASSERT(out_base != 0x0, Kernel, "Failed to find free memory"); auto mem = CPU_INSTANCE.AllocateMemory(size); @@ -47,13 +42,33 @@ uptr Process::CreateMemory(range region, usize size, MemoryType type, } uptr Process::CreateExecutableMemory(const std::string_view module_name, - usize size, MemoryPermission perm, - bool add_guard_page, vaddr_t& out_base) { - // TODO: use MemoryType::Static - auto ptr = CreateMemory(EXECUTABLE_REGION, size, static_cast(3), - perm, add_guard_page, out_base); + CodeSet code_set, vaddr_t& out_base) { + // TODO: use MemoryType::Static? + auto ptr = CreateMemory(EXECUTABLE_REGION, code_set.size, + static_cast(3), MemoryPermission::Read, + out_base); + + // Protect + mmu->Protect( + Range::FromSize( + out_base + code_set.code.GetBegin(), + align(code_set.code.GetSize(), hw::tegra_x1::cpu::GUEST_PAGE_SIZE)), + MemoryPermission::ReadExecute); + // mmu->Protect( + // Range::FromSize(out_base + code_set.ro_data.GetBegin(), + // align(code_set.ro_data.GetSize(), + // hw::tegra_x1::cpu::GUEST_PAGE_SIZE)), + // MemoryPermission::Read); + mmu->Protect( + Range::FromSize( + out_base + code_set.data.GetBegin(), + align(code_set.data.GetSize(), hw::tegra_x1::cpu::GUEST_PAGE_SIZE)), + MemoryPermission::ReadWrite); + + // Debug DEBUGGER_MANAGER_INSTANCE.GetDebugger(this).GetModuleTable().RegisterSymbol( - {std::string(module_name), range(out_base, out_base + size)}); + {std::string(module_name), + Range(out_base, out_base + code_set.size)}); return ptr; } @@ -74,7 +89,7 @@ void Process::CreateStackMemory(usize stack_size) { // 0x10, priority); auto handle_id = AddHandle(main_thread); main_thread_stack_mem = CPU_INSTANCE.AllocateMemory(stack_size); - 
mmu->Map(STACK_REGION.begin, main_thread_stack_mem, + mmu->Map(STACK_REGION.GetBegin(), main_thread_stack_mem, {MemoryType::Stack, MemoryAttribute::None, MemoryPermission::ReadWrite}); } diff --git a/src/core/horizon/kernel/process.hpp b/src/core/horizon/kernel/process.hpp index 8fd851ee..36b93fde 100644 --- a/src/core/horizon/kernel/process.hpp +++ b/src/core/horizon/kernel/process.hpp @@ -29,20 +29,23 @@ enum class ProcessState { DebugSuspended = 7, }; +struct CodeSet { + u64 size; + Range code; + Range ro_data; + Range data; +}; + class Process : public SynchronizationObject { public: Process(const std::string_view debug_name = "Process"); ~Process() override; // Memory - // TODO: remove add_guard_page - uptr CreateMemory(range region, usize size, MemoryType type, - MemoryPermission perm, bool add_guard_page, - vaddr_t& out_base); - // TODO: should the caller be able to specify permissions? - uptr CreateExecutableMemory(const std::string_view module_name, usize size, - MemoryPermission perm, bool add_guard_page, - vaddr_t& out_base); + uptr CreateMemory(Range region, usize size, MemoryType type, + MemoryPermission perm, vaddr_t& out_base); + uptr CreateExecutableMemory(const std::string_view module_name, + CodeSet code_set, vaddr_t& out_base); hw::tegra_x1::cpu::IMemory* CreateTlsMemory(vaddr_t& base); void CreateStackMemory(usize stack_size); @@ -149,7 +152,7 @@ class Process : public SynchronizationObject { std::vector executable_mems; hw::tegra_x1::cpu::IMemory* main_thread_stack_mem{nullptr}; - vaddr_t tls_mem_base{TLS_REGION.begin}; + vaddr_t tls_mem_base{TLS_REGION.GetBegin()}; // Thread GuestThread* main_thread{nullptr}; diff --git a/src/core/horizon/kernel/shared_memory.cpp b/src/core/horizon/kernel/shared_memory.cpp index e64c6c46..4bc839ff 100644 --- a/src/core/horizon/kernel/shared_memory.cpp +++ b/src/core/horizon/kernel/shared_memory.cpp @@ -15,8 +15,8 @@ SharedMemory::SharedMemory(usize size, const std::string_view debug_name) 
SharedMemory::~SharedMemory() { delete memory; } void SharedMemory::MapToRange(hw::tegra_x1::cpu::IMmu* mmu, - const range range, MemoryPermission perm) { - mmu->Map(range.begin, memory, + const Range range, MemoryPermission perm) { + mmu->Map(range.GetBegin(), memory, {MemoryType::Shared, MemoryAttribute::None, perm}); } diff --git a/src/core/horizon/kernel/shared_memory.hpp b/src/core/horizon/kernel/shared_memory.hpp index f8349baa..1b4fa691 100644 --- a/src/core/horizon/kernel/shared_memory.hpp +++ b/src/core/horizon/kernel/shared_memory.hpp @@ -16,7 +16,7 @@ class SharedMemory : public AutoObject { const std::string_view debug_name = "SharedMemory"); ~SharedMemory() override; - void MapToRange(hw::tegra_x1::cpu::IMmu* mmu, const range range_, + void MapToRange(hw::tegra_x1::cpu::IMmu* mmu, const Range range_, MemoryPermission perm); // Getters diff --git a/src/core/horizon/loader/homebrew_loader.cpp b/src/core/horizon/loader/homebrew_loader.cpp index 35ff80ac..4568a3f2 100644 --- a/src/core/horizon/loader/homebrew_loader.cpp +++ b/src/core/horizon/loader/homebrew_loader.cpp @@ -49,9 +49,10 @@ class HomebrewThread : public kernel::GuestThread { public: // TODO: don't hardcode priority HomebrewThread(kernel::Process* process, std::string_view path_) - : kernel::GuestThread( - process, kernel::STACK_REGION.begin + STACK_MEMORY_SIZE - 0x10, - 0x2c, "Homebrew thread"), + : kernel::GuestThread(process, + kernel::STACK_REGION.GetBegin() + + STACK_MEMORY_SIZE - 0x10, + 0x2c, "Homebrew thread"), path{path_} {} protected: @@ -86,7 +87,7 @@ class HomebrewThread : public kernel::GuestThread { auto state_ptr = process->CreateMemory( kernel::EXECUTABLE_REGION, ARGV_SIZE * 2 + NEXT_LOAD_PATH_SIZE, static_cast(4), - kernel::MemoryPermission::ReadWrite, true, state_base); + kernel::MemoryPermission::ReadWrite, state_base); // Notice text { diff --git a/src/core/horizon/loader/nro_loader.cpp b/src/core/horizon/loader/nro_loader.cpp index 9ec6dfce..8a844883 100644 --- 
a/src/core/horizon/loader/nro_loader.cpp +++ b/src/core/horizon/loader/nro_loader.cpp @@ -16,11 +16,6 @@ enum class NroSectionType { Data, }; -struct NroSection { - u32 offset; - u32 size; -}; - struct NroHeader { u8 rocrt[16]; u32 magic; @@ -60,7 +55,10 @@ NroLoader::NroLoader(filesystem::IFile* file_, const bool is_entry_point_) header.magic); size = header.size; - text_offset = header.GetSection(NroSectionType::Text).offset; + sections[0] = header.GetSection(NroSectionType::Text); + sections[1] = header.GetSection(NroSectionType::Ro); + sections[2] = header.GetSection(NroSectionType::Data); + sections[2].size += header.bss_size; bss_size = header.bss_size; delete stream; @@ -70,15 +68,17 @@ void NroLoader::LoadProcess(kernel::Process* process) { auto stream = file->Open(filesystem::FileOpenFlags::Read); // Create executable memory - executable_size = stream->GetSize() + bss_size; + // TODO: is the size correct? + const auto set = kernel::CodeSet{ + GetExecutableSize() + 0x1000, // HACK: one extra page + Range::FromSize(sections[0].offset, sections[0].size), + Range::FromSize(sections[1].offset, sections[1].size), + Range::FromSize(sections[2].offset, sections[2].size)}; // TODO: module name - executable_ptr = process->CreateExecutableMemory( - "main.nro", executable_size, kernel::MemoryPermission::ReadWriteExecute, - true, - executable_base); // TODO: is the permission correct? 
+ executable_ptr = + process->CreateExecutableMemory("main.nro", set, executable_base); stream->SeekTo(0); - stream->ReadToSpan( - std::span(reinterpret_cast(executable_ptr), stream->GetSize())); + stream->ReadToSpan(std::span(reinterpret_cast(executable_ptr), size)); // Debug symbols // TODO @@ -102,7 +102,7 @@ void NroLoader::LoadProcess(kernel::Process* process) { } vaddr_t NroLoader::GetEntryPoint() const { - return executable_base + sizeof(NroHeader) + text_offset; + return executable_base + sizeof(NroHeader) + sections[0].offset; } } // namespace hydra::horizon::loader diff --git a/src/core/horizon/loader/nro_loader.hpp b/src/core/horizon/loader/nro_loader.hpp index e7655a16..95f6c14a 100644 --- a/src/core/horizon/loader/nro_loader.hpp +++ b/src/core/horizon/loader/nro_loader.hpp @@ -4,6 +4,11 @@ namespace hydra::horizon::loader { +struct NroSection { + u32 offset; + u32 size; +}; + class NroLoader : public LoaderBase { public: enum class Error { @@ -16,17 +21,19 @@ class NroLoader : public LoaderBase { vaddr_t GetEntryPoint() const; + u64 GetExecutableSize() const { return size + bss_size; } + private: filesystem::IFile* file; const bool is_entry_point; u64 size; - u32 text_offset; - u32 bss_size; + NroSection sections[3]; + u64 bss_size; uptr executable_ptr{invalid()}; vaddr_t executable_base{invalid()}; - usize executable_size{invalid()}; + usize executable_size{0}; void TryLoadAssetSection(filesystem::IFile* file); @@ -34,7 +41,6 @@ class NroLoader : public LoaderBase { GETTER(size, GetSize); GETTER(executable_ptr, GetExecutablePtr); GETTER(executable_base, GetExecutableBase); - GETTER(executable_size, GetExecutableSize); }; } // namespace hydra::horizon::loader diff --git a/src/core/horizon/loader/nso_loader.cpp b/src/core/horizon/loader/nso_loader.cpp index 219cab2b..b9abd3f4 100644 --- a/src/core/horizon/loader/nso_loader.cpp +++ b/src/core/horizon/loader/nso_loader.cpp @@ -96,24 +96,6 @@ NsoLoader::NsoLoader(filesystem::IFile* file_, const 
std::string_view name_, text_offset = header.text.memory_offset; - // Determine executable memory size - executable_size = - std::max(executable_size, static_cast(header.text.memory_offset + - header.text.size)); - executable_size = - std::max(executable_size, - static_cast(header.ro.memory_offset + header.ro.size)); - executable_size = - std::max(executable_size, static_cast(header.data.memory_offset + - header.data.size)); - executable_size += header.bss_size; - LOG_DEBUG(Loader, - "NSO: 0x{:08x} + 0x{:08x}, 0x{:08x} + 0x{:08x}, 0x{:08x} + " - "0x{:08x}, 0x{:08x}", - header.text.memory_offset, header.text.size, - header.ro.memory_offset, header.ro.size, - header.data.memory_offset, header.data.size, header.bss_size); - // Segments segments[0] = {header.text, header.text_file_size, any(header.flags & NsoFlags::TextCompressed)}; @@ -121,6 +103,20 @@ NsoLoader::NsoLoader(filesystem::IFile* file_, const std::string_view name_, any(header.flags & NsoFlags::RoCompressed)}; segments[2] = {header.data, header.data_file_size, any(header.flags & NsoFlags::DataCompressed)}; + segments[2].seg.size += header.bss_size; + + // Determine executable memory size + for (u32 i = 0; i < 3; i++) { + executable_size = std::max( + executable_size, static_cast(segments[i].seg.memory_offset + + segments[i].seg.size)); + } + LOG_DEBUG(Loader, + "NSO: 0x{:08x} + 0x{:08x}, 0x{:08x} + 0x{:08x}, 0x{:08x} + " + "0x{:08x}, 0x{:08x}", + header.text.memory_offset, header.text.size, + header.ro.memory_offset, header.ro.size, + header.data.memory_offset, header.data.size, header.bss_size); dyn_str_offset = header.dyn_str_offset; dyn_str_size = header.dyn_str_size; @@ -139,10 +135,16 @@ void NsoLoader::LoadProcess(kernel::Process* process) { auto stream = file->Open(filesystem::FileOpenFlags::Read); // Create executable memory + const auto set = + kernel::CodeSet{executable_size, + Range::FromSize(segments[0].seg.memory_offset, + segments[0].seg.size), + Range::FromSize(segments[1].seg.memory_offset, 
+ segments[1].seg.size), + Range::FromSize(segments[2].seg.memory_offset, + segments[2].seg.size)}; vaddr_t base; - auto ptr = process->CreateExecutableMemory( - name, executable_size, kernel::MemoryPermission::ReadExecute, true, - base); + auto ptr = process->CreateExecutableMemory(name, set, base); LOG_DEBUG(Loader, "Base: 0x{:08x}, size: 0x{:08x}", base, executable_size); // Segments @@ -161,7 +163,7 @@ void NsoLoader::LoadProcess(kernel::Process* process) { auto arg_data_ptr = reinterpret_cast(process->CreateMemory( kernel::EXECUTABLE_REGION, ARG_DATA_SIZE, static_cast(4), kernel::MemoryPermission::ReadWrite, - false, arg_data_base)); + arg_data_base)); arg_data_ptr->allocated_size = ARG_DATA_SIZE; arg_data_ptr->string_size = static_cast(arg_data_str.size() + 1); std::memcpy(arg_data_ptr->str, arg_data_str.c_str(), arg_data_str.size()); @@ -207,7 +209,7 @@ void NsoLoader::LoadProcess(kernel::Process* process) { .GetFunctionTable() .RegisterSymbol( {demangle(std::string(name)), - range(base + symbol.st_value, + Range(base + symbol.st_value, base + symbol.st_value + symbol.st_size)}); } } @@ -220,7 +222,8 @@ void NsoLoader::LoadProcess(kernel::Process* process) { // Main thread auto main_thread = new kernel::GuestThread( - process, kernel::STACK_REGION.begin + main_thread_stack_size - 0x10, + process, + kernel::STACK_REGION.GetBegin() + main_thread_stack_size - 0x10, main_thread_priority); const auto main_thread_handle_id = process->SetMainThread(main_thread); diff --git a/src/core/horizon/services/hid/internal/shared_memory.hpp b/src/core/horizon/services/hid/internal/shared_memory.hpp index b7e9fa16..907f1d37 100644 --- a/src/core/horizon/services/hid/internal/shared_memory.hpp +++ b/src/core/horizon/services/hid/internal/shared_memory.hpp @@ -24,17 +24,19 @@ struct RingLifo { atomic_store(&index, next_index); // TODO: why? 
- if (ReadCount() < max_entries - 1) { + // TODO: should be max_entries - 1 + if (ReadCount() < 1) { atomic_fetch_add(&count, 1ull); } } - void WriteNext(const T& data_) { - auto data = data_; + void WriteNext(const T& data) { + // HACK: const cast try { - data.sampling_number = GetCurrentStorage().sampling_number + 1; + const_cast(data).sampling_number = + GetCurrentStorage().sampling_number + 1; } catch (Error error) { - data.sampling_number = 0; + const_cast(data).sampling_number = 0; } Write(data); } diff --git a/src/core/horizon/services/nvdrv/ioctl/nvhost_as_gpu.cpp b/src/core/horizon/services/nvdrv/ioctl/nvhost_as_gpu.cpp index 74c4233a..fa6036e4 100644 --- a/src/core/horizon/services/nvdrv/ioctl/nvhost_as_gpu.cpp +++ b/src/core/horizon/services/nvdrv/ioctl/nvhost_as_gpu.cpp @@ -74,7 +74,7 @@ NvResult NvHostAsGpu::MapBufferEX(kernel::Process* process, addr = inout_addr; inout_addr = process->GetGMmu()->MapBufferToAddressSpace( - process->GetMmu()->UnmapAddr(map.addr + buffer_offset), size, addr); + Range::FromSize(map.addr + buffer_offset, size), addr); return NvResult::Success; } diff --git a/src/core/horizon/services/nvdrv/ioctl/nvhost_gpu.cpp b/src/core/horizon/services/nvdrv/ioctl/nvhost_gpu.cpp index 06972d01..84e04d73 100644 --- a/src/core/horizon/services/nvdrv/ioctl/nvhost_gpu.cpp +++ b/src/core/horizon/services/nvdrv/ioctl/nvhost_gpu.cpp @@ -48,8 +48,7 @@ NvResult NvHostGpu::SubmitGpfifo( GPU_INSTANCE.GetPfifo().SubmitEntries( *process->GetGMmu(), - std::vector(entries, - entries + num_entries), + std::span(entries, num_entries), inout_flags_and_detailed_error); // HACK @@ -59,21 +58,25 @@ NvResult NvHostGpu::SubmitGpfifo( } NvResult NvHostGpu::AllocObjCtx(u32 class_num, u32 flags, u64* out_obj_id) { - LOG_FUNC_WITH_ARGS_STUBBED(Services, "class number: {}, flags: {:#x}", class_num, flags); + LOG_FUNC_WITH_ARGS_STUBBED(Services, "class number: {}, flags: {:#x}", + class_num, flags); // HACK *out_obj_id = 0; return NvResult::Success; } -NvResult 
NvHostGpu::ZCullBind(gpu_vaddr_t addr, u32 mode, [[maybe_unused]] u32 reserved) { - LOG_FUNC_WITH_ARGS_STUBBED(Services, "address: {:#x}, mode: {}", addr, mode); +NvResult NvHostGpu::ZCullBind(gpu_vaddr_t addr, u32 mode, + [[maybe_unused]] u32 reserved) { + LOG_FUNC_WITH_ARGS_STUBBED(Services, "address: {:#x}, mode: {}", addr, + mode); return NvResult::Success; } NvResult NvHostGpu::SetErrorNotifier(u64 offset, u64 size, u32 mem, [[maybe_unused]] u32 reserved) { - LOG_FUNC_WITH_ARGS_STUBBED(Services, "offset: {:#x}, size: {:#x}, memory: {}", offset, size, mem); + LOG_FUNC_WITH_ARGS_STUBBED( + Services, "offset: {:#x}, size: {:#x}, memory: {}", offset, size, mem); return NvResult::Success; } @@ -89,10 +92,13 @@ NvResult NvHostGpu::GetErrorNotification(u64* out_timestamp, u32* out_info32, return NvResult::Success; } -NvResult NvHostGpu::AllocGpfifoEX(u32 num_entries, u32 num_jobs, u32 flags, - hw::tegra_x1::gpu::Fence* out_fence, - [[maybe_unused]] std::array reserved) { - LOG_FUNC_WITH_ARGS_STUBBED(Services, "number of entries: {}, num_jobs: {}, flags: {:#x}", num_entries, num_jobs, flags); +NvResult +NvHostGpu::AllocGpfifoEX(u32 num_entries, u32 num_jobs, u32 flags, + hw::tegra_x1::gpu::Fence* out_fence, + [[maybe_unused]] std::array reserved) { + LOG_FUNC_WITH_ARGS_STUBBED( + Services, "number of entries: {}, num_jobs: {}, flags: {:#x}", + num_entries, num_jobs, flags); // HACK *out_fence = {}; diff --git a/src/core/horizon/services/ro/detail/ro_interface.cpp b/src/core/horizon/services/ro/detail/ro_interface.cpp index 4d7419dc..7f25e899 100644 --- a/src/core/horizon/services/ro/detail/ro_interface.cpp +++ b/src/core/horizon/services/ro/detail/ro_interface.cpp @@ -17,8 +17,8 @@ result_t IRoInterface::MapManualLoadModuleMemory(kernel::Process* process, auto mmu = process->GetMmu(); const auto base = mmu->FindFreeMemory(kernel::EXECUTABLE_REGION, nro_size + bss_size); - mmu->Map(base, nro_addr, nro_size); - mmu->Map(base + nro_size, bss_addr, bss_size); + 
mmu->Map(base, Range::FromSize(nro_addr, nro_size)); + mmu->Map(base + nro_size, Range::FromSize(bss_addr, bss_size)); *out_addr = base; return RESULT_SUCCESS; diff --git a/src/core/hw/tegra_x1/cpu/dynarmic/mmu.cpp b/src/core/hw/tegra_x1/cpu/dynarmic/mmu.cpp index e16d63e9..b6b74572 100644 --- a/src/core/hw/tegra_x1/cpu/dynarmic/mmu.cpp +++ b/src/core/hw/tegra_x1/cpu/dynarmic/mmu.cpp @@ -6,44 +6,55 @@ namespace hydra::hw::tegra_x1::cpu::dynarmic { -void Mmu::Map(vaddr_t dst_va, uptr ptr, usize size, +void Mmu::Map(vaddr_t dst_va, Range range, const horizon::kernel::MemoryState state) { - ASSERT_ALIGNMENT(size, GUEST_PAGE_SIZE, Dynarmic, "size"); + ASSERT_ALIGNMENT(range.GetSize(), GUEST_PAGE_SIZE, Dynarmic, "size"); u64 va_page = dst_va / GUEST_PAGE_SIZE; - u64 size_page = size / GUEST_PAGE_SIZE; + u64 size_page = range.GetSize() / GUEST_PAGE_SIZE; u64 va_page_end = va_page + size_page; for (u64 page = va_page; page < va_page_end; ++page) { - auto page_ptr = ptr + ((page - va_page) * GUEST_PAGE_SIZE); + auto page_ptr = range.GetBegin() + ((page - va_page) * GUEST_PAGE_SIZE); pages[page] = page_ptr; states[page] = state; } } -void Mmu::Map(vaddr_t dst_va, vaddr_t src_va, usize size) { - ASSERT_ALIGNMENT(size, GUEST_PAGE_SIZE, Dynarmic, "size"); +void Mmu::Map(vaddr_t dst_va, Range range) { + ASSERT_ALIGNMENT(range.GetBegin(), GUEST_PAGE_SIZE, Dynarmic, "begin"); + ASSERT_ALIGNMENT(range.GetEnd(), GUEST_PAGE_SIZE, Dynarmic, "end"); - auto src_page = src_va / GUEST_PAGE_SIZE; + auto src_page = range.GetBegin() / GUEST_PAGE_SIZE; auto dst_page = dst_va / GUEST_PAGE_SIZE; - auto size_page = size / GUEST_PAGE_SIZE; - for (u64 i = 0; i < size_page; i++) { + for (u64 i = 0; i < range.GetSize() / GUEST_PAGE_SIZE; i++) { pages[dst_page + i] = pages[src_page + i]; states[dst_page + i] = states[src_page + i]; } } -void Mmu::Unmap(vaddr_t va, usize size) { - ASSERT_ALIGNMENT(size, GUEST_PAGE_SIZE, Dynarmic, "size"); +void Mmu::Unmap(Range range) { + 
ASSERT_ALIGNMENT(range.GetBegin(), GUEST_PAGE_SIZE, Dynarmic, "begin"); + ASSERT_ALIGNMENT(range.GetEnd(), GUEST_PAGE_SIZE, Dynarmic, "end"); - auto va_page = va / GUEST_PAGE_SIZE; - auto size_page = size / GUEST_PAGE_SIZE; - auto va_page_end = va_page + size_page; - for (u64 page = va_page; page < va_page_end; ++page) { + for (u64 page = range.GetBegin() / GUEST_PAGE_SIZE; + page < range.GetEnd() / GUEST_PAGE_SIZE; ++page) { pages[page] = 0x0; states[page] = {.type = horizon::kernel::MemoryType::Free}; } } +// TODO: actually protect the memory +void Mmu::Protect(Range range, + horizon::kernel::MemoryPermission perm) { + ASSERT_ALIGNMENT(range.GetBegin(), GUEST_PAGE_SIZE, Dynarmic, "begin"); + ASSERT_ALIGNMENT(range.GetEnd(), GUEST_PAGE_SIZE, Dynarmic, "end"); + + for (u64 page = range.GetBegin() / GUEST_PAGE_SIZE; + page < range.GetEnd() / GUEST_PAGE_SIZE; ++page) { + states[page].perm = perm; + } +} + void Mmu::ResizeHeap(IMemory* heap_mem, vaddr_t va, usize size) { auto mem_impl = static_cast(heap_mem); @@ -81,15 +92,14 @@ MemoryRegion Mmu::QueryRegion(vaddr_t va) const { }; } -void Mmu::SetMemoryAttribute(vaddr_t va, usize size, +void Mmu::SetMemoryAttribute(Range range, horizon::kernel::MemoryAttribute mask, horizon::kernel::MemoryAttribute value) { - ASSERT_ALIGNMENT(size, GUEST_PAGE_SIZE, Dynarmic, "size"); + ASSERT_ALIGNMENT(range.GetBegin(), GUEST_PAGE_SIZE, Dynarmic, "begin"); + ASSERT_ALIGNMENT(range.GetEnd(), GUEST_PAGE_SIZE, Dynarmic, "end"); - auto va_page = va / GUEST_PAGE_SIZE; - auto size_page = size / GUEST_PAGE_SIZE; - auto va_page_end = va_page + size_page; - for (u64 page = va_page; page < va_page_end; ++page) { + for (u64 page = range.GetBegin() / GUEST_PAGE_SIZE; + page < range.GetEnd() / GUEST_PAGE_SIZE; ++page) { auto& state = states[page]; state.attr = (state.attr & ~mask) | (value & mask); } diff --git a/src/core/hw/tegra_x1/cpu/dynarmic/mmu.hpp b/src/core/hw/tegra_x1/cpu/dynarmic/mmu.hpp index 067abb19..22a3bf12 100644 --- 
a/src/core/hw/tegra_x1/cpu/dynarmic/mmu.hpp +++ b/src/core/hw/tegra_x1/cpu/dynarmic/mmu.hpp @@ -6,25 +6,47 @@ namespace hydra::hw::tegra_x1::cpu::dynarmic { constexpr usize PAGE_COUNT = - horizon::kernel::ADDRESS_SPACE.end / GUEST_PAGE_SIZE; + horizon::kernel::ADDRESS_SPACE.GetEnd() / GUEST_PAGE_SIZE; class Mmu : public IMmu { public: - void Map(vaddr_t dst_va, uptr ptr, usize size, + void Map(vaddr_t dst_va, Range range, const horizon::kernel::MemoryState state) override; - void Map(vaddr_t dst_va, vaddr_t src_va, usize size) override; - void Unmap(vaddr_t va, usize size) override; + void Map(vaddr_t dst_va, Range range) override; + void Unmap(Range range) override; + void Protect(Range range, + horizon::kernel::MemoryPermission perm) override; void ResizeHeap(IMemory* heap_mem, vaddr_t va, usize size) override; uptr UnmapAddr(vaddr_t va) const override; MemoryRegion QueryRegion(vaddr_t va) const override; - void SetMemoryAttribute(vaddr_t va, usize size, + void SetMemoryAttribute(Range range, horizon::kernel::MemoryAttribute mask, horizon::kernel::MemoryAttribute value) override; uptr GetPageTablePtr() const { return reinterpret_cast(&pages); } + protected: + // Write tracking + void SetWriteTrackingEnabled(Range range, bool enable) override { + // TODO: implement + (void)range; + (void)enable; + ONCE(LOG_FUNC_NOT_IMPLEMENTED(Dynarmic)); + } + bool TrySuspendWriteTracking(Range range) override { + // TODO: implement + (void)range; + ONCE(LOG_FUNC_NOT_IMPLEMENTED(Dynarmic)); + return false; + } + void ResumeWriteTracking(Range range) override { + // TODO: implement + (void)range; + ONCE(LOG_FUNC_NOT_IMPLEMENTED(Dynarmic)); + } + private: uptr pages[PAGE_COUNT] = {0x0}; horizon::kernel::MemoryState states[PAGE_COUNT] = { diff --git a/src/core/hw/tegra_x1/cpu/dynarmic/thread.hpp b/src/core/hw/tegra_x1/cpu/dynarmic/thread.hpp index 21b7c4fb..d8483abb 100644 --- a/src/core/hw/tegra_x1/cpu/dynarmic/thread.hpp +++ b/src/core/hw/tegra_x1/cpu/dynarmic/thread.hpp @@ -24,7 
+24,7 @@ class Thread final : public IThread, private Dynarmic::A64::UserCallbacks { void Run() override; - void NotifyMemoryChanged(range mem_range) override { + void NotifyMemoryChanged(Range mem_range) override { jit->InvalidateCacheRange(mem_range.GetBegin(), mem_range.GetSize()); } diff --git a/src/core/hw/tegra_x1/cpu/hypervisor/const.hpp b/src/core/hw/tegra_x1/cpu/hypervisor/const.hpp index 2f1ba9f3..bf80d671 100644 --- a/src/core/hw/tegra_x1/cpu/hypervisor/const.hpp +++ b/src/core/hw/tegra_x1/cpu/hypervisor/const.hpp @@ -22,45 +22,49 @@ constexpr uptr EXCEPTION_TRAMPOLINE_OFFSET = 0x800; u64(res)); \ } +constexpr u32 AP_SHIFT = 6; +constexpr u32 PNX_SHIFT = 53; +constexpr u32 UXN_SHIFT = 54; + // From Ryujinx enum class ApFlags : u64 { - ApShift = 6, - PxnShift = 53, - UxnShift = 54, - - UserExecuteKernelReadWriteExecute = (0UL << (int)ApShift), - UserReadWriteExecuteKernelReadWrite = (1UL << (int)ApShift), - UserExecuteKernelReadExecute = (2UL << (int)ApShift), - UserReadExecuteKernelReadExecute = (3UL << (int)ApShift), - - UserExecuteKernelReadWrite = (1UL << (int)PxnShift) | (0UL << (int)ApShift), - UserExecuteKernelRead = (1UL << (int)PxnShift) | (2UL << (int)ApShift), - UserReadExecuteKernelRead = (1UL << (int)PxnShift) | (3UL << (int)ApShift), - - UserNoneKernelReadWriteExecute = - (1UL << (int)UxnShift) | (0UL << (int)ApShift), - UserReadWriteKernelReadWrite = - (1UL << (int)UxnShift) | (1UL << (int)ApShift), - UserNoneKernelReadExecute = (1UL << (int)UxnShift) | (2UL << (int)ApShift), - UserReadKernelReadExecute = (1UL << (int)UxnShift) | (3UL << (int)ApShift), + UserExecuteKernelReadWriteExecute = (0ull << AP_SHIFT), + UserReadWriteExecuteKernelReadWrite = (1ull << AP_SHIFT), + UserExecuteKernelReadExecute = (2ull << AP_SHIFT), + UserReadExecuteKernelReadExecute = (3ull << AP_SHIFT), + + UserExecuteKernelReadWrite = (1ull << PNX_SHIFT) | (0ull << AP_SHIFT), + UserExecuteKernelRead = (1ull << PNX_SHIFT) | (2ull << AP_SHIFT), + 
UserReadExecuteKernelRead = (1ull << PNX_SHIFT) | (3ull << AP_SHIFT), + + UserNoneKernelReadWriteExecute = (1ull << UXN_SHIFT) | (0ull << AP_SHIFT), + UserReadWriteKernelReadWrite = (1ull << UXN_SHIFT) | (1ull << AP_SHIFT), + UserNoneKernelReadExecute = (1ull << UXN_SHIFT) | (2ull << AP_SHIFT), + UserReadKernelReadExecute = (1ull << UXN_SHIFT) | (3ull << AP_SHIFT), UserNoneKernelReadWrite = - (1UL << (int)PxnShift) | (1UL << (int)UxnShift) | (0UL << (int)ApShift), + (1ull << PNX_SHIFT) | (1ull << UXN_SHIFT) | (0ull << AP_SHIFT), UserNoneKernelRead = - (1UL << (int)PxnShift) | (1UL << (int)UxnShift) | (2UL << (int)ApShift), + (1ull << PNX_SHIFT) | (1ull << UXN_SHIFT) | (2ull << AP_SHIFT), UserReadKernelRead = - (1UL << (int)PxnShift) | (1UL << (int)UxnShift) | (3UL << (int)ApShift), + (1ull << PNX_SHIFT) | (1ull << UXN_SHIFT) | (3ull << AP_SHIFT), }; -inline uptr allocate_vm_memory(usize size) { +constexpr u64 AP_FLAGS_MASK = + (1ull << PNX_SHIFT) | (1ull << UXN_SHIFT) | (3ull << AP_SHIFT); + +enum class AllocateVmMemoryError { + AllocationFailed, +}; + +inline uptr AllocateVmMemory(usize size) { ASSERT_ALIGNMENT(size, APPLE_PAGE_SIZE, Hypervisor, "size") void* ptr; - posix_memalign(&ptr, APPLE_PAGE_SIZE, size); - if (!ptr) { - LOG_FATAL(Hypervisor, "Failed to allocate memory"); - return 0x0; - } + const auto res = posix_memalign(&ptr, APPLE_PAGE_SIZE, size); + ASSERT_THROWING(res == 0, Hypervisor, + AllocateVmMemoryError::AllocationFailed, + "Failed to allocate memory: {:#x}", res); // Clear the memory memset(ptr, 0, size); diff --git a/src/core/hw/tegra_x1/cpu/hypervisor/cpu.cpp b/src/core/hw/tegra_x1/cpu/hypervisor/cpu.cpp index 537ec7c0..da21fd56 100644 --- a/src/core/hw/tegra_x1/cpu/hypervisor/cpu.cpp +++ b/src/core/hw/tegra_x1/cpu/hypervisor/cpu.cpp @@ -52,11 +52,12 @@ Cpu::Cpu() // simplicity reasons // Kernel memory - kernel_page_table.Map(0x0, kernel_mem.GetPtr(), KERNEL_MEM_SIZE, - {horizon::kernel::MemoryType::Kernel, - 
horizon::kernel::MemoryAttribute::None, - horizon::kernel::MemoryPermission::Execute}, - ApFlags::UserNoneKernelReadExecute); + kernel_page_table.Map( + 0x0, Range::FromSize(kernel_mem.GetPtr(), KERNEL_MEM_SIZE), + {horizon::kernel::MemoryType::Kernel, + horizon::kernel::MemoryAttribute::None, + horizon::kernel::MemoryPermission::Execute}, + ApFlags::UserNoneKernelReadExecute); for (u64 offset = 0; offset < 0x780; offset += 0x80) { memcpy(reinterpret_cast(kernel_mem.GetPtr() + offset), @@ -71,11 +72,11 @@ Cpu::Cpu() /* GET_CURRENT_PROCESS_DEBUGGER().GetModuleTable().RegisterSymbol( {"Hypervisor::handler", - range(KERNEL_REGION_BASE, + Range(KERNEL_REGION_BASE, KERNEL_REGION_BASE + EXCEPTION_TRAMPOLINE_OFFSET)}); GET_CURRENT_PROCESS_DEBUGGER().GetModuleTable().RegisterSymbol( {"Hypervisor::trampoline", - range(KERNEL_REGION_BASE + EXCEPTION_TRAMPOLINE_OFFSET, + Range(KERNEL_REGION_BASE + EXCEPTION_TRAMPOLINE_OFFSET, KERNEL_REGION_BASE + EXCEPTION_TRAMPOLINE_OFFSET + sizeof(exception_trampoline))}); */ diff --git a/src/core/hw/tegra_x1/cpu/hypervisor/memory.hpp b/src/core/hw/tegra_x1/cpu/hypervisor/memory.hpp index 29882eb0..075d644d 100644 --- a/src/core/hw/tegra_x1/cpu/hypervisor/memory.hpp +++ b/src/core/hw/tegra_x1/cpu/hypervisor/memory.hpp @@ -26,7 +26,7 @@ class Memory : public IMemory { void Allocate() { const auto size = GetSizeAligned(); - ptr = allocate_vm_memory(size); + ptr = AllocateVmMemory(size); // Map // TODO: Why does this fail occasionally? 
diff --git a/src/core/hw/tegra_x1/cpu/hypervisor/mmu.cpp b/src/core/hw/tegra_x1/cpu/hypervisor/mmu.cpp index 07715717..eae8467c 100644 --- a/src/core/hw/tegra_x1/cpu/hypervisor/mmu.cpp +++ b/src/core/hw/tegra_x1/cpu/hypervisor/mmu.cpp @@ -19,10 +19,7 @@ namespace hydra::hw::tegra_x1::cpu::hypervisor { namespace { -inline ApFlags to_ap_flags(horizon::kernel::MemoryPermission perm) { - // HACK - return ApFlags::UserReadWriteExecuteKernelReadWrite; - +inline ApFlags ToApFlags(horizon::kernel::MemoryPermission perm) { if (any(perm & horizon::kernel::MemoryPermission::Read)) { if (any(perm & horizon::kernel::MemoryPermission::Write)) { if (any(perm & horizon::kernel::MemoryPermission::Execute)) { @@ -99,22 +96,37 @@ Mmu::Mmu() : user_page_table(FindFreePageTableRegion()) { Mmu::~Mmu() { ReleasePageTableRegion(user_page_table.GetBase()); } -void Mmu::Map(vaddr_t dst_va, uptr ptr, usize size, +void Mmu::Map(vaddr_t dst_va, Range range, const horizon::kernel::MemoryState state) { - ASSERT_ALIGNMENT(size, GUEST_PAGE_SIZE, Hypervisor, "size"); - user_page_table.Map(dst_va, ptr, size, state, to_ap_flags(state.perm)); + ASSERT_ALIGNMENT(dst_va, GUEST_PAGE_SIZE, Hypervisor, "destination VA"); + ASSERT_ALIGNMENT(range.GetSize(), GUEST_PAGE_SIZE, Hypervisor, "size"); + user_page_table.Map(dst_va, range, state, ToApFlags(state.perm)); } // HACK: this assumes that the whole src range is stored contiguously in // physical memory -void Mmu::Map(vaddr_t dst_va, vaddr_t src_va, usize size) { - const auto region = user_page_table.QueryRegion(src_va); - paddr_t pa = region.UnmapAddr(src_va); - user_page_table.Map(dst_va, pa, size, region.state, - to_ap_flags(region.state.perm)); +void Mmu::Map(vaddr_t dst_va, Range range) { + ASSERT_ALIGNMENT(range.GetBegin(), GUEST_PAGE_SIZE, Hypervisor, "begin"); + ASSERT_ALIGNMENT(range.GetEnd(), GUEST_PAGE_SIZE, Hypervisor, "end"); + const auto region = user_page_table.QueryRegion(range.GetBegin()); + paddr_t pa = region.UnmapAddr(range.GetBegin()); 
+ // TODO: also inherit flags + user_page_table.Map(dst_va, Range::FromSize(pa, range.GetSize()), + region.state, ToApFlags(region.state.perm)); } -void Mmu::Unmap(vaddr_t va, usize size) { user_page_table.Unmap(va, size); } +void Mmu::Unmap(Range range) { + ASSERT_ALIGNMENT(range.GetBegin(), GUEST_PAGE_SIZE, Hypervisor, "begin"); + ASSERT_ALIGNMENT(range.GetEnd(), GUEST_PAGE_SIZE, Hypervisor, "end"); + user_page_table.Unmap(range); +} + +void Mmu::Protect(Range range, + horizon::kernel::MemoryPermission perm) { + ASSERT_ALIGNMENT(range.GetBegin(), GUEST_PAGE_SIZE, Hypervisor, "begin"); + ASSERT_ALIGNMENT(range.GetEnd(), GUEST_PAGE_SIZE, Hypervisor, "end"); + user_page_table.SetMemoryPermission(range, perm, ToApFlags(perm)); +} // TODO: just improve this... void Mmu::ResizeHeap(IMemory* heap_mem, vaddr_t va, usize size) { @@ -122,8 +134,8 @@ void Mmu::ResizeHeap(IMemory* heap_mem, vaddr_t va, usize size) { const auto region = user_page_table.QueryRegion(va); paddr_t pa = region.UnmapAddr(va); - user_page_table.Map(va, pa, size, region.state, - to_ap_flags(region.state.perm)); + user_page_table.Map(va, Range::FromSize(pa, size), region.state, + ToApFlags(region.state.perm)); } uptr Mmu::UnmapAddr(vaddr_t va) const { return user_page_table.UnmapAddr(va); } @@ -138,10 +150,28 @@ MemoryRegion Mmu::QueryRegion(vaddr_t va) const { }; } -void Mmu::SetMemoryAttribute(vaddr_t va, usize size, +void Mmu::SetMemoryAttribute(Range range, horizon::kernel::MemoryAttribute mask, horizon::kernel::MemoryAttribute value) { - user_page_table.SetMemoryAttribute(va, size, mask, value); + user_page_table.SetMemoryAttribute(range, mask, value); +} + +void Mmu::SetWriteTrackingEnabled(Range range, bool enable) { + ASSERT_ALIGNMENT(range.GetBegin(), GUEST_PAGE_SIZE, Hypervisor, "begin"); + ASSERT_ALIGNMENT(range.GetEnd(), GUEST_PAGE_SIZE, Hypervisor, "end"); + user_page_table.SetWriteTrackingEnabled(range, enable); +} + +bool Mmu::TrySuspendWriteTracking(Range range) { + 
ASSERT_ALIGNMENT(range.GetBegin(), GUEST_PAGE_SIZE, Hypervisor, "begin"); + ASSERT_ALIGNMENT(range.GetEnd(), GUEST_PAGE_SIZE, Hypervisor, "end"); + return user_page_table.TrySuspendWriteTracking(range); +} + +void Mmu::ResumeWriteTracking(Range range) { + ASSERT_ALIGNMENT(range.GetBegin(), GUEST_PAGE_SIZE, Hypervisor, "begin"); + ASSERT_ALIGNMENT(range.GetEnd(), GUEST_PAGE_SIZE, Hypervisor, "end"); + user_page_table.ResumeWriteTracking(range); } } // namespace hydra::hw::tegra_x1::cpu::hypervisor diff --git a/src/core/hw/tegra_x1/cpu/hypervisor/mmu.hpp b/src/core/hw/tegra_x1/cpu/hypervisor/mmu.hpp index 7fb356f4..fa5c4df4 100644 --- a/src/core/hw/tegra_x1/cpu/hypervisor/mmu.hpp +++ b/src/core/hw/tegra_x1/cpu/hypervisor/mmu.hpp @@ -14,19 +14,27 @@ class Mmu : public IMmu { Mmu(); ~Mmu() override; - void Map(vaddr_t dst_va, uptr ptr, usize size, + void Map(vaddr_t dst_va, Range range, const horizon::kernel::MemoryState state) override; - void Map(vaddr_t dst_va, vaddr_t src_va, usize size) override; - void Unmap(vaddr_t va, usize size) override; + void Map(vaddr_t dst_va, Range range) override; + void Unmap(Range range) override; + void Protect(Range range, + horizon::kernel::MemoryPermission perm) override; void ResizeHeap(IMemory* heap_mem, vaddr_t va, usize size) override; uptr UnmapAddr(vaddr_t va) const override; MemoryRegion QueryRegion(vaddr_t va) const override; - void SetMemoryAttribute(vaddr_t va, usize size, + void SetMemoryAttribute(Range range, horizon::kernel::MemoryAttribute mask, horizon::kernel::MemoryAttribute value) override; + protected: + // Write tracking + void SetWriteTrackingEnabled(Range range, bool enable) override; + bool TrySuspendWriteTracking(Range range) override; + void ResumeWriteTracking(Range range) override; + private: PageTable user_page_table; diff --git a/src/core/hw/tegra_x1/cpu/hypervisor/page_allocator.cpp b/src/core/hw/tegra_x1/cpu/hypervisor/page_allocator.cpp index 223d5a8c..f7db581a 100644 --- 
a/src/core/hw/tegra_x1/cpu/hypervisor/page_allocator.cpp +++ b/src/core/hw/tegra_x1/cpu/hypervisor/page_allocator.cpp @@ -31,7 +31,7 @@ void PageAllocator::Allocate(usize page_count) { page_count = align(page_count, PAGE_COUNT_ALIGNMENT); const usize size = page_count * GUEST_PAGE_SIZE; - uptr ptr = allocate_vm_memory(size); + uptr ptr = AllocateVmMemory(size); const paddr_t pa = base_pa + current_page * GUEST_PAGE_SIZE; HV_ASSERT_SUCCESS( diff --git a/src/core/hw/tegra_x1/cpu/hypervisor/page_table.cpp b/src/core/hw/tegra_x1/cpu/hypervisor/page_table.cpp index eb2bc113..b16b1a28 100644 --- a/src/core/hw/tegra_x1/cpu/hypervisor/page_table.cpp +++ b/src/core/hw/tegra_x1/cpu/hypervisor/page_table.cpp @@ -1,5 +1,6 @@ #include "core/hw/tegra_x1/cpu/hypervisor/page_table.hpp" +#include "common/type_aliases.hpp" #include "core/debugger/debugger_manager.hpp" #define PTE_TYPE_MASK 0x3ull @@ -37,11 +38,11 @@ PageTableLevel::PageTableLevel(u32 level_, const Page page_, PageTableLevel& PageTableLevel::GetNext(PageAllocator& allocator, u32 index) { ASSERT_DEBUG(level < 2, Hypervisor, "Level 2 is the last level"); - auto& next = next_levels[index]; + auto& next = next_levels[index].level; if (!next) { next = new PageTableLevel(level + 1, allocator.GetNextPage(), base_va + index * GetBlockSize()); - WriteEntry(index, next->page.pa | PTE_TABLE); + GetEntry(index) = next->page.pa | PTE_TABLE; } return *next; @@ -52,24 +53,23 @@ PageTable::PageTable(paddr_t base_pa) PageTable::~PageTable() = default; -void PageTable::Map(vaddr_t va, paddr_t pa, usize size, - const horizon::kernel::MemoryState state, ApFlags flags) { - LOG_DEBUG(Hypervisor, "va: 0x{:08x}, pa: 0x{:08x}, size: 0x{:08x}", va, pa, - size); +void PageTable::Map(vaddr_t va, Range range, + const horizon::kernel::MemoryState state, + ApFlags ap_flags) { + LOG_DEBUG(Hypervisor, "va: {:#x}, range: {:#x}", va, range); ASSERT_ALIGNMENT(va, GUEST_PAGE_SIZE, Hypervisor, "va"); - ASSERT_ALIGNMENT(pa, GUEST_PAGE_SIZE, Hypervisor, 
"pa"); - ASSERT_ALIGNMENT(size, GUEST_PAGE_SIZE, Hypervisor, "size"); + ASSERT_ALIGNMENT(range.GetBegin(), GUEST_PAGE_SIZE, Hypervisor, "begin"); + ASSERT_ALIGNMENT(range.GetEnd(), GUEST_PAGE_SIZE, Hypervisor, "end"); - MapLevel(top_level, va, pa, size, state, flags); + MapLevel(top_level, va, range.GetBegin(), range.GetSize(), state, ap_flags); } -void PageTable::Unmap(vaddr_t va, usize size) { - LOG_FUNC_WITH_ARGS_NOT_IMPLEMENTED(Hypervisor, "va: {:#x}, size: {:#x}", va, - size); +void PageTable::Unmap(Range range) { + LOG_FUNC_WITH_ARGS_NOT_IMPLEMENTED(Hypervisor, "range: {:#x}", range); } -// TODO: find out if there is a cheaper way +// TODO: use IterateRange PageRegion PageTable::QueryRegion(vaddr_t va) const { #define FREE_MEMORY(region_va, region_size) \ PageRegion { \ @@ -86,7 +86,7 @@ PageRegion PageTable::QueryRegion(vaddr_t va) const { u32 index = top_level.VaToIndex(va); auto* level = &top_level; - u64 entry = top_level.ReadEntry(index); + u64 entry = top_level.GetEntry(index); while ((entry & PTE_TYPE_MASK) != PTE_BLOCK(level->GetLevel())) { if ((entry & PTE_TYPE_MASK) != PTE_TABLE) return FREE_MEMORY(va & ~(level->GetBlockSize() - 1), @@ -94,7 +94,7 @@ PageRegion PageTable::QueryRegion(vaddr_t va) const { level = level->GetNextNoNew(index); index = level->VaToIndex(va); - entry = level->ReadEntry(index); + entry = level->GetEntry(index); } PageRegion region; @@ -106,32 +106,77 @@ PageRegion PageTable::QueryRegion(vaddr_t va) const { return region; } -// TODO: this should subdivide the table if necessary -void PageTable::SetMemoryAttribute(vaddr_t va, usize size, +void PageTable::SetMemoryPermission(Range range, + horizon::kernel::MemoryPermission perm, + ApFlags ap_flags) { + ModifyRange(range, [perm, ap_flags]([[maybe_unused]] Range range, + u64& entry, + horizon::kernel::MemoryState& state, + [[maybe_unused]] PageFlags flags) { + if (!any(flags & PageFlags::WriteTrackingEnabled)) { + entry &= ~AP_FLAGS_MASK; + entry |= static_cast(ap_flags); + } + 
state.perm = perm; + }); +} + +void PageTable::SetMemoryAttribute(Range range, horizon::kernel::MemoryAttribute mask, horizon::kernel::MemoryAttribute value) { - auto va_page = va / GUEST_PAGE_SIZE; - auto size_page = size / GUEST_PAGE_SIZE; - auto va_page_end = va_page + size_page; - for (u64 page = va_page; page < va_page_end; ++page) { - u32 index = top_level.VaToIndex(page * GUEST_PAGE_SIZE); - auto* level = &top_level; - u64 entry = top_level.ReadEntry(index); - while ((entry & PTE_TYPE_MASK) != PTE_BLOCK(level->GetLevel())) { - if ((entry & PTE_TYPE_MASK) != PTE_TABLE) - break; + ModifyRange(range, [mask, value]([[maybe_unused]] Range range, + [[maybe_unused]] u64& entry, + horizon::kernel::MemoryState& state, + [[maybe_unused]] PageFlags flags) { + state.attr = (state.attr & ~mask) | (value & mask); + }); +} - level = level->GetNextNoNew(index); - index = level->VaToIndex(page * GUEST_PAGE_SIZE); - entry = level->ReadEntry(index); +void PageTable::SetWriteTrackingEnabled(Range range, bool enable) { + ModifyRange(range, + [enable]([[maybe_unused]] Range range, u64& entry, + [[maybe_unused]] horizon::kernel::MemoryState& state, + PageFlags& flags) { + // AP flags + entry &= ~AP_FLAGS_MASK; + entry |= static_cast( + enable ? 
ApFlags::UserReadKernelRead + : ApFlags::UserReadWriteKernelReadWrite); + + // Page flags + if (enable) + flags |= PageFlags::WriteTrackingEnabled; + else + flags &= ~PageFlags::WriteTrackingEnabled; + }); +} + +bool PageTable::TrySuspendWriteTracking(Range range) { + bool res = false; + ModifyRange(range, [&res]( + [[maybe_unused]] Range range, u64& entry, + [[maybe_unused]] horizon::kernel::MemoryState& state, + [[maybe_unused]] PageFlags& flags) { + bool enabled = any(flags & PageFlags::WriteTrackingEnabled); + if (enabled) { + entry &= ~AP_FLAGS_MASK; + entry |= static_cast(ApFlags::UserReadWriteKernelReadWrite); + res = true; } + }); - if ((entry & PTE_TYPE_MASK) != PTE_TABLE) - continue; + return res; +} - auto& state = level->GetLevelState(index); - state.attr = (state.attr & ~mask) | (value & mask); - } +void PageTable::ResumeWriteTracking(Range range) { + ModifyRange(range, []([[maybe_unused]] Range range, u64& entry, + [[maybe_unused]] horizon::kernel::MemoryState& state, + PageFlags& flags) { + if (any(flags & PageFlags::WriteTrackingEnabled)) { + entry &= ~AP_FLAGS_MASK; + entry |= static_cast(ApFlags::UserReadKernelRead); + } + }); } paddr_t PageTable::UnmapAddr(vaddr_t va) const { @@ -146,13 +191,13 @@ paddr_t PageTable::UnmapAddr(vaddr_t va) const { void PageTable::MapLevel(PageTableLevel& level, vaddr_t va, paddr_t pa, usize size, const horizon::kernel::MemoryState state, - ApFlags flags) { + ApFlags ap_flags) { vaddr_t end_va = va + size; do { MapLevelNext( level, va, pa, std::min(align(va + 1, level.GetBlockSize()) - va, end_va - va), - state, flags); + state, ap_flags); vaddr_t old_va = va; va = align_down(va + level.GetBlockSize(), level.GetBlockSize()); @@ -163,7 +208,7 @@ void PageTable::MapLevel(PageTableLevel& level, vaddr_t va, paddr_t pa, void PageTable::MapLevelNext(PageTableLevel& level, vaddr_t va, paddr_t pa, usize size, const horizon::kernel::MemoryState state, - ApFlags flags) { + ApFlags ap_flags) { // LOG_DEBUG(Hypervisor, // 
"Level: {}, va: 0x{:08x}, pa: 0x{:08x}, size: 0x{:08x}", // level.GetLevel(), va, pa, size); @@ -171,11 +216,72 @@ void PageTable::MapLevelNext(PageTableLevel& level, vaddr_t va, paddr_t pa, u32 index = level.VaToIndex(va); // TODO: uncomment if (/*size == level.GetBlockSize()*/ level.GetLevel() == 2) { - level.WriteEntry(index, pa | PTE_BLOCK(level.GetLevel()) | PTE_AF | - PTE_INNER_SHEREABLE | (u64)flags); - level.SetLevelState(index, state); + level.GetEntry(index) = pa | PTE_BLOCK(level.GetLevel()) | PTE_AF | + PTE_INNER_SHEREABLE | + static_cast(ap_flags); + level.GetLevelState(index) = state; } else { - MapLevel(level.GetNext(allocator, index), va, pa, size, state, flags); + MapLevel(level.GetNext(allocator, index), va, pa, size, state, + ap_flags); + } +} + +void PageTable::IterateRange( + Range range, + std::function, u64, const horizon::kernel::MemoryState&, + PageFlags)> + callback) const { + for (u64 page = range.GetBegin() / GUEST_PAGE_SIZE; + page < range.GetEnd() / GUEST_PAGE_SIZE; ++page) { + u32 index = top_level.VaToIndex(page * GUEST_PAGE_SIZE); + auto* level = &top_level; + u64 entry = top_level.GetEntry(index); + while ((entry & PTE_TYPE_MASK) != PTE_BLOCK(level->GetLevel())) { + if ((entry & PTE_TYPE_MASK) != PTE_TABLE) + break; + + level = level->GetNextNoNew(index); + index = level->VaToIndex(page * GUEST_PAGE_SIZE); + entry = level->GetEntry(index); + } + + if ((entry & PTE_TYPE_MASK) != PTE_TABLE) + continue; + + callback( + Range::FromSize(page * GUEST_PAGE_SIZE, GUEST_PAGE_SIZE), + level->GetEntry(index), level->GetLevelState(index), + level->GetLevelFlags(index)); + } +} + +// TODO: this should subdivide the table if necessary +void PageTable::ModifyRange( + Range range, + std::function, u64&, horizon::kernel::MemoryState&, + PageFlags&)> + callback) { + for (u64 page = range.GetBegin() / GUEST_PAGE_SIZE; + page < range.GetEnd() / GUEST_PAGE_SIZE; ++page) { + u32 index = top_level.VaToIndex(page * GUEST_PAGE_SIZE); + auto* level = 
&top_level; + u64 entry = top_level.GetEntry(index); + while ((entry & PTE_TYPE_MASK) != PTE_BLOCK(level->GetLevel())) { + if ((entry & PTE_TYPE_MASK) != PTE_TABLE) + break; + + level = level->GetNextNoNew(index); + index = level->VaToIndex(page * GUEST_PAGE_SIZE); + entry = level->GetEntry(index); + } + + if ((entry & PTE_TYPE_MASK) != PTE_TABLE) + continue; + + callback( + Range::FromSize(page * GUEST_PAGE_SIZE, GUEST_PAGE_SIZE), + level->GetEntry(index), level->GetLevelState(index), + level->GetLevelFlags(index)); } } diff --git a/src/core/hw/tegra_x1/cpu/hypervisor/page_table.hpp b/src/core/hw/tegra_x1/cpu/hypervisor/page_table.hpp index 8db4a443..66398c77 100644 --- a/src/core/hw/tegra_x1/cpu/hypervisor/page_table.hpp +++ b/src/core/hw/tegra_x1/cpu/hypervisor/page_table.hpp @@ -15,6 +15,12 @@ constexpr usize ENTRY_COUNT = 1ull << BLOCK_SHIFT_DIFF; // TODO: correct? constexpr usize ADDRESS_SPACE_SIZE = 1ull << GET_BLOCK_SHIFT(-1); +enum class PageFlags : u8 { + None = 0, + WriteTrackingEnabled = BITL(0), +}; +ENABLE_ENUM_BITWISE_OPERATORS(PageFlags); + struct PageTableLevel { PageTableLevel(u32 level_, const Page page_, const vaddr_t base_va_); @@ -28,24 +34,24 @@ struct PageTableLevel { return static_cast((va - base_va) >> GET_BLOCK_SHIFT(level)); } - u64 ReadEntry(u32 index) const { - const u64* table = reinterpret_cast(page.ptr); + u64& GetEntry(u32 index) { + u64* table = reinterpret_cast(page.ptr); return table[index]; } - void WriteEntry(u32 index, u64 entry) { - u64* table = reinterpret_cast(page.ptr); - table[index] = entry; + const u64& GetEntry(u32 index) const { + const u64* table = reinterpret_cast(page.ptr); + return table[index]; } PageTableLevel* GetNextNoNew(u32 index) { ASSERT_DEBUG(level < 2, Hypervisor, "Level 2 is the last level"); - return next_levels[index]; + return next_levels[index].level; } const PageTableLevel* GetNextNoNew(u32 index) const { ASSERT_DEBUG(level < 2, Hypervisor, "Level 2 is the last level"); - return 
next_levels[index]; + return next_levels[index].level; } PageTableLevel& GetNext(PageAllocator& allocator, u32 index); @@ -53,23 +59,30 @@ struct PageTableLevel { u32 GetBlockShift() const { return GET_BLOCK_SHIFT(level); } horizon::kernel::MemoryState& GetLevelState(u32 index) { - return level_states[index]; + return next_levels[index].state; } const horizon::kernel::MemoryState& GetLevelState(u32 index) const { - return level_states[index]; + return next_levels[index].state; } - void SetLevelState(u32 index, const horizon::kernel::MemoryState state) { - level_states[index] = state; + PageFlags& GetLevelFlags(u32 index) { return next_levels[index].flags; } + + const PageFlags& GetLevelFlags(u32 index) const { + return next_levels[index].flags; } private: + struct NextLevel { + PageTableLevel* level{nullptr}; + horizon::kernel::MemoryState state{}; + PageFlags flags{}; + }; + u32 level; const Page page; const vaddr_t base_va; - PageTableLevel* next_levels[ENTRY_COUNT] = {nullptr}; - horizon::kernel::MemoryState level_states[ENTRY_COUNT] = {}; + std::array next_levels{}; public: GETTER(level, GetLevel); @@ -89,15 +102,24 @@ class PageTable { PageTable(paddr_t base_pa); ~PageTable(); - void Map(vaddr_t va, paddr_t pa, usize size, - const horizon::kernel::MemoryState state, ApFlags flags); - void Unmap(vaddr_t va, usize size); + void Map(vaddr_t va, Range range, + const horizon::kernel::MemoryState state, ApFlags ap_flags); + void Unmap(Range range); + // State PageRegion QueryRegion(vaddr_t va) const; - void SetMemoryAttribute(vaddr_t va, usize size, + void SetMemoryPermission(Range range, + horizon::kernel::MemoryPermission perm, + ApFlags ap_flags); + void SetMemoryAttribute(Range range, horizon::kernel::MemoryAttribute mask, horizon::kernel::MemoryAttribute value); + // Write tracking + void SetWriteTrackingEnabled(Range range, bool enable); + bool TrySuspendWriteTracking(Range range); + void ResumeWriteTracking(Range range); + paddr_t UnmapAddr(vaddr_t va) const; 
paddr_t GetBase() const { return allocator.GetBase(); } @@ -107,9 +129,21 @@ class PageTable { PageTableLevel top_level; void MapLevel(PageTableLevel& level, vaddr_t va, paddr_t pa, usize size, - const horizon::kernel::MemoryState state, ApFlags flags); + const horizon::kernel::MemoryState state, ApFlags ap_flags); void MapLevelNext(PageTableLevel& level, vaddr_t va, paddr_t pa, usize size, - const horizon::kernel::MemoryState state, ApFlags flags); + const horizon::kernel::MemoryState state, + ApFlags ap_flags); + + void IterateRange( + Range range, + std::function, u64, + const horizon::kernel::MemoryState&, PageFlags)> + callback) const; + void + ModifyRange(Range range, + std::function, u64&, + horizon::kernel::MemoryState&, PageFlags&)> + callback); }; } // namespace hydra::hw::tegra_x1::cpu::hypervisor diff --git a/src/core/hw/tegra_x1/cpu/hypervisor/thread.cpp b/src/core/hw/tegra_x1/cpu/hypervisor/thread.cpp index 3f711836..bff4ea40 100644 --- a/src/core/hw/tegra_x1/cpu/hypervisor/thread.cpp +++ b/src/core/hw/tegra_x1/cpu/hypervisor/thread.cpp @@ -176,12 +176,26 @@ void Thread::Run() { break; } case ExceptionClass::DataAbortLowerEl: { + // TODO: use the correct size + if (MMU.TrackWrite(Range::FromSize(far, 8))) + break; + bool far_valid = (esr & 0x00000400) == 0; ASSERT_DEBUG(far_valid, Hypervisor, "FAR not valid"); - GET_CURRENT_PROCESS_DEBUGGER().BreakOnThisThread( - "Data abort (PC: 0x{:08x}, FAR: 0x{:08x})", state.pc, - far); + if (CONFIG_INSTANCE.GetRecoverFromSegfault()) { + LOG_ERROR( + Hypervisor, + "Data abort (PC: 0x{:08x}, address: 0x{:08x})", + state.pc, far); + + // Just move on to the next instruction + state.pc += 4; + } else { + GET_CURRENT_PROCESS_DEBUGGER().BreakOnThisThread( + "Data abort (PC: 0x{:08x}, address: 0x{:08x})", + state.pc, far); + } break; } default: diff --git a/src/core/hw/tegra_x1/cpu/mmu.cpp b/src/core/hw/tegra_x1/cpu/mmu.cpp index 6d2b086d..0c4a3ce3 100644 --- a/src/core/hw/tegra_x1/cpu/mmu.cpp +++ 
b/src/core/hw/tegra_x1/cpu/mmu.cpp @@ -1,5 +1,7 @@ #include "core/hw/tegra_x1/cpu/mmu.hpp" +#include "core/hw/tegra_x1/gpu/gpu.hpp" + namespace hydra::hw::tegra_x1::cpu { horizon::kernel::MemoryInfo IMmu::QueryMemory(vaddr_t va) const { @@ -28,7 +30,7 @@ horizon::kernel::MemoryInfo IMmu::QueryMemory(vaddr_t va) const { // Next vaddr_t addr = info.addr + info.size; - if (addr >= horizon::kernel::ADDRESS_SPACE.end) + if (addr >= horizon::kernel::ADDRESS_SPACE.GetEnd()) break; region = QueryRegion(addr); @@ -46,21 +48,49 @@ horizon::kernel::MemoryInfo IMmu::QueryMemory(vaddr_t va) const { return info; } -vaddr_t IMmu::FindFreeMemory(range region, usize size) const { +vaddr_t IMmu::FindFreeMemory(Range region, usize size) const { size = align(size, GUEST_PAGE_SIZE); - auto crnt_region = range::FromSize(region.begin, size); + auto crnt_region = Range::FromSize(region.GetBegin(), size); while (region.Contains(crnt_region)) { - const auto info = QueryMemory(crnt_region.begin); - const auto mem_range = range(std::max(info.addr, region.begin), - info.addr + info.size); + const auto info = QueryMemory(crnt_region.GetBegin()); + const auto mem_range = Range( + std::max(info.addr, region.GetBegin()), info.addr + info.size); if (info.state.type == horizon::kernel::MemoryType::Free && mem_range.Contains(crnt_region)) - return mem_range.begin; + return mem_range.GetBegin(); - crnt_region.Shift(mem_range.GetSize()); + crnt_region += mem_range.GetSize(); } return 0x0; } +bool IMmu::TrackWrite(Range range) { + const auto aligned_range = + Range(align_down(range.GetBegin(), GUEST_PAGE_SIZE), + align(range.GetEnd(), GUEST_PAGE_SIZE)); + if (!TrySuspendWriteTracking(aligned_range)) + return false; + + // Notify the GPU + // TODO: what about non-contiguous regions? 
+ const auto ptr = UnmapAddr(aligned_range.GetBegin()); + RENDERER_INSTANCE.InvalidateMemory( + Range::FromSize(ptr, aligned_range.GetSize())); + + { + std::lock_guard lock(write_tracking_mutex); + tracked_pages.push_back(aligned_range); + } + + return true; +} + +void IMmu::FlushTrackedPages() { + std::lock_guard lock(write_tracking_mutex); + for (const auto& range : tracked_pages) + ResumeWriteTracking(range); + tracked_pages.clear(); +} + } // namespace hydra::hw::tegra_x1::cpu diff --git a/src/core/hw/tegra_x1/cpu/mmu.hpp b/src/core/hw/tegra_x1/cpu/mmu.hpp index 2be4f103..80637a04 100644 --- a/src/core/hw/tegra_x1/cpu/mmu.hpp +++ b/src/core/hw/tegra_x1/cpu/mmu.hpp @@ -19,26 +19,41 @@ class IMmu { public: virtual ~IMmu() = default; - virtual void Map(vaddr_t dst_va, uptr ptr, usize size, + virtual void Map(vaddr_t dst_va, Range range, const horizon::kernel::MemoryState state) = 0; void Map(vaddr_t dst_va, IMemory* memory, const horizon::kernel::MemoryState state) { - Map(dst_va, memory->GetPtr(), memory->GetSize(), state); + Map(dst_va, Range::FromSize(memory->GetPtr(), memory->GetSize()), + state); } - virtual void Map(vaddr_t dst_va, vaddr_t src_va, usize size) = 0; - virtual void Unmap(vaddr_t va, usize size) = 0; + virtual void Map(vaddr_t dst_va, Range range) = 0; + virtual void Unmap(Range range) = 0; + virtual void Protect(Range range, + horizon::kernel::MemoryPermission perm) = 0; + virtual void ResizeHeap(IMemory* heap_mem, vaddr_t va, usize size) = 0; // TODO: remove this virtual uptr UnmapAddr(vaddr_t va) const = 0; virtual MemoryRegion QueryRegion(vaddr_t va) const = 0; - virtual void SetMemoryAttribute(vaddr_t va, usize size, + virtual void SetMemoryAttribute(Range range, horizon::kernel::MemoryAttribute mask, horizon::kernel::MemoryAttribute value) = 0; horizon::kernel::MemoryInfo QueryMemory(vaddr_t va) const; - vaddr_t FindFreeMemory(range region, usize size) const; + vaddr_t FindFreeMemory(Range region, usize size) const; + + // Write tracking + 
void EnableWriteTracking(Range range) { + SetWriteTrackingEnabled(range, true); + } + void DisableWriteTracking(Range range) { + SetWriteTrackingEnabled(range, false); + } + bool TrackWrite(Range range); + void FlushTrackedPages(); + // Read template bool TryRead(vaddr_t va, T& out_value) const { const auto ptr = UnmapAddr(va); @@ -57,6 +72,7 @@ class IMmu { return value; } + // Write template bool TryWrite(vaddr_t va, T value) const { const auto ptr = UnmapAddr(va); @@ -79,6 +95,16 @@ class IMmu { ASSERT_DEBUG(ptr != 0x0, Cpu, "Failed to unmap va 0x{:08x}", va); atomic_store(reinterpret_cast(ptr), value); } + + protected: + // Write tracking + virtual void SetWriteTrackingEnabled(Range range, bool enable) = 0; + virtual bool TrySuspendWriteTracking(Range range) = 0; + virtual void ResumeWriteTracking(Range range) = 0; + + private: + std::mutex write_tracking_mutex; + std::vector> tracked_pages; }; } // namespace hydra::hw::tegra_x1::cpu diff --git a/src/core/hw/tegra_x1/cpu/thread.hpp b/src/core/hw/tegra_x1/cpu/thread.hpp index 4a6ddd71..f4c1dbc8 100644 --- a/src/core/hw/tegra_x1/cpu/thread.hpp +++ b/src/core/hw/tegra_x1/cpu/thread.hpp @@ -38,7 +38,7 @@ class IThread { virtual void Run() = 0; virtual void - NotifyMemoryChanged([[maybe_unused]] range mem_range) {} + NotifyMemoryChanged([[maybe_unused]] Range mem_range) {} // Debug void GetStackTrace(stack_frame_callback_fn_t callback); diff --git a/src/core/hw/tegra_x1/gpu/const.hpp b/src/core/hw/tegra_x1/gpu/const.hpp index 1556342f..24d6d949 100644 --- a/src/core/hw/tegra_x1/gpu/const.hpp +++ b/src/core/hw/tegra_x1/gpu/const.hpp @@ -752,8 +752,7 @@ constexpr usize VIEWPORT_COUNT = 16; constexpr usize VERTEX_ATTRIB_COUNT = 32; constexpr usize VERTEX_ARRAY_COUNT = 16; -constexpr usize CONST_BUFFER_BINDING_COUNT = 32; -constexpr usize UNIFORM_BUFFER_BINDING_COUNT = 64; // TODO: what is this? +constexpr usize CONST_BUFFER_BINDING_COUNT = 32; // TODO: what is this? 
constexpr usize STORAGE_BUFFER_BINDING_COUNT = 16; constexpr usize TEXTURE_BINDING_COUNT = 32; constexpr usize IMAGE_BINDING_COUNT = 8; diff --git a/src/core/hw/tegra_x1/gpu/engines/2d.cpp b/src/core/hw/tegra_x1/gpu/engines/2d.cpp index c474b9df..363e22ea 100644 --- a/src/core/hw/tegra_x1/gpu/engines/2d.cpp +++ b/src/core/hw/tegra_x1/gpu/engines/2d.cpp @@ -10,14 +10,13 @@ DEFINE_METHOD_TABLE(TwoD, 0x237, 1, Copy, u32) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-parameter" -void TwoD::Copy(GMmu& gmmu, const u32 index, - const u32 pixels_from_memory_src_y0_int) { +void TwoD::Copy(const u32 index, const u32 pixels_from_memory_src_y0_int) { auto& pixels = regs.pixels_from_memory; pixels.src_y0.integer = pixels_from_memory_src_y0_int; // TODO: can these also not be textures? - auto src = GetTexture(gmmu, regs.src, renderer::TextureUsage::Read); - auto dst = GetTexture(gmmu, regs.dst, renderer::TextureUsage::Write); + auto src = GetTexture(regs.src, renderer::TextureUsage::Read); + auto dst = GetTexture(regs.dst, renderer::TextureUsage::Write); const auto dudx = static_cast(pixels.dudx); const auto dvdy = static_cast(pixels.dvdy); @@ -28,28 +27,33 @@ void TwoD::Copy(GMmu& gmmu, const u32 index, const auto src_width = static_cast(pixels.dst_width * dudx); const auto src_height = static_cast(pixels.dst_height * dvdy); - dst->BlitFrom(src, regs.src.layer, {f32(src_x0), f32(src_y0), 0}, - {src_width, src_height, 1}, regs.dst.layer, - {f32(pixels.dst_x0), f32(pixels.dst_y0), 0}, + dst->BlitFrom(tls_crnt_command_buffer, src, + {static_cast(src_x0), static_cast(src_y0), + static_cast(regs.src.layer)}, + {src_width, src_height, 1}, + {static_cast(pixels.dst_x0), + static_cast(pixels.dst_y0), + static_cast(regs.dst.layer)}, {pixels.dst_width, pixels.dst_height, 1}); } #pragma GCC diagnostic pop -renderer::TextureBase* TwoD::GetTexture(GMmu& gmmu, const Texture2DInfo& info, +renderer::TextureBase* TwoD::GetTexture(const Texture2DInfo& info, 
renderer::TextureUsage usage) { const renderer::TextureDescriptor descriptor( - gmmu.UnmapAddr(info.addr), renderer::to_texture_format(info.format), + tls_crnt_gmmu->UnmapAddr(info.addr), renderer::TextureType::_2D, + renderer::to_texture_format(info.format), NvKind::Pitch, // TODO: correct? - u32(info.width), u32(info.height), + u32(info.width), u32(info.height), 1, 0, // HACK /*u32(info.stride)*/ renderer::get_texture_format_stride( renderer::to_texture_format(info.format), info.width) // HACK ); - return RENDERER_INSTANCE.GetTextureCache().GetTextureView(descriptor, - usage); + return RENDERER_INSTANCE.GetTextureCache().Find(tls_crnt_command_buffer, + descriptor, usage); } } // namespace hydra::hw::tegra_x1::gpu::engines diff --git a/src/core/hw/tegra_x1/gpu/engines/2d.hpp b/src/core/hw/tegra_x1/gpu/engines/2d.hpp index df55f186..ba735711 100644 --- a/src/core/hw/tegra_x1/gpu/engines/2d.hpp +++ b/src/core/hw/tegra_x1/gpu/engines/2d.hpp @@ -71,16 +71,14 @@ struct Regs2D { class TwoD : public EngineWithRegsBase { public: - void Method(GMmu& gmmu, u32 method, u32 arg) override; + void Method(u32 method, u32 arg) override; private: // Commands - void Copy(GMmu& gmmu, const u32 index, - const u32 pixels_from_memory_src_y0_int); + void Copy(const u32 index, const u32 pixels_from_memory_src_y0_int); // Helpers - static renderer::TextureBase* GetTexture(GMmu& gmmu, - const Texture2DInfo& info, + static renderer::TextureBase* GetTexture(const Texture2DInfo& info, renderer::TextureUsage usage); }; diff --git a/src/core/hw/tegra_x1/gpu/engines/3d.cpp b/src/core/hw/tegra_x1/gpu/engines/3d.cpp index 99142e7e..a1606815 100644 --- a/src/core/hw/tegra_x1/gpu/engines/3d.cpp +++ b/src/core/hw/tegra_x1/gpu/engines/3d.cpp @@ -1,5 +1,6 @@ #include "core/hw/tegra_x1/gpu/engines/3d.hpp" +#include "core/hw/tegra_x1/cpu/mmu.hpp" #include "core/hw/tegra_x1/gpu/gpu.hpp" #include "core/hw/tegra_x1/gpu/macro/interpreter/driver.hpp" #include "core/hw/tegra_x1/gpu/renderer/buffer_base.hpp" @@ 
-15,6 +16,28 @@ namespace { u32 get_image_handle(u32 handle) { return extract_bits(handle, 0, 20); } u32 get_sampler_handle(u32 handle) { return extract_bits(handle, 20, 12); } +renderer::TextureType ToTextureType(TextureType type) { + switch (type) { + case TextureType::_1D: + return renderer::TextureType::_1D; + case TextureType::_1DArray: + return renderer::TextureType::_1DArray; + case TextureType::_1DBuffer: + return renderer::TextureType::_1DBuffer; + case TextureType::_2D: + case TextureType::_2DNoMipmap: + return renderer::TextureType::_2D; + case TextureType::_2DArray: + return renderer::TextureType::_2DArray; + case TextureType::_3D: + return renderer::TextureType::_3D; + case TextureType::Cubemap: + return renderer::TextureType::Cube; + case TextureType::CubeArray: + return renderer::TextureType::CubeArray; + } +} + constexpr u32 GL_MIN = 0x8007; constexpr u32 GL_MAX = 0x8008; constexpr u32 GL_FUNC_ADD = 0x8006; @@ -161,6 +184,26 @@ renderer::BlendFactor get_blend_factor(u32 blend_factor) { } } +// Render target width is aligned to the stride, lets try to figure out the real +// one +u32 GetMinimumWidth(u32 width, renderer::TextureFormat format, u32 width_hint, + bool is_linear) { + if (is_linear || width <= width_hint) + return width; + + // Get the smallest width that would still align up to the same GOB + // count + const auto bpp = renderer::get_texture_format_bpp(format); + const auto alignment = 64 / bpp; + const auto width_aligned = align(width, alignment); + // HACK + // return std::clamp(width_aligned - alignment + 1, width_hint, + // width_aligned); + if (width_aligned - alignment + 1 <= width_hint) + return width_hint; + return width; +} + } // namespace DEFINE_METHOD_TABLE(ThreeD, INLINE_ENGINE_TABLE, 0x45, 1, @@ -209,7 +252,7 @@ ThreeD::~ThreeD() { SINGLETON_UNSET_INSTANCE(); } -void ThreeD::FlushMacro(GMmu& gmmu) { macro_driver->Execute(gmmu); } +void ThreeD::FlushMacro() { macro_driver->Execute(); } void ThreeD::Macro(u32 method, u32 arg) 
{ u32 index = (method - MACRO_METHODS_REGION) >> 1; @@ -227,41 +270,34 @@ void ThreeD::Macro(u32 method, u32 arg) { #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-parameter" -void ThreeD::LoadMmeInstructionRamPointer(GMmu& gmmu, const u32 index, - const u32 ptr) { +void ThreeD::LoadMmeInstructionRamPointer(const u32 index, const u32 ptr) { macro_driver->LoadInstructionRamPointer(ptr); } -void ThreeD::LoadMmeInstructionRam(GMmu& gmmu, const u32 index, - const u32 data) { +void ThreeD::LoadMmeInstructionRam(const u32 index, const u32 data) { macro_driver->LoadInstructionRam(data); } -void ThreeD::LoadMmeStartAddressRamPointer(GMmu& gmmu, const u32 index, - const u32 ptr) { +void ThreeD::LoadMmeStartAddressRamPointer(const u32 index, const u32 ptr) { macro_driver->LoadStartAddressRamPointer(ptr); } -void ThreeD::LoadMmeStartAddressRam(GMmu& gmmu, const u32 index, - const u32 data) { +void ThreeD::LoadMmeStartAddressRam(const u32 index, const u32 data) { macro_driver->LoadStartAddressRam(data); } -void ThreeD::DrawVertexArray(GMmu& gmmu, const u32 index, u32 count) { - if (!DrawInternal(gmmu)) +void ThreeD::DrawVertexArray(const u32 index, u32 count) { + if (!DrawInternal()) return; auto index_type = IndexType::None; auto primitive_type = regs.begin.primitive_type; - renderer::BufferBase* index_buffer = - RENDERER_INSTANCE.GetIndexCache().Decode( - {.type = index_type, - .primitive_type = primitive_type, - .count = count, - .src_index_buffer = nullptr}, - index_type, primitive_type, count); - - if (index_buffer) { + const auto index_buffer = RENDERER_INSTANCE.GetIndexCache().Decode( + tls_crnt_command_buffer, + {.type = index_type, .primitive_type = primitive_type, .count = count}, + index_type, primitive_type, count); + + if (index_buffer.GetBase()) { // Bind index buffer RENDERER_INSTANCE.BindIndexBuffer(index_buffer, index_type); @@ -270,53 +306,55 @@ void ThreeD::DrawVertexArray(GMmu& gmmu, const u32 index, u32 count) { // Vertex start is set 
as vertex base instead, as start is now index // start // TODO: instance count - RENDERER_INSTANCE.DrawIndexed(primitive_type, 0, count, - regs.vertex_array_start, + RENDERER_INSTANCE.DrawIndexed(tls_crnt_command_buffer, primitive_type, + 0, count, regs.vertex_array_start, regs.base_instance, 1); } else { // Draw // TODO: instance count - RENDERER_INSTANCE.Draw(primitive_type, regs.vertex_array_start, count, + RENDERER_INSTANCE.Draw(tls_crnt_command_buffer, primitive_type, + regs.vertex_array_start, count, regs.base_instance, 1); } } -void ThreeD::DrawVertexElements(GMmu& gmmu, const u32 index, u32 count) { - if (!DrawInternal(gmmu)) +void ThreeD::DrawVertexElements(const u32 index, u32 count) { + if (!DrawInternal()) return; // Index buffer - gpu_vaddr_t index_buffer_ptr = gmmu.UnmapAddr(regs.index_buffer_addr); + gpu_vaddr_t index_buffer_ptr = + tls_crnt_gmmu->UnmapAddr(regs.index_buffer_addr); // TODO: uncomment? u32 index_buffer_size = count * get_index_type_size( regs.index_type); // u64(regs.index_buffer_limit_addr) + 1 // - u64(regs.index_buffer_addr); - auto index_buffer = RENDERER_INSTANCE.GetBufferCache().Find( - {index_buffer_ptr, index_buffer_size}); + const auto range = + Range::FromSize(index_buffer_ptr, index_buffer_size); auto index_type = regs.index_type; auto primitive_type = regs.begin.primitive_type; - index_buffer = RENDERER_INSTANCE.GetIndexCache().Decode( + const auto index_buffer = RENDERER_INSTANCE.GetIndexCache().Decode( + tls_crnt_command_buffer, {.type = index_type, .primitive_type = primitive_type, .count = count, - .src_index_buffer = index_buffer}, + .mem_range = range}, index_type, primitive_type, count); // Bind index buffer - ASSERT_DEBUG(index_buffer, Gpu, "Index buffer not found"); + ASSERT_DEBUG(index_buffer.GetBase(), Gpu, "Index buffer not found"); RENDERER_INSTANCE.BindIndexBuffer(index_buffer, index_type); // Draw // TODO: instance count - RENDERER_INSTANCE.DrawIndexed(primitive_type, regs.vertex_elements_start, - count, 
regs.base_vertex, regs.base_instance, - 1); + RENDERER_INSTANCE.DrawIndexed(tls_crnt_command_buffer, primitive_type, + regs.vertex_elements_start, count, + regs.base_vertex, regs.base_instance, 1); } -void ThreeD::ClearBuffer(GMmu& gmmu, const u32 index, - const ClearBufferData data) { +void ThreeD::ClearBuffer(const u32 index, const ClearBufferData data) { LOG_DEBUG(Gpu, "Depth: {}, stencil: {}, color mask: 0x{:x}, target id: {}, " "layer id: {}", @@ -327,46 +365,57 @@ void ThreeD::ClearBuffer(GMmu& gmmu, const u32 index, // TODO: implement // Regular clear - RENDERER_INSTANCE.BindRenderPass(GetRenderPass(gmmu)); + { + std::lock_guard texture_cache_lock( + RENDERER_INSTANCE.GetTextureCache().GetMutex()); + RENDERER_INSTANCE.BindRenderPass(GetRenderPass()); + } if (data.color_mask != 0x0) - RENDERER_INSTANCE.ClearColor(data.target_id, data.layer_id, - data.color_mask, regs.clear_color); + RENDERER_INSTANCE.ClearColor(tls_crnt_command_buffer, data.target_id, + data.layer_id, data.color_mask, + regs.clear_color); if (data.depth) - RENDERER_INSTANCE.ClearDepth(data.layer_id, regs.clear_depth); + RENDERER_INSTANCE.ClearDepth(tls_crnt_command_buffer, data.layer_id, + regs.clear_depth); if (data.stencil) - RENDERER_INSTANCE.ClearStencil(data.layer_id, regs.clear_stencil); + RENDERER_INSTANCE.ClearStencil(tls_crnt_command_buffer, data.layer_id, + regs.clear_stencil); } -void ThreeD::SetReportSemaphore(GMmu& gmmu, const u32 index, const u32 data) { +void ThreeD::SetReportSemaphore(const u32 index, const u32 data) { ONCE(LOG_FUNC_STUBBED(Engines)); - const uptr ptr = gmmu.UnmapAddr(regs.report_semaphore_addr); + const uptr ptr = tls_crnt_gmmu->UnmapAddr(regs.report_semaphore_addr); - // HACK + // TODO: correct? 
*reinterpret_cast(ptr) = regs.report_semaphore_payload; } -void ThreeD::FirmwareCall4(GMmu& gmmu, const u32 index, const u32 data) { +void ThreeD::FirmwareCall4(const u32 index, const u32 data) { ONCE(LOG_FUNC_STUBBED(Engines)); // TODO: find out what this does regs.mme_scratch[0] = 0x1; } -void ThreeD::LoadConstBuffer(GMmu& gmmu, const u32 index, const u32 data) { +void ThreeD::LoadConstBuffer(const u32 index, const u32 data) { const uptr const_buffer_gpu_addr = u64(regs.const_buffer_selector); const uptr gpu_addr = const_buffer_gpu_addr + regs.load_const_buffer_offset; + const auto ptr = tls_crnt_gmmu->UnmapAddr(gpu_addr); - gmmu.Store(gpu_addr, data); - + *reinterpret_cast(ptr) = data; regs.load_const_buffer_offset += sizeof(u32); + + // Invalidate + // TODO: invalidate as a whole + RENDERER_INSTANCE.InvalidateMemory(Range::FromSize(ptr, sizeof(u32))); } -void ThreeD::BindGroup(GMmu& gmmu, const u32 index, const u32 data) { - const auto shader_stage = static_cast(index / 0x8 + 1); +void ThreeD::BindGroup(const u32 index, const u32 data) { + const auto shader_stage_index = index / 0x8; const auto group = index % 0x8; switch (group) { @@ -378,18 +427,13 @@ void ThreeD::BindGroup(GMmu& gmmu, const u32 index, const u32 data) { bool valid = data & 0x1; if (valid) { const uptr const_buffer_gpu_ptr = - gmmu.UnmapAddr(regs.const_buffer_selector); - - const auto buffer = RENDERER_INSTANCE.GetBufferCache().Find( - {const_buffer_gpu_ptr, regs.const_buffer_selector_size}); + tls_crnt_gmmu->UnmapAddr(regs.const_buffer_selector); - bound_const_buffers[index] = const_buffer_gpu_ptr; - RENDERER_INSTANCE.BindUniformBuffer( - buffer, to_renderer_shader_type(shader_stage), index); + const auto range = Range::FromSize( + const_buffer_gpu_ptr, regs.const_buffer_selector_size); + bound_const_buffers[shader_stage_index][index] = range; } else { - bound_const_buffers[index] = 0x0; - RENDERER_INSTANCE.BindUniformBuffer( - nullptr, to_renderer_shader_type(shader_stage), index); + 
bound_const_buffers[shader_stage_index][index] = Range(); } break; } @@ -402,7 +446,7 @@ void ThreeD::BindGroup(GMmu& gmmu, const u32 index, const u32 data) { #pragma GCC diagnostic pop renderer::TextureBase* -ThreeD::GetColorTargetTexture(GMmu& gmmu, u32 render_target_index) const { +ThreeD::GetColorTargetTexture(u32 render_target_index) const { const auto& render_target = regs.color_targets[render_target_index]; const auto gpu_addr = u64(render_target.addr); @@ -414,18 +458,22 @@ ThreeD::GetColorTargetTexture(GMmu& gmmu, u32 render_target_index) const { } const auto format = renderer::to_texture_format(render_target.format); + const u32 width_hint = + regs.screen_scissor.horizontal.x + regs.screen_scissor.horizontal.width; const renderer::TextureDescriptor descriptor( - gmmu.UnmapAddr(gpu_addr), format, + tls_crnt_gmmu->UnmapAddr(gpu_addr), renderer::TextureType::_2D, format, NvKind::Pitch, // TODO: correct? - render_target.width, render_target.height, + GetMinimumWidth(render_target.width, format, width_hint, + render_target.tile_mode.is_linear), + render_target.height, 1, 0, // TODO get_texture_format_stride(format, render_target.width)); - return RENDERER_INSTANCE.GetTextureCache().GetTextureView( - descriptor, renderer::TextureUsage::Write); + return RENDERER_INSTANCE.GetTextureCache().Find( + tls_crnt_command_buffer, descriptor, renderer::TextureUsage::Write); } -renderer::TextureBase* ThreeD::GetDepthStencilTargetTexture(GMmu& gmmu) const { +renderer::TextureBase* ThreeD::GetDepthStencilTargetTexture() const { const auto gpu_addr = u64(regs.depth_target_addr); if (gpu_addr == 0x0) { // TODO: is this really an error? 
@@ -434,33 +482,35 @@ renderer::TextureBase* ThreeD::GetDepthStencilTargetTexture(GMmu& gmmu) const { } const auto format = renderer::to_texture_format(regs.depth_target_format); + const u32 width_hint = + regs.screen_scissor.horizontal.x + regs.screen_scissor.horizontal.width; const renderer::TextureDescriptor descriptor( - gmmu.UnmapAddr(gpu_addr), format, + tls_crnt_gmmu->UnmapAddr(gpu_addr), renderer::TextureType::_2D, format, NvKind::Pitch, // TODO: correct? - regs.depth_target_width, regs.depth_target_height, + GetMinimumWidth(regs.depth_target_width, format, width_hint, false), + regs.depth_target_height, 1, 0, // TODO get_texture_format_stride(format, regs.depth_target_width)); - return RENDERER_INSTANCE.GetTextureCache().GetTextureView( - descriptor, renderer::TextureUsage::Write); + return RENDERER_INSTANCE.GetTextureCache().Find( + tls_crnt_command_buffer, descriptor, renderer::TextureUsage::Write); } -renderer::RenderPassBase* ThreeD::GetRenderPass(GMmu& gmmu) const { +renderer::RenderPassBase* ThreeD::GetRenderPass() const { renderer::RenderPassDescriptor descriptor{}; // Color targets for (u32 i = 0; i < regs.color_target_control.count; i++) { descriptor.color_targets[i] = { - .texture = GetColorTargetTexture( - gmmu, regs.color_target_control.GetMap(i)), + .texture = + GetColorTargetTexture(regs.color_target_control.GetMap(i)), }; } // Depth stencil target descriptor.depth_stencil_target = { - .texture = - (regs.depth_target_enabled ? GetDepthStencilTargetTexture(gmmu) - : nullptr), + .texture = (regs.depth_target_enabled ? 
GetDepthStencilTargetTexture() + : nullptr), }; return RENDERER_INSTANCE.GetRenderPassCache().Find(descriptor); @@ -471,7 +521,7 @@ renderer::Viewport ThreeD::GetViewport(u32 index) { const auto& extent = REGS_3D.viewports[index]; const auto& transform = REGS_3D.viewport_transforms[index]; - if (REGS_3D.viewport_transform_enabled) { + if (/*REGS_3D.viewport_transform_enabled*/ true) { // HACK auto scale_x = transform.scale_x; auto scale_y = transform.scale_y; if (any(REGS_3D.window_origin_flags & @@ -549,13 +599,13 @@ renderer::ShaderBase* ThreeD::GetShaderUnchecked(ShaderStage stage) const { return active_shaders[u32(to_renderer_shader_type(stage))]; } -renderer::ShaderBase* ThreeD::GetShader(GMmu& gmmu, ShaderStage stage) { +renderer::ShaderBase* ThreeD::GetShader(ShaderStage stage) { const auto& program = regs.shader_programs[usize(stage)]; if (!program.config.enable) return nullptr; uptr gpu_addr = u64(regs.shader_program_region) + program.offset; - uptr ptr = gmmu.UnmapAddr(gpu_addr); + uptr ptr = tls_crnt_gmmu->UnmapAddr(gpu_addr); renderer::GuestShaderDescriptor descriptor{ .stage = stage, @@ -584,15 +634,15 @@ renderer::ShaderBase* ThreeD::GetShader(GMmu& gmmu, ShaderStage stage) { return active_shader; } -renderer::PipelineBase* ThreeD::GetPipeline(GMmu& gmmu) { +renderer::PipelineBase* ThreeD::GetPipeline() { renderer::PipelineDescriptor descriptor{}; // Shaders // TODO: add all shaders descriptor.shaders[u32(renderer::ShaderType::Vertex)] = - GetShader(gmmu, ShaderStage::VertexB); + GetShader(ShaderStage::VertexB); descriptor.shaders[u32(renderer::ShaderType::Fragment)] = - GetShader(gmmu, ShaderStage::Fragment); + GetShader(ShaderStage::Fragment); // Vertex state @@ -673,27 +723,24 @@ renderer::PipelineBase* ThreeD::GetPipeline(GMmu& gmmu) { return RENDERER_INSTANCE.GetPipelineCache().Find(descriptor); } -renderer::BufferBase* ThreeD::GetVertexBuffer(GMmu& gmmu, - u32 vertex_array_index) const { +renderer::BufferView ThreeD::GetVertexBuffer(u32 
vertex_array_index) const { const auto& vertex_array = regs.vertex_arrays[vertex_array_index]; // HACK if (u64(vertex_array.addr) == 0x0) { ONCE(LOG_ERROR(Engines, "Invalid vertex buffer")); - return nullptr; + return renderer::BufferView(); } - const renderer::BufferDescriptor descriptor{ - .ptr = gmmu.UnmapAddr(vertex_array.addr), - .size = u64(regs.vertex_array_limits[vertex_array_index]) + 1 - - u64(vertex_array.addr), - }; - - return RENDERER_INSTANCE.GetBufferCache().Find(descriptor); + const auto ptr = tls_crnt_gmmu->UnmapAddr(vertex_array.addr); + const auto size = u64(regs.vertex_array_limits[vertex_array_index]) + 1 - + u64(vertex_array.addr); + return RENDERER_INSTANCE.GetBufferCache().Get( + tls_crnt_command_buffer, Range::FromSize(ptr, size)); } renderer::TextureBase* -ThreeD::GetTexture(GMmu& gmmu, const TextureImageControl& tic) const { +ThreeD::GetTexture(const TextureImageControl& tic) const { // HACK if (tic.hdr_version == TicHdrVersion::_1DBuffer) { LOG_ERROR(Engines, "1D buffer"); @@ -706,7 +753,8 @@ ThreeD::GetTexture(GMmu& gmmu, const TextureImageControl& tic) const { return nullptr; } - const auto format = renderer::to_texture_format(tic.format_word); + const auto format = + renderer::to_texture_format(tic.format_word, tic.is_srgb); NvKind kind; u32 stride; @@ -729,17 +777,18 @@ ThreeD::GetTexture(GMmu& gmmu, const TextureImageControl& tic) const { } const renderer::TextureDescriptor descriptor( - gmmu.UnmapAddr(gpu_addr), format, kind, - static_cast(tic.width_minus_one + 1), + tls_crnt_gmmu->UnmapAddr(gpu_addr), ToTextureType(tic.texture_type), + format, kind, static_cast(tic.width_minus_one + 1), static_cast(tic.height_minus_one + 1), + static_cast(tic.depth_minus_one + 1), tic.tile_height_gobs_log2, // TODO: correct? 
stride, renderer::SwizzleChannels( format, tic.format_word.swizzle_x, tic.format_word.swizzle_y, tic.format_word.swizzle_z, tic.format_word.swizzle_w)); - return RENDERER_INSTANCE.GetTextureCache().GetTextureView( - descriptor, renderer::TextureUsage::Read); + return RENDERER_INSTANCE.GetTextureCache().Find( + tls_crnt_command_buffer, descriptor, renderer::TextureUsage::Read); } renderer::SamplerBase* @@ -765,22 +814,42 @@ ThreeD::GetSampler(const TextureSamplerControl& tsc) const { } void ThreeD::ConfigureShaderStage( - GMmu& gmmu, const ShaderStage stage, - const TextureImageControl* tex_header_pool, + const ShaderStage stage, const TextureImageControl* tex_header_pool, const TextureSamplerControl* tex_sampler_pool) { - // const u32 stage_index = static_cast(stage) - - // 1; // 1 is subtracted, because VertexA is skipped + const auto shader_type = to_renderer_shader_type(stage); + const u32 stage_index = static_cast(stage) - + 1; // 1 is subtracted, because VertexA is skipped const auto shader = GetShaderUnchecked(stage); const auto& resource_mapping = shader->GetDescriptor().resource_mapping; - // TODO: how are uniform buffers handled? 
+ // Uniform buffers + RENDERER_INSTANCE.UnbindUniformBuffers(shader_type); + for (u32 i = 0; i < CONST_BUFFER_BINDING_COUNT; i++) { + const auto index = resource_mapping.uniform_buffers[i]; + if (index == invalid()) + continue; + + // TODO: analyze the shader to get the max possible size + const auto range = bound_const_buffers[stage_index][i]; + if (range.GetBegin() == 0x0) { + LOG_WARN(Engines, "Uniform buffer at index {} is not bound", index); + continue; + } + + const auto buffer = RENDERER_INSTANCE.GetBufferCache().Get( + tls_crnt_command_buffer, range); + RENDERER_INSTANCE.BindUniformBuffer(buffer, shader_type, index); + } + // TODO: storage buffers // Textures - RENDERER_INSTANCE.UnbindTextures(to_renderer_shader_type(stage)); + RENDERER_INSTANCE.UnbindTextures(shader_type); auto tex_const_buffer = reinterpret_cast( - bound_const_buffers[regs.bindless_texture_const_buffer_slot]); + bound_const_buffers[stage_index] + [regs.bindless_texture_const_buffer_slot] + .GetBegin()); for (const auto [const_buffer_index, renderer_index] : resource_mapping.textures) { const auto texture_handle = tex_const_buffer[const_buffer_index]; @@ -788,7 +857,7 @@ void ThreeD::ConfigureShaderStage( // Image const auto image_handle = get_image_handle(texture_handle); const auto& tic = tex_header_pool[image_handle]; - const auto texture = GetTexture(gmmu, tic); + const auto texture = GetTexture(tic); // Sampler const auto sampler_handle = get_sampler_handle(texture_handle); @@ -796,41 +865,44 @@ void ThreeD::ConfigureShaderStage( const auto sampler = GetSampler(tsc); if (texture && sampler) - RENDERER_INSTANCE.BindTexture(texture, sampler, - to_renderer_shader_type(stage), + RENDERER_INSTANCE.BindTexture(texture, sampler, shader_type, renderer_index); // TODO: else bind null texture } - - // TODO: images } -bool ThreeD::DrawInternal(GMmu& gmmu) { +bool ThreeD::DrawInternal() { + std::lock_guard texture_cache_lock( + RENDERER_INSTANCE.GetTextureCache().GetMutex()); + + // Flush 
tracked pages + tls_crnt_gmmu->GetMmu()->FlushTrackedPages(); + + // State if (!regs.shader_programs[(u32)ShaderStage::VertexB].config.enable) { LOG_WARN(Engines, "Vertex B stage not enabled, skipping draw"); return false; } - RENDERER_INSTANCE.BindRenderPass(GetRenderPass(gmmu)); + RENDERER_INSTANCE.BindRenderPass(GetRenderPass()); for (u32 i = 0; i < VIEWPORT_COUNT; i++) { RENDERER_INSTANCE.SetViewport(i, GetViewport(i)); RENDERER_INSTANCE.SetScissor(i, GetScissor(i)); } - RENDERER_INSTANCE.BindPipeline(GetPipeline(gmmu)); + RENDERER_INSTANCE.BindPipeline(GetPipeline()); for (u32 i = 0; i < VERTEX_ARRAY_COUNT; i++) { const auto& vertex_array = regs.vertex_arrays[i]; // HACK: Super Meat Boy contains invalid vertex arrays with address 4096 if (!vertex_array.config.enable || - (vertex_array.addr.hi == 0 && vertex_array.addr.lo == 4096)) - continue; - - const auto buffer = GetVertexBuffer(gmmu, i); - if (!buffer) + (vertex_array.addr.hi == 0 && vertex_array.addr.lo == 4096)) { + RENDERER_INSTANCE.BindVertexBuffer(renderer::BufferView(), i); continue; + } + const auto buffer = GetVertexBuffer(i); RENDERER_INSTANCE.BindVertexBuffer(buffer, i); } @@ -840,14 +912,14 @@ bool ThreeD::DrawInternal(GMmu& gmmu) { // TODO: remove the condition if (tex_header_pool_gpu_addr != 0x0 && tex_sampler_pool_gpu_addr != 0x0) { const auto tex_header_pool = reinterpret_cast( - gmmu.UnmapAddr(tex_header_pool_gpu_addr)); + tls_crnt_gmmu->UnmapAddr(tex_header_pool_gpu_addr)); const auto tex_sampler_pool = reinterpret_cast( - gmmu.UnmapAddr(tex_sampler_pool_gpu_addr)); + tls_crnt_gmmu->UnmapAddr(tex_sampler_pool_gpu_addr)); // TODO: configure all stages - ConfigureShaderStage(gmmu, ShaderStage::VertexB, tex_header_pool, + ConfigureShaderStage(ShaderStage::VertexB, tex_header_pool, tex_sampler_pool); - ConfigureShaderStage(gmmu, ShaderStage::Fragment, tex_header_pool, + ConfigureShaderStage(ShaderStage::Fragment, tex_header_pool, tex_sampler_pool); } diff --git 
a/src/core/hw/tegra_x1/gpu/engines/3d.hpp b/src/core/hw/tegra_x1/gpu/engines/3d.hpp index c9dbe55e..5c6b4684 100644 --- a/src/core/hw/tegra_x1/gpu/engines/3d.hpp +++ b/src/core/hw/tegra_x1/gpu/engines/3d.hpp @@ -1,7 +1,7 @@ #pragma once #include "core/hw/tegra_x1/gpu/engines/inline_base.hpp" -#include "core/hw/tegra_x1/gpu/renderer/const.hpp" +#include "core/hw/tegra_x1/gpu/renderer/buffer_view.hpp" #define REGS_3D engines::ThreeD::GetInstance().GetRegs() @@ -10,7 +10,6 @@ class DriverBase; } namespace hydra::hw::tegra_x1::gpu::renderer { -class BufferBase; class TextureBase; class SamplerBase; class RenderPassBase; @@ -69,8 +68,8 @@ struct TextureImageControl { u32 width_minus_one : 16; u32 view_layer_base_0_2 : 3; u32 aniso_spread_max_log2 : 3; - u32 is_sRGB : 1; - u32 texture_type : 4; // TextureType + u32 is_srgb : 1; + TextureType texture_type : 4; u32 sector_promotion : 2; // SectorPromotion u32 border_size : 3; // BorderSize @@ -559,9 +558,9 @@ class ThreeD : public EngineWithRegsBase, public InlineBase { ThreeD(); ~ThreeD() override; - void Method(GMmu& gmmu, u32 method, u32 arg) override; + void Method(u32 method, u32 arg) override; - void FlushMacro(GMmu& gmmu) override; + void FlushMacro() override; // Getters const Regs3D& GetRegs() const { return regs; } @@ -574,24 +573,23 @@ class ThreeD : public EngineWithRegsBase, public InlineBase { macro::DriverBase* macro_driver; // Active state (for quick access) - renderer::ShaderBase* active_shaders[u32(renderer::ShaderType::Count)] = { - nullptr}; + renderer::ShaderBase* active_shaders[static_cast( + renderer::ShaderType::Count)] = {nullptr}; // State - uptr bound_const_buffers[CONST_BUFFER_BINDING_COUNT] = {0x0}; + Range bound_const_buffers[static_cast(ShaderStage::Count) - 1] + [CONST_BUFFER_BINDING_COUNT]; // Methods DEFINE_INLINE_ENGINE_METHODS; - void LoadMmeInstructionRamPointer(GMmu& gmmu, const u32 index, - const u32 ptr); - void LoadMmeInstructionRam(GMmu& gmmu, const u32 index, const u32 data); - 
void LoadMmeStartAddressRamPointer(GMmu& gmmu, const u32 index, - const u32 ptr); - void LoadMmeStartAddressRam(GMmu& gmmu, const u32 index, const u32 data); + void LoadMmeInstructionRamPointer(const u32 index, const u32 ptr); + void LoadMmeInstructionRam(const u32 index, const u32 data); + void LoadMmeStartAddressRamPointer(const u32 index, const u32 ptr); + void LoadMmeStartAddressRam(const u32 index, const u32 data); - void DrawVertexArray(GMmu& gmmu, const u32 index, u32 count); - void DrawVertexElements(GMmu& gmmu, const u32 index, u32 count); + void DrawVertexArray(const u32 index, u32 count); + void DrawVertexElements(const u32 index, u32 count); struct ClearBufferData { bool depth : 1; @@ -601,37 +599,34 @@ class ThreeD : public EngineWithRegsBase, public InlineBase { u32 layer_id : 11; }; - void ClearBuffer(GMmu& gmmu, const u32 index, const ClearBufferData data); + void ClearBuffer(const u32 index, const ClearBufferData data); // HACK - void SetReportSemaphore(GMmu& gmmu, const u32 index, const u32 data); + void SetReportSemaphore(const u32 index, const u32 data); - void FirmwareCall4(GMmu& gmmu, const u32 index, const u32 data); + void FirmwareCall4(const u32 index, const u32 data); - void LoadConstBuffer(GMmu& gmmu, const u32 index, const u32 data); - void BindGroup(GMmu& gmmu, const u32 index, const u32 data); + void LoadConstBuffer(const u32 index, const u32 data); + void BindGroup(const u32 index, const u32 data); // Helpers - renderer::TextureBase* GetColorTargetTexture(GMmu& gmmu, - u32 render_target_index) const; - renderer::TextureBase* GetDepthStencilTargetTexture(GMmu& gmmu) const; - renderer::RenderPassBase* GetRenderPass(GMmu& gmmu) const; + renderer::TextureBase* GetColorTargetTexture(u32 render_target_index) const; + renderer::TextureBase* GetDepthStencilTargetTexture() const; + renderer::RenderPassBase* GetRenderPass() const; renderer::Viewport GetViewport(u32 index); renderer::Scissor GetScissor(u32 index); renderer::ShaderBase* 
GetShaderUnchecked(ShaderStage stage) const; - renderer::ShaderBase* GetShader(GMmu& gmmu, ShaderStage stage); - renderer::PipelineBase* GetPipeline(GMmu& gmmu); - renderer::BufferBase* GetVertexBuffer(GMmu& gmmu, - u32 vertex_array_index) const; - renderer::TextureBase* GetTexture(GMmu& gmmu, - const TextureImageControl& tic) const; + renderer::ShaderBase* GetShader(ShaderStage stage); + renderer::PipelineBase* GetPipeline(); + renderer::BufferView GetVertexBuffer(u32 vertex_array_index) const; + renderer::TextureBase* GetTexture(const TextureImageControl& tic) const; renderer::SamplerBase* GetSampler(const TextureSamplerControl& tsc) const; - void ConfigureShaderStage(GMmu& gmmu, const ShaderStage stage, + void ConfigureShaderStage(const ShaderStage stage, const TextureImageControl* tex_header_pool, const TextureSamplerControl* tex_sampler_pool); - bool DrawInternal(GMmu& gmmu); + bool DrawInternal(); }; } // namespace hydra::hw::tegra_x1::gpu::engines diff --git a/src/core/hw/tegra_x1/gpu/engines/compute.hpp b/src/core/hw/tegra_x1/gpu/engines/compute.hpp index f21ef3d0..c708932f 100644 --- a/src/core/hw/tegra_x1/gpu/engines/compute.hpp +++ b/src/core/hw/tegra_x1/gpu/engines/compute.hpp @@ -12,7 +12,7 @@ struct RegsCompute { class Compute : public EngineWithRegsBase, public InlineBase { public: - void Method(GMmu& gmmu, u32 method, u32 arg) override; + void Method(u32 method, u32 arg) override; private: // Methods diff --git a/src/core/hw/tegra_x1/gpu/engines/const.hpp b/src/core/hw/tegra_x1/gpu/engines/const.hpp index c8387e80..57093ae1 100644 --- a/src/core/hw/tegra_x1/gpu/engines/const.hpp +++ b/src/core/hw/tegra_x1/gpu/engines/const.hpp @@ -128,7 +128,18 @@ enum class CompareOp : u32 { Always = 8, }; -// Pitch - buffer, block linear - texture +enum class TextureType : u32 { + _1D = 0, + _2D = 1, + _3D = 2, + Cubemap = 3, + _1DArray = 4, + _2DArray = 5, + _1DBuffer = 6, + _2DNoMipmap = 7, + CubeArray = 8, +}; + enum class MemoryLayout : u32 { BlockLinear, 
Pitch, diff --git a/src/core/hw/tegra_x1/gpu/engines/copy.cpp b/src/core/hw/tegra_x1/gpu/engines/copy.cpp index 4addc415..95ec80b6 100644 --- a/src/core/hw/tegra_x1/gpu/engines/copy.cpp +++ b/src/core/hw/tegra_x1/gpu/engines/copy.cpp @@ -12,7 +12,7 @@ DEFINE_METHOD_TABLE(Copy, 0xc0, 1, LaunchDMA, LaunchDMAData) #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-parameter" -void Copy::LaunchDMA(GMmu& gmmu, const u32 index, const LaunchDMAData data) { +void Copy::LaunchDMA(const u32 index, const LaunchDMAData data) { // TODO: implement component remapping // HACK u32 src_stride = regs.src.stride; @@ -37,22 +37,20 @@ void Copy::LaunchDMA(GMmu& gmmu, const u32 index, const LaunchDMAData data) { } } - const auto src_ptr = gmmu.UnmapAddr(regs.offset_in); - const auto dst_ptr = gmmu.UnmapAddr(regs.offset_out); + const auto src_ptr = tls_crnt_gmmu->UnmapAddr(regs.offset_in); + const auto dst_ptr = tls_crnt_gmmu->UnmapAddr(regs.offset_out); if (data.src_memory_layout == MemoryLayout::Pitch) { if (data.dst_memory_layout == MemoryLayout::Pitch) { for (u32 i = 0; i < regs.line_count; i++) memcpy(reinterpret_cast(dst_ptr + regs.stride_out * i), reinterpret_cast(src_ptr + regs.stride_in * i), regs.stride_in); - } else { - // NOTE: a texture copy could be possible, as LineLengthIn contains - // the width and PitchOut contains the stride, hence we could find - // the pixel size and later use a texture view to alias the base - // texture. However, this could break the order in which memory is - // copied and create issues. Block formats could also be - // problematic. + // Invalidate + RENDERER_INSTANCE.GetBufferCache().InvalidateMemory( + Range::FromSize(dst_ptr, + regs.line_count * regs.stride_out)); + } else { // Encode as Generic 16BX2 // TODO: block size log2 can also be negative? 
encode_generic_16bx2(dst_stride, regs.line_count, @@ -60,33 +58,6 @@ void Copy::LaunchDMA(GMmu& gmmu, const u32 index, const LaunchDMAData data) { regs.dst.block_size.height)), reinterpret_cast(src_ptr), reinterpret_cast(dst_ptr)); - - // memcpy((void*)gmmu.UnmapAddr(regs.offset_out), - // (void*)gmmu.UnmapAddr(regs.offset_in), stride * - // regs.line_count); - - /* - const auto src = - GetBuffer(regs.offset_in, - regs.line_length_in * - regs.line_count); // TODO: is the size correct? - const auto dst = GetBuffer(regs.offset_out, - regs.line_length_in * regs.line_count); - - dst->CopyFrom(src); - */ - - /* - auto texture = - GetTexture(regs.offset_out_lo, regs.offset_out_hi, regs.dst); - - const auto& descriptor = texture->GetDescriptor(); - // TODO: use layer as origin Z in case of 3D textures - texture->CopyFrom( - buffer, regs.stride_in, regs.dst.layer, - {regs.dst.origin.x, regs.dst.origin.y, 0}, - {descriptor.width, descriptor.height, regs.dst.depth}); - */ } } else { if (data.dst_memory_layout == MemoryLayout::Pitch) { @@ -101,15 +72,15 @@ void Copy::LaunchDMA(GMmu& gmmu, const u32 index, const LaunchDMAData data) { } } - // TODO: correct? 
- RENDERER_INSTANCE.GetTextureCache().NotifyGuestModifiedData( - range(dst_ptr, regs.stride_in * regs.line_count)); + // Invalidate + RENDERER_INSTANCE.GetTextureCache().InvalidateMemory( + Range::FromSize(dst_ptr, regs.stride_in * regs.line_count)); } #pragma GCC diagnostic pop /* -renderer::BufferBase* Copy::GetBuffer(GMmu& gmmu, const Iova addr, const usize +renderer::BufferBase* Copy::GetBuffer(const Iova addr, const usize size) { const renderer::BufferDescriptor descriptor{ .ptr = gmmu.UnmapAddr(addr), .size = size, }; diff --git a/src/core/hw/tegra_x1/gpu/engines/copy.hpp b/src/core/hw/tegra_x1/gpu/engines/copy.hpp index 94d13977..be48efbc 100644 --- a/src/core/hw/tegra_x1/gpu/engines/copy.hpp +++ b/src/core/hw/tegra_x1/gpu/engines/copy.hpp @@ -118,7 +118,7 @@ struct RegsCopy { class Copy : public EngineWithRegsBase { public: - void Method(GMmu& gmmu, u32 method, u32 arg) override; + void Method(u32 method, u32 arg) override; private: // Commands @@ -140,10 +140,10 @@ class Copy : public EngineWithRegsBase { BypassL2 bypass_l2 : 1; }; - void LaunchDMA(GMmu& gmmu, const u32 index, const LaunchDMAData data); + void LaunchDMA(const u32 index, const LaunchDMAData data); // Helpers - // static renderer::BufferBase* GetBuffer(GMmu& gmmu, const Iova addr, + // static renderer::BufferBase* GetBuffer(const Iova addr, // const usize size); // static renderer::TextureBase* GetTexture(const u32 gpu_addr_lo, // const u32 gpu_addr_hi, diff --git a/src/core/hw/tegra_x1/gpu/engines/engine_base.hpp b/src/core/hw/tegra_x1/gpu/engines/engine_base.hpp index ff1a5b02..e41f7c44 100644 --- a/src/core/hw/tegra_x1/gpu/engines/engine_base.hpp +++ b/src/core/hw/tegra_x1/gpu/engines/engine_base.hpp @@ -4,11 +4,11 @@ #define METHOD_CASE(method_begin, method_count, func, arg_type) \ case method_begin ...(method_begin + method_count - 1): \ - func(gmmu, method - method_begin, std::bit_cast(arg)); \ + func(method - method_begin, std::bit_cast(arg)); \ break; #define DEFINE_METHOD_TABLE(type, ...)
\ - void type::Method(GMmu& gmmu, u32 method, u32 arg) { \ + void type::Method(u32 method, u32 arg) { \ if (method >= MACRO_METHODS_REGION) { \ Macro(method, arg); \ return; \ @@ -36,9 +36,9 @@ class EngineBase { virtual ~EngineBase() = default; - virtual void Method(GMmu& gmmu, u32 method, u32 arg) = 0; + virtual void Method(u32 method, u32 arg) = 0; - virtual void FlushMacro([[maybe_unused]] GMmu& gmmu) { + virtual void FlushMacro() { LOG_ERROR(Engines, "This engine does not support macros"); throw Error::MacrosNotSupported; } diff --git a/src/core/hw/tegra_x1/gpu/engines/inline.hpp b/src/core/hw/tegra_x1/gpu/engines/inline.hpp index c1929bc8..7d5471af 100644 --- a/src/core/hw/tegra_x1/gpu/engines/inline.hpp +++ b/src/core/hw/tegra_x1/gpu/engines/inline.hpp @@ -10,7 +10,7 @@ struct RegsInline_ { class Inline : public EngineWithRegsBase, public InlineBase { public: - void Method(GMmu& gmmu, u32 method, u32 arg) override; + void Method(u32 method, u32 arg) override; private: // Methods diff --git a/src/core/hw/tegra_x1/gpu/engines/inline_base.cpp b/src/core/hw/tegra_x1/gpu/engines/inline_base.cpp index 4c212387..5c1b935c 100644 --- a/src/core/hw/tegra_x1/gpu/engines/inline_base.cpp +++ b/src/core/hw/tegra_x1/gpu/engines/inline_base.cpp @@ -8,13 +8,13 @@ namespace hydra::hw::tegra_x1::gpu::engines { #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-parameter" -void InlineBase::LaunchDMAImpl(GMmu& gmmu, RegsInline& regs, const u32 index, +void InlineBase::LaunchDMAImpl(RegsInline& regs, const u32 index, const u32 data) { LOG_FUNC_WITH_ARGS_STUBBED(Engines, "index: {}, data: {:#x}", index, data); } -void InlineBase::LoadInlineDataImpl(GMmu& gmmu, RegsInline& regs, - const u32 index, const u32 data) { +void InlineBase::LoadInlineDataImpl(RegsInline& regs, const u32 index, + const u32 data) { inline_data.push_back(data); // TODO: correct? 
if (inline_data.size() * sizeof(u32) == @@ -23,7 +23,7 @@ void InlineBase::LoadInlineDataImpl(GMmu& gmmu, // TODO: determine what type of copy this is based on launch DMA args // Buffer to buffer - uptr dst_ptr = gmmu.UnmapAddr(regs.offset_out); + uptr dst_ptr = tls_crnt_gmmu->UnmapAddr(regs.offset_out); // TODO: do a Gpu copy instead? memcpy(reinterpret_cast(dst_ptr), inline_data.data(), inline_data.size() * sizeof(u32)); @@ -34,6 +34,10 @@ void InlineBase::LoadInlineDataImpl(GMmu& gmmu, dst->CopyFrom(inline_data.data()); */ - inline_data.clear(); + + // Invalidate before clearing, otherwise the size is always 0 + RENDERER_INSTANCE.InvalidateMemory( + Range::FromSize(dst_ptr, inline_data.size() * sizeof(u32))); + inline_data.clear(); } } diff --git a/src/core/hw/tegra_x1/gpu/engines/inline_base.hpp b/src/core/hw/tegra_x1/gpu/engines/inline_base.hpp index ff3e3d0d..6fda725f 100644 --- a/src/core/hw/tegra_x1/gpu/engines/inline_base.hpp +++ b/src/core/hw/tegra_x1/gpu/engines/inline_base.hpp @@ -5,11 +5,11 @@ #define INLINE_ENGINE_TABLE \ 0x6c, 1, LaunchDMA, u32, 0x6d, 1, LoadInlineData, u32 #define DEFINE_INLINE_ENGINE_METHODS \ - void LaunchDMA(GMmu& gmmu, const u32 index, const u32 data) { \ - LaunchDMAImpl(gmmu, regs.regs_inline, index, data); \ + void LaunchDMA(const u32 index, const u32 data) { \ + LaunchDMAImpl(regs.regs_inline, index, data); \ } \ - void LoadInlineData(GMmu& gmmu, const u32 index, const u32 data) { \ - LoadInlineDataImpl(gmmu, regs.regs_inline, index, data); \ + void LoadInlineData(const u32 index, const u32 data) { \ + LoadInlineDataImpl(regs.regs_inline, index, data); \ } namespace hydra::hw::tegra_x1::gpu::engines { @@ -36,10 +36,8 @@ class InlineBase { protected: // Commands - void LaunchDMAImpl(GMmu& gmmu, RegsInline& regs, const u32 index, - const u32 data); - void LoadInlineDataImpl(GMmu& gmmu, RegsInline& regs, const u32 index, - const u32 data); + void LaunchDMAImpl(RegsInline& regs, const u32 index, const u32 data); + void
LoadInlineDataImpl(RegsInline& regs, const u32 index, const u32 data); private: std::vector inline_data; diff --git a/src/core/hw/tegra_x1/gpu/gmmu.cpp b/src/core/hw/tegra_x1/gpu/gmmu.cpp index d94528fb..ba8421b3 100644 --- a/src/core/hw/tegra_x1/gpu/gmmu.cpp +++ b/src/core/hw/tegra_x1/gpu/gmmu.cpp @@ -1,5 +1,7 @@ #include "core/hw/tegra_x1/gpu/gmmu.hpp" +#include "core/hw/tegra_x1/cpu/mmu.hpp" + namespace hydra::hw::tegra_x1::gpu { uptr GMmu::UnmapAddr(uptr gpu_addr) { @@ -11,4 +13,28 @@ uptr GMmu::UnmapAddr(uptr gpu_addr) { return as.ptr + (gpu_addr - base); } +uptr GMmu::CreateAddressSpace(Range range, uptr gpu_addr) { + uptr ptr; + if (range.GetBegin() != 0x0) { + ptr = mmu->UnmapAddr(range.GetBegin()); + + // Write tracking + mmu->EnableWriteTracking(range); + } else { + ptr = reinterpret_cast(malloc(range.GetSize())); + } + + AddressSpace as; + as.ptr = ptr; + as.size = range.GetSize(); + + if (gpu_addr == invalid()) { + gpu_addr = address_space_base; + address_space_base += align(range.GetSize(), GPU_PAGE_SIZE); + } + Map(gpu_addr, as); + + return gpu_addr; +} + } // namespace hydra::hw::tegra_x1::gpu diff --git a/src/core/hw/tegra_x1/gpu/gmmu.hpp b/src/core/hw/tegra_x1/gpu/gmmu.hpp index 6ffe022e..46dcdeba 100644 --- a/src/core/hw/tegra_x1/gpu/gmmu.hpp +++ b/src/core/hw/tegra_x1/gpu/gmmu.hpp @@ -17,6 +17,8 @@ struct AddressSpace { // TODO: free memory class GMmu : public GenericMmu { public: + GMmu(cpu::IMmu* mmu_) : mmu{mmu_} {} + usize ImplGetSize(const AddressSpace& as) const { return as.size; } AddressSpace& UnmapAddrToAddressSpace(uptr gpu_addr) { @@ -37,32 +39,19 @@ class GMmu : public GenericMmu { [[maybe_unused]] AddressSpace as) {} // Address space - uptr CreateAddressSpace(uptr ptr, usize size, uptr gpu_addr) { - if (ptr == 0x0) - ptr = reinterpret_cast(malloc(size)); - - AddressSpace as; - as.ptr = ptr; - as.size = size; - - if (gpu_addr == invalid()) { - gpu_addr = address_space_base; - address_space_base += align(size, GPU_PAGE_SIZE); - } - 
Map(gpu_addr, as); - - return gpu_addr; - } + uptr CreateAddressSpace(Range range, uptr gpu_addr); uptr AllocatePrivateAddressSpace(usize size, uptr gpu_addr) { - return CreateAddressSpace(0x0, size, gpu_addr); + return CreateAddressSpace(Range::FromSize(0x0, size), + gpu_addr); } - uptr MapBufferToAddressSpace(uptr ptr, usize size, uptr gpu_addr) { - return CreateAddressSpace(ptr, size, gpu_addr); + uptr MapBufferToAddressSpace(Range range, uptr gpu_addr) { + return CreateAddressSpace(range, gpu_addr); } - // TODO: correct? + // TODO + /* void ModifyAddressSpace(uptr ptr, usize size, uptr gpu_addr) { auto& as = UnmapAddrToAddressSpace(gpu_addr); ASSERT_DEBUG(size == as.size, Gpu, "Size mismatch: {} != {}", size, @@ -70,10 +59,16 @@ class GMmu : public GenericMmu { as.ptr = ptr; } + */ private: + cpu::IMmu* mmu; + // TODO: use a better way to allocate new memory uptr address_space_base{GPU_PAGE_SIZE}; + + public: + GETTER(mmu, GetMmu); }; } // namespace hydra::hw::tegra_x1::gpu diff --git a/src/core/hw/tegra_x1/gpu/gpu.cpp b/src/core/hw/tegra_x1/gpu/gpu.cpp index b6460115..beb75657 100644 --- a/src/core/hw/tegra_x1/gpu/gpu.cpp +++ b/src/core/hw/tegra_x1/gpu/gpu.cpp @@ -37,7 +37,7 @@ Gpu::~Gpu() { SINGLETON_UNSET_INSTANCE(); } -void Gpu::SubchannelMethod(GMmu& gmmu, u32 subchannel, u32 method, u32 arg) { +void Gpu::SubchannelMethod(u32 subchannel, u32 method, u32 arg) { if (method == 0x0) { // SetEngine ASSERT_DEBUG(subchannel <= SUBCHANNEL_COUNT, Gpu, "Invalid subchannel {}", subchannel); @@ -76,11 +76,14 @@ void Gpu::SubchannelMethod(GMmu& gmmu, u32 subchannel, u32 method, u32 arg) { return; } - GetEngineAtSubchannel(subchannel)->Method(gmmu, method, arg); + GetEngineAtSubchannel(subchannel)->Method(method, arg); } -renderer::TextureBase* Gpu::GetTexture(cpu::IMmu* mmu, +renderer::TextureBase* Gpu::GetTexture(renderer::ICommandBuffer* command_buffer, + cpu::IMmu* mmu, const NvGraphicsBuffer& buff) { + std::lock_guard 
texture_cache_lock(renderer->GetTextureCache().GetMutex()); + LOG_DEBUG(Gpu, "Map id: {}, width: {}, " "height: {}", @@ -89,12 +92,13 @@ renderer::TextureBase* Gpu::GetTexture(cpu::IMmu* mmu, // TODO: why are there more planes? renderer::TextureDescriptor descriptor( mmu->UnmapAddr(GetMap(buff.nvmap_id).addr + buff.planes[0].offset), + renderer::TextureType::_2D, renderer::to_texture_format(buff.planes[0].color_format), - buff.planes[0].kind, buff.planes[0].width, buff.planes[0].height, + buff.planes[0].kind, buff.planes[0].width, buff.planes[0].height, 1, buff.planes[0].block_height_log2, buff.planes[0].pitch); - return renderer->GetTextureCache().GetTextureView( - descriptor, renderer::TextureUsage::Present); + return renderer->GetTextureCache().Find(command_buffer, descriptor, + renderer::TextureUsage::Present); } } // namespace hydra::hw::tegra_x1::gpu diff --git a/src/core/hw/tegra_x1/gpu/gpu.hpp b/src/core/hw/tegra_x1/gpu/gpu.hpp index 3b86d548..a1a17f7f 100644 --- a/src/core/hw/tegra_x1/gpu/gpu.hpp +++ b/src/core/hw/tegra_x1/gpu/gpu.hpp @@ -18,6 +18,10 @@ class IMmu; namespace hydra::hw::tegra_x1::gpu { +namespace renderer { +class ICommandBuffer; +} + struct MemoryMap { uptr addr = 0; usize size; @@ -28,6 +32,9 @@ struct MemoryMap { constexpr usize SUBCHANNEL_COUNT = 5; // TODO: correct? 
+inline thread_local GMmu* tls_crnt_gmmu = nullptr; +inline thread_local renderer::ICommandBuffer* tls_crnt_command_buffer = nullptr; + class Gpu { public: static Gpu& GetInstance(); @@ -80,14 +87,15 @@ class Gpu { return engine; } - void SubchannelMethod(GMmu& gmmu, u32 subchannel, u32 method, u32 arg); + void SubchannelMethod(u32 subchannel, u32 method, u32 arg); - void SubchannelFlushMacro(GMmu& gmmu, u32 subchannel) { - GetEngineAtSubchannel(subchannel)->FlushMacro(gmmu); + void SubchannelFlushMacro(u32 subchannel) { + GetEngineAtSubchannel(subchannel)->FlushMacro(); } // Texture - renderer::TextureBase* GetTexture(cpu::IMmu* mmu, + renderer::TextureBase* GetTexture(renderer::ICommandBuffer* command_buffer, + cpu::IMmu* mmu, const NvGraphicsBuffer& buff); // Getters diff --git a/src/core/hw/tegra_x1/gpu/macro/driver_base.cpp b/src/core/hw/tegra_x1/gpu/macro/driver_base.cpp index 730b9971..ba2eb2c3 100644 --- a/src/core/hw/tegra_x1/gpu/macro/driver_base.cpp +++ b/src/core/hw/tegra_x1/gpu/macro/driver_base.cpp @@ -4,8 +4,8 @@ namespace hydra::hw::tegra_x1::gpu::macro { -void DriverBase::Execute(GMmu& gmmu) { - ExecuteImpl(gmmu, start_address_ram[index], param1); +void DriverBase::Execute() { + ExecuteImpl(start_address_ram[index], param1); // TODO: what should happen when there are still parameters in the queue? 
if (!param_queue.empty()) { @@ -31,7 +31,7 @@ void DriverBase::LoadStartAddressRam(u32 data) { start_address_ram[start_address_ram_ptr++] = data; } -bool DriverBase::ParseInstruction(GMmu& gmmu, u32 pc) { +bool DriverBase::ParseInstruction(u32 pc) { u32 instruction = instruction_ram[pc]; // LOG_DEBUG(Macro, "PC: 0x{:08x}, instruction: 0x{:08x}", pc, instruction); @@ -110,7 +110,7 @@ bool DriverBase::ParseInstruction(GMmu& gmmu, u32 pc) { ResultOperation result_op = static_cast(GET_DATA_U32(4, 3)); u8 rD = GET_REG(8); - InstResult(gmmu, result_op, rD, value); + InstResult(result_op, rD, value); } // Check if exit @@ -128,8 +128,8 @@ void DriverBase::SetMethod(u32 value) { increment = (value >> 12) & 0x3f; } -void DriverBase::Send(GMmu& gmmu, u32 arg) { - engine_3d->Method(gmmu, method, arg); +void DriverBase::Send(u32 arg) { + engine_3d->Method(method, arg); method += increment; } diff --git a/src/core/hw/tegra_x1/gpu/macro/driver_base.hpp b/src/core/hw/tegra_x1/gpu/macro/driver_base.hpp index 2613710e..00e1babc 100644 --- a/src/core/hw/tegra_x1/gpu/macro/driver_base.hpp +++ b/src/core/hw/tegra_x1/gpu/macro/driver_base.hpp @@ -22,7 +22,7 @@ class DriverBase { DriverBase(engines::ThreeD* engine_3d_) : engine_3d{engine_3d_} {} virtual ~DriverBase() = default; - void Execute(GMmu& gmmu); + void Execute(); void LoadInstructionRamPointer(u32 ptr); void LoadInstructionRam(u32 data); @@ -34,7 +34,7 @@ class DriverBase { void LoadParam(u32 data) { param_queue.push(data); } protected: - virtual void ExecuteImpl(GMmu& gmmu, u32 pc, u32 param1) = 0; + virtual void ExecuteImpl(u32 pc, u32 param1) = 0; virtual u32 InstAlu(AluOperation op, u8 rA, u8 rB) = 0; virtual u32 InstAddImmediate(u8 rA, i32 imm) = 0; @@ -44,10 +44,9 @@ class DriverBase { virtual u32 InstRead(u8 rA, u32 imm) = 0; virtual void InstBranch(BranchCondition cond, u8 rA, i32 imm, bool& branched) = 0; - virtual void InstResult(GMmu& gmmu, ResultOperation op, u8 rD, - u32 value) = 0; + virtual void 
InstResult(ResultOperation op, u8 rD, u32 value) = 0; - bool ParseInstruction(GMmu& gmmu, u32 pc); + bool ParseInstruction(u32 pc); u32 FetchParam() { ASSERT_DEBUG(!param_queue.empty(), Macro, "Parameter queue is empty"); @@ -60,7 +59,7 @@ class DriverBase { u32 Get3DReg(u32 reg_3d); void SetMethod(u32 value); - void Send(GMmu& gmmu, u32 arg); + void Send(u32 arg); private: engines::ThreeD* engine_3d; diff --git a/src/core/hw/tegra_x1/gpu/macro/interpreter/driver.cpp b/src/core/hw/tegra_x1/gpu/macro/interpreter/driver.cpp index 98727bd1..8843b466 100644 --- a/src/core/hw/tegra_x1/gpu/macro/interpreter/driver.cpp +++ b/src/core/hw/tegra_x1/gpu/macro/interpreter/driver.cpp @@ -2,12 +2,12 @@ namespace hydra::hw::tegra_x1::gpu::macro::interpreter { -void Driver::ExecuteImpl(GMmu& gmmu, u32 pc_, u32 param1) { +void Driver::ExecuteImpl(u32 pc_, u32 param1) { pc = pc_; SetRegU32(1, param1); while (true) { - if (ParseInstruction(gmmu, pc)) + if (ParseInstruction(pc)) break; if (pc == branch_after) { @@ -130,7 +130,7 @@ void Driver::InstBranch(BranchCondition cond, u8 rA, i32 imm, bool& branched) { } } -void Driver::InstResult(GMmu& gmmu, ResultOperation op, u8 rD, u32 value) { +void Driver::InstResult(ResultOperation op, u8 rD, u32 value) { LOG_DEBUG(Macro, "result op: {}, r{}, value: 0x{:08x}", op, rD, value); switch (op) { @@ -146,11 +146,11 @@ void Driver::InstResult(GMmu& gmmu, ResultOperation op, u8 rD, u32 value) { break; case ResultOperation::FetchAndSend: SetRegU32(rD, FetchParam()); - Send(gmmu, value); + Send(value); break; case ResultOperation::MoveAndSend: SetRegU32(rD, value); - Send(gmmu, value); + Send(value); break; case ResultOperation::FetchAndSetMethod: SetRegU32(rD, FetchParam()); @@ -159,12 +159,12 @@ void Driver::InstResult(GMmu& gmmu, ResultOperation op, u8 rD, u32 value) { case ResultOperation::MoveAndSetMethodFetchAndSend: SetRegU32(rD, value); SetMethod(value); - Send(gmmu, FetchParam()); + Send(FetchParam()); break; case 
ResultOperation::MoveAndSetMethodSend: SetRegU32(rD, value); SetMethod(value); - Send(gmmu, (value >> 12) & 0x3f); + Send((value >> 12) & 0x3f); break; } } diff --git a/src/core/hw/tegra_x1/gpu/macro/interpreter/driver.hpp b/src/core/hw/tegra_x1/gpu/macro/interpreter/driver.hpp index cf677d69..89da8b02 100644 --- a/src/core/hw/tegra_x1/gpu/macro/interpreter/driver.hpp +++ b/src/core/hw/tegra_x1/gpu/macro/interpreter/driver.hpp @@ -9,7 +9,7 @@ class Driver : public DriverBase { Driver(engines::ThreeD* engine_3d) : DriverBase(engine_3d) {} protected: - void ExecuteImpl(GMmu& gmmu, u32 pc_, u32 param1) override; + void ExecuteImpl(u32 pc_, u32 param1) override; u32 InstAlu(AluOperation op, u8 rA, u8 rB) override; u32 InstAddImmediate(u8 rA, i32 imm) override; @@ -19,7 +19,7 @@ class Driver : public DriverBase { u32 InstRead(u8 rA, u32 imm) override; void InstBranch(BranchCondition cond, u8 rA, i32 imm, bool& branched) override; - void InstResult(GMmu& gmmu, ResultOperation op, u8 rD, u32 value) override; + void InstResult(ResultOperation op, u8 rD, u32 value) override; private: u32 pc; diff --git a/src/core/hw/tegra_x1/gpu/memory_util.cpp b/src/core/hw/tegra_x1/gpu/memory_util.cpp index 88aa19dc..6ba8274c 100644 --- a/src/core/hw/tegra_x1/gpu/memory_util.cpp +++ b/src/core/hw/tegra_x1/gpu/memory_util.cpp @@ -5,14 +5,14 @@ namespace hydra::hw::tegra_x1::gpu { namespace { template -void process_generic_16bx2(u32 stride, u32 height, u32 block_height_log2, +void process_generic_16bx2(u32 stride, u32 rows, u32 block_height_log2, u8* encoded, u8* decoded) { const auto block_height_gobs = 1u << block_height_log2; const auto block_height_px = 8u << block_height_log2; - const auto width_blocks = stride >> 6; - const auto height_blocks = - (height + block_height_px - 1) >> (3 + block_height_log2); + const auto horizontal_blocks = stride >> 6; + const auto vertical_blocks = + (rows + block_height_px - 1) >> (3 + block_height_log2); // Clear the output buffer first // TODO: is 
this necessary? @@ -20,12 +20,12 @@ void process_generic_16bx2(u32 stride, u32 height, u32 block_height_log2, constexpr usize BLOCK_SIZE = 32; - for (u32 block_y = 0; block_y < height_blocks; block_y++) { - for (u32 block_x = 0; block_x < width_blocks; block_x++) { + for (u32 block_y = 0; block_y < vertical_blocks; block_y++) { + for (u32 block_x = 0; block_x < horizontal_blocks; block_x++) { for (u32 gob_y = 0; gob_y < block_height_gobs; gob_y++) { const u32 x = block_x * 64; const u32 y = block_y * block_height_px + gob_y * 8; - if (y < height) { + if (y < rows) { u8* decoded_gob = (u8*)decoded + y * stride + x; // Reverse the 16Bx2 swizzling for each GOB for (u32 i = 0; i < BLOCK_SIZE; i++) { @@ -54,15 +54,15 @@ void process_generic_16bx2(u32 stride, u32 height, u32 block_height_log2, } // namespace -void encode_generic_16bx2(u32 stride, u32 height, u32 block_height_log2, +void encode_generic_16bx2(u32 stride, u32 rows, u32 block_height_log2, u8* in_data, u8* out_data) { - process_generic_16bx2(stride, height, block_height_log2, out_data, + process_generic_16bx2(stride, rows, block_height_log2, out_data, in_data); } -void decode_generic_16bx2(u32 stride, u32 height, u32 block_height_log2, +void decode_generic_16bx2(u32 stride, u32 rows, u32 block_height_log2, u8* in_data, u8* out_data) { - process_generic_16bx2(stride, height, block_height_log2, in_data, + process_generic_16bx2(stride, rows, block_height_log2, in_data, out_data); } diff --git a/src/core/hw/tegra_x1/gpu/memory_util.hpp b/src/core/hw/tegra_x1/gpu/memory_util.hpp index 50401f1d..3e740846 100644 --- a/src/core/hw/tegra_x1/gpu/memory_util.hpp +++ b/src/core/hw/tegra_x1/gpu/memory_util.hpp @@ -4,10 +4,10 @@ namespace hydra::hw::tegra_x1::gpu { -void encode_generic_16bx2(u32 stride, u32 height, u32 block_height_log2, +void encode_generic_16bx2(u32 stride, u32 rows, u32 block_height_log2, u8* in_data, u8* out_data); -void decode_generic_16bx2(u32 stride, u32 height, u32 block_height_log2, +void 
decode_generic_16bx2(u32 stride, u32 rows, u32 block_height_log2, u8* in_data, u8* out_data); } // namespace hydra::hw::tegra_x1::gpu diff --git a/src/core/hw/tegra_x1/gpu/pfifo.cpp b/src/core/hw/tegra_x1/gpu/pfifo.cpp index b784f4fd..9c773e5a 100644 --- a/src/core/hw/tegra_x1/gpu/pfifo.cpp +++ b/src/core/hw/tegra_x1/gpu/pfifo.cpp @@ -1,7 +1,9 @@ #include "core/hw/tegra_x1/gpu/pfifo.hpp" +#include "core/debugger/debugger_manager.hpp" #include "core/hw/tegra_x1/gpu/const.hpp" #include "core/hw/tegra_x1/gpu/gpu.hpp" +#include "core/hw/tegra_x1/gpu/renderer/command_buffer.hpp" namespace hydra::hw::tegra_x1::gpu { @@ -57,23 +59,77 @@ struct CommandHeader { SecondaryOpcode secondary_opcode : 3; }; +template +T Read(uptr& gpu_addr) { + T word = tls_crnt_gmmu->Load(gpu_addr); + gpu_addr += sizeof(T); + + return word; +} + } // namespace -void Pfifo::SubmitEntries(GMmu& gmmu, const std::vector& entries, +Pfifo::Pfifo() : thread(&Pfifo::ThreadFunc, this) {} + +Pfifo::~Pfifo() { + { + std::lock_guard lock(mutex); + stop = true; + } + cond_var.notify_all(); + thread.join(); +} + +void Pfifo::SubmitEntries(GMmu& gmmu, std::span entries, GpfifoFlags flags) { - // TODO: flags - (void)flags; LOG_DEBUG(Gpu, "Flags: {}", flags); - RENDERER_INSTANCE.LockMutex(); - for (const auto& entry : entries) { - SubmitEntry(gmmu, entry); + { + std::lock_guard lock(mutex); + entry_lists.emplace( + gmmu, std::vector(entries.begin(), entries.end()), + flags); + } + + cond_var.notify_all(); +} + +void Pfifo::ThreadFunc() { + DEBUGGER_MANAGER_INSTANCE.GetDebuggerForCurrentProcess().RegisterThisThread( + "GPU thread"); + + std::unique_lock lock(mutex); + while (true) { + cond_var.wait(lock, [this] { return stop || !entry_lists.empty(); }); + if (stop) + break; + + // Process entry lists + while (!entry_lists.empty()) { + const auto entry_list = entry_lists.front(); + entry_lists.pop(); + + lock.unlock(); + + // Entries + // TODO: flags + tls_crnt_gmmu = &entry_list.gmmu; + tls_crnt_command_buffer = RENDERER_INSTANCE.CreateCommandBuffer(); + for (const auto& entry : entry_list.entries) 
+ SubmitEntry(entry); + delete tls_crnt_command_buffer; + tls_crnt_command_buffer = nullptr; + tls_crnt_gmmu = nullptr; + + lock.lock(); + } } - RENDERER_INSTANCE.EndCommandBuffer(); - RENDERER_INSTANCE.UnlockMutex(); + + DEBUGGER_MANAGER_INSTANCE.GetDebuggerForCurrentProcess() + .UnregisterThisThread(); } -void Pfifo::SubmitEntry(GMmu& gmmu, const GpfifoEntry entry) { +void Pfifo::SubmitEntry(const GpfifoEntry entry) { LOG_DEBUG( Gpu, "Gpfifo entry (addr lo: {:#x}, addr hi: {:#x}, size: {:#x}, allow " @@ -88,7 +141,7 @@ void Pfifo::SubmitEntry(GMmu& gmmu, const GpfifoEntry entry) { while (gpu_addr < end) { try { - if (!SubmitCommand(gmmu, gpu_addr)) + if (!SubmitCommand(gpu_addr)) break; } catch (Gpu::GetEngineAtSubchannelError error) { break; @@ -98,8 +151,8 @@ void Pfifo::SubmitEntry(GMmu& gmmu, const GpfifoEntry entry) { } } -bool Pfifo::SubmitCommand(GMmu& gmmu, uptr& gpu_addr) { - const auto header = Read(gmmu, gpu_addr); +bool Pfifo::SubmitCommand(uptr& gpu_addr) { + const auto header = Read(gpu_addr); LOG_DEBUG( Gpu, "Method: {:#x}, subchannel: {}, arg: {:#x}, secondary opcode: {}", header.method, header.subchannel, header.arg, header.secondary_opcode); @@ -123,7 +176,7 @@ bool Pfifo::SubmitCommand(GMmu& gmmu, uptr& gpu_addr) { } case SecondaryOpcode::IncMethod: for (u32 i = 0; i < header.arg; i++) - ProcessMethodArg(gmmu, header.subchannel, gpu_addr, offset, true); + ProcessMethodArg(header.subchannel, gpu_addr, offset, true); break; case SecondaryOpcode::Grp2UseTert: { const auto tert = static_cast(header.arg & 0x3); @@ -136,15 +189,15 @@ bool Pfifo::SubmitCommand(GMmu& gmmu, uptr& gpu_addr) { } case SecondaryOpcode::NonIncMethod: for (u32 i = 0; i < header.arg; i++) - ProcessMethodArg(gmmu, header.subchannel, gpu_addr, offset, false); + ProcessMethodArg(header.subchannel, gpu_addr, offset, false); break; case SecondaryOpcode::ImmDataMethod: - Gpu::GetInstance().SubchannelMethod(gmmu, header.subchannel, offset, + 
Gpu::GetInstance().SubchannelMethod(header.subchannel, offset, header.arg); break; case SecondaryOpcode::OneInc: for (u32 i = 0; i < header.arg; i++) - ProcessMethodArg(gmmu, header.subchannel, gpu_addr, offset, i == 0); + ProcessMethodArg(header.subchannel, gpu_addr, offset, i == 0); break; default: LOG_NOT_IMPLEMENTED(Gpu, "Secondary opcode {}", @@ -155,15 +208,15 @@ bool Pfifo::SubmitCommand(GMmu& gmmu, uptr& gpu_addr) { // TODO: is it okay to prefetch the parameters and then execute the // macro? if (header.method >= MACRO_METHODS_REGION) - Gpu::GetInstance().SubchannelFlushMacro(gmmu, header.subchannel); + Gpu::GetInstance().SubchannelFlushMacro(header.subchannel); return true; } -void Pfifo::ProcessMethodArg(GMmu& gmmu, u32 subchannel, uptr& gpu_addr, - u32& method, bool increment) { - u32 arg = Read(gmmu, gpu_addr); - Gpu::GetInstance().SubchannelMethod(gmmu, subchannel, method, arg); +void Pfifo::ProcessMethodArg(u32 subchannel, uptr& gpu_addr, u32& method, + bool increment) { + u32 arg = Read(gpu_addr); + Gpu::GetInstance().SubchannelMethod(subchannel, method, arg); if (increment) method++; } diff --git a/src/core/hw/tegra_x1/gpu/pfifo.hpp b/src/core/hw/tegra_x1/gpu/pfifo.hpp index 44e56532..eb81f439 100644 --- a/src/core/hw/tegra_x1/gpu/pfifo.hpp +++ b/src/core/hw/tegra_x1/gpu/pfifo.hpp @@ -11,27 +11,37 @@ namespace hydra::hw::tegra_x1::gpu { class GMmu; +struct GpfifoEntryList { + GMmu& gmmu; + std::vector entries; + GpfifoFlags flags; +}; + class Pfifo { public: - // TODO: use std::span instead - void SubmitEntries(GMmu& gmmu, const std::vector& entries, + Pfifo(); + ~Pfifo(); + + void SubmitEntries(GMmu& gmmu, std::span entries, GpfifoFlags flags); private: - void SubmitEntry(GMmu& gmmu, const GpfifoEntry entry); - bool SubmitCommand(GMmu& gmmu, uptr& gpu_addr); // TODO: return void + std::mutex mutex; + std::condition_variable cond_var; - // Helpers - template - T Read(GMmu& gmmu, uptr& gpu_addr) { - T word = gmmu.Load(gpu_addr); - gpu_addr += 
sizeof(T); + std::queue entry_lists; + bool stop{false}; - return word; - } + std::thread thread; // TODO: jthread - void ProcessMethodArg(GMmu& gmmu, u32 subchannel, uptr& gpu_addr, - u32& method, bool increment); + void ThreadFunc(); + + void SubmitEntry(const GpfifoEntry entry); + bool SubmitCommand(uptr& gpu_addr); // TODO: return void + + // Helpers + void ProcessMethodArg(u32 subchannel, uptr& gpu_addr, u32& method, + bool increment); }; } // namespace hydra::hw::tegra_x1::gpu diff --git a/src/core/hw/tegra_x1/gpu/renderer/buffer_base.hpp b/src/core/hw/tegra_x1/gpu/renderer/buffer_base.hpp index 2b29f05a..e3e1822c 100644 --- a/src/core/hw/tegra_x1/gpu/renderer/buffer_base.hpp +++ b/src/core/hw/tegra_x1/gpu/renderer/buffer_base.hpp @@ -4,24 +4,44 @@ namespace hydra::hw::tegra_x1::gpu::renderer { +class ICommandBuffer; class TextureBase; class BufferBase { public: - BufferBase(const BufferDescriptor& descriptor_) : descriptor{descriptor_} {} + BufferBase(u64 size_) : size{size_} {} virtual ~BufferBase() = default; - // Copying - virtual void CopyFrom(const uptr data) = 0; - virtual void CopyFrom(BufferBase* src) = 0; - virtual void CopyFrom(TextureBase* src, const uint3 src_origin, - const uint3 src_size) = 0; + virtual uptr GetPtr() const = 0; - // Getters - const BufferDescriptor& GetDescriptor() const { return descriptor; } + // Copying + void CopyFrom(const uptr data, u64 dst_offset = 0, + u64 size_ = invalid()) { + if (size_ == invalid()) + size_ = size - dst_offset; + CopyFromImpl(data, dst_offset, size_); + } + void CopyFrom(ICommandBuffer* command_buffer, BufferBase* src, + u64 dst_offset = 0, u64 src_offset = 0, + u64 size_ = invalid()) { + if (size_ == invalid()) + size_ = std::min(src->GetSize() - src_offset, size - dst_offset); + CopyFromImpl(command_buffer, src, dst_offset, src_offset, size_); + } + virtual void CopyFrom(ICommandBuffer* command_buffer, TextureBase* src, + const uint3 src_origin, const uint3 src_size, + u64 dst_offset = 0) = 0; 
protected: - const BufferDescriptor descriptor; + u64 size; + + // Copying + virtual void CopyFromImpl(const uptr data, u64 dst_offset, u64 size) = 0; + virtual void CopyFromImpl(ICommandBuffer* command_buffer, BufferBase* src, + u64 dst_offset, u64 src_offset, u64 size) = 0; + + public: + GETTER(size, GetSize); }; } // namespace hydra::hw::tegra_x1::gpu::renderer diff --git a/src/core/hw/tegra_x1/gpu/renderer/buffer_cache.cpp b/src/core/hw/tegra_x1/gpu/renderer/buffer_cache.cpp index 60f64078..64e5c6fb 100644 --- a/src/core/hw/tegra_x1/gpu/renderer/buffer_cache.cpp +++ b/src/core/hw/tegra_x1/gpu/renderer/buffer_cache.cpp @@ -5,28 +5,115 @@ namespace hydra::hw::tegra_x1::gpu::renderer { -BufferBase* BufferCache::Create(const BufferDescriptor& descriptor) { - auto texture = RENDERER_INSTANCE.CreateBuffer(descriptor); - // TODO: upload buffer +BufferCache::~BufferCache() { + for (auto& entry : entries) + delete entry.second.buffer; +} + +BufferView BufferCache::Get(ICommandBuffer* command_buffer, Range range) { + auto& entry = Find(range); + if (entry.buffer) { + // Check for memory invalidation + if (entry.invalidation_range.has_value() && + entry.invalidation_range->Intersects(range)) { + const auto invalidation_range = entry.invalidation_range.value(); + UpdateRange(command_buffer, entry, invalidation_range); + entry.invalidation_range = std::nullopt; + } + } else { + // Create new buffer + entry.buffer = RENDERER_INSTANCE.CreateBuffer(entry.range.GetSize()); + UpdateRange(command_buffer, entry, entry.range); + } - return texture; + return BufferView(entry.buffer, range.GetBegin() - entry.range.GetBegin(), + range.GetSize()); } -void BufferCache::Update(BufferBase* buffer) { - (void)buffer; +void BufferCache::InvalidateMemory(Range range) { + auto it = entries.upper_bound(range.GetBegin()); + if (it != entries.begin()) + it--; - // TODO: if data changed - if (false) - ; // TODO: upload buffer + while (it != entries.end() && + it->second.range.GetBegin() < 
range.GetEnd()) { + auto& entry = it->second; + if (entry.range.GetEnd() > range.GetBegin()) { + const auto invalidation_range = range.ClampedTo(entry.range); + if (entry.invalidation_range.has_value()) { + // Combine with an existing invalidation range if it exists + entry.invalidation_range = + entry.invalidation_range.value().Union(invalidation_range); + } else { + // Clamp the range + entry.invalidation_range = invalidation_range; + } + } + it++; + } } -u32 BufferCache::Hash(const BufferDescriptor& descriptor) { - HashCode hash; - hash.Add(descriptor.ptr); - hash.Add(descriptor.size); - return hash.ToHashCode(); +void BufferCache::UpdateRange(ICommandBuffer* command_buffer, + BufferEntry& entry, Range range) { + if (entry.inline_copy) { + // Do an inline update if possible + entry.buffer->CopyFrom(range.GetBegin(), + range.GetBegin() - entry.range.GetBegin(), + range.GetSize()); + entry.inline_copy = false; + } else { + // Copy from a temporary buffer + auto tmp_buffer = + RENDERER_INSTANCE.AllocateTemporaryBuffer(range.GetSize()); + tmp_buffer->CopyFrom(range.GetBegin()); + entry.buffer->CopyFrom(command_buffer, tmp_buffer, + range.GetBegin() - entry.range.GetBegin(), 0, + range.GetSize()); + RENDERER_INSTANCE.FreeTemporaryBuffer(tmp_buffer); + } } -void BufferCache::DestroyElement(BufferBase* buffer) { delete buffer; } +BufferEntry& BufferCache::Find(Range range) { + // Check for containing interval + auto it = entries.upper_bound(range.GetBegin()); + if (it != entries.begin()) { + auto prev = std::prev(it); + if (prev->second.range.GetEnd() >= range.GetEnd()) { + // Fully contained + return prev->second; + } + } + + // Insert and merge + auto new_range = range; + + it = entries.lower_bound(range.GetBegin()); + + // Merge with previous if overlapping/touching + if (it != entries.begin()) { + auto prev = std::prev(it); + if (prev->second.range.GetEnd() >= new_range.GetBegin()) { + new_range = Range( + prev->second.range.GetBegin(), + 
std::max(new_range.GetEnd(), prev->second.range.GetEnd())); + it = entries.erase(prev); + } + } + + // Merge with following entries + while (it != entries.end() && it->first <= new_range.GetEnd()) { + new_range = Range( + new_range.GetBegin(), + std::max(new_range.GetEnd(), it->second.range.GetEnd())); + it = entries.erase(it); + } + + // Insert merged interval + auto inserted = + entries.emplace(new_range.GetBegin(), + BufferEntry{.buffer = nullptr, .range = new_range}); + + return inserted.first->second; +} } // namespace hydra::hw::tegra_x1::gpu::renderer diff --git a/src/core/hw/tegra_x1/gpu/renderer/buffer_cache.hpp b/src/core/hw/tegra_x1/gpu/renderer/buffer_cache.hpp index bacd654c..f3dc30f9 100644 --- a/src/core/hw/tegra_x1/gpu/renderer/buffer_cache.hpp +++ b/src/core/hw/tegra_x1/gpu/renderer/buffer_cache.hpp @@ -1,6 +1,6 @@ #pragma once -#include "core/hw/tegra_x1/gpu/renderer/const.hpp" +#include "core/hw/tegra_x1/gpu/renderer/buffer_view.hpp" namespace hydra::hw::tegra_x1::cpu { class IMmu; @@ -8,20 +8,30 @@ class IMmu; namespace hydra::hw::tegra_x1::gpu::renderer { -class BufferBase; +// TODO: also release the buffer +struct BufferEntry { + BufferBase* buffer{nullptr}; + Range range; + std::optional> invalidation_range{}; + bool inline_copy{true}; // TODO: reset to true when not in use +}; -class BufferCache - : public CacheBase { +// TODO: optional data hashing +class BufferCache { public: - void Destroy() {} + ~BufferCache(); - BufferBase* Create(const BufferDescriptor& descriptor); - void Update(BufferBase* buffer); - u32 Hash(const BufferDescriptor& descriptor); + BufferView Get(ICommandBuffer* command_buffer, Range range); - void DestroyElement(BufferBase* buffer); + void InvalidateMemory(Range range); private: + std::map entries; + + // Helpers + static void UpdateRange(ICommandBuffer* command_buffer, BufferEntry& entry, + Range range); + BufferEntry& Find(Range range); }; } // namespace hydra::hw::tegra_x1::gpu::renderer diff --git 
a/src/core/hw/tegra_x1/gpu/renderer/buffer_view.hpp b/src/core/hw/tegra_x1/gpu/renderer/buffer_view.hpp new file mode 100644 index 00000000..0a8fc35f --- /dev/null +++ b/src/core/hw/tegra_x1/gpu/renderer/buffer_view.hpp @@ -0,0 +1,48 @@ +#pragma once + +#include "core/hw/tegra_x1/gpu/renderer/buffer_base.hpp" + +namespace hydra::hw::tegra_x1::gpu::renderer { + +struct BufferView { + public: + BufferView() = default; + BufferView(BufferBase* base_, u64 offset_ = 0, u64 size_ = invalid()) + : base{base_}, offset{offset_}, size{size_} { + if (size == invalid()) + size = base->GetSize() - offset; + } + + bool IsValid() const { return base != nullptr; } + + uptr GetPtr() const { return base->GetPtr() + offset; } + + // Copying + void CopyFrom(const uptr data, u64 size_ = invalid()) { + if (size_ == invalid()) + size_ = size - offset; + base->CopyFrom(data, offset, size_); + } + void CopyFrom(ICommandBuffer* command_buffer, const BufferView& src, + u64 size_ = invalid()) { + if (size_ == invalid()) + size_ = std::min(src.size - src.offset, size - offset); + base->CopyFrom(command_buffer, src.base, offset, src.offset, size_); + } + void CopyFrom(ICommandBuffer* command_buffer, TextureBase* src, + const uint3 src_origin, const uint3 src_size) { + base->CopyFrom(command_buffer, src, src_origin, src_size, offset); + } + + protected: + BufferBase* base{nullptr}; + u64 offset{0}; + u64 size{0}; + + public: + GETTER(base, GetBase); + GETTER(offset, GetOffset); + GETTER(size, GetSize); +}; + +} // namespace hydra::hw::tegra_x1::gpu::renderer diff --git a/src/core/hw/tegra_x1/gpu/renderer/command_buffer.hpp b/src/core/hw/tegra_x1/gpu/renderer/command_buffer.hpp new file mode 100644 index 00000000..db4c83d0 --- /dev/null +++ b/src/core/hw/tegra_x1/gpu/renderer/command_buffer.hpp @@ -0,0 +1,12 @@ +#pragma once + +#include "core/hw/tegra_x1/gpu/renderer/const.hpp" + +namespace hydra::hw::tegra_x1::gpu::renderer { + +class ICommandBuffer { + public: + virtual ~ICommandBuffer() = 
default; +}; + +} // namespace hydra::hw::tegra_x1::gpu::renderer diff --git a/src/core/hw/tegra_x1/gpu/renderer/const.cpp b/src/core/hw/tegra_x1/gpu/renderer/const.cpp index 04d7a262..42620896 100644 --- a/src/core/hw/tegra_x1/gpu/renderer/const.cpp +++ b/src/core/hw/tegra_x1/gpu/renderer/const.cpp @@ -2,6 +2,35 @@ namespace hydra::hw::tegra_x1::gpu::renderer { +namespace { + +enum class TextureTypeCompatibility { + _1D, + _1DBuffer, + _2D, + _3D, + Cube, +}; + +static TextureTypeCompatibility ToTextureTypeCompatibility(TextureType type) { + switch (type) { + case TextureType::_1D: + case TextureType::_1DArray: + return TextureTypeCompatibility::_1D; + case TextureType::_1DBuffer: + return TextureTypeCompatibility::_1DBuffer; + case TextureType::_2D: + case TextureType::_2DArray: + case TextureType::_3D: // TODO: 2D arrays aren't compatible with 3D + return TextureTypeCompatibility::_2D; + case TextureType::Cube: + case TextureType::CubeArray: + return TextureTypeCompatibility::Cube; + } +} + +} // namespace + TextureFormat to_texture_format(NvColorFormat color_format) { #define NV_COLOR_FORMAT_CASE(color_format, texture_format) \ case NvColorFormat::color_format: \ @@ -25,43 +54,56 @@ TextureFormat to_texture_format(NvColorFormat color_format) { #undef NV_COLOR_FORMAT_CASE } -TextureFormat to_texture_format(const ImageFormatWord image_format_word) { -#define IMAGE_FORMAT_CASE(img_format, c_r, c_g, c_b, c_a, texture_format) \ +TextureFormat to_texture_format(const ImageFormatWord image_format_word, + bool is_srgb) { +#define IMAGE_FORMAT_CASE_IMPL(img_format, c_r, c_g, c_b, c_a, texture_format, \ + is_srgb_) \ else if (image_format_word.image_format == ImageFormat::img_format && \ image_format_word.component_r == ImageComponent::c_r && \ image_format_word.component_g == ImageComponent::c_g && \ image_format_word.component_b == ImageComponent::c_b && \ - image_format_word.component_a == \ - ImageComponent::c_a) return TextureFormat::texture_format; + 
image_format_word.component_a == ImageComponent::c_a && \ + is_srgb == is_srgb_) return TextureFormat::texture_format; + +#define IMAGE_FORMAT_CASE(img_format, c_r, c_g, c_b, c_a, texture_format) \ + IMAGE_FORMAT_CASE_IMPL(img_format, c_r, c_g, c_b, c_a, texture_format, \ + false) +#define IMAGE_FORMAT_CASE_SRGB(img_format, c_r, c_g, c_b, c_a, texture_format) \ + IMAGE_FORMAT_CASE_IMPL(img_format, c_r, c_g, c_b, c_a, texture_format, true) // TODO: more formats + // TODO: check if (image_format_word.image_format == ImageFormat::Invalid) return TextureFormat::Invalid; IMAGE_FORMAT_CASE(R16, Float, Float, Float, Float, R16Float) IMAGE_FORMAT_CASE(R32, Float, Float, Float, Float, R32Float) - IMAGE_FORMAT_CASE(ARGB8, Unorm, Unorm, Unorm, Unorm, - RGBA8Unorm) // TODO: why argb? + IMAGE_FORMAT_CASE(ARGB8, Unorm, Unorm, Unorm, Unorm, RGBA8Unorm) + IMAGE_FORMAT_CASE_SRGB(ARGB8, Unorm, Unorm, Unorm, Unorm, RGBA8Unorm_sRGB) IMAGE_FORMAT_CASE(R8, Unorm, Unorm, Unorm, Unorm, R8Unorm) IMAGE_FORMAT_CASE(R16, Unorm, Unorm, Unorm, Unorm, R16Unorm) - IMAGE_FORMAT_CASE(GR8, Unorm, Unorm, Unorm, Unorm, - RG8Unorm) // TODO: correct? 
+ IMAGE_FORMAT_CASE(GR8, Unorm, Unorm, Unorm, Unorm, RG8Unorm) + IMAGE_FORMAT_CASE(GR8, Snorm, Snorm, Snorm, Snorm, RG8Snorm) IMAGE_FORMAT_CASE(RG16, Unorm, Unorm, Unorm, Unorm, RG16Unorm) IMAGE_FORMAT_CASE(RG16, Snorm, Snorm, Snorm, Snorm, RG16Snorm) IMAGE_FORMAT_CASE(RG16, Uint, Uint, Uint, Uint, RG16Uint) IMAGE_FORMAT_CASE(RG16, Sint, Sint, Sint, Sint, RG16Sint) IMAGE_FORMAT_CASE(RG16, Float, Float, Float, Float, RG16Float) IMAGE_FORMAT_CASE(DXT1, Unorm, Unorm, Unorm, Unorm, BC1_RGB) + IMAGE_FORMAT_CASE_SRGB(DXT1, Unorm, Unorm, Unorm, Unorm, BC1_RGB_sRGB) IMAGE_FORMAT_CASE(DXT23, Unorm, Unorm, Unorm, Unorm, BC2_RGBA) + IMAGE_FORMAT_CASE_SRGB(DXT23, Unorm, Unorm, Unorm, Unorm, BC2_RGBA_sRGB) IMAGE_FORMAT_CASE(DXT45, Unorm, Unorm, Unorm, Unorm, BC3_RGBA) + IMAGE_FORMAT_CASE_SRGB(DXT45, Unorm, Unorm, Unorm, Unorm, BC3_RGBA_sRGB) IMAGE_FORMAT_CASE(DXN2, Unorm, Unorm, Unorm, Unorm, BC5_RGUnorm) IMAGE_FORMAT_CASE(DXN2, Snorm, Snorm, Snorm, Snorm, BC5_RGSnorm) IMAGE_FORMAT_CASE(B5G6R5, Unorm, Unorm, Unorm, Unorm, B5G6R5Unorm) - IMAGE_FORMAT_CASE(ABGR4, Unorm, Unorm, Unorm, Unorm, - RGBA4Unorm) // TODO: correct? 
+ IMAGE_FORMAT_CASE(ABGR4, Unorm, Unorm, Unorm, Unorm, RGBA4Unorm) IMAGE_FORMAT_CASE(A1BGR5, Unorm, Unorm, Unorm, Unorm, A1BGR5Unorm) IMAGE_FORMAT_CASE(B10GR11Float, Float, Float, Float, Float, RG11B10Float) IMAGE_FORMAT_CASE(A2BGR10, Unorm, Unorm, Unorm, Unorm, RGB10A2Unorm) IMAGE_FORMAT_CASE(ASTC_2D_4X4, Unorm, Unorm, Unorm, Unorm, ASTC_RGBA_4x4) + IMAGE_FORMAT_CASE_SRGB(ASTC_2D_4X4, Unorm, Unorm, Unorm, Unorm, + ASTC_RGBA_4x4_sRGB) IMAGE_FORMAT_CASE(DXN1, Unorm, Unorm, Unorm, Unorm, BC4_RUnorm) IMAGE_FORMAT_CASE(Z24S8, Uint, Unorm, Unorm, Unorm, Z24Unorm_S8Uint) IMAGE_FORMAT_CASE(Z16, Unorm, Unorm, Unorm, Unorm, Z16Unorm) @@ -71,12 +113,10 @@ TextureFormat to_texture_format(const ImageFormatWord image_format_word) { IMAGE_FORMAT_CASE(RGBA32, Float, Float, Float, Float, RGBA32Float) IMAGE_FORMAT_CASE(BC7U, Unorm, Unorm, Unorm, Unorm, BC7_RGBAUnorm) else { - LOG_NOT_IMPLEMENTED( - Gpu, "Image format {}, components: {}, {}, {}, {}", - image_format_word.image_format, image_format_word.component_r, - image_format_word.component_g, image_format_word.component_b, - image_format_word.component_a); - throw; + LOG_FATAL(Gpu, "Image format {}, components: {}, {}, {}, {}, sRGB: {}", + image_format_word.image_format, image_format_word.component_r, + image_format_word.component_g, image_format_word.component_b, + image_format_word.component_a, is_srgb); } #undef IMAGE_FORMAT_CASE @@ -152,9 +192,7 @@ TextureFormat to_texture_format(ColorSurfaceFormat color_surface_format) { COLOR_SURFACE_FORMAT_CASE(BGRX8UnormUnknownFE, Invalid) COLOR_SURFACE_FORMAT_CASE(Y32UintUnknownFF, Invalid) default: - LOG_NOT_IMPLEMENTED(Gpu, "Color surface format {}", - color_surface_format); - return TextureFormat::Invalid; + LOG_FATAL(Gpu, "Color surface format {}", color_surface_format); } #undef COLOR_SURFACE_FORMAT_CASE @@ -178,16 +216,119 @@ TextureFormat to_texture_format(DepthSurfaceFormat depth_surface_format) { DEPTH_SURFACE_FORMAT_CASE(Z32X8C8X16Float, Invalid) 
DEPTH_SURFACE_FORMAT_CASE(Z32S8C8X16Float, Invalid) default: - LOG_NOT_IMPLEMENTED(Gpu, "Depth surface format {}", - depth_surface_format); - // TODO: don't throw - throw; - return TextureFormat::Invalid; + LOG_FATAL(Gpu, "Depth surface format {}", depth_surface_format); } #undef DEPTH_SURFACE_FORMAT_CASE } +u32 get_texture_format_bpp(const TextureFormat format) { + switch (format) { + case TextureFormat::Invalid: + throw GetTextureFormatBppError::InvalidFormat; + case TextureFormat::R8Unorm: + case TextureFormat::R8Snorm: + case TextureFormat::R8Uint: + case TextureFormat::R8Sint: + return 1; + case TextureFormat::R16Float: + case TextureFormat::R16Unorm: + case TextureFormat::R16Snorm: + case TextureFormat::R16Uint: + case TextureFormat::R16Sint: + return 2; + case TextureFormat::R32Float: + case TextureFormat::R32Uint: + case TextureFormat::R32Sint: + return 4; + case TextureFormat::RG8Unorm: + case TextureFormat::RG8Snorm: + case TextureFormat::RG8Uint: + case TextureFormat::RG8Sint: + return 2; + case TextureFormat::RG16Float: + case TextureFormat::RG16Unorm: + case TextureFormat::RG16Snorm: + case TextureFormat::RG16Uint: + case TextureFormat::RG16Sint: + return 4; + case TextureFormat::RG32Float: + case TextureFormat::RG32Uint: + case TextureFormat::RG32Sint: + return 8; + case TextureFormat::RGB32Float: + case TextureFormat::RGB32Uint: + case TextureFormat::RGB32Sint: + return 12; + case TextureFormat::RGBA8Unorm: + case TextureFormat::RGBA8Snorm: + case TextureFormat::RGBA8Uint: + case TextureFormat::RGBA8Sint: + case TextureFormat::RGBA8Unorm_sRGB: + case TextureFormat::RGBX8Unorm: + case TextureFormat::RGBX8Snorm: + case TextureFormat::RGBX8Uint: + case TextureFormat::RGBX8Sint: + case TextureFormat::RGBX8Unorm_sRGB: + return 4; + case TextureFormat::RGBA16Float: + case TextureFormat::RGBA16Unorm: + case TextureFormat::RGBA16Snorm: + case TextureFormat::RGBA16Uint: + case TextureFormat::RGBA16Sint: + case TextureFormat::RGBX16Float: + case 
TextureFormat::RGBX16Unorm: + case TextureFormat::RGBX16Snorm: + case TextureFormat::RGBX16Uint: + case TextureFormat::RGBX16Sint: + return 8; + case TextureFormat::RGBA32Float: + case TextureFormat::RGBA32Uint: + case TextureFormat::RGBA32Sint: + case TextureFormat::RGBX32Float: + case TextureFormat::RGBX32Uint: + case TextureFormat::RGBX32Sint: + return 16; + case TextureFormat::S8Uint: + return 1; + case TextureFormat::Z16Unorm: + return 2; + case TextureFormat::Z24Unorm_X8Uint: + case TextureFormat::Z24Unorm_S8Uint: + return 4; + case TextureFormat::Z32Float: + return 4; + case TextureFormat::Z32Float_X24S8Uint: + return 8; + return 4; + case TextureFormat::RGBA4Unorm: + return 2; + case TextureFormat::RGB5Unorm: + case TextureFormat::RGB5A1Unorm: + case TextureFormat::R5G6B5Unorm: + return 2; + case TextureFormat::RGB10A2Unorm: + case TextureFormat::RGB10A2Uint: + return 4; + case TextureFormat::RG11B10Float: + return 4; + case TextureFormat::E5BGR9Float: + return 4; + case TextureFormat::B5G6R5Unorm: + case TextureFormat::BGR5Unorm: + case TextureFormat::BGR5A1Unorm: + case TextureFormat::A1BGR5Unorm: + return 2; + case TextureFormat::BGRX8Unorm: + case TextureFormat::BGRA8Unorm: + case TextureFormat::BGRX8Unorm_sRGB: + case TextureFormat::BGRA8Unorm_sRGB: + return 4; + default: + throw GetTextureFormatBppError::UnsupportedFormatForBpp; + } +} + u32 get_texture_format_stride(const TextureFormat format, u32 width) { // TODO: check this switch (format) { @@ -834,6 +975,39 @@ get_texture_format_default_swizzle_channels(const TextureFormat format) { #undef SWIZZLE } +u32 TextureDescriptor::GetHash() const { + HashCode hash; + hash.Add(ptr); + hash.Add(width); + hash.Add(height); + hash.Add(depth); + hash.Add(stride); + + hash.Add(ToTextureTypeCompatibility(type)); + + // TODO: get format info from the renderer instead + hash.Add(is_texture_format_compressed(format)); + hash.Add(is_texture_format_depth_or_stencil(format)); + 
hash.Add(get_texture_format_stride(format, 16)); + + return hash.ToHashCode(); +} + +u32 TextureViewDescriptor::GetHash() const { + HashCode hash; + hash.Add(format); + hash.Add(swizzle_channels.r); + hash.Add(swizzle_channels.g); + hash.Add(swizzle_channels.b); + hash.Add(swizzle_channels.a); + hash.Add(levels.GetBegin()); + hash.Add(levels.GetEnd()); + hash.Add(layers.GetBegin()); + hash.Add(layers.GetEnd()); + + return hash.ToHashCode(); +} + usize get_vertex_format_size(engines::VertexAttribSize size) { switch (size) { case engines::VertexAttribSize::_1x32: diff --git a/src/core/hw/tegra_x1/gpu/renderer/const.hpp b/src/core/hw/tegra_x1/gpu/renderer/const.hpp index bc56451c..faa19844 100644 --- a/src/core/hw/tegra_x1/gpu/renderer/const.hpp +++ b/src/core/hw/tegra_x1/gpu/renderer/const.hpp @@ -7,6 +7,17 @@ namespace hydra::hw::tegra_x1::gpu::renderer { class TextureBase; class ShaderBase; +enum class TextureType { + _1D, + _1DArray, + _1DBuffer, + _2D, + _2DArray, + _3D, + Cube, + CubeArray, +}; + enum class TextureFormat { Invalid, @@ -142,10 +153,17 @@ enum class TextureFormat { }; TextureFormat to_texture_format(NvColorFormat color_format); -TextureFormat to_texture_format(const ImageFormatWord image_format_word); +TextureFormat to_texture_format(const ImageFormatWord image_format_word, + bool is_srgb); TextureFormat to_texture_format(ColorSurfaceFormat color_surface_format); TextureFormat to_texture_format(DepthSurfaceFormat depth_surface_format); +enum class GetTextureFormatBppError { + InvalidFormat, + UnsupportedFormatForBpp, +}; + +u32 get_texture_format_bpp(const TextureFormat format); u32 get_texture_format_stride(const TextureFormat format, u32 width); bool is_texture_format_compressed(const TextureFormat format); bool is_texture_format_depth_or_stencil(const TextureFormat format); @@ -165,6 +183,10 @@ struct SwizzleChannels { ImageSwizzle b : 3; ImageSwizzle a : 3; + SwizzleChannels() + : r{ImageSwizzle::R}, g{ImageSwizzle::G}, b{ImageSwizzle::B}, + 
a{ImageSwizzle::A} {} + SwizzleChannels(const ImageSwizzle r_, const ImageSwizzle g_, const ImageSwizzle b_, const ImageSwizzle a_) : r{r_}, g{g_}, b{b_}, a{a_} {} @@ -181,78 +203,59 @@ struct SwizzleChannels { SwizzleChannels get_texture_format_default_swizzle_channels(const TextureFormat format); -enum class BlendOperation { - Add = 1, - Sub = 2, - RevSub = 3, - Min = 4, - Max = 5, -}; - -enum class BlendFactor { - Zero = 1, - One = 2, - SrcColor = 3, - InvSrcColor = 4, - SrcAlpha = 5, - InvSrcAlpha = 6, - DstAlpha = 7, - InvDstAlpha = 8, - DstColor = 9, - InvDstColor = 10, - SrcAlphaSaturate = 11, - Src1Color = 16, - InvSrc1Color = 17, - Src1Alpha = 18, - InvSrc1Alpha = 19, - ConstColor = 20, - InvConstColor = 21, - ConstAlpha = 22, - InvConstAlpha = 23, -}; - -struct BufferDescriptor { - uptr ptr; - usize size; -}; - struct TextureDescriptor { uptr ptr; + TextureType type; TextureFormat format; NvKind kind; u32 width; u32 height; + u32 depth; u32 block_height_log2; u32 stride; SwizzleChannels swizzle_channels; // TODO: more - TextureDescriptor(const uptr ptr_, const TextureFormat format_, - const NvKind kind_, const u32 width_, const u32 height_, + TextureDescriptor(const uptr ptr_, const TextureType type_, + const TextureFormat format_, const NvKind kind_, + const u32 width_, const u32 height_, const u32 depth_, const u32 block_height_log2_, const u32 stride_, const SwizzleChannels& swizzle_channels_) - : ptr{ptr_}, format{format_}, kind{kind_}, width{width_}, - height{height_}, block_height_log2{block_height_log2_}, + : ptr{ptr_}, type{type_}, format{format_}, kind{kind_}, width{width_}, + height{height_}, depth{depth_}, block_height_log2{block_height_log2_}, stride{stride_}, swizzle_channels{swizzle_channels_} {} - TextureDescriptor(const uptr ptr_, const TextureFormat format_, - const NvKind kind_, const u32 width_, const u32 height_, + TextureDescriptor(const uptr ptr_, const TextureType type_, + const TextureFormat format_, const NvKind kind_, + const u32 
width_, const u32 height_, const u32 depth_, const u32 block_height_log2_, const u32 stride_) : TextureDescriptor( - ptr_, format_, kind_, width_, height_, block_height_log2_, - stride_, get_texture_format_default_swizzle_channels(format_)) {} + ptr_, type_, format_, kind_, width_, height_, depth_, + block_height_log2_, stride_, + get_texture_format_default_swizzle_channels(format_)) {} + + u64 GetLayerSizeInBytes() const { return height * stride; } + u64 GetSizeInBytes() const { return depth * GetLayerSizeInBytes(); } + Range GetRange() const { + return Range::FromSize(ptr, GetSizeInBytes()); + } + + u32 GetHash() const; }; struct TextureViewDescriptor { TextureFormat format; SwizzleChannels swizzle_channels; + Range levels; + Range layers; - u32 GetHash() const { - return (u32)format | ((u32)swizzle_channels.r << 8) | - ((u32)swizzle_channels.g << 11) | - ((u32)swizzle_channels.b << 14) | - ((u32)swizzle_channels.a << 17); - } + TextureViewDescriptor(TextureFormat format_, + SwizzleChannels swizzle_channels_, Range levels_, + Range layers_) + : format{format_}, swizzle_channels{swizzle_channels_}, levels{levels_}, + layers{layers_} {} + + u32 GetHash() const; }; enum class SamplerFilter { @@ -289,6 +292,36 @@ struct SamplerDescriptor { // TODO: more }; +enum class BlendOperation { + Add = 1, + Sub = 2, + RevSub = 3, + Min = 4, + Max = 5, +}; + +enum class BlendFactor { + Zero = 1, + One = 2, + SrcColor = 3, + InvSrcColor = 4, + SrcAlpha = 5, + InvSrcAlpha = 6, + DstAlpha = 7, + InvDstAlpha = 8, + DstColor = 9, + InvDstColor = 10, + SrcAlphaSaturate = 11, + Src1Color = 16, + InvSrc1Color = 17, + Src1Alpha = 18, + InvSrc1Alpha = 19, + ConstColor = 20, + InvConstColor = 21, + ConstAlpha = 22, + InvConstAlpha = 23, +}; + struct RenderTargetDescriptor { TextureBase* texture; bool load_action_clear = false; @@ -324,13 +357,13 @@ enum class ShaderType { }; struct ResourceMapping { - u32 uniform_buffers[UNIFORM_BUFFER_BINDING_COUNT]; + u32 
uniform_buffers[CONST_BUFFER_BINDING_COUNT]; // TODO: storage buffers std::map textures; // TODO: images ResourceMapping() { - for (u32 i = 0; i < UNIFORM_BUFFER_BINDING_COUNT; i++) + for (u32 i = 0; i < CONST_BUFFER_BINDING_COUNT; i++) uniform_buffers[i] = invalid(); // TODO: storage buffers // TODO: images @@ -384,6 +417,11 @@ enum class TextureUsage { } // namespace hydra::hw::tegra_x1::gpu::renderer +ENABLE_ENUM_FORMATTING(hydra::hw::tegra_x1::gpu::renderer::TextureType, _1D, + "1D", _1DArray, "1D array", _1DBuffer, "1D buffer", _2D, + "2D", _2DArray, "2D array", _3D, "3D", Cube, "cube", + CubeArray, "cube array") + ENABLE_ENUM_FORMATTING( hydra::hw::tegra_x1::gpu::renderer::TextureFormat, Invalid, "invalid", R8Unorm, "r8unorm", R8Snorm, "r8snorm", R8Uint, "r8uint", R8Sint, "r8sint", diff --git a/src/core/hw/tegra_x1/gpu/renderer/index_cache.cpp b/src/core/hw/tegra_x1/gpu/renderer/index_cache.cpp index d68023d4..a17423dd 100644 --- a/src/core/hw/tegra_x1/gpu/renderer/index_cache.cpp +++ b/src/core/hw/tegra_x1/gpu/renderer/index_cache.cpp @@ -96,10 +96,11 @@ IndexCache::~IndexCache() { RENDERER_INSTANCE.FreeTemporaryBuffer(index_buffer); } -BufferBase* IndexCache::Decode(const IndexDescriptor& descriptor, - engines::IndexType& out_type, - engines::PrimitiveType& out_primitive_type, - u32& out_count) { +BufferView IndexCache::Decode(ICommandBuffer* command_buffer, + const IndexDescriptor& descriptor, + engines::IndexType& out_type, + engines::PrimitiveType& out_primitive_type, + u32& out_count) { #define PRIMITIVE_TYPE_SWITCH(macro, u8_index_macro) \ switch (descriptor.primitive_type) { \ case engines::PrimitiveType::Quads: \ @@ -116,7 +117,11 @@ BufferBase* IndexCache::Decode(const IndexDescriptor& descriptor, u8_index_macro(); \ break; \ } else { \ - return descriptor.src_index_buffer; \ + if (descriptor.mem_range) \ + return RENDERER_INSTANCE.GetBufferCache().Get( \ + command_buffer, *descriptor.mem_range); \ + else \ + return BufferView(); \ } \ } @@ -155,9 
+160,9 @@ BufferBase* IndexCache::Decode(const IndexDescriptor& descriptor, index_buffer = RENDERER_INSTANCE.AllocateTemporaryBuffer(out_count * index_size); uptr in_ptr = 0x0; - if (descriptor.src_index_buffer) - in_ptr = descriptor.src_index_buffer->GetDescriptor().ptr; - auto out_ptr = index_buffer->GetDescriptor().ptr; + if (descriptor.mem_range) + in_ptr = descriptor.mem_range->GetBegin(); + auto out_ptr = index_buffer->GetPtr(); #define DECODE(name) decode_##name(in_ptr, out_ptr, out_type, descriptor.count) @@ -168,20 +173,23 @@ BufferBase* IndexCache::Decode(const IndexDescriptor& descriptor, #define DECODE_MACRO_U8_INDEX() \ decode_u8_indices(in_ptr, out_ptr, descriptor.count); - if (descriptor.src_index_buffer == nullptr) { + if (descriptor.mem_range) { PRIMITIVE_TYPE_SWITCH(DECODE_MACRO_AUTO, DECODE_MACRO_AUTO_U8_INDEX) } else { PRIMITIVE_TYPE_SWITCH(DECODE_MACRO, DECODE_MACRO_U8_INDEX) } - return index_buffer; + return BufferView(index_buffer); } // namespace hydra::hw::tegra_x1::gpu::renderer u32 IndexCache::Hash(const IndexDescriptor& descriptor) { HashCode hash; hash.Add(descriptor.type); hash.Add(descriptor.primitive_type); - hash.Add(descriptor.src_index_buffer); + if (descriptor.mem_range) { + hash.Add(descriptor.mem_range->GetBegin()); + hash.Add(descriptor.mem_range->GetEnd()); + } hash.Add(descriptor.count); return hash.ToHashCode(); } diff --git a/src/core/hw/tegra_x1/gpu/renderer/index_cache.hpp b/src/core/hw/tegra_x1/gpu/renderer/index_cache.hpp index 382b146d..d3a211b2 100644 --- a/src/core/hw/tegra_x1/gpu/renderer/index_cache.hpp +++ b/src/core/hw/tegra_x1/gpu/renderer/index_cache.hpp @@ -1,6 +1,6 @@ #pragma once -#include "core/hw/tegra_x1/gpu/renderer/const.hpp" +#include "core/hw/tegra_x1/gpu/renderer/buffer_view.hpp" namespace hydra::hw::tegra_x1::gpu::renderer { @@ -10,17 +10,19 @@ struct IndexDescriptor { engines::IndexType type; engines::PrimitiveType primitive_type; u32 count; - BufferBase* src_index_buffer; + std::optional> 
mem_range{std::nullopt}; }; +// TODO: memory invalidation class IndexCache { public: ~IndexCache(); - BufferBase* Decode(const IndexDescriptor& descriptor, - engines::IndexType& out_type, - engines::PrimitiveType& out_primitive_type, - u32& out_count); + BufferView Decode(ICommandBuffer* command_buffer, + const IndexDescriptor& descriptor, + engines::IndexType& out_type, + engines::PrimitiveType& out_primitive_type, + u32& out_count); u32 Hash(const IndexDescriptor& descriptor); private: diff --git a/src/core/hw/tegra_x1/gpu/renderer/metal/blit_pipeline_cache.hpp b/src/core/hw/tegra_x1/gpu/renderer/metal/blit_pipeline_cache.hpp index 590da469..c0b5e363 100644 --- a/src/core/hw/tegra_x1/gpu/renderer/metal/blit_pipeline_cache.hpp +++ b/src/core/hw/tegra_x1/gpu/renderer/metal/blit_pipeline_cache.hpp @@ -4,6 +4,12 @@ namespace hydra::hw::tegra_x1::gpu::renderer::metal { +struct BlitParams { + float2 src_offset; + float2 src_scale; + f32 opacity; +}; + struct BlitPipelineDescriptor { MTL::PixelFormat pixel_format; bool transparent; diff --git a/src/core/hw/tegra_x1/gpu/renderer/metal/buffer.cpp b/src/core/hw/tegra_x1/gpu/renderer/metal/buffer.cpp index 45a8e979..b3922725 100644 --- a/src/core/hw/tegra_x1/gpu/renderer/metal/buffer.cpp +++ b/src/core/hw/tegra_x1/gpu/renderer/metal/buffer.cpp @@ -1,56 +1,55 @@ #include "core/hw/tegra_x1/gpu/renderer/metal/buffer.hpp" +#include "core/hw/tegra_x1/gpu/renderer/metal/command_buffer.hpp" #include "core/hw/tegra_x1/gpu/renderer/metal/renderer.hpp" #include "core/hw/tegra_x1/gpu/renderer/metal/texture.hpp" namespace hydra::hw::tegra_x1::gpu::renderer::metal { -Buffer::Buffer(const BufferDescriptor& descriptor) : BufferBase(descriptor) { +Buffer::Buffer(u64 size) : BufferBase(size) { buffer = METAL_RENDERER_INSTANCE.GetDevice()->newBuffer( - reinterpret_cast(descriptor.ptr), descriptor.size, - MTL::ResourceStorageModeShared, nullptr); + size, MTL::ResourceStorageModeShared); } -Buffer::Buffer(MTL::Buffer* buffer_, u32 offset_) - : 
BufferBase({reinterpret_cast(buffer_->contents()) + offset_, - buffer_->allocatedSize()}), - buffer{buffer_}, offset{offset_} { - owns_buffer = false; -} - -Buffer::~Buffer() { - if (owns_buffer) - buffer->release(); -} - -void Buffer::CopyFrom(const uptr data) { - memcpy((u8*)buffer->contents() + offset, reinterpret_cast(data), - descriptor.size); -} - -void Buffer::CopyFrom(BufferBase* src) { - auto src_impl = static_cast(src); +Buffer::Buffer(MTL::Buffer* buffer_) + : BufferBase(buffer_->allocatedSize()), buffer{buffer_} {} - auto blit_encoder = METAL_RENDERER_INSTANCE.GetBlitCommandEncoder(); - blit_encoder->copyFromBuffer(src_impl->GetBuffer(), src_impl->GetOffset(), - buffer, offset, descriptor.size); -} +Buffer::~Buffer() { buffer->release(); } -void Buffer::CopyFrom(TextureBase* src, const uint3 src_origin, - const uint3 src_size) { +void Buffer::CopyFrom(ICommandBuffer* command_buffer, TextureBase* src, + const uint3 src_origin, const uint3 src_size, + u64 dst_offset) { + const auto command_buffer_impl = + static_cast(command_buffer); auto src_impl = static_cast(src); - auto blit_encoder = METAL_RENDERER_INSTANCE.GetBlitCommandEncoder(); + auto blit_encoder = command_buffer_impl->GetBlitCommandEncoder(); // TODO: bytes per image // TODO: calculate the stride for the Metal pixel format blit_encoder->copyFromTexture( src_impl->GetTexture(), 0, 0, MTL::Origin::Make(src_origin.x(), src_origin.y(), src_origin.z()), MTL::Size::Make(src_size.x(), src_size.y(), src_size.z()), buffer, - offset, + dst_offset, get_texture_format_stride(src_impl->GetDescriptor().format, src_size.x()), 0); } +void Buffer::CopyFromImpl(const uptr data, u64 dst_offset, u64 size_) { + memcpy((u8*)buffer->contents() + dst_offset, reinterpret_cast(data), + size_); +} + +void Buffer::CopyFromImpl(ICommandBuffer* command_buffer, BufferBase* src, + u64 dst_offset, u64 src_offset, u64 size_) { + const auto command_buffer_impl = + static_cast(command_buffer); + auto src_impl = static_cast(src); 
+ + auto blit_encoder = command_buffer_impl->GetBlitCommandEncoder(); + blit_encoder->copyFromBuffer(src_impl->GetBuffer(), src_offset, buffer, + dst_offset, size_); +} + } // namespace hydra::hw::tegra_x1::gpu::renderer::metal diff --git a/src/core/hw/tegra_x1/gpu/renderer/metal/buffer.hpp b/src/core/hw/tegra_x1/gpu/renderer/metal/buffer.hpp index 7660bede..231106ce 100644 --- a/src/core/hw/tegra_x1/gpu/renderer/metal/buffer.hpp +++ b/src/core/hw/tegra_x1/gpu/renderer/metal/buffer.hpp @@ -7,24 +7,29 @@ namespace hydra::hw::tegra_x1::gpu::renderer::metal { class Buffer final : public BufferBase { public: - Buffer(const BufferDescriptor& descriptor); - Buffer(MTL::Buffer* buffer_, u32 offset_); + Buffer(u64 size); + Buffer(MTL::Buffer* buffer_); ~Buffer() override; + uptr GetPtr() const override { + return reinterpret_cast(buffer->contents()); + } + // Copying - void CopyFrom(const uptr data) override; - void CopyFrom(BufferBase* src) override; - void CopyFrom(TextureBase* src, const uint3 src_origin, - const uint3 src_size) override; + void CopyFrom(ICommandBuffer* command_buffer, TextureBase* src, + const uint3 src_origin, const uint3 src_size, + u64 dst_offset) override; private: MTL::Buffer* buffer; - u32 offset{0}; - bool owns_buffer{true}; + + // Copying + void CopyFromImpl(const uptr data, u64 dst_offset, u64 size_) override; + void CopyFromImpl(ICommandBuffer* command_buffer, BufferBase* src, + u64 dst_offset, u64 src_offset, u64 size_) override; public: GETTER(buffer, GetBuffer); - GETTER(offset, GetOffset); }; } // namespace hydra::hw::tegra_x1::gpu::renderer::metal diff --git a/src/core/hw/tegra_x1/gpu/renderer/metal/command_buffer.cpp b/src/core/hw/tegra_x1/gpu/renderer/metal/command_buffer.cpp new file mode 100644 index 00000000..f27bf071 --- /dev/null +++ b/src/core/hw/tegra_x1/gpu/renderer/metal/command_buffer.cpp @@ -0,0 +1,189 @@ +#include "core/hw/tegra_x1/gpu/renderer/metal/command_buffer.hpp" + +namespace hydra::hw::tegra_x1::gpu::renderer::metal 
{ + +CommandBuffer::CommandBuffer(MTL::CommandQueue* command_queue) { + TMP_AUTORELEASE_POOL_BEGIN(); + command_buffer = command_queue->commandBuffer()->retain(); + TMP_AUTORELEASE_POOL_END(); +} + +CommandBuffer::~CommandBuffer() { + EndEncoding(); + command_buffer->commit(); + command_buffer->release(); +} + +MTL::RenderCommandEncoder* CommandBuffer::GetRenderCommandEncoder( + MTL::RenderPassDescriptor* render_pass_descriptor) { + if (render_pass_descriptor == encoder_state.render_pass) + return GetRenderCommandEncoderUnchecked(); + + encoder_state.render_pass = render_pass_descriptor; + encoder_state.render = {}; + + return CreateRenderCommandEncoder(render_pass_descriptor); +} + +MTL::RenderCommandEncoder* CommandBuffer::CreateRenderCommandEncoder( + MTL::RenderPassDescriptor* render_pass_descriptor) { + EndEncoding(); + + TMP_AUTORELEASE_POOL_BEGIN(); + command_encoder = + command_buffer->renderCommandEncoder(render_pass_descriptor)->retain(); + TMP_AUTORELEASE_POOL_END(); + + encoder_type = EncoderType::Render; + encoder_state.render_pass = render_pass_descriptor; + + return GetRenderCommandEncoderUnchecked(); +} + +MTL::BlitCommandEncoder* CommandBuffer::GetBlitCommandEncoder() { + if (encoder_type == EncoderType::Blit) + return GetBlitCommandEncoderUnchecked(); + + EndEncoding(); + + TMP_AUTORELEASE_POOL_BEGIN(); + command_encoder = command_buffer->blitCommandEncoder()->retain(); + TMP_AUTORELEASE_POOL_END(); + + encoder_type = EncoderType::Blit; + + return GetBlitCommandEncoderUnchecked(); +} + +void CommandBuffer::EndEncoding() { + if (encoder_type == EncoderType::None) + return; + + command_encoder->endEncoding(); + command_encoder->release(); + command_encoder = nullptr; + encoder_type = EncoderType::None; + + // Reset the render pass + encoder_state.render_pass = nullptr; +} + +void CommandBuffer::SetRenderPipelineState(MTL::RenderPipelineState* pipeline) { + auto& bound_pipeline = encoder_state.render.pipeline; + if (pipeline == bound_pipeline) + 
return; + + GetRenderCommandEncoderUnchecked()->setRenderPipelineState(pipeline); + bound_pipeline = pipeline; +} + +void CommandBuffer::SetDepthStencilState( + MTL::DepthStencilState* depth_stencil_state) { + auto& bound_depth_stencil_state = encoder_state.render.depth_stencil_state; + if (depth_stencil_state == bound_depth_stencil_state) + return; + + GetRenderCommandEncoderUnchecked()->setDepthStencilState( + depth_stencil_state); + bound_depth_stencil_state = depth_stencil_state; +} + +void CommandBuffer::SetCullMode(MTL::CullMode cull_mode) { + auto& bound_cull_mode = encoder_state.render.cull_mode; + if (cull_mode == bound_cull_mode) + return; + + GetRenderCommandEncoderUnchecked()->setCullMode(cull_mode); + bound_cull_mode = cull_mode; +} + +void CommandBuffer::SetFrontFaceWinding(MTL::Winding front_face_winding) { + auto& bound_front_face_winding = encoder_state.render.front_face_winding; + if (front_face_winding == bound_front_face_winding) + return; + + GetRenderCommandEncoderUnchecked()->setFrontFacingWinding( + front_face_winding); + bound_front_face_winding = front_face_winding; +} + +void CommandBuffer::SetBuffer(MTL::Buffer* buffer, u64 offset, + ShaderType shader_type, u32 index) { + ASSERT_DEBUG(index < BUFFER_COUNT, MetalRenderer, "Invalid buffer index {}", + index); + + auto& bound_buffer = + encoder_state.render.buffers[static_cast(shader_type)][index]; + if (buffer == bound_buffer.buffer && offset == bound_buffer.offset) + return; + + // TODO: fast path for offset only change + + switch (shader_type) { + case ShaderType::Vertex: + GetRenderCommandEncoderUnchecked()->setVertexBuffer(buffer, offset, + index); + break; + case ShaderType::Fragment: + GetRenderCommandEncoderUnchecked()->setFragmentBuffer(buffer, offset, + index); + break; + default: + LOG_ERROR(MetalRenderer, "Invalid shader type {}", shader_type); + break; + } + bound_buffer.buffer = buffer; + bound_buffer.offset = offset; +} + +void CommandBuffer::SetTexture(MTL::Texture* texture, 
ShaderType shader_type, + u32 index) { + ASSERT_DEBUG(index < TEXTURE_COUNT, MetalRenderer, + "Invalid texture index {}", index); + + auto& bound_texture = + encoder_state.render.textures[static_cast(shader_type)][index]; + if (texture == bound_texture) + return; + + switch (shader_type) { + case ShaderType::Vertex: + GetRenderCommandEncoderUnchecked()->setVertexTexture(texture, index); + break; + case ShaderType::Fragment: + GetRenderCommandEncoderUnchecked()->setFragmentTexture(texture, index); + break; + default: + LOG_ERROR(MetalRenderer, "Invalid shader type {}", shader_type); + break; + } + bound_texture = texture; +} + +void CommandBuffer::SetSampler(MTL::SamplerState* sampler, + ShaderType shader_type, u32 index) { + ASSERT_DEBUG(index < TEXTURE_COUNT, MetalRenderer, + "Invalid texture index {}", index); + + auto& bound_sampler = + encoder_state.render.samplers[static_cast(shader_type)][index]; + if (sampler == bound_sampler) + return; + + switch (shader_type) { + case ShaderType::Vertex: + GetRenderCommandEncoderUnchecked()->setVertexSamplerState(sampler, + index); + break; + case ShaderType::Fragment: + GetRenderCommandEncoderUnchecked()->setFragmentSamplerState(sampler, + index); + break; + default: + LOG_ERROR(MetalRenderer, "Invalid shader type {}", shader_type); + break; + } + bound_sampler = sampler; +} + +} // namespace hydra::hw::tegra_x1::gpu::renderer::metal diff --git a/src/core/hw/tegra_x1/gpu/renderer/metal/command_buffer.hpp b/src/core/hw/tegra_x1/gpu/renderer/metal/command_buffer.hpp new file mode 100644 index 00000000..cea18461 --- /dev/null +++ b/src/core/hw/tegra_x1/gpu/renderer/metal/command_buffer.hpp @@ -0,0 +1,87 @@ +#pragma once + +#include "core/hw/tegra_x1/gpu/renderer/command_buffer.hpp" +#include "core/hw/tegra_x1/gpu/renderer/metal/const.hpp" + +namespace hydra::hw::tegra_x1::gpu::renderer::metal { + +enum class EncoderType { + None, + Render, + Compute, + Blit, +}; + +struct MtlBufferState { + MTL::Buffer* buffer{nullptr}; + 
u64 offset{0}; +}; + +struct EncoderRenderState { + MTL::RenderPipelineState* pipeline{nullptr}; + MTL::DepthStencilState* depth_stencil_state{nullptr}; + MTL::CullMode cull_mode{MTL::CullModeNone}; + MTL::Winding front_face_winding{MTL::WindingClockwise}; + std::array, + usize(ShaderType::Count)> + buffers{}; + std::array, + usize(ShaderType::Count)> + textures{}; + std::array, + usize(ShaderType::Count)> + samplers{}; +}; + +struct EncoderState { + MTL::RenderPassDescriptor* render_pass{nullptr}; + EncoderRenderState render{}; +}; + +class CommandBuffer final : public ICommandBuffer { + public: + CommandBuffer(MTL::CommandQueue* command_queue); + ~CommandBuffer() override; + + MTL::RenderCommandEncoder* GetRenderCommandEncoderUnchecked() { + ASSERT_DEBUG(encoder_type == EncoderType::Render, MetalRenderer, + "Render command encoder not active"); + return static_cast(command_encoder); + } + MTL::RenderCommandEncoder* + GetRenderCommandEncoder(MTL::RenderPassDescriptor* render_pass_descriptor); + MTL::RenderCommandEncoder* CreateRenderCommandEncoder( + MTL::RenderPassDescriptor* render_pass_descriptor); + + MTL::BlitCommandEncoder* GetBlitCommandEncoderUnchecked() { + ASSERT_DEBUG(encoder_type == EncoderType::Blit, MetalRenderer, + "Blit command encoder not active"); + return static_cast(command_encoder); + } + MTL::BlitCommandEncoder* GetBlitCommandEncoder(); + + void EndEncoding(); + + // Encoder state setting + void SetRenderPipelineState(MTL::RenderPipelineState* pipeline); + void SetDepthStencilState(MTL::DepthStencilState* depth_stencil_state); + void SetCullMode(MTL::CullMode cull_mode); + void SetFrontFaceWinding(MTL::Winding front_face_winding); + void SetBuffer(MTL::Buffer* buffer, u64 offset, ShaderType shader_type, + u32 index); + void SetTexture(MTL::Texture* texture, ShaderType shader_type, u32 index); + void SetSampler(MTL::SamplerState* sampler, ShaderType shader_type, + u32 index); + + private: + MTL::CommandBuffer* command_buffer{nullptr}; + 
MTL::CommandEncoder* command_encoder{nullptr}; + EncoderType encoder_type{EncoderType::None}; + + EncoderState encoder_state{}; + + public: + GETTER(command_buffer, GetCommandBuffer); +}; + +} // namespace hydra::hw::tegra_x1::gpu::renderer::metal diff --git a/src/core/hw/tegra_x1/gpu/renderer/metal/const.hpp b/src/core/hw/tegra_x1/gpu/renderer/metal/const.hpp index 5748b9f9..52485fbc 100644 --- a/src/core/hw/tegra_x1/gpu/renderer/metal/const.hpp +++ b/src/core/hw/tegra_x1/gpu/renderer/metal/const.hpp @@ -12,6 +12,17 @@ namespace hydra::hw::tegra_x1::gpu::renderer::metal { +__attribute__((unused)) static inline void StackAutoRelease(void* object) { + (*(NS::Object**)object)->release(); +} + +#define NS_STACK_SCOPED \ + __attribute__((cleanup(StackAutoRelease))) __attribute__((unused)) + +#define TMP_AUTORELEASE_POOL_BEGIN() \ + NS::AutoreleasePool* tmp_pool_ = NS::AutoreleasePool::alloc()->init() +#define TMP_AUTORELEASE_POOL_END() tmp_pool_->release() + // Cast from const char* to NS::String* inline NS::String* ToNSString(const char* str) { return NS::String::string(str, NS::ASCIIStringEncoding); diff --git a/src/core/hw/tegra_x1/gpu/renderer/metal/maxwell_to_mtl.cpp b/src/core/hw/tegra_x1/gpu/renderer/metal/maxwell_to_mtl.cpp index 839194f0..dd9957dd 100644 --- a/src/core/hw/tegra_x1/gpu/renderer/metal/maxwell_to_mtl.cpp +++ b/src/core/hw/tegra_x1/gpu/renderer/metal/maxwell_to_mtl.cpp @@ -2,6 +2,27 @@ namespace hydra::hw::tegra_x1::gpu::renderer::metal { +MTL::TextureType ToMtlTextureType(TextureType type) { + switch (type) { + case TextureType::_1D: + return MTL::TextureType1D; + case TextureType::_1DArray: + return MTL::TextureType1DArray; + case TextureType::_1DBuffer: + return MTL::TextureTypeTextureBuffer; + case TextureType::_2D: + return MTL::TextureType2D; + case TextureType::_2DArray: + return MTL::TextureType2DArray; + case TextureType::_3D: + return MTL::TextureType3D; + case TextureType::Cube: + return MTL::TextureTypeCube; + case 
TextureType::CubeArray: + return MTL::TextureTypeCubeArray; + } +} + #define PIXEL_FORMAT_ENTRY(format, pixel_format, has_depth, has_stencil, \ component_indices) \ { \ @@ -73,13 +94,14 @@ std::map pixel_format_lut = { COLOR_PIXEL_FORMAT_ENTRY_RGBA(RGBX8Unorm_sRGB, RGBA8Unorm_sRGB), // HACK COLOR_PIXEL_FORMAT_ENTRY_RGBA(RGBA8Unorm_sRGB, RGBA8Unorm_sRGB), COLOR_PIXEL_FORMAT_ENTRY(RGBA4Unorm, ABGR4Unorm, PASS({3, 2, 1, 0})), - COLOR_PIXEL_FORMAT_ENTRY_RGBA(RGB5Unorm, BGR5A1Unorm), // HACK - COLOR_PIXEL_FORMAT_ENTRY_RGBA(R5G6B5Unorm, B5G6R5Unorm), // HACK - COLOR_PIXEL_FORMAT_ENTRY_RGBA(RGB10A2Unorm, RGB10A2Unorm), // HACK - COLOR_PIXEL_FORMAT_ENTRY_RGBA(RGB10A2Uint, RGB10A2Uint), // HACK - COLOR_PIXEL_FORMAT_ENTRY_RGBA(RG11B10Float, RG11B10Float), // HACK - COLOR_PIXEL_FORMAT_ENTRY_RGBA(E5BGR9Float, RGB9E5Float), // HACK - COLOR_PIXEL_FORMAT_ENTRY_RGBA(BC1_RGB, BC1_RGBA), // HACK + COLOR_PIXEL_FORMAT_ENTRY_RGBA(RGB5Unorm, BGR5A1Unorm), // HACK + COLOR_PIXEL_FORMAT_ENTRY(R5G6B5Unorm, B5G6R5Unorm, + PASS({2, 1, 0, 3})), // TODO: correct? 
+ COLOR_PIXEL_FORMAT_ENTRY_RGBA(RGB10A2Unorm, RGB10A2Unorm), + COLOR_PIXEL_FORMAT_ENTRY_RGBA(RGB10A2Uint, RGB10A2Uint), + COLOR_PIXEL_FORMAT_ENTRY_RGBA(RG11B10Float, RG11B10Float), + COLOR_PIXEL_FORMAT_ENTRY_RGBA(E5BGR9Float, RGB9E5Float), // HACK + COLOR_PIXEL_FORMAT_ENTRY_RGBA(BC1_RGB, BC1_RGBA), COLOR_PIXEL_FORMAT_ENTRY_RGBA(BC1_RGBA, BC1_RGBA), COLOR_PIXEL_FORMAT_ENTRY_RGBA(BC2_RGBA, BC2_RGBA), COLOR_PIXEL_FORMAT_ENTRY_RGBA(BC3_RGBA, BC3_RGBA), diff --git a/src/core/hw/tegra_x1/gpu/renderer/metal/maxwell_to_mtl.hpp b/src/core/hw/tegra_x1/gpu/renderer/metal/maxwell_to_mtl.hpp index bda86508..9d4eb8ac 100644 --- a/src/core/hw/tegra_x1/gpu/renderer/metal/maxwell_to_mtl.hpp +++ b/src/core/hw/tegra_x1/gpu/renderer/metal/maxwell_to_mtl.hpp @@ -5,6 +5,8 @@ namespace hydra::hw::tegra_x1::gpu::renderer::metal { +MTL::TextureType ToMtlTextureType(TextureType type); + struct PixelFormatInfo { MTL::PixelFormat pixel_format; bool has_depth{false}; diff --git a/src/core/hw/tegra_x1/gpu/renderer/metal/renderer.cpp b/src/core/hw/tegra_x1/gpu/renderer/metal/renderer.cpp index 6a18c1c5..05c16b8f 100644 --- a/src/core/hw/tegra_x1/gpu/renderer/metal/renderer.cpp +++ b/src/core/hw/tegra_x1/gpu/renderer/metal/renderer.cpp @@ -3,12 +3,14 @@ #include "common/config.hpp" #include "core/hw/tegra_x1/gpu/engines/3d.hpp" #include "core/hw/tegra_x1/gpu/renderer/metal/buffer.hpp" +#include "core/hw/tegra_x1/gpu/renderer/metal/command_buffer.hpp" #include "core/hw/tegra_x1/gpu/renderer/metal/const.hpp" #include "core/hw/tegra_x1/gpu/renderer/metal/maxwell_to_mtl.hpp" #include "core/hw/tegra_x1/gpu/renderer/metal/pipeline.hpp" #include "core/hw/tegra_x1/gpu/renderer/metal/render_pass.hpp" #include "core/hw/tegra_x1/gpu/renderer/metal/sampler.hpp" #include "core/hw/tegra_x1/gpu/renderer/metal/shader.hpp" +#include "core/hw/tegra_x1/gpu/renderer/metal/surface_compositor.hpp" #include "core/hw/tegra_x1/gpu/renderer/metal/texture.hpp" // TODO: define in a separate file @@ -21,16 +23,6 @@ using 
namespace metal; namespace hydra::hw::tegra_x1::gpu::renderer::metal { -namespace { - -struct BlitParams { - float2 src_offset; - float2 src_scale; - f32 opacity; -}; - -} // namespace - SINGLETON_DEFINE_GET_INSTANCE(Renderer, MetalRenderer) Renderer::Renderer() { @@ -109,15 +101,6 @@ Renderer::Renderer() { gradient, sizeof(gradient) / NULL_TEXTURE_HEIGHT); } - // Clear state - for (u32 shader_type = 0; shader_type < usize(ShaderType::Count); - shader_type++) { - for (u32 i = 0; i < CONST_BUFFER_BINDING_COUNT; i++) - state.uniform_buffers[shader_type][i] = nullptr; - for (u32 i = 0; i < TEXTURE_COUNT; i++) - state.textures[shader_type][i] = {nullptr, nullptr}; - } - // Info info = { .supports_quads_primitive = false, @@ -147,85 +130,30 @@ void Renderer::SetSurface(void* surface) { // TODO: set pixel format } -bool Renderer::AcquireNextSurface() { +ISurfaceCompositor* Renderer::AcquireNextSurface() { + // Drawable if (!layer) - return false; + return nullptr; drawable = layer->nextDrawable(); - return (drawable != nullptr); -} - -void Renderer::BeginSurfaceRenderPass() { - ASSERT_DEBUG(drawable != nullptr, MetalRenderer, "Drawable cannot be null"); - - auto render_pass_descriptor = MTL::RenderPassDescriptor::alloc()->init(); - auto color_attachment = - render_pass_descriptor->colorAttachments()->object(0); - color_attachment->setTexture(drawable->texture()); - color_attachment->setLoadAction(MTL::LoadActionClear); - color_attachment->setClearColor(MTL::ClearColor::Make(0.0, 0.0, 0.0, 1.0)); - color_attachment->setStoreAction(MTL::StoreActionStore); + if (!drawable) + return nullptr; - CreateRenderCommandEncoder(render_pass_descriptor); - render_pass_descriptor->release(); -} - -void Renderer::DrawTextureToSurface(const TextureBase* texture, - const FloatRect2D src_rect, - const FloatRect2D dst_rect, - bool transparent, f32 opacity) { - auto texture_impl = static_cast(texture); - auto encoder = GetRenderCommandEncoderUnchecked(); - - // Draw - 
encoder->setRenderPipelineState(blit_pipeline_cache->Find( - {drawable->texture()->pixelFormat(), transparent})); - encoder->setViewport(MTL::Viewport{ - (f64)dst_rect.origin.x(), (f64)dst_rect.origin.y(), - (f64)dst_rect.size.x(), (f64)dst_rect.size.y(), 0.0, 1.0}); - - u32 zero = 0; - encoder->setVertexBytes(&zero, sizeof(zero), 0); - - // Src rect - const auto src_width = texture->GetDescriptor().width; - const auto src_height = texture->GetDescriptor().height; - BlitParams params = { - .src_offset = {src_rect.origin.x() / src_width, - src_rect.origin.y() / src_height}, - .src_scale = {src_rect.size.x() / src_width, - src_rect.size.y() / src_height}, - .opacity = opacity, - }; - - encoder->setFragmentBytes(¶ms, sizeof(params), 0); - encoder->setFragmentTexture(texture_impl->GetTexture(), NS::UInteger(0)); - encoder->setFragmentSamplerState(linear_sampler, NS::UInteger(0)); - encoder->drawPrimitives(MTL::PrimitiveTypeTriangle, NS::UInteger(0), - NS::UInteger(3)); -} - -void Renderer::EndSurfaceRenderPass() { EndEncoding(); } - -void Renderer::PresentSurfaceImpl() { - command_buffer->presentDrawable(drawable); + return new SurfaceCompositor(drawable); } -BufferBase* Renderer::CreateBuffer(const BufferDescriptor& descriptor) { - return new Buffer(descriptor); -} +BufferBase* Renderer::CreateBuffer(u64 size) { return new Buffer(size); } -BufferBase* Renderer::AllocateTemporaryBuffer(const u32 size) { +BufferBase* Renderer::AllocateTemporaryBuffer(const u64 size) { // TODO: use a buffer allocator instead auto buffer = device->newBuffer(size, MTL::ResourceStorageModeShared); - return new Buffer(buffer, 0); + return new Buffer(buffer); } void Renderer::FreeTemporaryBuffer(BufferBase* buffer) { auto buffer_impl = static_cast(buffer); // TODO: use a buffer allocator instead - buffer_impl->GetBuffer()->release(); delete buffer_impl; } @@ -237,7 +165,9 @@ SamplerBase* Renderer::CreateSampler(const SamplerDescriptor& descriptor) { return new Sampler(descriptor); } -void 
Renderer::EndCommandBuffer() { CommitCommandBuffer(); } +ICommandBuffer* Renderer::CreateCommandBuffer() { + return new CommandBuffer(command_queue); +} RenderPassBase* Renderer::CreateRenderPass(const RenderPassDescriptor& descriptor) { @@ -248,8 +178,10 @@ void Renderer::BindRenderPass(const RenderPassBase* render_pass) { state.render_pass = static_cast(render_pass); } -void Renderer::ClearColor(u32 render_target_id, u32 layer, u8 mask, - const uint4 color) { +void Renderer::ClearColor(ICommandBuffer* command_buffer, u32 render_target_id, + u32 layer, u8 mask, const uint4 color) { + const auto command_buffer_impl = + static_cast(command_buffer); auto texture = static_cast(state.render_pass->GetDescriptor() .color_targets[render_target_id] .texture); @@ -265,10 +197,11 @@ void Renderer::ClearColor(u32 render_target_id, u32 layer, u8 mask, ASSERT_DEBUG(layer == 0, MetalRenderer, "Layered clears (layer: {}) not implemented", layer); - auto encoder = GetRenderCommandEncoder(); + auto encoder = GetRenderCommandEncoder(command_buffer_impl); - SetRenderPipelineState(clear_color_pipeline_cache->Find( - {texture->GetPixelFormat(), render_target_id, mask})); + command_buffer_impl->SetRenderPipelineState( + clear_color_pipeline_cache->Find( + {texture->GetPixelFormat(), render_target_id, mask})); // TODO: set viewport and scissor encoder->setVertexBytes(&render_target_id, sizeof(render_target_id), 0); encoder->setFragmentBytes(&color, sizeof(color), 0); @@ -276,7 +209,10 @@ void Renderer::ClearColor(u32 render_target_id, u32 layer, u8 mask, NS::UInteger(3)); } -void Renderer::ClearDepth(u32 layer, const float value) { +void Renderer::ClearDepth(ICommandBuffer* command_buffer, u32 layer, + const float value) { + const auto command_buffer_impl = + static_cast(command_buffer); auto texture = static_cast( state.render_pass->GetDescriptor().depth_stencil_target.texture); @@ -294,11 +230,12 @@ void Renderer::ClearDepth(u32 layer, const float value) { return; } - auto encoder = 
GetRenderCommandEncoder(); + auto encoder = GetRenderCommandEncoder(command_buffer_impl); - SetRenderPipelineState( + command_buffer_impl->SetRenderPipelineState( clear_depth_pipeline_cache->Find(texture->GetPixelFormat())); - SetDepthStencilState(depth_stencil_state_always_and_write); + command_buffer_impl->SetDepthStencilState( + depth_stencil_state_always_and_write); // TODO: set viewport and scissor struct { u32 layer_id; @@ -309,7 +246,9 @@ void Renderer::ClearDepth(u32 layer, const float value) { NS::UInteger(3)); } -void Renderer::ClearStencil(u32 layer, const u32 value) { +void Renderer::ClearStencil(ICommandBuffer* command_buffer, u32 layer, + const u32 value) { + (void)command_buffer; ONCE(LOG_FUNC_WITH_ARGS_NOT_IMPLEMENTED( MetalRenderer, "layer: {}, value: {:#x}", layer, value)); } @@ -334,24 +273,23 @@ void Renderer::BindPipeline(const PipelineBase* pipeline) { state.pipeline = static_cast(pipeline); } -void Renderer::BindVertexBuffer(BufferBase* buffer, u32 index) { - state.vertex_buffers[index] = static_cast(buffer); +void Renderer::BindVertexBuffer(const BufferView& buffer, u32 index) { + state.vertex_buffers[index] = buffer; } -void Renderer::BindIndexBuffer(BufferBase* index_buffer, +void Renderer::BindIndexBuffer(const BufferView& index_buffer, engines::IndexType index_type) { - state.index_buffer = static_cast(index_buffer); + state.index_buffer = index_buffer; state.index_type = index_type; } -void Renderer::BindUniformBuffer(BufferBase* buffer, ShaderType shader_type, - u32 index) { +void Renderer::BindUniformBuffer(const BufferView& buffer, + ShaderType shader_type, u32 index) { // HACK if (shader_type == ShaderType::Count) return; - state.uniform_buffers[u32(shader_type)][index] = - static_cast(buffer); + state.uniform_buffers[u32(shader_type)][index] = buffer; } void Renderer::BindTexture(TextureBase* texture, SamplerBase* sampler, @@ -364,233 +302,108 @@ void Renderer::BindTexture(TextureBase* texture, SamplerBase* sampler, 
static_cast(sampler)}; } +void Renderer::UnbindUniformBuffers(ShaderType shader_type) { + // HACK + if (shader_type == ShaderType::Count) + return; + + state.uniform_buffers[u32(shader_type)] = {}; +} + void Renderer::UnbindTextures(ShaderType shader_type) { // HACK if (shader_type == ShaderType::Count) return; - for (u32 i = 0; i < TEXTURE_COUNT; i++) - state.textures[u32(shader_type)][i] = {nullptr, nullptr}; + state.textures[u32(shader_type)] = {}; } -void Renderer::Draw(const engines::PrimitiveType primitive_type, +void Renderer::Draw(ICommandBuffer* command_buffer, + const engines::PrimitiveType primitive_type, const u32 start, const u32 count, const u32 base_instance, const u32 instance_count) { + const auto command_buffer_impl = + static_cast(command_buffer); + // Check for errors if (!CanDraw()) return; - BindDrawState(); + BindDrawState(command_buffer_impl); - auto encoder = GetRenderCommandEncoderUnchecked(); + auto encoder = command_buffer_impl->GetRenderCommandEncoderUnchecked(); // Draw encoder->drawPrimitives(to_mtl_primitive_type(primitive_type), start, count, instance_count, base_instance); } -void Renderer::DrawIndexed(const engines::PrimitiveType primitive_type, +void Renderer::DrawIndexed(ICommandBuffer* command_buffer, + const engines::PrimitiveType primitive_type, const u32 start, const u32 count, const u32 base_vertex, const u32 base_instance, const u32 instance_count) { + const auto command_buffer_impl = + static_cast(command_buffer); + // Check for errors if (!CanDraw()) return; - BindDrawState(); + BindDrawState(command_buffer_impl); - auto encoder = GetRenderCommandEncoderUnchecked(); + auto encoder = command_buffer_impl->GetRenderCommandEncoderUnchecked(); // Draw - auto index_buffer_mtl = state.index_buffer->GetBuffer(); + auto index_buffer_mtl = + static_cast(state.index_buffer.GetBase())->GetBuffer(); // TODO: is start used correctly? 
- const auto index_buffer_offset = static_cast( - start * engines::get_index_type_size(state.index_type)); + const auto index_buffer_offset = + static_cast(start * + engines::get_index_type_size(state.index_type)) + + state.index_buffer.GetOffset(); encoder->drawIndexedPrimitives(to_mtl_primitive_type(primitive_type), count, to_mtl_index_type(state.index_type), index_buffer_mtl, index_buffer_offset, instance_count, base_vertex, base_instance); } -void Renderer::EnsureCommandBuffer() { - if (!command_buffer) { - command_buffer = command_queue->commandBuffer(); - } -} - -MTL::RenderCommandEncoder* Renderer::GetRenderCommandEncoder() { - auto mtl_render_pass = state.render_pass->GetRenderPassDescriptor(); - if (mtl_render_pass == encoder_state.render_pass) - return GetRenderCommandEncoderUnchecked(); - - encoder_state.render_pass = mtl_render_pass; - encoder_state.render = {}; - - // Reset bindings - for (u32 shader_type = 0; shader_type < usize(ShaderType::Count); - shader_type++) { - for (u32 i = 0; i < BUFFER_COUNT; i++) { - encoder_state.render.buffers[shader_type][i] = nullptr; - } - for (u32 i = 0; i < TEXTURE_COUNT; i++) { - encoder_state.render.textures[shader_type][i] = nullptr; - encoder_state.render.samplers[shader_type][i] = nullptr; - } - } - - return CreateRenderCommandEncoder(mtl_render_pass); -} - -MTL::RenderCommandEncoder* Renderer::CreateRenderCommandEncoder( - MTL::RenderPassDescriptor* render_pass_descriptor) { - EnsureCommandBuffer(); - EndEncoding(); - - command_encoder = - command_buffer->renderCommandEncoder(render_pass_descriptor); - encoder_type = EncoderType::Render; - encoder_state.render_pass = render_pass_descriptor; - - // HACK: bind null textures - for (u32 i = 0; i < TEXTURE_COUNT; i++) { - GetRenderCommandEncoderUnchecked()->setVertexTexture(null_texture, i); - GetRenderCommandEncoderUnchecked()->setFragmentTexture(null_texture, i); - } - - return GetRenderCommandEncoderUnchecked(); +MTL::RenderCommandEncoder* 
+Renderer::GetRenderCommandEncoder(CommandBuffer* command_buffer) { + return command_buffer->GetRenderCommandEncoder( + state.render_pass->GetRenderPassDescriptor()); } -MTL::BlitCommandEncoder* Renderer::GetBlitCommandEncoder() { - if (encoder_type == EncoderType::Blit) - return GetBlitCommandEncoderUnchecked(); - - EnsureCommandBuffer(); - EndEncoding(); - - command_encoder = command_buffer->blitCommandEncoder(); - encoder_type = EncoderType::Blit; - - return GetBlitCommandEncoderUnchecked(); +void Renderer::SetRenderPipelineState(CommandBuffer* command_buffer) { + command_buffer->SetRenderPipelineState(state.pipeline->GetPipeline()); } -void Renderer::EndEncoding() { - if (encoder_type == EncoderType::None) - return; - - command_encoder->endEncoding(); - // TODO: command encoders are autoreleased. However, we only have one global - // autorelease pool, so the encoders will live in the memory until the - // thread exits. An ideal solution would be to disable autorelease pools - // entirely, but for now we just let the encoders to pollute the memory. - // command_encoder->release(); // TODO: release? 
- command_encoder = nullptr; - encoder_type = EncoderType::None; - - // Reset the render pass - encoder_state.render_pass = nullptr; -} - -void Renderer::SetRenderPipelineState(MTL::RenderPipelineState* pipeline) { - auto& bound_pipeline = encoder_state.render.pipeline; - if (pipeline == bound_pipeline) - return; - - GetRenderCommandEncoderUnchecked()->setRenderPipelineState(pipeline); - bound_pipeline = pipeline; -} - -void Renderer::SetRenderPipelineState() { - SetRenderPipelineState(state.pipeline->GetPipeline()); -} - -void Renderer::SetDepthStencilState( - MTL::DepthStencilState* depth_stencil_state) { - auto& bound_depth_stencil_state = encoder_state.render.depth_stencil_state; - if (depth_stencil_state == bound_depth_stencil_state) - return; - - GetRenderCommandEncoderUnchecked()->setDepthStencilState( - depth_stencil_state); - bound_depth_stencil_state = depth_stencil_state; -} - -void Renderer::SetDepthStencilState() { +void Renderer::SetDepthStencilState(CommandBuffer* command_buffer) { DepthStencilStateDescriptor descriptor{ .depth_test_enabled = static_cast(REGS_3D.depth_test_enabled), .depth_write_enabled = static_cast(REGS_3D.depth_write_enabled), .depth_compare_op = REGS_3D.depth_compare_op, }; - SetDepthStencilState(depth_stencil_state_cache->Find(descriptor)); -} - -void Renderer::SetCullMode(MTL::CullMode cull_mode) { - auto& bound_cull_mode = encoder_state.render.cull_mode; - if (cull_mode == bound_cull_mode) - return; - - GetRenderCommandEncoderUnchecked()->setCullMode(cull_mode); - bound_cull_mode = cull_mode; -} - -void Renderer::SetFrontFaceWinding(MTL::Winding front_face_winding) { - auto& bound_front_face_winding = encoder_state.render.front_face_winding; - if (front_face_winding == bound_front_face_winding) - return; - - GetRenderCommandEncoderUnchecked()->setFrontFacingWinding( - front_face_winding); - bound_front_face_winding = front_face_winding; -} - -void Renderer::SetCullState() { - /* - if (REGS_3D.cull_face_enabled) { - 
SetCullMode(ToMtlCullMode(REGS_3D.cull_face_mode)); - SetFrontFaceWinding(ToMtlWinding(REGS_3D.front_face_winding)); - } else { - SetCullMode(MTL::CullModeNone); - } - */ -} - -void Renderer::SetBuffer(MTL::Buffer* buffer, ShaderType shader_type, - u32 index) { - ASSERT_DEBUG(index < BUFFER_COUNT, MetalRenderer, "Invalid buffer index {}", - index); - - auto& bound_buffer = - encoder_state.render.buffers[static_cast(shader_type)][index]; - if (buffer == bound_buffer) - return; - - switch (shader_type) { - case ShaderType::Vertex: - GetRenderCommandEncoderUnchecked()->setVertexBuffer(buffer, 0, index); - break; - case ShaderType::Fragment: - GetRenderCommandEncoderUnchecked()->setFragmentBuffer(buffer, 0, index); - break; - default: - LOG_ERROR(MetalRenderer, "Invalid shader type {}", shader_type); - break; - } - bound_buffer = buffer; + command_buffer->SetDepthStencilState( + depth_stencil_state_cache->Find(descriptor)); } -void Renderer::SetVertexBuffer(u32 index) { +void Renderer::SetVertexBuffer(CommandBuffer* command_buffer, u32 index) { ASSERT_DEBUG(index < VERTEX_ARRAY_COUNT, MetalRenderer, "Invalid vertex buffer index {}", index); const auto buffer = state.vertex_buffers[index]; - if (!buffer) + if (!buffer.GetBase()) return; - SetBuffer(buffer->GetBuffer(), ShaderType::Vertex, - GetVertexBufferIndex(index)); + command_buffer->SetBuffer( + static_cast(buffer.GetBase())->GetBuffer(), buffer.GetOffset(), + ShaderType::Vertex, GetVertexBufferIndex(index)); } -void Renderer::SetUniformBuffer(ShaderType shader_type, u32 index) { +void Renderer::SetUniformBuffer(CommandBuffer* command_buffer, + ShaderType shader_type, u32 index) { // TODO: get the index from resource mapping ASSERT_DEBUG(index < CONST_BUFFER_BINDING_COUNT, MetalRenderer, @@ -598,75 +411,30 @@ void Renderer::SetUniformBuffer(ShaderType shader_type, u32 index) { const auto buffer = state.uniform_buffers[static_cast(shader_type)][index]; - if (!buffer) - return; - - SetBuffer(buffer->GetBuffer(), 
shader_type, index); -} - -void Renderer::SetTexture(MTL::Texture* texture, ShaderType shader_type, - u32 index) { - ASSERT_DEBUG(index < TEXTURE_COUNT, MetalRenderer, - "Invalid texture index {}", index); - - auto& bound_texture = - encoder_state.render.textures[static_cast(shader_type)][index]; - if (texture == bound_texture) + if (!buffer.GetBase()) return; - switch (shader_type) { - case ShaderType::Vertex: - GetRenderCommandEncoderUnchecked()->setVertexTexture(texture, index); - break; - case ShaderType::Fragment: - GetRenderCommandEncoderUnchecked()->setFragmentTexture(texture, index); - break; - default: - LOG_ERROR(MetalRenderer, "Invalid shader type {}", shader_type); - break; - } - bound_texture = texture; + command_buffer->SetBuffer( + static_cast(buffer.GetBase())->GetBuffer(), buffer.GetOffset(), + shader_type, index); } -void Renderer::SetSampler(MTL::SamplerState* sampler, ShaderType shader_type, +void Renderer::SetTexture(CommandBuffer* command_buffer, ShaderType shader_type, u32 index) { - ASSERT_DEBUG(index < TEXTURE_COUNT, MetalRenderer, - "Invalid texture index {}", index); - - auto& bound_sampler = - encoder_state.render.samplers[static_cast(shader_type)][index]; - if (sampler == bound_sampler) - return; - - switch (shader_type) { - case ShaderType::Vertex: - GetRenderCommandEncoderUnchecked()->setVertexSamplerState(sampler, - index); - break; - case ShaderType::Fragment: - GetRenderCommandEncoderUnchecked()->setFragmentSamplerState(sampler, - index); - break; - default: - LOG_ERROR(MetalRenderer, "Invalid shader type {}", shader_type); - break; - } - bound_sampler = sampler; -} - -void Renderer::SetTexture(ShaderType shader_type, u32 index) { const auto texture = state.textures[u32(shader_type)][index]; if (texture.texture) - SetTexture(texture.texture->GetTexture(), shader_type, index); + command_buffer->SetTexture(texture.texture->GetTexture(), shader_type, + index); if (texture.sampler) - SetSampler(texture.sampler->GetSampler(), 
shader_type, index); + command_buffer->SetSampler(texture.sampler->GetSampler(), shader_type, + index); } // TODO: what about 3D textures? -void Renderer::BlitTexture(MTL::Texture* src, const float3 src_origin, - const usize3 src_size, MTL::Texture* dst, - const u32 dst_layer, const float3 dst_origin, - const usize3 dst_size) { +void Renderer::BlitTexture(CommandBuffer* command_buffer, MTL::Texture* src, + const float3 src_origin, const usize3 src_size, + MTL::Texture* dst, const u32 dst_layer, + const float3 dst_origin, const usize3 dst_size) { // Render pass auto render_pass_descriptor = MTL::RenderPassDescriptor::alloc()->init(); auto color_attachment = @@ -677,7 +445,8 @@ void Renderer::BlitTexture(MTL::Texture* src, const float3 src_origin, // texture color_attachment->setStoreAction(MTL::StoreActionStore); - auto encoder = CreateRenderCommandEncoder(render_pass_descriptor); + auto encoder = + command_buffer->CreateRenderCommandEncoder(render_pass_descriptor); render_pass_descriptor->release(); // Draw @@ -752,17 +521,11 @@ void Renderer::BeginCapture() { LOG_ERROR(MetalRenderer, "Failed to start GPU capture: {}", error->localizedDescription()->utf8String()); } - - capturing = true; } void Renderer::EndCapture() { - CommitCommandBuffer(); - auto captureManager = MTL::CaptureManager::sharedCaptureManager(); captureManager->stopCapture(); - - capturing = false; } bool Renderer::CanDraw() { @@ -774,13 +537,21 @@ bool Renderer::CanDraw() { return true; } -void Renderer::BindDrawState() { - auto encoder = GetRenderCommandEncoder(); +void Renderer::BindDrawState(CommandBuffer* command_buffer) { + auto encoder = GetRenderCommandEncoder(command_buffer); // States - SetRenderPipelineState(); - SetDepthStencilState(); - SetCullState(); + SetRenderPipelineState(command_buffer); + SetDepthStencilState(command_buffer); + + /* + if (REGS_3D.cull_face_enabled) { + SetCullMode(ToMtlCullMode(REGS_3D.cull_face_mode)); + 
SetFrontFaceWinding(ToMtlWinding(REGS_3D.front_face_winding)); + } else { + SetCullMode(MTL::CullModeNone); + } + */ // Viewport and scissor MTL::Viewport viewports[VIEWPORT_COUNT]; @@ -803,35 +574,18 @@ void Renderer::BindDrawState() { // Resources for (u32 i = 0; i < VERTEX_ARRAY_COUNT; i++) - SetVertexBuffer(i); + SetVertexBuffer(command_buffer, i); for (u32 shader_type = 0; shader_type < usize(ShaderType::Count); shader_type++) { for (u32 i = 0; i < CONST_BUFFER_BINDING_COUNT; i++) - SetUniformBuffer(ShaderType(shader_type), i); + SetUniformBuffer(command_buffer, ShaderType(shader_type), i); } // TODO: storage buffers for (u32 shader_type = 0; shader_type < usize(ShaderType::Count); shader_type++) { for (u32 i = 0; i < TEXTURE_COUNT; i++) - SetTexture(ShaderType(shader_type), i); - } - - // Debug -#define CAPTURE 0 -#if CAPTURE - static bool did_capture = false; - if (!did_capture) { - BeginCapture(); - did_capture = true; - } - - static u32 frames = 0; - if (capturing) { - if (frames >= 100) - EndCapture(); - frames++; + SetTexture(command_buffer, ShaderType(shader_type), i); } -#endif } } // namespace hydra::hw::tegra_x1::gpu::renderer::metal diff --git a/src/core/hw/tegra_x1/gpu/renderer/metal/renderer.hpp b/src/core/hw/tegra_x1/gpu/renderer/metal/renderer.hpp index 7672c3ee..fbd714c8 100644 --- a/src/core/hw/tegra_x1/gpu/renderer/metal/renderer.hpp +++ b/src/core/hw/tegra_x1/gpu/renderer/metal/renderer.hpp @@ -12,17 +12,16 @@ namespace hydra::hw::tegra_x1::gpu::renderer::metal { +class CommandBuffer; class Buffer; class Texture; class Sampler; class RenderPass; class Pipeline; -enum class EncoderType { - None, - Render, - Compute, - Blit, +struct CombinedTextureSampler { + const Texture* texture{nullptr}; + const Sampler* sampler{nullptr}; }; struct State { @@ -30,33 +29,18 @@ struct State { Viewport viewports[VIEWPORT_COUNT]; Scissor scissors[VIEWPORT_COUNT]; const Pipeline* pipeline{nullptr}; - const Buffer* index_buffer{nullptr}; + BufferView 
index_buffer{}; engines::IndexType index_type{engines::IndexType::None}; - const Buffer* vertex_buffers[VERTEX_ARRAY_COUNT] = {nullptr}; - const Buffer* uniform_buffers[usize(ShaderType::Count)] - [CONST_BUFFER_BINDING_COUNT]; - struct { - const Texture* texture; - const Sampler* sampler; - } textures[usize(ShaderType::Count)][TEXTURE_BINDING_COUNT]; + std::array vertex_buffers{}; + std::array, + usize(ShaderType::Count)> + uniform_buffers{}; + std::array, + usize(ShaderType::Count)> + textures{}; // TODO: images }; -struct EncoderRenderState { - MTL::RenderPipelineState* pipeline{nullptr}; - MTL::DepthStencilState* depth_stencil_state{nullptr}; - MTL::CullMode cull_mode{MTL::CullModeNone}; - MTL::Winding front_face_winding{MTL::WindingClockwise}; - MTL::Buffer* buffers[usize(ShaderType::Count)][BUFFER_COUNT]; - MTL::Texture* textures[usize(ShaderType::Count)][TEXTURE_COUNT]; - MTL::SamplerState* samplers[usize(ShaderType::Count)][TEXTURE_COUNT]; -}; - -struct EncoderState { - MTL::RenderPassDescriptor* render_pass{nullptr}; - EncoderRenderState render{}; -}; - class Renderer : public RendererBase { public: static Renderer& GetInstance(); @@ -66,19 +50,11 @@ class Renderer : public RendererBase { // Surface void SetSurface(void* surface) override; - - bool AcquireNextSurface() override; - void BeginSurfaceRenderPass() override; - void DrawTextureToSurface(const TextureBase* texture, - const FloatRect2D src_rect, - const FloatRect2D dst_rect, bool transparent, - f32 opacity) override; - void EndSurfaceRenderPass() override; - void PresentSurfaceImpl() override; + ISurfaceCompositor* AcquireNextSurface() override; // Buffer - BufferBase* CreateBuffer(const BufferDescriptor& descriptor) override; - BufferBase* AllocateTemporaryBuffer(const u32 size) override; + BufferBase* CreateBuffer(u64 size) override; + BufferBase* AllocateTemporaryBuffer(const u64 size) override; void FreeTemporaryBuffer(BufferBase* buffer) override; // Texture @@ -88,7 +64,7 @@ class Renderer : 
public RendererBase { SamplerBase* CreateSampler(const SamplerDescriptor& descriptor) override; // Command buffer - void EndCommandBuffer() override; + ICommandBuffer* CreateCommandBuffer() override; // Render pass RenderPassBase* @@ -96,10 +72,12 @@ class Renderer : public RendererBase { void BindRenderPass(const RenderPassBase* render_pass) override; // Clear - void ClearColor(u32 render_target_id, u32 layer, u8 mask, - const uint4 color) override; - void ClearDepth(u32 layer, const float value) override; - void ClearStencil(u32 layer, const u32 value) override; + void ClearColor(ICommandBuffer* command_buffer, u32 render_target_id, + u32 layer, u8 mask, const uint4 color) override; + void ClearDepth(ICommandBuffer* command_buffer, u32 layer, + const float value) override; + void ClearStencil(ICommandBuffer* command_buffer, u32 layer, + const u32 value) override; // Viewport and scissor void SetViewport(u32 index, const Viewport& viewport) override; @@ -113,81 +91,46 @@ class Renderer : public RendererBase { void BindPipeline(const PipelineBase* pipeline) override; // Resource binding - void BindVertexBuffer(BufferBase* buffer, u32 index) override; - void BindIndexBuffer(BufferBase* index_buffer, + void BindVertexBuffer(const BufferView& buffer, u32 index) override; + void BindIndexBuffer(const BufferView& index_buffer, engines::IndexType index_type) override; - void BindUniformBuffer(BufferBase* buffer, ShaderType shader_type, + void BindUniformBuffer(const BufferView& buffer, ShaderType shader_type, u32 index) override; void BindTexture(TextureBase* texture, SamplerBase* sampler, ShaderType shader_type, u32 index) override; // Resource unbinding + void UnbindUniformBuffers(ShaderType shader_type) override; void UnbindTextures(ShaderType shader_type) override; // Draw - void Draw(const engines::PrimitiveType primitive_type, const u32 start, + void Draw(ICommandBuffer* command_buffer, + const engines::PrimitiveType primitive_type, const u32 start, const u32 
count, const u32 base_instance, const u32 instance_count) override; - void DrawIndexed(const engines::PrimitiveType primitive_type, + void DrawIndexed(ICommandBuffer* command_buffer, + const engines::PrimitiveType primitive_type, const u32 start, const u32 count, const u32 base_vertex, const u32 base_instance, const u32 instance_count) override; // Helpers - - // Command buffer - void EnsureCommandBuffer(); - - void CommitCommandBuffer() { - if (command_buffer) { - EndEncoding(); - - command_buffer->commit(); - // HACK: wait until completed so as to avoid sync issues - command_buffer->waitUntilCompleted(); - command_buffer = nullptr; - } - } - - MTL::RenderCommandEncoder* GetRenderCommandEncoderUnchecked() { - ASSERT_DEBUG(encoder_type == EncoderType::Render, MetalRenderer, - "Render command encoder not active"); - return static_cast(command_encoder); - } - MTL::RenderCommandEncoder* GetRenderCommandEncoder(); - MTL::RenderCommandEncoder* CreateRenderCommandEncoder( - MTL::RenderPassDescriptor* render_pass_descriptor); - - MTL::BlitCommandEncoder* GetBlitCommandEncoderUnchecked() { - ASSERT_DEBUG(encoder_type == EncoderType::Blit, MetalRenderer, - "Blit command encoder not active"); - return static_cast(command_encoder); - } - MTL::BlitCommandEncoder* GetBlitCommandEncoder(); - - void EndEncoding(); + MTL::RenderCommandEncoder* + GetRenderCommandEncoder(CommandBuffer* command_buffer); // Encoder state setting - void SetRenderPipelineState(MTL::RenderPipelineState* pipeline); - void SetRenderPipelineState(); - void SetDepthStencilState(MTL::DepthStencilState* depth_stencil_state); - void SetDepthStencilState(); - void SetCullMode(MTL::CullMode cull_mode); - void SetFrontFaceWinding(MTL::Winding front_face_winding); - void SetCullState(); - void SetBuffer(MTL::Buffer* buffer, ShaderType shader_type, u32 index); - void SetVertexBuffer(u32 index); - void SetUniformBuffer(ShaderType shader_type, u32 index); - void SetTexture(MTL::Texture* texture, ShaderType 
shader_type, u32 index); - void SetSampler(MTL::SamplerState* sampler, ShaderType shader_type, + void SetRenderPipelineState(CommandBuffer* command_buffer); + void SetDepthStencilState(CommandBuffer* command_buffer); + void SetVertexBuffer(CommandBuffer* command_buffer, u32 index); + void SetUniformBuffer(CommandBuffer* command_buffer, ShaderType shader_type, + u32 index); + void SetTexture(CommandBuffer* command_buffer, ShaderType shader_type, u32 index); - void SetTexture(ShaderType shader_type, u32 index); - // Other - void BlitTexture(MTL::Texture* src, const float3 src_origin, - const usize3 src_size, MTL::Texture* dst, - const u32 dst_layer, const float3 dst_origin, - const usize3 dst_size); + void BlitTexture(CommandBuffer* command_buffer, MTL::Texture* src, + const float3 src_origin, const usize3 src_size, + MTL::Texture* dst, const u32 dst_layer, + const float3 dst_origin, const usize3 dst_size); protected: // Capture @@ -219,27 +162,20 @@ class Renderer : public RendererBase { // Null MTL::Texture* null_texture; - // Command buffer - MTL::CommandBuffer* command_buffer{nullptr}; - MTL::CommandEncoder* command_encoder{nullptr}; - EncoderType encoder_type{EncoderType::None}; - // State State state; [[maybe_unused]] u32 padding[0x100]; // HACK: for some reason, writing to some fields of the // encoder_state corrupts the state - EncoderState encoder_state; - - // Debug - bool capturing = false; // Helpers bool CanDraw(); - void BindDrawState(); + void BindDrawState(CommandBuffer* command_buffer); public: GETTER(device, GetDevice); + GETTER(blit_pipeline_cache, GetBlitPipelineCache); + GETTER(linear_sampler, GetLinearSampler); }; } // namespace hydra::hw::tegra_x1::gpu::renderer::metal diff --git a/src/core/hw/tegra_x1/gpu/renderer/metal/shader.cpp b/src/core/hw/tegra_x1/gpu/renderer/metal/shader.cpp index a830b098..9ca756d2 100644 --- a/src/core/hw/tegra_x1/gpu/renderer/metal/shader.cpp +++ b/src/core/hw/tegra_x1/gpu/renderer/metal/shader.cpp @@ -5,6 +5,15 @@ 
namespace hydra::hw::tegra_x1::gpu::renderer::metal { Shader::Shader(const ShaderDescriptor& descriptor) : ShaderBase(descriptor) { + // Options + NS_STACK_SCOPED MTL::CompileOptions* options = + MTL::CompileOptions::alloc()->init(); + if (false) // TODO: make this configurable + options->setFastMathEnabled(true); + if (true) // TODO: make this configurable + options->setPreserveInvariance(true); + + // Library MTL::Library* library; switch (descriptor.backend) { case ShaderBackend::Msl: { @@ -12,8 +21,15 @@ Shader::Shader(const ShaderDescriptor& descriptor) : ShaderBase(descriptor) { std::string source; source.assign(descriptor.code.begin(), descriptor.code.end()); - library = CreateLibraryFromSource(METAL_RENDERER_INSTANCE.GetDevice(), - source); + NS::Error* error; + library = METAL_RENDERER_INSTANCE.GetDevice()->newLibrary( + ToNSString(source), options, &error); + if (error) { + LOG_ERROR(MetalRenderer, "Failed to create Metal library: {}", + error->localizedDescription()->utf8String()); + error->release(); // TODO: autorelease + return; + } break; } case ShaderBackend::Air: { @@ -24,12 +40,14 @@ Shader::Shader(const ShaderDescriptor& descriptor) : ShaderBase(descriptor) { }); NS::Error* error; + // TODO: options library = METAL_RENDERER_INSTANCE.GetDevice()->newLibrary(dispatch_data, &error); if (error) { LOG_ERROR(MetalRenderer, "Failed to create Metal library: {}", error->localizedDescription()->utf8String()); - error->release(); // TODO: release? 
+ error->release(); // TODO: autorelease + return; } break; } @@ -40,6 +58,7 @@ Shader::Shader(const ShaderDescriptor& descriptor) : ShaderBase(descriptor) { break; } + // Function function = library->newFunction(ToNSString("main_")); library->release(); } diff --git a/src/core/hw/tegra_x1/gpu/renderer/metal/shader.hpp b/src/core/hw/tegra_x1/gpu/renderer/metal/shader.hpp index 4e35f413..3eee9d41 100644 --- a/src/core/hw/tegra_x1/gpu/renderer/metal/shader.hpp +++ b/src/core/hw/tegra_x1/gpu/renderer/metal/shader.hpp @@ -11,7 +11,7 @@ class Shader final : public ShaderBase { ~Shader() override; private: - MTL::Function* function; + MTL::Function* function{nullptr}; public: GETTER(function, GetFunction); diff --git a/src/core/hw/tegra_x1/gpu/renderer/metal/surface_compositor.cpp b/src/core/hw/tegra_x1/gpu/renderer/metal/surface_compositor.cpp new file mode 100644 index 00000000..b3663c6f --- /dev/null +++ b/src/core/hw/tegra_x1/gpu/renderer/metal/surface_compositor.cpp @@ -0,0 +1,73 @@ +#include "core/hw/tegra_x1/gpu/renderer/metal/surface_compositor.hpp" + +#include "core/hw/tegra_x1/gpu/renderer/metal/blit_pipeline_cache.hpp" +#include "core/hw/tegra_x1/gpu/renderer/metal/command_buffer.hpp" +#include "core/hw/tegra_x1/gpu/renderer/metal/renderer.hpp" +#include "core/hw/tegra_x1/gpu/renderer/metal/texture.hpp" + +namespace hydra::hw::tegra_x1::gpu::renderer::metal { + +SurfaceCompositor::SurfaceCompositor(CA::MetalDrawable* drawable_) + : drawable{drawable_} { + // Render pass + render_pass_descriptor = MTL::RenderPassDescriptor::alloc()->init(); + auto color_attachment = + render_pass_descriptor->colorAttachments()->object(0); + color_attachment->setTexture(drawable->texture()); + color_attachment->setLoadAction(MTL::LoadActionClear); + color_attachment->setClearColor(MTL::ClearColor::Make(0.0, 0.0, 0.0, 1.0)); + color_attachment->setStoreAction(MTL::StoreActionStore); +} + +SurfaceCompositor::~SurfaceCompositor() { render_pass_descriptor->release(); } + +void 
SurfaceCompositor::DrawTexture(ICommandBuffer* command_buffer, + const TextureBase* texture, + const FloatRect2D src_rect, + const FloatRect2D dst_rect, + bool transparent, f32 opacity) { + auto command_buffer_impl = static_cast(command_buffer); + auto texture_impl = static_cast(texture); + + auto encoder = + command_buffer_impl->GetRenderCommandEncoder(render_pass_descriptor); + + // Draw + encoder->setRenderPipelineState( + METAL_RENDERER_INSTANCE.GetBlitPipelineCache()->Find( + {drawable->texture()->pixelFormat(), transparent})); + encoder->setViewport(MTL::Viewport{ + (f64)dst_rect.origin.x(), (f64)dst_rect.origin.y(), + (f64)dst_rect.size.x(), (f64)dst_rect.size.y(), 0.0, 1.0}); + + u32 zero = 0; + encoder->setVertexBytes(&zero, sizeof(zero), 0); + + // Src rect + const auto src_width = texture->GetDescriptor().width; + const auto src_height = texture->GetDescriptor().height; + BlitParams params = { + .src_offset = {src_rect.origin.x() / src_width, + src_rect.origin.y() / src_height}, + .src_scale = {src_rect.size.x() / src_width, + src_rect.size.y() / src_height}, + .opacity = opacity, + }; + + encoder->setFragmentBytes(¶ms, sizeof(params), 0); + encoder->setFragmentTexture(texture_impl->GetTexture(), NS::UInteger(0)); + encoder->setFragmentSamplerState(METAL_RENDERER_INSTANCE.GetLinearSampler(), + NS::UInteger(0)); + encoder->drawPrimitives(MTL::PrimitiveTypeTriangle, NS::UInteger(0), + NS::UInteger(3)); +} + +void SurfaceCompositor::Present(ICommandBuffer* command_buffer) { + auto command_buffer_impl = static_cast(command_buffer); + + command_buffer_impl->GetRenderCommandEncoder(render_pass_descriptor); + command_buffer_impl->EndEncoding(); + command_buffer_impl->GetCommandBuffer()->presentDrawable(drawable); +} + +} // namespace hydra::hw::tegra_x1::gpu::renderer::metal diff --git a/src/core/hw/tegra_x1/gpu/renderer/metal/surface_compositor.hpp b/src/core/hw/tegra_x1/gpu/renderer/metal/surface_compositor.hpp new file mode 100644 index 00000000..33894ae3 
--- /dev/null +++ b/src/core/hw/tegra_x1/gpu/renderer/metal/surface_compositor.hpp @@ -0,0 +1,25 @@ +#pragma once + +#include "core/hw/tegra_x1/gpu/renderer/metal/const.hpp" +#include "core/hw/tegra_x1/gpu/renderer/surface_compositor.hpp" + +namespace hydra::hw::tegra_x1::gpu::renderer::metal { + +class CommandBuffer; + +class SurfaceCompositor final : public ISurfaceCompositor { + public: + SurfaceCompositor(CA::MetalDrawable* drawable_); + ~SurfaceCompositor() override; + + void DrawTexture(ICommandBuffer* command_buffer, const TextureBase* texture, + const FloatRect2D src_rect, const FloatRect2D dst_rect, + bool transparent, f32 opacity) override; + void Present(ICommandBuffer* command_buffer) override; + + private: + CA::MetalDrawable* drawable; + MTL::RenderPassDescriptor* render_pass_descriptor; +}; + +} // namespace hydra::hw::tegra_x1::gpu::renderer::metal diff --git a/src/core/hw/tegra_x1/gpu/renderer/metal/texture.cpp b/src/core/hw/tegra_x1/gpu/renderer/metal/texture.cpp index a781694a..4c9f79b5 100644 --- a/src/core/hw/tegra_x1/gpu/renderer/metal/texture.cpp +++ b/src/core/hw/tegra_x1/gpu/renderer/metal/texture.cpp @@ -1,6 +1,7 @@ #include "core/hw/tegra_x1/gpu/renderer/metal/texture.hpp" #include "core/hw/tegra_x1/gpu/renderer/metal/buffer.hpp" +#include "core/hw/tegra_x1/gpu/renderer/metal/command_buffer.hpp" #include "core/hw/tegra_x1/gpu/renderer/metal/maxwell_to_mtl.hpp" #include "core/hw/tegra_x1/gpu/renderer/metal/renderer.hpp" @@ -8,41 +9,57 @@ namespace hydra::hw::tegra_x1::gpu::renderer::metal { Texture::Texture(const TextureDescriptor& descriptor) : TextureBase(descriptor) { + const auto type = ToMtlTextureType(descriptor.type); + MTL::TextureDescriptor* desc = MTL::TextureDescriptor::alloc()->init(); - // TODO: type + desc->setTextureType(type); desc->setWidth(descriptor.width); desc->setHeight(descriptor.height); + switch (descriptor.type) { + case TextureType::_1DArray: + case TextureType::_2DArray: + desc->setArrayLength(descriptor.depth); 
+ break; + case TextureType::CubeArray: + // TODO: correct? + ASSERT_DEBUG(descriptor.depth % 6 == 0, MetalRenderer, + "Invalid cube array depth {}", descriptor.depth); + desc->setArrayLength(descriptor.depth / 6); + break; + case TextureType::_3D: + desc->setDepth(descriptor.depth); + break; + default: + ASSERT_DEBUG(descriptor.depth == 1, MetalRenderer, + "Invalid depth {} for type {}", descriptor.depth, + descriptor.type); + break; + } + const auto& pixel_format_info = to_mtl_pixel_format_info(descriptor.format); pixel_format = pixel_format_info.pixel_format; desc->setPixelFormat(pixel_format); - auto base_texture = METAL_RENDERER_INSTANCE.GetDevice()->newTexture(desc); + base_texture = METAL_RENDERER_INSTANCE.GetDevice()->newTexture(desc); if (pixel_format_info.component_indices == uchar4{0, 1, 2, 3}) { - mtl_texture = base_texture; + texture = base_texture; } else { - // Swizzle - static constexpr MTL::TextureSwizzle swizzle_components[] = { - MTL::TextureSwizzleRed, MTL::TextureSwizzleGreen, - MTL::TextureSwizzleBlue, MTL::TextureSwizzleAlpha}; - MTL::TextureSwizzleChannels swizzle_channels( - swizzle_components[pixel_format_info.component_indices[0]], - swizzle_components[pixel_format_info.component_indices[1]], - swizzle_components[pixel_format_info.component_indices[2]], - swizzle_components[pixel_format_info.component_indices[3]]); - - mtl_texture = base_texture->newTextureView( - pixel_format_info.pixel_format, MTL::TextureType2D, NS::Range(0, 1), - NS::Range(0, 1), swizzle_channels); - base_texture->release(); + owns_base = true; + texture = CreateViewImpl(descriptor.format, SwizzleChannels()); } } Texture::Texture(const TextureDescriptor& descriptor, MTL::Texture* mtl_texture_) - : TextureBase(descriptor), mtl_texture{mtl_texture_} {} + : TextureBase(descriptor), owns_base{false}, + base_texture{mtl_texture_}, texture{mtl_texture_} {} -Texture::~Texture() { mtl_texture->release(); } +Texture::~Texture() { + if (owns_base) + base_texture->release(); + 
texture->release(); +} TextureBase* Texture::CreateView(const TextureViewDescriptor& descriptor) { const auto& pixel_format_info = to_mtl_pixel_format_info(descriptor.format); @@ -59,65 +76,131 @@ TextureBase* Texture::CreateView(const TextureViewDescriptor& descriptor) { swizzle_components[pixel_format_info.component_indices[2]], swizzle_components[pixel_format_info.component_indices[3]]); - // TODO: don't hardcode type, ranges and levels - auto mtl_view = mtl_texture->newTextureView( - to_mtl_pixel_format(descriptor.format), MTL::TextureType2D, - NS::Range(0, 1), NS::Range(0, 1), swizzle_channels); - auto desc = GetDescriptor(); desc.format = descriptor.format; desc.swizzle_channels = descriptor.swizzle_channels; - return new Texture(desc, mtl_view); + return new Texture( + desc, CreateViewImpl(descriptor.format, descriptor.swizzle_channels)); } void Texture::CopyFrom(const uptr data) { - mtl_texture->replaceRegion( - MTL::Region(0, 0, 0, descriptor.width, descriptor.height, 1), 0, - reinterpret_cast(data), descriptor.stride); + texture->replaceRegion(MTL::Region(0, 0, 0, descriptor.width, + descriptor.height, descriptor.depth), + 0, reinterpret_cast(data), descriptor.stride); } -void Texture::CopyFrom(const BufferBase* src, const usize src_stride, - const u32 dst_layer, const uint3 dst_origin, - const usize3 size) { +void Texture::CopyFrom(ICommandBuffer* command_buffer, const BufferBase* src, + const usize src_stride, uint3 dst_origin, usize3 size) { + const auto command_buffer_impl = + static_cast(command_buffer); const auto mtl_src = static_cast(src)->GetBuffer(); - auto encoder = METAL_RENDERER_INSTANCE.GetBlitCommandEncoder(); + auto encoder = command_buffer_impl->GetBlitCommandEncoder(); - // TODO: bytes per image - encoder->copyFromBuffer( - mtl_src, 0, src_stride, 0, MTL::Size(size.x(), size.y(), size.z()), - mtl_texture, dst_layer, 0, - MTL::Origin(dst_origin.x(), dst_origin.y(), dst_origin.z())); + u32 dst_layer = 0; + u32 layer_count = 1; + if 
(descriptor.type != TextureType::_3D) { + dst_layer = dst_origin.z(); + dst_origin.z() = 0; + layer_count = static_cast(size.z()); + size.z() = 1; + } + + const auto bytes_per_image = descriptor.depth * src_stride; + for (u32 i = 0; i < layer_count; i++) { + const auto crnt_dst_layer = dst_layer + i; + encoder->copyFromBuffer( + mtl_src, crnt_dst_layer * bytes_per_image, src_stride, + bytes_per_image, MTL::Size(size.x(), size.y(), size.z()), texture, + crnt_dst_layer, 0, + MTL::Origin(dst_origin.x(), dst_origin.y(), dst_origin.z())); + } } -void Texture::CopyFrom(const TextureBase* src, const u32 src_layer, - const uint3 src_origin, const u32 dst_layer, - const uint3 dst_origin, const usize3 size) { +void Texture::CopyFrom(ICommandBuffer* command_buffer, const TextureBase* src, + uint3 src_origin, uint3 dst_origin, usize3 size) { + const auto command_buffer_impl = + static_cast(command_buffer); const auto mtl_src = static_cast(src)->GetTexture(); - auto encoder = METAL_RENDERER_INSTANCE.GetBlitCommandEncoder(); + auto encoder = command_buffer_impl->GetBlitCommandEncoder(); + + u32 src_layer = 0; + u32 dst_layer = 0; + u32 layer_count = 1; + if (descriptor.type != TextureType::_3D) { + dst_layer = dst_origin.z(); + dst_origin.z() = 0; + } + + if (src->GetDescriptor().type != TextureType::_3D) { + src_layer = src_origin.z(); + src_origin.z() = 0; + } + + if (descriptor.type != TextureType::_3D || + src->GetDescriptor().type != TextureType::_3D) { + layer_count = static_cast(size.z()); + size.z() = 1; + } - // TODO: bytes per image - encoder->copyFromTexture( - mtl_src, src_layer, 0, - MTL::Origin(src_origin.x(), src_origin.y(), src_origin.z()), - MTL::Size(size.x(), size.y(), size.z()), mtl_texture, dst_layer, 0, - MTL::Origin(dst_origin.x(), dst_origin.y(), dst_origin.z())); + for (u32 i = 0; i < layer_count; i++) { + encoder->copyFromTexture( + mtl_src, src_layer + i, 0, + MTL::Origin(src_origin.x(), src_origin.y(), src_origin.z()), + MTL::Size(size.x(), size.y(), 
size.z()), texture, dst_layer + i, 0, + MTL::Origin(dst_origin.x(), dst_origin.y(), dst_origin.z())); + } } -void Texture::BlitFrom(const TextureBase* src, const u32 src_layer, +void Texture::BlitFrom(ICommandBuffer* command_buffer, const TextureBase* src, const float3 src_origin, const usize3 src_size, - const u32 dst_layer, const float3 dst_origin, - const usize3 dst_size) { - // TODO: src layer - ASSERT_DEBUG(src_layer == 0, MetalRenderer, - "Source layered blits (source layer: {}) not implemented", - src_layer); - + const float3 dst_origin, const usize3 dst_size) { + const auto command_buffer_impl = + static_cast(command_buffer); METAL_RENDERER_INSTANCE.BlitTexture( - static_cast(src)->GetTexture(), src_origin, src_size, - mtl_texture, dst_layer, dst_origin, dst_size); + command_buffer_impl, static_cast(src)->GetTexture(), + src_origin, src_size, texture, 0, dst_origin, dst_size); +} + +MTL::Texture* Texture::CreateViewImpl(TextureFormat format, + SwizzleChannels swizzle_channels) { + const auto& pixel_format_info = to_mtl_pixel_format_info(format); + + // Swizzle + MTL::TextureSwizzle swizzle_components[] = { + to_mtl_swizzle(swizzle_channels.r), to_mtl_swizzle(swizzle_channels.g), + to_mtl_swizzle(swizzle_channels.b), to_mtl_swizzle(swizzle_channels.a)}; + MTL::TextureSwizzleChannels swizzle_channels_mtl( + swizzle_components[pixel_format_info.component_indices[0]], + swizzle_components[pixel_format_info.component_indices[1]], + swizzle_components[pixel_format_info.component_indices[2]], + swizzle_components[pixel_format_info.component_indices[3]]); + + // TODO: ranges and levels + + u32 levels = 1; + switch (descriptor.type) { + case TextureType::_1DArray: + case TextureType::_2DArray: + case TextureType::CubeArray: + levels = descriptor.depth; + break; + case TextureType::Cube: + // TODO: assert that depth is 6 + levels = 6; + break; + case TextureType::_3D: + // TODO: assert that depth matches + break; + default: + break; + } + + return 
base_texture->newTextureView( + to_mtl_pixel_format(format), ToMtlTextureType(this->descriptor.type), + NS::Range(0, 1), NS::Range(0, levels), swizzle_channels_mtl); } } // namespace hydra::hw::tegra_x1::gpu::renderer::metal diff --git a/src/core/hw/tegra_x1/gpu/renderer/metal/texture.hpp b/src/core/hw/tegra_x1/gpu/renderer/metal/texture.hpp index e09a0310..9056fdf6 100644 --- a/src/core/hw/tegra_x1/gpu/renderer/metal/texture.hpp +++ b/src/core/hw/tegra_x1/gpu/renderer/metal/texture.hpp @@ -15,28 +15,31 @@ class Texture final : public TextureBase { // Copying void CopyFrom(const uptr data) override; - void CopyFrom(const BufferBase* src, const usize src_stride, - const u32 dst_layer, const uint3 dst_origin, + void CopyFrom(ICommandBuffer* command_buffer, const BufferBase* src, + const usize src_stride, const uint3 dst_origin, + const usize3 size) override; + void CopyFrom(ICommandBuffer* command_buffer, const TextureBase* src, + const uint3 src_origin, const uint3 dst_origin, const usize3 size) override; - void CopyFrom(const TextureBase* src, const u32 src_layer, - const uint3 src_origin, const u32 dst_layer, - const uint3 dst_origin, const usize3 size) override; // Blitting - void BlitFrom(const TextureBase* src, const u32 src_layer, + void BlitFrom(ICommandBuffer* command_buffer, const TextureBase* src, const float3 src_origin, const usize3 src_size, - const u32 dst_layer, const float3 dst_origin, - const usize3 dst_size) override; - - // Getters - MTL::Texture* GetTexture() const { return mtl_texture; } - - MTL::PixelFormat GetPixelFormat() const { return pixel_format; } + const float3 dst_origin, const usize3 dst_size) override; private: - MTL::Texture* mtl_texture; + bool owns_base{false}; + MTL::Texture* base_texture; + MTL::Texture* texture; MTL::PixelFormat pixel_format; + + MTL::Texture* CreateViewImpl(TextureFormat format, + SwizzleChannels swizzle_channels); + + public: + GETTER(texture, GetTexture); + GETTER(pixel_format, GetPixelFormat); }; } // 
namespace hydra::hw::tegra_x1::gpu::renderer::metal diff --git a/src/core/hw/tegra_x1/gpu/renderer/renderer_base.hpp b/src/core/hw/tegra_x1/gpu/renderer/renderer_base.hpp index b136b53c..d2f70e2e 100644 --- a/src/core/hw/tegra_x1/gpu/renderer/renderer_base.hpp +++ b/src/core/hw/tegra_x1/gpu/renderer/renderer_base.hpp @@ -1,7 +1,7 @@ #pragma once #include "core/hw/tegra_x1/gpu/renderer/buffer_cache.hpp" -#include "core/hw/tegra_x1/gpu/renderer/const.hpp" +#include "core/hw/tegra_x1/gpu/renderer/buffer_view.hpp" #include "core/hw/tegra_x1/gpu/renderer/index_cache.hpp" #include "core/hw/tegra_x1/gpu/renderer/pipeline_cache.hpp" #include "core/hw/tegra_x1/gpu/renderer/render_pass_cache.hpp" @@ -11,7 +11,8 @@ namespace hydra::hw::tegra_x1::gpu::renderer { -class BufferBase; +class ICommandBuffer; +class ISurfaceCompositor; class TextureBase; class SamplerBase; class RenderPassBase; @@ -34,38 +35,20 @@ class RendererBase { public: virtual ~RendererBase() {} - // Mutex - void LockMutex() { mutex.lock(); } - void UnlockMutex() { mutex.unlock(); } + // TODO: make this thread safe + void InvalidateMemory(Range range) { + buffer_cache.InvalidateMemory(range); + texture_cache.InvalidateMemory(range); + // TODO: shader cache + } // Surface virtual void SetSurface(void* surface) = 0; - virtual bool AcquireNextSurface() = 0; - virtual void BeginSurfaceRenderPass() = 0; - // Uses lower left origin - virtual void DrawTextureToSurface(const TextureBase* texture, - const FloatRect2D src_rect, - const FloatRect2D dst_rect, - bool transparent, f32 opacity = 1.0f) = 0; - virtual void EndSurfaceRenderPass() = 0; - void PresentSurface() { - PresentSurfaceImpl(); - - if (is_capturing) { - EndCapture(); - is_capturing = false; - } - - if (begin_capture) { - BeginCapture(); - is_capturing = true; - begin_capture = false; - } - } + virtual ISurfaceCompositor* AcquireNextSurface() = 0; // Buffer - virtual BufferBase* CreateBuffer(const BufferDescriptor& descriptor) = 0; - virtual BufferBase* 
AllocateTemporaryBuffer(const u32 size) = 0; + virtual BufferBase* CreateBuffer(u64 size) = 0; + virtual BufferBase* AllocateTemporaryBuffer(const u64 size) = 0; virtual void FreeTemporaryBuffer(BufferBase* buffer) = 0; // Texture @@ -75,7 +58,7 @@ class RendererBase { virtual SamplerBase* CreateSampler(const SamplerDescriptor& descriptor) = 0; // Command buffer - virtual void EndCommandBuffer() = 0; + virtual ICommandBuffer* CreateCommandBuffer() = 0; // Render pass virtual RenderPassBase* @@ -83,10 +66,13 @@ class RendererBase { virtual void BindRenderPass(const RenderPassBase* render_pass) = 0; // Clear - virtual void ClearColor(u32 render_target_id, u32 layer, u8 mask, + virtual void ClearColor(ICommandBuffer* command_buffer, + u32 render_target_id, u32 layer, u8 mask, const uint4 color) = 0; - virtual void ClearDepth(u32 layer, const float value) = 0; - virtual void ClearStencil(u32 layer, const u32 value) = 0; + virtual void ClearDepth(ICommandBuffer* command_buffer, u32 layer, + const float value) = 0; + virtual void ClearStencil(ICommandBuffer* command_buffer, u32 layer, + const u32 value) = 0; // Viewport and scissor virtual void SetViewport(u32 index, const Viewport& viewport) = 0; @@ -101,44 +87,55 @@ class RendererBase { virtual void BindPipeline(const PipelineBase* pipeline) = 0; // Resource binding - virtual void BindVertexBuffer(BufferBase* buffer, u32 index) = 0; - virtual void BindIndexBuffer(BufferBase* index_buffer, + virtual void BindVertexBuffer(const BufferView& buffer, u32 index) = 0; + virtual void BindIndexBuffer(const BufferView& index_buffer, engines::IndexType index_type) = 0; - virtual void BindUniformBuffer(BufferBase* buffer, ShaderType shader_type, - u32 index) = 0; + virtual void BindUniformBuffer(const BufferView& buffer, + ShaderType shader_type, u32 index) = 0; // TODO: storage buffers virtual void BindTexture(TextureBase* texture, SamplerBase* sampler, ShaderType shader_type, u32 index) = 0; // TODO: images // Resource unbinding 
+ virtual void UnbindUniformBuffers(ShaderType shader_type) = 0; virtual void UnbindTextures(ShaderType shader_type) = 0; // Draw - virtual void Draw(const engines::PrimitiveType primitive_type, + virtual void Draw(ICommandBuffer* command_buffer, + const engines::PrimitiveType primitive_type, const u32 start, const u32 count, const u32 base_instance, const u32 instance_count) = 0; - virtual void DrawIndexed(const engines::PrimitiveType primitive_type, + virtual void DrawIndexed(ICommandBuffer* command_buffer, + const engines::PrimitiveType primitive_type, const u32 start, const u32 count, const u32 base_vertex, const u32 base_instance, const u32 instance_count) = 0; - // Capture - void CaptureFrame() { begin_capture = true; } + // Debug + void CaptureFrames(u32 count) { frames_to_capture = count; } + void NotifyDebugFrameBoundary() { + if (frames_to_capture > 0) { + if (capturing) { + if (--frames_to_capture == 0) { + EndCapture(); + capturing = false; + } + } else { + BeginCapture(); + capturing = true; + } + } + } protected: Info info{}; - // Surface - virtual void PresentSurfaceImpl() = 0; - // Capture virtual void BeginCapture() = 0; virtual void EndCapture() = 0; private: - std::mutex mutex; - // Caches BufferCache buffer_cache; TextureCache texture_cache; @@ -149,8 +146,8 @@ class RendererBase { IndexCache index_cache; // Capture - bool begin_capture{false}; - bool is_capturing{false}; + u32 frames_to_capture{0}; + bool capturing{false}; public: CONST_REF_GETTER(info, GetInfo); diff --git a/src/core/hw/tegra_x1/gpu/renderer/shader_decompiler/analyzer/memory_analyzer.cpp b/src/core/hw/tegra_x1/gpu/renderer/shader_decompiler/analyzer/memory_analyzer.cpp index 4c0ad07d..19b61962 100644 --- a/src/core/hw/tegra_x1/gpu/renderer/shader_decompiler/analyzer/memory_analyzer.cpp +++ b/src/core/hw/tegra_x1/gpu/renderer/shader_decompiler/analyzer/memory_analyzer.cpp @@ -4,13 +4,13 @@ namespace hydra::hw::tegra_x1::gpu::renderer::shader_decomp::analyzer { namespace { -void 
push_sv(std::vector& svs, std::vector& stage_in_outs, - u64 addr) { +void push_sv(std::unordered_set& svs, + std::unordered_set& stage_in_outs, u64 addr) { const auto sv = get_sv_access_from_addr(addr).sv; if (sv.semantic == SvSemantic::UserInOut) - push_unique(stage_in_outs, sv.index); + stage_in_outs.insert(sv.index); else - push_unique(svs, sv.semantic); + svs.insert(sv.semantic); } } // namespace @@ -85,12 +85,7 @@ void MemoryAnalyzer::HandleAMemLoad(const AMem amem) { } void MemoryAnalyzer::HandleCMemLoad(const CMem cmem) { - if (cmem.reg != RZ) - LOG_WARN(ShaderDecompiler, "Indexing not implemented (src: {})", - cmem.reg); - - auto& size = uniform_buffers[cmem.idx]; - size = std::max(size, static_cast(cmem.imm) + sizeof(u32)); + const_buffers.insert(cmem.idx); } void MemoryAnalyzer::HandleAMemStore(const AMem amem) { diff --git a/src/core/hw/tegra_x1/gpu/renderer/shader_decompiler/analyzer/memory_analyzer.hpp b/src/core/hw/tegra_x1/gpu/renderer/shader_decompiler/analyzer/memory_analyzer.hpp index 7be783c1..0f64b602 100644 --- a/src/core/hw/tegra_x1/gpu/renderer/shader_decompiler/analyzer/memory_analyzer.hpp +++ b/src/core/hw/tegra_x1/gpu/renderer/shader_decompiler/analyzer/memory_analyzer.hpp @@ -14,11 +14,11 @@ class MemoryAnalyzer { void Analyze(const ir::Module& modul); private: - std::vector input_svs; - std::vector output_svs; - std::vector stage_inputs; - std::vector stage_outputs; - std::map uniform_buffers; + std::unordered_set input_svs; + std::unordered_set output_svs; + std::unordered_set stage_inputs; + std::unordered_set stage_outputs; + std::unordered_set const_buffers; std::map textures; // Helpers @@ -32,7 +32,7 @@ class MemoryAnalyzer { CONST_REF_GETTER(output_svs, GetOutputSVs); CONST_REF_GETTER(stage_inputs, GetStageInputs); CONST_REF_GETTER(stage_outputs, GetStageOutputs); - CONST_REF_GETTER(uniform_buffers, GetUniformBuffers); + CONST_REF_GETTER(const_buffers, GetConstBuffers); CONST_REF_GETTER(textures, GetTextures); }; diff --git 
a/src/core/hw/tegra_x1/gpu/renderer/shader_decompiler/codegen/lang/emitter.cpp b/src/core/hw/tegra_x1/gpu/renderer/shader_decompiler/codegen/lang/emitter.cpp index 34f54276..9cba72e4 100644 --- a/src/core/hw/tegra_x1/gpu/renderer/shader_decompiler/codegen/lang/emitter.cpp +++ b/src/core/hw/tegra_x1/gpu/renderer/shader_decompiler/codegen/lang/emitter.cpp @@ -33,8 +33,9 @@ void LangEmitter::Start() { EnterScope("struct State"); Write("Reg r[256];"); Write("bool p[8];"); // TODO: is the size correct? - for (const auto& [index, size] : memory_analyzer.GetUniformBuffers()) - Write("Reg c{}[{}];", index, size); + // TODO: move this to the backend + for (auto index : memory_analyzer.GetConstBuffers()) + Write("constant Reg* c{};", index); Write("Reg a_in[0x200];"); // TODO: what should the size be? Write("Reg a_out[0x200];"); // TODO: what should the size be? EmitStateBindings(); @@ -135,15 +136,8 @@ void LangEmitter::EmitMainFunctionPrologue() { WriteNewline(); // Constant memory - - // Uniform buffers - for (const auto& [index, size] : memory_analyzer.GetUniformBuffers()) { - const auto u32_count = static_cast(size / sizeof(u32)); - for (u32 i = 0; i < u32_count; i++) - WriteStatement("{} = ubuff{}.data[{}]", - GetConstMemoryStr({index, RZ, i * sizeof(u32)}), - index, i); - } + for (auto index : memory_analyzer.GetConstBuffers()) + WriteStatement("state.c{} = c{}", index, index); WriteNewline(); EmitStateBindingAssignments(); diff --git a/src/core/hw/tegra_x1/gpu/renderer/shader_decompiler/codegen/lang/msl/emitter.cpp b/src/core/hw/tegra_x1/gpu/renderer/shader_decompiler/codegen/lang/msl/emitter.cpp index 38491eae..9225883f 100644 --- a/src/core/hw/tegra_x1/gpu/renderer/shader_decompiler/codegen/lang/msl/emitter.cpp +++ b/src/core/hw/tegra_x1/gpu/renderer/shader_decompiler/codegen/lang/msl/emitter.cpp @@ -9,6 +9,20 @@ namespace hydra::hw::tegra_x1::gpu::renderer::shader_decomp::codegen::lang:: namespace { +std::string PixelImapTypeToStr(PixelImapType type) { + switch 
(type) { + case PixelImapType::Constant: + return "flat"; + case PixelImapType::Perspective: + return ""; + case PixelImapType::ScreenLinear: + return "center_no_perspective"; + default: + LOG_ERROR(ShaderDecompiler, "Pixel imap unused"); + return ""; + } +} + std::string TextureTypeToStr(TextureType type, bool is_depth) { // TODO: check if depth can be used with the type std::string prefix = is_depth ? "depth" : "texture"; @@ -69,7 +83,7 @@ MslEmitter::MslEmitter(const DecompilerContext& context, ResourceMapping& out_resource_mapping) : LangEmitter(context, memory_analyzer, state, out_code, out_resource_mapping) { - for (const auto& [index, size] : memory_analyzer.GetUniformBuffers()) { + for (auto index : memory_analyzer.GetConstBuffers()) { out_resource_mapping.uniform_buffers[index] = index; } @@ -120,7 +134,7 @@ void MslEmitter::EmitDeclarations() { continue; const auto sv = Sv(SvSemantic::UserInOut, i); - Write("vec<{}, 4> {} {};", ToType(vertex_attrib_state.type), + Write("vec<{}, 4> {} [[{}]];", ToType(vertex_attrib_state.type), GetSvStr(sv), GetSvQualifierStr(sv, false)); } break; @@ -128,8 +142,12 @@ void MslEmitter::EmitDeclarations() { Write("float4 position [[position]];"); for (const auto input : memory_analyzer.GetStageInputs()) { const auto sv = Sv(SvSemantic::UserInOut, input); + std::string attribute = PixelImapTypeToStr( + context.frag.pixel_imaps[input].GetFirstUsedType()); // TODO: don't hardcode the type - Write("float4 {} {};", GetSvStr(sv), GetSvQualifierStr(sv, false)); + Write("float4 {} [[{}{}{}]];", GetSvStr(sv), + GetSvQualifierStr(sv, false), attribute.empty() ? 
"" : ", ", + attribute); } break; default: @@ -146,7 +164,7 @@ void MslEmitter::EmitDeclarations() { // SVs // HACK: always write position in vertex shaders if (context.type == ShaderType::Vertex) - Write("float4 position [[position]];"); + Write("float4 position [[position, invariant]];"); for (const auto sv_semantic : memory_analyzer.GetOutputSVs()) { switch (sv_semantic) { case SvSemantic::Position: @@ -165,7 +183,8 @@ void MslEmitter::EmitDeclarations() { for (const auto output : memory_analyzer.GetStageOutputs()) { const auto sv = Sv(SvSemantic::UserInOut, output); // TODO: don't hardcode the type - Write("float4 {} {};", GetSvStr(sv), GetSvQualifierStr(sv, true)); + Write("float4 {} [[{}]];", GetSvStr(sv), + GetSvQualifierStr(sv, true)); } break; case ShaderType::Fragment: @@ -176,7 +195,7 @@ void MslEmitter::EmitDeclarations() { continue; const auto sv = Sv(SvSemantic::UserInOut, i); - Write("vec<{}, 4> {} {};", ToType(color_target_data_type), + Write("vec<{}, 4> {} [[{}]];", ToType(color_target_data_type), GetSvStr(sv), GetSvQualifierStr(sv, true)); } break; @@ -186,17 +205,7 @@ void MslEmitter::EmitDeclarations() { ExitScopeEmpty(true); WriteNewline(); - - // Uniform buffers - for (const auto& [index, size] : memory_analyzer.GetUniformBuffers()) { - EnterScope("struct UBuff{}", index); - - // Data - Write("uint data[0x{:x}];", size / sizeof(u32)); - - ExitScopeEmpty(true); - } - WriteNewline(); + ; } void MslEmitter::EmitStateBindings() { @@ -256,8 +265,8 @@ void MslEmitter::EmitMainPrototype() { } // Uniform buffers - for (const auto& [index, size] : memory_analyzer.GetUniformBuffers()) { - ADD_ARG("constant UBuff{}& ubuff{} [[buffer({})]]", index, index, + for (auto index : memory_analyzer.GetConstBuffers()) { + ADD_ARG("constant Reg* c{} [[buffer({})]]", index, index, out_resource_mapping.uniform_buffers[index]); } @@ -454,26 +463,26 @@ std::string MslEmitter::GetSvStr(const Sv& sv) { std::string MslEmitter::GetSvQualifierStr(const Sv& sv, bool output) 
{ switch (sv.semantic) { case SvSemantic::Position: - return "[[position]]"; + return "position"; case SvSemantic::UserInOut: switch (context.type) { case ShaderType::Vertex: if (output) - return fmt::format("[[user(locn{})]]", sv.index); + return fmt::format("user(locn{})", sv.index); else - return fmt::format("[[attribute({})]]", sv.index); + return fmt::format("attribute({})", sv.index); case ShaderType::Fragment: if (output) - return fmt::format("[[color({})]]", sv.index); + return fmt::format("color({})", sv.index); else - return fmt::format("[[user(locn{})]]", sv.index); + return fmt::format("user(locn{})", sv.index); default: return INVALID_VALUE; } case SvSemantic::InstanceID: - return "[[instance_id]]"; + return "instance_id"; case SvSemantic::VertexID: - return "[[vertex_id]]"; + return "vertex_id"; default: LOG_ERROR(ShaderDecompiler, "Unknown SV semantic {}", sv.semantic); return INVALID_VALUE; diff --git a/src/core/hw/tegra_x1/gpu/renderer/shader_decompiler/decoder/const.hpp b/src/core/hw/tegra_x1/gpu/renderer/shader_decompiler/decoder/const.hpp index 72cbfaaa..a3ea6c70 100644 --- a/src/core/hw/tegra_x1/gpu/renderer/shader_decompiler/decoder/const.hpp +++ b/src/core/hw/tegra_x1/gpu/renderer/shader_decompiler/decoder/const.hpp @@ -129,4 +129,23 @@ inline bool HandlePredCond(ir::Builder& builder, pred_t pred, bool pred_inv) { } } +enum class EnsureIntegerSignednessError { + NotAnInteger, +}; + +template +ir::Value EnsureIntegerSignedness(ir::Builder& builder, + const ir::Value& value) { + const auto type = value.GetType(); + if (signed_) { + if (type.IsUnsignedInteger()) + return builder.OpCast(value, type.SignedEquivalent()); + } else { + if (type.IsSignedInteger()) + return builder.OpCast(value, type.UnsignedEquivalent()); + } + + return value; +} + } // namespace hydra::hw::tegra_x1::gpu::renderer::shader_decomp::decoder diff --git a/src/core/hw/tegra_x1/gpu/renderer/shader_decompiler/decoder/decoder.cpp 
b/src/core/hw/tegra_x1/gpu/renderer/shader_decompiler/decoder/decoder.cpp index d2f97594..02a7b3ba 100644 --- a/src/core/hw/tegra_x1/gpu/renderer/shader_decompiler/decoder/decoder.cpp +++ b/src/core/hw/tegra_x1/gpu/renderer/shader_decompiler/decoder/decoder.cpp @@ -496,9 +496,7 @@ void Decoder::ParseNextInstruction() { INST(0x5b40000000000000, 0xfff0000000000000) { COMMENT_NOT_IMPLEMENTED("icmp"); } - INST(0x5b00000000000000, 0xffc0000000000000) { - COMMENT_NOT_IMPLEMENTED("xmad"); - } + INST(0x5b00000000000000, 0xffc0000000000000) { EMIT(XmadR); } INST(0x5a80000000000000, 0xff80000000000000) { COMMENT_NOT_IMPLEMENTED("imadsp"); } @@ -547,9 +545,7 @@ void Decoder::ParseNextInstruction() { COMMENT_NOT_IMPLEMENTED("imad"); } INST(0x5180000000000000, 0xff80000000000000) { EMIT(FfmaRC); } - INST(0x5100000000000000, 0xff80000000000000) { - COMMENT_NOT_IMPLEMENTED("xmad"); - } + INST(0x5100000000000000, 0xff80000000000000) { EMIT(XmadRC); } INST(0x50f8000000000000, 0xfff8000000000000) { COMMENT_NOT_IMPLEMENTED("fswzadd"); } @@ -581,9 +577,7 @@ void Decoder::ParseNextInstruction() { INST(0x5000000000000000, 0xff80000000000000) { COMMENT_NOT_IMPLEMENTED("vabsdiff4"); } - INST(0x4e00000000000000, 0xfe00000000000000) { - COMMENT_NOT_IMPLEMENTED("xmad"); - } + INST(0x4e00000000000000, 0xfe00000000000000) { EMIT(XmadC); } INST(0x4cf0000000000000, 0xfff8000000000000) { COMMENT_NOT_IMPLEMENTED("r2p"); } @@ -756,9 +750,7 @@ void Decoder::ParseNextInstruction() { INST(0x3640000000000000, 0xfef0000000000000) { COMMENT_NOT_IMPLEMENTED("icmp"); } - INST(0x3600000000000000, 0xfec0000000000000) { - COMMENT_NOT_IMPLEMENTED("xmad"); - } + INST(0x3600000000000000, 0xfec0000000000000) { EMIT(XmadI); } INST(0x3480000000000000, 0xfe80000000000000) { COMMENT_NOT_IMPLEMENTED("imadsp"); } diff --git a/src/core/hw/tegra_x1/gpu/renderer/shader_decompiler/decoder/integer_arithmetic.cpp b/src/core/hw/tegra_x1/gpu/renderer/shader_decompiler/decoder/integer_arithmetic.cpp index ed223e94..c6f49f07 
100644 --- a/src/core/hw/tegra_x1/gpu/renderer/shader_decompiler/decoder/integer_arithmetic.cpp +++ b/src/core/hw/tegra_x1/gpu/renderer/shader_decompiler/decoder/integer_arithmetic.cpp @@ -44,6 +44,83 @@ void EmitIscadd(DecoderContext& context, pred_t pred, bool pred_inv, context.builder.OpEndIf(); } +// TODO: check this +ir::Value Extend16To32(DecoderContext& context, ir::Value value, bool high) { + if (high) + return context.builder.OpShiftRight(value, ir::Value::ConstantU(16)); + else + return context.builder.OpBitfieldExtract(value, ir::Value::ConstantU(0), + ir::Value::ConstantU(16)); +} + +// TODO: x, write_cc +// TODO: verify +void EmitXmad(DecoderContext& context, pred_t pred, bool pred_inv, XmadCop mode, + bool product_shift_left, bool merge, reg_t dst, reg_t src_a, + bool a_signed, bool high_a, ir::Value src_b, bool high_b, + ir::Value src_c) { + const auto conditional = HandlePredCond(context.builder, pred, pred_inv); + + auto src_a_v = ir::Value::Register(src_a, a_signed ? ir::ScalarType::I32 + : ir::ScalarType::U32); + src_a_v = Extend16To32(context, src_a_v, high_a); + auto src_b_v = Extend16To32(context, src_b, high_b); + + auto res = context.builder.OpMultiply( + EnsureIntegerSignedness(context.builder, src_a_v), + EnsureIntegerSignedness(context.builder, src_b_v)); + if (product_shift_left) + res = context.builder.OpShiftLeft(res, ir::Value::ConstantU(16)); + + auto src_c_v = src_c; + switch (mode) { + case XmadCop::Cfull: + break; + case XmadCop::Clo: + src_c_v = Extend16To32(context, src_c, false); + break; + case XmadCop::Chi: + src_c_v = Extend16To32(context, src_c, true); + break; + case XmadCop::Cbcc: + src_c_v = context.builder.OpAdd( + src_c, context.builder.OpShiftLeft( + EnsureIntegerSignedness(context.builder, src_b), + ir::Value::ConstantU(16))); + break; + case XmadCop::Csfu: { + const auto src_a_adjusted = context.builder.OpShiftLeft( + context.builder.OpShiftRight( + EnsureIntegerSignedness(context.builder, src_a_v), + 
ir::Value::ConstantU(31)), + ir::Value::ConstantU(16)); + const auto src_b_adjusted = context.builder.OpShiftLeft( + context.builder.OpShiftRight( + EnsureIntegerSignedness(context.builder, src_b_v), + ir::Value::ConstantU(31)), + ir::Value::ConstantU(16)); + src_c_v = context.builder.OpAdd( + src_c, context.builder.OpNeg( + context.builder.OpAdd(src_a_adjusted, src_b_adjusted))); + break; + } + } + + // TODO: extend + res = context.builder.OpAdd(res, src_c_v); + + if (merge) { + res = context.builder.OpBitwiseAnd(res, ir::Value::ConstantU(0xffff)); + res = context.builder.OpBitwiseOr( + res, context.builder.OpShiftLeft(src_b, ir::Value::ConstantU(16))); + } + + context.builder.OpCopy(ir::Value::Register(dst, ir::ScalarType::U32), res); + + if (conditional) + context.builder.OpEndIf(); +} + } // namespace void EmitIaddR(DecoderContext& context, InstIaddR inst) { @@ -100,4 +177,46 @@ void EmitIscadd32I(DecoderContext& context, InstIscadd32I inst) { ir::Value::Constant(inst.imm, ir::ScalarType::I32)); } +void EmitXmadR(DecoderContext& context, InstXmadR inst) { + EmitXmad(context, inst.base.pred, inst.base.pred_inv, inst.cop, inst.psl, + inst.mrg, inst.base.dst, inst.base.src_a, inst.base.a_signed, + inst.base.hilo_a, + ir::Value::Register(inst.src_b, inst.base.b_signed + ? ir::ScalarType::I32 + : ir::ScalarType::U32), + inst.hilo_b, ir::Value::Register(inst.src_c, ir::ScalarType::U32)); +} + +void EmitXmadRC(DecoderContext& context, InstXmadRC inst) { + EmitXmad( + context, inst.base.pred, inst.base.pred_inv, inst.cop, false, false, + inst.base.dst, inst.base.src_a, inst.base.a_signed, inst.base.hilo_a, + ir::Value::Register(inst.src_b, inst.base.b_signed + ? 
ir::ScalarType::I32 + : ir::ScalarType::U32), + inst.hilo_b, + ir::Value::ConstMemory(CMem(inst.cbuf_slot, RZ, inst.cbuf_offset * 4), + ir::ScalarType::U32)); +} + +void EmitXmadC(DecoderContext& context, InstXmadC inst) { + EmitXmad( + context, inst.base.pred, inst.base.pred_inv, inst.cop, false, false, + inst.base.dst, inst.base.src_a, inst.base.a_signed, inst.base.hilo_a, + ir::Value::ConstMemory(CMem(inst.cbuf_slot, RZ, inst.cbuf_offset * 4), + inst.base.b_signed ? ir::ScalarType::I32 + : ir::ScalarType::U32), + inst.hilo_b, ir::Value::Register(inst.src_c, ir::ScalarType::U32)); +} + +void EmitXmadI(DecoderContext& context, InstXmadI inst) { + EmitXmad(context, inst.base.pred, inst.base.pred_inv, inst.cop, inst.psl, + inst.mrg, inst.base.dst, inst.base.src_a, inst.base.a_signed, + inst.base.hilo_a, + ir::Value::Constant(inst.imm16, inst.base.b_signed + ? ir::ScalarType::I32 + : ir::ScalarType::U32), + false, ir::Value::Register(inst.src_c, ir::ScalarType::U32)); +} + } // namespace hydra::hw::tegra_x1::gpu::renderer::shader_decomp::decoder diff --git a/src/core/hw/tegra_x1/gpu/renderer/shader_decompiler/decoder/integer_arithmetic.hpp b/src/core/hw/tegra_x1/gpu/renderer/shader_decompiler/decoder/integer_arithmetic.hpp index a1d4e6c4..d0066eb9 100644 --- a/src/core/hw/tegra_x1/gpu/renderer/shader_decompiler/decoder/integer_arithmetic.hpp +++ b/src/core/hw/tegra_x1/gpu/renderer/shader_decompiler/decoder/integer_arithmetic.hpp @@ -43,7 +43,7 @@ union InstIscaddBase { BitField64 src_a; BitField64 pred; BitField64 pred_inv; - BitField64 shift; + BitField64 shift; BitField64 write_cc; BitField64 avg_mode; }; @@ -62,4 +62,69 @@ union InstIscadd32I { void EmitIscadd32I(DecoderContext& context, InstIscadd32I inst); +enum class XmadCop { + Cfull = 0, + Clo = 1, + Chi = 2, + Csfu = 3, + Cbcc = 4, +}; + +union InstXmadBase { + BitField64 dst; + BitField64 src_a; + BitField64 pred; + BitField64 pred_inv; + BitField64 x; + BitField64 write_cc; + BitField64 a_signed; + 
BitField64 b_signed; + BitField64 hilo_a; +}; + +union InstXmadR { + InstXmadBase base; + BitField64 src_b; + BitField64 hilo_b; + BitField64 psl; + BitField64 mrg; + BitField64 src_c; + BitField64 cop; +}; + +void EmitXmadR(DecoderContext& context, InstXmadR inst); + +union InstXmadRC { + InstXmadBase base; + BitField64 cbuf_offset; + BitField64 cbuf_slot; + BitField64 src_b; + BitField64 cop; + BitField64 hilo_b; +}; + +void EmitXmadRC(DecoderContext& context, InstXmadRC inst); + +union InstXmadC { + InstXmadBase base; + BitField64 cbuf_offset; + BitField64 cbuf_slot; + BitField64 src_c; + BitField64 cop; + BitField64 hilo_b; +}; + +void EmitXmadC(DecoderContext& context, InstXmadC inst); + +union InstXmadI { + InstXmadBase base; + BitField64 imm16; + BitField64 psl; + BitField64 mrg; + BitField64 src_c; + BitField64 cop; +}; + +void EmitXmadI(DecoderContext& context, InstXmadI inst); + } // namespace hydra::hw::tegra_x1::gpu::renderer::shader_decomp::decoder diff --git a/src/core/hw/tegra_x1/gpu/renderer/shader_decompiler/decoder/memory.cpp b/src/core/hw/tegra_x1/gpu/renderer/shader_decompiler/decoder/memory.cpp index 514a5513..6ccb7547 100644 --- a/src/core/hw/tegra_x1/gpu/renderer/shader_decompiler/decoder/memory.cpp +++ b/src/core/hw/tegra_x1/gpu/renderer/shader_decompiler/decoder/memory.cpp @@ -28,11 +28,16 @@ void EmitLoadAttribute(DecoderContext& context, pred_t pred, bool pred_inv, unreachable(); } + std::vector values; + values.reserve(count); for (u32 i = 0; i < count; i++) { - context.builder.OpCopy( - ir::Value::Register(dst + static_cast(i)), - ir::Value::AttrMemory( - AMem(src, src_offset + i * sizeof(u32), is_input))); + values.push_back(context.builder.OpCopy(ir::Value::AttrMemory( + AMem(src, src_offset + i * sizeof(u32), is_input)))); + } + + for (u32 i = 0; i < count; i++) { + context.builder.OpCopy(ir::Value::Register(dst + static_cast(i)), + values[i]); } if (conditional) @@ -105,11 +110,16 @@ void EmitLoadConstant(DecoderContext& context, 
pred_t pred, bool pred_inv, size == LsSize2::S16) LOG_NOT_IMPLEMENTED(ShaderDecompiler, "Small integer loading"); + std::vector values; + values.reserve(count); for (u32 i = 0; i < count; i++) { - context.builder.OpCopy( - ir::Value::Register(dst + static_cast(i)), - ir::Value::ConstMemory( - CMem(cbuf_slot, src, cbuf_offset + i * sizeof(u32)))); + values.push_back(context.builder.OpCopy(ir::Value::ConstMemory( + CMem(cbuf_slot, src, cbuf_offset + i * sizeof(u32))))); + } + + for (u32 i = 0; i < count; i++) { + context.builder.OpCopy(ir::Value::Register(dst + static_cast(i)), + values[i]); } if (conditional) @@ -149,10 +159,16 @@ void EmitLoadGlobal(DecoderContext& context, pred_t pred, bool pred_inv, LOG_FUNC_WITH_ARGS_NOT_IMPLEMENTED( ShaderDecompiler, "size: {}, src: {}, offset: {}", size, src, offset); + std::vector values; + values.reserve(count); for (u32 i = 0; i < count; i++) { // TODO: global memory + values.push_back(ir::Value::ConstantU(0)); + } + + for (u32 i = 0; i < count; i++) { context.builder.OpCopy(ir::Value::Register(dst + static_cast(i)), - ir::Value::ConstantU(0)); + values[i]); } if (conditional) diff --git a/src/core/hw/tegra_x1/gpu/renderer/shader_decompiler/decoder/texture.cpp b/src/core/hw/tegra_x1/gpu/renderer/shader_decompiler/decoder/texture.cpp index aed16a47..f39e743f 100644 --- a/src/core/hw/tegra_x1/gpu/renderer/shader_decompiler/decoder/texture.cpp +++ b/src/core/hw/tegra_x1/gpu/renderer/shader_decompiler/decoder/texture.cpp @@ -46,49 +46,30 @@ u8 ToTexComponent(TextureComponent component) { } } -enum class ComponentSwizzle { - None, - Zero, - X, - Y, - Z, - W, -}; - void CopyTextureResult(ir::Builder& builder, std::array dsts, - ir::Value res, - std::array swizzles) { - for (u32 i = 0; i < 4; i++) { - const auto swizzle = swizzles[i]; - if (swizzle == ComponentSwizzle::None) + ir::Value res, u8 component_mask) { + u32 output_index = 0; + for (u8 i = 0; i < 4; i++) { + if ((component_mask & (1 << i)) == 0x0) continue; - ir::Value 
value = ir::Value::Undefined(); - switch (swizzle) { - case ComponentSwizzle::Zero: - value = ir::Value::ConstantF(0.0f); - break; - case ComponentSwizzle::X: - value = builder.OpVectorExtract(res, 0); - break; - case ComponentSwizzle::Y: - value = builder.OpVectorExtract(res, 1); - break; - case ComponentSwizzle::Z: - value = builder.OpVectorExtract(res, 2); - break; - case ComponentSwizzle::W: - value = builder.OpVectorExtract(res, 3); - break; - default: - unreachable(); - } - - builder.OpCopy(ir::Value::Register(dsts[i], ir::ScalarType::F32), - value); + builder.OpCopy( + ir::Value::Register(dsts[output_index++], ir::ScalarType::F32), + builder.OpVectorExtract(res, i)); } } +void CopyTextureResult(ir::Builder& builder, reg_t dst, ir::Value res, + u8 component_mask) { + CopyTextureResult(builder, {dst + 0, dst + 1, dst + 2, dst + 3}, res, + component_mask); +} + +constexpr std::array, 2> COMPONENT_MASK_LUT = {{ + {0b0001, 0b0010, 0b0100, 0b1000, 0b0011, 0b1001, 0b1010, 0b1100}, + {0b0111, 0b1011, 0b1101, 0b1110, 0b1111, 0b0000, 0b0000, 0b0000}, +}}; + // TODO: nodep void EmitTextureSample(DecoderContext& context, pred_t pred, bool pred_inv, bool int_coords, TextureSampleTarget target, reg_t dst0, @@ -213,74 +194,12 @@ void EmitTextureSample(DecoderContext& context, pred_t pred, bool pred_inv, ir::Value res = context.builder.OpTextureSample( cbuf_index, type, flags, array_index, coords_v, cmp_value, lod); - std::array swizzles; - if (dst1 == RZ) { - switch (write_mask) { - case 0x0: - swizzles = {ComponentSwizzle::X, ComponentSwizzle::None, - ComponentSwizzle::None, ComponentSwizzle::None}; - break; - case 0x1: - swizzles = {ComponentSwizzle::Y, ComponentSwizzle::None, - ComponentSwizzle::None, ComponentSwizzle::None}; - break; - case 0x2: - swizzles = {ComponentSwizzle::Z, ComponentSwizzle::None, - ComponentSwizzle::None, ComponentSwizzle::None}; - break; - case 0x3: - swizzles = {ComponentSwizzle::W, ComponentSwizzle::None, - ComponentSwizzle::None, 
ComponentSwizzle::None}; - break; - case 0x4: - swizzles = {ComponentSwizzle::X, ComponentSwizzle::Y, - ComponentSwizzle::None, ComponentSwizzle::None}; - break; - case 0x5: - swizzles = {ComponentSwizzle::X, ComponentSwizzle::W, - ComponentSwizzle::None, ComponentSwizzle::None}; - break; - case 0x6: - swizzles = {ComponentSwizzle::Y, ComponentSwizzle::W, - ComponentSwizzle::None, ComponentSwizzle::None}; - break; - case 0x7: - swizzles = {ComponentSwizzle::Z, ComponentSwizzle::W, - ComponentSwizzle::None, ComponentSwizzle::None}; - break; - default: - unreachable(); - } - } else { - switch (write_mask) { - case 0x0: - swizzles = {ComponentSwizzle::X, ComponentSwizzle::Y, - ComponentSwizzle::Z, ComponentSwizzle::None}; - break; - case 0x1: - swizzles = {ComponentSwizzle::X, ComponentSwizzle::Y, - ComponentSwizzle::W, ComponentSwizzle::None}; - break; - case 0x2: - swizzles = {ComponentSwizzle::X, ComponentSwizzle::Z, - ComponentSwizzle::W, ComponentSwizzle::None}; - break; - case 0x3: - swizzles = {ComponentSwizzle::Y, ComponentSwizzle::Z, - ComponentSwizzle::W, ComponentSwizzle::None}; - break; - case 0x4: - swizzles = {ComponentSwizzle::X, ComponentSwizzle::Y, - ComponentSwizzle::Z, ComponentSwizzle::W}; - break; - default: - LOG_WARN(ShaderDecompiler, "Invalid write mask {:#x}", write_mask); - break; - } - } - + const auto component_mask = + COMPONENT_MASK_LUT[dst1 == RZ ? 
0 : 1][write_mask]; + ASSERT_DEBUG(component_mask != 0, ShaderDecompiler, + "Invalid component mask"); CopyTextureResult(context.builder, {dst0 + 0, dst0 + 1, dst1 + 0, dst1 + 1}, - res, swizzles); + res, component_mask); if (conditional) context.builder.OpEndIf(); @@ -289,7 +208,7 @@ void EmitTextureSample(DecoderContext& context, pred_t pred, bool pred_inv, // TODO: ndv, nodep, dc, dst_pred, aoffi, lod, lc void EmitTextureSample2(DecoderContext& context, pred_t pred, bool pred_inv, bool int_coords, TextureDimension dim, reg_t dst, - u8 write_mask, reg_t src_a, reg_t src_b, + u8 component_mask, reg_t src_a, reg_t src_b, u32 cbuf_index) { (void)src_b; @@ -357,14 +276,7 @@ void EmitTextureSample2(DecoderContext& context, pred_t pred, bool pred_inv, ir::Value res = context.builder.OpTextureSample( cbuf_index, type, flags, array_index, coords_v, ir::Value::Undefined(), ir::Value::Undefined()); - - for (u8 i = 0; i < 4; i++) { - if ((write_mask & (1 << i)) == 0x0) - continue; - - context.builder.OpCopy(ir::Value::Register(dst++, ir::ScalarType::F32), - context.builder.OpVectorExtract(res, i)); - } + CopyTextureResult(context.builder, dst, res, component_mask); if (conditional) context.builder.OpEndIf(); @@ -372,7 +284,7 @@ void EmitTextureSample2(DecoderContext& context, pred_t pred, bool pred_inv, // TODO: dim, ndv, nodep, dc, offset, lc, dst_pred void EmitTextureGather(DecoderContext& context, pred_t pred, bool pred_inv, - TextureComponent component, reg_t dst, u8 write_mask, + TextureComponent component, reg_t dst, u8 component_mask, reg_t src_a, reg_t src_b, u32 cbuf_index) { // TODO: src B (void)src_b; @@ -385,12 +297,7 @@ void EmitTextureGather(DecoderContext& context, pred_t pred, bool pred_inv, ir::Value::Register(src_a + 1, ir::ScalarType::F32)}); const auto res = context.builder.OpTextureGather(cbuf_index, coords_v, ToTexComponent(component)); - CopyTextureResult( - context.builder, {dst + 0, dst + 1, dst + 2, dst + 3}, res, - {write_mask & 0x1 ? 
ComponentSwizzle::X : ComponentSwizzle::None, - write_mask & 0x2 ? ComponentSwizzle::Y : ComponentSwizzle::None, - write_mask & 0x4 ? ComponentSwizzle::Z : ComponentSwizzle::None, - write_mask & 0x8 ? ComponentSwizzle::W : ComponentSwizzle::None}); + CopyTextureResult(context.builder, dst, res, component_mask); if (conditional) context.builder.OpEndIf(); @@ -408,9 +315,7 @@ void EmitTextureGather2(DecoderContext& context, pred_t pred, bool pred_inv, const auto res = context.builder.OpTextureGather(cbuf_index, coords_v, ToTexComponent(component)); CopyTextureResult(context.builder, {dst0 + 0, dst0 + 1, dst1 + 0, dst1 + 1}, - res, - {ComponentSwizzle::X, ComponentSwizzle::Y, - ComponentSwizzle::Z, ComponentSwizzle::W}); + res, 0b1111); if (conditional) context.builder.OpEndIf(); diff --git a/src/core/hw/tegra_x1/gpu/renderer/shader_decompiler/ir/builder.hpp b/src/core/hw/tegra_x1/gpu/renderer/shader_decompiler/ir/builder.hpp index f7544407..26b951ae 100644 --- a/src/core/hw/tegra_x1/gpu/renderer/shader_decompiler/ir/builder.hpp +++ b/src/core/hw/tegra_x1/gpu/renderer/shader_decompiler/ir/builder.hpp @@ -17,10 +17,13 @@ class Builder { void OpCopy(const Value& dst, const Value& src) { AddInstructionWithDst(Opcode::Copy, dst, {src}); } - Value OpCast(const Value& src, ScalarType dst_type) { + Value OpCopy(const Value& src) { + return AddInstruction(Opcode::Copy, src.GetType(), {src}); + } + Value OpCast(const Value& src, Type dst_type) { return AddInstruction(Opcode::Cast, dst_type, {src}); } - Value OpBitCast(const Value& src, ScalarType dst_type) { + Value OpBitCast(const Value& src, Type dst_type) { return AddInstruction(Opcode::BitCast, dst_type, {src}); } diff --git a/src/core/hw/tegra_x1/gpu/renderer/shader_decompiler/ir/type.cpp b/src/core/hw/tegra_x1/gpu/renderer/shader_decompiler/ir/type.cpp new file mode 100644 index 00000000..bb61ab87 --- /dev/null +++ b/src/core/hw/tegra_x1/gpu/renderer/shader_decompiler/ir/type.cpp @@ -0,0 +1,78 @@ +#include 
"core/hw/tegra_x1/gpu/renderer/shader_decompiler/ir/type.hpp" + +namespace hydra::hw::tegra_x1::gpu::renderer::shader_decomp::ir { + +ScalarType ScalarSignedEquivalent(ScalarType scalar) { + ASSERT_THROWING_DEBUG(ScalarIsUnsignedInteger(scalar), ShaderDecompiler, + TypeError::NotUnsigned, + "Type {} is not an unsigned integer", scalar); + switch (scalar) { + case ScalarType::U8: + return ScalarType::I8; + case ScalarType::U16: + return ScalarType::I16; + case ScalarType::U32: + return ScalarType::I32; + default: + unreachable(); + } +} + +ScalarType ScalarUnsignedEquivalent(ScalarType scalar) { + ASSERT_THROWING_DEBUG(ScalarIsSignedInteger(scalar), ShaderDecompiler, + TypeError::NotSigned, + "Type {} is not a signed integer", scalar); + switch (scalar) { + case ScalarType::I8: + return ScalarType::U8; + case ScalarType::I16: + return ScalarType::U16; + case ScalarType::I32: + return ScalarType::U32; + default: + unreachable(); + } +} + +ScalarType Type::GetScalarType() const { + ASSERT_THROWING_DEBUG(IsScalar(), ShaderDecompiler, TypeError::NotAScalar, + "Type {} is not a scalar", *this); + return scalar; +} + +VectorType Type::GetVectorType() const { + ASSERT_THROWING_DEBUG(IsVector(), ShaderDecompiler, TypeError::NotAVector, + "Type {} is not a vector", *this); + return vector; +} + +// Type creation +Type Type::SignedEquivalent() const { + ASSERT_THROWING_DEBUG(IsInteger(), ShaderDecompiler, + TypeError::NotAnInteger, "Type {} is not an integer", + *this); + switch (kind) { + case TypeKind::Scalar: + return ScalarSignedEquivalent(scalar); + case TypeKind::Vector: + return vector.SignedEquivalent(); + default: + unreachable(); + } +} + +Type Type::UnsignedEquivalent() const { + ASSERT_THROWING_DEBUG(IsInteger(), ShaderDecompiler, + TypeError::NotAnInteger, "Type {} is not an integer", + *this); + switch (kind) { + case TypeKind::Scalar: + return ScalarUnsignedEquivalent(scalar); + case TypeKind::Vector: + return vector.UnsignedEquivalent(); + default: + 
unreachable(); + } +} + +} // namespace hydra::hw::tegra_x1::gpu::renderer::shader_decomp::ir diff --git a/src/core/hw/tegra_x1/gpu/renderer/shader_decompiler/ir/type.hpp b/src/core/hw/tegra_x1/gpu/renderer/shader_decompiler/ir/type.hpp index c84ebe1d..c27c629b 100644 --- a/src/core/hw/tegra_x1/gpu/renderer/shader_decompiler/ir/type.hpp +++ b/src/core/hw/tegra_x1/gpu/renderer/shader_decompiler/ir/type.hpp @@ -23,6 +23,16 @@ enum class ScalarType { F32, }; +enum class TypeError { + NotAScalar, + NotAVector, + NotABoolean, + NotAnInteger, + NotAFloatingPoint, + NotSigned, + NotUnsigned, +}; + inline bool ScalarIsInteger(ScalarType scalar) { switch (scalar) { case ScalarType::U8: @@ -48,6 +58,17 @@ inline bool ScalarIsSignedInteger(ScalarType scalar) { } } +inline bool ScalarIsUnsignedInteger(ScalarType scalar) { + switch (scalar) { + case ScalarType::U8: + case ScalarType::U16: + case ScalarType::U32: + return true; + default: + return false; + } +} + inline bool ScalarIsFloatingPoint(ScalarType scalar) { switch (scalar) { case ScalarType::F16: @@ -58,6 +79,9 @@ inline bool ScalarIsFloatingPoint(ScalarType scalar) { } } +ScalarType ScalarSignedEquivalent(ScalarType scalar); +ScalarType ScalarUnsignedEquivalent(ScalarType scalar); + class VectorType { public: VectorType(ScalarType element_type, u8 size) @@ -70,7 +94,16 @@ class VectorType { // Check bool IsInteger() const { return ScalarIsInteger(element_type); } bool IsSignedInteger() const { return ScalarIsSignedInteger(element_type); } + bool IsUnsignedInteger() const { + return ScalarIsUnsignedInteger(element_type); + } bool IsFloatingPoint() const { return ScalarIsFloatingPoint(element_type); } + VectorType SignedEquivalent() const { + return VectorType(ScalarSignedEquivalent(element_type), size); + } + VectorType UnsignedEquivalent() const { + return VectorType(ScalarUnsignedEquivalent(element_type), size); + } private: ScalarType element_type; @@ -134,6 +167,17 @@ class Type { } } + bool IsUnsignedInteger() 
const { + switch (kind) { + case TypeKind::Scalar: + return ScalarIsUnsignedInteger(scalar); + case TypeKind::Vector: + return vector.IsUnsignedInteger(); + default: + return false; + } + } + bool IsFloatingPoint() const { switch (kind) { case TypeKind::Scalar: @@ -146,15 +190,12 @@ class Type { } // Get - ScalarType GetScalarType() const { - ASSERT_DEBUG(IsScalar(), ShaderDecompiler, "Type is not a scalar"); - return scalar; - } + ScalarType GetScalarType() const; + VectorType GetVectorType() const; - VectorType GetVectorType() const { - ASSERT_DEBUG(IsVector(), ShaderDecompiler, "Type is not a vector"); - return vector; - } + // Type creation + Type SignedEquivalent() const; + Type UnsignedEquivalent() const; private: TypeKind kind; diff --git a/src/core/hw/tegra_x1/gpu/renderer/surface_compositor.hpp b/src/core/hw/tegra_x1/gpu/renderer/surface_compositor.hpp new file mode 100644 index 00000000..77efb005 --- /dev/null +++ b/src/core/hw/tegra_x1/gpu/renderer/surface_compositor.hpp @@ -0,0 +1,22 @@ +#pragma once + +#include "core/hw/tegra_x1/gpu/renderer/const.hpp" + +namespace hydra::hw::tegra_x1::gpu::renderer { + +class ICommandBuffer; +class TextureBase; + +class ISurfaceCompositor { + public: + virtual ~ISurfaceCompositor() = default; + + virtual void DrawTexture(ICommandBuffer* command_buffer, + const TextureBase* texture, + const FloatRect2D src_rect, + const FloatRect2D dst_rect, bool transparent, + f32 opacity = 1.0f) = 0; + virtual void Present(ICommandBuffer* command_buffer) = 0; +}; + +} // namespace hydra::hw::tegra_x1::gpu::renderer diff --git a/src/core/hw/tegra_x1/gpu/renderer/texture_base.hpp b/src/core/hw/tegra_x1/gpu/renderer/texture_base.hpp index 60e3fc76..bc47fabf 100644 --- a/src/core/hw/tegra_x1/gpu/renderer/texture_base.hpp +++ b/src/core/hw/tegra_x1/gpu/renderer/texture_base.hpp @@ -4,6 +4,7 @@ namespace hydra::hw::tegra_x1::gpu::renderer { +class ICommandBuffer; class BufferBase; class TextureBase { @@ -17,21 +18,21 @@ class TextureBase { 
// Copying virtual void CopyFrom(const uptr data) = 0; - virtual void CopyFrom(const BufferBase* src, const usize src_stride, - const u32 dst_layer, const uint3 dst_origin, + virtual void CopyFrom(ICommandBuffer* command_buffer, const BufferBase* src, + const usize src_stride, const uint3 dst_origin, const usize3 size) = 0; - void CopyFrom(const BufferBase* src) { - CopyFrom(src, descriptor.stride, 0, uint3({0, 0, 0}), + void CopyFrom(ICommandBuffer* command_buffer, const BufferBase* src) { + CopyFrom(command_buffer, src, descriptor.stride, uint3({0, 0, 0}), usize3({descriptor.width, descriptor.height, 1})); } - virtual void CopyFrom(const TextureBase* src, const u32 src_layer, - const uint3 src_origin, const u32 dst_layer, + virtual void CopyFrom(ICommandBuffer* command_buffer, + const TextureBase* src, const uint3 src_origin, const uint3 dst_origin, const usize3 size) = 0; // Blitting - virtual void BlitFrom(const TextureBase* src, const u32 src_layer, - const float3 src_origin, const usize3 src_size, - const u32 dst_layer, const float3 dst_origin, + virtual void BlitFrom(ICommandBuffer* command_buffer, + const TextureBase* src, const float3 src_origin, + const usize3 src_size, const float3 dst_origin, const usize3 dst_size) = 0; // Getters diff --git a/src/core/hw/tegra_x1/gpu/renderer/texture_cache.cpp b/src/core/hw/tegra_x1/gpu/renderer/texture_cache.cpp index fdf8d4fa..cf68f8cd 100644 --- a/src/core/hw/tegra_x1/gpu/renderer/texture_cache.cpp +++ b/src/core/hw/tegra_x1/gpu/renderer/texture_cache.cpp @@ -1,6 +1,5 @@ #include "core/hw/tegra_x1/gpu/renderer/texture_cache.hpp" -#include "common/time.hpp" #include "core/hw/tegra_x1/gpu/gpu.hpp" #include "core/hw/tegra_x1/gpu/renderer/buffer_base.hpp" #include "core/hw/tegra_x1/gpu/renderer/const.hpp" @@ -9,128 +8,347 @@ namespace hydra::hw::tegra_x1::gpu::renderer { TextureCache::~TextureCache() { - // TODO: uncomment - /* - for (auto& [key, value] : texture_mem_map) { - for (auto& [key, value] : value.cache) { - 
delete value.base; - for (auto& [key, value] : value.view_cache) - delete value; + for (auto& [key, mem] : entries) { + for (const auto& [key, sparse_tex] : mem.cache) { + for (const auto& [key, group] : sparse_tex.cache) { + delete group.base; + for (const auto& [key, view] : group.view_cache) + delete view; + } } } - */ } -TextureBase* TextureCache::GetTextureView(const TextureDescriptor& descriptor, - TextureUsage usage) { - auto& texture_mem = texture_mem_map[descriptor.ptr]; - auto& tex = texture_mem.cache.Find(GetTextureHash(descriptor)); - if (!tex.base) - Create(descriptor, tex, texture_mem.info); +TextureBase* TextureCache::Find(ICommandBuffer* command_buffer, + const TextureDescriptor& descriptor, + TextureUsage usage) { + const auto range = descriptor.GetRange(); - Update(tex, texture_mem.info, usage); + // Check for containing interval + auto it = entries.upper_bound(range.GetBegin()); + if (it != entries.begin()) { + auto prev = std::prev(it); + auto& prev_mem = prev->second; + if (prev_mem.range.GetEnd() >= range.GetEnd()) { + // Fully contained + return AddToMemory(command_buffer, prev_mem, descriptor, usage); + } + } - // If the formats match and swizzle is the default swizzle, return base - if (descriptor.format == tex.base->GetDescriptor().format && - descriptor.swizzle_channels == - tex.base->GetDescriptor().swizzle_channels) { - return tex.base; + // Insert and merge + TextureMem mem{.range = range}; + + it = entries.lower_bound(range.GetBegin()); + + // Merge with previous if overlapping + if (it != entries.begin()) { + auto prev = std::prev(it); + const auto& prev_mem = prev->second; + if (prev_mem.range.GetEnd() > mem.range.GetBegin()) { + mem = MergeMemories(mem, prev_mem); + it = entries.erase(prev); + } } - // Otherwise, get a texture view - auto view_desc = TextureViewDescriptor{ - .format = descriptor.format, - .swizzle_channels = descriptor.swizzle_channels, + // Merge with following entries + while (it != entries.end() && it->first < 
mem.range.GetEnd()) { + const auto& crnt_mem = it->second; + mem = MergeMemories(mem, crnt_mem); + it = entries.erase(it); + } + + // Insert merged interval + auto inserted = entries.emplace(mem.range.GetBegin(), mem); + return AddToMemory(command_buffer, inserted.first->second, descriptor, + usage); +} + +void TextureCache::InvalidateMemory(Range range) { + auto it = entries.upper_bound(range.GetBegin()); + if (it != entries.begin()) + it--; + + for (; it != entries.end() && it->first < range.GetEnd(); it++) { + auto& mem = it->second; + + // We assume that textures that have been written to by the GPU are + // never modified by the CPU + if (mem.info.written_timestamp != TextureCacheTimePoint{}) + continue; + + // Check if its in the range + if (mem.range.GetEnd() > range.GetBegin()) + mem.info.MarkModified(); + } +} + +TextureMem TextureCache::MergeMemories(const TextureMem& a, + const TextureMem& b) { + TextureMem res; + res.range = a.range.Union(b.range); + res.info = { + .modified_timestamp = + std::max(a.info.modified_timestamp, b.info.modified_timestamp), + .read_timestamp = + std::max(a.info.read_timestamp, b.info.read_timestamp), + .written_timestamp = + std::max(a.info.written_timestamp, b.info.written_timestamp), }; - auto& view = tex.view_cache.Find(view_desc.GetHash()); - if (view) - return view; - view = tex.base->CreateView(view_desc); + // HACK + for (const auto& [key, tex] : const_cast(a).cache) + res.cache.Add(key, tex); - return view; + for (const auto& [key, tex] : const_cast(b).cache) + res.cache.Add(key, tex); + + return res; } -void TextureCache::NotifyGuestModifiedData(const range mem_range) { - // TODO: mark all overlapping memories as modified - auto it = texture_mem_map.find(mem_range.begin); - if (it == texture_mem_map.end()) - return; +TextureBase* TextureCache::AddToMemory(ICommandBuffer* command_buffer, + TextureMem& mem, + const TextureDescriptor& descriptor, + TextureUsage usage) { + const auto range = descriptor.GetRange(); + 
const auto layer_size = descriptor.GetLayerSizeInBytes(); + + // Check if it is a new entry + auto sparse_tex_opt = mem.cache.Find(descriptor.GetHash()); + if (!sparse_tex_opt.has_value()) { + auto& sparse_tex = mem.cache.Add(descriptor.GetHash()); + auto& group = sparse_tex.cache.Add(descriptor.ptr); + return GetTexture(command_buffer, group, mem, descriptor, usage); + } + + auto& sparse_tex = **sparse_tex_opt; + + // Check if it is just a view with smaller layer count + auto group_opt = sparse_tex.cache.Find(descriptor.ptr); + if (group_opt) { + auto& group = **group_opt; + if (group.base->GetDescriptor().GetRange().Contains(range)) + return GetTexture(command_buffer, group, mem, descriptor, usage); + else + sparse_tex.cache.Remove(descriptor.ptr); + } + + // Check if it is a proper layer view + for (const auto& [key, group] : sparse_tex.cache) { + if (group.base->GetDescriptor().GetRange().Contains(range)) { + const auto offset = static_cast( + range.GetBegin() - group.base->GetDescriptor().ptr); + ASSERT_ALIGNMENT_DEBUG(offset, layer_size, Gpu, + "texture view offset"); + const auto layers = + Range::FromSize(offset / layer_size, descriptor.depth); + return GetTextureView( + group, TextureViewDescriptor(descriptor.format, + descriptor.swizzle_channels, + Range(0, 1), layers)); + } + } + + // HACK: create a new texture + auto& group = sparse_tex.cache.Add(descriptor.ptr); + return GetTexture(command_buffer, group, mem, descriptor, usage); + + /* + // Create a new entry and merge it with others + auto new_range = range; + std::vector removed_textures; + for (auto it = sparse_tex.cache.begin(); it != sparse_tex.cache.end();) { + const auto& group = (*it).second; + const auto crnt_range = group.base->GetDescriptor().GetRange(); + if (crnt_range.Intersects(range)) { + // If the texture pointer difference is a multiple of the layer + // size, merge the ranges + const auto diff = + (new_range.GetBegin() > crnt_range.GetBegin() + ? 
new_range.GetBegin() - crnt_range.GetBegin() + : crnt_range.GetBegin() - new_range.GetBegin()); + if (diff % layer_size == 0) { + new_range = new_range.Union(crnt_range); + removed_textures.push_back(group.base); + // TODO: queue for deletion + it = sparse_tex.cache.Remove(it); + } else { + LOG_WARN(Gpu, "Merging {:#x} with {:#x}", new_range, + crnt_range); + LOG_WARN( + Gpu, + "[TEX 1] Ptr: {:#x}, format: {}, width: {}, height: {}, " + "depth: {}, stride: {:#x}", + descriptor.ptr, descriptor.format, descriptor.width, + descriptor.height, descriptor.depth, descriptor.stride); + LOG_WARN( + Gpu, + "[TEX 2] Ptr: {:#x}, format: {}, width: {}, height: {}, " + "depth: {}, stride: {:#x}", + group.base->GetDescriptor().ptr, + group.base->GetDescriptor().format, + group.base->GetDescriptor().width, + group.base->GetDescriptor().height, + group.base->GetDescriptor().depth, + group.base->GetDescriptor().stride); + ++it; + } + } else { + ++it; + } + } + + // Create new group + auto new_descriptor = descriptor; + new_descriptor.ptr = new_range.GetBegin(); + ASSERT_ALIGNMENT_DEBUG(new_range.GetSize(), layer_size, Gpu, + "merged range"); + new_descriptor.depth = static_cast(new_range.GetSize() / layer_size); + auto& group = sparse_tex.cache.Add(new_descriptor.ptr); + auto new_tex = GetTexture(group, mem.info, new_descriptor, usage); - it->second.info.MarkModified(); + LOG_INFO(Gpu, "ADDED GROUP {:#x} AT {:#x} TO SPARSE TEX {:#x}", + (u64)(&group), new_descriptor.ptr, (u64)(&sparse_tex)); + + // Copy the old textures to the new one + for (const auto tex : removed_textures) { + const auto offset = + static_cast(tex->GetDescriptor().ptr - new_range.GetBegin()); + ASSERT_ALIGNMENT_DEBUG(offset, layer_size, Gpu, + "removed texture offset"); + // TODO: make sure the formats match + new_tex->CopyFrom( + tex, 0, uint3({0, 0, 0}), offset / layer_size, uint3({0, 0, 0}), + usize3({descriptor.width, descriptor.height, descriptor.depth})); + } + + // TODO: return a view + return new_tex; + 
*/ +} + +TextureBase* TextureCache::GetTexture(ICommandBuffer* command_buffer, + TextureGroup& group, TextureMem& mem, + const TextureDescriptor& descriptor, + TextureUsage usage) { + if (!group.base) + Create(command_buffer, descriptor, group); + + Update(command_buffer, group, mem, usage); + + // If the formats match and swizzle is the default swizzle, + // return base + if (descriptor.format == group.base->GetDescriptor().format && + descriptor.swizzle_channels == SwizzleChannels()) { + return group.base; + } + + // Otherwise, get a texture view + auto view_desc = TextureViewDescriptor( + descriptor.format, descriptor.swizzle_channels, Range(0, 1), + Range(0, descriptor.depth)); + return GetTextureView(group, view_desc); } -void TextureCache::Create(const TextureDescriptor& descriptor, Tex& tex, - TextureMemInfo& info) { +TextureBase* +TextureCache::GetTextureView(TextureGroup& group, + const TextureViewDescriptor& descriptor) { + auto view_opt = group.view_cache.Find(descriptor.GetHash()); + if (view_opt.has_value()) + return **view_opt; + + auto view = group.base->CreateView(descriptor); + group.view_cache.Add(descriptor.GetHash(), view); + return view; +} + +void TextureCache::Create(ICommandBuffer* command_buffer, + const TextureDescriptor& descriptor, + TextureGroup& group) { auto desc = descriptor; desc.swizzle_channels = get_texture_format_default_swizzle_channels(desc.format); - tex.base = RENDERER_INSTANCE.CreateTexture(desc); - DecodeTexture(tex, info); + group.base = RENDERER_INSTANCE.CreateTexture(desc); + DecodeTexture(command_buffer, group); } -void TextureCache::Update(Tex& tex, TextureMemInfo& info, TextureUsage usage) { +void TextureCache::Update(ICommandBuffer* command_buffer, TextureGroup& group, + TextureMem& mem, TextureUsage usage) { bool sync = false; - bool update_data_hash = true; - if (tex.cpu_sync_timestamp < info.modified_timestamp) { + if (group.update_timestamp < mem.info.modified_timestamp) { // If modified by the guest sync = 
true; - } else if (tex.data_hash != info.data_hash.hash) { - // Data changed, but this texture hasn't been updated yet - sync = true; - } else if (info.written_timestamp == TextureCacheTimePoint{}) { + } else if (group.update_timestamp < mem.info.written_timestamp) { + // Other textures in this memory changed, let's copy them + const auto base = group.base; + const auto& descriptor = base->GetDescriptor(); + const auto range = descriptor.GetRange(); + const auto layer_size = descriptor.GetLayerSizeInBytes(); + for (const auto& [key, sparse_tex] : mem.cache) { + // TODO: skip this sparse texture + + for (const auto& [key, other_group] : sparse_tex.cache) { + const auto other_base = other_group.base; + const auto& other_descriptor = other_base->GetDescriptor(); + const auto other_range = other_descriptor.GetRange(); + + // Check if the textures can actually be copied + if (other_descriptor.width != descriptor.width || + other_descriptor.height != descriptor.height || + other_descriptor.stride != descriptor.stride) + continue; + + if (range.Intersects(other_range)) { + const auto copy_range = range.ClampedTo(other_range); + const auto dst_offset = + copy_range.GetBegin() - range.GetBegin(); + + // Check if the textures are aligned properly + if (dst_offset % layer_size != 0x0) + continue; + + // Now copy + const auto src_layer = static_cast( + (copy_range.GetBegin() - other_range.GetBegin()) / + layer_size); + const auto dst_layer = + static_cast(dst_offset / layer_size); + const auto layer_count = + static_cast(copy_range.GetSize() / layer_size); + + // TODO: make sure the formats match + base->CopyFrom(command_buffer, other_base, + uint3({0, 0, src_layer}), + uint3({0, 0, dst_layer}), + usize3({descriptor.width, descriptor.height, + layer_count})); + } + } + } + + group.MarkUpdated(); + } else if (mem.info.written_timestamp == TextureCacheTimePoint{}) { // Never written to if (usage == TextureUsage::Present) { - // Presenting, but never written to + // Presented, 
but never written to sync = true; } else if (usage == TextureUsage::Read) { // Read, but never written to - - // Check if the data hash needs to be checked - auto& data_hash = info.data_hash; - if (data_hash.ShouldCheck()) { - u32 data_hash = GetTextureDataHash(tex.base); - if (data_hash != info.data_hash.hash) { - sync = true; - update_data_hash = false; - info.data_hash.Update(data_hash); - } else { - info.data_hash.NotifyNotChanged(); - } - } else if (data_hash.check_success_rate >= - DataHash::MIN_SUCCESS_RATE) { - // If there is a high chance that the data has changed - sync = true; - update_data_hash = false; - } } } if (sync) - DecodeTexture(tex, info, update_data_hash); + DecodeTexture(command_buffer, group); if (usage == TextureUsage::Read) - info.MarkRead(); + mem.info.MarkRead(); else if (usage == TextureUsage::Write) - info.MarkWritten(); -} - -u32 TextureCache::GetTextureHash(const TextureDescriptor& descriptor) { - HashCode hash; - hash.Add(descriptor.ptr); - hash.Add(descriptor.stride); // TODO: extend the width instead - hash.Add(descriptor.height); - - // View compatbility hash - // TODO: get format info from the renderer instead - hash.Add(is_texture_format_compressed(descriptor.format)); - hash.Add(is_texture_format_depth_or_stencil(descriptor.format)); - hash.Add(get_texture_format_stride(descriptor.format, 16)); + mem.info.MarkWritten(); - return hash.ToHashCode(); + if (usage == TextureUsage::Write || sync) + group.MarkUpdated(); } -u32 TextureCache::GetTextureDataHash(const TextureBase* texture) { +u32 TextureCache::GetDataHash(const TextureBase* texture) { constexpr u32 SAMPLE_COUNT = 37; const auto& descriptor = texture->GetDescriptor(); @@ -144,22 +362,16 @@ u32 TextureCache::GetTextureDataHash(const TextureBase* texture) { return hash.ToHashCode(); } -void TextureCache::DecodeTexture(Tex& tex, TextureMemInfo& info, - bool update_data_hash) { - const auto& descriptor = tex.base->GetDescriptor(); +void 
TextureCache::DecodeTexture(ICommandBuffer* command_buffer, + TextureGroup& group) { + const auto& descriptor = group.base->GetDescriptor(); // Align the height to 16 bytes (TODO: why 16?) auto tmp_buffer = RENDERER_INSTANCE.AllocateTemporaryBuffer( - descriptor.stride * align(descriptor.height, 16u)); - texture_decoder.Decode(descriptor, (u8*)tmp_buffer->GetDescriptor().ptr); - tex.base->CopyFrom(tmp_buffer); + descriptor.depth * align(descriptor.height, 16u) * descriptor.stride); + texture_decoder.Decode(descriptor, (u8*)tmp_buffer->GetPtr()); + group.base->CopyFrom(command_buffer, tmp_buffer); RENDERER_INSTANCE.FreeTemporaryBuffer(tmp_buffer); - - // Update metadata - tex.MarkCpuSynced(); - if (update_data_hash) - info.data_hash.Update(GetTextureDataHash(tex.base)); - tex.data_hash = info.data_hash.hash; } } // namespace hydra::hw::tegra_x1::gpu::renderer diff --git a/src/core/hw/tegra_x1/gpu/renderer/texture_cache.hpp b/src/core/hw/tegra_x1/gpu/renderer/texture_cache.hpp index 99fc2523..3d079ab3 100644 --- a/src/core/hw/tegra_x1/gpu/renderer/texture_cache.hpp +++ b/src/core/hw/tegra_x1/gpu/renderer/texture_cache.hpp @@ -8,59 +8,28 @@ class IMmu; namespace hydra::hw::tegra_x1::gpu::renderer { +class ICommandBuffer; class TextureBase; typedef std::chrono::steady_clock TextureCacheClock; typedef TextureCacheClock::time_point TextureCacheTimePoint; -struct Tex { +struct TextureGroup { TextureBase* base{nullptr}; - small_cache view_cache; - TextureCacheTimePoint cpu_sync_timestamp{}; - u32 data_hash{0}; + SmallCache view_cache; + TextureCacheTimePoint update_timestamp{}; - void MarkCpuSynced() { cpu_sync_timestamp = TextureCacheClock::now(); } + void MarkUpdated() { update_timestamp = TextureCacheClock::now(); } }; -struct DataHash { - static constexpr f32 MIN_SUCCESS_RATE = 0.2f; - static constexpr f32 MIX_AMOUNT = 0.1f; - - u32 hash{0}; - TextureCacheTimePoint updated_timestamp{}; - f32 check_success_rate{MIN_SUCCESS_RATE}; - - bool ShouldCheck() const { - // The 
farther away the success rate is from MIN_SUCCESS_RATE, the - // longer the check interval - f32 amount; - if (check_success_rate >= MIN_SUCCESS_RATE) - amount = (check_success_rate - MIN_SUCCESS_RATE) / - (1.0f - MIN_SUCCESS_RATE); - else - amount = (MIN_SUCCESS_RATE - check_success_rate) / MIN_SUCCESS_RATE; - f32 amount_squared = amount * amount; - std::chrono::milliseconds check_interval = - std::chrono::milliseconds(static_cast(amount_squared * 10000)); - - return TextureCacheClock::now() > updated_timestamp + check_interval; - } - - void Update(u32 hash_) { - hash = hash_; - updated_timestamp = TextureCacheClock::now(); - check_success_rate = - check_success_rate * (1.0f - MIX_AMOUNT) + MIX_AMOUNT; - } - - void NotifyNotChanged() { check_success_rate *= 1.0f - MIX_AMOUNT; } +struct SparseTexture { + SmallCache cache; }; struct TextureMemInfo { TextureCacheTimePoint modified_timestamp{}; TextureCacheTimePoint read_timestamp{}; TextureCacheTimePoint written_timestamp{}; - DataHash data_hash{}; void MarkModified() { modified_timestamp = TextureCacheClock::now(); } void MarkRead() { read_timestamp = TextureCacheClock::now(); } @@ -68,36 +37,50 @@ struct TextureMemInfo { }; struct TextureMem { + Range range; TextureMemInfo info; - small_cache cache; + SmallCache cache; }; -// TODO: track GPU modifications as well? 
+// TODO: destroy textures +// TODO: texture readback class TextureCache { public: ~TextureCache(); - TextureBase* GetTextureView(const TextureDescriptor& descriptor, - TextureUsage usage); + TextureBase* Find(ICommandBuffer* command_buffer, + const TextureDescriptor& descriptor, TextureUsage usage); - void NotifyGuestModifiedData(const range mem_range); + void InvalidateMemory(Range range); private: + std::mutex mutex; TextureDecoder texture_decoder; - // TODO: use a more memory lookup friendly data structure - std::map texture_mem_map; - - void Create(const TextureDescriptor& descriptor, Tex& tex, - TextureMemInfo& info); - void Update(Tex& tex, TextureMemInfo& info, TextureUsage usage); + std::map entries; + + TextureMem MergeMemories(const TextureMem& a, const TextureMem& b); + TextureBase* AddToMemory(ICommandBuffer* command_buffer, TextureMem& mem, + const TextureDescriptor& descriptor, + TextureUsage usage); + TextureBase* GetTexture(ICommandBuffer* command_buffer, TextureGroup& group, + TextureMem& mem, + const TextureDescriptor& descriptor, + TextureUsage usage); + TextureBase* GetTextureView(TextureGroup& group, + const TextureViewDescriptor& descriptor); + void Create(ICommandBuffer* command_buffer, + const TextureDescriptor& descriptor, TextureGroup& group); + void Update(ICommandBuffer* command_buffer, TextureGroup& group, + TextureMem& mem, TextureUsage usage); // Helpers - u32 GetTextureHash(const TextureDescriptor& descriptor); - u32 GetTextureDataHash(const TextureBase* texture); - void DecodeTexture(Tex& tex, TextureMemInfo& info, - bool update_data_hash = true); + u32 GetDataHash(const TextureBase* texture); + void DecodeTexture(ICommandBuffer* command_buffer, TextureGroup& group); // TODO: encode texture + + public: + REF_GETTER(mutex, GetMutex); }; } // namespace hydra::hw::tegra_x1::gpu::renderer diff --git a/src/core/hw/tegra_x1/gpu/renderer/texture_decoder.cpp b/src/core/hw/tegra_x1/gpu/renderer/texture_decoder.cpp index ec9267e2..3c4ade5a 
100644 --- a/src/core/hw/tegra_x1/gpu/renderer/texture_decoder.cpp +++ b/src/core/hw/tegra_x1/gpu/renderer/texture_decoder.cpp @@ -14,10 +14,12 @@ void TextureDecoder::Decode(const TextureDescriptor& descriptor, u8* out_data) { switch (descriptor.kind) { case NvKind::Pitch: case NvKind::PitchNoSwizzle: - std::memcpy(out_data, in_data, descriptor.stride * descriptor.height); + std::memcpy(out_data, in_data, + descriptor.depth * descriptor.height * descriptor.stride); break; default: - decode_generic_16bx2(descriptor.stride, descriptor.height, + decode_generic_16bx2(descriptor.stride, + descriptor.depth * descriptor.height, descriptor.block_height_log2, in_data, out_data); break; } diff --git a/src/frontend/swiftui/Api.swift b/src/frontend/swiftui/Api.swift index e71f173b..53749fbb 100644 --- a/src/frontend/swiftui/Api.swift +++ b/src/frontend/swiftui/Api.swift @@ -402,6 +402,10 @@ func hydraConfigGetProcessArgs() -> HydraStringList { HydraStringList(handle: hydra_config_get_process_args()) } +func hydraConfigGetRecoverFromSegfault() -> UnsafeMutablePointer { + hydra_config_get_recover_from_segfault() +} + func hydraConfigGetGdbEnabled() -> UnsafeMutablePointer { hydra_config_get_gdb_enabled() } diff --git a/src/frontend/swiftui/Settings/DebugSettingsView.swift b/src/frontend/swiftui/Settings/DebugSettingsView.swift index 5825d1b6..bcd27d59 100644 --- a/src/frontend/swiftui/Settings/DebugSettingsView.swift +++ b/src/frontend/swiftui/Settings/DebugSettingsView.swift @@ -4,7 +4,11 @@ struct DebugSettingsView: View { @State private var logOutput: HydraLogOutput = HYDRA_LOG_OUTPUT_INVALID @State private var logFsAccess = false @State private var debugLogging = false + // TODO: process args + + @State private var recoverFromSegfault = false + @State private var gdbEnabled = false @State private var gdbPort: UInt16 = 0 @State private var gdbWaitForClient = false @@ -37,7 +41,16 @@ struct DebugSettingsView: View { hydraConfigGetDebugLogging().pointee = newValue } } + // 
TODO: process arguments + + Section("Error handling") { + Toggle("Recover from segfault", isOn: self.$recoverFromSegfault) + .onChange(of: self.recoverFromSegfault) { _, newValue in + hydraConfigGetRecoverFromSegfault().pointee = newValue + } + } + Section("GDB") { Toggle("Enabled", isOn: self.$gdbEnabled) .onChange(of: self.gdbEnabled) { _, newValue in @@ -58,6 +71,9 @@ struct DebugSettingsView: View { self.logOutput.rawValue = hydraConfigGetLogOutput().pointee self.logFsAccess = hydraConfigGetLogFsAccess().pointee self.debugLogging = hydraConfigGetDebugLogging().pointee + + self.recoverFromSegfault = hydraConfigGetRecoverFromSegfault().pointee + self.gdbEnabled = hydraConfigGetGdbEnabled().pointee self.gdbPort = hydraConfigGetGdbPort().pointee self.gdbWaitForClient = hydraConfigGetGdbWaitForClient().pointee