From 73deba690ea9b772afd17007e6b5ec574c52818b Mon Sep 17 00:00:00 2001 From: trigger Date: Thu, 10 Jun 2021 22:46:29 -0700 Subject: [PATCH] Reimplement RSX reservation access https://github.com/RPCS3/rpcs3/pull/8983 --- rpcs3/Emu/Cell/SPUThread.cpp | 90 +++++++++++++++++++++++-------- rpcs3/Emu/RSX/RSXThread.cpp | 35 ++++++++---- rpcs3/Emu/RSX/RSXThread.h | 3 +- rpcs3/Emu/RSX/rsx_methods.cpp | 21 +++++++- rpcs3/Emu/System.h | 1 + rpcs3/rpcs3qt/emu_settings.h | 2 + rpcs3/rpcs3qt/settings_dialog.cpp | 3 ++ rpcs3/rpcs3qt/settings_dialog.ui | 7 +++ rpcs3/rpcs3qt/tooltips.h | 1 + 9 files changed, 128 insertions(+), 35 deletions(-) diff --git a/rpcs3/Emu/Cell/SPUThread.cpp b/rpcs3/Emu/Cell/SPUThread.cpp index fa56b6c6f3d3..e3975b952e39 100644 --- a/rpcs3/Emu/Cell/SPUThread.cpp +++ b/rpcs3/Emu/Cell/SPUThread.cpp @@ -6,6 +6,7 @@ #include "Emu/System.h" #include "Emu/IdManager.h" +#include "Emu/RSX/RSXThread.h" #include "Emu/Cell/PPUThread.h" #include "Emu/Cell/ErrorCodes.h" #include "Emu/Cell/lv2/sys_spu.h" @@ -68,6 +69,26 @@ static FORCE_INLINE void mov_rdata(decltype(spu_thread::rdata)& dst, const declt } } +// Returns nullptr if rsx does not need pausing on reservations op, rsx ptr otherwise +static FORCE_INLINE rsx::thread* get_rsx_if_needs_res_pause(u32 addr) +{ + if (!g_cfg.core.rsx_accurate_res_access) [[likely]] + { + return {}; + } + + const auto render = rsx::get_current_renderer(); + + ASSUME(render); + + if (render->iomap_table.io[addr >> 20] == -1) [[likely]] + { + return {}; + } + + return render; +} + extern u64 get_timebased_time(); extern u64 get_system_time(); @@ -1673,12 +1694,20 @@ void spu_thread::do_putlluc(const spu_mfc_cmd& args) if (g_cfg.core.spu_accurate_putlluc) { - // Full lock (heavyweight) - // TODO: vm::check_addr + const auto render = get_rsx_if_needs_res_pause(addr); + + if (render) render->pause(); + auto& super_data = *vm::get_super_ptr(addr); - vm::writer_lock lock(addr); - mov_rdata(super_data, to_write); - res.release(res.load() + 127); + { + // Full lock (heavyweight) + // TODO: vm::check_addr + vm::writer_lock lock(addr); + mov_rdata(super_data, to_write); + res.release(res.load() + 127); + } + + if (render) render->unpause(); } else { @@ -1886,15 +1915,23 @@ bool spu_thread::process_mfc_cmd() if (g_cfg.core.spu_accurate_getllar) { *reinterpret_cast*>(&data) += 0; + + const auto render = get_rsx_if_needs_res_pause(addr); + + if (render) render->pause(); + const auto& super_data = *vm::get_super_ptr(addr); + { + // Full lock (heavyweight) + // TODO: vm::check_addr + vm::writer_lock lock(addr); - // Full lock (heavyweight) - // TODO: vm::check_addr - vm::writer_lock lock(addr); + ntime = old_time; + mov_rdata(dst, super_data); + res.release(old_time); + } - ntime = old_time; - mov_rdata(dst, super_data); - res.release(old_time); + if (render) render->unpause(); } else { @@ -1987,22 +2024,29 @@ bool spu_thread::process_mfc_cmd() { *reinterpret_cast*>(&data) += 0; - auto& super_data = *vm::get_super_ptr(addr); + const auto render = get_rsx_if_needs_res_pause(addr); - // Full lock (heavyweight) - // TODO: vm::check_addr - vm::writer_lock lock(addr); + if (render) render->pause(); - if (cmp_rdata(rdata, super_data)) - { - mov_rdata(super_data, to_write); - res.release(old_time + 128); - result = 1; - } - else + auto& super_data = *vm::get_super_ptr(addr); { - res.release(old_time); + // Full lock (heavyweight) + // TODO: vm::check_addr + vm::writer_lock lock(addr); + + if (cmp_rdata(rdata, super_data)) + { + mov_rdata(super_data, to_write); + res.release(old_time + 128); + result = 1; + } + else + { + res.release(old_time); + } } + + if (render) render->unpause(); } else { diff --git a/rpcs3/Emu/RSX/RSXThread.cpp b/rpcs3/Emu/RSX/RSXThread.cpp index 7133a5579a72..afa459861e66 100644 --- a/rpcs3/Emu/RSX/RSXThread.cpp +++ b/rpcs3/Emu/RSX/RSXThread.cpp @@ -607,9 +607,7 @@ namespace rsx // Wait for external pause events if (external_interrupt_lock) { - external_interrupt_ack.store(true); - - while (external_interrupt_lock) _mm_pause(); + wait_pause(); } // Note a possible rollback address @@ -2458,10 +2456,7 @@ namespace rsx //Pause/cont wrappers for FIFO ctrl. Never call this from rsx thread itself! void thread::pause() { - while (external_interrupt_lock.exchange(true)) [[unlikely]] - { - _mm_pause(); - } + external_interrupt_lock++; while (!external_interrupt_ack) { @@ -2470,14 +2465,34 @@ namespace rsx _mm_pause(); } - - external_interrupt_ack.store(false); } void thread::unpause() { // TODO: Clean this shit up - external_interrupt_lock.store(false); + external_interrupt_lock--; + } + + void thread::wait_pause() + { + do + { + if (g_cfg.video.multithreaded_rsx) + { + g_dma_manager.sync(); + } + + external_interrupt_ack.store(true); + + while (external_interrupt_lock) + { + // TODO: Investigate non busy-spinning method + _mm_pause(); + } + + external_interrupt_ack.store(false); + } + while (external_interrupt_lock); } u32 thread::get_load() diff --git a/rpcs3/Emu/RSX/RSXThread.h b/rpcs3/Emu/RSX/RSXThread.h index 00a85158b373..87cf554d29c3 100644 --- a/rpcs3/Emu/RSX/RSXThread.h +++ b/rpcs3/Emu/RSX/RSXThread.h @@ -612,7 +612,7 @@ namespace rsx RsxDmaControl* ctrl = nullptr; rsx_iomap_table iomap_table; u32 restore_point = 0; - atomic_t external_interrupt_lock{ false }; + atomic_t external_interrupt_lock{ 0 }; atomic_t external_interrupt_ack{ false }; void flush_fifo(); void recover_fifo(); @@ -912,6 +912,7 @@ namespace rsx void pause(); void unpause(); + void wait_pause(); // Get RSX approximate load in % u32 get_load(); diff --git a/rpcs3/Emu/RSX/rsx_methods.cpp b/rpcs3/Emu/RSX/rsx_methods.cpp index de7b4625652b..b60f105bdcf8 100644 --- a/rpcs3/Emu/RSX/rsx_methods.cpp +++ b/rpcs3/Emu/RSX/rsx_methods.cpp @@ -100,17 +100,26 @@ namespace rsx if (Emu.IsStopped()) return; + // Wait for external pause events + if (rsx->external_interrupt_lock) + { + rsx->wait_pause(); + continue; + } + if (const auto tdr = static_cast(g_cfg.video.driver_recovery_timeout)) { if (Emu.IsPaused()) { + const u64 start0 = get_system_time(); + while (Emu.IsPaused()) { std::this_thread::sleep_for(1ms); } // Reset - start = get_system_time(); + start += get_system_time() - start0; } else { @@ -829,6 +838,8 @@ namespace rsx const u32 pixel_offset = (method_registers.blit_engine_output_pitch_nv3062() * y) + (x * write_len); u32 address = get_address(method_registers.blit_engine_output_offset_nv3062() + pixel_offset + (index * write_len), method_registers.blit_engine_output_location_nv3062()); + //auto res = vm::passive_lock(address, address + write_len); + switch (write_len) { case 4: @@ -841,6 +852,7 @@ namespace rsx fmt::throw_exception("Unreachable" HERE); } + //res->release(0); rsx->m_graphics_state |= rsx::pipeline_state::fragment_program_dirty; } }; @@ -993,6 +1005,9 @@ namespace rsx const u32 dst_address = get_address(dst_offset, dst_dma); const u32 src_line_length = (in_w * in_bpp); + + //auto res = vm::passive_lock(dst_address, dst_address + (in_pitch * (in_h - 1) + src_line_length)); + if (is_block_transfer && (clip_h == 1 || (in_pitch == out_pitch && src_line_length == in_pitch))) { const u32 nb_lines = std::min(clip_h, in_h); @@ -1360,6 +1375,8 @@ namespace rsx } } + //auto res = vm::passive_lock(write_address, data_length + write_address); + u8 *dst = vm::_ptr(write_address); const u8 *src = vm::_ptr(read_address); @@ -1415,6 +1432,8 @@ namespace rsx } } } + + //res->release(0); } } diff --git a/rpcs3/Emu/System.h b/rpcs3/Emu/System.h index baeee9f1cd47..b84b88b8cbe3 100644 --- a/rpcs3/Emu/System.h +++ b/rpcs3/Emu/System.h @@ -419,6 +419,7 @@ struct cfg_root : cfg::node cfg::_enum spu_block_size{this, "SPU Block Size", spu_block_size_type::safe}; cfg::_bool spu_accurate_getllar{this, "Accurate GETLLAR", false}; cfg::_bool spu_accurate_putlluc{this, "Accurate PUTLLUC", false}; + cfg::_bool rsx_accurate_res_access{this, "Accurate RSX reservation access", false, true}; cfg::_bool spu_verification{this, "SPU Verification", true}; // Should be enabled cfg::_bool spu_cache{this, "SPU Cache", true}; cfg::_bool spu_prof{this, "SPU Profiler", false}; diff --git a/rpcs3/rpcs3qt/emu_settings.h b/rpcs3/rpcs3qt/emu_settings.h index b31a8d5fdb2e..0489b287c4e4 100644 --- a/rpcs3/rpcs3qt/emu_settings.h +++ b/rpcs3/rpcs3qt/emu_settings.h @@ -41,6 +41,7 @@ class emu_settings : public QObject EnableTSX, AccurateGETLLAR, AccuratePUTLLUC, + AccurateRSXAccess, AccurateXFloat, ApproximateXFloat, SetDAZandFTZ, @@ -296,6 +297,7 @@ public Q_SLOTS: { EnableTSX, { "Core", "Enable TSX"}}, { AccurateGETLLAR, { "Core", "Accurate GETLLAR"}}, { AccuratePUTLLUC, { "Core", "Accurate PUTLLUC"}}, + { AccurateRSXAccess, { "Core", "Accurate RSX reservation access"}}, { AccurateXFloat, { "Core", "Accurate xfloat"}}, { ApproximateXFloat, { "Core", "Approximate xfloat"}}, { SetDAZandFTZ, { "Core", "Set DAZ and FTZ"}}, diff --git a/rpcs3/rpcs3qt/settings_dialog.cpp b/rpcs3/rpcs3qt/settings_dialog.cpp index 9cd0d3b6ce20..f79f65ea7fc2 100644 --- a/rpcs3/rpcs3qt/settings_dialog.cpp +++ b/rpcs3/rpcs3qt/settings_dialog.cpp @@ -1613,6 +1613,9 @@ settings_dialog::settings_dialog(std::shared_ptr guiSettings, std: xemu_settings->EnhanceCheckBox(ui->accuratePUTLLUC, emu_settings::AccuratePUTLLUC); SubscribeTooltip(ui->accuratePUTLLUC, tooltips.settings.accurate_putlluc); + xemu_settings->EnhanceCheckBox(ui->accurateRSXAccess, emu_settings::AccurateRSXAccess); + SubscribeTooltip(ui->accurateRSXAccess, tooltips.settings.accurate_rsx_access); + xemu_settings->EnhanceCheckBox(ui->hookStFunc, emu_settings::HookStaticFuncs); SubscribeTooltip(ui->hookStFunc, tooltips.settings.hook_static_functions); diff --git a/rpcs3/rpcs3qt/settings_dialog.ui b/rpcs3/rpcs3qt/settings_dialog.ui index 4313b43920b3..7b3a2427e54b 100644 --- a/rpcs3/rpcs3qt/settings_dialog.ui +++ b/rpcs3/rpcs3qt/settings_dialog.ui @@ -3134,6 +3134,13 @@ + + + + Accurate RSX reservation access + + + diff --git a/rpcs3/rpcs3qt/tooltips.h b/rpcs3/rpcs3qt/tooltips.h index 22e561cc1a3a..0b93d0888a55 100644 --- a/rpcs3/rpcs3qt/tooltips.h +++ b/rpcs3/rpcs3qt/tooltips.h @@ -78,6 +78,7 @@ class Tooltips : public QObject const QString set_daz_and_ftz = tr("Never use this."); const QString accurate_getllar = tr("Never use this."); const QString accurate_putlluc = tr("Never use this."); + const QString accurate_rsx_access = tr("Never use this."); const QString hook_static_functions = tr("Allows to hook some functions like 'memcpy' replacing them with high-level implementations. May do nothing or break things. Experimental."); const QString gl_legacy_buffers = tr("Enables use of classic OpenGL buffers which allows capturing tools to work with RPCS3 e.g RenderDoc.\nIf unsure, don't use this option."); const QString force_high_pz = tr("Only useful when debugging differences in GPU hardware.\nNot necessary for average users.\nIf unsure, don't use this option.");