From fef282fcd52b46f2cdfd973e9263ef69b037218c Mon Sep 17 00:00:00 2001 From: refractionpcsx2 Date: Mon, 25 Sep 2023 04:22:03 +0100 Subject: [PATCH] GS: Clean up CLUT dirty handling --- pcsx2/GS/GSClut.cpp | 8 +- pcsx2/GS/GSClut.h | 2 +- pcsx2/GS/GSState.cpp | 150 ++++++++++--------------- pcsx2/GS/GSState.h | 2 +- pcsx2/GS/Renderers/HW/GSRendererHW.cpp | 2 +- 5 files changed, 64 insertions(+), 100 deletions(-) diff --git a/pcsx2/GS/GSClut.cpp b/pcsx2/GS/GSClut.cpp index 8100a6f7c85bf..a123346cd1dce 100644 --- a/pcsx2/GS/GSClut.cpp +++ b/pcsx2/GS/GSClut.cpp @@ -120,13 +120,9 @@ u8 GSClut::IsInvalid() return m_write.dirty; } -void GSClut::ClearDrawInvalidity(bool clear_all) +void GSClut::ClearDrawInvalidity() { - if (clear_all) - { - m_write.dirty = 0; - } - else if (m_write.dirty & 2) + if (m_write.dirty & 2) { m_write.dirty = 1; } diff --git a/pcsx2/GS/GSClut.h b/pcsx2/GS/GSClut.h index 336ac65aacdbb..f616ee86d20e4 100644 --- a/pcsx2/GS/GSClut.h +++ b/pcsx2/GS/GSClut.h @@ -110,7 +110,7 @@ class alignas(32) GSClut final : public GSAlignedClass<32> bool InvalidateRange(u32 start_block, u32 end_block, bool is_draw = false); u8 IsInvalid(); - void ClearDrawInvalidity(bool clear_all); + void ClearDrawInvalidity(); u32 GetCLUTCBP(); u32 GetCLUTCPSM(); void SetNextCLUTTEX0(u64 CBP); diff --git a/pcsx2/GS/GSState.cpp b/pcsx2/GS/GSState.cpp index f41466623c8f6..8c4d9afac01c2 100644 --- a/pcsx2/GS/GSState.cpp +++ b/pcsx2/GS/GSState.cpp @@ -849,20 +849,20 @@ void GSState::ApplyTEX0(GIFRegTEX0& TEX0) GL_REG("Apply TEX0_%d = 0x%x_%x", i, TEX0.U32[1], TEX0.U32[0]); + if ((TEX0.PSM & 0x7) >= 3 && TEX0.CLD) + { + m_mem.m_clut.ClearDrawInvalidity(); + m_mem.m_clut.SetNextCLUTTEX0(TEX0.U64); + CheckCLUTValidity(m_prev_env.PRIM.PRIM); + } + // Even if TEX0 did not change, a new palette may have been uploaded and will overwrite the currently queued for drawing. const bool wt = m_mem.m_clut.WriteTest(TEX0, m_env.TEXCLUT); // No need to flush on CLUT if we aren't texture mapping. if (wt) { - m_mem.m_clut.SetNextCLUTTEX0(TEX0.U64); - if (TEX0.CBP != m_mem.m_clut.GetCLUTCBP()) - { - m_mem.m_clut.ClearDrawInvalidity(true); - CLUTAutoFlush(m_prev_env.PRIM.PRIM); - } - - if ((m_prev_env.PRIM.TME && (m_prev_env.CTXT[m_prev_env.PRIM.CTXT].TEX0.PSM & 0x7) >= 3) || m_mem.m_clut.IsInvalid()) + if ((m_prev_env.PRIM.TME && (m_prev_env.CTXT[m_prev_env.PRIM.CTXT].TEX0.PSM & 0x7) >= 3) || (m_mem.m_clut.IsInvalid() & 2)) Flush(GSFlushReason::CLUTCHANGE); else FlushWrite(); @@ -924,7 +924,10 @@ void GSState::ApplyTEX0(GIFRegTEX0& TEX0) m_mem.m_clut.Write(m_env.CTXT[i].TEX0, m_env.TEXCLUT); } - constexpr u64 mask = 0x1f78001fffffffffull; // TBP0 TBW PSM TW TH TCC TFX CPSM CSA + u64 mask = 0x1fffffffffull; // TBP0 TBW PSM TW TH TCC TFX + if ((TEX0.PSM & 0x7) >= 3) + mask |= 0x1f78000000000000ull; // CPSM CSA + if (i == m_prev_env.PRIM.CTXT) { if ((m_prev_env.CTXT[i].TEX0.U64 ^ m_env.CTXT[i].TEX0.U64) & mask) @@ -1488,6 +1491,9 @@ void GSState::Flush(GSFlushReason reason) { m_state_flush_reason = reason; + // Used to prompt the current draw that it's modifying its own CLUT. + CheckCLUTValidity(m_prev_env.PRIM.PRIM); + if (m_dirty_gs_regs) { m_draw_env = &m_prev_env; @@ -1578,7 +1584,7 @@ inline bool GSState::TestDrawChanged() return true; const int context = m_prev_env.PRIM.CTXT; - const GSDrawingContext ctx = m_prev_env.CTXT[context]; + const GSDrawingContext& ctx = m_prev_env.CTXT[context]; // If the frame is getting updated check the FRAME, otherwise, we can ignore it if ((ctx.TEST.ATST != ATST_NEVER) || !ctx.TEST.ATE || (ctx.TEST.AFAIL & 1) || ctx.TEST.DATE) { @@ -2959,63 +2965,72 @@ __forceinline bool GSState::IsAutoFlushDraw(u32 prim) return false; } -__forceinline void GSState::CLUTAutoFlush(u32 prim) +static constexpr u32 NumIndicesForPrim(u32 prim) { - if (m_mem.m_clut.IsInvalid() & 2) - return; - - u32 n = 1; - switch (prim) { case GS_POINTLIST: - n = 1; - break; + case GS_INVALID: + return 1; case GS_LINELIST: - case GS_LINESTRIP: case GS_SPRITE: - n = 2; - break; + case GS_LINESTRIP: + return 2; case GS_TRIANGLELIST: case GS_TRIANGLESTRIP: - n = 3; - break; case GS_TRIANGLEFAN: - n = 3; - break; - case GS_INVALID: + return 3; default: - break; + return 0; } +} - const int ctx = m_prev_env.PRIM.CTXT; - if ((m_index.tail > 0 || (m_vertex.tail == n - 1)) && (GSLocalMemory::m_psm[m_prev_env.CTXT[ctx].TEX0.PSM].pal == 0 || !m_prev_env.PRIM.TME)) +static constexpr u32 MaxVerticesForPrim(u32 prim) +{ + switch (prim) { - const GSLocalMemory::psm_t& fpsm = GSLocalMemory::m_psm[m_prev_env.CTXT[ctx].FRAME.PSM]; + // Four indices per 1 vertex. + case GS_POINTLIST: + case GS_INVALID: - if ((m_prev_env.CTXT[ctx].FRAME.FBMSK & fpsm.fmsk) != fpsm.fmsk && GSLocalMemory::m_psm[m_mem.m_clut.GetCLUTCPSM()].bpp == fpsm.bpp) - { - const u32 startbp = fpsm.info.bn(temp_draw_rect.x, temp_draw_rect.y, m_prev_env.CTXT[ctx].FRAME.Block(), m_prev_env.CTXT[ctx].FRAME.FBW); + // Indices are shifted left by 2 to form quads. + case GS_LINELIST: + case GS_LINESTRIP: + return (std::numeric_limits::max() / 4) - 4; - // If it's a point, then we only have one coord, so the address for start and end will be the same, which is bad for the following check. - u32 endbp = startbp; - // otherwise calculate the end. - if (prim != GS_POINTLIST || (m_index.tail > 1)) - endbp = fpsm.info.bn(temp_draw_rect.z - 1, temp_draw_rect.w - 1, m_prev_env.CTXT[ctx].FRAME.Block(), m_prev_env.CTXT[ctx].FRAME.FBW); + // Four indices per two vertices. + case GS_SPRITE: + return (std::numeric_limits::max() / 2) - 2; - m_mem.m_clut.InvalidateRange(startbp, endbp, true); - } + case GS_TRIANGLELIST: + case GS_TRIANGLESTRIP: + case GS_TRIANGLEFAN: + default: + return (std::numeric_limits::max() - 3); + } +} + +__forceinline void GSState::CheckCLUTValidity(u32 prim) +{ + if (m_mem.m_clut.IsInvalid() & 2) + return; + + u32 n = NumIndicesForPrim(prim); - const GSLocalMemory::psm_t& zpsm = GSLocalMemory::m_psm[m_prev_env.CTXT[ctx].ZBUF.PSM]; - if (!m_prev_env.CTXT[ctx].ZBUF.ZMSK && GSLocalMemory::m_psm[m_mem.m_clut.GetCLUTCPSM()].bpp == zpsm.bpp) + const GSDrawingContext& ctx = m_prev_env.CTXT[m_prev_env.PRIM.CTXT]; + if ((m_index.tail > 0 || (m_vertex.tail == n - 1)) && (GSLocalMemory::m_psm[ctx.TEX0.PSM].pal == 0 || !m_prev_env.PRIM.TME)) + { + const GSLocalMemory::psm_t& fpsm = GSLocalMemory::m_psm[ctx.FRAME.PSM]; + const bool frame_needed = !(ctx.TEST.ATE && ctx.TEST.ATST == 0 && ctx.TEST.AFAIL == 2) && ((ctx.FRAME.FBMSK & fpsm.fmsk) != fpsm.fmsk); + if (frame_needed && GSLocalMemory::m_psm[m_mem.m_clut.GetCLUTCPSM()].bpp == fpsm.bpp) { - const u32 startbp = zpsm.info.bn(temp_draw_rect.x, temp_draw_rect.y, m_prev_env.CTXT[ctx].ZBUF.Block(), m_prev_env.CTXT[ctx].FRAME.FBW); + const u32 startbp = fpsm.info.bn(temp_draw_rect.x, temp_draw_rect.y, ctx.FRAME.Block(), ctx.FRAME.FBW); // If it's a point, then we only have one coord, so the address for start and end will be the same, which is bad for the following check. u32 endbp = startbp; // otherwise calculate the end. if (prim != GS_POINTLIST || (m_index.tail > 1)) - endbp = zpsm.info.bn(temp_draw_rect.z - 1, temp_draw_rect.w - 1, m_prev_env.CTXT[ctx].ZBUF.Block(), m_prev_env.CTXT[ctx].FRAME.FBW); + endbp = fpsm.info.bn(temp_draw_rect.z - 1, temp_draw_rect.w - 1, ctx.FRAME.Block(), ctx.FRAME.FBW); m_mem.m_clut.InvalidateRange(startbp, endbp, true); } @@ -3292,51 +3307,6 @@ __forceinline void GSState::HandleAutoFlush() } } -static constexpr u32 NumIndicesForPrim(u32 prim) -{ - switch (prim) - { - case GS_POINTLIST: - case GS_INVALID: - return 1; - case GS_LINELIST: - case GS_SPRITE: - case GS_LINESTRIP: - return 2; - case GS_TRIANGLELIST: - case GS_TRIANGLESTRIP: - case GS_TRIANGLEFAN: - return 3; - default: - return 0; - } -} - -static constexpr u32 MaxVerticesForPrim(u32 prim) -{ - switch (prim) - { - // Four indices per 1 vertex. - case GS_POINTLIST: - case GS_INVALID: - - // Indices are shifted left by 2 to form quads. - case GS_LINELIST: - case GS_LINESTRIP: - return (std::numeric_limits::max() / 4) - 4; - - // Four indices per two vertices. - case GS_SPRITE: - return (std::numeric_limits::max() / 2) - 2; - - case GS_TRIANGLELIST: - case GS_TRIANGLESTRIP: - case GS_TRIANGLEFAN: - default: - return (std::numeric_limits::max() - 3); - } -} - template __forceinline void GSState::VertexKick(u32 skip) { @@ -3585,8 +3555,6 @@ __forceinline void GSState::VertexKick(u32 skip) temp_draw_rect = draw_min.blend32<12>(draw_max); temp_draw_rect = temp_draw_rect.rintersect(m_context->scissor.in); - CLUTAutoFlush(prim); - constexpr u32 max_vertices = MaxVerticesForPrim(prim); if (max_vertices != 0 && m_vertex.tail >= max_vertices) Flush(VERTEXCOUNT); diff --git a/pcsx2/GS/GSState.h b/pcsx2/GS/GSState.h index 69cdae41ba9e4..9e3fd1de29985 100644 --- a/pcsx2/GS/GSState.h +++ b/pcsx2/GS/GSState.h @@ -164,7 +164,7 @@ class GSState : public GSAlignedClass<32> bool IsAutoFlushDraw(u32 prim); template void HandleAutoFlush(); - void CLUTAutoFlush(u32 prim); + void CheckCLUTValidity(u32 prim); template void VertexKick(u32 skip); diff --git a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp index c0bda25d2cc29..70dbf20d9307d 100644 --- a/pcsx2/GS/Renderers/HW/GSRendererHW.cpp +++ b/pcsx2/GS/Renderers/HW/GSRendererHW.cpp @@ -1917,7 +1917,7 @@ void GSRendererHW::Draw() if (GSConfig.UserHacks_CPUCLUTRender > 0 || GSConfig.UserHacks_GPUTargetCLUTMode != GSGPUTargetCLUTMode::Disabled) { const CLUTDrawTestResult result = (GSConfig.UserHacks_CPUCLUTRender == 2) ? PossibleCLUTDrawAggressive() : PossibleCLUTDraw(); - m_mem.m_clut.ClearDrawInvalidity(false); + m_mem.m_clut.ClearDrawInvalidity(); if (result == CLUTDrawTestResult::CLUTDrawOnCPU && GSConfig.UserHacks_CPUCLUTRender > 0) { if (SwPrimRender(*this, true, true))