Skip to content

Commit

Permalink
GS: Clean up CLUT dirty handling
Browse files Browse the repository at this point in the history
  • Loading branch information
refractionpcsx2 committed Sep 25, 2023
1 parent ec9e540 commit fef282f
Show file tree
Hide file tree
Showing 5 changed files with 64 additions and 100 deletions.
8 changes: 2 additions & 6 deletions pcsx2/GS/GSClut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,13 +120,9 @@ u8 GSClut::IsInvalid()
return m_write.dirty;
}

void GSClut::ClearDrawInvalidity(bool clear_all)
void GSClut::ClearDrawInvalidity()
{
if (clear_all)
{
m_write.dirty = 0;
}
else if (m_write.dirty & 2)
if (m_write.dirty & 2)
{
m_write.dirty = 1;
}
Expand Down
2 changes: 1 addition & 1 deletion pcsx2/GS/GSClut.h
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ class alignas(32) GSClut final : public GSAlignedClass<32>

bool InvalidateRange(u32 start_block, u32 end_block, bool is_draw = false);
u8 IsInvalid();
void ClearDrawInvalidity(bool clear_all);
void ClearDrawInvalidity();
u32 GetCLUTCBP();
u32 GetCLUTCPSM();
void SetNextCLUTTEX0(u64 CBP);
Expand Down
150 changes: 59 additions & 91 deletions pcsx2/GS/GSState.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -849,20 +849,20 @@ void GSState::ApplyTEX0(GIFRegTEX0& TEX0)

GL_REG("Apply TEX0_%d = 0x%x_%x", i, TEX0.U32[1], TEX0.U32[0]);

if ((TEX0.PSM & 0x7) >= 3 && TEX0.CLD)
{
m_mem.m_clut.ClearDrawInvalidity();
m_mem.m_clut.SetNextCLUTTEX0(TEX0.U64);
CheckCLUTValidity(m_prev_env.PRIM.PRIM);
}

// Even if TEX0 did not change, a new palette may have been uploaded and will overwrite the one currently queued for drawing.
const bool wt = m_mem.m_clut.WriteTest(TEX0, m_env.TEXCLUT);

// No need to flush on CLUT if we aren't texture mapping.
if (wt)
{
m_mem.m_clut.SetNextCLUTTEX0(TEX0.U64);
if (TEX0.CBP != m_mem.m_clut.GetCLUTCBP())
{
m_mem.m_clut.ClearDrawInvalidity(true);
CLUTAutoFlush(m_prev_env.PRIM.PRIM);
}

if ((m_prev_env.PRIM.TME && (m_prev_env.CTXT[m_prev_env.PRIM.CTXT].TEX0.PSM & 0x7) >= 3) || m_mem.m_clut.IsInvalid())
if ((m_prev_env.PRIM.TME && (m_prev_env.CTXT[m_prev_env.PRIM.CTXT].TEX0.PSM & 0x7) >= 3) || (m_mem.m_clut.IsInvalid() & 2))
Flush(GSFlushReason::CLUTCHANGE);
else
FlushWrite();
Expand Down Expand Up @@ -924,7 +924,10 @@ void GSState::ApplyTEX0(GIFRegTEX0& TEX0)
m_mem.m_clut.Write(m_env.CTXT[i].TEX0, m_env.TEXCLUT);
}

constexpr u64 mask = 0x1f78001fffffffffull; // TBP0 TBW PSM TW TH TCC TFX CPSM CSA
u64 mask = 0x1fffffffffull; // TBP0 TBW PSM TW TH TCC TFX
if ((TEX0.PSM & 0x7) >= 3)
mask |= 0x1f78000000000000ull; // CPSM CSA

if (i == m_prev_env.PRIM.CTXT)
{
if ((m_prev_env.CTXT[i].TEX0.U64 ^ m_env.CTXT[i].TEX0.U64) & mask)
Expand Down Expand Up @@ -1488,6 +1491,9 @@ void GSState::Flush(GSFlushReason reason)
{
m_state_flush_reason = reason;

// Used to prompt the current draw that it's modifying its own CLUT.
CheckCLUTValidity(m_prev_env.PRIM.PRIM);

if (m_dirty_gs_regs)
{
m_draw_env = &m_prev_env;
Expand Down Expand Up @@ -1578,7 +1584,7 @@ inline bool GSState::TestDrawChanged()
return true;

const int context = m_prev_env.PRIM.CTXT;
const GSDrawingContext ctx = m_prev_env.CTXT[context];
const GSDrawingContext& ctx = m_prev_env.CTXT[context];
// If the frame is getting updated check the FRAME, otherwise, we can ignore it
if ((ctx.TEST.ATST != ATST_NEVER) || !ctx.TEST.ATE || (ctx.TEST.AFAIL & 1) || ctx.TEST.DATE)
{
Expand Down Expand Up @@ -2959,63 +2965,72 @@ __forceinline bool GSState::IsAutoFlushDraw(u32 prim)
return false;
}

__forceinline void GSState::CLUTAutoFlush(u32 prim)
static constexpr u32 NumIndicesForPrim(u32 prim)
{
if (m_mem.m_clut.IsInvalid() & 2)
return;

u32 n = 1;

switch (prim)
{
case GS_POINTLIST:
n = 1;
break;
case GS_INVALID:
return 1;
case GS_LINELIST:
case GS_LINESTRIP:
case GS_SPRITE:
n = 2;
break;
case GS_LINESTRIP:
return 2;
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
n = 3;
break;
case GS_TRIANGLEFAN:
n = 3;
break;
case GS_INVALID:
return 3;
default:
break;
return 0;
}
}

const int ctx = m_prev_env.PRIM.CTXT;
if ((m_index.tail > 0 || (m_vertex.tail == n - 1)) && (GSLocalMemory::m_psm[m_prev_env.CTXT[ctx].TEX0.PSM].pal == 0 || !m_prev_env.PRIM.TME))
static constexpr u32 MaxVerticesForPrim(u32 prim)
{
switch (prim)
{
const GSLocalMemory::psm_t& fpsm = GSLocalMemory::m_psm[m_prev_env.CTXT[ctx].FRAME.PSM];
// Four indices per 1 vertex.
case GS_POINTLIST:
case GS_INVALID:

if ((m_prev_env.CTXT[ctx].FRAME.FBMSK & fpsm.fmsk) != fpsm.fmsk && GSLocalMemory::m_psm[m_mem.m_clut.GetCLUTCPSM()].bpp == fpsm.bpp)
{
const u32 startbp = fpsm.info.bn(temp_draw_rect.x, temp_draw_rect.y, m_prev_env.CTXT[ctx].FRAME.Block(), m_prev_env.CTXT[ctx].FRAME.FBW);
// Indices are shifted left by 2 to form quads.
case GS_LINELIST:
case GS_LINESTRIP:
return (std::numeric_limits<u16>::max() / 4) - 4;

// If it's a point, then we only have one coord, so the address for start and end will be the same, which is bad for the following check.
u32 endbp = startbp;
// otherwise calculate the end.
if (prim != GS_POINTLIST || (m_index.tail > 1))
endbp = fpsm.info.bn(temp_draw_rect.z - 1, temp_draw_rect.w - 1, m_prev_env.CTXT[ctx].FRAME.Block(), m_prev_env.CTXT[ctx].FRAME.FBW);
// Four indices per two vertices.
case GS_SPRITE:
return (std::numeric_limits<u16>::max() / 2) - 2;

m_mem.m_clut.InvalidateRange(startbp, endbp, true);
}
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
case GS_TRIANGLEFAN:
default:
return (std::numeric_limits<u16>::max() - 3);
}
}

__forceinline void GSState::CheckCLUTValidity(u32 prim)
{
if (m_mem.m_clut.IsInvalid() & 2)
return;

u32 n = NumIndicesForPrim(prim);

const GSLocalMemory::psm_t& zpsm = GSLocalMemory::m_psm[m_prev_env.CTXT[ctx].ZBUF.PSM];
if (!m_prev_env.CTXT[ctx].ZBUF.ZMSK && GSLocalMemory::m_psm[m_mem.m_clut.GetCLUTCPSM()].bpp == zpsm.bpp)
const GSDrawingContext& ctx = m_prev_env.CTXT[m_prev_env.PRIM.CTXT];
if ((m_index.tail > 0 || (m_vertex.tail == n - 1)) && (GSLocalMemory::m_psm[ctx.TEX0.PSM].pal == 0 || !m_prev_env.PRIM.TME))
{
const GSLocalMemory::psm_t& fpsm = GSLocalMemory::m_psm[ctx.FRAME.PSM];
const bool frame_needed = !(ctx.TEST.ATE && ctx.TEST.ATST == 0 && ctx.TEST.AFAIL == 2) && ((ctx.FRAME.FBMSK & fpsm.fmsk) != fpsm.fmsk);
if (frame_needed && GSLocalMemory::m_psm[m_mem.m_clut.GetCLUTCPSM()].bpp == fpsm.bpp)
{
const u32 startbp = zpsm.info.bn(temp_draw_rect.x, temp_draw_rect.y, m_prev_env.CTXT[ctx].ZBUF.Block(), m_prev_env.CTXT[ctx].FRAME.FBW);
const u32 startbp = fpsm.info.bn(temp_draw_rect.x, temp_draw_rect.y, ctx.FRAME.Block(), ctx.FRAME.FBW);

// If it's a point, then we only have one coord, so the address for start and end will be the same, which is bad for the following check.
u32 endbp = startbp;
// otherwise calculate the end.
if (prim != GS_POINTLIST || (m_index.tail > 1))
endbp = zpsm.info.bn(temp_draw_rect.z - 1, temp_draw_rect.w - 1, m_prev_env.CTXT[ctx].ZBUF.Block(), m_prev_env.CTXT[ctx].FRAME.FBW);
endbp = fpsm.info.bn(temp_draw_rect.z - 1, temp_draw_rect.w - 1, ctx.FRAME.Block(), ctx.FRAME.FBW);

m_mem.m_clut.InvalidateRange(startbp, endbp, true);
}
Expand Down Expand Up @@ -3292,51 +3307,6 @@ __forceinline void GSState::HandleAutoFlush()
}
}

// Maps a GS primitive type to a small per-primitive count:
//   points (and GS_INVALID)        -> 1
//   lines, line strips and sprites -> 2
//   triangle lists, strips, fans   -> 3
// Any other value yields 0.
// NOTE(review): presumably this is the number of vertices needed to complete
// one primitive of the given type - confirm against the callers.
static constexpr u32 NumIndicesForPrim(u32 prim)
{
	u32 count = 0; // Result for unrecognised primitive values.

	switch (prim)
	{
		case GS_TRIANGLELIST:
		case GS_TRIANGLESTRIP:
		case GS_TRIANGLEFAN:
			count = 3;
			break;

		case GS_LINELIST:
		case GS_LINESTRIP:
		case GS_SPRITE:
			count = 2;
			break;

		case GS_POINTLIST:
		case GS_INVALID:
			count = 1;
			break;

		default:
			break;
	}

	return count;
}

// Returns the vertex limit for the given GS primitive type. Every limit is
// derived from the largest value a u16 can hold, reduced according to the
// index expansion noted on each case group. VertexKick issues a VERTEXCOUNT
// flush once m_vertex.tail reaches the returned value.
static constexpr u32 MaxVerticesForPrim(u32 prim)
{
	constexpr u32 u16_limit = std::numeric_limits<u16>::max();

	switch (prim)
	{
		// Four indices per two vertices.
		case GS_SPRITE:
			return (u16_limit / 2) - 2;

		// Points: four indices per 1 vertex.
		case GS_POINTLIST:
		case GS_INVALID:
		// Lines: indices are shifted left by 2 to form quads.
		case GS_LINELIST:
		case GS_LINESTRIP:
			return (u16_limit / 4) - 4;

		// Triangles, plus any unrecognised primitive value.
		case GS_TRIANGLELIST:
		case GS_TRIANGLESTRIP:
		case GS_TRIANGLEFAN:
		default:
			return u16_limit - 3;
	}
}

template <u32 prim, bool auto_flush, bool index_swap>
__forceinline void GSState::VertexKick(u32 skip)
{
Expand Down Expand Up @@ -3585,8 +3555,6 @@ __forceinline void GSState::VertexKick(u32 skip)
temp_draw_rect = draw_min.blend32<12>(draw_max);
temp_draw_rect = temp_draw_rect.rintersect(m_context->scissor.in);

CLUTAutoFlush(prim);

constexpr u32 max_vertices = MaxVerticesForPrim(prim);
if (max_vertices != 0 && m_vertex.tail >= max_vertices)
Flush(VERTEXCOUNT);
Expand Down
2 changes: 1 addition & 1 deletion pcsx2/GS/GSState.h
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,7 @@ class GSState : public GSAlignedClass<32>
bool IsAutoFlushDraw(u32 prim);
template<u32 prim, bool index_swap>
void HandleAutoFlush();
void CLUTAutoFlush(u32 prim);
void CheckCLUTValidity(u32 prim);

template <u32 prim, bool auto_flush, bool index_swap>
void VertexKick(u32 skip);
Expand Down
2 changes: 1 addition & 1 deletion pcsx2/GS/Renderers/HW/GSRendererHW.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1917,7 +1917,7 @@ void GSRendererHW::Draw()
if (GSConfig.UserHacks_CPUCLUTRender > 0 || GSConfig.UserHacks_GPUTargetCLUTMode != GSGPUTargetCLUTMode::Disabled)
{
const CLUTDrawTestResult result = (GSConfig.UserHacks_CPUCLUTRender == 2) ? PossibleCLUTDrawAggressive() : PossibleCLUTDraw();
m_mem.m_clut.ClearDrawInvalidity(false);
m_mem.m_clut.ClearDrawInvalidity();
if (result == CLUTDrawTestResult::CLUTDrawOnCPU && GSConfig.UserHacks_CPUCLUTRender > 0)
{
if (SwPrimRender(*this, true, true))
Expand Down

0 comments on commit fef282f

Please sign in to comment.