Skip to content

Commit

Permalink
GXPerf OK
Browse files Browse the repository at this point in the history
  • Loading branch information
LagoLunatic committed Nov 10, 2024
1 parent 990068e commit c81da30
Show file tree
Hide file tree
Showing 7 changed files with 85 additions and 114 deletions.
4 changes: 2 additions & 2 deletions configure.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,7 @@ def MatchingFor(*versions):
Object(Matching, "f_ap/f_ap_game.cpp"),

# f_op
Object(Matching, "f_op/f_op_actor.cpp", extra_cflags=['-pragma "nosyminline on"']),
Object(Matching, "f_op/f_op_actor.cpp", extra_cflags=["-sym off"]),
Object(Matching, "f_op/f_op_actor_iter.cpp"),
Object(Matching, "f_op/f_op_actor_tag.cpp"),
Object(Matching, "f_op/f_op_actor_mng.cpp", extra_cflags=['-pragma "nosyminline on"']),
Expand Down Expand Up @@ -1165,7 +1165,7 @@ def MatchingFor(*versions):
Object(NonMatching, "dolphin/gx/GXStubs.c"),
Object(NonMatching, "dolphin/gx/GXDisplayList.c"),
Object(NonMatching, "dolphin/gx/GXTransform.c", extra_cflags=["-fp_contract off"]),
Object(NonMatching, "dolphin/gx/GXPerf.c"),
Object(Matching, "dolphin/gx/GXPerf.c"),
],
),
DolphinLib(
Expand Down
22 changes: 18 additions & 4 deletions include/dolphin/gx/GX.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,8 @@ extern "C" {
(reg) = ((u32) (reg) & ~(((1 << (nbits)) - 1) << (shift))) | \
((u32) (value) << (shift));

/*
 * Inserts the low `size` bits of `newFlag` into `regOrg` at bit offset `shift`
 * (bit 0 = least significant) with a single rlwimi; all other bits of
 * `regOrg` are preserved.
 *
 * The expansion deliberately has no trailing semicolon: the caller's own `;`
 * completes the do/while(0), so `FAST_FLAG_SET(...);` is exactly one statement
 * and is safe inside an unbraced if/else.
 */
#define FAST_FLAG_SET(regOrg, newFlag, shift, size) \
    do { \
        (regOrg) = (u32)__rlwimi((int)(regOrg), (int)(newFlag), (shift), (32 - (shift) - (size)), (31 - (shift))); \
    } while (0)
/*
 * Replaces the `size`-bit field at bit offset `shift` of `reg` with `val`.
 *
 * `val` is NOT masked to `size` bits: passing a value wider than the field
 * corrupts the neighbouring higher bits, so callers must keep it in range.
 *
 * The mask is built from an unsigned 1 so that fields reaching bit 31
 * (e.g. size 8, shift 24) do not overflow a signed int. The expansion is a
 * single parenthesised expression with no trailing semicolon or dangling
 * line continuation; callers supply the terminating `;`.
 */
#define SET_REG_FIELD(reg, size, shift, val) \
    ((reg) = ((u32)(reg) & ~(((1u << (size)) - 1u) << (shift))) | ((u32)(val) << (shift)))

#define GX_LOAD_BP_REG 0x61
#define GX_NOP 0
Expand Down Expand Up @@ -287,6 +285,22 @@ do { \
regAddr = addr; \
} while (0)

/*
 * Reads a counter that is split across two CP registers and returns it as a
 * single u32.
 *
 * regAddrL - register index passed to GX_GET_CP_REG for the low part
 * regAddrH - register index passed to GX_GET_CP_REG for the high part
 *
 * The high part is sampled before and after the low part, and the whole read
 * is retried until both high samples agree. This keeps the two halves
 * consistent if the counter changes its high part between the individual
 * register reads.
 *
 * Returns (high << 16) | low.
 * NOTE(review): this assumes GX_GET_CP_REG yields a 16-bit value; confirm
 * against the macro's definition.
 */
static inline u32 __GXReadCPCounterU32(u32 regAddrL, u32 regAddrH) {
u32 ctrH0;
u32 ctrH1;
u32 ctrL;

// Initial high sample; the loop compares every later high sample against the previous one.
ctrH0 = GX_GET_CP_REG(regAddrH);

do {
ctrH1 = ctrH0;
ctrL = GX_GET_CP_REG(regAddrL);
ctrH0 = GX_GET_CP_REG(regAddrH);
} while (ctrH0 != ctrH1); // high part changed while reading low: retry

return (ctrH0 << 0x10) | ctrL;
}

#ifdef __cplusplus
};
#endif
Expand Down
4 changes: 2 additions & 2 deletions src/dolphin/gx/GXBump.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ void GXSetTevIndirect(GXTevStageID tevStage, GXIndTexStageID texStage, GXIndTexF
GXIndTexBiasSel biasSel, GXIndTexMtxID mtxID, GXIndTexWrap wrapS,
GXIndTexWrap wrapT, u8 addPrev, u8 utcLod, GXIndTexAlphaSel alphaSel) {
u32 field = 0;
u32 stage = tevStage + 0x10;

GX_BITFIELD_SET(field, 30, 2, texStage);
GX_BITFIELD_SET(field, 28, 2, texFmt);
Expand All @@ -21,7 +20,7 @@ void GXSetTevIndirect(GXTevStageID tevStage, GXIndTexStageID texStage, GXIndTexF
GX_BITFIELD_SET(field, 13, 3, wrapT);
GX_BITFIELD_SET(field, 12, 1, utcLod);
GX_BITFIELD_SET(field, 11, 1, addPrev);
GX_BITFIELD_SET(field, 0, 8, stage);
GX_BITFIELD_SET(field, 0, 8, tevStage + 0x10);

GXFIFO.u8 = 0x61;
GXFIFO.s32 = field;
Expand All @@ -33,6 +32,7 @@ void GXSetIndTexMtx(GXIndTexMtxID mtxID, f32 offset[6], s8 scale_exp) {
u32 val;
u32 field;
f32 mtx2[6];
u32 stack_padding[6];

scale_exp += 17;

Expand Down
16 changes: 8 additions & 8 deletions src/dolphin/gx/GXFifo.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ static void GXUnderflowHandler() {
static void GXBreakPointHandler(OSContext* context) {
OSContext bpContext;

FAST_FLAG_SET(gx->cpEnable, 0, 5, 1);
SET_REG_FIELD(gx->cpEnable, 1, 5, 2);
GX_SET_CP_REG(1, gx->cpEnable);

if (BreakPointCB) {
Expand Down Expand Up @@ -233,12 +233,12 @@ void __GXFifoInit(void) {
}

/*
 * Sets bit 0 of the cached cpEnable value and writes the result to CP
 * register 1.
 *
 * The 1-bit field at bit 0 is written exactly once with the value 1. Writing
 * the value 2 into this 1-bit field would clear bit 0 and set bit 1 instead.
 */
void __GXFifoReadEnable(void) {
    SET_REG_FIELD(gx->cpEnable, 1, 0, 1);
    GX_SET_CP_REG(1, gx->cpEnable);
}

/*
 * Clears bit 0 of the cached cpEnable value and writes the result to CP
 * register 1.
 *
 * The 1-bit field at bit 0 is written exactly once with the value 0. Writing
 * the value 2 into this 1-bit field would additionally set bit 1.
 */
void __GXFifoReadDisable(void) {
    SET_REG_FIELD(gx->cpEnable, 1, 0, 0);
    GX_SET_CP_REG(1, gx->cpEnable);
}

Expand All @@ -249,19 +249,19 @@ void __GXFifoLink(u8 link) {
} else {
b = 0;
}
FAST_FLAG_SET(gx->cpEnable, b, 4, 1);
SET_REG_FIELD(gx->cpEnable, 1, 4, 2);
GX_SET_CP_REG(1, gx->cpEnable);
}

/*
 * Stores two enable flags in the cached cpEnable value and writes the result
 * to CP register 1.
 *
 * p1 - value for the 1-bit field at bit 2
 * p2 - value for the 1-bit field at bit 3 (truncated to u8 first)
 *
 * Each field is written once, from its parameter. A literal value of 2 in
 * these 1-bit fields would ignore the parameters and spill into bits 3 and 4.
 * NOTE(review): SET_REG_FIELD does not mask the value, so both parameters are
 * assumed to be 0 or 1; confirm against the callers.
 */
void __GXWriteFifoIntEnable(u32 p1, u32 p2) {
    SET_REG_FIELD(gx->cpEnable, 1, 2, p1);
    SET_REG_FIELD(gx->cpEnable, 1, 3, (u8)p2);
    GX_SET_CP_REG(1, gx->cpEnable);
}

/*
 * Stores two flags in the cached cpClr value and writes the result to CP
 * register 2.
 *
 * p1 - value for the 1-bit field at bit 0
 * p2 - value for the 1-bit field at bit 1 (truncated to u8 first)
 *
 * Each field is written once, from its parameter. A literal value of 2 in
 * these 1-bit fields would ignore the parameters and spill into bits 1 and 2.
 * NOTE(review): SET_REG_FIELD does not mask the value, so both parameters are
 * assumed to be 0 or 1; confirm against the callers.
 */
void __GXWriteFifoIntReset(u32 p1, u32 p2) {
    SET_REG_FIELD(gx->cpClr, 1, 0, p1);
    SET_REG_FIELD(gx->cpClr, 1, 1, (u8)p2);
    GX_SET_CP_REG(2, gx->cpClr);
}

Expand Down
80 changes: 18 additions & 62 deletions src/dolphin/gx/GXInit.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,48 +48,6 @@ u16* __cpReg;
/* ############################################################################################## */
u32* __piReg;

/*
 * Patches the cached vatA/vatB entries for all 8 indices and pushes each
 * vatB entry to the FIFO, then emits two 0x10-prefixed FIFO writes and one
 * BP command built from constant bit patterns.
 *
 * FAST_FLAG_SET arguments are (register, value, shift, size).
 * NOTE(review): the function name suggests these are hardware-revision
 * specific bits; the meaning of the individual registers and bits is not
 * visible here.
 */
inline void __GXInitRevisionBits(void) {
u32 i;
for (i = 0; i < 8; i++) {
// NOTE(review): the size argument (33) is wider than the 32-bit register, so
// the derived rlwimi mask-begin is negative; presumably kept to reproduce
// the original codegen. Confirm before changing.
FAST_FLAG_SET(gx->vatA[i], 1, 30, 33);
FAST_FLAG_SET(gx->vatB[i], 1, 31, 33);

// FIFO write order: byte 0x8, byte (i | 0x80), then the updated vatB[i].
GXFIFO.u8 = 0x8;
GXFIFO.u8 = i | 0x80;
GXFIFO.u32 = gx->vatB[i];
}

{
u32 reg1 = 0;
u32 reg2 = 0;

// Set bits 0..5 one at a time (reg1 ends up as 0x3F).
FAST_FLAG_SET(reg1, 1, 0, 1);
FAST_FLAG_SET(reg1, 1, 1, 1);
FAST_FLAG_SET(reg1, 1, 2, 1);
FAST_FLAG_SET(reg1, 1, 3, 1);
FAST_FLAG_SET(reg1, 1, 4, 1);
FAST_FLAG_SET(reg1, 1, 5, 1);
// FIFO write order: byte 0x10, word 0x1000, then reg1.
GXFIFO.u8 = 0x10;
GXFIFO.u32 = 0x1000;
GXFIFO.u32 = reg1;

// Set bit 0 only (reg2 ends up as 1).
FAST_FLAG_SET(reg2, 1, 0, 1);
// FIFO write order: byte 0x10, word 0x1012, then reg2.
GXFIFO.u8 = 0x10;
GXFIFO.u32 = 0x1012;
GXFIFO.u32 = reg2;
}

{
u32 reg = 0;
// Bits 0..3 set, and 0x58 placed in the top byte (bits 24..31).
FAST_FLAG_SET(reg, 1, 0, 1);
FAST_FLAG_SET(reg, 1, 1, 1);
FAST_FLAG_SET(reg, 1, 2, 1);
FAST_FLAG_SET(reg, 1, 3, 1);
FAST_FLAG_SET(reg, 0x58, 24, 8);
GFWriteBPCmd(reg);
}
}

static u16 DefaultTexData[] ALIGN_DECL(32) = {
0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF,
Expand Down Expand Up @@ -157,47 +115,47 @@ GXFifoObj* GXInit(void* base, u32 size) {
EnableWriteGatherPipe();

gx->genMode = 0;
FAST_FLAG_SET(gx->genMode, 0, 24, 8);
SET_REG_FIELD(gx->genMode, 8, 24, 2);

gx->bpMask = 255;
FAST_FLAG_SET(gx->bpMask, 0xF, 24, 8);
SET_REG_FIELD(gx->bpMask, 8, 24, 2);

gx->lpSize = 0;
FAST_FLAG_SET(gx->lpSize, 34, 24, 8);
SET_REG_FIELD(gx->lpSize, 8, 24, 2);

for (i = 0; i < GX_MAX_TEVSTAGE; i++) {
gx->tevc[i] = 0;
gx->teva[i] = 0;
gx->tref[i / 2] = 0;
gx->texmapId[i] = GX_TEXMAP_NULL;

FAST_FLAG_SET(gx->tevc[i], 0xC0 + i * 2, 24, 8);
FAST_FLAG_SET(gx->teva[i], 0xC1 + i * 2, 24, 8);
FAST_FLAG_SET(gx->tevKsel[i / 2], 0xF6 + i / 2, 24, 8);
FAST_FLAG_SET(gx->tref[i / 2], 0x28 + i / 2, 24, 8);
SET_REG_FIELD(gx->tevc[i], 8, 24, 2);
SET_REG_FIELD(gx->teva[i], 8, 24, 2);
SET_REG_FIELD(gx->tevKsel[i / 2], 8, 24, 2);
SET_REG_FIELD(gx->tref[i / 2], 8, 24, 2);
}

gx->iref = 0;
FAST_FLAG_SET(gx->iref, 0x27, 24, 8);
SET_REG_FIELD(gx->iref, 8, 24, 2);

for (i = 0; i < GX_MAXCOORD; i++) {
gx->suTs0[i] = 0;
gx->suTs1[i] = 0;

FAST_FLAG_SET(gx->suTs0[i], 0x30 + i * 2, 24, 8);
FAST_FLAG_SET(gx->suTs1[i], 0x31 + i * 2, 24, 8);
SET_REG_FIELD(gx->suTs0[i], 8, 24, 2);
SET_REG_FIELD(gx->suTs1[i], 8, 24, 2);
}

FAST_FLAG_SET(gx->suScis0, 0x20, 24, 8);
FAST_FLAG_SET(gx->suScis1, 0x21, 24, 8);
SET_REG_FIELD(gx->suScis0, 8, 24, 2);
SET_REG_FIELD(gx->suScis1, 8, 24, 2);

FAST_FLAG_SET(gx->cmode0, 0x41, 24, 8);
FAST_FLAG_SET(gx->cmode1, 0x42, 24, 8);
SET_REG_FIELD(gx->cmode0, 8, 24, 2);
SET_REG_FIELD(gx->cmode1, 8, 24, 2);

FAST_FLAG_SET(gx->zmode, 0x40, 24, 8);
FAST_FLAG_SET(gx->peCtrl, 0x43, 24, 8);
SET_REG_FIELD(gx->zmode, 8, 24, 2);
SET_REG_FIELD(gx->peCtrl, 8, 24, 2);

FAST_FLAG_SET(gx->cpTex, 0, 7, 2);
SET_REG_FIELD(gx->cpTex, 2, 7, 2);

gx->zScale = 1.6777216E7f;
gx->zOffset = 0.0f;
Expand All @@ -223,8 +181,6 @@ GXFifoObj* GXInit(void* base, u32 size) {
GFWriteBPCmd(val1);
}

__GXInitRevisionBits();

for (i = 0; i < GX_MAX_TEXMAP; i++) {
GXInitTexCacheRegion(&gx->TexRegions0[i], GX_FALSE, GXTexRegionAddrTable[i],
GX_TEXCACHE_32K, GXTexRegionAddrTable[i + 8], GX_TEXCACHE_32K);
Expand All @@ -244,7 +200,7 @@ GXFifoObj* GXInit(void* base, u32 size) {

GX_SET_CP_REG(3, 0);

FAST_FLAG_SET(gx->perfSel, 0, 4, 4);
SET_REG_FIELD(gx->perfSel, 4, 4, 2);

GXFIFO.u8 = 0x8;
GXFIFO.u8 = 0x20;
Expand Down
29 changes: 15 additions & 14 deletions src/dolphin/gx/GXPerf.c
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ void GXSetGPMetric(GXPerf0 perf0, GXPerf1 perf1) {
case GX_PERF1_VC_STREAMBUF_LOW:
case GX_PERF1_VC_ALL_STALLS:
case GX_PERF1_VERTICES:
FAST_FLAG_SET(gx->perfSel, 0, 4, 4);
SET_REG_FIELD(gx->perfSel, 4, 4, 0);
GXFIFO.u8 = 8;
GXFIFO.u8 = 32;
GXFIFO.u32 = gx->perfSel;
Expand Down Expand Up @@ -266,49 +266,49 @@ void GXSetGPMetric(GXPerf0 perf0, GXPerf1 perf1) {
break;

case GX_PERF1_VC_ELEMQ_FULL:
FAST_FLAG_SET(gx->perfSel, 2, 4, 4);
SET_REG_FIELD(gx->perfSel, 4, 4, 2);
GXFIFO.u8 = 8;
GXFIFO.u8 = 32;
GXFIFO.u32 = gx->perfSel;
break;
case GX_PERF1_VC_MISSQ_FULL:
FAST_FLAG_SET(gx->perfSel, 3, 4, 4);
SET_REG_FIELD(gx->perfSel, 4, 4, 3);
GXFIFO.u8 = 8;
GXFIFO.u8 = 32;
GXFIFO.u32 = gx->perfSel;
break;
case GX_PERF1_VC_MEMREQ_FULL:
FAST_FLAG_SET(gx->perfSel, 4, 4, 4);
SET_REG_FIELD(gx->perfSel, 4, 4, 4);
GXFIFO.u8 = 8;
GXFIFO.u8 = 32;
GXFIFO.u32 = gx->perfSel;
break;
case GX_PERF1_VC_STATUS7:
FAST_FLAG_SET(gx->perfSel, 5, 4, 4);
SET_REG_FIELD(gx->perfSel, 4, 4, 5);
GXFIFO.u8 = 8;
GXFIFO.u8 = 32;
GXFIFO.u32 = gx->perfSel;
break;
case GX_PERF1_VC_MISSREP_FULL:
FAST_FLAG_SET(gx->perfSel, 6, 4, 4);
SET_REG_FIELD(gx->perfSel, 4, 4, 6);
GXFIFO.u8 = 8;
GXFIFO.u8 = 32;
GXFIFO.u32 = gx->perfSel;
break;
case GX_PERF1_VC_STREAMBUF_LOW:
FAST_FLAG_SET(gx->perfSel, 7, 4, 4);
SET_REG_FIELD(gx->perfSel, 4, 4, 7);
GXFIFO.u8 = 8;
GXFIFO.u8 = 32;
GXFIFO.u32 = gx->perfSel;
break;
case GX_PERF1_VC_ALL_STALLS:
FAST_FLAG_SET(gx->perfSel, 9, 4, 4);
SET_REG_FIELD(gx->perfSel, 4, 4, 9);
GXFIFO.u8 = 8;
GXFIFO.u8 = 32;
GXFIFO.u32 = gx->perfSel;
break;
case GX_PERF1_VERTICES:
FAST_FLAG_SET(gx->perfSel, 8, 4, 4);
SET_REG_FIELD(gx->perfSel, 4, 4, 8);
GXFIFO.u8 = 8;
GXFIFO.u8 = 32;
GXFIFO.u32 = gx->perfSel;
Expand Down Expand Up @@ -339,10 +339,11 @@ void GXClearGPMetric(void) {
}

#pragma scheduling off
/*
 * Reads four counters through __GXReadCPCounterU32 and stores them in the
 * caller's output locations.
 *
 * Each counter is addressed by a (low, high) register index pair:
 *   ras_busy    <- (32, 33)
 *   clocks      <- (34, 35)
 *   xf_wait_in  <- (36, 37)
 *   xf_wait_out <- (38, 39)
 *
 * All four pointers are dereferenced unconditionally and must be non-NULL.
 * The reads are issued in the order listed above.
 */
void GXReadXfRasMetric(u32 *xf_wait_in, u32 *xf_wait_out, u32 *ras_busy, u32 *clocks)
{
    *ras_busy = __GXReadCPCounterU32(32, 33);
    *clocks = __GXReadCPCounterU32(34, 35);
    *xf_wait_in = __GXReadCPCounterU32(36, 37);
    *xf_wait_out = __GXReadCPCounterU32(38, 39);
}
#pragma scheduling reset
Loading

0 comments on commit c81da30

Please sign in to comment.