Skip to content

Commit

Permalink
[ARM64_DYNAREC][32BITS] Small optim on jump table for 32bits access (…
Browse files Browse the repository at this point in the history
…1 less read, or 2 in SAVE_MEM configuration)
  • Loading branch information
ptitSeb committed Feb 4, 2024
1 parent 165961f commit 0c41d98
Show file tree
Hide file tree
Showing 9 changed files with 50 additions and 34 deletions.
9 changes: 9 additions & 0 deletions src/custommem.c
Original file line number Diff line number Diff line change
Expand Up @@ -960,6 +960,15 @@ uintptr_t getJumpTable64()
#endif
}

// Return, as an integer, the pointer stored in the first slot of the
// top-level jump table: box64_jmptbl4[0][0] when JMPTABL_SHIFT4 is defined,
// box64_jmptbl3[0] otherwise.
// NOTE(review): the slot is read at call time. Presumably it is the sub-table
// covering the low 32-bit address range -- confirm against the table layout,
// and confirm the slot is never replaced if callers keep the returned value.
uintptr_t getJumpTable32()
{
#ifndef JMPTABL_SHIFT4
    const uintptr_t first_slot = (uintptr_t)box64_jmptbl3[0];
#else
    const uintptr_t first_slot = (uintptr_t)box64_jmptbl4[0][0];
#endif
    return first_slot;
}

uintptr_t getJumpTableAddress64(uintptr_t addr)
{
uintptr_t idx3, idx2, idx1, idx0;
Expand Down
12 changes: 6 additions & 6 deletions src/dynarec/arm64/dynarec_arm64_00.c
Original file line number Diff line number Diff line change
Expand Up @@ -923,7 +923,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
if(dyn->insts[ninst].x64.jmp_insts==-1) { \
if(!(dyn->insts[ninst].x64.barrier&BARRIER_FLOAT)) \
fpu_purgecache(dyn, ninst, 1, x1, x2, x3); \
jump_to_next(dyn, addr+i8, 0, ninst); \
jump_to_next(dyn, addr+i8, 0, ninst, rex.is32bits); \
} else { \
CacheTransform(dyn, ninst, cacheupd, x1, x2, x3); \
i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address-(dyn->native_size);\
Expand Down Expand Up @@ -2756,7 +2756,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
if(dyn->insts[ninst].x64.jmp_insts==-1) { \
if(!(dyn->insts[ninst].x64.barrier&BARRIER_FLOAT)) \
fpu_purgecache(dyn, ninst, 1, x1, x2, x3); \
jump_to_next(dyn, addr+i8, 0, ninst); \
jump_to_next(dyn, addr+i8, 0, ninst, rex.is32bits); \
} else { \
CacheTransform(dyn, ninst, cacheupd, x1, x2, x3); \
i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address-(dyn->native_size); \
Expand Down Expand Up @@ -2917,7 +2917,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
*ok = 0;
*need_epilog = 0;
}
jump_to_next(dyn, addr+i32, 0, ninst);
jump_to_next(dyn, addr+i32, 0, ninst, rex.is32bits);
break;
}
break;
Expand All @@ -2940,7 +2940,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
if(dyn->insts[ninst].x64.jmp_insts==-1) {
// out of the block
fpu_purgecache(dyn, ninst, 1, x1, x2, x3);
jump_to_next(dyn, (uintptr_t)getAlternate((void*)j64), 0, ninst);
jump_to_next(dyn, (uintptr_t)getAlternate((void*)j64), 0, ninst, rex.is32bits);
} else {
// inside the block
CacheTransform(dyn, ninst, CHECK_CACHE(), x1, x2, x3);
Expand Down Expand Up @@ -3327,14 +3327,14 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
STPx_S7_preindex(x4, xRIP, xSP, -16);
}
PUSH1z(xRIP);
jump_to_next(dyn, 0, ed, ninst);
jump_to_next(dyn, 0, ed, ninst, rex.is32bits);
break;
case 4: // JMP Ed
INST_NAME("JMP Ed");
READFLAGS(X_PEND);
BARRIER(BARRIER_FLOAT);
GETEDz(0);
jump_to_next(dyn, 0, ed, ninst);
jump_to_next(dyn, 0, ed, ninst, rex.is32bits);
*need_epilog = 0;
*ok = 0;
break;
Expand Down
2 changes: 1 addition & 1 deletion src/dynarec/arm64/dynarec_arm64_0f.c
Original file line number Diff line number Diff line change
Expand Up @@ -1523,7 +1523,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
if(dyn->insts[ninst].x64.jmp_insts==-1) { \
if(!(dyn->insts[ninst].x64.barrier&BARRIER_FLOAT)) \
fpu_purgecache(dyn, ninst, 1, x1, x2, x3); \
jump_to_next(dyn, addr+i32_, 0, ninst); \
jump_to_next(dyn, addr+i32_, 0, ninst, rex.is32bits); \
} else { \
CacheTransform(dyn, ninst, cacheupd, x1, x2, x3); \
i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address-(dyn->native_size); \
Expand Down
4 changes: 2 additions & 2 deletions src/dynarec/arm64/dynarec_arm64_64.c
Original file line number Diff line number Diff line change
Expand Up @@ -1155,14 +1155,14 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
STPx_S7_preindex(x4, xRIP, xSP, -16);
}
PUSH1z(xRIP);
jump_to_next(dyn, 0, ed, ninst);
jump_to_next(dyn, 0, ed, ninst, rex.is32bits);
break;
case 4: // JMP Ed
INST_NAME("JMP Ed");
READFLAGS(X_PEND);
BARRIER(BARRIER_FLOAT);
GETEDOz(x6, 0);
jump_to_next(dyn, 0, ed, ninst);
jump_to_next(dyn, 0, ed, ninst, rex.is32bits);
*need_epilog = 0;
*ok = 0;
break;
Expand Down
2 changes: 1 addition & 1 deletion src/dynarec/arm64/dynarec_arm64_67.c
Original file line number Diff line number Diff line change
Expand Up @@ -1014,7 +1014,7 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
if(dyn->insts[ninst].x64.jmp_insts==-1) { \
if(!(dyn->insts[ninst].x64.barrier&BARRIER_FLOAT)) \
fpu_purgecache(dyn, ninst, 1, x1, x2, x3); \
jump_to_next(dyn, addr+i8, 0, ninst); \
jump_to_next(dyn, addr+i8, 0, ninst, rex.is32bits); \
} else { \
CacheTransform(dyn, ninst, cacheupd, x1, x2, x3); \
i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address-(dyn->native_size);\
Expand Down
50 changes: 28 additions & 22 deletions src/dynarec/arm64/dynarec_arm64_helper.c
Original file line number Diff line number Diff line change
Expand Up @@ -564,7 +564,7 @@ void jump_to_epilog(dynarec_arm_t* dyn, uintptr_t ip, int reg, int ninst)
BR(x2);
}

void jump_to_next(dynarec_arm_t* dyn, uintptr_t ip, int reg, int ninst)
void jump_to_next(dynarec_arm_t* dyn, uintptr_t ip, int reg, int ninst, int is32bits)
{
MAYUSE(dyn); MAYUSE(ninst);
MESSAGE(LOG_DUMP, "Jump to next\n");
Expand All @@ -575,15 +575,17 @@ void jump_to_next(dynarec_arm_t* dyn, uintptr_t ip, int reg, int ninst)
MOVx_REG(xRIP, reg);
}
NOTEST(x2);
uintptr_t tbl = getJumpTable64();
uintptr_t tbl = is32bits?getJumpTable32():getJumpTable64();
MAYUSE(tbl);
TABLE64(x3, tbl);
#ifdef JMPTABL_SHIFT4
UBFXx(x2, xRIP, JMPTABL_START4, JMPTABL_SHIFT4);
LDRx_REG_LSL3(x3, x3, x2);
#endif
UBFXx(x2, xRIP, JMPTABL_START3, JMPTABL_SHIFT3);
LDRx_REG_LSL3(x3, x3, x2);
if(!is32bits) {
#ifdef JMPTABL_SHIFT4
UBFXx(x2, xRIP, JMPTABL_START4, JMPTABL_SHIFT4);
LDRx_REG_LSL3(x3, x3, x2);
#endif
UBFXx(x2, xRIP, JMPTABL_START3, JMPTABL_SHIFT3);
LDRx_REG_LSL3(x3, x3, x2);
}
UBFXx(x2, xRIP, JMPTABL_START2, JMPTABL_SHIFT2);
LDRx_REG_LSL3(x3, x3, x2);
UBFXx(x2, xRIP, JMPTABL_START1, JMPTABL_SHIFT1);
Expand Down Expand Up @@ -624,15 +626,17 @@ void ret_to_epilog(dynarec_arm_t* dyn, int ninst, rex_t rex)
// not the correct return address, regular jump, but purge the stack first, it's unsync now...
SUBx_U12(xSP, xSavedSP, 16);
}
uintptr_t tbl = getJumpTable64();
uintptr_t tbl = rex.is32bits?getJumpTable32():getJumpTable64();
NOTEST(x2);
MOV64x(x2, tbl);
#ifdef JMPTABL_SHIFT4
UBFXx(x3, xRIP, JMPTABL_START4, JMPTABL_SHIFT4);
LDRx_REG_LSL3(x2, x2, x3);
#endif
UBFXx(x3, xRIP, JMPTABL_START3, JMPTABL_SHIFT3);
LDRx_REG_LSL3(x2, x2, x3);
if(!rex.is32bits) {
#ifdef JMPTABL_SHIFT4
UBFXx(x3, xRIP, JMPTABL_START4, JMPTABL_SHIFT4);
LDRx_REG_LSL3(x2, x2, x3);
#endif
UBFXx(x3, xRIP, JMPTABL_START3, JMPTABL_SHIFT3);
LDRx_REG_LSL3(x2, x2, x3);
}
UBFXx(x3, xRIP, JMPTABL_START2, JMPTABL_SHIFT2);
LDRx_REG_LSL3(x2, x2, x3);
UBFXx(x3, xRIP, JMPTABL_START1, JMPTABL_SHIFT1);
Expand Down Expand Up @@ -665,15 +669,17 @@ void retn_to_epilog(dynarec_arm_t* dyn, int ninst, rex_t rex, int n)
// not the correct return address, regular jump
SUBx_U12(xSP, xSavedSP, 16);
}
uintptr_t tbl = getJumpTable64();
uintptr_t tbl = rex.is32bits?getJumpTable32():getJumpTable64();
NOTEST(x2);
MOV64x(x2, tbl);
#ifdef JMPTABL_SHIFT4
UBFXx(x3, xRIP, JMPTABL_START4, JMPTABL_SHIFT4);
LDRx_REG_LSL3(x2, x2, x3);
#endif
UBFXx(x3, xRIP, JMPTABL_START3, JMPTABL_SHIFT3);
LDRx_REG_LSL3(x2, x2, x3);
if(!rex.is32bits) {
#ifdef JMPTABL_SHIFT4
UBFXx(x3, xRIP, JMPTABL_START4, JMPTABL_SHIFT4);
LDRx_REG_LSL3(x2, x2, x3);
#endif
UBFXx(x3, xRIP, JMPTABL_START3, JMPTABL_SHIFT3);
LDRx_REG_LSL3(x2, x2, x3);
}
UBFXx(x3, xRIP, JMPTABL_START2, JMPTABL_SHIFT2);
LDRx_REG_LSL3(x2, x2, x3);
UBFXx(x3, xRIP, JMPTABL_START1, JMPTABL_SHIFT1);
Expand Down
2 changes: 1 addition & 1 deletion src/dynarec/arm64/dynarec_arm64_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -1126,7 +1126,7 @@ uintptr_t geted16(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t nextop,

// generic x64 helper
void jump_to_epilog(dynarec_arm_t* dyn, uintptr_t ip, int reg, int ninst);
void jump_to_next(dynarec_arm_t* dyn, uintptr_t ip, int reg, int ninst);
void jump_to_next(dynarec_arm_t* dyn, uintptr_t ip, int reg, int ninst, int is32bits);
void ret_to_epilog(dynarec_arm_t* dyn, int ninst, rex_t rex);
void retn_to_epilog(dynarec_arm_t* dyn, int ninst, rex_t rex, int n);
void iret_to_epilog(dynarec_arm_t* dyn, int ninst, int is64bits);
Expand Down
2 changes: 1 addition & 1 deletion src/dynarec/dynarec_native_pass.c
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ uintptr_t native_pass(dynarec_native_t* dyn, uintptr_t addr, int alternate, int
++ninst;
NOTEST(x3);
fpu_purgecache(dyn, ninst, 0, x1, x2, x3);
jump_to_next(dyn, addr, 0, ninst);
jump_to_next(dyn, addr, 0, ninst, rex.is32bits);
ok=0; need_epilog=0;
}
}
Expand Down
1 change: 1 addition & 0 deletions src/include/custommem.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ void setJumpTableDefault64(void* addr);
void setJumpTableDefaultRef64(void* addr, void* jmp);
int isJumpTableDefault64(void* addr);
uintptr_t getJumpTable64(void);
uintptr_t getJumpTable32(void);
uintptr_t getJumpTableAddress64(uintptr_t addr);
uintptr_t getJumpAddress64(uintptr_t addr);

Expand Down

0 comments on commit 0c41d98

Please sign in to comment.