Skip to content

Commit

Permalink
Changed x87 way of handling FFREE opcode ([DYNAREC] too, improving x8…
Browse files Browse the repository at this point in the history
…7 robustness overall)
  • Loading branch information
ptitSeb committed Apr 24, 2024
1 parent db32e49 commit 2a79b60
Show file tree
Hide file tree
Showing 27 changed files with 428 additions and 245 deletions.
9 changes: 5 additions & 4 deletions src/dynarec/arm64/dynarec_arm64_00.c
Original file line number Diff line number Diff line change
Expand Up @@ -2161,7 +2161,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
if(box64_dynarec_safeflags) {
READFLAGS(X_PEND); // lets play safe here too
}
BARRIER(BARRIER_FLOAT);
fpu_purgecache(dyn, ninst, 1, x1, x2, x3); // using next, even if there no next
i32 = F16;
retn_to_epilog(dyn, ninst, rex, i32);
*need_epilog = 0;
Expand All @@ -2173,7 +2173,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
if(box64_dynarec_safeflags) {
READFLAGS(X_PEND); // so instead, force the deferred flags, so it's not too slow, and flags are not lost
}
BARRIER(BARRIER_FLOAT);
fpu_purgecache(dyn, ninst, 1, x1, x2, x3); // using next, even if there no next
ret_to_epilog(dyn, ninst, rex);
*need_epilog = 0;
*ok = 0;
Expand Down Expand Up @@ -3041,19 +3041,20 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
SETFLAGS(X_ALL, SF_SET_NODF); // Hack to set flags to "dont'care" state
}
// regular call
if(box64_dynarec_callret && box64_dynarec_bigblock>1) {
/*if(box64_dynarec_callret && box64_dynarec_bigblock>1) {
BARRIER(BARRIER_FULL);
BARRIER_NEXT(BARRIER_FULL);
} else {
BARRIER(BARRIER_FLOAT);
*need_epilog = 0;
*ok = 0;
}
}*/
if(rex.is32bits) {
MOV32w(x2, addr);
} else {
TABLE64(x2, addr);
}
fpu_purgecache(dyn, ninst, 1, x1, x3, x4);
PUSH1z(x2);
if(box64_dynarec_callret) {
SET_HASCALLRET();
Expand Down
66 changes: 35 additions & 31 deletions src/dynarec/arm64/dynarec_arm64_d9.c
Original file line number Diff line number Diff line change
Expand Up @@ -136,40 +136,44 @@ uintptr_t dynarec64_D9(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
i1 = x87_get_current_cache(dyn, ninst, 0, NEON_CACHE_ST_D);
// value put in x14
if(i1==-1) {
// not in cache, so check Empty status and load it
i2 = -dyn->n.x87stack;
LDRw_U12(x3, xEmu, offsetof(x64emu_t, fpu_stack));
if(i2) {
if(i2<0) {
ADDw_U12(x3, x3, -i2);
} else {
SUBw_U12(x3, x3, i2);
if(fpu_is_st_freed(dyn, ninst, 0)) {
MOV32w(x4, 0b100000100000000);
B_MARK3_nocond;
} else {
// not in cache, so check Empty status and load it
i2 = -dyn->n.x87stack;
LDRw_U12(x3, xEmu, offsetof(x64emu_t, fpu_stack));
if(i2) {
if(i2<0) {
ADDw_U12(x3, x3, -i2);
} else {
SUBw_U12(x3, x3, i2);
}
}
}
CMPSw_U12(x3, 0);
MOV32w(x3, 0b100000100000000);
CSELx(x4, x3, x4, cLE); // empty: C3,C2,C0 = 101
B_MARK3(cLE);
// x4 will be the actual top
LDRw_U12(x4, xEmu, offsetof(x64emu_t, top));
if(i2) {
if(i2<0) {
SUBw_U12(x4, x4, -i2);
} else {
ADDw_U12(x4, x4, i2);
CMPSw_U12(x3, 0);
MOV32w(x3, 0b100000100000000);
CSELx(x4, x3, x4, cLE); // empty: C3,C2,C0 = 101
B_MARK3(cLE);
// x4 will be the actual top
LDRw_U12(x4, xEmu, offsetof(x64emu_t, top));
if(i2) {
if(i2<0) {
SUBw_U12(x4, x4, -i2);
} else {
ADDw_U12(x4, x4, i2);
}
ANDw_mask(x4, x4, 0, 3); // (emu->top + i)&7
}
ANDw_mask(x4, x4, 0, 3); // (emu->top + i)&7
// load tag
LDRH_U12(x3, xEmu, offsetof(x64emu_t, fpu_tags));
TSTw_mask(x3, 0, 1); // 0b11
MOV32w(x3, 0b100000100000000);
CSELx(x4, x3, x4, cNE); // empty: C3,C2,C0 = 101
B_MARK3(cNE);
// load x2 with ST0 anyway, for sign extraction
ADDx_REG_LSL(x1, xEmu, x4, 3);
LDRx_U12(x2, x1, offsetof(x64emu_t, x87));
}
// load tag
ADDx_U12(x1, xEmu, offsetof(x64emu_t, p_regs));
LDRw_REG_LSL2(x3, x1, x4);
CMPSw_U12(x3, 0b11); // empty
MOV32w(x3, 0b100000100000000);
CSELx(x4, x3, x4, cEQ); // empty: C3,C2,C0 = 101
B_MARK3(cEQ);
// load x2 with ST0 anyway, for sign extraction
ADDx_REG_LSL(x1, xEmu, x4, 3);
LDRx_U12(x2, x1, offsetof(x64emu_t, x87));
} else {
// simply move from cache reg to x2
v1 = dyn->n.x87reg[i1];
Expand Down
29 changes: 25 additions & 4 deletions src/dynarec/arm64/dynarec_arm64_functions.c
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,7 @@ void neoncacheUnwind(neoncache_t* cache)
// unswap
int a = -1;
int b = -1;
// in neoncache
for(int j=0; j<24 && ((a==-1) || (b==-1)); ++j)
if((cache->neoncache[j].t == NEON_CACHE_ST_D || cache->neoncache[j].t == NEON_CACHE_ST_F || cache->neoncache[j].t == NEON_CACHE_ST_I64)) {
if(cache->neoncache[j].n == cache->combined1)
Expand All @@ -401,11 +402,12 @@ void neoncacheUnwind(neoncache_t* cache)
cache->neoncache[a].n = cache->neoncache[b].n;
cache->neoncache[b].n = tmp;
}
// done
cache->swapped = 0;
cache->combined1 = cache->combined2 = 0;
}
if(cache->news) {
// reove the newly created neoncache
// remove the newly created neoncache
for(int i=0; i<24; ++i)
if(cache->news&(1<<i))
cache->neoncache[i].v = 0;
Expand All @@ -422,11 +424,23 @@ void neoncacheUnwind(neoncache_t* cache)
}
}
cache->x87stack-=cache->stack_push;
cache->tags>>=(cache->stack_push*2);
cache->stack-=cache->stack_push;
if(cache->pushed>=cache->stack_push)
cache->pushed-=cache->stack_push;
else
cache->pushed = 0;
cache->stack_push = 0;
}
cache->x87stack+=cache->stack_pop;
cache->stack_next = cache->stack;
if(cache->stack_pop) {
if(cache->poped>=cache->stack_pop)
cache->poped-=cache->stack_pop;
else
cache->poped = 0;
cache->tags<<=(cache->stack_pop*2);
}
cache->stack_pop = 0;
cache->barrier = 0;
// And now, rebuild the x87cache info with neoncache
Expand Down Expand Up @@ -594,10 +608,9 @@ void print_opcode(dynarec_native_t* dyn, int ninst, uint32_t opcode)

static void x87_reset(neoncache_t* n)
{
for (int i=0; i<8; ++i) {
for (int i=0; i<8; ++i)
n->x87cache[i] = -1;
n->freed[i] = -1;
}
n->tags = 0;
n->x87stack = 0;
n->stack = 0;
n->stack_next = 0;
Expand All @@ -606,6 +619,9 @@ static void x87_reset(neoncache_t* n)
n->combined1 = n->combined2 = 0;
n->swapped = 0;
n->barrier = 0;
n->pushed = 0;
n->poped = 0;

for(int i=0; i<24; ++i)
if(n->neoncache[i].t == NEON_CACHE_ST_F
|| n->neoncache[i].t == NEON_CACHE_ST_D
Expand Down Expand Up @@ -641,3 +657,8 @@ void fpu_reset_ninst(dynarec_arm_t* dyn, int ninst)
sse_reset(&dyn->insts[ninst].n);
fpu_reset_reg_neoncache(&dyn->insts[ninst].n);
}

// Tell whether x87 stack slot `st` is flagged in the dynarec's tracked tag word.
//  dyn   : dynarec state; only dyn->n.tags is read
//  ninst : instruction index, not used by this helper
//  st    : stack slot index; slot `st` owns the 2-bit field at bit offset st*2
// Returns 1 when that 2-bit field is non-zero (treated as "freed", per the
// function name), 0 when it is clear.
int fpu_is_st_freed(dynarec_native_t* dyn, int ninst, int st)
{
    const int slot_mask = 0b11 << (st * 2);   // the two tag bits belonging to this slot
    if (dyn->n.tags & slot_mask)
        return 1;
    return 0;
}
3 changes: 3 additions & 0 deletions src/dynarec/arm64/dynarec_arm64_functions.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,4 +58,7 @@ void print_opcode(dynarec_native_t* dyn, int ninst, uint32_t opcode);
// reset the cache
void fpu_reset(dynarec_native_t* dyn);
void fpu_reset_ninst(dynarec_native_t* dyn, int ninst);

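// returns 1 if the 2-bit tag tracked for x87 slot `st` is non-zero (i.e. the slot is marked as freed), 0 otherwise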
int fpu_is_st_freed(dynarec_native_t* dyn, int ninst, int st);
#endif //__DYNAREC_ARM_FUNCTIONS_H__
Loading

0 comments on commit 2a79b60

Please sign in to comment.