diff --git a/Makefile b/Makefile
index f73075ff..0798d9f6 100644
--- a/Makefile
+++ b/Makefile
@@ -188,7 +188,7 @@ ifeq ($(call has, JIT), 1)
 ifeq ("$(CHECK_LLVM_LIBS)", "0")
     OBJS_EXT += t2c.o
     CFLAGS += -g $(shell $(LLVM_CONFIG) --cflags)
-    LDFLAGS += $(shell $(LLVM_CONFIG) --libs)
+    LDFLAGS += $(shell $(LLVM_CONFIG) --libfiles)
 else
 $(error No llvm-config-18 installed. Check llvm-config-18 installation in advance, or use "ENABLE_T2C=0" to disable tier-2 LLVM compiler)
 endif
diff --git a/src/decode.h b/src/decode.h
index e2b2984c..edb88f42 100644
--- a/src/decode.h
+++ b/src/decode.h
@@ -288,6 +288,9 @@ typedef struct {
     struct rv_insn *target[HISTORY_SIZE];
 #else
     uint32_t times[HISTORY_SIZE];
+#if RV32_HAS(SYSTEM)
+    uint32_t satp[HISTORY_SIZE];
+#endif
 #endif
 } branch_history_table_t;
diff --git a/src/emulate.c b/src/emulate.c
index 293ce031..e706925c 100644
--- a/src/emulate.c
+++ b/src/emulate.c
@@ -42,7 +42,9 @@ extern struct target_ops gdbstub_ops;
 #define IF_imm(i, v) (i->imm == v)
 
 #if RV32_HAS(SYSTEM)
+#if !RV32_HAS(JIT)
 static bool need_clear_block_map = false;
+#endif
 static uint32_t reloc_enable_mmu_jalr_addr;
 static bool reloc_enable_mmu = false;
 bool need_retranslate = false;
@@ -704,6 +706,7 @@ static inline void remove_next_nth_ir(const riscv_t *rv,
  * Strategies are being devised to increase the number of instructions that
  * match the pattern, including possible instruction reordering.
  */
+#if RV32_HAS(MOP_FUSION)
 static void match_pattern(riscv_t *rv, block_t *block)
 {
     uint32_t i;
@@ -795,7 +798,7 @@ static void match_pattern(riscv_t *rv, block_t *block)
         }
     }
 }
-
+#endif
 typedef struct {
     bool is_constant[N_RV_REGS];
     uint32_t const_val[N_RV_REGS];
@@ -838,12 +841,11 @@ static block_t *block_find_or_translate(riscv_t *rv)
     block_t *next_blk = block_find(map, rv->PC);
 #else
     /* lookup the next block in the block cache */
-    /*
-     * The function "cache_get()" gets the cached block by the given "key (PC)".
-     * In system simulation, the returned block might be dropped because it is
-     * not the one from the current process (by checking SATP CSR register).
-     */
     block_t *next_blk = (block_t *) cache_get(rv->block_cache, rv->PC, true);
+#if RV32_HAS(SYSTEM)
+    if (next_blk && next_blk->satp != rv->csr_satp)
+        next_blk = NULL;
+#endif
 #endif
 
     if (next_blk)
@@ -861,12 +863,20 @@ static block_t *block_find_or_translate(riscv_t *rv)
 
     block_translate(rv, next_blk);
 
+#if RV32_HAS(JIT) && RV32_HAS(SYSTEM)
+    /*
+     * An instruction fetch fault may have changed satp, so do not assign
+     * this in block_alloc().
+     */
+    next_blk->satp = rv->csr_satp;
+#endif
+
     optimize_constant(rv, next_blk);
+
 #if RV32_HAS(GDBSTUB)
     if (likely(!rv->debug_mode))
 #endif
-#if RV32_HAS(MOP_FUSION)
-        /* macro operation fusion */
+#if RV32_HAS(GDBSTUB) || RV32_HAS(MOP_FUSION)
         match_pattern(rv, next_blk);
 #endif
@@ -890,8 +900,6 @@ static block_t *block_find_or_translate(riscv_t *rv)
             return next_blk;
         }
 
-        list_del_init(&replaced_blk->list);
-
         if (prev == replaced_blk)
             prev = NULL;
@@ -910,6 +918,32 @@ static block_t *block_find_or_translate(riscv_t *rv)
             if (untaken == replaced_blk_entry) {
                 entry->ir_tail->branch_untaken = NULL;
             }
+
+            /* update the JALR LUT */
+            if (!entry->ir_tail->branch_table)
+                continue;
+
+#if 0
+            /*
+             * This branch-table update is currently unused: we only take
+             * the PC from the table and use cache_get() to achieve the T1C
+             * branch prediction. However, if branch_table_t is ever changed
+             * to reference blocks directly, this update becomes necessary
+             * to avoid using freed blocks.
+             */
+            for (int i = 0; i < HISTORY_SIZE; i++) {
+                if (entry->ir_tail->branch_table->PC[i] == replaced_blk->pc_start) {
+                    IIF(RV32_HAS(SYSTEM))
+                    (if (entry->ir_tail->branch_table->satp[i] ==
+                         replaced_blk->satp), )
+                    {
+                        entry->ir_tail->branch_table->PC[i] =
+                            entry->ir_tail->branch_table->satp[i] =
+                                entry->ir_tail->branch_table->times[i] = 0;
+                    }
+                }
+            }
+#endif
         }
 
     /* free IRs in replaced block */
@@ -923,6 +957,7 @@ static block_t *block_find_or_translate(riscv_t *rv)
             mpool_free(rv->block_ir_mp, ir);
         }
 
+        list_del_init(&replaced_blk->list);
         mpool_free(rv->block_mp, replaced_blk);
 #if RV32_HAS(T2C)
         pthread_mutex_unlock(&rv->cache_lock);
@@ -941,6 +976,10 @@ static bool runtime_profiler(riscv_t *rv, block_t *block)
      * we posit that our profiler could effectively identify hotspots using
      * three key indicators.
      */
+#if RV32_HAS(SYSTEM)
+    if (block->satp != rv->csr_satp)
+        return false;
+#endif
     uint32_t freq = cache_freq(rv->block_cache, block->pc_start);
     /* To profile a block after chaining, it must first be executed. */
     if (unlikely(freq >= 2 && block->has_loops))
@@ -1022,15 +1061,21 @@ void rv_step(void *arg)
         block_t *block = block_find_or_translate(rv);
         /* by now, a block should be available */
         assert(block);
+#if RV32_HAS(JIT) && RV32_HAS(SYSTEM)
+        assert(block->satp == rv->csr_satp);
+#endif
 
         /* After emulating the previous block, it is determined whether the
          * branch is taken or not. The IR array of the current block is then
          * assigned to either the branch_taken or branch_untaken pointer of
          * the previous block.
          */
-
 #if RV32_HAS(BLOCK_CHAINING)
-        if (prev) {
+        if (prev
+#if RV32_HAS(JIT) && RV32_HAS(SYSTEM)
+            && prev->satp == rv->csr_satp
+#endif
+        ) {
             rv_insn_t *last_ir = prev->ir_tail;
             /* chain block */
             if (!insn_is_unconditional_branch(last_ir->opcode)) {
@@ -1048,7 +1093,7 @@ void rv_step(void *arg)
 #endif
         last_pc = rv->PC;
 #if RV32_HAS(JIT)
-#if RV32_HAS(T2C)
+#if RV32_HAS(T2C) && !RV32_HAS(SYSTEM)
         /* executed through the tier-2 JIT compiler */
         if (block->hot2) {
             ((exec_t2c_func_t) block->func)(rv);
diff --git a/src/jit.c b/src/jit.c
index da5eb079..fa23b5e2 100644
--- a/src/jit.c
+++ b/src/jit.c
@@ -45,6 +45,9 @@
 #include "riscv.h"
 #include "riscv_private.h"
 #include "utils.h"
+#if RV32_HAS(SYSTEM)
+#include "system.h"
+#endif
 
 #define JIT_CLS_MASK 0x07
 #define JIT_ALU_OP_MASK 0xf0
@@ -267,12 +270,15 @@ static inline void set_dirty(int reg_idx, bool is_dirty)
     }
 }
 
-static inline void offset_map_insert(struct jit_state *state, int32_t target_pc)
+static inline void offset_map_insert(struct jit_state *state, block_t *block)
 {
     struct offset_map *map_entry = &state->offset_map[state->n_blocks++];
-    assert(state->n_blocks < MAX_BLOCKS);
-    map_entry->pc = target_pc;
+    assert(state->n_blocks <= MAX_BLOCKS);
+    map_entry->pc = block->pc_start;
     map_entry->offset = state->offset;
+#if RV32_HAS(SYSTEM)
+    map_entry->satp = block->satp;
+#endif
 }
 
 #if !defined(__APPLE__)
@@ -287,6 +293,10 @@ static void emit_bytes(struct jit_state *state, void *data, uint32_t len)
         should_flush = true;
         return;
     }
+    if (unlikely(state->n_blocks == MAX_BLOCKS)) {
+        should_flush = true;
+        return;
+    }
 #if defined(__APPLE__) && defined(__aarch64__)
     pthread_jit_write_protect_np(false);
 #endif
@@ -330,11 +340,12 @@ static inline void emit_modrm_and_displacement(struct jit_state *state,
                                                int m,
                                                int32_t d)
 {
+    /*
+     * Do not use the short encoding even when the offset fits in one byte,
+     * since the length of the emitted operation must stay deterministic.
+     */
     if (d == 0 && (m & 7) != RBP) {
         emit_modrm(state, 0x00, r, m);
-    } else if ((int8_t) d == d) {
-        emit_modrm(state, 0x40, r, m);
-        emit1(state, d);
     } else {
         emit_modrm(state, 0x80, r, m);
         emit4(state, d);
     }
@@ -377,12 +388,16 @@ static inline void emit_pop(struct jit_state *state, int r)
 }
 
 static inline void emit_jump_target_address(struct jit_state *state,
-                                            int32_t target_pc)
+                                            int32_t target_pc,
+                                            uint32_t target_satp UNUSED)
 {
     struct jump *jump = &state->jumps[state->n_jumps++];
-    assert(state->n_jumps < MAX_JUMPS);
+    assert(state->n_jumps <= MAX_JUMPS);
     jump->offset_loc = state->offset;
     jump->target_pc = target_pc;
+#if RV32_HAS(SYSTEM)
+    jump->target_satp = target_satp;
+#endif
     emit4(state, 0);
 }
 #elif defined(__aarch64__)
@@ -591,7 +606,7 @@ static inline void emit_jump_target_offset(struct jit_state *state,
                                            uint32_t jump_state_offset)
 {
     struct jump *jump = &state->jumps[state->n_jumps++];
-    assert(state->n_jumps < MAX_JUMPS);
+    assert(state->n_jumps <= MAX_JUMPS);
     jump->offset_loc = jump_loc;
     jump->target_offset = jump_state_offset;
 }
@@ -968,17 +983,22 @@ static inline void emit_store(struct jit_state *state,
     set_dirty(src, false);
 }
 
-static inline void emit_jmp(struct jit_state *state, uint32_t target_pc)
+static inline void emit_jmp(struct jit_state *state,
+                            uint32_t target_pc,
+                            uint32_t target_satp)
 {
 #if defined(__x86_64__)
     emit1(state, 0xe9);
-    emit_jump_target_address(state, target_pc);
+    emit_jump_target_address(state, target_pc, target_satp);
 #elif defined(__aarch64__)
     struct jump *jump = &state->jumps[state->n_jumps++];
-    assert(state->n_jumps < MAX_JUMPS);
+    assert(state->n_jumps <= MAX_JUMPS);
     jump->offset_loc = state->offset;
     jump->target_pc = target_pc;
     emit_a64(state, UBR_B);
+#if RV32_HAS(SYSTEM)
+    jump->target_satp = target_satp;
+#endif
 #endif
 }
 
@@ -1017,7 +1037,7 @@ static inline void emit_exit(struct jit_state *state)
     emit_jump_target_offset(state, state->offset, state->exit_loc);
     emit4(state, 0);
 #elif defined(__aarch64__)
-    emit_jmp(state, TARGET_PC_EXIT);
+    emit_jmp(state, TARGET_PC_EXIT, 0);
 #endif
 }
 
@@ -1233,6 +1253,211 @@ static void muldivmod(struct jit_state *state,
 }
 #endif /* RV32_HAS(EXT_M) */
 
+#if RV32_HAS(SYSTEM)
+uint32_t jit_mmio_read_wrapper(riscv_t *rv, uint32_t addr)
+{
+    MMIO_READ();
+    __UNREACHABLE;
+}
+
+void jit_mmu_handler(riscv_t *rv, uint32_t vreg_idx)
+{
+    assert(vreg_idx < 32);
+
+    uint32_t addr = rv->jit_mmu.vaddr;
+
+    if (!rv->csr_satp) {
+        rv->jit_mmu.paddr = addr;
+        return;
+    }
+
+    bool ok;
+    uint32_t level, *pte = mmu_walk(rv, addr, &level);
+
+    if (rv->jit_mmu.type == rv_insn_sb || rv->jit_mmu.type == rv_insn_sh ||
+        rv->jit_mmu.type == rv_insn_sw)
+        ok = mmu_write_fault_check(rv, pte, addr, PTE_W);
+    else
+        ok = mmu_read_fault_check(rv, pte, addr, PTE_R);
+
+    if (unlikely(!ok))
+        pte = mmu_walk(rv, addr, &level);
+
+    get_ppn_and_offset();
+    addr = ppn | offset;
+
+    if (likely(addr < PRIV(rv)->mem->mem_size)) {
+        rv->jit_mmu.is_mmio = 0;
+        rv->jit_mmu.paddr = addr;
+        return;
+    }
+
+    uint32_t val;
+    rv->jit_mmu.is_mmio = 1;
+
+    switch (rv->jit_mmu.type) {
+    case rv_insn_sb:
+        val = rv->X[vreg_idx] & 0xff;
+        MMIO_WRITE();
+        break;
+    case rv_insn_sh:
+        val = rv->X[vreg_idx] & 0xffff;
+        MMIO_WRITE();
+        break;
+    case rv_insn_sw:
+        val = rv->X[vreg_idx];
+        MMIO_WRITE();
+        break;
+    case rv_insn_lb:
+        rv->X[vreg_idx] = (int8_t) jit_mmio_read_wrapper(rv, addr);
+        break;
+    case rv_insn_lh:
+        rv->X[vreg_idx] = (int16_t) jit_mmio_read_wrapper(rv, addr);
+        break;
+    case rv_insn_lw:
+        rv->X[vreg_idx] = jit_mmio_read_wrapper(rv, addr);
+        break;
+    case rv_insn_lbu:
+        rv->X[vreg_idx] = (uint8_t) jit_mmio_read_wrapper(rv, addr);
+        break;
+    case rv_insn_lhu:
+        rv->X[vreg_idx] = (uint16_t) jit_mmio_read_wrapper(rv, addr);
+        break;
+    default:
+        assert(NULL);
+        __UNREACHABLE;
+    }
+}
+
+void emit_jit_mmu_handler(struct jit_state *state, uint8_t vreg_idx)
+{
+    assert(vreg_idx < 32);
+
+#if defined(__x86_64__)
+    /* push $rdi */
+    emit1(state, 0xff);
+    emit_modrm(state, 0x3 << 6, 0x6, parameter_reg[0]);
+
+    /* mov $vreg_idx, %rsi */
+    emit1(state, 0xbe);
+    emit4(state, vreg_idx);
+
+    /* call jit_mmu_handler */
+    emit_load_imm(state, temp_reg, (uintptr_t) &jit_mmu_handler);
+    emit1(state, 0xff);
+    emit_modrm(state, 0x3 << 6, 0x2, temp_reg);
+
+    /* pop rv to $rdi */
+    emit1(state, 0x8f);
+    emit_modrm(state, 0x3 << 6, 0x0, parameter_reg[0]);
+#elif defined(__aarch64__)
+    uint32_t insn;
+
+    /* push rv onto the stack */
+    insn = (0xf81f0fe << 4) | R0;
+    emit_a64(state, insn);
+
+    emit_movewide_imm(state, false, R1, vreg_idx);
+
+    /* blr jit_mmu_handler */
+    emit_movewide_imm(state, true, temp_reg, (uintptr_t) &jit_mmu_handler);
+    insn = (0xd63f << 16) | (temp_reg << 5);
+    emit_a64(state, insn);
+
+    /* pop rv from the stack */
+    insn = (0xf84107e << 4) | R0;
+    emit_a64(state, insn);
+#endif
+}
+
+void emit_jit_mmio_escape_load(struct jit_state *state)
+{
+#if defined(__x86_64__)
+    /* JE */
+    emit1(state, 0x0f);
+    emit1(state, 0x84);
+
+    /* pre-calculated jump offset */
+    emit4(state, 0xb);
+    return;
+#elif defined(__aarch64__)
+    /* b.eq (3 instructions) */
+    emit_a64(state, (0x54 << 24) | (0x3 << 5));
+#endif
+}
+
+void emit_jit_mmio_escape_load_end(struct jit_state *state,
+                                   int rv_insn_type UNUSED)
+{
+#if defined(__x86_64__)
+    /* JMP */
+    emit1(state, 0xe9);
+
+    /* pre-calculated jump offset */
+    switch (rv_insn_type) {
+    case rv_insn_sb:
+    case rv_insn_sh:
+        emit4(state, 0x1c);
+        return;
+    case rv_insn_sw:
+        emit4(state, 0x1b);
+        return;
+    case rv_insn_lb:
+    case rv_insn_lh:
+    case rv_insn_lbu:
+    case rv_insn_lhu:
+        emit4(state, 0x16);
+        return;
+    case rv_insn_lw:
+        emit4(state, 0x15);
+        return;
+    default:
+        assert(NULL);
+        __UNREACHABLE;
+    }
+#elif defined(__aarch64__)
+    /* b (5 instructions) */
+    emit_a64(state, (0x54 << 24) | (0x6 << 5) | 0xe);
+#endif
+}
+
+void emit_jit_mmio_escape_store(struct jit_state *state,
+                                int rv_insn_type UNUSED)
+{
+#if defined(__x86_64__)
+    /* JE */
+    emit1(state, 0x0f);
+    emit1(state, 0x84);
+
+    /* pre-calculated jump offset */
+    switch (rv_insn_type) {
+    case rv_insn_sb:
+    case rv_insn_sh:
+        emit4(state, 0x1c);
+        return;
+    case rv_insn_sw:
+        emit4(state, 0x1b);
+        return;
+    case rv_insn_lb:
+    case rv_insn_lh:
+    case rv_insn_lbu:
+    case rv_insn_lhu:
+        emit4(state, 0x16);
+        return;
+    case rv_insn_lw:
+        emit4(state, 0x15);
+        return;
+    default:
+        assert(NULL);
+        __UNREACHABLE;
+    }
+#elif defined(__aarch64__)
+    /* b.eq (7 instructions) */
+    emit_a64(state, (0x54 << 24) | (0x7 << 5));
+#endif
+}
+#endif
+
 static void prepare_translate(struct jit_state *state)
 {
 #if defined(__x86_64__)
@@ -1734,7 +1959,9 @@ static void ra_load2_sext(struct jit_state *state,
     }
 }
 
-void parse_branch_history_table(struct jit_state *state, rv_insn_t *ir)
+void parse_branch_history_table(struct jit_state *state,
+                                riscv_t *rv UNUSED,
+                                rv_insn_t *ir)
 {
     int max_idx = 0;
     branch_history_table_t *bt = ir->branch_table;
@@ -1745,14 +1972,21 @@ void parse_branch_history_table(struct jit_state *state, rv_insn_t *ir)
         max_idx = i;
     }
     if (bt->PC[max_idx] && bt->times[max_idx] >= IN_JUMP_THRESHOLD) {
-        save_reg(state, 0);
-        unmap_vm_reg(0);
-        emit_load_imm(state, register_map[0].reg_idx, bt->PC[max_idx]);
-        emit_cmp32(state, temp_reg, register_map[0].reg_idx);
-        uint32_t jump_loc = state->offset;
-        emit_jcc_offset(state, 0x85);
-        emit_jmp(state, bt->PC[max_idx]);
-        emit_jump_target_offset(state, JUMP_LOC, state->offset);
+        IIF(RV32_HAS(SYSTEM))(if (bt->satp[max_idx] == rv->csr_satp), )
+        {
+            save_reg(state, 0);
+            unmap_vm_reg(0);
+            emit_load_imm(state, register_map[0].reg_idx, bt->PC[max_idx]);
+            emit_cmp32(state, temp_reg, register_map[0].reg_idx);
+            uint32_t jump_loc = state->offset;
+            emit_jcc_offset(state, 0x85);
+#if RV32_HAS(SYSTEM)
+            emit_jmp(state, bt->PC[max_idx], bt->satp[max_idx]);
+#else
+            emit_jmp(state, bt->PC[max_idx], 0);
+#endif
+            emit_jump_target_offset(state, JUMP_LOC, state->offset);
+        }
     }
 }
 
@@ -1914,8 +2148,12 @@ static void resolve_jumps(struct jit_state *state)
             target_loc = jump.offset_loc + sizeof(uint32_t);
             for (int i = 0; i < state->n_blocks; i++) {
                 if (jump.target_pc == state->offset_map[i].pc) {
-                    target_loc = state->offset_map[i].offset;
-                    break;
+                    IIF(RV32_HAS(SYSTEM))
+                    (if (jump.target_satp == state->offset_map[i].satp), )
+                    {
+                        target_loc = state->offset_map[i].offset;
+                        break;
+                    }
                 }
             }
         }
@@ -1936,11 +2174,14 @@ static void translate_chained_block(struct jit_state *state,
                                     riscv_t *rv,
                                     block_t *block)
 {
-    if (set_has(&state->set, block->pc_start))
+    if (set_has(&state->set, COMPOSED_KEY(block)))
         return;
 
-    set_add(&state->set, block->pc_start);
-    offset_map_insert(state, block->pc_start);
+    if (state->n_blocks == MAX_BLOCKS)
+        return;
+
+    assert(set_add(&state->set, COMPOSED_KEY(block)));
+    offset_map_insert(state, block);
     translate(state, rv, block);
     if (unlikely(should_flush))
         return;
@@ -1948,15 +2189,22 @@ static void translate_chained_block(struct jit_state *state,
     if (ir->branch_untaken && !set_has(&state->set, ir->branch_untaken->pc)) {
         block_t *block1 =
             cache_get(rv->block_cache, ir->branch_untaken->pc, false);
-        if (block1->translatable)
-            translate_chained_block(state, rv, block1);
+        if (block1->translatable) {
+            IIF(RV32_HAS(SYSTEM))
+            (if (block1->satp == rv->csr_satp), )
+            translate_chained_block(state, rv, block1);
+        }
     }
     if (ir->branch_taken && !set_has(&state->set, ir->branch_taken->pc)) {
         block_t *block1 =
             cache_get(rv->block_cache, ir->branch_taken->pc, false);
-        if (block1->translatable)
-            translate_chained_block(state, rv, block1);
+        if (block1->translatable) {
+            IIF(RV32_HAS(SYSTEM))
+            (if (block1->satp == rv->csr_satp), )
+            translate_chained_block(state, rv, block1);
+        }
     }
+
     branch_history_table_t *bt = ir->branch_table;
     if (bt) {
         int max_idx = 0;
@@ -1968,10 +2216,16 @@ static void translate_chained_block(struct jit_state *state,
         }
         if (bt->PC[max_idx] && bt->times[max_idx] >= IN_JUMP_THRESHOLD &&
             !set_has(&state->set, bt->PC[max_idx])) {
-            block_t *block1 =
-                cache_get(rv->block_cache, bt->PC[max_idx], false);
-            if (block1 && block1->translatable)
-                translate_chained_block(state, rv, block1);
+            IIF(RV32_HAS(SYSTEM))(if (bt->satp[max_idx] == rv->csr_satp), )
+            {
+                block_t *block1 =
+                    cache_get(rv->block_cache, bt->PC[max_idx], false);
+                if (block1 && block1->translatable) {
+                    IIF(RV32_HAS(SYSTEM))
+                    (if (block1->satp == rv->csr_satp), )
+                    translate_chained_block(state, rv, block1);
+                }
+            }
         }
     }
 }
@@ -1979,18 +2233,23 @@ static void translate_chained_block(struct jit_state *state,
 void jit_translate(riscv_t *rv, block_t *block)
 {
     struct jit_state *state = rv->jit_state;
-    if (set_has(&state->set, block->pc_start)) {
+    if (set_has(&state->set, COMPOSED_KEY(block))) {
         for (int i = 0; i < state->n_blocks; i++) {
-            if (block->pc_start == state->offset_map[i].pc) {
+            if (block->pc_start == state->offset_map[i].pc
+#if RV32_HAS(SYSTEM)
+                && block->satp == state->offset_map[i].satp
+#endif
+            ) {
                 block->offset = state->offset_map[i].offset;
                 block->hot = true;
                 return;
             }
         }
+        assert(NULL);
         __UNREACHABLE;
     }
 restart:
-    memset(state->jumps, 0, 1024 * sizeof(struct jump));
+    memset(state->jumps, 0, MAX_JUMPS * sizeof(struct jump));
     state->n_jumps = 0;
     block->offset = state->offset;
     translate_chained_block(state, rv, block);
diff --git a/src/jit.h b/src/jit.h
index 3967a1df..4bbafa2f 100644
--- a/src/jit.h
+++ b/src/jit.h
@@ -14,11 +14,17 @@ struct jump {
     uint32_t offset_loc;
     uint32_t target_pc;
     uint32_t target_offset;
+#if RV32_HAS(SYSTEM)
+    uint32_t target_satp;
+#endif
 };
 
 struct offset_map {
     uint32_t pc;
     uint32_t offset;
+#if RV32_HAS(SYSTEM)
+    uint32_t satp;
+#endif
 };
 
 struct jit_state {
diff --git a/src/riscv.c b/src/riscv.c
index a06a2f33..8e1d103b 100644
--- a/src/riscv.c
+++ b/src/riscv.c
@@ -463,7 +463,7 @@ riscv_t *rv_create(riscv_user_t rv_attr)
     rv->jit_state = jit_state_init(CODE_CACHE_SIZE);
     rv->block_cache = cache_create(BLOCK_MAP_CAPACITY_BITS);
     assert(rv->block_cache);
-#if RV32_HAS(T2C)
+#if RV32_HAS(T2C) && !RV32_HAS(SYSTEM)
     rv->quit = false;
     rv->jit_cache = jit_cache_init();
     /* prepare wait queue. */
@@ -566,8 +566,10 @@ bool rv_has_halted(riscv_t *rv)
 void rv_delete(riscv_t *rv)
 {
     assert(rv);
-#if !RV32_HAS(JIT)
+#if !RV32_HAS(JIT) || (RV32_HAS(SYSTEM) && !RV32_HAS(ELF_LOADER))
     vm_attr_t *attr = PRIV(rv);
+#endif
+#if !RV32_HAS(JIT)
     map_delete(attr->fd_map);
     memory_delete(attr->mem);
     block_map_destroy(rv);
diff --git a/src/riscv_private.h b/src/riscv_private.h
index 0ae6f279..684426dd 100644
--- a/src/riscv_private.h
+++ b/src/riscv_private.h
@@ -90,6 +90,9 @@ typedef struct block {
     bool translatable; /**< Determine whether the block has RV32AF instructions */
     bool has_loops;    /**< Determine whether the block contains a loop */
+#if RV32_HAS(SYSTEM)
+    uint32_t satp;
+#endif
 #if RV32_HAS(T2C)
     bool compiled; /**< The T2C request is enqueued or not */
 #endif
@@ -126,6 +129,18 @@ struct riscv_internal {
     riscv_word_t X[N_RV_REGS];
     riscv_word_t PC;
 
+#if RV32_HAS(JIT) && RV32_HAS(SYSTEM)
+    /*
+     * The AArch64 encoder only accepts a 9-bit signed offset, so do not
+     * place this structure at the bottom.
+ */ + struct { + uint32_t is_mmio; /* whether is MMIO or not */ + uint32_t type; /* 0: read, 1: write */ + uint32_t vaddr; + uint32_t paddr; + } jit_mmu; +#endif /* user provided data */ riscv_user_t data; diff --git a/src/rv32_jit.c b/src/rv32_jit.c index 4c1dad9f..50ec8c03 100644 --- a/src/rv32_jit.c +++ b/src/rv32_jit.c @@ -13,7 +13,7 @@ GEN(jal, { emit_load_imm(state, vm_reg[0], ir->pc + 4); } store_back(state); - emit_jmp(state, ir->pc + ir->imm); + emit_jmp(state, ir->pc + ir->imm, rv->csr_satp); emit_load_imm(state, temp_reg, ir->pc + ir->imm); emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); emit_exit(state); @@ -28,7 +28,7 @@ GEN(jalr, { emit_load_imm(state, vm_reg[1], ir->pc + 4); } store_back(state); - parse_branch_history_table(state, ir); + parse_branch_history_table(state, rv, ir); emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); emit_exit(state); }) @@ -39,14 +39,14 @@ GEN(beq, { uint32_t jump_loc = state->offset; emit_jcc_offset(state, 0x84); if (ir->branch_untaken) { - emit_jmp(state, ir->pc + 4); + emit_jmp(state, ir->pc + 4, rv->csr_satp); } emit_load_imm(state, temp_reg, ir->pc + 4); emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); emit_exit(state); emit_jump_target_offset(state, JUMP_LOC, state->offset); if (ir->branch_taken) { - emit_jmp(state, ir->pc + ir->imm); + emit_jmp(state, ir->pc + ir->imm, rv->csr_satp); } emit_load_imm(state, temp_reg, ir->pc + ir->imm); emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); @@ -59,14 +59,14 @@ GEN(bne, { uint32_t jump_loc = state->offset; emit_jcc_offset(state, 0x85); if (ir->branch_untaken) { - emit_jmp(state, ir->pc + 4); + emit_jmp(state, ir->pc + 4, rv->csr_satp); } emit_load_imm(state, temp_reg, ir->pc + 4); emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); emit_exit(state); emit_jump_target_offset(state, JUMP_LOC, state->offset); if (ir->branch_taken) { - emit_jmp(state, ir->pc + ir->imm); + emit_jmp(state, ir->pc + ir->imm, rv->csr_satp); } emit_load_imm(state, temp_reg, ir->pc + ir->imm); emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); @@ -79,14 +79,14 @@ GEN(blt, { uint32_t jump_loc = state->offset; emit_jcc_offset(state, 0x8c); if (ir->branch_untaken) { - emit_jmp(state, ir->pc + 4); + emit_jmp(state, ir->pc + 4, rv->csr_satp); } emit_load_imm(state, temp_reg, ir->pc + 4); emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); emit_exit(state); emit_jump_target_offset(state, JUMP_LOC, state->offset); if (ir->branch_taken) { - emit_jmp(state, ir->pc + ir->imm); + emit_jmp(state, ir->pc + ir->imm, rv->csr_satp); } emit_load_imm(state, temp_reg, ir->pc + ir->imm); emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); @@ -99,14 +99,14 @@ GEN(bge, { uint32_t jump_loc = state->offset; emit_jcc_offset(state, 0x8d); if (ir->branch_untaken) { - emit_jmp(state, ir->pc + 4); + emit_jmp(state, ir->pc + 4, rv->csr_satp); } emit_load_imm(state, temp_reg, ir->pc + 4); emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); emit_exit(state); emit_jump_target_offset(state, JUMP_LOC, state->offset); if (ir->branch_taken) { - emit_jmp(state, ir->pc + ir->imm); + emit_jmp(state, ir->pc + ir->imm, rv->csr_satp); } emit_load_imm(state, temp_reg, ir->pc + ir->imm); emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC)); @@ -119,14 +119,14 @@ GEN(bltu, { uint32_t jump_loc = state->offset; emit_jcc_offset(state, 
     if (ir->branch_untaken) {
-        emit_jmp(state, ir->pc + 4);
+        emit_jmp(state, ir->pc + 4, rv->csr_satp);
     }
     emit_load_imm(state, temp_reg, ir->pc + 4);
     emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
     emit_exit(state);
     emit_jump_target_offset(state, JUMP_LOC, state->offset);
     if (ir->branch_taken) {
-        emit_jmp(state, ir->pc + ir->imm);
+        emit_jmp(state, ir->pc + ir->imm, rv->csr_satp);
     }
     emit_load_imm(state, temp_reg, ir->pc + ir->imm);
     emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
@@ -139,14 +139,14 @@ GEN(bgeu, {
     uint32_t jump_loc = state->offset;
     emit_jcc_offset(state, 0x83);
     if (ir->branch_untaken) {
-        emit_jmp(state, ir->pc + 4);
+        emit_jmp(state, ir->pc + 4, rv->csr_satp);
     }
     emit_load_imm(state, temp_reg, ir->pc + 4);
     emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
     emit_exit(state);
     emit_jump_target_offset(state, JUMP_LOC, state->offset);
     if (ir->branch_taken) {
-        emit_jmp(state, ir->pc + ir->imm);
+        emit_jmp(state, ir->pc + ir->imm, rv->csr_satp);
     }
     emit_load_imm(state, temp_reg, ir->pc + ir->imm);
     emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
@@ -155,66 +155,366 @@ GEN(bgeu, {
 GEN(lb, {
     memory_t *m = PRIV(rv)->mem;
     vm_reg[0] = ra_load(state, ir->rs1);
-    emit_load_imm(state, temp_reg, (intptr_t) (m->mem_base + ir->imm));
-    emit_alu64(state, 0x01, vm_reg[0], temp_reg);
-    vm_reg[1] = map_vm_reg(state, ir->rd);
-    emit_load_sext(state, S8, temp_reg, vm_reg[1], 0);
+    IIF(RV32_HAS(SYSTEM))
+    (
+        {
+            emit_load_imm(state, temp_reg, ir->imm);
+            emit_alu32(state, 0x01, vm_reg[0], temp_reg);
+            emit_store(state, S32, temp_reg, parameter_reg[0],
+                       offsetof(riscv_t, jit_mmu.vaddr));
+            emit_load_imm(state, temp_reg, rv_insn_lb);
+            emit_store(state, S32, temp_reg, parameter_reg[0],
+                       offsetof(riscv_t, jit_mmu.type));
+
+            store_back(state);
+            emit_jit_mmu_handler(state, ir->rd);
+            /* clear register mapping */
+            reset_reg();
+
+            /*
+             * If the access is MMIO, move the value read by the handler
+             * into the host register; otherwise, load it from memory.
+             */
+            emit_load(state, S32, parameter_reg[0], temp_reg,
+                      offsetof(riscv_t, jit_mmu.is_mmio));
+            emit_cmp_imm32(state, temp_reg, 0);
+            emit_jit_mmio_escape_load(state);
+            vm_reg[1] = map_vm_reg(state, ir->rd);
+
+            emit_load(state, S32, parameter_reg[0], vm_reg[1],
+                      offsetof(riscv_t, X) + 4 * ir->rd);
+            /* jump over the regular load */
+            emit_jit_mmio_escape_load_end(state, rv_insn_lb);
+
+            emit_load(state, S32, parameter_reg[0], vm_reg[0],
+                      offsetof(riscv_t, jit_mmu.paddr));
+            emit_load_imm(state, temp_reg, (uintptr_t) m->mem_base);
+            emit_alu64(state, 0x01, vm_reg[0], temp_reg);
+            emit_load_sext(state, S8, temp_reg, vm_reg[1], 0);
+        },
+        {
+            emit_load_imm(state, temp_reg, (uintptr_t) (m->mem_base + ir->imm));
+            emit_alu64(state, 0x01, vm_reg[0], temp_reg);
+            vm_reg[1] = map_vm_reg(state, ir->rd);
+            emit_load_sext(state, S8, temp_reg, vm_reg[1], 0);
+        })
 })
 GEN(lh, {
     memory_t *m = PRIV(rv)->mem;
     vm_reg[0] = ra_load(state, ir->rs1);
-    emit_load_imm(state, temp_reg, (intptr_t) (m->mem_base + ir->imm));
-    emit_alu64(state, 0x01, vm_reg[0], temp_reg);
-    vm_reg[1] = map_vm_reg(state, ir->rd);
-    emit_load_sext(state, S16, temp_reg, vm_reg[1], 0);
+    IIF(RV32_HAS(SYSTEM))
+    (
+        {
+            emit_load_imm(state, temp_reg, ir->imm);
+            emit_alu32(state, 0x01, vm_reg[0], temp_reg);
+            emit_store(state, S32, temp_reg, parameter_reg[0],
+                       offsetof(riscv_t, jit_mmu.vaddr));
+            emit_load_imm(state, temp_reg, rv_insn_lh);
+            emit_store(state, S32, temp_reg, parameter_reg[0],
+                       offsetof(riscv_t, jit_mmu.type));
+
+            store_back(state);
+            emit_jit_mmu_handler(state, ir->rd);
+            /* clear register mapping */
+            reset_reg();
+
+            /*
+             * If the access is MMIO, move the value read by the handler
+             * into the host register; otherwise, load it from memory.
+             */
+            emit_load(state, S32, parameter_reg[0], temp_reg,
+                      offsetof(riscv_t, jit_mmu.is_mmio));
+            emit_cmp_imm32(state, temp_reg, 0);
+            emit_jit_mmio_escape_load(state);
+            vm_reg[1] = map_vm_reg(state, ir->rd);
+
+            emit_load(state, S32, parameter_reg[0], vm_reg[1],
+                      offsetof(riscv_t, X) + 4 * ir->rd);
+            /* jump over the regular load */
+            emit_jit_mmio_escape_load_end(state, rv_insn_lh);
+
+            emit_load(state, S32, parameter_reg[0], vm_reg[0],
+                      offsetof(riscv_t, jit_mmu.paddr));
+            emit_load_imm(state, temp_reg, (uintptr_t) m->mem_base);
+            emit_alu64(state, 0x01, vm_reg[0], temp_reg);
+            emit_load_sext(state, S16, temp_reg, vm_reg[1], 0);
+        },
+        {
+            emit_load_imm(state, temp_reg, (uintptr_t) (m->mem_base + ir->imm));
+            emit_alu64(state, 0x01, vm_reg[0], temp_reg);
+            vm_reg[1] = map_vm_reg(state, ir->rd);
+            emit_load_sext(state, S16, temp_reg, vm_reg[1], 0);
+        })
 })
 GEN(lw, {
     memory_t *m = PRIV(rv)->mem;
     vm_reg[0] = ra_load(state, ir->rs1);
-    emit_load_imm(state, temp_reg, (intptr_t) (m->mem_base + ir->imm));
-    emit_alu64(state, 0x01, vm_reg[0], temp_reg);
-    vm_reg[1] = map_vm_reg(state, ir->rd);
-    emit_load(state, S32, temp_reg, vm_reg[1], 0);
+    IIF(RV32_HAS(SYSTEM))
+    (
+        {
+            emit_load_imm(state, temp_reg, ir->imm);
+            emit_alu32(state, 0x01, vm_reg[0], temp_reg);
+            emit_store(state, S32, temp_reg, parameter_reg[0],
+                       offsetof(riscv_t, jit_mmu.vaddr));
+            emit_load_imm(state, temp_reg, rv_insn_lw);
+            emit_store(state, S32, temp_reg, parameter_reg[0],
+                       offsetof(riscv_t, jit_mmu.type));
+
+            store_back(state);
+            emit_jit_mmu_handler(state, ir->rd);
+            /* clear register mapping */
+            reset_reg();
+
+            /*
+             * If the access is MMIO, move the value read by the handler
+             * into the host register; otherwise, load it from memory.
+             */
+            emit_load(state, S32, parameter_reg[0], temp_reg,
+                      offsetof(riscv_t, jit_mmu.is_mmio));
+            emit_cmp_imm32(state, temp_reg, 0);
+            emit_jit_mmio_escape_load(state);
+            vm_reg[1] = map_vm_reg(state, ir->rd);
+
+            emit_load(state, S32, parameter_reg[0], vm_reg[1],
+                      offsetof(riscv_t, X) + 4 * ir->rd);
+            /* jump over the regular load */
+            emit_jit_mmio_escape_load_end(state, rv_insn_lw);
+
+            emit_load(state, S32, parameter_reg[0], vm_reg[0],
+                      offsetof(riscv_t, jit_mmu.paddr));
+            emit_load_imm(state, temp_reg, (uintptr_t) m->mem_base);
+            emit_alu64(state, 0x01, vm_reg[0], temp_reg);
+            emit_load(state, S32, temp_reg, vm_reg[1], 0);
+        },
+        {
+            emit_load_imm(state, temp_reg, (uintptr_t) (m->mem_base + ir->imm));
+            emit_alu64(state, 0x01, vm_reg[0], temp_reg);
+            vm_reg[1] = map_vm_reg(state, ir->rd);
+            emit_load(state, S32, temp_reg, vm_reg[1], 0);
+        })
 })
 GEN(lbu, {
     memory_t *m = PRIV(rv)->mem;
     vm_reg[0] = ra_load(state, ir->rs1);
-    emit_load_imm(state, temp_reg, (intptr_t) (m->mem_base + ir->imm));
-    emit_alu64(state, 0x01, vm_reg[0], temp_reg);
-    vm_reg[1] = map_vm_reg(state, ir->rd);
-    emit_load(state, S8, temp_reg, vm_reg[1], 0);
+    IIF(RV32_HAS(SYSTEM))
+    (
+        {
+            emit_load_imm(state, temp_reg, ir->imm);
+            emit_alu32(state, 0x01, vm_reg[0], temp_reg);
+            emit_store(state, S32, temp_reg, parameter_reg[0],
+                       offsetof(riscv_t, jit_mmu.vaddr));
+            emit_load_imm(state, temp_reg, rv_insn_lbu);
+            emit_store(state, S32, temp_reg, parameter_reg[0],
+                       offsetof(riscv_t, jit_mmu.type));
+
+            store_back(state);
+            emit_jit_mmu_handler(state, ir->rd);
+            /* clear register mapping */
+            reset_reg();
+
+            /*
+             * If the access is MMIO, move the value read by the handler
+             * into the host register; otherwise, load it from memory.
+             */
+            emit_load(state, S32, parameter_reg[0], temp_reg,
+                      offsetof(riscv_t, jit_mmu.is_mmio));
+            emit_cmp_imm32(state, temp_reg, 0);
+            emit_jit_mmio_escape_load(state);
+            vm_reg[1] = map_vm_reg(state, ir->rd);
+
+            emit_load(state, S32, parameter_reg[0], vm_reg[1],
+                      offsetof(riscv_t, X) + 4 * ir->rd);
+            /* jump over the regular load */
+            emit_jit_mmio_escape_load_end(state, rv_insn_lbu);
+
+            emit_load(state, S32, parameter_reg[0], vm_reg[0],
+                      offsetof(riscv_t, jit_mmu.paddr));
+            emit_load_imm(state, temp_reg, (uintptr_t) m->mem_base);
+            emit_alu64(state, 0x01, vm_reg[0], temp_reg);
+            emit_load(state, S8, temp_reg, vm_reg[1], 0);
+        },
+        {
+            emit_load_imm(state, temp_reg, (uintptr_t) (m->mem_base + ir->imm));
+            emit_alu64(state, 0x01, vm_reg[0], temp_reg);
+            vm_reg[1] = map_vm_reg(state, ir->rd);
+            emit_load(state, S8, temp_reg, vm_reg[1], 0);
+        })
 })
 GEN(lhu, {
     memory_t *m = PRIV(rv)->mem;
     vm_reg[0] = ra_load(state, ir->rs1);
-    emit_load_imm(state, temp_reg, (intptr_t) (m->mem_base + ir->imm));
-    emit_alu64(state, 0x01, vm_reg[0], temp_reg);
-    vm_reg[1] = map_vm_reg(state, ir->rd);
-    emit_load(state, S16, temp_reg, vm_reg[1], 0);
+    IIF(RV32_HAS(SYSTEM))
+    (
+        {
+            emit_load_imm(state, temp_reg, ir->imm);
+            emit_alu32(state, 0x01, vm_reg[0], temp_reg);
+            emit_store(state, S32, temp_reg, parameter_reg[0],
+                       offsetof(riscv_t, jit_mmu.vaddr));
+            emit_load_imm(state, temp_reg, rv_insn_lhu);
+            emit_store(state, S32, temp_reg, parameter_reg[0],
+                       offsetof(riscv_t, jit_mmu.type));
+
+            store_back(state);
+            emit_jit_mmu_handler(state, ir->rd);
+            /* clear register mapping */
+            reset_reg();
+
+            /*
+             * If the access is MMIO, move the value read by the handler
+             * into the host register; otherwise, load it from memory.
+             */
+            emit_load(state, S32, parameter_reg[0], temp_reg,
+                      offsetof(riscv_t, jit_mmu.is_mmio));
+            emit_cmp_imm32(state, temp_reg, 0);
+            emit_jit_mmio_escape_load(state);
+            vm_reg[1] = map_vm_reg(state, ir->rd);
+
+            emit_load(state, S32, parameter_reg[0], vm_reg[1],
+                      offsetof(riscv_t, X) + 4 * ir->rd);
+            /* jump over the regular load */
+            emit_jit_mmio_escape_load_end(state, rv_insn_lhu);
+
+            emit_load(state, S32, parameter_reg[0], vm_reg[0],
+                      offsetof(riscv_t, jit_mmu.paddr));
+            emit_load_imm(state, temp_reg, (uintptr_t) m->mem_base);
+            emit_alu64(state, 0x01, vm_reg[0], temp_reg);
+            emit_load(state, S16, temp_reg, vm_reg[1], 0);
+        },
+        {
+            emit_load_imm(state, temp_reg, (uintptr_t) (m->mem_base + ir->imm));
+            emit_alu64(state, 0x01, vm_reg[0], temp_reg);
+            vm_reg[1] = map_vm_reg(state, ir->rd);
+            emit_load(state, S16, temp_reg, vm_reg[1], 0);
+        })
 })
 GEN(sb, {
     memory_t *m = PRIV(rv)->mem;
     vm_reg[0] = ra_load(state, ir->rs1);
-    emit_load_imm(state, temp_reg, (intptr_t) (m->mem_base + ir->imm));
-    emit_alu64(state, 0x01, vm_reg[0], temp_reg);
-    vm_reg[1] = ra_load(state, ir->rs2);
-    emit_store(state, S8, vm_reg[1], temp_reg, 0);
+    IIF(RV32_HAS(SYSTEM))
+    (
+        {
+            emit_load_imm(state, temp_reg, ir->imm);
+            emit_alu32(state, 0x01, vm_reg[0], temp_reg);
+            emit_store(state, S32, temp_reg, parameter_reg[0],
+                       offsetof(riscv_t, jit_mmu.vaddr));
+            emit_load_imm(state, temp_reg, rv_insn_sb);
+            emit_store(state, S32, temp_reg, parameter_reg[0],
+                       offsetof(riscv_t, jit_mmu.type));
+            store_back(state);
+            emit_jit_mmu_handler(state, ir->rs2);
+            /* clear register mapping */
+            reset_reg();
+
+            /*
+             * If the access is MMIO, no store is needed here since it has
+             * already been performed in the MMIO handler; otherwise, store
+             * the value into memory.
+             */
+            emit_load(state, S32, parameter_reg[0], temp_reg,
+                      offsetof(riscv_t, jit_mmu.is_mmio));
+            emit_cmp_imm32(state, temp_reg, 1);
+            emit_jit_mmio_escape_store(state, rv_insn_sb);
+
+            emit_load(state, S32, parameter_reg[0], vm_reg[0],
+                      offsetof(riscv_t, jit_mmu.paddr));
+            emit_load_imm(state, temp_reg, (uintptr_t) m->mem_base);
+            emit_alu64(state, 0x01, vm_reg[0], temp_reg);
+            vm_reg[1] = ra_load(state, ir->rs2);
+            emit_store(state, S8, vm_reg[1], temp_reg, 0);
+            reset_reg();
+        },
+        {
+            emit_load_imm(state, temp_reg, (uintptr_t) (m->mem_base + ir->imm));
+            emit_alu64(state, 0x01, vm_reg[0], temp_reg);
+            vm_reg[1] = ra_load(state, ir->rs2);
+            emit_store(state, S8, vm_reg[1], temp_reg, 0);
+        })
 })
 GEN(sh, {
     memory_t *m = PRIV(rv)->mem;
     vm_reg[0] = ra_load(state, ir->rs1);
-    emit_load_imm(state, temp_reg, (intptr_t) (m->mem_base + ir->imm));
-    emit_alu64(state, 0x01, vm_reg[0], temp_reg);
-    vm_reg[1] = ra_load(state, ir->rs2);
-    emit_store(state, S16, vm_reg[1], temp_reg, 0);
+    IIF(RV32_HAS(SYSTEM))
+    (
+        {
+            emit_load_imm(state, temp_reg, ir->imm);
+            emit_alu32(state, 0x01, vm_reg[0], temp_reg);
+            emit_store(state, S32, temp_reg, parameter_reg[0],
+                       offsetof(riscv_t, jit_mmu.vaddr));
+            emit_load_imm(state, temp_reg, rv_insn_sh);
+            emit_store(state, S32, temp_reg, parameter_reg[0],
+                       offsetof(riscv_t, jit_mmu.type));
+            store_back(state);
+            emit_jit_mmu_handler(state, ir->rs2);
+            /* clear register mapping */
+            reset_reg();
+
+            /*
+             * If the access is MMIO, no store is needed here since it has
+             * already been performed in the MMIO handler; otherwise, store
+             * the value into memory.
+             */
+            emit_load(state, S32, parameter_reg[0], temp_reg,
+                      offsetof(riscv_t, jit_mmu.is_mmio));
+            emit_cmp_imm32(state, temp_reg, 1);
+            emit_jit_mmio_escape_store(state, rv_insn_sh);
+
+            emit_load(state, S32, parameter_reg[0], vm_reg[0],
+                      offsetof(riscv_t, jit_mmu.paddr));
+            emit_load_imm(state, temp_reg, (uintptr_t) m->mem_base);
+            emit_alu64(state, 0x01, vm_reg[0], temp_reg);
+            vm_reg[1] = ra_load(state, ir->rs2);
+            emit_store(state, S16, vm_reg[1], temp_reg, 0);
+            reset_reg();
+        },
+        {
+            emit_load_imm(state, temp_reg, (uintptr_t) (m->mem_base + ir->imm));
+            emit_alu64(state, 0x01, vm_reg[0], temp_reg);
+            vm_reg[1] = ra_load(state, ir->rs2);
+            emit_store(state, S16, vm_reg[1], temp_reg, 0);
+        })
 })
 GEN(sw, {
     memory_t *m = PRIV(rv)->mem;
     vm_reg[0] = ra_load(state, ir->rs1);
-    emit_load_imm(state, temp_reg, (intptr_t) (m->mem_base + ir->imm));
-    emit_alu64(state, 0x01, vm_reg[0], temp_reg);
-    vm_reg[1] = ra_load(state, ir->rs2);
-    emit_store(state, S32, vm_reg[1], temp_reg, 0);
+    IIF(RV32_HAS(SYSTEM))
+    (
+        {
+            emit_load_imm(state, temp_reg, ir->imm);
+            emit_alu32(state, 0x01, vm_reg[0], temp_reg);
+            emit_store(state, S32, temp_reg, parameter_reg[0],
+                       offsetof(riscv_t, jit_mmu.vaddr));
+            emit_load_imm(state, temp_reg, rv_insn_sw);
+            emit_store(state, S32, temp_reg, parameter_reg[0],
+                       offsetof(riscv_t, jit_mmu.type));
+            store_back(state);
+            emit_jit_mmu_handler(state, ir->rs2);
+            /* clear register mapping */
+            reset_reg();
+
+            /*
+             * If the access is MMIO, no store is needed here since it has
+             * already been performed in the MMIO handler; otherwise, store
+             * the value into memory.
+             */
+            emit_load(state, S32, parameter_reg[0], temp_reg,
+                      offsetof(riscv_t, jit_mmu.is_mmio));
+            emit_cmp_imm32(state, temp_reg, 1);
+            emit_jit_mmio_escape_store(state, rv_insn_sw);
+
+            emit_load(state, S32, parameter_reg[0], vm_reg[0],
+                      offsetof(riscv_t, jit_mmu.paddr));
+            emit_load_imm(state, temp_reg, (uintptr_t) m->mem_base);
+            emit_alu64(state, 0x01, vm_reg[0], temp_reg);
+            vm_reg[1] = ra_load(state, ir->rs2);
+            emit_store(state, S32, vm_reg[1], temp_reg, 0);
+            reset_reg();
+        },
+        {
+            emit_load_imm(state, temp_reg, (uintptr_t) (m->mem_base + ir->imm));
+            emit_alu64(state, 0x01, vm_reg[0], temp_reg);
+            vm_reg[1] = ra_load(state, ir->rs2);
+            emit_store(state, S32, vm_reg[1], temp_reg, 0);
+        })
 })
 GEN(addi, {
     vm_reg[0] = ra_load(state, ir->rs1);
@@ -388,7 +688,9 @@ GEN(ebreak, {
 })
 GEN(wfi, { assert(NULL); })
 GEN(uret, { assert(NULL); })
+#if RV32_HAS(SYSTEM)
 GEN(sret, { assert(NULL); })
+#endif
 GEN(hret, { assert(NULL); })
 GEN(mret, { assert(NULL); })
 GEN(sfencevma, { assert(NULL); })
@@ -539,7 +841,7 @@ GEN(cjal, {
     vm_reg[0] = map_vm_reg(state, rv_reg_ra);
     emit_load_imm(state, vm_reg[0], ir->pc + 2);
     store_back(state);
-    emit_jmp(state, ir->pc + ir->imm);
+    emit_jmp(state, ir->pc + ir->imm, rv->csr_satp);
     emit_load_imm(state, temp_reg, ir->pc + ir->imm);
     emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
     emit_exit(state);
@@ -598,7 +900,7 @@ GEN(cand, {
 })
 GEN(cj, {
     store_back(state);
-    emit_jmp(state, ir->pc + ir->imm);
+    emit_jmp(state, ir->pc + ir->imm, rv->csr_satp);
     emit_load_imm(state, temp_reg, ir->pc + ir->imm);
     emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
     emit_exit(state);
@@ -610,14 +912,14 @@ GEN(cbeqz, {
     uint32_t jump_loc = state->offset;
     emit_jcc_offset(state, 0x84);
     if (ir->branch_untaken) {
-        emit_jmp(state, ir->pc + 2);
+        emit_jmp(state, ir->pc + 2, rv->csr_satp);
     }
     emit_load_imm(state, temp_reg, ir->pc + 2);
     emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
     emit_exit(state);
     emit_jump_target_offset(state, JUMP_LOC, state->offset);
     if (ir->branch_taken) {
-        emit_jmp(state, ir->pc + ir->imm);
+        emit_jmp(state, ir->pc + ir->imm, rv->csr_satp);
     }
     emit_load_imm(state, temp_reg, ir->pc + ir->imm);
     emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
@@ -630,14 +932,14 @@ GEN(cbnez, {
     uint32_t jump_loc = state->offset;
     emit_jcc_offset(state, 0x85);
    if (ir->branch_untaken) {
-        emit_jmp(state, ir->pc + 2);
+        emit_jmp(state, ir->pc + 2, rv->csr_satp);
     }
     emit_load_imm(state, temp_reg, ir->pc + 2);
     emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
     emit_exit(state);
     emit_jump_target_offset(state, JUMP_LOC, state->offset);
     if (ir->branch_taken) {
-        emit_jmp(state, ir->pc + ir->imm);
+        emit_jmp(state, ir->pc + ir->imm, rv->csr_satp);
     }
     emit_load_imm(state, temp_reg, ir->pc + ir->imm);
     emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
@@ -659,7 +961,7 @@ GEN(cjr, {
     vm_reg[0] = ra_load(state, ir->rs1);
     emit_mov(state, vm_reg[0], temp_reg);
     store_back(state);
-    parse_branch_history_table(state, ir);
+    parse_branch_history_table(state, rv, ir);
     emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
     emit_exit(state);
 })
@@ -685,7 +987,7 @@ GEN(cjalr, {
     vm_reg[1] = map_vm_reg(state, rv_reg_ra);
     emit_load_imm(state, vm_reg[1], ir->pc + 2);
     store_back(state);
-    parse_branch_history_table(state, ir);
+    parse_branch_history_table(state, rv, ir);
     emit_store(state, S32, temp_reg, parameter_reg[0], offsetof(riscv_t, PC));
     emit_exit(state);
 })
diff --git a/src/rv32_template.c b/src/rv32_template.c
index e0e41cf6..30d5b14e 100644
--- a/src/rv32_template.c
+++ b/src/rv32_template.c
@@ -167,11 +167,18 @@ RVOP(
     struct rv_insn *taken = ir->branch_taken;
     if (taken) {
 #if RV32_HAS(JIT)
-        cache_get(rv->block_cache, PC, true);
-        if (!set_add(&pc_set, PC))
-            has_loops = true;
-        if (cache_hot(rv->block_cache, PC))
-            goto end_op;
+        IIF(RV32_HAS(SYSTEM))(if (!rv->is_trapped && !reloc_enable_mmu), )
+        {
+            IIF(RV32_HAS(SYSTEM))
+            (block_t *next =, ) cache_get(rv->block_cache, PC, true);
+            IIF(RV32_HAS(SYSTEM))(if (next->satp == rv->csr_satp), )
+            {
+                if (!set_add(&pc_set, PC))
+                    has_loops = true;
+                if (cache_hot(rv->block_cache, PC))
+                    goto end_op;
+            }
+        }
 #endif
 #if RV32_HAS(SYSTEM)
         if (!rv->is_trapped)
@@ -244,32 +251,45 @@
         } \
     }
 #else
-#define LOOKUP_OR_UPDATE_BRANCH_HISTORY_TABLE() \
-    block_t *block = cache_get(rv->block_cache, PC, true); \
-    if (block) { \
-        for (int i = 0; i < HISTORY_SIZE; i++) { \
-            if (ir->branch_table->PC[i] == PC) { \
-                ir->branch_table->times[i]++; \
-                if (cache_hot(rv->block_cache, PC)) \
-                    goto end_op; \
-            } \
-        } \
-        /* update branch history table */ \
-        int min_idx = 0; \
-        for (int i = 0; i < HISTORY_SIZE; i++) { \
-            if (!ir->branch_table->times[i]) { \
-                min_idx = i; \
-                break; \
-            } else if (ir->branch_table->times[min_idx] > \
-                       ir->branch_table->times[i]) { \
-                min_idx = i; \
-            } \
-        } \
-        ir->branch_table->times[min_idx] = 1; \
-        ir->branch_table->PC[min_idx] = PC; \
-        if (cache_hot(rv->block_cache, PC)) \
-            goto end_op; \
-        MUST_TAIL return block->ir_head->impl(rv, block->ir_head, cycle, PC); \
+#define LOOKUP_OR_UPDATE_BRANCH_HISTORY_TABLE() \
+    IIF(RV32_HAS(SYSTEM))(if (!rv->is_trapped && !reloc_enable_mmu), ) \
+    { \
+        block_t *block = cache_get(rv->block_cache, PC, true); \
+        if (block) { \
+            IIF(RV32_HAS(SYSTEM))(if (block->satp == rv->csr_satp), ) \
+            { \
+                for (int i = 0; i < HISTORY_SIZE; i++) { \
+                    if (ir->branch_table->PC[i] == PC) { \
+                        IIF(RV32_HAS(SYSTEM)) \
+                        (if (ir->branch_table->satp[i] == rv->csr_satp), ) \
+                        { \
+                            ir->branch_table->times[i]++; \
+                            if (cache_hot(rv->block_cache, PC)) \
+                                goto end_op; \
+                        } \
+                    } \
+                } \
+                /* update branch history table */ \
+                int min_idx = 0; \
+                for (int i = 0; i < HISTORY_SIZE; i++) { \
+                    if (!ir->branch_table->times[i]) { \
+                        min_idx = i; \
+                        break; \
+                    } else if (ir->branch_table->times[min_idx] > \
+                               ir->branch_table->times[i]) { \
+                        min_idx = i; \
+                    } \
+                } \
+                ir->branch_table->times[min_idx] = 1; \
+                ir->branch_table->PC[min_idx] = PC; \
+                IIF(RV32_HAS(SYSTEM)) \
+                (ir->branch_table->satp[min_idx] = rv->csr_satp, ); \
+                if (cache_hot(rv->block_cache, PC)) \
+                    goto end_op; \
+                MUST_TAIL return block->ir_head->impl(rv, block->ir_head, \
+                                                      cycle, PC); \
+            } \
+        } \
     }
 #endif
@@ -359,11 +379,14 @@ RVOP(
     IIF(RV32_HAS(JIT)) \
     ( \
         { \
-            cache_get(rv->block_cache, PC + 4, true); \
-            if (!set_add(&pc_set, PC + 4)) \
-                has_loops = true; \
-            if (cache_hot(rv->block_cache, PC + 4)) \
-                goto nextop; \
+            block_t *next = cache_get(rv->block_cache, PC + 4, true); \
+            if (next IIF(RV32_HAS(SYSTEM))( \
+                    &&next->satp == rv->csr_satp, )) { \
+                if (!set_add(&pc_set, PC + 4)) \
+                    has_loops = true; \
+                if (cache_hot(rv->block_cache, PC + 4)) \
+                    goto nextop; \
+            } \
         }, ); \
     PC += 4; \
     IIF(RV32_HAS(SYSTEM)) \
     ( \
@@ -393,11 +416,14 @@ RVOP(
     IIF(RV32_HAS(JIT)) \
     ( \
         { \
-            cache_get(rv->block_cache, PC, true); \
-            if (!set_add(&pc_set, PC)) \
-                has_loops = true; \
-            if (cache_hot(rv->block_cache, PC)) \
-                goto end_op; \
+            block_t *next = cache_get(rv->block_cache, PC, true); \
+            if (next IIF(RV32_HAS(SYSTEM))( \
+                    &&next->satp == rv->csr_satp, )) { \
+                if (!set_add(&pc_set, PC)) \
+                    has_loops = true; \
+                if (cache_hot(rv->block_cache, PC)) \
+                    goto end_op; \
+            } \
         }, ); \
     IIF(RV32_HAS(SYSTEM)) \
     ( \
@@ -2079,11 +2105,15 @@ RVOP(
     struct rv_insn *taken = ir->branch_taken;
     if (taken) {
 #if RV32_HAS(JIT)
-        cache_get(rv->block_cache, PC, true);
-        if (!set_add(&pc_set, PC))
-            has_loops = true;
-        if (cache_hot(rv->block_cache, PC))
-            goto end_op;
+        IIF(RV32_HAS(SYSTEM))
+        (block_t *next =, ) cache_get(rv->block_cache, PC, true);
+        IIF(RV32_HAS(SYSTEM))(if (next->satp == rv->csr_satp), )
+        {
+            if (!set_add(&pc_set, PC))
+                has_loops = true;
+            if (cache_hot(rv->block_cache, PC))
+                goto end_op;
+        }
 #endif
 
 #if RV32_HAS(SYSTEM)
@@ -2246,11 +2276,15 @@ RVOP(
     struct rv_insn *taken = ir->branch_taken;
     if (taken) {
 #if RV32_HAS(JIT)
-        cache_get(rv->block_cache, PC, true);
-        if (!set_add(&pc_set, PC))
-            has_loops = true;
-        if (cache_hot(rv->block_cache, PC))
-            goto end_op;
+        IIF(RV32_HAS(SYSTEM))
+        (block_t *next =, ) cache_get(rv->block_cache, PC, true);
+        IIF(RV32_HAS(SYSTEM))(if (next->satp == rv->csr_satp), )
+        {
+            if (!set_add(&pc_set, PC))
+                has_loops = true;
+            if (cache_hot(rv->block_cache, PC))
+                goto end_op;
+        }
 #endif
 #if RV32_HAS(SYSTEM)
         if (!rv->is_trapped)
@@ -2284,11 +2318,15 @@ RVOP(
     if (!untaken)
         goto nextop;
 #if RV32_HAS(JIT)
-    cache_get(rv->block_cache, PC + 2, true);
-    if (!set_add(&pc_set, PC + 2))
-        has_loops = true;
-    if (cache_hot(rv->block_cache, PC + 2))
-        goto nextop;
+    IIF(RV32_HAS(SYSTEM))
+    (block_t *next =, ) cache_get(rv->block_cache, PC + 2, true);
+    IIF(RV32_HAS(SYSTEM))(if (next->satp == rv->csr_satp), )
+    {
+        if (!set_add(&pc_set, PC + 2))
+            has_loops = true;
+        if (cache_hot(rv->block_cache, PC + 2))
+            goto nextop;
+    }
 #endif
     PC += 2;
 #if RV32_HAS(SYSTEM)
@@ -2306,11 +2344,15 @@ RVOP(
     struct rv_insn *taken = ir->branch_taken;
     if (taken) {
 #if RV32_HAS(JIT)
-        cache_get(rv->block_cache, PC, true);
-        if (!set_add(&pc_set, PC))
-            has_loops = true;
-        if (cache_hot(rv->block_cache, PC))
-            goto end_op;
+        IIF(RV32_HAS(SYSTEM))
+        (block_t *next =, ) cache_get(rv->block_cache, PC, true);
+        IIF(RV32_HAS(SYSTEM))(if (next->satp == rv->csr_satp), )
+        {
+            if (!set_add(&pc_set, PC))
+                has_loops = true;
+            if (cache_hot(rv->block_cache, PC))
+                goto end_op;
+        }
 #endif
 #if RV32_HAS(SYSTEM)
         if (!rv->is_trapped)
@@ -2353,11 +2395,15 @@ RVOP(
     if (!untaken)
         goto nextop;
 #if RV32_HAS(JIT)
-    cache_get(rv->block_cache, PC + 2, true);
-    if (!set_add(&pc_set, PC + 2))
-        has_loops = true;
-    if (cache_hot(rv->block_cache, PC + 2))
-        goto nextop;
+    IIF(RV32_HAS(SYSTEM))
+    (block_t *next =, ) cache_get(rv->block_cache, PC + 2, true);
+    IIF(RV32_HAS(SYSTEM))(if (next->satp == rv->csr_satp), )
+    {
+        if (!set_add(&pc_set, PC + 2))
+            has_loops = true;
+        if (cache_hot(rv->block_cache, PC + 2))
+            goto nextop;
+    }
 #endif
     PC += 2;
 #if RV32_HAS(SYSTEM)
@@ -2375,11 +2421,15 @@ RVOP(
     struct rv_insn *taken = ir->branch_taken;
     if (taken) {
 #if RV32_HAS(JIT)
-        cache_get(rv->block_cache, PC, true);
-        if (!set_add(&pc_set, PC))
-            has_loops = true;
-        if (cache_hot(rv->block_cache, PC))
-            goto end_op;
+        IIF(RV32_HAS(SYSTEM))
+        (block_t *next =, ) cache_get(rv->block_cache, PC, true);
+        IIF(RV32_HAS(SYSTEM))(if (next->satp == rv->csr_satp), )
+        {
+            if (!set_add(&pc_set, PC))
+                has_loops = true;
+            if (cache_hot(rv->block_cache, PC))
+                goto end_op;
+        }
 #endif
 #if RV32_HAS(SYSTEM)
         if (!rv->is_trapped)
diff --git a/src/utils.c b/src/utils.c
index 3199235d..13696855 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -3,6 +3,7 @@
  * "LICENSE" for information on usage and redistribution of this file.
  */
 
+#include <assert.h>
 #include
 #include
 #include
@@ -174,7 +175,11 @@ char *sanitize_path(const char *input)
     return ret;
 }
 
+#if RV32_HAS(SYSTEM) && RV32_HAS(JIT)
+HASH_FUNC_IMPL_64(set_hash_64, SET_SIZE_BITS, 1 << SET_SIZE_BITS);
+#else
 HASH_FUNC_IMPL(set_hash, SET_SIZE_BITS, 1 << SET_SIZE_BITS);
+#endif
 
 void set_reset(set_t *set)
 {
@@ -186,15 +191,25 @@ void set_reset(set_t *set)
  * @set: a pointer to the target set
  * @key: the key of the entry to insert
  */
+#if RV32_HAS(SYSTEM) && RV32_HAS(JIT)
+bool set_add(set_t *set, uint64_t key)
+#else
 bool set_add(set_t *set, uint32_t key)
+#endif
 {
+#if RV32_HAS(SYSTEM) && RV32_HAS(JIT)
+    const uint64_t index = set_hash_64(key);
+#else
     const uint32_t index = set_hash(key);
+#endif
+
     uint8_t count = 0;
-    while (set->table[index][count]) {
+    while (count < SET_SLOTS_SIZE && set->table[index][count]) {
         if (set->table[index][count++] == key)
             return false;
     }
 
+    assert(count < SET_SLOTS_SIZE);
     set->table[index][count] = key;
     return true;
 }
@@ -204,10 +219,19 @@ bool set_add(set_t *set, uint32_t key)
  * @set: a pointer to the target set
  * @key: the key to look up
  */
+#if RV32_HAS(SYSTEM) && RV32_HAS(JIT)
+bool set_has(set_t *set, uint64_t key)
+#else
 bool set_has(set_t *set, uint32_t key)
+#endif
 {
+#if RV32_HAS(SYSTEM) && RV32_HAS(JIT)
+    const uint64_t index = set_hash_64(key);
+#else
     const uint32_t index = set_hash(key);
-    for (uint8_t count = 0; set->table[index][count]; count++) {
+#endif
+    for (uint8_t count = 0; count < SET_SLOTS_SIZE && set->table[index][count];
+         count++) {
         if (set->table[index][count] == key)
             return true;
     }
diff --git a/src/utils.h b/src/utils.h
index 78f68985..e6f42228 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -24,6 +24,14 @@ void rv_clock_gettime(struct timespec *tp);
         return (val * 0x61C88647 >> (32 - size_bits)) & ((size) - (1)); \
     }
 
+#define HASH_FUNC_IMPL_64(name, size_bits, size) \
+    FORCE_INLINE uint64_t name(uint64_t val) \
+    { \
+        /* 0x61c8864680b583eb is the 64-bit golden ratio */ \
+        return (val * 0x61c8864680b583ebull >> (64 - size_bits)) & \
+               ((size) - (1)); \
+    }
+
 /* sanitize_path returns the shortest path name equivalent to path
  * by purely lexical processing. It applies the following rules
  * iteratively until no further processing can be done:
@@ -133,11 +141,26 @@ static inline void list_del_init(struct list_head *node)
 #define SET_SIZE (1 << SET_SIZE_BITS)
 #define SET_SLOTS_SIZE 32
 
+/*
+ * Use a composed key in JIT system simulation. The upper 32 bits hold the
+ * value of the supervisor address translation and protection (SATP)
+ * register, and the lower 32 bits hold the program counter (PC), the same
+ * as in user-space simulation.
+ */
+#define COMPOSED_KEY(block) \
+    IIF(RV32_HAS(SYSTEM)) \
+    (((((uint64_t) block->satp) << 32) | (uint64_t) block->pc_start), \
+     (uint32_t) block->pc_start)
+
 /* The set consists of SET_SIZE buckets, with each bucket containing
  * SET_SLOTS_SIZE slots.
  */
 typedef struct {
+#if RV32_HAS(SYSTEM) && RV32_HAS(JIT)
+    uint64_t table[SET_SIZE][SET_SLOTS_SIZE];
+#else
     uint32_t table[SET_SIZE][SET_SLOTS_SIZE];
+#endif
 } set_t;
 
 /**
@@ -151,11 +174,19 @@ void set_reset(set_t *set);
  * @set: a pointer to the target set
  * @key: the key of the entry to insert
  */
+#if RV32_HAS(SYSTEM) && RV32_HAS(JIT)
+bool set_add(set_t *set, uint64_t key);
+#else
 bool set_add(set_t *set, uint32_t key);
+#endif
 
 /**
  * set_has - check whether an element exists in the set
  * @set: a pointer to the target set
  * @key: the key to look up
  */
+#if RV32_HAS(SYSTEM) && RV32_HAS(JIT)
+bool set_has(set_t *set, uint64_t key);
+#else
 bool set_has(set_t *set, uint32_t key);
+#endif