Skip to content

Commit

Permalink
Fixup x64 arch build
Browse files Browse the repository at this point in the history
  • Loading branch information
jmpews committed Feb 11, 2024
1 parent 422243e commit f9e88eb
Show file tree
Hide file tree
Showing 42 changed files with 465 additions and 386 deletions.
3 changes: 2 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,6 @@ set(dobby.SOURCE_FILE_LIST ${dobby.SOURCE_FILE_LIST}
# codegen
source/core/codegen/codegen-arm.cc
source/core/codegen/codegen-ia32.cc
source/core/codegen/codegen-x64.cc

# memory kit

Expand Down Expand Up @@ -156,6 +155,8 @@ set(dobby.SOURCE_FILE_LIST ${dobby.SOURCE_FILE_LIST}
source/TrampolineBridge/ClosureTrampolineBridge/x64/helper_x64.cc
source/TrampolineBridge/ClosureTrampolineBridge/x64/closure_bridge_x64.cc
source/TrampolineBridge/ClosureTrampolineBridge/x64/ClosureTrampolineX64.cc
source/TrampolineBridge/ClosureTrampolineBridge/x64/closure_bridge_x64.asm
source/TrampolineBridge/ClosureTrampolineBridge/x64/closure_trampoline_x64.asm

# plugin register

Expand Down
2 changes: 2 additions & 0 deletions common/os_arch_features.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,9 @@
namespace features {

// Clears bit 0 of `addr` in place on ARM builds (__arm__ / __aarch64__) and
// returns the resulting value; on every other target `addr` is returned
// unchanged. NOTE(review): the name suggests bit 0 is the Thumb-mode marker
// carried in ARM code addresses - confirm against callers.
template <typename T> inline T arm_thumb_fix_addr(T &addr) {
#if defined(__arm__) || defined(__aarch64__)
  const uintptr_t low_bit = 1;
  const uintptr_t raw_value = (uintptr_t)addr;
  addr = (T)(raw_value & ~low_bit);
#endif
  return addr;
}

Expand Down
6 changes: 2 additions & 4 deletions include/dobby.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,17 +94,15 @@ typedef struct _RegisterContext {
} DobbyRegisterContext;
#elif defined(_M_X64) || defined(__x86_64__)
typedef struct {
uint64_t dummy_0;
uint64_t rsp;

union {
struct {
uint64_t rax, rbx, rcx, rdx, rbp, rsp, rdi, rsi, r8, r9, r10, r11, r12, r13, r14, r15;
} regs;
} general;

uint64_t dummy_1;
uint64_t dummy_0;
uint64_t flags;
uint64_t ret;
} DobbyRegisterContext;
#endif

Expand Down
2 changes: 1 addition & 1 deletion source/Backend/UserMode/MultiThreadSupport/ThreadSupport.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

#include "dobby/dobby_internal.h"

#include "source/Backend/UserMode/Thread/PlatformThread.h"
#include "Backend/UserMode/Thread/PlatformThread.h"

// StackFrame base in CallStack
typedef struct _StackFrame {
Expand Down
4 changes: 2 additions & 2 deletions source/InstructionRelocation/arm/InstructionRelocationARM.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ class ThumbAssembler : public Assembler {
}

void EmitAddress(uint32_t value) {
buffer_->Emit32(value);
buffer_->Emit<int32_t>(value);
}

// =====
Expand Down Expand Up @@ -277,7 +277,7 @@ class ThumbTurboAssembler : public ThumbAssembler {
void relocDataLabels() {
for (auto *data_label : data_labels_) {
bindLabel(data_label);
reinterpret_cast<CodeBufferBase *>(buffer_)->EmitBuffer(data_label->data_, data_label->data_size_);
reinterpret_cast<CodeMemBuffer *>(buffer_)->EmitBuffer(data_label->data_, data_label->data_size_);
}
}

Expand Down
26 changes: 12 additions & 14 deletions source/InstructionRelocation/x64/InstructionRelocationX64.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,19 @@
#include "core/assembler/assembler-x64.h"
#include "core/codegen/codegen-x64.h"

#include "MemoryAllocator/CodeMemBuffer.h"

using namespace zz::x64;

int GenRelocateCodeFixed(void *buffer, CodeMemBlock *origin, CodeMemBlock *relocated, bool branch) {
TurboAssembler turbo_assembler_(0);
// Set fixed executable code chunk address
turbo_assembler_.SetRealizedAddress((void *)relocated->addr);
turbo_assembler_.set_fixed_addr(relocated->addr());
#define _ turbo_assembler_.
#define __ turbo_assembler_.code_buffer()->

auto curr_orig_ip = (addr64_t)origin->addr;
auto curr_relo_ip = (addr64_t)relocated->addr;
auto curr_orig_ip = (addr64_t)origin->addr();
auto curr_relo_ip = (addr64_t)relocated->addr();

auto buffer_cursor = (uint8_t *)buffer;

Expand All @@ -36,7 +38,7 @@ int GenRelocateCodeFixed(void *buffer, CodeMemBlock *origin, CodeMemBlock *reloc
// go next
curr_orig_ip += insn.length;
buffer_cursor += insn.length;
curr_relo_ip = (addr64_t)relocated->addr + turbo_assembler_.ip_offset();
curr_relo_ip = (addr64_t)relocated->addr() + turbo_assembler_.pc_offset();
}

// jmp to the remaining original instructions
Expand All @@ -45,25 +47,21 @@ int GenRelocateCodeFixed(void *buffer, CodeMemBlock *origin, CodeMemBlock *reloc
// TODO: 6 == jmp [RIP + disp32] instruction size
addr64_t stub_addr = curr_relo_ip + 6;
codegen.JmpNearIndirect(stub_addr);
turbo_assembler_.code_buffer()->Emit64(curr_orig_ip);
turbo_assembler_.code_buffer()->Emit<int64_t>(curr_orig_ip);
}

// update origin
int new_origin_len = curr_orig_ip - (addr_t)origin->addr;
origin->reset(origin->addr, new_origin_len);
auto new_origin_len = curr_orig_ip - origin->addr();
origin->reset(origin->addr(), new_origin_len);

int relo_len = turbo_assembler_.code_buffer()->buffer_size();
int relo_len = turbo_assembler_.code_buffer()->buffer_size;
if (relo_len > relocated->size) {
DEBUG_LOG("pre-alloc code chunk not enough");
return -1;
}

// generate executable code
{
auto code = AssemblyCodeBuilder::FinalizeFromTurboAssembler(&turbo_assembler_);
relocated->reset(code->addr, code->size);
delete code;
}
auto relocated_ = AssemblerCodeBuilder::FinalizeFromTurboAssembler(&turbo_assembler_);
*relocated = relocated_;

return 0;
}
Expand Down
84 changes: 42 additions & 42 deletions source/InstructionRelocation/x86/InstructionRelocationX86Shared.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,26 +11,26 @@
using namespace zz::x86;

// x64 jmp absolute address
inline void codegen_x64_jmp_absolute_addr(CodeBufferBase *buffer, addr_t target) {
inline void codegen_x64_jmp_absolute_addr(CodeMemBuffer *buffer, addr_t target) {
// jmp *(rip)
buffer->Emit8(0xFF);
buffer->Emit8(0x25); // ModR/M: 00 100 101
buffer->Emit32(0x00);
buffer->Emit<int8_t>(0xFF);
buffer->Emit<int8_t>(0x25); // ModR/M: 00 100 101
buffer->Emit<int32_t>(0x00);
// .long target
buffer->Emit64(target);
buffer->Emit<int64_t>(target);
}

// simple impl for ReloLabel
inline void emit_rel32_label(CodeBufferBase *buffer, uint32_t last_offset, addr_t curr_relo_ip, addr_t orig_dst_ip) {
addr_t curr_offset = buffer->buffer_size();
inline void emit_rel32_label(CodeMemBuffer *buffer, uint32_t last_offset, addr_t curr_relo_ip, addr_t orig_dst_ip) {
addr_t curr_offset = buffer->buffer_size;
uint32_t relo_insn_len = curr_offset + sizeof(uint32_t) - last_offset;
addr_t relo_ip = curr_relo_ip + relo_insn_len;
int32_t new_offset = orig_dst_ip - relo_ip;
buffer->Emit32(new_offset);
buffer->Emit<int32_t>(new_offset);
}

int GenRelocateSingleX86Insn(addr_t curr_orig_ip, addr_t curr_relo_ip, uint8_t *buffer_cursor, AssemblerBase *assembler,
CodeBufferBase *code_buffer, x86_insn_decode_t &insn, int8_t mode) {
CodeMemBuffer *code_buffer, x86_insn_decode_t &insn, int8_t mode) {
#define __ code_buffer->

int relocated_insn_len = -1;
Expand All @@ -44,12 +44,12 @@ int GenRelocateSingleX86Insn(addr_t curr_orig_ip, addr_t curr_relo_ip, uint8_t *
// x86 ip register == next instruction address
curr_orig_ip = curr_orig_ip + insn.length;

auto last_relo_offset = code_buffer->buffer_size();
auto last_relo_offset = code_buffer->buffer_size;

static auto x86_insn_encode_start = 0;
static auto x86_insn_encoded_len = 0;
auto x86_insn_encode_begin = [&] { x86_insn_encode_start = code_buffer->buffer_size(); };
auto x86_insn_encode_end = [&] { x86_insn_encoded_len = code_buffer->buffer_size() - x86_insn_encode_start; };
auto x86_insn_encode_begin = [&] { x86_insn_encode_start = code_buffer->buffer_size; };
auto x86_insn_encode_end = [&] { x86_insn_encoded_len = code_buffer->buffer_size - x86_insn_encode_start; };

if (insn.primary_opcode >= 0x70 && insn.primary_opcode <= 0x7F) { // jcc rel8
DEBUG_LOG("[x86 relo] %p: jc rel8", buffer_cursor);
Expand All @@ -60,19 +60,19 @@ int GenRelocateSingleX86Insn(addr_t curr_orig_ip, addr_t curr_relo_ip, uint8_t *
uint8_t opcode = 0x80 | (insn.primary_opcode & 0x0f);

x86_insn_encode_begin();
__ Emit8(0x0F);
__ Emit8(opcode);
__ Emit<int8_t>(0x0F);
__ Emit<int8_t>(opcode);
emit_rel32_label(code_buffer, x86_insn_encode_start, curr_relo_ip, orig_dst_ip);
#else
// jcc_true stage 1
const uint8_t label_jcc_cond_true_stage2 = 2;
__ Emit8(insn.primary_opcode);
__ Emit8(label_jcc_cond_true_stage2);
__ Emit<int8_t>(insn.primary_opcode);
__ Emit<int8_t>(label_jcc_cond_true_stage2);

// jcc_false
const uint8_t label_cond_false = 6 + 8;
__ Emit8(0xEB);
__ Emit8(label_cond_false);
__ Emit<int8_t>(0xEB);
__ Emit<int8_t>(label_cond_false);

// jcc_true stage 2, jmp to orig dst
codegen_x64_jmp_absolute_addr(code_buffer, orig_dst_ip);
Expand All @@ -89,22 +89,21 @@ int GenRelocateSingleX86Insn(addr_t curr_orig_ip, addr_t curr_relo_ip, uint8_t *
{

uint32_t jmp_near_range = (uint32_t)2 * 1024 * 1024 * 1024;
auto rip_insn_seq = (addr_t)NearMemoryAllocator::Shared()->allocateNearExecMemory(
insn.length + 6 + 8, orig_dst_ip, jmp_near_range);

auto blk = gNearMemoryAllocator.allocNearCodeBlock(insn.length + 6 + 8, orig_dst_ip, jmp_near_range);
auto rip_insn_seq = (addr_t)blk.addr();
rip_insn_seq_addr = rip_insn_seq;
}

// jmp *(rip) => jmp to [rip insn seq]
x86_insn_encode_begin();
__ Emit8(0xFF);
__ Emit8(0x25); // ModR/M: 00 100 101
__ Emit32(0);
__ Emit64(rip_insn_seq_addr);
__ Emit<int8_t>(0xFF);
__ Emit<int8_t>(0x25); // ModR/M: 00 100 101
__ Emit<int32_t>(0);
__ Emit<int64_t>(rip_insn_seq_addr);
x86_insn_encode_end();

{
auto rip_insn_seq_buffer = CodeBufferBase();
auto rip_insn_seq_buffer = CodeMemBuffer();
#define ___ rip_insn_seq_buffer.

auto rip_insn_req_ip = rip_insn_seq_addr;
Expand All @@ -113,7 +112,7 @@ int GenRelocateSingleX86Insn(addr_t curr_orig_ip, addr_t curr_relo_ip, uint8_t *

// keep orig insn opcode
___ EmitBuffer(buffer_cursor, insn.displacement_offset);
___ Emit32(new_disp);
___ Emit<int32_t>(new_disp);
// keep orig insn immediate
if (insn.immediate_offset) {
___ EmitBuffer((buffer_cursor + insn.immediate_offset), insn.length - insn.immediate_offset);
Expand All @@ -123,7 +122,7 @@ int GenRelocateSingleX86Insn(addr_t curr_orig_ip, addr_t curr_relo_ip, uint8_t *
auto relo_next_ip = curr_relo_ip + x86_insn_encoded_len;
codegen_x64_jmp_absolute_addr(&rip_insn_seq_buffer, relo_next_ip);

DobbyCodePatch((void *)rip_insn_seq_addr, rip_insn_seq_buffer.buffer(), rip_insn_seq_buffer.buffer_size());
DobbyCodePatch((void *)rip_insn_seq_addr, rip_insn_seq_buffer.buffer, rip_insn_seq_buffer.buffer_size);
}

} else if (insn.primary_opcode == 0xEB) { // jmp rel8
Expand All @@ -134,7 +133,7 @@ int GenRelocateSingleX86Insn(addr_t curr_orig_ip, addr_t curr_relo_ip, uint8_t *

#if defined(TARGET_ARCH_IA32)
x86_insn_encode_begin();
__ Emit8(0xE9);
__ Emit<int8_t>(0xE9);
emit_rel32_label(code_buffer, x86_insn_encode_start, curr_relo_ip, orig_dst_ip);
#else
// jmp *(rip)
Expand All @@ -154,25 +153,25 @@ int GenRelocateSingleX86Insn(addr_t curr_orig_ip, addr_t curr_relo_ip, uint8_t *
__ EmitBuffer(buffer_cursor, insn.immediate_offset);
emit_rel32_label(code_buffer, x86_insn_encode_start, curr_relo_ip, orig_dst_ip);
#else
__ Emit8(0xFF);
__ Emit<int8_t>(0xFF);
if (insn.primary_opcode == 0xE8) {
// call *(rip + 2)
__ Emit8(0x15); // ModR/M: 00 010 101
__ Emit32(2);
__ Emit<int8_t>(0x15); // ModR/M: 00 010 101
__ Emit<int32_t>(2);

// jmp 8
__ Emit8(0xEB);
__ Emit8(0x08);
__ Emit<int8_t>(0xEB);
__ Emit<int8_t>(0x08);

// dst
__ Emit64(orig_dst_ip);
__ Emit<int64_t>(orig_dst_ip);
} else {
// jmp *(rip)
__ Emit8(0x25); // ModR/M: 00 100 101
__ Emit32(0);
__ Emit<int8_t>(0x25); // ModR/M: 00 100 101
__ Emit<int32_t>(0);

// dst
__ Emit64(orig_dst_ip);
__ Emit<int64_t>(orig_dst_ip);
}
#endif
} else if (insn.primary_opcode >= 0xE0 && insn.primary_opcode <= 0xE2) { // LOOPNZ/LOOPZ/LOOP/JECXZ
Expand All @@ -187,7 +186,7 @@ int GenRelocateSingleX86Insn(addr_t curr_orig_ip, addr_t curr_relo_ip, uint8_t *

// insn -> relocated insn
{
int relo_offset = code_buffer->buffer_size();
int relo_offset = code_buffer->buffer_size;
int relo_len = relo_offset - last_relo_offset;
DEBUG_LOG("insn -> relocated insn: %d -> %d", insn.length, relo_len);
}
Expand All @@ -197,9 +196,10 @@ int GenRelocateSingleX86Insn(addr_t curr_orig_ip, addr_t curr_relo_ip, uint8_t *
void GenRelocateCodeX86Shared(void *buffer, CodeMemBlock *origin, CodeMemBlock *relocated, bool branch) {
int expected_relocated_mem_size = 32;
x86_try_again:
if (!relocated->addr) {
auto relocated_mem = MemoryAllocator::SharedAllocator()->allocateExecMemory(expected_relocated_mem_size);
if (relocated_mem == nullptr) {
if (!relocated->addr()) {
auto blk = gMemoryAllocator.allocExecBlock(expected_relocated_mem_size);
auto relocated_mem = blk.addr();
if (relocated_mem == 0) {
return;
}
relocated->reset((addr_t)relocated_mem, expected_relocated_mem_size);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@ int GenRelocateCodeFixed(void *buffer, CodeMemBlock *origin, CodeMemBlock *reloc
void GenRelocateCodeX86Shared(void *buffer, CodeMemBlock *origin, CodeMemBlock *relocated, bool branch);

int GenRelocateSingleX86Insn(addr_t curr_orig_ip, addr_t curr_relo_ip, uint8_t *buffer_cursor, AssemblerBase *assembler,
CodeBufferBase *code_buffer, x86_insn_decode_t &insn, int8_t mode);
CodeMemBuffer *code_buffer, x86_insn_decode_t &insn, int8_t mode);
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
#ifndef X86_INSN_DECODE_H
#define X86_INSN_DECODE_H
#pragma once

#include <stdint.h>
#include "dobby/common.h"

#include "dobby.h"
#include "dobby/types.h"
#include "dobby/platform_features.h"
#include "dobby/platform_detect_macro.h"
#include "dobby/utility_macro.h"

typedef enum {
X86_INSN_SPEC_DEFAULT_64_BIT = 1 << 0,
Expand Down Expand Up @@ -55,7 +59,7 @@ typedef union {
uint8_t code;
uint8_t type;
};
uint8_t data[2];
uint8_t data[3];
} x86_insn_operand_spec_t;

typedef struct {
Expand All @@ -69,7 +73,7 @@ typedef struct {
uint16_t flags;
#define X86_INSN_FLAG_SET_SSE_GROUP(n) ((n) << 5)
#define X86_INSN_FLAG_GET_SSE_GROUP(f) (((f) >> 5) & 0x1f)
#define X86_INSN_FLAG_SET_MODRM_REG_GROUP(n) (((n)&0x3f) << 10)
#define X86_INSN_FLAG_SET_MODRM_REG_GROUP(n) (((n) & 0x3f) << 10)
#define X86_INSN_FLAG_GET_MODRM_REG_GROUP(f) (((f) >> 10) & 0x3f)
} x86_insn_spec_t;

Expand Down Expand Up @@ -195,6 +199,4 @@ void x86_insn_decode(x86_insn_decode_t *insn, uint8_t *buffer, x86_options_t *co

#ifdef __cplusplus
}
#endif

#endif
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

#include "dobby/dobby_internal.h"

#include "source/InterceptRouting/RoutingPlugin.h"
#include "InterceptRouting/RoutingPlugin.h"

// Empty subclass of RoutingPluginInterface: it declares no members of its own here.
// NOTE(review): presumably the routing plugin type for near-branch trampolines - confirm where it is registered.
class NearBranchTrampolinePlugin : public RoutingPluginInterface {};

Expand Down
Loading

0 comments on commit f9e88eb

Please sign in to comment.