Skip to content

Commit

Permalink
Merge pull request #1694 from Shaikh-Ubaid/elf_structs
Browse files Browse the repository at this point in the history
X64: Use Struct for ELF Ehdr and Phdr headers
  • Loading branch information
ubaidsk authored Apr 14, 2023
2 parents f6bcbd2 + 1a99f9a commit 57ebbc8
Show file tree
Hide file tree
Showing 4 changed files with 279 additions and 99 deletions.
5 changes: 3 additions & 2 deletions src/libasr/codegen/asr_to_wasm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include <chrono>
#include <iomanip>
#include <fstream>
#include <climits>

#include <libasr/asr.h>
#include <libasr/exception.h>
Expand Down Expand Up @@ -526,7 +527,7 @@ class ASRToWASMVisitor : public ASR::BaseVisitor<ASRToWASMVisitor> {

using namespace wasm;
int kind = ASRUtils::extract_kind_from_ttype_t(v->m_type);
uint32_t global_var_idx = -1;
uint32_t global_var_idx = UINT_MAX;
switch (v->m_type->type){
case ASR::ttypeType::Integer: {
uint64_t init_val = 0;
Expand Down Expand Up @@ -583,7 +584,7 @@ class ASRToWASMVisitor : public ASR::BaseVisitor<ASRToWASMVisitor> {
global_var_idx = m_wa.declare_global_var(i32, 0);
}
}
LCOMPILERS_ASSERT(global_var_idx >= 0);
LCOMPILERS_ASSERT(global_var_idx < UINT_MAX);
m_global_var_idx_map[get_hash((ASR::asr_t *)v)] = global_var_idx;
}

Expand Down
11 changes: 3 additions & 8 deletions src/libasr/codegen/wasm_to_x64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -581,8 +581,6 @@ class X64Visitor : public WASMDecoder<X64Visitor>,
void visit_F32Sqrt() { visit_F64Sqrt(); }

void gen_x64_bytes() {
emit_elf64_header(m_a);

// declare compile-time strings
std::string base_memory = " "; /* in wasm backend, memory starts after 4 bytes*/
for (uint32_t i = 0; i < data_segments.size(); i++) {
Expand All @@ -592,7 +590,6 @@ class X64Visitor : public WASMDecoder<X64Visitor>,

NO_OF_IMPORTS = imports.size();

m_a.align_by_byte(0x1000);
m_a.add_label("text_segment_start");
for (uint32_t idx = 0; idx < type_indices.size(); idx++) {
m_a.add_label(exports[idx + 1].name);
Expand All @@ -616,9 +613,9 @@ class X64Visitor : public WASMDecoder<X64Visitor>,
emit_double_const(m_a, d.first, d.second);
}

m_a.align_by_byte(0x1000);
m_a.add_label("text_segment_end");

m_a.align_by_byte(0x1000);
m_a.add_label("data_segment_start");
for (auto &s : label_to_str) {
emit_data_string(m_a, s.first, s.second);
Expand Down Expand Up @@ -647,8 +644,6 @@ class X64Visitor : public WASMDecoder<X64Visitor>,
}
}
m_a.add_label("data_segment_end");

emit_elf64_footer(m_a);
}
};

Expand Down Expand Up @@ -700,15 +695,15 @@ Result<int> wasm_to_x64(Vec<uint8_t> &wasm_bytes, Allocator &al,

{
auto t1 = std::chrono::high_resolution_clock::now();
m_a.save_binary(filename);
m_a.save_binary64(filename);
auto t2 = std::chrono::high_resolution_clock::now();
time_save =
std::chrono::duration_cast<std::chrono::milliseconds>(t2 - t1)
.count();
}

//! Helpful for debugging
// std::cout << x64_visitor.m_a.get_asm() << std::endl;
// std::cout << x64_visitor.m_a.get_asm64() << std::endl;

if (time_report) {
std::cout << "Codegen Time report:" << std::endl;
Expand Down
261 changes: 179 additions & 82 deletions src/libasr/codegen/x86_assembler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,25 @@

namespace LCompilers {

void X86Assembler::save_binary64(const std::string &filename) {
Vec<uint8_t> header = create_elf64_x86_header(
m_al, origin(), get_defined_symbol("_start").value,
compute_seg_size("text_segment_start", "text_segment_end"),
compute_seg_size("data_segment_start", "data_segment_end"));
{
std::ofstream out;
out.open(filename);
out.write((const char*) header.p, header.size());
out.write((const char*) m_code.p, m_code.size());
}
#ifdef LFORTRAN_LINUX
int mod = 0755;
if (chmod(filename.c_str(),mod) < 0) {
throw AssemblerError("chmod failed");
}
#endif
}

void X86Assembler::save_binary(const std::string &filename) {
{
std::ofstream out;
Expand All @@ -25,6 +44,36 @@ void X86Assembler::save_binary(const std::string &filename) {
#endif
}

// ELF header structure for 32-bit
struct Elf32_Ehdr {
uint8_t ident[16];
uint16_t type;
uint16_t machine;
uint32_t version;
uint32_t entry;
uint32_t phoff;
uint32_t shoff;
uint32_t flags;
uint16_t ehsize;
uint16_t phentsize;
uint16_t phnum;
uint16_t shentsize;
uint16_t shnum;
uint16_t shstrndx;
};

// Program header structure for 32-bit
struct Elf32_Phdr {
uint32_t type;
uint32_t offset;
uint32_t vaddr;
uint32_t paddr;
uint32_t filesz;
uint32_t memsz;
uint32_t flags;
uint32_t align;
};

void emit_elf32_header(X86Assembler &a, uint32_t p_flags) {
/* Elf32_Ehdr */
a.add_label("ehdr");
Expand Down Expand Up @@ -283,94 +332,142 @@ void emit_print_float(X86Assembler &a, const std::string &name) {

/************************* 64-bit functions **************************/

void emit_elf64_header(X86Assembler &a) {
/* Elf64_Ehdr */
a.add_label("ehdr");
// e_ident
a.asm_db_imm8(0x7F);
a.asm_db_imm8('E');
a.asm_db_imm8('L');
a.asm_db_imm8('F');
a.asm_db_imm8(2);
a.asm_db_imm8(1);
a.asm_db_imm8(1);
a.asm_db_imm8(0);
// ELF header structure for 64-bit
struct Elf64_Ehdr {
uint8_t ident[16];
uint16_t type;
uint16_t machine;
uint32_t version;
uint64_t entry;
uint64_t phoff;
uint64_t shoff;
uint32_t flags;
uint16_t ehsize;
uint16_t phentsize;
uint16_t phnum;
uint16_t shentsize;
uint16_t shnum;
uint16_t shstrndx;
};

// Program header structure for 64-bit
struct Elf64_Phdr {
uint32_t type;
uint32_t flags;
uint64_t offset;
uint64_t vaddr;
uint64_t paddr;
uint64_t filesz;
uint64_t memsz;
uint64_t align;
};

Elf64_Ehdr get_elf_header(uint64_t asm_entry) {
Elf64_Ehdr e;
e.ident[0] = 0x7f; // magic number
e.ident[1] = 'E';
e.ident[2] = 'L';
e.ident[3] = 'F';
e.ident[4] = 2; // file class (64-bit)
e.ident[5] = 1; // data encoding (little endian)
e.ident[6] = 1; // ELF version
e.ident[7] = 0; // padding
e.ident[8] = 0;
e.ident[9] = 0;
e.ident[10] = 0;
e.ident[11] = 0;
e.ident[12] = 0;
e.ident[13] = 0;
e.ident[14] = 0;
e.ident[15] = 0;
e.type = 2;
e.machine = 0x3e;
e.version = 1;
e.entry = asm_entry;
e.phoff = sizeof(Elf64_Ehdr);
e.shoff = 0;
e.flags = 0;
e.ehsize = sizeof(Elf64_Ehdr);
e.phentsize = sizeof(Elf64_Phdr);
e.phnum = 3;
e.shentsize = 0;
e.shnum = 0;
e.shstrndx = 0;
return e;
}

a.asm_db_imm8(0);
a.asm_db_imm8(0);
a.asm_db_imm8(0);
a.asm_db_imm8(0);
Elf64_Phdr get_seg_header(uint32_t flags, uint64_t origin_addr,
uint64_t seg_size, uint64_t prev_seg_offset, uint64_t prev_seg_size) {
Elf64_Phdr p;
p.type = 1;
p.flags = flags;
p.offset = prev_seg_offset + prev_seg_size;
p.vaddr = origin_addr + p.offset;
p.paddr = p.vaddr;
p.filesz = seg_size;
p.memsz = p.filesz;
p.align = 0x1000;
return p;
}

a.asm_db_imm8(0);
a.asm_db_imm8(0);
a.asm_db_imm8(0);
a.asm_db_imm8(0);
template <typename T>
void append_header_bytes(Allocator &al, T src, Vec<uint8_t> &des) {
char *byteArray = (char *)&src;
for (size_t i = 0; i < sizeof(src); i++) {
des.push_back(al, byteArray[i]);
}
}

a.asm_dw_imm16(2); // e_type
a.asm_dw_imm16(0x3e); // e_machine
a.asm_dd_imm32(1); // e_version
a.asm_dq_label("_start"); // e_entry
a.asm_dq_label("e_phoff"); // e_phoff
a.asm_dq_imm64(0); // e_shoff
a.asm_dd_imm32(0); // e_flags
a.asm_dw_label("ehdrsize"); // e_ehsize
a.asm_dw_label("phdrsize"); // e_phentsize
a.asm_dw_imm16(3); // e_phnum
a.asm_dw_imm16(0); // e_shentsize
a.asm_dw_imm16(0); // e_shnum
a.asm_dw_imm16(0); // e_shstrndx

/* Elf64_Phdr */
a.add_label("phdr");
a.asm_dd_imm32(1); // p_type
a.asm_dd_imm32(4); // p_flags (permission to read only)
a.asm_dq_imm64(0); // p_offset
a.asm_dq_imm64(a.origin()); // p_vaddr
a.asm_dq_imm64(a.origin()); // p_paddr
a.asm_dq_label("phdr_size"); // p_filesz
a.asm_dq_label("phdr_size"); // p_memsz
a.asm_dq_imm64(0x1000); // p_align

/* text_segment_phdr */
a.add_label("text_phdr");
a.asm_dd_imm32(1); // p_type
a.asm_dd_imm32(5); // p_flags (permission to read and execute)
a.asm_dq_label("text_segment_offset"); // p_offset
a.asm_dq_label("text_segment_start"); // p_vaddr
a.asm_dq_label("text_segment_start"); // p_paddr
a.asm_dq_label("text_segment_size"); // p_filesz
a.asm_dq_label("text_segment_size"); // p_memsz
a.asm_dq_imm64(0x1000); // p_align

/* data_segment_phdr */
a.add_label("data_phdr");
a.asm_dd_imm32(1); // p_type
a.asm_dd_imm32(6); // p_flags (permission to read and write)
a.asm_dq_label("data_segment_offset"); // p_offset
a.asm_dq_label("data_segment_start"); // p_vaddr
a.asm_dq_label("data_segment_start"); // p_paddr
a.asm_dq_label("data_segment_size"); // p_filesz
a.asm_dq_label("data_segment_size"); // p_memsz
a.asm_dq_imm64(0x1000); // p_align
}
void align_by_byte(Allocator &al, Vec<uint8_t> &code, uint64_t alignment) {
uint64_t code_size = code.size() ;
uint64_t padding_size = (alignment * ceil(code_size / (double)alignment)) - code_size;
for (size_t i = 0; i < padding_size; i++) {
code.push_back(al, 0);
}
}

void emit_elf64_footer(X86Assembler &a) {
a.add_var("ehdrsize", "ehdr", "phdr");
a.add_var("phdrsize", "phdr", "text_phdr");
a.add_var64("e_phoff", "ehdr", "phdr");
a.add_var64("phdr_size", "ehdr", "text_segment_start");
a.add_var64("text_segment_offset", "ehdr", "text_segment_start");
a.add_var64("text_segment_size", "text_segment_start", "text_segment_end");
a.add_var64("data_segment_offset", "ehdr", "data_segment_start");
a.add_var64("data_segment_size", "data_segment_start", "data_segment_end");
}
Vec<uint8_t> create_elf64_x86_header(Allocator &al, uint64_t origin, uint64_t entry,
uint64_t text_seg_size, uint64_t data_seg_size) {

void emit_exit_64(X86Assembler &a, std::string name, int exit_code) {
a.add_label(name);
// void exit(int status);
a.asm_mov_r64_imm64(LCompilers::X64Reg::rax, 60); // sys_exit
a.asm_mov_r64_imm64(LCompilers::X64Reg::rdi, exit_code); // exit code
a.asm_syscall(); // syscall
/*
The header segment is a segment which holds the elf and program headers.
Its size currently is
sizeof(Elf64_Ehdr) + 3 * sizeof(Elf64_Phdr)
that is, 64 + 3 * 56 = 232
Since, it is a segment, it needs to be aligned by boundary 0x1000
(we add temporary zero bytes as padding to accomplish this alignment)
Thus, the header segment size for us currently is 0x1000.
For now, we are hardcoding this size here.
TODO: Later compute this header segment size dynamically depending
on the different segments present
*/
const int HEADER_SEGMENT_SIZE = 0x1000;

// adjust/offset the origin address as per the extra bytes of HEADER_SEGMENT_SIZE
uint64_t origin_addr = origin - HEADER_SEGMENT_SIZE;

Elf64_Ehdr e = get_elf_header(entry);
Elf64_Phdr p_program = get_seg_header(4, origin_addr, HEADER_SEGMENT_SIZE, 0, 0);
Elf64_Phdr p_text_seg = get_seg_header(5, origin_addr, text_seg_size, p_program.offset, p_program.filesz);
Elf64_Phdr p_data_seg = get_seg_header(6, origin_addr, data_seg_size, p_text_seg.offset, p_text_seg.filesz);

Vec<uint8_t> header;
header.reserve(al, HEADER_SEGMENT_SIZE);

{
append_header_bytes(al, e, header);
append_header_bytes(al, p_program, header);
append_header_bytes(al, p_text_seg, header);
append_header_bytes(al, p_data_seg, header);

LCompilers::align_by_byte(al, header, 0x1000);
}

return header;
}

void emit_print_64(X86Assembler &a, const std::string &msg_label, uint64_t size)
Expand Down
Loading

0 comments on commit 57ebbc8

Please sign in to comment.