Skip to content
1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ CRUST_FLAGS=-g --edition 2021 -C opt-level=0 -C panic="abort"

RSS=\
$(SRC)/arena.rs \
$(SRC)/hashtable.rs \
$(SRC)/b.rs \
$(SRC)/ir.rs \
$(SRC)/crust.rs \
Expand Down
130 changes: 72 additions & 58 deletions src/b.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,16 +38,18 @@ pub mod params;
pub mod ir;
pub mod time;
pub mod shlex;
pub mod hashtable;

use core::ffi::*;
use core::mem::zeroed;
use core::ptr;
use core::slice;
use core::cmp;
use crust::Str;
use hashtable::{HashTable, HtEntry};
use nob::*;
use flag::*;
use crust::libc::*;
use crust::assoc_lookup_cstr;
use arena::Arena;
use targets::*;
use lexer::{Lexer, Loc, Token};
Expand All @@ -61,8 +63,8 @@ pub unsafe fn add_libb_files(path: *const c_char, target: *const c_char, inputs:
// why is rust like this.
return Some(false);
}
include_path_if_exists(inputs, arena::sprintf(&mut (*c).arena, c!("%s/all.b"), path));
include_path_if_exists(inputs, arena::sprintf(&mut (*c).arena, c!("%s/%s.b"), path, target));
include_path_if_exists(inputs, arena::sprintf(&mut (*c).interner.arena, c!("%s/all.b"), path));
include_path_if_exists(inputs, arena::sprintf(&mut (*c).interner.arena, c!("%s/%s.b"), path, target));
Some(true)
}

Expand Down Expand Up @@ -139,38 +141,32 @@ pub enum Storage {
}

#[derive(Clone, Copy)]
pub struct Var {
pub name: *const c_char,
pub struct VarData {
pub loc: Loc,
pub storage: Storage,
}

pub unsafe fn scope_push(vars: *mut Array<Array<Var>>) {
pub unsafe fn scope_push(vars: *mut Array<HashTable<*const c_char, VarData>>) {
if (*vars).count < (*vars).capacity {
// Reusing already allocated scopes
(*vars).count += 1;
(*da_last_mut(vars).expect("There should be always at least the global scope")).count = 0;
let last_scope = da_last_mut(vars).expect("There should be always at least the global scope");
HashTable::clear(last_scope);
} else {
da_append(vars, zeroed());
}
}

pub unsafe fn scope_pop(vars: *mut Array<Array<Var>>) {
pub unsafe fn scope_pop(vars: *mut Array<HashTable<*const c_char, VarData>>) {
assert!((*vars).count > 0);
(*vars).count -= 1;
}

pub unsafe fn find_var_near(vars: *const Array<Var>, name: *const c_char) -> *const Var {
for i in 0..(*vars).count {
let var = (*vars).items.add(i);
if strcmp((*var).name, name) == 0 {
return var
}
}
ptr::null()
pub unsafe fn find_var_near(vars: *const HashTable<*const c_char, VarData>, name: *const c_char) -> *const VarData {
HashTable::get(vars, name).unwrap_or(ptr::null())
}

pub unsafe fn find_var_deep(vars: *const Array<Array<Var>>, name: *const c_char) -> *const Var {
pub unsafe fn find_var_deep(vars: *const Array<HashTable<*const c_char, VarData>>, name: *const c_char) -> *const VarData {
let mut i = (*vars).count;
while i > 0 {
let var = find_var_near((*vars).items.add(i-1), name);
Expand All @@ -195,7 +191,7 @@ pub unsafe fn declare_var(c: *mut Compiler, name: *const c_char, loc: Loc, stora
da_append(&mut (*c).func_scope_events, ScopeEvent::Declare {name, index});
}

da_append(scope, Var {name, loc, storage});
HashTable::insert(scope, name, VarData {loc, storage});
Some(())
}

Expand Down Expand Up @@ -329,17 +325,26 @@ pub unsafe fn allocate_auto_var(t: *mut AutoVarsAtor) -> usize {


pub unsafe fn compile_string(string: *const c_char, c: *mut Compiler) -> usize {
let offset = (*c).program.data.count;
let string_len = strlen(string);
da_append_many(&mut (*c).program.data, slice::from_raw_parts(string as *const u8, string_len));
// TODO: Strings in B are not NULL-terminated.
// They are terminated with symbol '*e' ('*' is escape character akin to '\' in C) which according to the
// spec is called just "end-of-file" without any elaboration on what its value is. Maybe it had a specific
// value on PDP that was a common knowledge at the time? In any case that breaks compatibility with
// libc. While the language is still in development we gonna terminate it with 0. We will make it
// "spec compliant" later.
da_append(&mut (*c).program.data, 0); // NULL-terminator
offset
// TODO: Don't use second hashtable, which requires changes to the API, returning string address
// instead of data offset
let string = intern(&mut (*c).interner, string);
match HashTable::find(&(*c).string_offset, string) {
HtEntry::Occupied(entry) => (*entry).value,
HtEntry::Vacant(entry) => {
let offset = (*c).program.data.count;
let string_len = strlen(string);
da_append_many(&mut (*c).program.data, slice::from_raw_parts(string as *const u8, string_len));
// TODO: Strings in B are not NULL-terminated.
// They are terminated with symbol '*e' ('*' is escape character akin to '\' in C) which according to the
// spec is called just "end-of-file" without any elaboration on what its value is. Maybe it had a specific
// value on PDP that was a common knowledge at the time? In any case that breaks compatibility with
// libc. While the language is still in development we gonna terminate it with 0. We will make it
// "spec compliant" later.
da_append(&mut (*c).program.data, 0); // NULL-terminator
HashTable::insert_new(&mut (*c).string_offset, entry, string, offset);
offset
},
}
}

pub unsafe fn compile_primary_expression(l: *mut Lexer, c: *mut Compiler) -> Option<(Arg, bool)> {
Expand Down Expand Up @@ -415,7 +420,7 @@ pub unsafe fn compile_primary_expression(l: *mut Lexer, c: *mut Compiler) -> Opt
}
Token::CharLit | Token::IntLit => Some((Arg::Literal((*l).int_number), false)),
Token::ID => {
let name = arena::strdup(&mut (*c).arena, (*l).string);
let name = intern(&mut (*c).interner, (*l).string);

let var_def = find_var_deep(&mut (*c).vars, name);
if var_def.is_null() {
Expand Down Expand Up @@ -674,7 +679,7 @@ pub unsafe fn compile_asm_stmts(l: *mut Lexer, c: *mut Compiler, stmts: *mut Arr
get_and_expect_token(l, Token::String)?;
match (*l).token {
Token::String => {
let line = arena::strdup(&mut (*c).arena, (*l).string);
let line = intern(&mut (*c).interner, (*l).string);
let loc = (*l).loc;
da_append(stmts, AsmStmt { line, loc });
}
Expand Down Expand Up @@ -712,7 +717,7 @@ pub unsafe fn compile_statement(l: *mut Lexer, c: *mut Compiler) -> Option<()> {
Token::Extrn => {
while (*l).token != Token::SemiColon {
get_and_expect_token(l, Token::ID)?;
let name = arena::strdup(&mut (*c).arena, (*l).string);
let name = intern(&mut (*c).interner, (*l).string);
name_declare_if_not_exists(&mut (*c).program.extrns, name);
declare_var(c, name, (*l).loc, Storage::External {name})?;
get_and_expect_tokens(l, &[Token::SemiColon, Token::Comma])?;
Expand All @@ -722,7 +727,7 @@ pub unsafe fn compile_statement(l: *mut Lexer, c: *mut Compiler) -> Option<()> {
Token::Auto => {
while (*l).token != Token::SemiColon {
get_and_expect_token(l, Token::ID)?;
let name = arena::strdup(&mut (*c).arena, (*l).string);
let name = intern(&mut (*c).interner, (*l).string);
let index = allocate_auto_var(&mut (*c).auto_vars_ator);
declare_var(c, name, (*l).loc, Storage::Auto {index})?;
get_and_expect_tokens(l, &[Token::SemiColon, Token::Comma, Token::IntLit, Token::CharLit])?;
Expand Down Expand Up @@ -805,7 +810,7 @@ pub unsafe fn compile_statement(l: *mut Lexer, c: *mut Compiler) -> Option<()> {
}
Token::Goto => {
get_and_expect_token(l, Token::ID)?;
let name = arena::strdup(&mut (*c).arena, (*l).string);
let name = intern(&mut (*c).interner, (*l).string);
let loc = (*l).loc;
let addr = (*c).func_body.count;
da_append(&mut (*c).func_gotos, Goto {name, loc, addr});
Expand Down Expand Up @@ -879,7 +884,7 @@ pub unsafe fn compile_statement(l: *mut Lexer, c: *mut Compiler) -> Option<()> {
}
_ => {
if (*l).token == Token::ID {
let name = arena::strdup(&mut (*c).arena, (*l).string);
let name = intern(&mut (*c).interner, (*l).string);
let name_loc = (*l).loc;
lexer::get_token(l)?;
if (*l).token == Token::Colon {
Expand Down Expand Up @@ -913,10 +918,28 @@ pub struct Switch {
pub cond: usize,
}

/// Deduplicates strings and prolongs their lifetime.
///
/// `intern` copies every string it has not seen before into `arena` and
/// records that copy as a key of `deduper`; when the lookup finds an existing
/// key, the pointer stored in that key is returned instead of making a new copy.
#[derive(Clone, Copy)]
pub struct StringInterner {
/// Set of the strings interned so far: only the keys matter, the value is `()`.
/// NOTE(review): presumably `Str` hashes/compares by string contents rather
/// than by pointer value; confirm against the `Str` definition in `crust`.
pub deduper: HashTable<Str, ()>,
/// Arena that owns the copies made by `intern`. Other code in this file also
/// allocates unrelated strings from it directly (e.g. via `arena::sprintf`).
pub arena: Arena,
}

/// Returns the interned copy of `string`, creating it on first use.
///
/// Looks `string` up in `(*interner).deduper`. On a hit, the pointer stored as
/// the table key is returned. On a miss, `string` is duplicated into
/// `(*interner).arena`, the duplicate is registered in the table, and the
/// duplicate is returned. The caller's buffer is never stored.
///
/// # Safety
///
/// `interner` must point to a valid `StringInterner`, and `string` is passed
/// to `arena::strdup`, so it is presumably expected to be a valid
/// NUL-terminated C string.
pub unsafe fn intern(interner: *mut StringInterner, string: *const c_char) -> *mut c_char {
    let slot = match HashTable::find(&(*interner).deduper, Str(string)) {
        // Already known: hand back the copy that lives in the table key.
        HtEntry::Occupied(found) => return (*found).key.0 as *mut c_char,
        HtEntry::Vacant(free) => free,
    };

    // First sighting: make a copy in the arena and key the table on that
    // copy, not on the caller's pointer.
    let copy = arena::strdup(&mut (*interner).arena, string);
    HashTable::insert_new(&mut (*interner).deduper, slot, Str(copy), ());
    copy
}

#[derive(Clone, Copy)]
pub struct Compiler {
pub program: Program,
pub vars: Array<Array<Var>>,
pub vars: Array<HashTable<*const c_char, VarData>>,
pub auto_vars_ator: AutoVarsAtor,
pub func_body: Array<OpWithLocation>,
pub func_goto_labels: Array<GotoLabel>,
Expand All @@ -926,17 +949,8 @@ pub struct Compiler {
pub used_funcs: Array<UsedFunc>,
pub op_label_count: usize,
pub switch_stack: Array<Switch>,
/// Arena into which the Compiler allocates all the names and
/// objects that need to live for the duration of the
/// compilation. Even if some object/names don't need to live that
/// long (for example, function labels need to live only for the
/// duration of that function compilation), just letting them live
/// longer makes the memory management easier.
///
/// Basically just dump everything into this arena and if you ever
/// need to reset the state of the Compiler, just reset all its
/// Dynamic Arrays and this Arena.
pub arena: Arena,
pub interner: StringInterner,
pub string_offset: HashTable<*const c_char, usize>,
pub error_count: usize,
pub historical: bool,
}
Expand Down Expand Up @@ -968,9 +982,9 @@ pub unsafe fn compile_program(l: *mut Lexer, c: *mut Compiler) -> Option<()> {
Token::Variadic => {
get_and_expect_token_but_continue(l, c, Token::OParen)?;
get_and_expect_token_but_continue(l, c, Token::ID)?;
let func = arena::strdup(&mut (*c).arena, (*l).string);
let func = intern(&mut (*c).interner, (*l).string);
let func_loc = (*l).loc;
if let Some(existing_variadic) = assoc_lookup_cstr(da_slice((*c).program.variadics), func) {
if let Some(existing_variadic) = HashTable::get(&(*c).program.variadics, func) {
// TODO: report all the duplicate variadics maybe?
diagf!(func_loc, c!("ERROR: duplicate variadic declaration `%s`\n"), func);
diagf!((*existing_variadic).loc, c!("NOTE: the first declaration is located here\n"));
Expand All @@ -982,25 +996,25 @@ pub unsafe fn compile_program(l: *mut Lexer, c: *mut Compiler) -> Option<()> {
diagf!((*l).loc, c!("ERROR: variadic function `%s` cannot have 0 arguments\n"), func);
bump_error_count(c)?;
}
da_append(&mut (*c).program.variadics, (func, Variadic {
HashTable::insert(&mut (*c).program.variadics, func, Variadic {
loc: func_loc,
fixed_args: (*l).int_number as usize,
}));
});
get_and_expect_token_but_continue(l, c, Token::CParen)?;
get_and_expect_token_but_continue(l, c, Token::SemiColon)?;
}
Token::Extrn => {
while (*l).token != Token::SemiColon {
get_and_expect_token(l, Token::ID)?;
let name = arena::strdup(&mut (*c).arena, (*l).string);
let name = intern(&mut (*c).interner, (*l).string);
name_declare_if_not_exists(&mut (*c).program.extrns, name);
declare_var(c, name, (*l).loc, Storage::External {name})?;
get_and_expect_tokens(l, &[Token::SemiColon, Token::Comma])?;
}
}
_ => {
expect_token(l, Token::ID)?;
let name = arena::strdup(&mut (*c).arena, (*l).string);
let name = intern(&mut (*c).interner, (*l).string);
let name_loc = (*l).loc;
declare_var(c, name, name_loc, Storage::External{name})?;

Expand All @@ -1017,7 +1031,7 @@ pub unsafe fn compile_program(l: *mut Lexer, c: *mut Compiler) -> Option<()> {
(*l).parse_point = saved_point;
'params: loop {
get_and_expect_token(l, Token::ID)?;
let name = arena::strdup(&mut (*c).arena, (*l).string);
let name = intern(&mut (*c).interner, (*l).string);
let name_loc = (*l).loc;
let index = allocate_auto_var(&mut (*c).auto_vars_ator);
declare_var(c, name, name_loc, Storage::Auto{index})?;
Expand Down Expand Up @@ -1099,7 +1113,7 @@ pub unsafe fn compile_program(l: *mut Lexer, c: *mut Compiler) -> Option<()> {
Token::IntLit | Token::CharLit => ImmediateValue::Literal((*l).int_number),
Token::String => ImmediateValue::DataOffset(compile_string((*l).string, c)),
Token::ID => {
let name = arena::strdup(&mut (*c).arena, (*l).string);
let name = intern(&mut (*c).interner, (*l).string);
let scope = da_last_mut(&mut (*c).vars).expect("There should be always at least the global scope");
let var = find_var_near(scope, name);
if var.is_null() {
Expand Down Expand Up @@ -1289,7 +1303,7 @@ pub unsafe fn main(mut argc: i32, mut argv: *mut*mut c_char) -> Option<()> {

let mut c: Compiler = zeroed();
c.historical = *historical;
let executable_directory = arena::strdup(&mut c.arena, dirname(flag_program_name()));
let executable_directory = arena::strdup(&mut c.interner.arena, dirname(flag_program_name()));

if (*linker).count > 0 {
let mut s: Shlex = zeroed();
Expand All @@ -1302,7 +1316,7 @@ pub unsafe fn main(mut argc: i32, mut argv: *mut*mut c_char) -> Option<()> {
log(Log_Level::WARNING, c!("Flag -%s is DEPRECATED! Interpreting it as `-%s %s` instead."), flag_name(linker), PARAM_FLAG_NAME, codegen_arg);
}

let gen = target.new(&mut c.arena, da_slice(*codegen_args))?;
let gen = target.new(&mut c.interner.arena, da_slice(*codegen_args))?;

if input_paths.count == 0 {
usage();
Expand All @@ -1320,7 +1334,7 @@ pub unsafe fn main(mut argc: i32, mut argv: *mut*mut c_char) -> Option<()> {
// - Some sort of installation prefix? (Requires making build system more complicated)
//
// - rexim (2025-06-12 20:56:08)
add_libb_files(arena::sprintf(&mut c.arena, c!("%s/libb/"), executable_directory), *target_name, &mut input_paths, &mut c);
add_libb_files(arena::sprintf(&mut c.interner.arena, c!("%s/libb/"), executable_directory), *target_name, &mut input_paths, &mut c);
}

let mut sb: String_Builder = zeroed();
Expand Down
1 change: 1 addition & 0 deletions src/btest.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ pub mod lexer;
pub mod codegen;
pub mod shlex;
pub mod params;
pub mod hashtable;

use core::ffi::*;
use core::cmp;
Expand Down
10 changes: 5 additions & 5 deletions src/codegen/gas_aarch64/mod.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
use core::ffi::*;
use core::mem::zeroed;
use crate::hashtable::HashTable;
use crate::nob::*;
use crate::crust::libc::*;
use crate::crust::assoc_lookup_cstr;
use crate::ir::*;
use crate::lexer::*;
use crate::missingf;
Expand Down Expand Up @@ -127,7 +127,7 @@ pub unsafe fn load_arg_to_reg(arg: Arg, reg: *const c_char, output: *mut String_
};
}

pub unsafe fn generate_function(name: *const c_char, _name_loc: Loc, params_count: usize, auto_vars_count: usize, os: Os, variadics: *const [(*const c_char, Variadic)], body: *const [OpWithLocation], output: *mut String_Builder) {
pub unsafe fn generate_function(name: *const c_char, _name_loc: Loc, params_count: usize, auto_vars_count: usize, os: Os, variadics: *const HashTable<*const c_char, Variadic>, body: *const [OpWithLocation], output: *mut String_Builder) {
let stack_size = align_bytes(auto_vars_count*8, 16);
match os {
Os::Linux => {
Expand Down Expand Up @@ -316,7 +316,7 @@ pub unsafe fn generate_function(name: *const c_char, _name_loc: Loc, params_coun
let mut fixed_args = 0;
match fun {
Arg::External(name) | Arg::RefExternal(name) => {
if let Some(variadic) = assoc_lookup_cstr(variadics, name) {
if let Some(variadic) = HashTable::get(variadics, name) {
fixed_args = (*variadic).fixed_args;
}
}
Expand Down Expand Up @@ -395,7 +395,7 @@ pub unsafe fn generate_function(name: *const c_char, _name_loc: Loc, params_coun
sb_appendf(output, c!(" ret\n"));
}

pub unsafe fn generate_funcs(output: *mut String_Builder, funcs: *const [Func], variadics: *const [(*const c_char, Variadic)], os: Os) {
pub unsafe fn generate_funcs(output: *mut String_Builder, funcs: *const [Func], variadics: *const HashTable<*const c_char, Variadic>, os: Os) {
sb_appendf(output, c!(".text\n"));
for i in 0..funcs.len() {
generate_function((*funcs)[i].name, (*funcs)[i].name_loc, (*funcs)[i].params_count, (*funcs)[i].auto_vars_count, os, variadics, da_slice((*funcs)[i].body), output);
Expand Down Expand Up @@ -572,7 +572,7 @@ pub unsafe fn generate_program(

if debug { todo!("Debug information for aarch64") }

generate_funcs(output, da_slice((*program).funcs), da_slice((*program).variadics), os);
generate_funcs(output, da_slice((*program).funcs), &(*program).variadics, os);
generate_asm_funcs(output, da_slice((*program).asm_funcs), os);
generate_globals(output, da_slice((*program). globals), os);
generate_data_section(output, da_slice((*program).data));
Expand Down
Loading