From 293e8567697ace49819037615ea7d9e7cc876527 Mon Sep 17 00:00:00 2001 From: Georgiy Belyanin Date: Sat, 27 Sep 2025 16:50:31 +0300 Subject: [PATCH 01/49] parser: introduce basic AST and parser This is quasi-initial commit of the DOOML language compiler done in terms of functional language compilers 2025 SPbU course. Basically, it is OCaml-like language with (drastically) truncated list of features. This patch introduces a basic AST and parser with pretty-printing facilities. Only integers, units and tuples are supported yet. This will be implemented by Georgiy Belyanin and Ignatiy Sergeev. --- DOOML/.gitignore | 3 + DOOML/.ocamlformat | 3 + DOOML/DOOML.opam | 41 +++ DOOML/DOOML.opam.template | 7 + DOOML/Makefile | 20 ++ DOOML/bin/LL.ml | 94 +++++++ DOOML/bin/dune | 22 ++ DOOML/bin/main.ml | 60 +++++ DOOML/bin/run.t | 20 ++ DOOML/bin/runtime.c | 10 + DOOML/dune-project | 33 +++ DOOML/lib/ast.ml | 109 ++++++++ DOOML/lib/dune | 10 + DOOML/lib/fe.ml | 414 +++++++++++++++++++++++++++++++ DOOML/many_tests/.ocamlformat | 1 + DOOML/many_tests/typed/001fac.ml | 1 + DOOML/many_tests/typed/dune | 2 + DOOML/many_tests/typed/typed.t | 2 + DOOML/out.ll | 12 + DOOML/shell.nix | 12 + DOOML/test_qc/dune | 13 + DOOML/test_qc/test_qc.ml | 98 ++++++++ DOOML/test_qc/test_qc.mli | 7 + 23 files changed, 994 insertions(+) create mode 100644 DOOML/.gitignore create mode 100644 DOOML/.ocamlformat create mode 100644 DOOML/DOOML.opam create mode 100644 DOOML/DOOML.opam.template create mode 100644 DOOML/Makefile create mode 100644 DOOML/bin/LL.ml create mode 100644 DOOML/bin/dune create mode 100644 DOOML/bin/main.ml create mode 100644 DOOML/bin/run.t create mode 100644 DOOML/bin/runtime.c create mode 100644 DOOML/dune-project create mode 100644 DOOML/lib/ast.ml create mode 100644 DOOML/lib/dune create mode 100644 DOOML/lib/fe.ml create mode 100644 DOOML/many_tests/.ocamlformat create mode 120000 DOOML/many_tests/typed/001fac.ml create mode 100644 DOOML/many_tests/typed/dune create mode 100644 DOOML/many_tests/typed/typed.t create mode 100644 DOOML/out.ll create mode 100644 DOOML/shell.nix create mode 100644 DOOML/test_qc/dune create mode 100644 DOOML/test_qc/test_qc.ml create mode 100644 DOOML/test_qc/test_qc.mli diff --git a/DOOML/.gitignore b/DOOML/.gitignore new file mode 100644 index 00000000..0e5f1e4b --- /dev/null +++ b/DOOML/.gitignore @@ -0,0 +1,3 @@ +/_build +/_coverage + diff --git a/DOOML/.ocamlformat b/DOOML/.ocamlformat new file mode 100644 index 00000000..25919d0e --- /dev/null +++ b/DOOML/.ocamlformat @@ -0,0 +1,3 @@ +version=0.27.0 +profile=janestreet + diff --git a/DOOML/DOOML.opam b/DOOML/DOOML.opam new file mode 100644 index 00000000..ce90481c --- /dev/null +++ b/DOOML/DOOML.opam @@ -0,0 +1,41 @@ +# This file is generated by dune, edit dune-project instead +opam-version: "2.0" +synopsis: "A short LLVM demo" +maintainer: ["Georgiy Belyanin" "Ignatiy Sergeev"] +authors: ["Georgiy Belyanin" "Ignatiy Sergeev"] +license: "LGPL-3.0-or-later WITH OCaml-LGPL-linking-exception" +homepage: "https://github.com/Kakadu/comp24" +bug-reports: "https://github.com/Kakadu/comp24/issues" +depends: [ + "ocaml" + "dune" {>= "3.8" & = "3.19.1"} + "angstrom" {= "0.16.0"} + "qcheck" + "bisect_ppx" + "llvm" {= "18-shared"} + "qcheck" {with-tests} + "ppx_deriving_qcheck" {= "0.6"} + "odoc" {with-doc} +] +build: [ + ["dune" "subst"] {dev} + [ + "dune" + "build" + "-p" + name + "-j" + jobs + "@install" + "@runtest" {with-test} + "@doc" {with-doc} + ] +] +dev-repo: "git+https://github.com/Kakadu/comp24.git" +depexts: [ + [ "llvm-18-dev" "clang-18" "gcc-riscv64-linux-gnu" "g++-riscv64-linux-gnu" "qemu-user"] {os-distribution = "ubuntu"} +] +pin-depends: [ + ["ppx_deriving_qcheck.0.6" "git+https://github.com/c-cube/qcheck.git#42429bf06ba12373cad02b1404f50d0ad6238af5"] +] +# Don't edit '*.opam' file manually. Use 'dune b @install' diff --git a/DOOML/DOOML.opam.template b/DOOML/DOOML.opam.template new file mode 100644 index 00000000..f4e537bf --- /dev/null +++ b/DOOML/DOOML.opam.template @@ -0,0 +1,7 @@ +depexts: [ + [ "llvm-18-dev" "clang-18" "gcc-riscv64-linux-gnu" "g++-riscv64-linux-gnu" "qemu-user"] {os-distribution = "ubuntu"} +] +pin-depends: [ + ["ppx_deriving_qcheck.0.6" "git+https://github.com/c-cube/qcheck.git#42429bf06ba12373cad02b1404f50d0ad6238af5"] +] +# Don't edit '*.opam' file manually. Use 'dune b @install' diff --git a/DOOML/Makefile b/DOOML/Makefile new file mode 100644 index 00000000..c0615678 --- /dev/null +++ b/DOOML/Makefile @@ -0,0 +1,20 @@ +.PHONY: all test +all: + dune build + +test: + dune test + +TEST_COV_D = /tmp/cov +COVERAGE_OPTS = --coverage-path $(TEST_COV_D) --expect bin/ + +.PHONY: test_coverage coverage +test_coverage: coverage +coverage: + $(RM) -r $(TEST_COV_D) + mkdir -p $(TEST_COV_D) + BISECT_FILE=$(TEST_COV_D)/language dune runtest --no-print-directory \ + --instrument-with bisect_ppx --force + bisect-ppx-report html $(COVERAGE_OPTS) + bisect-ppx-report summary $(COVERAGE_OPTS) + @echo "Use 'xdg-open _coverage/index.html' to see coverage report" diff --git a/DOOML/bin/LL.ml b/DOOML/bin/LL.ml new file mode 100644 index 00000000..5e2d2da2 --- /dev/null +++ b/DOOML/bin/LL.ml @@ -0,0 +1,94 @@ +[@@@ocaml.text "/*"] + +(** Copyright 2023-2024, Kakadu and contributors *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +[@@@ocaml.text "/*"] + +open Llvm +open Printf + +module type S = sig + val context : Llvm.llcontext + val module_ : Llvm.llmodule + val builder : Llvm.llbuilder + val build_store : Llvm.llvalue -> Llvm.llvalue -> Llvm.llvalue + val build_call : lltype -> ?name:string -> llvalue -> llvalue list -> llvalue + val lookup_func_exn : string -> llvalue + val has_toplevel_func : string -> bool + val build_add : ?name:string -> llvalue -> llvalue -> llvalue + val build_sub : ?name:string -> llvalue -> llvalue -> llvalue + val build_mul : ?name:string -> llvalue -> llvalue -> llvalue + val build_sdiv : ?name:string -> llvalue -> llvalue -> llvalue + val build_icmp : ?name:string -> Icmp.t -> llvalue -> llvalue -> llvalue + + (** [set_metadata v kind fmt] sets metadata to value [v] of kind [k]. + Returns this value [v]. Useful for attaching debugging *) + val set_metadata + : llvalue + -> string + -> ('a, Format.formatter, unit, llvalue) format4 + -> 'a + + (* ?? *) + + val build_ptrtoint : ?name:string -> llvalue -> lltype -> llvalue + val build_inttoptr : ?name:string -> llvalue -> lltype -> llvalue + val build_pointercast : ?name:string -> llvalue -> lltype -> llvalue + + (** Just aliases *) + + val const_int : Llvm.lltype -> int -> Llvm.llvalue + val params : Llvm.llvalue -> Llvm.llvalue array + val pp_value : Format.formatter -> llvalue -> unit +end + +let make context builder module_ = + let module L : S = struct + let context = context + let builder = builder + let module_ = module_ + let build_store a b = Llvm.build_store a b builder + + let build_call typ ?(name = "") f args = + build_call typ f (Array.of_list args) name builder + ;; + + let has_toplevel_func fname = + match lookup_function fname module_ with + | Some _ -> true + | None -> false + ;; + + let lookup_func_exn fname = + match lookup_function fname module_ with + | Some f -> f + | None -> failwith (sprintf "Function '%s' not found" fname) + ;; + + let build_add ?(name = "") l r = build_add l r name builder + let build_sub ?(name = "") l r = build_sub l r name builder + let build_mul ?(name = "") l r = build_mul l r name builder + let build_sdiv ?(name = "") l r = build_sdiv l r name builder + let build_icmp ?(name = "") op l r = build_icmp op l r name builder + let build_ptrtoint ?(name = "") e typ = Llvm.build_ptrtoint e typ name builder + let build_inttoptr ?(name = "") e typ = Llvm.build_inttoptr e typ name builder + let build_pointercast ?(name = "") f typ = Llvm.build_pointercast f typ name builder + + let set_metadata v kind fmt = + Format.kasprintf + (fun s -> + Llvm.set_metadata v (Llvm.mdkind_id context kind) (Llvm.mdstring context s); + v) + fmt + ;; + + (* Aliases *) + let const_int = Llvm.const_int + let params = Llvm.params + let pp_value ppf x = Format.fprintf ppf "%s" (Llvm.string_of_llvalue x) + end + in + (module L : S) +;; diff --git a/DOOML/bin/dune b/DOOML/bin/dune new file mode 100644 index 00000000..687e70bf --- /dev/null +++ b/DOOML/bin/dune @@ -0,0 +1,22 @@ +(library + (name LL) + (public_name DOOML.LL) + (modules LL) + (wrapped false) + (libraries + llvm + llvm.analysis + llvm.executionengine + ; + )) + +(executable + (public_name DOOML) + (name main) + (modules main) + (libraries LL) + (instrumentation + (backend bisect_ppx))) + +(cram + (deps ./main.exe runtime.c)) diff --git a/DOOML/bin/main.ml b/DOOML/bin/main.ml new file mode 100644 index 00000000..37ad326d --- /dev/null +++ b/DOOML/bin/main.ml @@ -0,0 +1,60 @@ +[@@@ocaml.text "/*"] + +(** Copyright 2023-2025, Kakadu and contributors *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +[@@@ocaml.text "/*"] + +let () = + let context = Llvm.global_context () in + let builder = Llvm.builder context in + let () = assert (Llvm_executionengine.initialize ()) in + let the_module = Llvm.create_module context "main" in + Llvm.set_target_triple "x86_64-pc-linux-gnu" the_module; + let _the_execution_engine = Llvm_executionengine.create the_module in + let module LL = (val LL.make context builder the_module) in + let i64_type = Llvm.i64_type context in + let void_type = Llvm.void_type context in + let ptr_type = Llvm.pointer_type context in + let prepare_main () = + let ft = + (* TODO main has special args *) + let args = Array.make 0 ptr_type in + Llvm.function_type i64_type args + in + let the_function = Llvm.declare_function "main" ft the_module in + (* Create a new basic block to start insertion into. *) + let bb = Llvm.append_block context "entry" the_function in + Llvm.position_at_end bb builder; + (* Add all arguments to the symbol table and create their allocas. *) + (* Finish off the function. *) + let (_ : Llvm.llvalue) = + LL.build_call + (Llvm.function_type void_type [| i64_type |]) + LL.(lookup_func_exn "print_int") + [ Llvm.const_int i64_type 70 ] + in + let (_ : Llvm.llvalue) = Llvm.build_ret (Llvm.const_int i64_type 0) builder in + (* Validate the generated code, checking for consistency. *) + (match Llvm_analysis.verify_function the_function with + | true -> () + | false -> + Stdlib.Format.printf + "invalid function generated\n%s\n" + (Llvm.string_of_llvalue the_function); + Llvm_analysis.assert_valid_function the_function); + (* Optimize the function. *) + (* let (_ : bool) = Llvm.PassManager.run_function the_function the_fpm in *) + (* Llvm.dump_value the_function; *) + () + in + let _ = + Llvm.declare_function + "print_int" + (Llvm.function_type (Llvm.void_type context) [| i64_type |]) + the_module + in + prepare_main (); + Llvm.print_module "out.ll" the_module +;; diff --git a/DOOML/bin/run.t b/DOOML/bin/run.t new file mode 100644 index 00000000..c5569b68 --- /dev/null +++ b/DOOML/bin/run.t @@ -0,0 +1,20 @@ + $ clang-18 -c runtime.c -o runtime.o + $ ./main.exe + $ ls + main.exe + out.ll + runtime.c + runtime.o + $ cat out.ll | grep -E 'source_filename|target datalayout|ModuleID' --invert-match + target triple = "x86_64-pc-linux-gnu" + + declare void @print_int(i64) + + define i64 @main() { + entry: + call void @print_int(i64 70) + ret i64 0 + } + $ clang-18 out.ll runtime.o -o demo1.exe + $ echo "Press $(./demo1.exe) to pay respect" + Press F to pay respect diff --git a/DOOML/bin/runtime.c b/DOOML/bin/runtime.c new file mode 100644 index 00000000..9c00aa03 --- /dev/null +++ b/DOOML/bin/runtime.c @@ -0,0 +1,10 @@ +/* Copyright 2023-2024, Kakadu and contributors */ +/* SPDX-License-Identifier: LGPL-3.0-or-later */ + +#include +#include + +void print_int(int64_t n) { + putchar(n); + fflush(stdout); +} \ No newline at end of file diff --git a/DOOML/dune-project b/DOOML/dune-project new file mode 100644 index 00000000..3901660e --- /dev/null +++ b/DOOML/dune-project @@ -0,0 +1,33 @@ +(lang dune 3.8) + +(name DOOML) + +(generate_opam_files true) + +(source + (github Kakadu/comp24)) + +(authors "Georgiy Belyanin" "Ignatiy Sergeev") + +(maintainers "Georgiy Belyanin" "Ignatiy Sergeev") + +(license "LGPL-3.0-or-later WITH OCaml-LGPL-linking-exception") + +(package + (name DOOML) + (synopsis "A short LLVM demo") + (depends + ocaml + (dune + (= "3.19.1")) + (angstrom + (= "0.16.0")) + qcheck + bisect_ppx + (llvm + (= "18-shared")) + (qcheck :with-tests) + (ppx_deriving_qcheck + (= "0.6")))) + +; See the complete stanza docs at https://dune.readthedocs.io/en/stable/dune-files.html#dune-project diff --git a/DOOML/lib/ast.ml b/DOOML/lib/ast.ml new file mode 100644 index 00000000..9f748309 --- /dev/null +++ b/DOOML/lib/ast.ml @@ -0,0 +1,109 @@ +type ident = string + +let pp_ident ppf ident = + match String.get ident 0 with + | 'a' .. 'z' | 'A' .. 'Z' | '_' -> Format.fprintf ppf "%s" ident + | _ -> Format.fprintf ppf "(%s)" ident +;; + +let pp_sep_space ppf () = Format.fprintf ppf " " +let pp_sep_quote ppf () = Format.fprintf ppf ", " + +type ty = + | Int + | Bool + | List of ty + | Unit +[@@deriving show, variants] + +type pattern = + | PUnit (** () *) + | Plug (** _ *) + | Ident of ident + | Tuple of pattern list +[@@deriving variants] + +let rec pp_pattern ppf = function + | PUnit -> Format.fprintf ppf "()" + | Plug -> Format.fprintf ppf "_" + | Ident s -> Format.fprintf ppf "%s" s + | Tuple ss -> + Format.fprintf ppf "%a" (Format.pp_print_list ~pp_sep:pp_sep_quote pp_pattern) ss +;; + +type decl_body = pattern [@@deriving show] + +type rec_flag = + | Rec + | NonRec +[@@deriving variants] + +type expr = + | Const of int + | Var of ident + | App of expr * expr + | Let of rec_flag * pattern * expr * expr + | Ite of expr * expr * expr + | Fun of pattern list * expr +[@@deriving variants] + +let rec pp_expr ppf = function + | Const c -> Format.fprintf ppf "%d" c + | Var ident -> Format.fprintf ppf "%a" pp_ident ident + | App (f, (Const _ as arg)) | App (f, (Var _ as arg)) -> + Format.fprintf ppf "%a %a" pp_expr f pp_expr arg + | App (f, arg) -> Format.fprintf ppf "%a (%a)" pp_expr f pp_expr arg + | Let (rec_flag, pattern, bind, body) -> + let rec_flag = + match rec_flag with + | Rec -> "rec " + | NonRec -> "" + in + Format.fprintf + ppf + "let %s%a = %a in@ %a" + rec_flag + pp_pattern + pattern + pp_expr + bind + pp_expr + body + | Fun (patterns, body) -> + Format.fprintf + ppf + "@[fun %a ->@ @[%a@]@]" + (Format.pp_print_list ~pp_sep:pp_sep_space pp_pattern) + patterns + pp_expr + body + | Ite (cond, then_, else_) -> + Format.fprintf + ppf + "if @[%a@] then@ @[%a@]@ else@ @[%a@]" + pp_expr + cond + pp_expr + then_ + pp_expr + else_ +;; + +type top_level = LetDecl of rec_flag * pattern * expr [@@deriving variants] + +let pp_top_level ppf = function + | LetDecl (rec_flag, pattern, body) -> + let rec_flag = + match rec_flag with + | Rec -> "rec " + | NonRec -> "" + in + Format.fprintf + ppf + "@[let %s%a =@ @[%a@]@]@.;;@." + rec_flag + pp_pattern + pattern + pp_expr + body +;; diff --git a/DOOML/lib/dune b/DOOML/lib/dune new file mode 100644 index 00000000..9db51790 --- /dev/null +++ b/DOOML/lib/dune @@ -0,0 +1,10 @@ +(library + (name DOOML) + (public_name DOOML.Lib) + (modules Ast Fe) + (libraries base stdlib angstrom) + (inline_tests) + (preprocess + (pps ppx_expect ppx_deriving.show ppx_variants_conv ppx_inline_test)) + (instrumentation + (backend bisect_ppx))) diff --git a/DOOML/lib/fe.ml b/DOOML/lib/fe.ml new file mode 100644 index 00000000..449c7367 --- /dev/null +++ b/DOOML/lib/fe.ml @@ -0,0 +1,414 @@ +open Angstrom +module Map = Base.Map.Poly + +let is_whitespace = function + | ' ' | '\t' | '\n' | '\r' -> true + | _ -> false +;; + +let whitespace = take_while is_whitespace +let whitespace1 = take_while1 is_whitespace +let token f = whitespace *> f <* whitespace + +let is_digit = function + | '0' .. '9' -> true + | _ -> false +;; + +let is_idschar = function + | 'a' .. 'z' | 'A' .. 'Z' | '_' -> true + | _ -> false +;; + +let is_idchar = function + | 'a' .. 'z' | 'A' .. 'Z' | '_' | '0' .. '9' -> true + | _ -> false +;; + +let is_fbinopchar = function + | '$' | '&' | '*' | '+' | '-' | '/' | '=' | '>' | '@' | '^' | '|' | '%' | '<' | '#' -> + true + | _ -> false +;; + +let is_sbinopchar = function + | '$' + | '&' + | '*' + | '+' + | '-' + | '/' + | '=' + | '>' + | '@' + | '^' + | '|' + | '%' + | '<' + | '!' + | '.' + | ':' + | '?' + | '~' -> true + | _ -> false +;; + +let is_funopchar = function + | '?' | '~' | '!' -> true + | _ -> false +;; + +let is_sunopchar = is_sbinopchar + +let kws = + Map.of_alist_exn + [ "rec", () + ; "let", () + ; "in", () + ; "type", () + ; "fun", () + ; "if", () + ; "then", () + ; "else", () + ] +;; + +let spf = Format.asprintf +let pp_sep_space ppf () = Format.fprintf ppf " " +let pp_sep_newline ppf () = Format.fprintf ppf "\n" +let failf fmt = Format.kasprintf fail fmt +let parens f = token (char '(') *> f <* token (char ')') + +let chainl1 e op = + let rec go acc = lift2 (fun f x -> f acc x) op e >>= go <|> return acc in + e >>= go +;; + +let kw = take_while1 is_idchar |> token + +let ident = + let un_ident = + let* hd = satisfy is_funopchar in + let* tl = take_while is_sunopchar in + String.cat (String.make 1 hd) tl |> return + in + let binop_ident = + let* hd = satisfy is_fbinopchar in + let* tl = take_while is_sbinopchar in + String.cat (String.make 1 hd) tl |> return + in + let* ident = + take_while1 is_idchar |> token <|> parens un_ident <|> parens binop_ident + in + if Map.find kws ident |> Option.is_none + then ident |> return + else failf "expected ident, but found keyword %s" ident +;; + +let punit = (string "()" |> token) *> return Ast.punit + +let pattern = + fix (fun _pattern -> punit <|> (ident >>= fun ident -> return (Ast.ident ident))) +;; + +let const = + let* v = take_while1 is_digit |> token in + v |> int_of_string |> Ast.const |> return +;; + +let var = + let* ident = ident in + Ast.var ident |> return +;; + +let rec_flag = + let rec_ = token (string "rec") *> return Ast.rec_ in + rec_ <|> return Ast.nonrec_ +;; + +let let_ expr = + let* rec_flag = rec_flag in + let* pattern = pattern "expected pattern after 'let'" in + let* _ = token (char '=') in + let* body = + expr spf "expected expression after 'let %a ='" Ast.pp_pattern pattern + in + let* _ = token (string "in") "expected \'in\' after introducing new let binding" in + let* expr = expr "expected expression after 'let ... = ... in'" in + Ast.let_ rec_flag pattern body expr |> return +;; + +let fun_ expr = + let* patterns = many1 pattern "expected one or more function argument names" in + let* _ = token (string "->") in + let* expr = + expr + spf + "expected expression after 'fun %a ' " + (Format.pp_print_list ~pp_sep:pp_sep_space Ast.pp_pattern) + patterns + in + Ast.fun_ patterns expr |> return +;; + +let app expr = + let* f = expr in + let* args = many1 expr in + match args with + | hd :: tl -> List.fold_left Ast.app (Ast.app f hd) tl |> return + | _ -> assert false +;; + +let binops expr = + let binop op expr = + let op = token op in + chainl1 expr op + in + let combine op = + string op *> return (fun lhs rhs -> Ast.app (Ast.app (Ast.var op) lhs) rhs) + in + (* Stolen from https://ocaml.org/manual/5.1/api/Ocaml_operators.html. *) + expr + |> binop (combine "*" <|> combine "/") + |> binop (combine "+" <|> combine "-") + |> binop (combine "<" <|> combine "<=" <|> combine "==" <|> combine "=" <|> combine "<>") + |> binop (combine ">" <|> combine ">=") + |> binop (combine "&&") + |> binop (combine "||") +;; + +let ite expr = + let* cond = expr spf "expected expression inside 'if' condition" in + let* _ = token (string "then") spf "expected 'then'" in + let* then_ = expr spf "expected expression inside 'then' clause" in + let* _ = token (string "else") spf "expected 'else'" in + let* else_ = expr spf "expected expression inside 'else' clause" in + Ast.ite cond then_ else_ |> return +;; + +let expr = + fix (fun expr -> + let expr' = + let* c = whitespace *> peek_char in + match c with + | Some '0' .. '9' -> const + | Some '(' -> + let* r = parens expr <|> var in + r |> return + | _ -> var + in + let expr' = app expr' <|> expr' in + let expr' = binops expr' in + let expr' = + (let* kw = kw in + match kw with + | "let" -> + let* () = commit in + let_ expr + | "fun" -> + let* () = commit in + fun_ expr + | "if" -> + let* () = commit in + ite expr + | _ -> fail "") + <|> expr' + in + expr') +;; + +let ty = fail "types and ADTs are not implemented yet" + +let let_decl = + let* rec_flag = rec_flag in + let* pattern = pattern "expected pattern after 'let'" in + let* _ = token (char '=') in + let* body = + expr spf "expected expression after 'let %a ='" Ast.pp_pattern pattern + in + let* _ = token (string ";;") in + Ast.letdecl rec_flag pattern body |> return +;; + +let top_level = + let* kw = kw in + match kw with + | "let" -> let_decl + | "type" -> ty + | _ -> fail "expected top level declaration" +;; + +let parse = parse_string ~consume:Consume.All (many1 top_level) + +let parse_and_print code = + match parse_string ~consume:Consume.All (many1 top_level) code with + | Ok res -> + Format.printf "%a" (Format.pp_print_list ~pp_sep:pp_sep_newline Ast.pp_top_level) res + | Error res -> + Format.printf + "%s" + (res + |> String.split_on_char '>' + |> List.rev + |> List.map String.trim + |> String.concat "\n") +;; + +let%expect_test "const definition to 15" = + parse_and_print "let const_15 = fun () -> 15;;"; + [%expect + {| + let const_15 = + fun () -> + 15 + ;; + |}] +;; + +let%expect_test "simple algebraic expression" = + parse_and_print "let sub = fun a b c -> a + b + c;;"; + [%expect + {| + let sub = + fun a b c -> + (+) ((+) a b) c + ;; + |}] +;; + +let%expect_test "find stddev" = + parse_and_print "let stddev = fun a b c -> a * a + ((b * b) + (c * c));;"; + [%expect + {| + let stddev = + fun a b c -> + (+) ((*) a a) ((+) ((*) b b) ((*) c c)) + ;; + |}] +;; + +let%expect_test "use let ins for test" = + parse_and_print + {| + let some_test = fun a b c -> + let a_plus_b = a + b in + let b_plus_c = b + c in + let c_plus_a = c + a in + 5 + ;; + |}; + [%expect + {| + let some_test = + fun a b c -> + let a_plus_b = (+) a b in let b_plus_c = (+) b c in + let c_plus_a = (+) c a in 5 + ;; + |}] +;; + +let%expect_test "wrong let ins" = + parse_and_print + {| + let some_test = fun () -> + let % = 5 in 10 + ;; + |}; + [%expect + {| + expected pattern after 'let': char '(' + expected expression after 'fun () ' + expected expression after 'let some_test =' + |}] +;; + +let%expect_test "simple call" = + parse_and_print "let swap = fun a b k -> k b a;;"; + [%expect + {| + let swap = + fun a b k -> + k b a + ;; + |}] +;; + +let%expect_test "simple call with parens" = + parse_and_print "let wrong_swap = fun a b k -> k (b a);;"; + [%expect + {| + let wrong_swap = + fun a b k -> + k (b a) + ;; + |}] +;; + +let%expect_test "factorial" = + parse_and_print + {| + let rec fac = fun n -> + if n = 0 then + 1 + else + n * (fac (n - 1)) + ;; + |}; + [%expect + {| + let rec fac = + fun n -> + if (=) n 0 then 1 else (*) n (fac ((-) n 1)) + ;; + |}] +;; + +let%expect_test "factorial" = + parse_and_print + {| + let f = 15;; + let g = f + 3;; + let h = g * 15;; + |}; + [%expect {| + let f = + 15 + ;; + + let g = + (+) f 3 + ;; + + let h = + (*) g 15 + ;; + |}] +;; + +let%expect_test "quick check" = + let top_levels = + parse + {| + let rec fac = fun n -> + if n = 0 then + 1 + else + n * (fac (n - 1)) + ;; + |} + |> Result.get_ok + in + let top_levels' = + Format.asprintf + "%a" + (Format.pp_print_list ~pp_sep:pp_sep_newline Ast.pp_top_level) + top_levels + |> parse + |> Result.map_error (fun err -> + Format.printf "%s" err; + err) + |> Result.get_ok + in + Format.printf "%b" (top_levels = top_levels'); + [%expect {| true |}] +;; diff --git a/DOOML/many_tests/.ocamlformat b/DOOML/many_tests/.ocamlformat new file mode 100644 index 00000000..e3346c16 --- /dev/null +++ b/DOOML/many_tests/.ocamlformat @@ -0,0 +1 @@ +disable=true diff --git a/DOOML/many_tests/typed/001fac.ml b/DOOML/many_tests/typed/001fac.ml new file mode 120000 index 00000000..219cccf7 --- /dev/null +++ b/DOOML/many_tests/typed/001fac.ml @@ -0,0 +1 @@ +../../../manytests/typed/001fac.ml \ No newline at end of file diff --git a/DOOML/many_tests/typed/dune b/DOOML/many_tests/typed/dune new file mode 100644 index 00000000..f66331bb --- /dev/null +++ b/DOOML/many_tests/typed/dune @@ -0,0 +1,2 @@ +(cram + (deps ./001fac.ml)) diff --git a/DOOML/many_tests/typed/typed.t b/DOOML/many_tests/typed/typed.t new file mode 100644 index 00000000..ab349b10 --- /dev/null +++ b/DOOML/many_tests/typed/typed.t @@ -0,0 +1,2 @@ + $ wc 001fac.ml + 6 30 105 001fac.ml diff --git a/DOOML/out.ll b/DOOML/out.ll new file mode 100644 index 00000000..9f73f8af --- /dev/null +++ b/DOOML/out.ll @@ -0,0 +1,12 @@ +; ModuleID = 'main' +source_filename = "main" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-linux-gnu" + +declare void @print_int(i64) + +define i64 @main() { +entry: + call void @print_int(i64 70) + ret i64 0 +} diff --git a/DOOML/shell.nix b/DOOML/shell.nix new file mode 100644 index 00000000..10cf0b93 --- /dev/null +++ b/DOOML/shell.nix @@ -0,0 +1,12 @@ +{ pkgs ? import {} }: +pkgs.mkShell { + name = "Chrobelias"; + packages = with pkgs; [ + gmp + opam + + llvm_18 + zlib + libtinfo + ]; +} diff --git a/DOOML/test_qc/dune b/DOOML/test_qc/dune new file mode 100644 index 00000000..9ca61266 --- /dev/null +++ b/DOOML/test_qc/dune @@ -0,0 +1,13 @@ +(executable + (name test_qc) + (modules test_qc) + (libraries qcheck angstrom) + (preprocess + (pps ppx_deriving_qcheck ppx_deriving.show))) + +(rule + (alias runtest) + (deps + (:< test_qc.exe)) + (action + (run %{<}))) diff --git a/DOOML/test_qc/test_qc.ml b/DOOML/test_qc/test_qc.ml new file mode 100644 index 00000000..fa125b7f --- /dev/null +++ b/DOOML/test_qc/test_qc.ml @@ -0,0 +1,98 @@ +[@@@ocaml.text "/*"] + +(** Copyright 2023-2024, Kakadu and contributors *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +[@@@ocaml.text "/*"] + +(* run this test via `dune test --force` *) + +module AST = struct + type t = + | Const of (int[@gen QCheck.Gen.return 1]) + | Add of t * t + [@@deriving qcheck, show { with_path = false }] +end + +module PP = struct + let rec pp ppf = function + | AST.Const n -> Format.fprintf ppf "%d" n + (* | Add (l, r) -> Format.fprintf ppf "%a+%a" pp l pp r *) + | Add (l, r) -> Format.fprintf ppf "(%a+%a)" pp l pp r + ;; +end + +module Parser = struct + open Angstrom + + let prio expr table = + let len = Array.length table in + let rec helper level = + if level >= len + then expr + else ( + let xs = table.(level) in + return (List.fold_left (fun acc (op, r) -> op acc r)) + <*> helper (level + 1) + <*> many + (choice + (List.map + (fun (op, f) -> op *> helper (level + 1) >>= fun r -> return (f, r)) + xs))) + in + helper 0 + ;; + + let expr_small = + let code0 = Char.code '0' in + Angstrom.satisfy (function + | '0' .. '9' -> true + | _ -> false) + >>| fun c -> AST.Const (Char.code c - code0) + ;; + + let expr = + fix (fun self -> + let add a b = AST.Add (a, b) in + prio (expr_small <|> (char '(' *> self <* char ')')) [| [ char '+', add ] |]) + ;; +end + +let rec shrink_expr = + let open QCheck.Iter in + (* fun _ -> empty *) + function + | AST.Const _ -> empty + | Add (l, r) -> + of_list [ l; r ] + <+> (shrink_expr l >>= fun l -> return (AST.Add (l, r))) + <+> (shrink_expr r >>= fun r -> return (AST.Add (l, r))) +;; + +let arbitrary_expr = + (* let open QCheck.Iter in *) + QCheck.make AST.gen ~print:(Format.asprintf "%a" PP.pp) ~shrink:shrink_expr +;; + +let _ = + QCheck_runner.run_tests + [ QCheck.( + Test.make arbitrary_expr (fun l -> + match + Angstrom.parse_string + ~consume:Angstrom.Consume.All + Parser.expr + (Format.asprintf "%a" PP.pp l) + with + | Result.Ok after when after = l -> true + | Result.Ok after -> + Format.printf "before : %a\n%!" AST.pp l; + (* Format.printf " : `%a`\n%!" PP.pp l; *) + Format.printf "`%a`\n%!" AST.pp after; + false + | Result.Error _ -> + (* Format.printf "failed on : %a\n%!" Lam.pp l; *) + false)) + ] +;; diff --git a/DOOML/test_qc/test_qc.mli b/DOOML/test_qc/test_qc.mli new file mode 100644 index 00000000..a65c69d1 --- /dev/null +++ b/DOOML/test_qc/test_qc.mli @@ -0,0 +1,7 @@ +[@@@ocaml.text "/*"] + +(** Copyright 2023-2024, Kakadu and contributors *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +[@@@ocaml.text "/*"] From 7534f166fbac82a0860c5f3ca2e2052ff75d3e05 Mon Sep 17 00:00:00 2001 From: Georgiy Belyanin Date: Sun, 23 Nov 2025 23:20:10 +0300 Subject: [PATCH 02/49] me: add a-normal-form This patch introduces a-normal-form (ANF) as a step in the compiler's middle-end. ANF is responsible to make the function calls only contain variables or immediate values as their arguments which makes the code kind of resemble assembly in the sense that assembler instructions usually work with registers/immediate values not with compound statements. Example: ```ocaml (* Before ANF *) let f = let q = f ((g + sup0) * (2 * i)) in q ;; (* After ANF *) let f = let sup2 = (*) 2 i in let sup5 = (+) g sup0 in let sup6 = (*) sup5 sup2 in let sup7 = (f) sup6 in let q = sup7 in q ;; ``` --- DOOML/lib/anf.ml | 136 +++++++++++++++++++++++++++++++++++++++++++++++ DOOML/lib/dune | 2 +- 2 files changed, 137 insertions(+), 1 deletion(-) create mode 100644 DOOML/lib/anf.ml diff --git a/DOOML/lib/anf.ml b/DOOML/lib/anf.ml new file mode 100644 index 00000000..675d8631 --- /dev/null +++ b/DOOML/lib/anf.ml @@ -0,0 +1,136 @@ +type immexpr = + | ImmNum of int + | ImmId of string +[@@deriving variants] + +let pp_immexpr ppf = function + | ImmNum d -> Format.fprintf ppf "%d" d + | ImmId s -> Format.fprintf ppf "%s" s +;; + +type cexpr = + | CImm of immexpr + | CIte of immexpr * immexpr * immexpr + | CApp of string * immexpr list +[@@deriving variants, show] + +let pp_cexpr ppf = function + | CImm imm -> Format.fprintf ppf "%a" pp_immexpr imm + | CIte (cond_, then_, else_) -> + Format.fprintf + ppf + "if %a then %a else %a" + pp_immexpr + cond_ + pp_immexpr + then_ + pp_immexpr + else_ + | CApp (s, immexprs) -> + Format.fprintf + ppf + "(%s) %a" + s + (Format.pp_print_list ~pp_sep:(fun ppf () -> Format.fprintf ppf " ") pp_immexpr) + immexprs +;; + +type aexpr = + | ALet of string * cexpr * aexpr + | AExpr of cexpr +[@@deriving variants, show] + +let rec pp_aexpr ppf = function + | ALet (name, cexpr, aexpr) -> + Format.fprintf ppf "let %s = %a in @ %a" name pp_cexpr cexpr pp_aexpr aexpr + | AExpr cexpr -> Format.fprintf ppf "%a" pp_cexpr cexpr +;; + +type ctx = { syms : string list } + +let addsym (ctx : ctx) v = { syms = v :: ctx.syms } + +let gensym ?prefix (ctx : ctx) = + let prefix = Option.value prefix ~default:"sup" in + let rec aux i = + let v = String.cat prefix (Int.to_string i) in + if List.mem v ctx.syms then aux (i + 1) else v + in + let v = aux (List.length ctx.syms) in + v, addsym ctx v +;; + +let rec anf ctx (k : ctx -> immexpr -> aexpr) = function + | Ast.Const d -> k ctx (immnum d) + | Var s -> + let ctx = addsym ctx s in + k ctx (immid s) + | App _ as app -> + let rec aux ctx immexprs = function + | Ast.Var s -> + let sym, ctx = gensym ctx in + alet sym (capp s immexprs) (k ctx (immid sym)) + | App (f', expr') -> + anf ctx (fun ctx immexpr -> aux ctx (immexpr :: immexprs) f') expr' + | f -> + anf + ctx + (fun ctx immf -> + let sym, ctx = gensym ctx in + let sym', ctx = gensym ctx in + alet sym (cimm immf) (alet sym' (capp sym immexprs) (k ctx (immid sym')))) + f + in + aux ctx [] app + | Let (_rec, name, bind, expr) -> + let name = + match name with + | PUnit -> "()" + | Plug -> "_" + | Ident s -> s + | Tuple _ -> failwith "tbd" + in + let ctx = addsym ctx name in + anf ctx (fun ctx immbind -> alet name (cimm immbind) (anf ctx k expr)) bind + | Ite (cond_, then_, else_) -> + anf + ctx + (fun ctx immcond -> + anf + ctx + (fun ctx immthen -> + anf + ctx + (fun ctx immelse -> + alet "ite" (cite immcond immthen immelse) (k ctx (immid "ite"))) + else_) + then_) + cond_ + | Fun _ -> failwith "should be CC/LL first" +;; + +let anf = anf { syms = [] } (fun _ctx imm -> aexpr (cimm imm)) + +let%expect_test "basic" = + let ast = + Fe.parse + {| + let f = + let q = f ((g + sup0) * (2 * i)) in + q ;; + |} + |> Result.get_ok + |> List.hd + in + match ast with + | LetDecl (_, _name, body) -> + Format.printf "@[@ %a@]@." pp_aexpr (anf body); + [%expect {| + let sup2 = (*) 2 i in + let sup5 = (+) g sup0 in + let sup6 = (*) sup5 sup2 in + let sup7 = (f) sup6 in + let q = sup7 in + q + |}] +;; diff --git a/DOOML/lib/dune b/DOOML/lib/dune index 9db51790..8cd3cfcb 100644 --- a/DOOML/lib/dune +++ b/DOOML/lib/dune @@ -1,7 +1,7 @@ (library (name DOOML) (public_name DOOML.Lib) - (modules Ast Fe) + (modules Anf Ast Fe) (libraries base stdlib angstrom) (inline_tests) (preprocess From 7b29df1aa71fce538f2a581515890a804fcf493e Mon Sep 17 00:00:00 2001 From: Georgiy Belyanin Date: Sat, 3 Jan 2026 20:25:46 +0300 Subject: [PATCH 03/49] me: improve AST pretty-printing This patch fixes AST pretty-printing by properly using format boxes. It is relatively hard to describe what has changed since the previous usage was completely wrong. Let's provide a few examples instead to notice the difference between the old formatting and the new one. Old: ``` let a = 15 in let b = 4 in let c = 8 in ... ``` New: ``` let a = 15 in let b = 4 in let c = 8 in ... ``` Old: ``` let smth = if long_cond then long_one else long_two in ``` New: ``` let smth = if long_cond then long_one else long_two in ``` --- DOOML/lib/ast.ml | 8 +++--- DOOML/lib/fe.ml | 69 +++++++++++------------------------------------- 2 files changed, 19 insertions(+), 58 deletions(-) diff --git a/DOOML/lib/ast.ml b/DOOML/lib/ast.ml index 9f748309..4dd5168a 100644 --- a/DOOML/lib/ast.ml +++ b/DOOML/lib/ast.ml @@ -61,7 +61,7 @@ let rec pp_expr ppf = function in Format.fprintf ppf - "let %s%a = %a in@ %a" + "@[let %s%a =@;<1 2>@[%a@]@;<1 0>in@]@;<0 0>%a" rec_flag pp_pattern pattern @@ -72,7 +72,7 @@ let rec pp_expr ppf = function | Fun (patterns, body) -> Format.fprintf ppf - "@[fun %a ->@ @[%a@]@]" + "fun %a ->@;<1 2>@[%a@]" (Format.pp_print_list ~pp_sep:pp_sep_space pp_pattern) patterns pp_expr @@ -80,7 +80,7 @@ let rec pp_expr ppf = function | Ite (cond, then_, else_) -> Format.fprintf ppf - "if @[%a@] then@ @[%a@]@ else@ @[%a@]" + "if %a then@;<1 2>@[%a@]@;<1 0>else@;<1 2>@[%a@]" pp_expr cond pp_expr @@ -100,7 +100,7 @@ let pp_top_level ppf = function in Format.fprintf ppf - "@[let %s%a =@ @[%a@]@]@.;;@." + "@[let %s%a =@;<1 2>@[%a@]@;<0 0>;;@]@." rec_flag pp_pattern pattern diff --git a/DOOML/lib/fe.ml b/DOOML/lib/fe.ml index 449c7367..d401b2d1 100644 --- a/DOOML/lib/fe.ml +++ b/DOOML/lib/fe.ml @@ -256,35 +256,17 @@ let parse_and_print code = let%expect_test "const definition to 15" = parse_and_print "let const_15 = fun () -> 15;;"; - [%expect - {| - let const_15 = - fun () -> - 15 - ;; - |}] + [%expect {| let const_15 = fun () -> 15;; |}] ;; let%expect_test "simple algebraic expression" = parse_and_print "let sub = fun a b c -> a + b + c;;"; - [%expect - {| - let sub = - fun a b c -> - (+) ((+) a b) c - ;; - |}] + [%expect {| let sub = fun a b c -> (+) ((+) a b) c;; |}] ;; let%expect_test "find stddev" = parse_and_print "let stddev = fun a b c -> a * a + ((b * b) + (c * c));;"; - [%expect - {| - let stddev = - fun a b c -> - (+) ((*) a a) ((+) ((*) b b) ((*) c c)) - ;; - |}] + [%expect {| let stddev = fun a b c -> (+) ((*) a a) ((+) ((*) b b) ((*) c c));; |}] ;; let%expect_test "use let ins for test" = @@ -301,8 +283,10 @@ let%expect_test "use let ins for test" = {| let some_test = fun a b c -> - let a_plus_b = (+) a b in let b_plus_c = (+) b c in - let c_plus_a = (+) c a in 5 + let a_plus_b = (+) a b in + let b_plus_c = (+) b c in + let c_plus_a = (+) c a in + 5 ;; |}] ;; @@ -324,24 +308,12 @@ let%expect_test "wrong let ins" = let%expect_test "simple call" = parse_and_print "let swap = fun a b k -> k b a;;"; - [%expect - {| - let swap = - fun a b k -> - k b a - ;; - |}] + [%expect {| let swap = fun a b k -> k b a;; |}] ;; let%expect_test "simple call with parens" = parse_and_print "let wrong_swap = fun a b k -> k (b a);;"; - [%expect - {| - let wrong_swap = - fun a b k -> - k (b a) - ;; - |}] + [%expect {| let wrong_swap = fun a b k -> k (b a);; |}] ;; let%expect_test "factorial" = @@ -354,13 +326,7 @@ let%expect_test "factorial" = n * (fac (n - 1)) ;; |}; - [%expect - {| - let rec fac = - fun n -> - if (=) n 0 then 1 else (*) n (fac ((-) n 1)) - ;; - |}] + [%expect {| let rec fac = fun n -> if (=) n 0 then 1 else (*) n (fac ((-) n 1));; |}] ;; let%expect_test "factorial" = @@ -370,18 +336,13 @@ let%expect_test "factorial" = let g = f + 3;; let h = g * 15;; |}; - [%expect {| - let f = - 15 - ;; + [%expect + {| + let f = 15;; - let g = - (+) f 3 - ;; + let g = (+) f 3;; - let h = - (*) g 15 - ;; + let h = (*) g 15;; |}] ;; From d3cc9231ede6745e070c81c1d074d90b91aa6728 Mon Sep 17 00:00:00 2001 From: Georgiy Belyanin Date: Sat, 3 Jan 2026 20:29:13 +0300 Subject: [PATCH 04/49] me: fix ANF of if-then-else Previous implementation of the if-then-else ANF process was wrong. It actually executed both then and else branches and then chosen one of the results. This patch fixes it and now both branches are executed exclusively. --- DOOML/lib/anf.ml | 85 ++++++++++++++++++++++++++++++++++-------------- 1 file changed, 61 insertions(+), 24 deletions(-) diff --git a/DOOML/lib/anf.ml b/DOOML/lib/anf.ml index 675d8631..9758d798 100644 --- a/DOOML/lib/anf.ml +++ b/DOOML/lib/anf.ml @@ -10,21 +10,30 @@ let pp_immexpr ppf = function type cexpr = | CImm of immexpr - | CIte of immexpr * immexpr * immexpr + | CIte of immexpr * aexpr * aexpr | CApp of string * immexpr list -[@@deriving variants, show] -let pp_cexpr ppf = function +and aexpr = + | ALet of string * cexpr * aexpr + | AExpr of cexpr + +let cimm imm = CImm imm +let cite cond_ then_ else_ = CIte (cond_, then_, else_) +let capp f args = CApp (f, args) +let alet bind v body = ALet (bind, v, body) +let aexpr cexpr = AExpr cexpr + +let rec pp_cexpr ppf = function | CImm imm -> Format.fprintf ppf "%a" pp_immexpr imm | CIte (cond_, then_, else_) -> Format.fprintf ppf - "if %a then %a else %a" + "if %a then@;<1 2>@[%a@]@;<1 0>else@;<1 2>@[%a@]" pp_immexpr cond_ - pp_immexpr + pp_aexpr then_ - pp_immexpr + pp_aexpr else_ | CApp (s, immexprs) -> Format.fprintf @@ -33,16 +42,17 @@ let pp_cexpr ppf = function s (Format.pp_print_list ~pp_sep:(fun ppf () -> Format.fprintf ppf " ") pp_immexpr) immexprs -;; -type aexpr = - | ALet of string * cexpr * aexpr - | AExpr of cexpr -[@@deriving variants, show] - -let rec pp_aexpr ppf = function +and pp_aexpr ppf = function | ALet (name, cexpr, aexpr) -> - Format.fprintf ppf "let %s = %a in @ %a" name pp_cexpr cexpr pp_aexpr aexpr + Format.fprintf + ppf + "@[let %s =@;<1 2>@[%a@]@;<1 0>in@]@;<0 0>%a" + name + pp_cexpr + cexpr + pp_aexpr + aexpr | AExpr cexpr -> Format.fprintf ppf "%a" pp_cexpr cexpr ;; @@ -96,15 +106,10 @@ let rec anf ctx (k : ctx -> immexpr -> aexpr) = function anf ctx (fun ctx immcond -> - anf - ctx - (fun ctx immthen -> - anf - ctx - (fun ctx immelse -> - alet "ite" (cite immcond immthen immelse) (k ctx (immid "ite"))) - else_) - then_) + let then_ = anf ctx k then_ in + let else_ = anf ctx k else_ in + let sym, ctx = gensym ~prefix:"ite" ctx in + alet sym (cite immcond then_ else_) (k ctx (immid sym))) cond_ | Fun _ -> failwith "should be CC/LL first" ;; @@ -125,7 +130,8 @@ let%expect_test "basic" = match ast with | LetDecl (_, _name, body) -> Format.printf "@[@ %a@]@." pp_aexpr (anf body); - [%expect {| + [%expect + {| let sup2 = (*) 2 i in let sup5 = (+) g sup0 in let sup6 = (*) sup5 sup2 in @@ -134,3 +140,34 @@ let%expect_test "basic" = q |}] ;; + +let%expect_test "ite" = + let ast = + Fe.parse + {| + let rec fac = + if (k = 1) then (1) else (fac (k - 1) * k) + ;; + |} + |> Result.map_error (fun err -> Format.printf "Error %s" err) + |> Result.get_ok + |> List.hd + in + match ast with + | LetDecl (_, _name, body) -> + Format.printf "@[@ %a@]@." pp_aexpr (anf body); + [%expect + {| + let sup1 = (=) k 1 in + let ite2 = + if sup1 then + 1 + else + let sup4 = (-) k 1 in + let sup5 = (fac) sup4 in + let sup6 = (*) sup5 k in + sup6 + in + ite2 + |}] +;; From be95b2f2a7000ce0fdb7495f0816a9f20d97b5be Mon Sep 17 00:00:00 2001 From: Georgiy Belyanin Date: Fri, 16 Jan 2026 18:28:19 +0300 Subject: [PATCH 05/49] parser: not parse keywords as identifiers This patch prevents the parser from handling the keywords as identifiers. --- DOOML/lib/fe.ml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/DOOML/lib/fe.ml b/DOOML/lib/fe.ml index d401b2d1..62a03691 100644 --- a/DOOML/lib/fe.ml +++ b/DOOML/lib/fe.ml @@ -84,7 +84,10 @@ let chainl1 e op = e >>= go ;; -let kw = take_while1 is_idchar |> token +let kw = + let* kw = take_while1 is_idchar |> token in + if Map.mem kws kw then return kw else fail (spf "expected keyword, but found %s" kw) +;; let ident = let un_ident = From 63fc9ec356ee7fff2d591879bd4550e01c107409 Mon Sep 17 00:00:00 2001 From: Georgiy Belyanin Date: Fri, 16 Jan 2026 18:30:57 +0300 Subject: [PATCH 06/49] parser: support plugs, tuples, units This patch introduces plugs, tuples, and units into the parser. In other words, the following code can now be parsed properly. ```ocaml let () = _;; let _ = _;; let (a, b) = _;; ``` --- DOOML/lib/ast.ml | 2 +- DOOML/lib/fe.ml | 36 ++++++++++++++++++++++++++++++------ 2 files changed, 31 insertions(+), 7 deletions(-) diff --git a/DOOML/lib/ast.ml b/DOOML/lib/ast.ml index 4dd5168a..3adeeb24 100644 --- a/DOOML/lib/ast.ml +++ b/DOOML/lib/ast.ml @@ -28,7 +28,7 @@ let rec pp_pattern ppf = function | Plug -> Format.fprintf ppf "_" | Ident s -> Format.fprintf ppf "%s" s | Tuple ss -> - Format.fprintf ppf "%a" (Format.pp_print_list ~pp_sep:pp_sep_quote pp_pattern) ss + Format.fprintf ppf "(%a)" (Format.pp_print_list ~pp_sep:pp_sep_quote pp_pattern) ss ;; type decl_body = pattern [@@deriving show] diff --git a/DOOML/lib/fe.ml b/DOOML/lib/fe.ml index 62a03691..007ceda7 100644 --- a/DOOML/lib/fe.ml +++ b/DOOML/lib/fe.ml @@ -109,9 +109,20 @@ let ident = ;; let punit = (string "()" |> token) *> return Ast.punit +let plug = (string "_" |> token) *> return Ast.plug + +let tuple pattern = + let tuple = + let* fpattern = pattern in + let* patterns = many (token (char ',') *> pattern) in + return (Ast.tuple (fpattern :: patterns)) + in + parens tuple +;; let pattern = - fix (fun _pattern -> punit <|> (ident >>= fun ident -> return (Ast.ident ident))) + fix (fun pattern -> + punit <|> plug <|> tuple pattern <|> (ident >>= fun ident -> return (Ast.ident ident))) ;; let const = @@ -302,11 +313,7 @@ let%expect_test "wrong let ins" = ;; |}; [%expect - {| - expected pattern after 'let': char '(' - expected expression after 'fun () ' - expected expression after 'let some_test =' - |}] + {| expected expression after 'let some_test =': expected ident, but found keyword fun |}] ;; let%expect_test "simple call" = @@ -349,6 +356,23 @@ let%expect_test "factorial" = |}] ;; +let%expect_test "tuples and plugs" = + parse_and_print + {| + let () = _;; + let _ = _;; + let (a, b) = _;; + |}; + [%expect + {| + let () = _;; + + let _ = _;; + + let (a, b) = _;; + |}] +;; + let%expect_test "quick check" = let top_levels = parse From 259f23a9c5b1f21dcb2117acc52ff824176c141a Mon Sep 17 00:00:00 2001 From: Georgiy Belyanin Date: Fri, 16 Jan 2026 18:34:03 +0300 Subject: [PATCH 07/49] parser: fix if-then-else internal handling This patch fixes the if-then-else (ite) condition parsing. The problem was that it was impossible to use ite inside ite conditions (for some unknown reason). The problem could be fixed by removing a few parser-combinator commits. Let's do it even though it would make errors less useful. Actually, the patch improves pretty-printing too. --- DOOML/lib/ast.ml | 8 +++++++- DOOML/lib/fe.ml | 41 +++++++++++++++++++++++++++-------------- 2 files changed, 34 insertions(+), 15 deletions(-) diff --git a/DOOML/lib/ast.ml b/DOOML/lib/ast.ml index 3adeeb24..9700203f 100644 --- a/DOOML/lib/ast.ml +++ b/DOOML/lib/ast.ml @@ -47,9 +47,15 @@ type expr = | Fun of pattern list * expr [@@deriving variants] +let fun_ args = function + | Fun (args', body') -> fun_ (args @ args') body' + | body -> fun_ args body +;; + let rec pp_expr ppf = function | Const c -> Format.fprintf ppf "%d" c | Var ident -> Format.fprintf ppf "%a" pp_ident ident + | App ((Fun _ as f), arg) -> Format.fprintf ppf "(%a) %a" pp_expr f pp_expr arg | App (f, (Const _ as arg)) | App (f, (Var _ as arg)) -> Format.fprintf ppf "%a %a" pp_expr f pp_expr arg | App (f, arg) -> Format.fprintf ppf "%a (%a)" pp_expr f pp_expr arg @@ -61,7 +67,7 @@ let rec pp_expr ppf = function in Format.fprintf ppf - "@[let %s%a =@;<1 2>@[%a@]@;<1 0>in@]@;<0 0>%a" + "let %s%a =@;<1 2>@[%a@]@;<1 0>in@;<1 0>%a" rec_flag pp_pattern pattern diff --git a/DOOML/lib/fe.ml b/DOOML/lib/fe.ml index 007ceda7..37472fa1 100644 --- a/DOOML/lib/fe.ml +++ b/DOOML/lib/fe.ml @@ -216,15 +216,9 @@ let expr = let expr' = (let* kw = kw in match kw with - | "let" -> - let* () = commit in - let_ expr - | "fun" -> - let* () = commit in - fun_ expr - | "if" -> - let* () = commit in - ite expr + | "let" -> let_ expr + | "fun" -> fun_ expr + | "if" -> ite expr | _ -> fail "") <|> expr' in @@ -247,8 +241,12 @@ let let_decl = let top_level = let* kw = kw in match kw with - | "let" -> let_decl - | "type" -> ty + | "let" -> + let* () = commit in + let_decl + | "type" -> + let* () = commit in + ty | _ -> fail "expected top level declaration" ;; @@ -297,9 +295,15 @@ let%expect_test "use let ins for test" = {| let some_test = fun a b c -> - let a_plus_b = (+) a b in - let b_plus_c = (+) b c in - let c_plus_a = (+) c a in + let a_plus_b = + (+) a b + in + let b_plus_c = + (+) b c + in + let c_plus_a = + (+) c a + in 5 ;; |}] @@ -356,6 +360,15 @@ let%expect_test "factorial" = |}] ;; +let%expect_test "ite in ite" = + parse_and_print + {| + let f = if (if 1 then 0 else 0) then 1 else 0;; + |}; + [%expect + {| let f = if if 1 then 0 else 0 then 1 else 0;; |}] +;; + let%expect_test "tuples and plugs" = parse_and_print {| From 9be031611cf085ebc5ef6eaf9124105f76e89df0 Mon Sep 17 00:00:00 2001 From: Georgiy Belyanin Date: Fri, 16 Jan 2026 18:36:36 +0300 Subject: [PATCH 08/49] anf: accept whole program & anf tuples This patch significantly improves the ANF stage. It does the following. * It makes ANF accept the whole program as input. * It improves pretty-printing (similarly to AST in the previous patch). * It allows to ANF tuples as follows. Before ANF. ``` let (a, b) = c;; ``` After ANF. ``` let a = nth c 0;; let b = nth c 1;; ``` It also makes it internally use a state monad. --- DOOML/lib/anf.ml | 365 ++++++++++++++++++++++++++++++++++++--------- DOOML/lib/dune | 2 +- DOOML/lib/state.ml | 19 +++ 3 files changed, 318 insertions(+), 68 deletions(-) create mode 100644 DOOML/lib/state.ml diff --git a/DOOML/lib/anf.ml b/DOOML/lib/anf.ml index 9758d798..cf320508 100644 --- a/DOOML/lib/anf.ml +++ b/DOOML/lib/anf.ml @@ -47,7 +47,7 @@ and pp_aexpr ppf = function | ALet (name, cexpr, aexpr) -> Format.fprintf ppf - "@[let %s =@;<1 2>@[%a@]@;<1 0>in@]@;<0 0>%a" + "let %s =@;<1 2>@[%a@]@;<1 0>in@;<1 0>%a" name pp_cexpr cexpr @@ -56,65 +56,175 @@ and pp_aexpr ppf = function | AExpr cexpr -> Format.fprintf ppf "%a" pp_cexpr cexpr ;; -type ctx = { syms : string list } +type decl = Decl of Ast.rec_flag * string * string list * aexpr [@@deriving variants] -let addsym (ctx : ctx) v = { syms = v :: ctx.syms } +let pp_decl ppf = function + | Decl (rec_flag, name, args, body) -> + let rec_flag = + match rec_flag with + | Rec -> "rec " + | NonRec -> "" + in + Format.fprintf + ppf + "@[let %s%s %a =@;<1 2>@[%a@]@;<0 0>;;@]@." + rec_flag + name + (Format.pp_print_list + ~pp_sep:(fun ppf () -> Format.fprintf ppf " ") + Format.pp_print_string) + args + pp_aexpr + body +;; -let gensym ?prefix (ctx : ctx) = - let prefix = Option.value prefix ~default:"sup" in - let rec aux i = - let v = String.cat prefix (Int.to_string i) in - if List.mem v ctx.syms then aux (i + 1) else v - in - let v = aux (List.length ctx.syms) in - v, addsym ctx v +module Ctx = struct + type t = { syms : string list } + + let addsym v (ctx : t) = { syms = v :: ctx.syms } + + let gensym ?prefix () (ctx : t) = + let prefix = Option.value prefix ~default:"sup" in + let rec aux i = + let v = String.cat prefix (Int.to_string i) in + if List.mem v ctx.syms then aux (i + 1) else v + in + let v = aux (List.length ctx.syms) in + v, addsym v ctx + ;; +end + +module State = struct + include State.M (Ctx) + + let addsym sym ctx = put (Ctx.addsym sym ctx) () + let gensym = Ctx.gensym +end + +open State + +let rec arg = function + | Ast.PUnit -> return ("()", []) + | Ast.Plug -> return ("_", []) + | Ast.Ident name -> return (name, []) + | Ast.Tuple els -> + let* els = + List.fold_right + (fun a acc -> + let* acc = acc in + let* a = arg a in + return (a :: acc)) + els + (return []) + in + let* sym = gensym () in + let lets = + List.mapi (fun i (name, _) -> name, capp "nth" [ immid sym; immnum i ]) els + in + let lets = lets @ List.concat_map snd els in + return (sym, lets) ;; -let rec anf ctx (k : ctx -> immexpr -> aexpr) = function - | Ast.Const d -> k ctx (immnum d) +let rec anf (k : immexpr -> Ctx.t -> aexpr * Ctx.t) = function + | Ast.Const d -> + let* ret = k (immnum d) in + return ret | Var s -> - let ctx = addsym ctx s in - k ctx (immid s) + let* () = addsym s in + let* ret = k (immid s) in + return ret | App _ as app -> - let rec aux ctx immexprs = function + let rec aux immexprs = function | Ast.Var s -> - let sym, ctx = gensym ctx in - alet sym (capp s immexprs) (k ctx (immid sym)) + let* sym = gensym () in + let* expr = k (immid sym) in + let ret = alet sym (capp s immexprs) expr in + return ret | App (f', expr') -> - anf ctx (fun ctx immexpr -> aux ctx (immexpr :: immexprs) f') expr' + anf + (fun immexpr -> + let* f' = aux (immexpr :: immexprs) f' in + return f') + expr' | f -> anf - ctx - (fun ctx immf -> - let sym, ctx = gensym ctx in - let sym', ctx = gensym ctx in - alet sym (cimm immf) (alet sym' (capp sym immexprs) (k ctx (immid sym')))) + (fun immf -> + let* sym = gensym () in + let* sym' = gensym () in + let* expr = k (immid sym') in + return (alet sym (cimm immf) (alet sym' (capp sym immexprs) expr))) f in - aux ctx [] app + aux [] app | Let (_rec, name, bind, expr) -> - let name = - match name with - | PUnit -> "()" - | Plug -> "_" - | Ident s -> s - | Tuple _ -> failwith "tbd" + let* name, lets = arg name in + let* () = addsym name in + let* ret = + anf + (fun immbind -> + let* expr = anf k expr in + let expr = + List.fold_right (fun (name, bind) acc -> alet name bind acc) lets expr + in + let ret = alet name (cimm immbind) expr in + return ret) + bind in - let ctx = addsym ctx name in - anf ctx (fun ctx immbind -> alet name (cimm immbind) (anf ctx k expr)) bind + return ret | Ite (cond_, then_, else_) -> - anf - ctx - (fun ctx immcond -> - let then_ = anf ctx k then_ in - let else_ = anf ctx k else_ in - let sym, ctx = gensym ~prefix:"ite" ctx in - alet sym (cite immcond then_ else_) (k ctx (immid sym))) - cond_ + let* ret = + anf + (fun immcond -> + let* then_ = anf (fun imm -> return (aexpr (cimm imm))) then_ in + let* else_ = anf (fun imm -> return (aexpr (cimm imm))) else_ in + let* sym = gensym ~prefix:"ite" () in + let* expr = k (immid sym) in + let ret = alet sym (cite immcond then_ else_) expr in + return ret) + cond_ + in + return ret | Fun _ -> failwith "should be CC/LL first" ;; -let anf = anf { syms = [] } (fun _ctx imm -> aexpr (cimm imm)) +let anf program = + List.fold_right + (fun a acc -> + let* acc = acc in + match a with + | Ast.LetDecl (rec_flag, name, Fun (args, body)) -> + let* args = + List.fold_right + (fun a acc -> + let* acc = acc in + let* a = arg a in + return (a :: acc)) + args + (return []) + in + let args, lets = List.split args in + let lets = List.concat lets in + let* body = anf (fun imm -> return (aexpr (cimm imm))) body in + let* name, lets' = arg name in + let body = + List.fold_right (fun (name, bind) acc -> alet name bind acc) lets body + in + let ret = decl rec_flag name args body in + let lets' = + List.map (fun (name, bind) -> decl Ast.nonrec_ name [] (aexpr bind)) lets' + in + return ((ret :: lets') @ acc) + | Ast.LetDecl (rec_flag, name, v) -> + let* v = anf (fun imm -> return (aexpr (cimm imm))) v in + let* name, lets = arg name in + let v = List.fold_right (fun (name, bind) acc -> alet name bind acc) lets v in + let ret = decl rec_flag name [] v in + return (ret :: acc)) + program + (return []) + { syms = [] } + |> fst +;; let%expect_test "basic" = let ast = @@ -125,20 +235,32 @@ let%expect_test "basic" = q ;; |} |> Result.get_ok - |> List.hd in - match ast with - | LetDecl (_, _name, body) -> - Format.printf "@[@ %a@]@." pp_aexpr (anf body); - [%expect - {| - let sup2 = (*) 2 i in - let sup5 = (+) g sup0 in - let sup6 = (*) sup5 sup2 in - let sup7 = (f) sup6 in - let q = sup7 in + Format.printf + "%a" + (Format.pp_print_list ~pp_sep:Format.pp_print_newline pp_decl) + (anf ast); + [%expect + {| + let f = + let sup2 = + (*) 2 i + in + let sup5 = + (+) g sup0 + in + let sup6 = + (*) sup5 sup2 + in + let sup7 = + (f) sup6 + in + let q = + sup7 + in q - |}] + ;; + |}] ;; let%expect_test "ite" = @@ -151,23 +273,132 @@ let%expect_test "ite" = |} |> Result.map_error (fun err -> Format.printf "Error %s" err) |> Result.get_ok - |> List.hd in - match ast with - | LetDecl (_, _name, body) -> - Format.printf "@[@ %a@]@." pp_aexpr (anf body); - [%expect - {| - let sup1 = (=) k 1 in - let ite2 = + Format.printf + "%a" + (Format.pp_print_list ~pp_sep:Format.pp_print_newline pp_decl) + (anf ast); + [%expect + {| + let rec fac = + let sup1 = + (=) k 1 + in + let ite7 = if sup1 then 1 else - let sup4 = (-) k 1 in - let sup5 = (fac) sup4 in - let sup6 = (*) sup5 k in + let sup4 = + (-) k 1 + in + let sup5 = + (fac) sup4 + in + let sup6 = + (*) sup5 k + in sup6 in - ite2 - |}] + ite7 + ;; + |}] +;; + +let%expect_test "task 2" = + let asts = + Fe.parse + {| + let main = + let x = if (if 0 then 1 else 2) + then 0 else 1 in + large x + ;; + |} + |> Result.map_error (fun err -> Format.printf "Error %s" err) + |> Result.get_ok + |> Cc.cc + |> Ll.ll + |> anf + in + Format.printf "%a" (Format.pp_print_list ~pp_sep:Format.pp_print_newline pp_decl) asts; + [%expect + {| + let main = + let ite1 = + if 0 then 1 else 2 + in + let ite2 = + if ite1 then 0 else 1 + in + let x = + ite2 + in + let sup4 = + (large) x + in + sup4 + ;; + |}] +;; + +let%expect_test "task 3" = + let asts = + Fe.parse + {| + let (f, g) = fun a (b, c) -> + let (b1, b2) = b in + let (c1, c2) = c in + b1 + b2 + c1 + c2 + ;; + |} + |> Result.map_error (fun err -> Format.printf "Error %s" err) + |> Result.get_ok + |> Cc.cc + |> Ll.ll + |> anf + in + Format.printf "%a" (Format.pp_print_list ~pp_sep:Format.pp_print_newline pp_decl) asts; + [%expect + {| + let sup14 a sup0 = + let b = + (nth) sup0 0 + in + let c = + (nth) sup0 1 + in + let sup1 = + b + in + let b1 = + (nth) sup1 0 + in + let b2 = + (nth) sup1 1 + in + let sup4 = + c + in + let c1 = + (nth) sup4 0 + in + let c2 = + (nth) sup4 1 + in + let sup11 = + (+) b1 b2 + in + let sup12 = + (+) sup11 c1 + in + let sup13 = + (+) sup12 c2 + in + sup13 + ;; + + let f = (nth) sup14 0;; + + let g = (nth) sup14 1;; + |}] ;; diff --git a/DOOML/lib/dune b/DOOML/lib/dune index 8cd3cfcb..977bf24a 100644 --- a/DOOML/lib/dune +++ b/DOOML/lib/dune @@ -1,7 +1,7 @@ (library (name DOOML) (public_name DOOML.Lib) - (modules Anf Ast Fe) + (modules Anf Ast Fe State) (libraries base stdlib angstrom) (inline_tests) (preprocess diff --git a/DOOML/lib/state.ml b/DOOML/lib/state.ml new file mode 100644 index 00000000..f090df92 --- /dev/null +++ b/DOOML/lib/state.ml @@ -0,0 +1,19 @@ +module M (S : sig + type t + end) = +struct + type state = S.t + type 'a t = state -> 'a * state + + let return v st = v, st + + let ( >>= ) m f st = + let v, st' = m st in + f v st' + ;; + + let ( let* ) = ( >>= ) + let get st = st, st + let put st v = v, st + let run m = m +end From 4b030912dcec6a4cb651c1d5aba72a3a40eeedf2 Mon Sep 17 00:00:00 2001 From: Georgiy Belyanin Date: Fri, 16 Jan 2026 18:43:40 +0300 Subject: [PATCH 09/49] cc/ll: introduce basic CC/LL This patch introduces basic closure-conversion and lambda-lifting allowing DOOML to handle closures. They are usually used together so they are added in a single patch. ```ocaml let f = fun a c -> let g = fun b -> a + b in g c ;; (* CC turns it into... *) let f = fun a c -> let g = (fun a b -> a + b) a in g c ;; (* LL turns it into... *) let g = (fun a b -> a + b) a;; let f = fun a c -> g c ;; ``` --- DOOML/lib/builtin.ml | 23 +++ DOOML/lib/cc.ml | 362 +++++++++++++++++++++++++++++++++++++++++++ DOOML/lib/cc.mli | 1 + DOOML/lib/dune | 2 +- DOOML/lib/ll.ml | 159 +++++++++++++++++++ DOOML/lib/ll.mli | 1 + 6 files changed, 547 insertions(+), 1 deletion(-) create mode 100644 DOOML/lib/builtin.ml create mode 100644 DOOML/lib/cc.ml create mode 100644 DOOML/lib/cc.mli create mode 100644 DOOML/lib/ll.ml create mode 100644 DOOML/lib/ll.mli diff --git a/DOOML/lib/builtin.ml b/DOOML/lib/builtin.ml new file mode 100644 index 00000000..6f41f648 --- /dev/null +++ b/DOOML/lib/builtin.ml @@ -0,0 +1,23 @@ +type t = + { name : string + ; arity : int + } + +let all = + [ { name = "*"; arity = 2 } + ; { name = "/"; arity = 2 } + ; { name = "+"; arity = 2 } + ; { name = "-"; arity = 2 } + ; { name = "<"; arity = 2 } + ; { name = "<="; arity = 2 } + ; { name = "=="; arity = 2 } + ; { name = "="; arity = 2 } + ; { name = "<>"; arity = 2 } + ; { name = ">"; arity = 2 } + ; { name = ">="; arity = 2 } + ; { name = "&&"; arity = 2 } + ; { name = "||"; arity = 2 } + ; { name = "print_int"; arity = 1 } + ; { name = "nth"; arity = 2 } + ] +;; diff --git a/DOOML/lib/cc.ml b/DOOML/lib/cc.ml new file mode 100644 index 00000000..7ef43863 --- /dev/null +++ b/DOOML/lib/cc.ml @@ -0,0 +1,362 @@ +module Scope = Set.Make (String) + +module Ctx = struct + type t = + { captured : string list + ; locals : Scope.t + ; globals : Scope.t + ; recs : Scope.t + } + + let rec of_pattern = function + | Ast.PUnit | Plug -> Scope.empty + | Ident s -> Scope.singleton s + | Tuple patterns -> + List.map of_pattern patterns |> List.fold_left Scope.union Scope.empty + ;; + + let mem s (ctx : t) = + Scope.mem s ctx.locals || Scope.mem s ctx.recs || Scope.mem s ctx.globals + ;; + + let capture name (ctx : t) = + if mem name ctx + then ctx + else { ctx with locals = Scope.add name ctx.locals; captured = name :: ctx.captured } + ;; + + let extend pattern (ctx : t) = + let names = of_pattern pattern in + { ctx with locals = Scope.union ctx.locals names } + ;; + + let global pattern (ctx : t) = + let names = of_pattern pattern in + { ctx with globals = Scope.union ctx.globals names } + ;; + + let rec_ pattern (ctx : t) = + let names = of_pattern pattern in + { ctx with locals = Scope.union ctx.locals names; recs = names } + ;; + + let nonrec_ pattern (ctx : t) = + let names = of_pattern pattern in + { ctx with locals = Scope.union ctx.locals names; recs = Scope.empty } + ;; + + let up (ctx' : t) (ctx : t) = + let captured = + ctx.captured + |> List.filter (fun local -> + (not (Scope.mem local ctx'.locals)) && not (List.mem local ctx'.captured)) + in + let captured = captured @ ctx'.captured in + { ctx' with captured } + ;; + + let empty = + let scope = + Scope.of_list (Builtin.all |> List.map (fun (builtin : Builtin.t) -> builtin.name)) + in + let locals = scope in + let captured = [] in + { locals; captured; globals = Scope.empty; recs = Scope.empty } + ;; + + let of_args args (ctx : t) = + let scope = + Scope.of_list (Builtin.all |> List.map (fun (builtin : Builtin.t) -> builtin.name)) + in + let locals = + List.map of_pattern args |> List.fold_left Scope.union scope |> Scope.union ctx.recs + in + let captured = [] in + { locals; captured; globals = ctx.globals; recs = Scope.empty } + ;; +end + +module State = struct + include State.M (Ctx) + + let empty = Ctx.empty + let of_args v ctx = get (Ctx.of_args v ctx) + let extend v ctx = put (Ctx.extend v ctx) () + let global v ctx = put (Ctx.global v ctx) () + let rec_ v ctx = put (Ctx.rec_ v ctx) () + let nonrec_ v ctx = put (Ctx.nonrec_ v ctx) () + let capture v ctx = put (Ctx.capture v ctx) () + let up v ctx = put (Ctx.up v ctx) () +end + +open State + +let rec cc = function + | Ast.Const _ as c -> return c + | Var name as v -> + let* () = capture name in + return v + | Fun (args', body') -> + (* fix rec here *) + let* ctx' = of_args args' in + let body'', ctx'' = cc body' ctx' in + let f = Ast.fun_ args' body'' in + let captured = ctx''.captured in + (match captured with + | [] -> return f + | captured -> + let args = List.map Ast.ident captured in + let f = Ast.fun_ args f in + List.fold_left + (fun f v -> + let* f = f in + let* v = cc (Ast.var v) in + Ast.app f v |> return) + (return f) + captured) + | App (f, g) -> + let* f = cc f in + let* g = cc g in + let a = Ast.app f g in + return a + | Let (rec_flag, pattern, bind, body) -> + let* bind = + let* ctx = get in + let* () = + match rec_flag with + | Ast.Rec -> rec_ pattern + | Ast.NonRec -> nonrec_ pattern + in + let* bind = cc bind in + let* () = up ctx in + return bind + in + let* () = extend pattern in + let* body = cc body in + let l = Ast.let_ rec_flag pattern bind body in + return l + | Ite (cond_, then_, else_) -> + let* cond_ = cc cond_ in + let* then_ = cc then_ in + let* else_ = cc else_ in + let ite = Ast.ite cond_ then_ else_ in + return ite +;; + +let cc asts = + List.fold_left + (fun acc (Ast.LetDecl (rec_flag, pattern, body)) -> + let* acc = acc in + let* body = + let* ctx = get in + let* () = + match rec_flag with + | Ast.Rec -> rec_ pattern + | Ast.NonRec -> nonrec_ pattern + in + let* bind = cc body in + let* () = up ctx in + return bind + in + let* () = global pattern in + let ret = Ast.letdecl rec_flag pattern body in + return (ret :: acc)) + (return []) + asts + empty + |> fst + |> List.rev +;; + +let%expect_test "basic" = + let ast = + Fe.parse + {| + let f = fun a b c -> + let g = fun d -> d + a in + let h = fun e -> e + b in + (g c) + (h c) + ;; + |} + |> Result.map_error (fun err -> Format.printf "Error %s" err) + |> Result.get_ok + in + Format.printf + "%a" + (Format.pp_print_list ~pp_sep:Format.pp_print_newline Ast.pp_top_level) + (cc ast); + [%expect + {| + let f = + fun a b c -> + let g = + (fun a d -> (+) d a) a + in + let h = + (fun b e -> (+) e b) b + in + (+) (g c) (h c) + ;; + |}] +;; + +let%expect_test "basic 2" = + let ast = + Fe.parse + {| + let f = fun a b c -> + let g = fun d -> a + d in + let h = fun e -> b + g e in + let i = fun f -> c + h f + g f + p in + i (a + b + c + q) + ;; + |} + |> Result.map_error (fun err -> Format.printf "Error %s" err) + |> Result.get_ok + in + Format.printf + "%a" + (Format.pp_print_list ~pp_sep:Format.pp_print_newline Ast.pp_top_level) + (cc ast); + [%expect + {| + let f = + (fun q p a b c -> + let g = + (fun a d -> (+) a d) a + in + let h = + (fun g b e -> (+) b (g e)) g b + in + let i = + (fun p g h c f -> (+) ((+) ((+) c (h f)) (g f)) p) p g h c + in + i ((+) ((+) ((+) a b) c) q)) q p + ;; + |}] +;; + +let%expect_test "recursion" = + let ast = + Fe.parse + {| + let rec f = fun a b -> + let p = f a in + let rec g = fun n -> if (n = 1) then f (p n) n else g (f a b) in + g (a + b) + ;; + |} + |> Result.map_error (fun err -> Format.printf "Error %s" err) + |> Result.get_ok + in + Format.printf + "%a" + (Format.pp_print_list ~pp_sep:Format.pp_print_newline Ast.pp_top_level) + (cc ast); + [%expect + {| + let rec f = + fun a b -> + let p = + f a + in + let rec g = + (fun b a p f n -> if (=) n 1 then f (p n) n else g (f a b)) b a p f + in + g ((+) a b) + ;; + |}] +;; + +let%expect_test "multiple" = + let ast = + Fe.parse + {| + let rec f = fun n -> + if n = 1 then 1 + else (f (n - 1)) * n + ;; + + let main = fun () -> + let _ = print_int (f 1) in + let _ = print_int (f 2) in + let _ = print_int (f 5) in + print_int (f 8) + ;; + |} + |> Result.map_error (fun err -> Format.printf "Error %s" err) + |> Result.get_ok + in + Format.printf + "%a" + (Format.pp_print_list ~pp_sep:Format.pp_print_newline Ast.pp_top_level) + (cc ast); + [%expect + {| + let rec f = fun n -> if (=) n 1 then 1 else (*) (f ((-) n 1)) n;; + + let main = + fun () -> + let _ = + print_int (f 1) + in + let _ = + print_int (f 2) + in + let _ = + print_int (f 5) + in + print_int (f 8) + ;; + |}] +;; + +let%expect_test "multiple 2" = + let ast = + Fe.parse + {| + let rec f = fun n -> + if n = 1 then 1 + else (f (n - 1)) * n + ;; + + let b = 1;; + let main = fun () -> + let c = 3 in + let f = fun () -> print_int (f a) in + let f = fun () -> print_int (f (a + b)) in + let f = fun () -> print_int (f (a + c)) in + print_int (f 8) + ;; + |} + |> Result.map_error (fun err -> Format.printf "Error %s" err) + |> Result.get_ok + in + Format.printf + "%a" + (Format.pp_print_list ~pp_sep:Format.pp_print_newline Ast.pp_top_level) + (cc ast); + [%expect + {| + let rec f = fun n -> if (=) n 1 then 1 else (*) (f ((-) n 1)) n;; + + let b = 1;; + + let main = + (fun a () -> + let c = + 3 + in + let f = + (fun a () -> print_int (f a)) a + in + let f = + (fun a () -> print_int (f ((+) a b))) a + in + let f = + (fun c a () -> print_int (f ((+) a c))) c a + in + print_int (f 8)) a + ;; + |}] +;; diff --git a/DOOML/lib/cc.mli b/DOOML/lib/cc.mli new file mode 100644 index 00000000..c34a1792 --- /dev/null +++ b/DOOML/lib/cc.mli @@ -0,0 +1 @@ +val cc : Ast.top_level list -> Ast.top_level list diff --git a/DOOML/lib/dune b/DOOML/lib/dune index 977bf24a..2357e868 100644 --- a/DOOML/lib/dune +++ b/DOOML/lib/dune @@ -1,7 +1,7 @@ (library (name DOOML) (public_name DOOML.Lib) - (modules Anf Ast Fe State) + (modules Anf Ast Builtin Cc Fe Ll State) (libraries base stdlib angstrom) (inline_tests) (preprocess diff --git a/DOOML/lib/ll.ml b/DOOML/lib/ll.ml new file mode 100644 index 00000000..9f706109 --- /dev/null +++ b/DOOML/lib/ll.ml @@ -0,0 +1,159 @@ +module Scope = struct + include Set.Make (String) +end + +module Ctx = struct + type t = + { lifts : (Ast.pattern * Ast.pattern list * Ast.expr) list + ; symbols : Scope.t + } + + let gen ?prefix (ctx : t) = + let prefix = Option.value ~default:"_" prefix in + let v = Scope.cardinal ctx.symbols in + let rec aux off = + let name = String.concat "_" [ prefix; Int.to_string (v + off) ] in + if not (Scope.mem name ctx.symbols) then name else aux (off + 1) + in + aux 0 + ;; + + let rec of_pattern = function + | Ast.PUnit | Plug -> Scope.empty + | Ident s -> Scope.singleton s + | Tuple patterns -> + List.map of_pattern patterns |> List.fold_left Scope.union Scope.empty + ;; + + let extend pattern (ctx : t) = + let names = of_pattern pattern in + { ctx with symbols = Scope.union ctx.symbols names } + ;; + + let lift args body (ctx : t) = + let name = gen ctx in + let v = Ast.var name in + let name = Ast.ident name in + let ctx = extend name ctx in + v, { ctx with lifts = (name, args, body) :: ctx.lifts } + ;; + + let empty = { lifts = []; symbols = Scope.empty } +end + +module State = struct + include State.M (Ctx) + + let empty = Ctx.empty + let lift = Ctx.lift + let extend v ctx = put (Ctx.extend v ctx) () +end + +open State + +let rec ll = function + | Ast.Const _ as c -> return c + | Var _ as v -> return v + | Fun (args', body') -> + let* body' = ll body' in + let* f = lift args' body' in + return f + | App (f, g) -> + let* f = ll f in + let* g = ll g in + let a = Ast.app f g in + return a + | Let (rec_flag, pattern, bind, body) -> + let* () = + match rec_flag with + | Ast.Rec -> extend pattern + | Ast.NonRec -> return () + in + let* bind = ll bind in + let* () = extend pattern in + let* body = ll body in + let l = Ast.let_ rec_flag pattern bind body in + return l + | Ite (cond_, then_, else_) -> + let* cond_ = ll cond_ in + let* then_ = ll then_ in + let* else_ = ll else_ in + let ite = Ast.ite cond_ then_ else_ in + return ite +;; + +let ll = + let ctx = ref empty in + List.concat_map (function Ast.LetDecl (rec_flag, pattern, body) -> + (ctx + := match rec_flag with + | Ast.Rec -> extend pattern !ctx |> snd + | Ast.NonRec -> !ctx); + let lifts, body = + match body with + | Fun (args, body) -> + let body, ctx = ll body !ctx in + ctx.lifts, Ast.fun_ args body + | ast -> + let ast, ctx = ll ast !ctx in + ctx.lifts, ast + in + ctx := extend pattern !ctx |> snd; + Ast.letdecl rec_flag pattern body + :: List.map + (fun (name, args, body) -> Ast.letdecl Ast.nonrec_ name (Ast.fun_ args body)) + lifts + |> List.rev) +;; + +let%expect_test "basic" = + let asts = + Fe.parse + {| + let f = + (fun q p -> + (fun q p a b c -> + let g = + (fun a -> fun d -> (+) a d) a + in + let h = + (fun g b -> fun e -> (+) b (g e)) g b + in + let i = + (fun p g h c -> fun f -> (+) ((+) ((+) c (h f)) (g f)) p) p g h c + in + i ((+) ((+) ((+) a b) c) q)) q p) + ;; + |} + |> Result.map_error (fun err -> Format.printf "Error %s" err) + |> Result.get_ok + in + Format.printf + "%a" + (Format.pp_print_list ~pp_sep:Format.pp_print_newline Ast.pp_top_level) + (ll asts); + [%expect + {| + let __0 = fun a d -> (+) a d;; + + let __2 = fun g b e -> (+) b (g e);; + + let __4 = fun p g h c f -> (+) ((+) ((+) c (h f)) (g f)) p;; + + let __6 = + fun q p a b c -> + let g = + __0 a + in + let h = + __2 g b + in + let i = + __4 p g h c + in + i ((+) ((+) ((+) a b) c) q) + ;; + + let f = fun q p -> __6 q p;; + |}] +;; diff --git a/DOOML/lib/ll.mli b/DOOML/lib/ll.mli new file mode 100644 index 00000000..1533096f --- /dev/null +++ b/DOOML/lib/ll.mli @@ -0,0 +1 @@ +val ll : Ast.top_level list -> Ast.top_level list From 7ade878a5251f62709c8e475f8c8e6c17e5dae6b Mon Sep 17 00:00:00 2001 From: Georgiy Belyanin Date: Fri, 16 Jan 2026 19:20:16 +0300 Subject: [PATCH 10/49] riscv: add basic runtime & compilation into rv64gc This patch introduces basic DOOML compilation into RISC-V assembly, namely rv64gc. The patch does not yet verify it. However, a few tests are about to be added soon. Additionally, there is C runtime. It must be compiled by cross-toolchain and linked across the generated assembly in order for everything to work properly. --- DOOML/lib/dune | 20 +- DOOML/lib/riscv.ml | 732 ++++++++++++++++++++++++++++++++++++++++++++ DOOML/lib/runtime.c | 216 +++++++++++++ 3 files changed, 967 insertions(+), 1 deletion(-) create mode 100644 DOOML/lib/riscv.ml create mode 100644 DOOML/lib/runtime.c diff --git a/DOOML/lib/dune b/DOOML/lib/dune index 2357e868..5981ac2c 100644 --- a/DOOML/lib/dune +++ b/DOOML/lib/dune @@ -1,10 +1,28 @@ (library (name DOOML) (public_name DOOML.Lib) - (modules Anf Ast Builtin Cc Fe Ll State) + (modules Anf Ast Builtin Cc Fe Ll Riscv State) (libraries base stdlib angstrom) (inline_tests) (preprocess (pps ppx_expect ppx_deriving.show ppx_variants_conv ppx_inline_test)) (instrumentation (backend bisect_ppx))) + +(rule + (targets runtime.so) + (deps runtime.c) + (action + (run gcc -fPIC -shared %{deps} -o %{targets}))) + +(rule + (targets riscv64-runtime.o) + (deps runtime.c) + (action + (run riscv64-linux-gnu-gcc -o %{targets} -c %{deps}))) + +(rule + (targets riscv64-runtime.a) + (deps riscv64-runtime.o) + (action + (run riscv64-linux-gnu-ar rcs %{targets} %{deps}))) diff --git a/DOOML/lib/riscv.ml b/DOOML/lib/riscv.ml new file mode 100644 index 00000000..8c744aca --- /dev/null +++ b/DOOML/lib/riscv.ml @@ -0,0 +1,732 @@ +module Map = Map.Make (String) +module Set = Set.Make (String) + +type reg = + | Zero + | Ra + | Sp + | Gp + | Arg of int + | Saved of int + | Temp of int +[@@deriving variants] + +let rv = arg 0 +let bp = saved 0 + +let pp_reg ppf = + Format.( + function + | Zero -> fprintf ppf "zero" + | Ra -> fprintf ppf "ra" + | Sp -> fprintf ppf "sp" + | Gp -> fprintf ppf "gp" + | Arg c -> fprintf ppf "a%d" c + | Saved c -> fprintf ppf "s%d" c + | Temp c -> fprintf ppf "t%d" c) +;; + +type instr = + (* Arithmetic instructions *) + | Addi of reg * reg * int + | Add of reg * reg * reg + | Sh1add of reg * reg * reg + | Sh2add of reg * reg * reg + | Sh3add of reg * reg * reg + | Add_wu of reg * reg * reg + | Sh1add_wu of reg * reg * reg + | Sh2add_wu of reg * reg * reg + | Sh3add_wu of reg * reg * reg + | Addiw of reg * reg * int + | Addw of reg * reg * reg + | Sub of reg * reg * reg + | Subw of reg * reg * reg + | Neg of reg * reg + | Negw of reg * reg + | Mul of reg * reg * reg + | Mulw of reg * reg * reg + | Mulh of reg * reg * reg + | Mulhu of reg * reg * reg + | Mulhsu of reg * reg * reg + | Div of reg * reg * reg + | Divu of reg * reg * reg + | Rem of reg * reg * reg + | Remu of reg * reg * reg + | Min of reg * reg * reg + | Max of reg * reg * reg + | Minu of reg * reg * reg + | Maxu of reg * reg * reg + (* Comparison instructions *) + | Seqz of reg * reg + | Snez of reg * reg + | Slti of reg * reg * int + | Slt of reg * reg * reg + | Sltiu of reg * reg * int + | Sltu of reg * reg * reg + (* Bit manipulation instructions *) + | Bexti of reg * reg * int + | Bext of reg * reg * reg + | Andi of reg * reg * int + | And of reg * reg * reg + | Andn of reg * reg * reg + | Bclri of reg * reg * int + | Bclr of reg * reg * reg + | Ori of reg * reg * int + | Or of reg * reg * reg + | Orn of reg * reg * reg + | Bseti of reg * reg * int + | Bset of reg * reg * reg + | Xori of reg * reg * int + | Xor of reg * reg * reg + | Xnor of reg * reg * reg + | Binvi of reg * reg * int + | Binv of reg * reg * reg + | Not of reg * reg + | Orc_b of reg * reg + (* Shift instructions *) + | Slli of reg * reg * int + | Sll of reg * reg * reg + | Slliw of reg * reg * int + | Sllw of reg * reg * reg + | Slli_wu of reg * reg * int + | Srli of reg * reg * int + | Srl of reg * reg * reg + | Srliw of reg * reg * int + | Srlw of reg * reg * reg + | Srai of reg * reg * int + | Sra of reg * reg * reg + | Sraiw of reg * reg * int + | Sraw of reg * reg * reg + | Rori of reg * reg * int + | Ror of reg * reg * reg + | Rol of reg * reg * reg + | Roriw of reg * reg * int + | Rorw of reg * reg * reg + | Rolw of reg * reg * reg + (* Count instructions *) + | Clz of reg * reg + | Clzw of reg * reg + | Ctz of reg * reg + | Ctzw of reg * reg + | Cpop of reg * reg + | Cpopw of reg * reg + (* Jump and branch instructions *) + | J of string (* label *) + | Jal of reg * string + | Jr of reg * int option + | Jalr of reg * reg * int option + | Call of string (* symbol *) + | Tail of string (* symbol *) + | Ret + | Beq of reg * reg * string (* label *) + | Bne of reg * reg * string (* label *) + | Blt of reg * reg * string (* label *) + | Bgt of reg * reg * string (* label *) + | Bge of reg * reg * string (* label *) + | Ble of reg * reg * string (* label *) + | Bltu of reg * reg * string (* label *) + | Bgtu of reg * reg * string (* label *) + | Bgeu of reg * reg * string (* label *) + | Bleu of reg * reg * string (* label *) + (* Load & store *) + | La of reg * string + | Ld of reg * int * reg + | Sd of reg * int * reg +[@@deriving variants] + +let mv rd rs = addi rd rs 0 +let li rd v = addi rd zero v + +let _pp_instr fmt = + Format.( + function + (* Arithmetic instructions *) + | Addi (rd, rs, imm) -> fprintf fmt "addi %a, %a, %d" pp_reg rd pp_reg rs imm + | Add (rd, rs1, rs2) -> fprintf fmt "add %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Sh1add (rd, rs1, rs2) -> + fprintf fmt "sh1add %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Sh2add (rd, rs1, rs2) -> + fprintf fmt "sh2add %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Sh3add (rd, rs1, rs2) -> + fprintf fmt "sh3add %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Add_wu (rd, rs1, rs2) -> + fprintf fmt "add.wu %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Sh1add_wu (rd, rs1, rs2) -> + fprintf fmt "sh1add.wu %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Sh2add_wu (rd, rs1, rs2) -> + fprintf fmt "sh2add.wu %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Sh3add_wu (rd, rs1, rs2) -> + fprintf fmt "sh3add.wu %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Addiw (rd, rs, imm) -> fprintf fmt "addiw %a, %a, %d" pp_reg rd pp_reg rs imm + | Addw (rd, rs1, rs2) -> fprintf fmt "addw %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Sub (rd, rs1, rs2) -> fprintf fmt "sub %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Subw (rd, rs1, rs2) -> fprintf fmt "subw %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Neg (rd, rs) -> fprintf fmt "neg %a, %a" pp_reg rd pp_reg rs + | Negw (rd, rs) -> fprintf fmt "negw %a, %a" pp_reg rd pp_reg rs + | Mul (rd, rs1, rs2) -> fprintf fmt "mul %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Mulw (rd, rs1, rs2) -> fprintf fmt "mulw %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Mulh (rd, rs1, rs2) -> fprintf fmt "mulh %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Mulhu (rd, rs1, rs2) -> + fprintf fmt "mulhu %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Mulhsu (rd, rs1, rs2) -> + fprintf fmt "mulhsu %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Div (rd, rs1, rs2) -> fprintf fmt "div %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Divu (rd, rs1, rs2) -> fprintf fmt "divu %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Rem (rd, rs1, rs2) -> fprintf fmt "rem %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Remu (rd, rs1, rs2) -> fprintf fmt "remu %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Min (rd, rs1, rs2) -> fprintf fmt "min %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Max (rd, rs1, rs2) -> fprintf fmt "max %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Minu (rd, rs1, rs2) -> fprintf fmt "minu %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Maxu (rd, rs1, rs2) -> fprintf fmt "maxu %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + (* Comparison instructions *) + | Seqz (rd, rs) -> fprintf fmt "seqz %a, %a" pp_reg rd pp_reg rs + | Snez (rd, rs) -> fprintf fmt "snez %a, %a" pp_reg rd pp_reg rs + | Slti (rd, rs, imm) -> fprintf fmt "slti %a, %a, %d" pp_reg rd pp_reg rs imm + | Slt (rd, rs1, rs2) -> fprintf fmt "slt %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Sltiu (rd, rs, imm) -> fprintf fmt "sltiu %a, %a, %d" pp_reg rd pp_reg rs imm + | Sltu (rd, rs1, rs2) -> fprintf fmt "sltu %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + (* Bit manipulation instructions *) + | Bexti (rd, rs, imm) -> fprintf fmt "bexti %a, %a, %d" pp_reg rd pp_reg rs imm + | Bext (rd, rs1, rs2) -> fprintf fmt "bext %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Andi (rd, rs, imm) -> fprintf fmt "andi %a, %a, %d" pp_reg rd pp_reg rs imm + | And (rd, rs1, rs2) -> fprintf fmt "and %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Andn (rd, rs1, rs2) -> fprintf fmt "andn %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Bclri (rd, rs, imm) -> fprintf fmt "bclri %a, %a, %d" pp_reg rd pp_reg rs imm + | Bclr (rd, rs1, rs2) -> fprintf fmt "bclr %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Ori (rd, rs, imm) -> fprintf fmt "ori %a, %a, %d" pp_reg rd pp_reg rs imm + | Or (rd, rs1, rs2) -> fprintf fmt "or %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Orn (rd, rs1, rs2) -> fprintf fmt "orn %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Bseti (rd, rs, imm) -> fprintf fmt "bseti %a, %a, %d" pp_reg rd pp_reg rs imm + | Bset (rd, rs1, rs2) -> fprintf fmt "bset %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Xori (rd, rs, imm) -> fprintf fmt "xori %a, %a, %d" pp_reg rd pp_reg rs imm + | Xor (rd, rs1, rs2) -> fprintf fmt "xor %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Xnor (rd, rs1, rs2) -> fprintf fmt "xnor %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Binvi (rd, rs, imm) -> fprintf fmt "binvi %a, %a, %d" pp_reg rd pp_reg rs imm + | Binv (rd, rs1, rs2) -> fprintf fmt "binv %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Not (rd, rs) -> fprintf fmt "not %a, %a" pp_reg rd pp_reg rs + | Orc_b (rd, rs) -> fprintf fmt "orc.b %a, %a" pp_reg rd pp_reg rs + (* Shift instructions *) + | Slli (rd, rs, imm) -> fprintf fmt "slli %a, %a, %d" pp_reg rd pp_reg rs imm + | Sll (rd, rs1, rs2) -> fprintf fmt "sll %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Slliw (rd, rs, imm) -> fprintf fmt "slliw %a, %a, %d" pp_reg rd pp_reg rs imm + | Sllw (rd, rs1, rs2) -> fprintf fmt "sllw %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Slli_wu (rd, rs, imm) -> fprintf fmt "slli.wu %a, %a, %d" pp_reg rd pp_reg rs imm + | Srli (rd, rs, imm) -> fprintf fmt "srli %a, %a, %d" pp_reg rd pp_reg rs imm + | Srl (rd, rs1, rs2) -> fprintf fmt "srl %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Srliw (rd, rs, imm) -> fprintf fmt "srliw %a, %a, %d" pp_reg rd pp_reg rs imm + | Srlw (rd, rs1, rs2) -> fprintf fmt "srlw %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Srai (rd, rs, imm) -> fprintf fmt "srai %a, %a, %d" pp_reg rd pp_reg rs imm + | Sra (rd, rs1, rs2) -> fprintf fmt "sra %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Sraiw (rd, rs, imm) -> fprintf fmt "sraiw %a, %a, %d" pp_reg rd pp_reg rs imm + | Sraw (rd, rs1, rs2) -> fprintf fmt "sraw %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Rori (rd, rs, imm) -> fprintf fmt "rori %a, %a, %d" pp_reg rd pp_reg rs imm + | Ror (rd, rs1, rs2) -> fprintf fmt "ror %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Rol (rd, rs1, rs2) -> fprintf fmt "rol %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Roriw (rd, rs, imm) -> fprintf fmt "roriw %a, %a, %d" pp_reg rd pp_reg rs imm + | Rorw (rd, rs1, rs2) -> fprintf fmt "rorw %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Rolw (rd, rs1, rs2) -> fprintf fmt "rolw %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + (* Count instructions *) + | Clz (rd, rs) -> fprintf fmt "clz %a, %a" pp_reg rd pp_reg rs + | Clzw (rd, rs) -> fprintf fmt "clzw %a, %a" pp_reg rd pp_reg rs + | Ctz (rd, rs) -> fprintf fmt "ctz %a, %a" pp_reg rd pp_reg rs + | Ctzw (rd, rs) -> fprintf fmt "ctzw %a, %a" pp_reg rd pp_reg rs + | Cpop (rd, rs) -> fprintf fmt "cpop %a, %a" pp_reg rd pp_reg rs + | Cpopw (rd, rs) -> fprintf fmt "cpopw %a, %a" pp_reg rd pp_reg rs + (* Jump and branch instructions *) + | J label -> fprintf fmt "j %s" label + | Jal (rd, imm) -> fprintf fmt "jal %a, %s" pp_reg rd imm + | Jr (rs, None) -> fprintf fmt "jr %a" pp_reg rs + | Jr (rs, Some imm) -> fprintf fmt "jr %a, %d" pp_reg rs imm + | Jalr (rd, rs, None) -> fprintf fmt "jalr %a, %a" pp_reg rd pp_reg rs + | Jalr (rd, rs, Some imm) -> fprintf fmt "jalr %a, %a, %d" pp_reg rd pp_reg rs imm + | Call symbol -> fprintf fmt "call %s" symbol + | Tail symbol -> fprintf fmt "tail %s" symbol + | Ret -> fprintf fmt "ret" + | Beq (rs1, rs2, label) -> fprintf fmt "beq %a, %a, %s" pp_reg rs1 pp_reg rs2 label + | Bne (rs1, rs2, label) -> fprintf fmt "bne %a, %a, %s" pp_reg rs1 pp_reg rs2 label + | Blt (rs1, rs2, label) -> fprintf fmt "blt %a, %a, %s" pp_reg rs1 pp_reg rs2 label + | Bgt (rs1, rs2, label) -> fprintf fmt "bgt %a, %a, %s" pp_reg rs1 pp_reg rs2 label + | Bge (rs1, rs2, label) -> fprintf fmt "bge %a, %a, %s" pp_reg rs1 pp_reg rs2 label + | Ble (rs1, rs2, label) -> fprintf fmt "ble %a, %a, %s" pp_reg rs1 pp_reg rs2 label + | Bltu (rs1, rs2, label) -> fprintf fmt "bltu %a, %a, %s" pp_reg rs1 pp_reg rs2 label + | Bgtu (rs1, rs2, label) -> fprintf fmt "bgtu %a, %a, %s" pp_reg rs1 pp_reg rs2 label + | Bgeu (rs1, rs2, label) -> fprintf fmt "bgeu %a, %a, %s" pp_reg rs1 pp_reg rs2 label + | Bleu (rs1, rs2, label) -> fprintf fmt "bleu %a, %a, %s" pp_reg rs1 pp_reg rs2 label + (* Load & Store *) + | La (rd, label) -> fprintf fmt "la %a, %s" pp_reg rd label + | Ld (rd, offset, rs) -> fprintf fmt "ld %a, %d(%a)" pp_reg rd offset pp_reg rs + | Sd (rs, offset, rd) -> fprintf fmt "sd %a, %d(%a)" pp_reg rs offset pp_reg rd) +;; + +type loc = + | Mem of int + | Reg of reg +[@@deriving variants] + +type line = + | Instr of instr + | Comment of string + | Label of string + | Etc of string +[@@deriving variants] + +let todo () = failwith "todo" + +module Ctx = struct + type t = + { code : line list + ; temp_regs : bool Map.t + ; fail : string option + ; arities : int Map.t + ; offset : int + ; offsets : int Map.t + ; lbls : Set.t + } + + let arities = + Builtin.all + |> List.map (fun (builtin : Builtin.t) -> builtin.name, builtin.arity) + |> Map.of_list + ;; + + let empty = + { code = [] + ; temp_regs = Map.empty + ; fail = None + ; arities + ; offset = -16 + ; offsets = Map.empty + ; lbls = Set.empty + } + ;; + + let arity name (ctx : t) = Map.find_opt name ctx.arities + let comment (c : string) (ctx : t) = { ctx with code = comment c :: ctx.code } + let etc (c : string) (ctx : t) = { ctx with code = etc c :: ctx.code } + + let lbl ?prefix (ctx : t) = + let rec aux cnt = + let prefix = Option.value ~default:"lbl" prefix in + let name = String.concat "" [ prefix; Int.to_string cnt ] in + if Set.mem name ctx.lbls + then aux (cnt + 1) + else name, { ctx with lbls = Set.add name ctx.lbls } + in + aux 0 + ;; + + let label (l : string) (ctx : t) = { ctx with code = label l :: ctx.code } + let emit (i : instr) (ctx : t) = { ctx with code = instr i :: ctx.code } + + (*let as_reg (loc : loc) (ctx : t) = match loc with + | Reg reg -> reg + (*| Mem ->*) + ;;*) + + let as_arg (n : int) (loc : loc) (ctx : t) = + match loc with + | Reg (Arg n' as r) when n' = n -> r, ctx + | Reg rs -> + let rd = arg n in + let ctx = emit (mv rd rs) ctx in + rd, ctx + | Mem offset -> + let rd = arg n in + let ctx = emit (ld rd offset bp) ctx in + rd, ctx + ;; + + (*| Mem ->*) + + let fail (msg : string) (ctx : t) = { ctx with fail = Option.some msg } + let total () (ctx : t) = ctx.offset, { ctx with offset = -16 } + let offset (var : string) (ctx : t) = Map.find_opt var ctx.offsets + + let push r (ctx : t) = + let new_offset = ctx.offset - 8 in + let ctx = emit (addi sp sp ~-8) ctx in + let ctx = emit (sd r 0 sp) ctx in + new_offset, { ctx with offset = new_offset } + ;; + + let var (name : string) (ctx : t) = + let offset, ctx = push zero ctx in + let offsets = Map.add name offset ctx.offsets in + offset, { ctx with offsets } + ;; + + (*addi sp, sp, -8 + sd t0, 0(sp)*) + + let args args' ctx = + List.fold_right + (fun (i, a) acc -> + let offset, ctx = var a acc in + let ctx = if i < 8 then emit (sd (arg i) offset bp) ctx else todo () in + ctx) + (args' |> List.mapi (fun i a -> i, a)) + ctx + ;; + + let define name argc ctx = { ctx with arities = Map.add name argc arities } +end + +module State = struct + include State.M (Ctx) + + let arity name = get >>= fun ctx -> return (Ctx.arity name ctx) + let total = Ctx.total + let offset var = get >>= fun ctx -> return (Ctx.offset var ctx) + let emit instr ctx = put (Ctx.emit instr ctx) () + let label lbl ctx = put (Ctx.label lbl ctx) () + let comment c ctx = put (Ctx.comment c ctx) () + let etc c ctx = put (Ctx.etc c ctx) () + let as_arg = Ctx.as_arg + let var = Ctx.var + let push = Ctx.push + let define name argc ctx = put (Ctx.define name argc ctx) () + let args args' ctx = put (Ctx.args args' ctx) () + let empty = Ctx.empty + let lbl = Ctx.lbl + (*let as_reg = Ctx.as_reg*) +end + +open State + +let spf = Format.asprintf +let failf fmt = Format.kasprintf failwith fmt + +let immexpr reg = function + | Anf.ImmNum c -> + let* () = emit (li reg c) in + return reg + | ImmId var -> + let* offset = offset var in + (match offset with + | Some offset -> + let* () = emit (ld reg offset bp) in + return reg + | None -> failf "Unknown variable %s" var) +;; + +let rec cexpr = + let capp name args = + let binop op lhs rhs = + let op = + fun rd rs1 rs2 -> + let return v = [ v rd rs1 rs2 ] in + match op with + | "+" -> return add + | "-" -> return sub + | "*" -> return mul + | "/" -> return div + | "<" -> return slt + | "<=" -> [ slt rd rs2 rs1; xori rd rd 1 ] + | ">" -> [ slt rd rs2 rs1 ] + | ">=" -> [ slt rd rs1 rs2; xori rd rd 1 ] + | "=" -> [ sub (temp 0) rs1 rs2; seqz rd (temp 0) ] + | "<>" -> [ sub (temp 0) rs1 rs2; snez rd (temp 0) ] + | _ -> assert false + in + let* lhs = immexpr (arg 0) lhs in + let* rhs = immexpr (arg 1) rhs in + let* () = + List.fold_left + (fun acc instr -> + let* _ = acc in + emit instr) + (return ()) + (op rv lhs rhs) + in + return rv + in + match name, args with + | (("+" | "-" | "*" | "/" | "<" | ">" | "<=" | ">=" | "=") as op), [ lhs; rhs ] -> + binop op lhs rhs + | name, args -> + let* arity = arity name in + let* arity = + match arity with + | Some arity -> return (`Fun arity) + | None -> + let* offset = offset name in + let* arity = + match offset with + | Some offset -> + let* () = comment "here should be a partial call" in + return (`Closure offset) + | None -> todo () + in + return arity + in + let argc = List.length args in + (match arity with + | `Fun arity when arity == argc -> + let* _ = + List.fold_left + (fun acc (i, a) -> + let* acc = acc in + let* a = immexpr (arg i) a in + return (a :: acc)) + (return []) + (args |> List.mapi (fun i arg -> i, arg)) + in + let* () = emit (jal ra name) in + return rv + | `Fun arity when argc < arity -> + let* _offset = + List.fold_right + (fun a acc -> + let* _ = acc in + let* a = immexpr (temp 0) a in + let* offset = push a in + return offset) + args + (return 0) + in + let* () = emit (la (arg 0) name) in + let* () = emit (li (arg 1) arity) in + let* () = emit (li (arg 2) argc) in + let* () = emit (mv (arg 3) sp) in + let* () = emit (jal ra "create_closure") in + return rv + | `Closure offset -> + let* _offset = + List.fold_right + (fun a acc -> + let* _ = acc in + let* a = immexpr (temp 0) a in + let* offset = push a in + return offset) + args + (return 0) + in + let* () = emit (ld (arg 0) offset bp) in + let* () = emit (li (arg 1) argc) in + let* () = emit (mv (arg 2) sp) in + let* () = emit (jal ra "closure_apply") in + return rv + | `Fun arity -> + failf + "Too many arguments (%d) are passed for the function %s, expected %d" + argc + name + arity) + in + let cite cond_ then_ else_ = + let* cond_ = immexpr (temp 0) cond_ in + let* then_lbl = lbl ~prefix:"then" in + let* end_lbl = lbl ~prefix:"end" in + let* () = emit (bne cond_ zero then_lbl) in + let* _else_ = aexpr else_ in + let* () = emit (j end_lbl) in + let* () = label then_lbl in + let* _then_ = aexpr then_ in + let* () = label end_lbl in + return rv + in + function + | Anf.CImm imm -> immexpr rv imm + | CIte (cond_, then_, else_) -> cite cond_ then_ else_ + | CApp (name, args) -> capp name args + +and aexpr = function + | Anf.AExpr expr -> cexpr expr + | ALet (bind, v, body) -> + let* bind = var bind in + let* v = cexpr v in + let* () = emit (sd v bind bp) in + aexpr body +;; + +let decl rec_flag name args' body = + let* () = + comment + (spf + "%s(%a)" + name + (Format.pp_print_list + ~pp_sep:(fun ppf () -> Format.fprintf ppf ", ") + Format.pp_print_string) + args') + in + let* () = label name in + let* () = emit (mv (temp 0) sp) in + let* () = emit (addi sp sp ~-8) in + let* () = emit (sd ra 0 sp) in + let* () = emit (addi sp sp ~-8) in + let* () = emit (sd bp 0 sp) in + let* () = emit (mv bp (temp 0)) in + let* () = + match rec_flag with + | Ast.Rec -> define name (List.length args') + | Ast.NonRec -> return () + in + let* () = args args' in + let* () = comment (spf "Prelude of %s ended here" name) in + let* _ = aexpr body in + let* () = comment (spf "Body of %s ended here" name) in + let* total = total () in + let* () = emit (addi sp sp ~-total) in + let* () = emit (ld ra ~-8 bp) in + let* () = emit (ld bp ~-16 bp) in + let* () = + match rec_flag with + | Ast.Rec -> return () + | Ast.NonRec -> define name (List.length args') + in + let* () = if name <> "main" then emit ret else emit (jal ra "exit2") in + return () +;; + +let riscv decls = + let ctx = empty in + let init = + let* () = etc ".global main" in + return () + in + let ctx = init ctx |> snd in + let ctx = + List.fold_left + (fun acc (Anf.Decl (rec_flag, name, args, body)) -> + let* _acc = acc in + let* _d = decl rec_flag name args body in + return ()) + (return ()) + decls + ctx + |> snd + in + ctx.code |> List.rev +;; + +let pp_code = + Format.pp_print_list ~pp_sep:Format.pp_print_newline (fun ppf -> function + | Etc l -> Format.fprintf ppf "%s" l + | Label l -> Format.fprintf ppf "%s:" l + | Comment c -> Format.fprintf ppf "# %s:" c + | Instr instr -> Format.fprintf ppf " %a" _pp_instr instr) +;; + +let%expect_test "basic" = + let ast = + Fe.parse + {| + let rec f = fun n -> + if n = 1 then 1 + else (f (n - 1)) * n + ;; + |} + |> Result.map_error (fun err -> Format.printf "Error %s" err) + |> Result.get_ok + in + Format.printf + "%a" + pp_code + (Cc.cc ast + |> fun asts -> + Format.printf + "CC: %a\n\n" + (Format.pp_print_list ~pp_sep:Format.pp_print_newline Ast.pp_top_level) + asts; + asts + |> Ll.ll + |> fun asts -> + Format.printf + "LL: %a\n\n" + (Format.pp_print_list ~pp_sep:Format.pp_print_newline Ast.pp_top_level) + asts; + asts + |> Anf.anf + |> fun asts -> + Format.printf + "ANF %a\n\n" + (Format.pp_print_list ~pp_sep:Format.pp_print_newline Anf.pp_decl) + asts; + asts |> riscv); + [%expect + {| + CC: let rec f = fun n -> if (=) n 1 then 1 else (*) (f ((-) n 1)) n;; + + + LL: let rec f = fun n -> if (=) n 1 then 1 else (*) (f ((-) n 1)) n;; + + + ANF let rec f n = + let sup1 = + (=) n 1 + in + let ite7 = + if sup1 then + 1 + else + let sup4 = + (-) n 1 + in + let sup5 = + (f) sup4 + in + let sup6 = + (*) sup5 n + in + sup6 + in + ite7 + ;; + + + .global main + # f(n): + f: + addi t0, sp, 0 + addi sp, sp, -8 + sd ra, 0(sp) + addi sp, sp, -8 + sd s0, 0(sp) + addi s0, t0, 0 + addi sp, sp, -8 + sd zero, 0(sp) + sd a0, -24(s0) + # Prelude of f ended here: + addi sp, sp, -8 + sd zero, 0(sp) + ld a0, -24(s0) + addi a1, zero, 1 + sub t0, a0, a1 + seqz a0, t0 + sd a0, -32(s0) + addi sp, sp, -8 + sd zero, 0(sp) + ld t0, -32(s0) + bne t0, zero, then0 + addi sp, sp, -8 + sd zero, 0(sp) + ld a0, -24(s0) + addi a1, zero, 1 + sub a0, a0, a1 + sd a0, -48(s0) + addi sp, sp, -8 + sd zero, 0(sp) + ld a0, -48(s0) + jal ra, f + sd a0, -56(s0) + addi sp, sp, -8 + sd zero, 0(sp) + ld a0, -56(s0) + ld a1, -24(s0) + mul a0, a0, a1 + sd a0, -64(s0) + ld a0, -64(s0) + j end0 + then0: + addi a0, zero, 1 + end0: + sd a0, -40(s0) + ld a0, -40(s0) + # Body of f ended here: + addi sp, sp, 64 + ld ra, -8(s0) + ld s0, -16(s0) + ret + |}] +;; diff --git a/DOOML/lib/runtime.c b/DOOML/lib/runtime.c new file mode 100644 index 00000000..0c7c35cd --- /dev/null +++ b/DOOML/lib/runtime.c @@ -0,0 +1,216 @@ +#include +#include +#include +#include + +#define MEM 65536 +#define STACK 16384 + +typedef long int64_t; + +__attribute__((aligned(16))) +int64_t mem[MEM]; +__attribute__((aligned(16))) +int64_t stack[STACK]; + +int64_t *stack_end = stack + STACK; + +static int64_t ptr = 0; +static int64_t *xmalloc(int64_t size) { + int64_t *res = &(mem[ptr]); + ptr += size; + return res; +} + +typedef int64_t *tuple_t; + +tuple_t create_tuple(int64_t size, int64_t init) { + tuple_t tuple = xmalloc(size + 1); + tuple[0] = size; + for (int64_t i = 0; i < size; i++) + tuple[i + 1] = ((int64_t*) init)[i]; + + return tuple; +} + +int64_t tuple_nth(int64_t tuple, int64_t i) { + return ((int64_t*) tuple)[i + 1]; +} + +typedef int64_t *closure_t; + +int64_t call_function(void *func, int64_t nargs, int64_t *args) { + int64_t result; + + if (nargs <= 8) { + asm volatile ( + "li t0, 0\n" + "beq %[nargs], t0, 1f\n" + "ld a0, 0(%[args])\n" + + "li t0, 1\n" + "beq %[nargs], t0, 1f\n" + "ld a1, 8(%[args])\n" + + "li t0, 2\n" + "beq %[nargs], t0, 1f\n" + "ld a2, 16(%[args])\n" + + "li t0, 3\n" + "beq %[nargs], t0, 1f\n" + "ld a3, 24(%[args])\n" + + "li t0, 4\n" + "beq %[nargs], t0, 1f\n" + "ld a4, 32(%[args])\n" + + "li t0, 5\n" + "beq %[nargs], t0, 1f\n" + "ld a5, 40(%[args])\n" + + "li t0, 6\n" + "beq %[nargs], t0, 1f\n" + "ld a6, 48(%[args])\n" + + "li t0, 7\n" + "beq %[nargs], t0, 1f\n" + "ld a7, 56(%[args])\n" + + "1:\n" + "jalr %[func]\n" + + "mv %[result], a0\n" + + : [result] "=r" (result) + : [func] "r" (func), [nargs] "r" (nargs), [args] "r" (args) + : "ra", "t0", "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", + "memory" + ); + } else { + int64_t stack_size = (nargs - 8) * 8; + + asm volatile ( + "addi sp, sp, -32\n" + "sd ra, 24(sp)\n" + "sd s1, 8(sp)\n" + + "mv s1, %[nargs]\n" + "mv s2, %[args]\n" + + "sub sp, sp, %[stack_size]\n" + + "ld a0, 0(s2)\n" + "ld a1, 8(s2)\n" + "ld a2, 16(s2)\n" + "ld a3, 24(s2)\n" + "ld a4, 32(s2)\n" + "ld a5, 40(s2)\n" + "ld a6, 48(s2)\n" + "ld a7, 56(s2)\n" + + "li t0, 64\n" + "add t1, s2, t0\n" + "mv t2, sp\n" + + "li t3, 8\n" + "sub t4, s1, t3\n" + + "copy_loop:\n" + "beqz t4, copy_done\n" + "ld t5, 0(t1)\n" + "sd t5, 0(t2)\n" + "addi t1, t1, 8\n" + "addi t2, t2, 8\n" + "addi t4, t4, -1\n" + "j copy_loop\n" + + "copy_done:\n" + "jalr %[func]\n" + + "mv %[result], a0\n" + + "add sp, sp, %[stack_size]\n" + + "ld s1, 8(sp)\n" + "ld ra, 24(sp)\n" + "addi sp, sp, 32\n" + : [result] "=r" (result) + : [func] "r" (func), [nargs] "r" (nargs), [args] "r" (args), + [stack_size] "r" (stack_size) + : "ra", "t0", "t1", "t2", "t3", "t4", "t5", + "s1", "s2", + "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", + "memory" + ); + } + + return result; +} + +#define debugf printf + +int64_t create_closure(int64_t callee, int64_t arity, int64_t argc, int64_t argv_) { + assert(argc < arity); + + debugf("> create_closure(%ld, %ld, %ld, %ld)\n", callee, arity, argc, argv_); + + int64_t *argv = (int64_t*) argv_; + + closure_t closure = xmalloc(arity + 3); + closure[0] = callee; + closure[1] = arity; + closure[2] = argc; + + for (int64_t i = 0; i < argc; i++) { + closure[i + 3] = argv[i]; + } + debugf("< create_closure() -> %ld\n", (int64_t) closure); + + return (int64_t) closure; +} + +int64_t copy_closure(int64_t closure_) { + debugf("> copy_closure(%ld)\n", closure_); + + closure_t closure = (closure_t) closure_; + int64_t arity = closure[1]; + + closure_t closure2 = xmalloc(arity + 3); + for (int64_t i = 0; i < arity + 3; i++) { + closure2[i] = closure[i]; + } + + debugf("< copy_closure() -> %ld\n", (int64_t) closure2); + + return (int64_t) closure2; +} + +int64_t closure_apply(int64_t closure_, int64_t argc, int64_t argv_) { + debugf("> closure_apply(%ld, %ld, %ld)\n", closure_, argc, argv_); + + int64_t *argv = (int64_t*) argv_; + closure_t closure = (closure_t) (copy_closure (closure_)); + debugf(" closure_apply: closure stats %ld %ld %ld\n", closure[0], closure[1], closure[2]); + int64_t current = closure[2]; + for (int64_t i = 0; i < argc; i++) { + debugf(" closure_apply: arg %ld %ld is %ld in the orig\n", i, argv[i], i + current); + closure[i + current + 3] = argv[i]; + closure[2]++; + } + + if (closure[2] >= closure[1]) { + debugf(" closure_apply: calling %ld %ld %ld\n", closure[0], closure[1], (int64_t) &(closure[3])); + return call_function((void*) closure[0], closure[1], &(closure[3])); + } else { + debugf(" closure_apply: returning a new closure %ld %ld %ld\n", closure[0], closure[1], (int64_t) &(closure[3])); + return (int64_t) closure; + } +} + +void print_int(int64_t n) { + printf("%ld\n", n); +} + +void exit2(void) { + exit(0); +} From b001c51013e0848666324d1bacaaa14d72146182 Mon Sep 17 00:00:00 2001 From: "ignat.sergeev" Date: Sat, 6 Dec 2025 22:20:39 +0300 Subject: [PATCH 11/49] codegen separate --- DOOML/bin/dune | 14 +- DOOML/bin/main.ml | 70 +++----- DOOML/flake.nix | 51 ++++++ DOOML/lib/codegen.ml | 199 +++++++++++++++++++++++ DOOML/lib/codegen.mli | 3 + DOOML/lib/dune | 9 +- DOOML/lib/ir.ml | 21 +++ DOOML/{bin/LL.ml => lib/llvm_wrapper.ml} | 56 ++++++- 8 files changed, 355 insertions(+), 68 deletions(-) create mode 100644 DOOML/flake.nix create mode 100644 DOOML/lib/codegen.ml create mode 100644 DOOML/lib/codegen.mli create mode 100644 DOOML/lib/ir.ml rename DOOML/{bin/LL.ml => lib/llvm_wrapper.ml} (56%) diff --git a/DOOML/bin/dune b/DOOML/bin/dune index 687e70bf..5388d800 100644 --- a/DOOML/bin/dune +++ b/DOOML/bin/dune @@ -1,20 +1,8 @@ -(library - (name LL) - (public_name DOOML.LL) - (modules LL) - (wrapped false) - (libraries - llvm - llvm.analysis - llvm.executionengine - ; - )) - (executable (public_name DOOML) (name main) (modules main) - (libraries LL) + (libraries DOOML) (instrumentation (backend bisect_ppx))) diff --git a/DOOML/bin/main.ml b/DOOML/bin/main.ml index 37ad326d..47ecf25d 100644 --- a/DOOML/bin/main.ml +++ b/DOOML/bin/main.ml @@ -6,55 +6,25 @@ [@@@ocaml.text "/*"] +open DOOML +module Map = Base.Map.Poly + +let failf fmt = Format.kasprintf failwith fmt + +let parse input = + let code = In_channel.with_open_text input In_channel.input_all in + match Fe.parse code with + | Error msg -> Error msg + | Ok ast_list -> Ok (ast_list |> Cc.cc |> Ll.ll |> Anf.anf) +;; + let () = - let context = Llvm.global_context () in - let builder = Llvm.builder context in - let () = assert (Llvm_executionengine.initialize ()) in - let the_module = Llvm.create_module context "main" in - Llvm.set_target_triple "x86_64-pc-linux-gnu" the_module; - let _the_execution_engine = Llvm_executionengine.create the_module in - let module LL = (val LL.make context builder the_module) in - let i64_type = Llvm.i64_type context in - let void_type = Llvm.void_type context in - let ptr_type = Llvm.pointer_type context in - let prepare_main () = - let ft = - (* TODO main has special args *) - let args = Array.make 0 ptr_type in - Llvm.function_type i64_type args - in - let the_function = Llvm.declare_function "main" ft the_module in - (* Create a new basic block to start insertion into. *) - let bb = Llvm.append_block context "entry" the_function in - Llvm.position_at_end bb builder; - (* Add all arguments to the symbol table and create their allocas. *) - (* Finish off the function. *) - let (_ : Llvm.llvalue) = - LL.build_call - (Llvm.function_type void_type [| i64_type |]) - LL.(lookup_func_exn "print_int") - [ Llvm.const_int i64_type 70 ] - in - let (_ : Llvm.llvalue) = Llvm.build_ret (Llvm.const_int i64_type 0) builder in - (* Validate the generated code, checking for consistency. *) - (match Llvm_analysis.verify_function the_function with - | true -> () - | false -> - Stdlib.Format.printf - "invalid function generated\n%s\n" - (Llvm.string_of_llvalue the_function); - Llvm_analysis.assert_valid_function the_function); - (* Optimize the function. *) - (* let (_ : bool) = Llvm.PassManager.run_function the_function the_fpm in *) - (* Llvm.dump_value the_function; *) - () - in - let _ = - Llvm.declare_function - "print_int" - (Llvm.function_type (Llvm.void_type context) [| i64_type |]) - the_module - in - prepare_main (); - Llvm.print_module "out.ll" the_module + match Array.to_list Sys.argv with + | [ _exe; input; output ] -> + let module_ = (match parse input with + | Error msg -> failf "%s" msg + | Ok anf_list -> Codegen.emit_ir anf_list) in + (* Codegen.optimize_ir module_; *) + Llvm.print_module output module_ + | _ -> exit 1 ;; diff --git a/DOOML/flake.nix b/DOOML/flake.nix new file mode 100644 index 00000000..5f87d889 --- /dev/null +++ b/DOOML/flake.nix @@ -0,0 +1,51 @@ +{ + inputs = { + nixpkgs.url = "github:nixos/nixpkgs"; + flake-utils.url = "github:numtide/flake-utils"; + }; + + outputs = { self, nixpkgs, flake-utils }: + flake-utils.lib.eachDefaultSystem (system: + let + pkgs = nixpkgs.legacyPackages.x86_64-linux; + riscv-cross-pkgs = import nixpkgs { + localSystem = "${system}"; + crossSystem = { + config = "riscv64-linux-gnu"; + }; + }; + in { + packages.dooml = pkgs.stdenv.mkDerivation { + name = "dooml"; + version = "0.0"; + src = ./.; + buildInputs = with pkgs; [ + ]; + buildPhase = '' + ls + ''; + installPhase = ''mkdir $out''; + }; + + packages.default = self.packages.${system}.dooml; + devShell = pkgs.mkShell { + name = "dooml"; + packages = with pkgs; [ + # Lsp, treesitter-parsers and debugger + ocamlformat_0_28_1 + + # Build tools + opam + + # Dependencies + gmp + llvm_19 + zlib + libtinfo + ] ++ [ riscv-cross-pkgs.buildPackages.gcc ]; + shellHook = '' + eval $(opam env) + ''; + }; + }); +} diff --git a/DOOML/lib/codegen.ml b/DOOML/lib/codegen.ml new file mode 100644 index 00000000..3ab1e86f --- /dev/null +++ b/DOOML/lib/codegen.ml @@ -0,0 +1,199 @@ +module Map = Base.Map.Poly + +let context = Llvm.global_context () +let builder = Llvm.builder context +let the_module = Llvm.create_module context "main" +open (val Llvm_wrapper.make context builder the_module) + +let failf fmt = Format.kasprintf failwith fmt + +let define_ibinop name build_f = + let func = define_func name i64_type [| i64_type; i64_type |] in + let entry = entry_block func in + position_at_end entry; + (match params func with + | [| lhs; rhs |] -> + let binop = build_f lhs rhs in + build_ret binop |> ignore; + | _ -> assert false); + Llvm_analysis.assert_valid_function func + +let emit_builtins () = + declare_func "print_int" void_type [| i64_type |] |> ignore; + declare_func "create_closure" i64_type [| i64_type; i64_type; i64_type; i64_type |] |> ignore; + declare_func "apply_closure" i64_type [| i64_type; i64_type; i64_type |] |> ignore; + declare_func "exit2" void_type [| void_type |] |> ignore; + define_ibinop "+" build_add; + define_ibinop "-" build_sub; + define_ibinop "*" build_mul; + define_ibinop "/" build_sdiv; + define_ibinop "<" (build_icmp Llvm.Icmp.Slt); + define_ibinop ">" (build_icmp Llvm.Icmp.Sgt); + define_ibinop "<=" (build_icmp Llvm.Icmp.Sle); + define_ibinop ">=" (build_icmp Llvm.Icmp.Sge); + define_ibinop "=" (build_icmp Llvm.Icmp.Eq) + +let emit_immexpr binds = + function + | Anf.ImmNum n -> const_int i64_type n + | Anf.ImmId s -> + (match Map.find binds s with + | Some lv -> lv + | None -> failf "Unbound variable %s" s) + +let emit_capp binds name args = + let app_type = match lookup_func name with + (** binops are defined inside llvm ir and processed as regular functions + they will be inlined **) + | Some func -> `Fun (func, params func |> Array.length) + | None -> (match Map.find binds name with + | Some closure -> `Closure closure + | None -> failf "Unbound application %s" name) + in + let argc = List.length args + in + match app_type with + | `Fun (func, arity) when argc == arity -> + let args_lv = args |> List.fold_left + (fun acc a -> + (emit_immexpr binds a) :: acc) [] + in + build_call ~name:name func args_lv + | `Fun (func, arity) when argc < arity -> + let args_lv = args |> List.fold_left + (fun acc a -> + (emit_immexpr binds a) :: acc) [] + in + let create_closure = lookup_func_exn "create_closure" in + let func = build_pointercast func i64_type ~name:"func_toi64_cast" in + let argc_lv = const_int i64_type argc in + let arity_lv = const_int i64_type arity in + let argv_lv = build_array_alloca ~name:"create_closure_argv" i64_type argc_lv in + args_lv |> List.iteri + (fun i a -> + let el_ptr = build_gep argv_lv [| (const_int i64_type i) |] in + build_store a el_ptr |> ignore); + let argv_lv = build_pointercast argv_lv i64_type ~name:"args_arr_toi64_cast" in + let create_closure_args = [ func; arity_lv; argc_lv; argv_lv ] in + build_call ~name:name create_closure create_closure_args + | `Fun (_, arity) -> + failf + "Too many arguments (%d) are passed for the function %s, expected %d" + argc + name + arity + | `Closure closure -> + let args_lv = args |> List.fold_left + (fun acc a -> + (emit_immexpr binds a) :: acc) [] + in + let apply_closure = lookup_func_exn "apply_closure" in + let argc_lv = const_int i64_type argc in + let argv_lv = build_array_alloca ~name:"apply_closure_argv" i64_type argc_lv in + args_lv |> List.iteri + (fun i a -> + let el_ptr = build_gep argv_lv [| (const_int i64_type i) |] in + build_store a el_ptr |> ignore); + let argv_lv = build_pointercast argv_lv i64_type ~name:"args_arr_toi64_cast" in + let apply_closure_args = [ closure; argc_lv; argv_lv ] in + build_call ~name:name apply_closure apply_closure_args + +let rec emit_cexpr binds = + function + | Anf.CImm imm -> emit_immexpr binds imm + | Anf.CIte (cond_, then_, else_) -> + let cond_lv = emit_immexpr binds cond_ in + let zero = const_int i64_type 0 in + build_icmp Llvm.Icmp.Ne cond_lv zero |> ignore; + + let start_bb = insertion_block () in + let the_function = block_parent start_bb in + + let then_bb = append_block ~name:"then" the_function in + position_at_end then_bb; + let then_lv = emit_aexpr binds then_ in + (** in case new block created inside then **) + let new_then_bb = insertion_block () in + + let else_bb = append_block ~name:"else" the_function in + position_at_end else_bb; + let else_lv = emit_aexpr binds else_ in + (** in case new block created inside else **) + let new_else_bb = insertion_block () in + + let merge_bb = append_block ~name:"merge" the_function in + position_at_end merge_bb; + + let phi_setup = [(then_lv, new_then_bb); (else_lv, new_else_bb)] in + let phi = build_phi ~name:"ifphi" phi_setup in + + position_at_end start_bb; + build_cond_br cond_lv then_bb else_bb |> ignore; + position_at_end new_then_bb; + build_br merge_bb |> ignore; + position_at_end new_else_bb; + build_br merge_bb |> ignore; + position_at_end merge_bb; + + phi + | Anf.CApp (name, args) -> + emit_capp binds name args + +and emit_aexpr binds = function + | Anf.AExpr expr -> emit_cexpr binds expr + | Anf.ALet (pattern, bind, body) -> + let bind_lv = emit_cexpr binds bind in + let binds = Map.update binds pattern ~f:(fun _ -> bind_lv) in + emit_aexpr binds body + + +let emit_decl (decl: Anf.decl) = + match decl with + | Anf.Decl (rec_flag, name, par, body) -> + (if has_toplevel_func name then failf "Function redefinition %s" name); + let declare () = List.map (fun _ -> i64_type) par |> Array.of_list |> declare_func name i64_type in + let f = match rec_flag with + | Ast.Rec -> declare () + | Ast.NonRec -> failf "todo" in + let par_binds = par |> + List.mapi (fun i a -> (i, a)) |> + List.fold_left (fun acc (i, a) -> + (a, (params f).(i)) :: acc) [ ] |> Map.of_alist_exn in + let entry_bb = append_block ~name:"entry" f in + position_at_end entry_bb; + let body = emit_aexpr par_binds body in + (match rec_flag with + | Ast.Rec -> () + | Ast.NonRec -> failf "todo"); + (match name with + | "main" -> + let exit = lookup_func_exn "exit2" in + build_call ~name:"exit" exit [] + | _ -> build_ret body) |> ignore; + Llvm_analysis.assert_valid_function f; + f +;; + +let emit_ir ?(triple = "x86_64-pc-linux-gnu") program = + assert (Llvm_executionengine.initialize ()); + Llvm.set_target_triple triple the_module; + emit_builtins (); + List.iter (fun d -> emit_decl d |> ignore) program; + the_module + +let optimize_ir ?(triple = "x86_64-pc-linux-gnu") module_ = + let target = Llvm_target.Target.by_triple triple in + let machine = + Llvm_target.TargetMachine.create + ~triple:triple target in + let options = Llvm_passbuilder.create_passbuilder_options () in + Llvm_passbuilder.passbuilder_options_set_verify_each options true; + Llvm_passbuilder.passbuilder_options_set_debug_logging options true; + Llvm_passbuilder.passbuilder_options_set_slp_vectorization options true; + Llvm_passbuilder.passbuilder_options_set_merge_functions options true; + Llvm_passbuilder.passbuilder_options_set_inliner_threshold options 2; + (match Llvm_passbuilder.run_passes module_ "default" machine options with + | Error e -> failf "Optimization error %s" e + | Ok () -> ()); + Llvm_passbuilder.dispose_passbuilder_options options; + diff --git a/DOOML/lib/codegen.mli b/DOOML/lib/codegen.mli new file mode 100644 index 00000000..df5a653d --- /dev/null +++ b/DOOML/lib/codegen.mli @@ -0,0 +1,3 @@ +val emit_ir : ?triple:string -> Anf.decl list -> Llvm.llmodule + +val optimize_ir : ?triple:string -> Llvm.llmodule -> unit diff --git a/DOOML/lib/dune b/DOOML/lib/dune index 5981ac2c..86bed58e 100644 --- a/DOOML/lib/dune +++ b/DOOML/lib/dune @@ -1,8 +1,13 @@ (library (name DOOML) (public_name DOOML.Lib) - (modules Anf Ast Builtin Cc Fe Ll Riscv State) - (libraries base stdlib angstrom) + (modules Anf Ast Builtin Cc Fe Ll Riscv State Codegen Llvm_wrapper Ir) + (libraries base stdlib angstrom + llvm + llvm.analysis + llvm.executionengine + llvm.passbuilder + ) (inline_tests) (preprocess (pps ppx_expect ppx_deriving.show ppx_variants_conv ppx_inline_test)) diff --git a/DOOML/lib/ir.ml b/DOOML/lib/ir.ml new file mode 100644 index 00000000..823b3d1c --- /dev/null +++ b/DOOML/lib/ir.ml @@ -0,0 +1,21 @@ +open Llvm_wrapper + +let spf = Format.asprintf +let failf fmt = Format.kasprintf failwith fmt + +let add_builtins (llvm : (module S)) = + let open (val llvm) in + declare_func "print_int" void_type [| i64_type |] |> ignore; + () + +let emit_ir _ _ = + let context = Llvm.global_context () in + let builder = Llvm.builder context in + let () = assert (Llvm_executionengine.initialize ()) in + let the_module = Llvm.create_module context "main" in + Llvm.set_target_triple "x86_64-pc-linux-gnu" the_module; + let _the_execution_engine = Llvm_executionengine.create the_module in + let module Llvm_wrapper = (val Llvm_wrapper.make context builder the_module) in + let llvm = (module Llvm_wrapper : S) in + add_builtins llvm +;; diff --git a/DOOML/bin/LL.ml b/DOOML/lib/llvm_wrapper.ml similarity index 56% rename from DOOML/bin/LL.ml rename to DOOML/lib/llvm_wrapper.ml index 5e2d2da2..860a00b6 100644 --- a/DOOML/bin/LL.ml +++ b/DOOML/lib/llvm_wrapper.ml @@ -14,14 +14,24 @@ module type S = sig val module_ : Llvm.llmodule val builder : Llvm.llbuilder val build_store : Llvm.llvalue -> Llvm.llvalue -> Llvm.llvalue - val build_call : lltype -> ?name:string -> llvalue -> llvalue list -> llvalue + val build_call : ?name:string -> llvalue -> llvalue list -> llvalue val lookup_func_exn : string -> llvalue + val lookup_func : string -> llvalue option + val define_func : string -> Llvm.lltype -> Llvm.lltype array -> Llvm.llvalue + val declare_func : string -> Llvm.lltype -> Llvm.lltype array -> Llvm.llvalue val has_toplevel_func : string -> bool val build_add : ?name:string -> llvalue -> llvalue -> llvalue val build_sub : ?name:string -> llvalue -> llvalue -> llvalue val build_mul : ?name:string -> llvalue -> llvalue -> llvalue val build_sdiv : ?name:string -> llvalue -> llvalue -> llvalue val build_icmp : ?name:string -> Icmp.t -> llvalue -> llvalue -> llvalue + val build_ret : llvalue -> llvalue + val build_br : llbasicblock -> llvalue + val build_cond_br : llvalue -> llbasicblock -> llbasicblock -> llvalue + val build_phi : ?name:string -> (llvalue * llbasicblock) list -> llvalue + + val build_array_alloca : ?name:string -> lltype -> llvalue -> llvalue + val build_gep : ?name:string -> llvalue -> llvalue array -> llvalue (** [set_metadata v kind fmt] sets metadata to value [v] of kind [k]. Returns this value [v]. Useful for attaching debugging *) @@ -37,8 +47,17 @@ module type S = sig val build_inttoptr : ?name:string -> llvalue -> lltype -> llvalue val build_pointercast : ?name:string -> llvalue -> lltype -> llvalue + val position_at_end : llbasicblock -> unit + val append_block : ?name:string -> llvalue -> llbasicblock + val insertion_block : unit -> llbasicblock + (** Just aliases *) + val void_type : Llvm.lltype + val block_parent : Llvm.llbasicblock -> Llvm.llvalue + val entry_block : Llvm.llvalue -> Llvm.llbasicblock + val i64_type : Llvm.lltype + val function_type : lltype -> lltype array -> lltype val const_int : Llvm.lltype -> int -> Llvm.llvalue val params : Llvm.llvalue -> Llvm.llvalue array val pp_value : Format.formatter -> llvalue -> unit @@ -51,8 +70,8 @@ let make context builder module_ = let module_ = module_ let build_store a b = Llvm.build_store a b builder - let build_call typ ?(name = "") f args = - build_call typ f (Array.of_list args) name builder + let build_call ?(name = "") f args = + build_call (type_of f) f (Array.of_list args) name builder ;; let has_toplevel_func fname = @@ -67,6 +86,21 @@ let make context builder module_ = | None -> failwith (sprintf "Function '%s' not found" fname) ;; + let lookup_func fname = lookup_function fname module_ + ;; + + let declare_func name ret params = + Llvm.declare_function + name + (Llvm.function_type ret params) + module_ + + let define_func name ret params = + Llvm.define_function + name + (Llvm.function_type ret params) + module_ + let build_add ?(name = "") l r = build_add l r name builder let build_sub ?(name = "") l r = build_sub l r name builder let build_mul ?(name = "") l r = build_mul l r name builder @@ -75,6 +109,13 @@ let make context builder module_ = let build_ptrtoint ?(name = "") e typ = Llvm.build_ptrtoint e typ name builder let build_inttoptr ?(name = "") e typ = Llvm.build_inttoptr e typ name builder let build_pointercast ?(name = "") f typ = Llvm.build_pointercast f typ name builder + let build_ret v = build_ret v builder + let build_br bb = build_br bb builder + let build_cond_br c tb fb = build_cond_br c tb fb builder + let build_phi ?(name = "") rules = build_phi rules name builder + + let build_array_alloca ?(name = "") typ n = Llvm.build_array_alloca typ n name builder + let build_gep ?(name = "") v ind = Llvm.build_gep (type_of v) v ind name builder let set_metadata v kind fmt = Format.kasprintf @@ -84,7 +125,16 @@ let make context builder module_ = fmt ;; + let position_at_end bb = Llvm.position_at_end bb builder + let insertion_block () = Llvm.insertion_block builder + let append_block ?(name = "") f = Llvm.append_block context name f + (* Aliases *) + let block_parent = Llvm.block_parent + let entry_block = Llvm.entry_block + let void_type = Llvm.void_type context + let i64_type = Llvm.i64_type context + let function_type = Llvm.function_type let const_int = Llvm.const_int let params = Llvm.params let pp_value ppf x = Format.fprintf ppf "%s" (Llvm.string_of_llvalue x) From 5dc27ba459bd3bffd15f0b3cd4bea25ee48b3dc5 Mon Sep 17 00:00:00 2001 From: "ignat.sergeev" Date: Sun, 18 Jan 2026 19:58:24 +0300 Subject: [PATCH 12/49] fix builtin --- DOOML/lib/codegen.ml | 1 + DOOML/lib/llvm_wrapper.ml | 2 ++ 2 files changed, 3 insertions(+) diff --git a/DOOML/lib/codegen.ml b/DOOML/lib/codegen.ml index 3ab1e86f..72cc1b41 100644 --- a/DOOML/lib/codegen.ml +++ b/DOOML/lib/codegen.ml @@ -14,6 +14,7 @@ let define_ibinop name build_f = (match params func with | [| lhs; rhs |] -> let binop = build_f lhs rhs in + let binop = build_intcast binop i64_type in build_ret binop |> ignore; | _ -> assert false); Llvm_analysis.assert_valid_function func diff --git a/DOOML/lib/llvm_wrapper.ml b/DOOML/lib/llvm_wrapper.ml index 860a00b6..867568bf 100644 --- a/DOOML/lib/llvm_wrapper.ml +++ b/DOOML/lib/llvm_wrapper.ml @@ -46,6 +46,7 @@ module type S = sig val build_ptrtoint : ?name:string -> llvalue -> lltype -> llvalue val build_inttoptr : ?name:string -> llvalue -> lltype -> llvalue val build_pointercast : ?name:string -> llvalue -> lltype -> llvalue + val build_intcast : ?name:string -> llvalue -> lltype -> llvalue val position_at_end : llbasicblock -> unit val append_block : ?name:string -> llvalue -> llbasicblock @@ -109,6 +110,7 @@ let make context builder module_ = let build_ptrtoint ?(name = "") e typ = Llvm.build_ptrtoint e typ name builder let build_inttoptr ?(name = "") e typ = Llvm.build_inttoptr e typ name builder let build_pointercast ?(name = "") f typ = Llvm.build_pointercast f typ name builder + let build_intcast ?(name = "") v typ = Llvm.build_intcast v typ name builder let build_ret v = build_ret v builder let build_br bb = build_br bb builder let build_cond_br c tb fb = build_cond_br c tb fb builder From 3b80f8a4c8ec6e8a4f6016f9eba73c1bd3734ca5 Mon Sep 17 00:00:00 2001 From: "ignat.sergeev" Date: Sun, 18 Jan 2026 20:52:21 +0300 Subject: [PATCH 13/49] fix function type lookup --- DOOML/lib/codegen.ml | 42 +++++++++++++++++++-------------------- DOOML/lib/llvm_wrapper.ml | 26 +++++++++++++++++++----- 2 files changed, 41 insertions(+), 27 deletions(-) diff --git a/DOOML/lib/codegen.ml b/DOOML/lib/codegen.ml index 72cc1b41..5ff063e2 100644 --- a/DOOML/lib/codegen.ml +++ b/DOOML/lib/codegen.ml @@ -7,14 +7,14 @@ open (val Llvm_wrapper.make context builder the_module) let failf fmt = Format.kasprintf failwith fmt -let define_ibinop name build_f = - let func = define_func name i64_type [| i64_type; i64_type |] in +let define_ibinop name ret build_f = + let typ = function_type ret [| i64_type; i64_type |] in + let func = define_func name (Llvm.return_type typ) (Llvm.param_types typ) in let entry = entry_block func in position_at_end entry; (match params func with | [| lhs; rhs |] -> let binop = build_f lhs rhs in - let binop = build_intcast binop i64_type in build_ret binop |> ignore; | _ -> assert false); Llvm_analysis.assert_valid_function func @@ -23,16 +23,15 @@ let emit_builtins () = declare_func "print_int" void_type [| i64_type |] |> ignore; declare_func "create_closure" i64_type [| i64_type; i64_type; i64_type; i64_type |] |> ignore; declare_func "apply_closure" i64_type [| i64_type; i64_type; i64_type |] |> ignore; - declare_func "exit2" void_type [| void_type |] |> ignore; - define_ibinop "+" build_add; - define_ibinop "-" build_sub; - define_ibinop "*" build_mul; - define_ibinop "/" build_sdiv; - define_ibinop "<" (build_icmp Llvm.Icmp.Slt); - define_ibinop ">" (build_icmp Llvm.Icmp.Sgt); - define_ibinop "<=" (build_icmp Llvm.Icmp.Sle); - define_ibinop ">=" (build_icmp Llvm.Icmp.Sge); - define_ibinop "=" (build_icmp Llvm.Icmp.Eq) + define_ibinop "+" i64_type build_add; + define_ibinop "-" i64_type build_sub; + define_ibinop "*" i64_type build_mul; + define_ibinop "/" i64_type build_sdiv; + define_ibinop "<" i1_type (build_icmp Llvm.Icmp.Slt); + define_ibinop ">" i1_type (build_icmp Llvm.Icmp.Sgt); + define_ibinop "<=" i1_type (build_icmp Llvm.Icmp.Sle); + define_ibinop ">=" i1_type (build_icmp Llvm.Icmp.Sge); + define_ibinop "=" i1_type (build_icmp Llvm.Icmp.Eq) let emit_immexpr binds = function @@ -59,7 +58,8 @@ let emit_capp binds name args = (fun acc a -> (emit_immexpr binds a) :: acc) [] in - build_call ~name:name func args_lv + let typ = lookup_func_type_exn name in + build_call ~name:name typ func args_lv | `Fun (func, arity) when argc < arity -> let args_lv = args |> List.fold_left (fun acc a -> @@ -76,7 +76,8 @@ let emit_capp binds name args = build_store a el_ptr |> ignore); let argv_lv = build_pointercast argv_lv i64_type ~name:"args_arr_toi64_cast" in let create_closure_args = [ func; arity_lv; argc_lv; argv_lv ] in - build_call ~name:name create_closure create_closure_args + let typ = lookup_func_type_exn "create_closure" in + build_call ~name:name typ create_closure create_closure_args | `Fun (_, arity) -> failf "Too many arguments (%d) are passed for the function %s, expected %d" @@ -97,14 +98,15 @@ let emit_capp binds name args = build_store a el_ptr |> ignore); let argv_lv = build_pointercast argv_lv i64_type ~name:"args_arr_toi64_cast" in let apply_closure_args = [ closure; argc_lv; argv_lv ] in - build_call ~name:name apply_closure apply_closure_args + let typ = lookup_func_type_exn "apply_closure" in + build_call ~name:name typ apply_closure apply_closure_args let rec emit_cexpr binds = function | Anf.CImm imm -> emit_immexpr binds imm | Anf.CIte (cond_, then_, else_) -> let cond_lv = emit_immexpr binds cond_ in - let zero = const_int i64_type 0 in + let zero = const_int i1_type 0 in build_icmp Llvm.Icmp.Ne cond_lv zero |> ignore; let start_bb = insertion_block () in @@ -166,11 +168,7 @@ let emit_decl (decl: Anf.decl) = (match rec_flag with | Ast.Rec -> () | Ast.NonRec -> failf "todo"); - (match name with - | "main" -> - let exit = lookup_func_exn "exit2" in - build_call ~name:"exit" exit [] - | _ -> build_ret body) |> ignore; + build_ret body |> ignore; Llvm_analysis.assert_valid_function f; f ;; diff --git a/DOOML/lib/llvm_wrapper.ml b/DOOML/lib/llvm_wrapper.ml index 867568bf..7490ae78 100644 --- a/DOOML/lib/llvm_wrapper.ml +++ b/DOOML/lib/llvm_wrapper.ml @@ -14,9 +14,10 @@ module type S = sig val module_ : Llvm.llmodule val builder : Llvm.llbuilder val build_store : Llvm.llvalue -> Llvm.llvalue -> Llvm.llvalue - val build_call : ?name:string -> llvalue -> llvalue list -> llvalue + val build_call : ?name:string -> lltype -> llvalue -> llvalue list -> llvalue val lookup_func_exn : string -> llvalue val lookup_func : string -> llvalue option + val lookup_func_type_exn : string -> lltype val define_func : string -> Llvm.lltype -> Llvm.lltype array -> Llvm.llvalue val declare_func : string -> Llvm.lltype -> Llvm.lltype array -> Llvm.llvalue val has_toplevel_func : string -> bool @@ -58,6 +59,7 @@ module type S = sig val block_parent : Llvm.llbasicblock -> Llvm.llvalue val entry_block : Llvm.llvalue -> Llvm.llbasicblock val i64_type : Llvm.lltype + val i1_type : Llvm.lltype val function_type : lltype -> lltype array -> lltype val const_int : Llvm.lltype -> int -> Llvm.llvalue val params : Llvm.llvalue -> Llvm.llvalue array @@ -69,10 +71,12 @@ let make context builder module_ = let context = context let builder = builder let module_ = module_ + let func_types : (string, Llvm.lltype) Hashtbl.t = Hashtbl.create 100 + let build_store a b = Llvm.build_store a b builder - let build_call ?(name = "") f args = - build_call (type_of f) f (Array.of_list args) name builder + let build_call ?(name = "") typ f args = + build_call typ f (Array.of_list args) name builder ;; let has_toplevel_func fname = @@ -87,19 +91,30 @@ let make context builder module_ = | None -> failwith (sprintf "Function '%s' not found" fname) ;; + let add_func_type name typ = Hashtbl.add func_types name typ + let lookup_func_type_exn name = + match Hashtbl.find_opt func_types name with + | Some t -> t + | None -> failwith (sprintf "Function '%s' not found" name) + + let lookup_func fname = lookup_function fname module_ ;; let declare_func name ret params = + let typ = Llvm.function_type ret params in + add_func_type name typ; Llvm.declare_function name - (Llvm.function_type ret params) + typ module_ let define_func name ret params = + let typ = Llvm.function_type ret params in + add_func_type name typ; Llvm.define_function name - (Llvm.function_type ret params) + typ module_ let build_add ?(name = "") l r = build_add l r name builder @@ -136,6 +151,7 @@ let make context builder module_ = let entry_block = Llvm.entry_block let void_type = Llvm.void_type context let i64_type = Llvm.i64_type context + let i1_type = Llvm.i1_type context let function_type = Llvm.function_type let const_int = Llvm.const_int let params = Llvm.params From d2316569723eefabdbb1dca6f0c51af3b1b8fabf Mon Sep 17 00:00:00 2001 From: "ignat.sergeev" Date: Mon, 19 Jan 2026 00:12:24 +0300 Subject: [PATCH 14/49] fix void call --- DOOML/bin/main.ml | 4 ++-- DOOML/lib/codegen.ml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/DOOML/bin/main.ml b/DOOML/bin/main.ml index 47ecf25d..ae43e714 100644 --- a/DOOML/bin/main.ml +++ b/DOOML/bin/main.ml @@ -23,8 +23,8 @@ let () = | [ _exe; input; output ] -> let module_ = (match parse input with | Error msg -> failf "%s" msg - | Ok anf_list -> Codegen.emit_ir anf_list) in + | Ok anf_list -> Codegen.emit_ir ~triple:"riscv64-unknown-linux-gnu" anf_list) in (* Codegen.optimize_ir module_; *) - Llvm.print_module output module_ + Llvm.print_module output module_ | _ -> exit 1 ;; diff --git a/DOOML/lib/codegen.ml b/DOOML/lib/codegen.ml index 5ff063e2..12f24c5c 100644 --- a/DOOML/lib/codegen.ml +++ b/DOOML/lib/codegen.ml @@ -59,7 +59,7 @@ let emit_capp binds name args = (emit_immexpr binds a) :: acc) [] in let typ = lookup_func_type_exn name in - build_call ~name:name typ func args_lv + build_call typ func args_lv | `Fun (func, arity) when argc < arity -> let args_lv = args |> List.fold_left (fun acc a -> From 5b36f18c6915db3d879a32c986beb523915ba610 Mon Sep 17 00:00:00 2001 From: "ignat.sergeev" Date: Mon, 19 Jan 2026 00:14:08 +0300 Subject: [PATCH 15/49] formatted --- DOOML/bin/main.ml | 10 ++++++---- DOOML/lib/codegen.mli | 1 - DOOML/lib/dune | 10 ++++++---- DOOML/lib/ir.ml | 21 --------------------- DOOML/lib/llvm_wrapper.ml | 34 +++++++++++++--------------------- 5 files changed, 25 insertions(+), 51 deletions(-) delete mode 100644 DOOML/lib/ir.ml diff --git a/DOOML/bin/main.ml b/DOOML/bin/main.ml index ae43e714..b88a891a 100644 --- a/DOOML/bin/main.ml +++ b/DOOML/bin/main.ml @@ -21,10 +21,12 @@ let parse input = let () = match Array.to_list Sys.argv with | [ _exe; input; output ] -> - let module_ = (match parse input with - | Error msg -> failf "%s" msg - | Ok anf_list -> Codegen.emit_ir ~triple:"riscv64-unknown-linux-gnu" anf_list) in + let module_ = + match parse input with + | Error msg -> failf "%s" msg + | Ok anf_list -> Codegen.emit_ir ~triple:"riscv64-unknown-linux-gnu" anf_list + in (* Codegen.optimize_ir module_; *) - Llvm.print_module output module_ + Llvm.print_module output module_ | _ -> exit 1 ;; diff --git a/DOOML/lib/codegen.mli b/DOOML/lib/codegen.mli index df5a653d..fb30d97b 100644 --- a/DOOML/lib/codegen.mli +++ b/DOOML/lib/codegen.mli @@ -1,3 +1,2 @@ val emit_ir : ?triple:string -> Anf.decl list -> Llvm.llmodule - val optimize_ir : ?triple:string -> Llvm.llmodule -> unit diff --git a/DOOML/lib/dune b/DOOML/lib/dune index 86bed58e..ab908b88 100644 --- a/DOOML/lib/dune +++ b/DOOML/lib/dune @@ -1,13 +1,15 @@ (library (name DOOML) (public_name DOOML.Lib) - (modules Anf Ast Builtin Cc Fe Ll Riscv State Codegen Llvm_wrapper Ir) - (libraries base stdlib angstrom + (modules Anf Ast Builtin Cc Fe Ll Riscv State Codegen Llvm_wrapper) + (libraries + base + stdlib + angstrom llvm llvm.analysis llvm.executionengine - llvm.passbuilder - ) + llvm.passbuilder) (inline_tests) (preprocess (pps ppx_expect ppx_deriving.show ppx_variants_conv ppx_inline_test)) diff --git a/DOOML/lib/ir.ml b/DOOML/lib/ir.ml deleted file mode 100644 index 823b3d1c..00000000 --- a/DOOML/lib/ir.ml +++ /dev/null @@ -1,21 +0,0 @@ -open Llvm_wrapper - -let spf = Format.asprintf -let failf fmt = Format.kasprintf failwith fmt - -let add_builtins (llvm : (module S)) = - let open (val llvm) in - declare_func "print_int" void_type [| i64_type |] |> ignore; - () - -let emit_ir _ _ = - let context = Llvm.global_context () in - let builder = Llvm.builder context in - let () = assert (Llvm_executionengine.initialize ()) in - let the_module = Llvm.create_module context "main" in - Llvm.set_target_triple "x86_64-pc-linux-gnu" the_module; - let _the_execution_engine = Llvm_executionengine.create the_module in - let module Llvm_wrapper = (val Llvm_wrapper.make context builder the_module) in - let llvm = (module Llvm_wrapper : S) in - add_builtins llvm -;; diff --git a/DOOML/lib/llvm_wrapper.ml b/DOOML/lib/llvm_wrapper.ml index 7490ae78..fc6a24bc 100644 --- a/DOOML/lib/llvm_wrapper.ml +++ b/DOOML/lib/llvm_wrapper.ml @@ -30,7 +30,6 @@ module type S = sig val build_br : llbasicblock -> llvalue val build_cond_br : llvalue -> llbasicblock -> llbasicblock -> llvalue val build_phi : ?name:string -> (llvalue * llbasicblock) list -> llvalue - val build_array_alloca : ?name:string -> lltype -> llvalue -> llvalue val build_gep : ?name:string -> llvalue -> llvalue array -> llvalue @@ -48,7 +47,6 @@ module type S = sig val build_inttoptr : ?name:string -> llvalue -> lltype -> llvalue val build_pointercast : ?name:string -> llvalue -> lltype -> llvalue val build_intcast : ?name:string -> llvalue -> lltype -> llvalue - val position_at_end : llbasicblock -> unit val append_block : ?name:string -> llvalue -> llbasicblock val insertion_block : unit -> llbasicblock @@ -72,7 +70,6 @@ let make context builder module_ = let builder = builder let module_ = module_ let func_types : (string, Llvm.lltype) Hashtbl.t = Hashtbl.create 100 - let build_store a b = Llvm.build_store a b builder let build_call ?(name = "") typ f args = @@ -92,30 +89,26 @@ let make context builder module_ = ;; let add_func_type name typ = Hashtbl.add func_types name typ - let lookup_func_type_exn name = - match Hashtbl.find_opt func_types name with - | Some t -> t - | None -> failwith (sprintf "Function '%s' not found" name) + let lookup_func_type_exn name = + match Hashtbl.find_opt func_types name with + | Some t -> t + | None -> failwith (sprintf "Function '%s' not found" name) + ;; let lookup_func fname = lookup_function fname module_ - ;; let declare_func name ret params = - let typ = Llvm.function_type ret params in - add_func_type name typ; - Llvm.declare_function - name - typ - module_ + let typ = Llvm.function_type ret params in + add_func_type name typ; + Llvm.declare_function name typ module_ + ;; let define_func name ret params = - let typ = Llvm.function_type ret params in - add_func_type name typ; - Llvm.define_function - name - typ - module_ + let typ = Llvm.function_type ret params in + add_func_type name typ; + Llvm.define_function name typ module_ + ;; let build_add ?(name = "") l r = build_add l r name builder let build_sub ?(name = "") l r = build_sub l r name builder @@ -130,7 +123,6 @@ let make context builder module_ = let build_br bb = build_br bb builder let build_cond_br c tb fb = build_cond_br c tb fb builder let build_phi ?(name = "") rules = build_phi rules name builder - let build_array_alloca ?(name = "") typ n = Llvm.build_array_alloca typ n name builder let build_gep ?(name = "") v ind = Llvm.build_gep (type_of v) v ind name builder From d6345ebeb1bce1305b54d9b76b529c941b1b0678 Mon Sep 17 00:00:00 2001 From: "ignat.sergeev" Date: Mon, 19 Jan 2026 00:30:02 +0300 Subject: [PATCH 16/49] add basic test --- DOOML/lib/codegen.ml | 157 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 156 insertions(+), 1 deletion(-) diff --git a/DOOML/lib/codegen.ml b/DOOML/lib/codegen.ml index 12f24c5c..b2aaf59d 100644 --- a/DOOML/lib/codegen.ml +++ b/DOOML/lib/codegen.ml @@ -194,5 +194,160 @@ let optimize_ir ?(triple = "x86_64-pc-linux-gnu") module_ = (match Llvm_passbuilder.run_passes module_ "default" machine options with | Error e -> failf "Optimization error %s" e | Ok () -> ()); - Llvm_passbuilder.dispose_passbuilder_options options; + Llvm_passbuilder.dispose_passbuilder_options options +let pp_module ppf module_= + Format.fprintf ppf "%s" (Llvm.string_of_llmodule module_) + +let%expect_test "basic" = + let ast = + Fe.parse + {| + let rec f = fun n -> + if n = 1 then 1 + else (f (n - 1)) * n + ;; + |} + |> Result.map_error (fun err -> Format.printf "Error %s" err) + |> Result.get_ok + in + Format.printf + "%a" + pp_module + (Cc.cc ast + |> fun asts -> + Format.printf + "CC: %a\n\n" + (Format.pp_print_list ~pp_sep:Format.pp_print_newline Ast.pp_top_level) + asts; + asts + |> Ll.ll + |> fun asts -> + Format.printf + "LL: %a\n\n" + (Format.pp_print_list ~pp_sep:Format.pp_print_newline Ast.pp_top_level) + asts; + asts + |> Anf.anf + |> fun asts -> + Format.printf + "ANF %a\n\n" + (Format.pp_print_list ~pp_sep:Format.pp_print_newline Anf.pp_decl) + asts; + asts |> emit_ir); + [%expect + {| + CC: let rec f = fun n -> if (=) n 1 then 1 else (*) (f ((-) n 1)) n;; + + + LL: let rec f = fun n -> if (=) n 1 then 1 else (*) (f ((-) n 1)) n;; + + + ANF let rec f n = + let sup1 = + (=) n 1 + in + let ite7 = + if sup1 then + 1 + else + let sup4 = + (-) n 1 + in + let sup5 = + (f) sup4 + in + let sup6 = + (*) sup5 n + in + sup6 + in + ite7 + ;; + + + ; ModuleID = 'main' + source_filename = "main" + target triple = "x86_64-pc-linux-gnu" + + declare void @print_int(i64) + + declare i64 @create_closure(i64, i64, i64, i64) + + declare i64 @apply_closure(i64, i64, i64) + + define i64 @"+"(i64 %0, i64 %1) { + entry: + %2 = add i64 %0, %1 + ret i64 %2 + } + + define i64 @-(i64 %0, i64 %1) { + entry: + %2 = sub i64 %0, %1 + ret i64 %2 + } + + define i64 @"*"(i64 %0, i64 %1) { + entry: + %2 = mul i64 %0, %1 + ret i64 %2 + } + + define i64 @"/"(i64 %0, i64 %1) { + entry: + %2 = sdiv i64 %0, %1 + ret i64 %2 + } + + define i1 @"<"(i64 %0, i64 %1) { + entry: + %2 = icmp slt i64 %0, %1 + ret i1 %2 + } + + define i1 @">"(i64 %0, i64 %1) { + entry: + %2 = icmp sgt i64 %0, %1 + ret i1 %2 + } + + define i1 @"<="(i64 %0, i64 %1) { + entry: + %2 = icmp sle i64 %0, %1 + ret i1 %2 + } + + define i1 @">="(i64 %0, i64 %1) { + entry: + %2 = icmp sge i64 %0, %1 + ret i1 %2 + } + + define i1 @"="(i64 %0, i64 %1) { + entry: + %2 = icmp eq i64 %0, %1 + ret i1 %2 + } + + define i64 @f(i64 %0) { + entry: + %1 = call i1 @"="(i64 1, i64 %0) + %2 = icmp ne i1 %1, false + br i1 %1, label %then, label %else + + then: ; preds = %entry + br label %merge + + else: ; preds = %entry + %3 = call i64 @-(i64 1, i64 %0) + %4 = call i64 @f(i64 %3) + %5 = call i64 @"*"(i64 %0, i64 %4) + br label %merge + + merge: ; preds = %else, %then + %ifphi = phi i64 [ 1, %then ], [ %5, %else ] + ret i64 %ifphi + } + |}] +;; From 944e369f981f23273a78c52f40cbf75618b2afe1 Mon Sep 17 00:00:00 2001 From: "ignat.sergeev" Date: Mon, 19 Jan 2026 00:30:56 +0300 Subject: [PATCH 17/49] rm unused comment --- DOOML/lib/codegen.ml | 2 -- 1 file changed, 2 deletions(-) diff --git a/DOOML/lib/codegen.ml b/DOOML/lib/codegen.ml index b2aaf59d..11ee5265 100644 --- a/DOOML/lib/codegen.ml +++ b/DOOML/lib/codegen.ml @@ -115,13 +115,11 @@ let rec emit_cexpr binds = let then_bb = append_block ~name:"then" the_function in position_at_end then_bb; let then_lv = emit_aexpr binds then_ in - (** in case new block created inside then **) let new_then_bb = insertion_block () in let else_bb = append_block ~name:"else" the_function in position_at_end else_bb; let else_lv = emit_aexpr binds else_ in - (** in case new block created inside else **) let new_else_bb = insertion_block () in let merge_bb = append_block ~name:"merge" the_function in From fc5df6dde9d2ef346f5b906a4f2e3db4ab2c99bc Mon Sep 17 00:00:00 2001 From: "ignat.sergeev" Date: Mon, 19 Jan 2026 01:30:00 +0300 Subject: [PATCH 18/49] fix function variables --- DOOML/lib/codegen.ml | 57 +++++++++++++++++++++++--------------------- 1 file changed, 30 insertions(+), 27 deletions(-) diff --git a/DOOML/lib/codegen.ml b/DOOML/lib/codegen.ml index 11ee5265..63f2fa3c 100644 --- a/DOOML/lib/codegen.ml +++ b/DOOML/lib/codegen.ml @@ -33,13 +33,30 @@ let emit_builtins () = define_ibinop ">=" i1_type (build_icmp Llvm.Icmp.Sge); define_ibinop "=" i1_type (build_icmp Llvm.Icmp.Eq) +let emit_create_closure func args = + let arity = params func |> Array.length in + let argc = List.length args in + let create_closure = lookup_func_exn "create_closure" in + let func = build_pointercast func i64_type ~name:"func_toi64_cast" in + let argc_lv = const_int i64_type argc in + let argv_lv = build_array_alloca ~name:"create_closure_argv" i64_type argc_lv in + args |> List.iteri + (fun i a -> + let el_ptr = build_gep argv_lv [| (const_int i64_type i) |] in + build_store a el_ptr |> ignore); + let argv_lv = build_pointercast argv_lv i64_type ~name:"args_arr_toi64_cast" in + let arity_lv = const_int i64_type arity in + build_call (lookup_func_type_exn "create_closure") create_closure [ func; arity_lv; argc_lv; argv_lv ] + let emit_immexpr binds = function | Anf.ImmNum n -> const_int i64_type n | Anf.ImmId s -> (match Map.find binds s with | Some lv -> lv - | None -> failf "Unbound variable %s" s) + | None -> (match lookup_func s with + | Some f -> emit_create_closure f [] + | None -> failf "Unbound variable %s" s)) let emit_capp binds name args = let app_type = match lookup_func name with @@ -54,30 +71,16 @@ let emit_capp binds name args = in match app_type with | `Fun (func, arity) when argc == arity -> - let args_lv = args |> List.fold_left - (fun acc a -> - (emit_immexpr binds a) :: acc) [] + let args_lv = args |> List.map + (fun a -> + emit_immexpr binds a) in let typ = lookup_func_type_exn name in build_call typ func args_lv | `Fun (func, arity) when argc < arity -> - let args_lv = args |> List.fold_left - (fun acc a -> - (emit_immexpr binds a) :: acc) [] - in - let create_closure = lookup_func_exn "create_closure" in - let func = build_pointercast func i64_type ~name:"func_toi64_cast" in - let argc_lv = const_int i64_type argc in - let arity_lv = const_int i64_type arity in - let argv_lv = build_array_alloca ~name:"create_closure_argv" i64_type argc_lv in - args_lv |> List.iteri - (fun i a -> - let el_ptr = build_gep argv_lv [| (const_int i64_type i) |] in - build_store a el_ptr |> ignore); - let argv_lv = build_pointercast argv_lv i64_type ~name:"args_arr_toi64_cast" in - let create_closure_args = [ func; arity_lv; argc_lv; argv_lv ] in - let typ = lookup_func_type_exn "create_closure" in - build_call ~name:name typ create_closure create_closure_args + let args = args |> List.map + (fun a -> emit_immexpr binds a) in + emit_create_closure func args | `Fun (_, arity) -> failf "Too many arguments (%d) are passed for the function %s, expected %d" @@ -85,9 +88,9 @@ let emit_capp binds name args = name arity | `Closure closure -> - let args_lv = args |> List.fold_left - (fun acc a -> - (emit_immexpr binds a) :: acc) [] + let args_lv = args |> List.map + (fun a -> + emit_immexpr binds a) in let apply_closure = lookup_func_exn "apply_closure" in let argc_lv = const_int i64_type argc in @@ -99,7 +102,7 @@ let emit_capp binds name args = let argv_lv = build_pointercast argv_lv i64_type ~name:"args_arr_toi64_cast" in let apply_closure_args = [ closure; argc_lv; argv_lv ] in let typ = lookup_func_type_exn "apply_closure" in - build_call ~name:name typ apply_closure apply_closure_args + build_call typ apply_closure apply_closure_args let rec emit_cexpr binds = function @@ -153,9 +156,9 @@ let emit_decl (decl: Anf.decl) = | Anf.Decl (rec_flag, name, par, body) -> (if has_toplevel_func name then failf "Function redefinition %s" name); let declare () = List.map (fun _ -> i64_type) par |> Array.of_list |> declare_func name i64_type in - let f = match rec_flag with + let f = (match rec_flag with | Ast.Rec -> declare () - | Ast.NonRec -> failf "todo" in + | Ast.NonRec -> failf "todo") in let par_binds = par |> List.mapi (fun i a -> (i, a)) |> List.fold_left (fun acc (i, a) -> From f6c8bbe89e511c60717712cc8767888a9a1aa068 Mon Sep 17 00:00:00 2001 From: "ignat.sergeev" Date: Mon, 19 Jan 2026 02:32:42 +0300 Subject: [PATCH 19/49] fix closure_apply --- DOOML/lib/codegen.ml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/DOOML/lib/codegen.ml b/DOOML/lib/codegen.ml index 63f2fa3c..3cf0b793 100644 --- a/DOOML/lib/codegen.ml +++ b/DOOML/lib/codegen.ml @@ -22,7 +22,7 @@ let define_ibinop name ret build_f = let emit_builtins () = declare_func "print_int" void_type [| i64_type |] |> ignore; declare_func "create_closure" i64_type [| i64_type; i64_type; i64_type; i64_type |] |> ignore; - declare_func "apply_closure" i64_type [| i64_type; i64_type; i64_type |] |> ignore; + declare_func "closure_apply" i64_type [| i64_type; i64_type; i64_type |] |> ignore; define_ibinop "+" i64_type build_add; define_ibinop "-" i64_type build_sub; define_ibinop "*" i64_type build_mul; @@ -92,17 +92,17 @@ let emit_capp binds name args = (fun a -> emit_immexpr binds a) in - let apply_closure = lookup_func_exn "apply_closure" in + let closure_apply = lookup_func_exn "closure_apply" in let argc_lv = const_int i64_type argc in - let argv_lv = build_array_alloca ~name:"apply_closure_argv" i64_type argc_lv in + let argv_lv = build_array_alloca ~name:"closure_apply_argv" i64_type argc_lv in args_lv |> List.iteri (fun i a -> let el_ptr = build_gep argv_lv [| (const_int i64_type i) |] in build_store a el_ptr |> ignore); let argv_lv = build_pointercast argv_lv i64_type ~name:"args_arr_toi64_cast" in - let apply_closure_args = [ closure; argc_lv; argv_lv ] in - let typ = lookup_func_type_exn "apply_closure" in - build_call typ apply_closure apply_closure_args + let apply_args = [ closure; argc_lv; argv_lv ] in + let typ = lookup_func_type_exn "closure_apply" in + build_call typ closure_apply apply_args let rec emit_cexpr binds = function From acf89431951740ddf2c0393b83c730ee79da060e Mon Sep 17 00:00:00 2001 From: "ignat.sergeev" Date: Mon, 19 Jan 2026 23:19:10 +0300 Subject: [PATCH 20/49] emit binary --- DOOML/bin/main.ml | 7 ++++--- DOOML/lib/codegen.ml | 8 ++++++++ DOOML/lib/codegen.mli | 1 + DOOML/lib/dune | 3 ++- 4 files changed, 15 insertions(+), 4 deletions(-) diff --git a/DOOML/bin/main.ml b/DOOML/bin/main.ml index b88a891a..7be89440 100644 --- a/DOOML/bin/main.ml +++ b/DOOML/bin/main.ml @@ -21,12 +21,13 @@ let parse input = let () = match Array.to_list Sys.argv with | [ _exe; input; output ] -> + let triple = "riscv64-unknown-linux-gnu" in let module_ = match parse input with | Error msg -> failf "%s" msg - | Ok anf_list -> Codegen.emit_ir ~triple:"riscv64-unknown-linux-gnu" anf_list + | Ok anf_list -> Codegen.emit_ir ~triple anf_list in - (* Codegen.optimize_ir module_; *) - Llvm.print_module output module_ + Codegen.optimize_ir ~triple module_; + Codegen.emit_binary ~triple module_ output | _ -> exit 1 ;; diff --git a/DOOML/lib/codegen.ml b/DOOML/lib/codegen.ml index 3cf0b793..d62786e8 100644 --- a/DOOML/lib/codegen.ml +++ b/DOOML/lib/codegen.ml @@ -179,6 +179,7 @@ let emit_ir ?(triple = "x86_64-pc-linux-gnu") program = Llvm.set_target_triple triple the_module; emit_builtins (); List.iter (fun d -> emit_decl d |> ignore) program; + Llvm_all_backends.initialize (); the_module let optimize_ir ?(triple = "x86_64-pc-linux-gnu") module_ = @@ -197,6 +198,13 @@ let optimize_ir ?(triple = "x86_64-pc-linux-gnu") module_ = | Ok () -> ()); Llvm_passbuilder.dispose_passbuilder_options options +let emit_binary ?(triple = "x86_64-pc-linux-gnu") module_ file = + let target = Llvm_target.Target.by_triple triple in + let machine = + Llvm_target.TargetMachine.create + ~triple:triple target in + Llvm_target.TargetMachine.emit_to_file module_ Llvm_target.CodeGenFileType.ObjectFile file machine + let pp_module ppf module_= Format.fprintf ppf "%s" (Llvm.string_of_llmodule module_) diff --git a/DOOML/lib/codegen.mli b/DOOML/lib/codegen.mli index fb30d97b..af009192 100644 --- a/DOOML/lib/codegen.mli +++ b/DOOML/lib/codegen.mli @@ -1,2 +1,3 @@ val emit_ir : ?triple:string -> Anf.decl list -> Llvm.llmodule val optimize_ir : ?triple:string -> Llvm.llmodule -> unit +val emit_binary : ?triple:string -> Llvm.llmodule -> string -> unit diff --git a/DOOML/lib/dune b/DOOML/lib/dune index ab908b88..5c9a31be 100644 --- a/DOOML/lib/dune +++ b/DOOML/lib/dune @@ -9,7 +9,8 @@ llvm llvm.analysis llvm.executionengine - llvm.passbuilder) + llvm.passbuilder + llvm.all_backends) (inline_tests) (preprocess (pps ppx_expect ppx_deriving.show ppx_variants_conv ppx_inline_test)) From 0d790866612600a4ee0c9ebed4f71235f565828e Mon Sep 17 00:00:00 2001 From: "ignat.sergeev" Date: Mon, 19 Jan 2026 23:22:44 +0300 Subject: [PATCH 21/49] disable llvm verbosity --- DOOML/lib/codegen.ml | 1 - 1 file changed, 1 deletion(-) diff --git a/DOOML/lib/codegen.ml b/DOOML/lib/codegen.ml index d62786e8..647955f7 100644 --- a/DOOML/lib/codegen.ml +++ b/DOOML/lib/codegen.ml @@ -189,7 +189,6 @@ let optimize_ir ?(triple = "x86_64-pc-linux-gnu") module_ = ~triple:triple target in let options = Llvm_passbuilder.create_passbuilder_options () in Llvm_passbuilder.passbuilder_options_set_verify_each options true; - Llvm_passbuilder.passbuilder_options_set_debug_logging options true; Llvm_passbuilder.passbuilder_options_set_slp_vectorization options true; Llvm_passbuilder.passbuilder_options_set_merge_functions options true; Llvm_passbuilder.passbuilder_options_set_inliner_threshold options 2; From f080c832190956bf34ae02ef58a464c726b62ca8 Mon Sep 17 00:00:00 2001 From: "ignat.sergeev" Date: Tue, 20 Jan 2026 02:18:48 +0300 Subject: [PATCH 22/49] non rec support --- DOOML/lib/codegen.ml | 111 +++++++++++++++++++++----------------- DOOML/lib/llvm_wrapper.ml | 35 ------------ 2 files changed, 63 insertions(+), 83 deletions(-) diff --git a/DOOML/lib/codegen.ml b/DOOML/lib/codegen.ml index 647955f7..01b665aa 100644 --- a/DOOML/lib/codegen.ml +++ b/DOOML/lib/codegen.ml @@ -7,6 +7,10 @@ open (val Llvm_wrapper.make context builder the_module) let failf fmt = Format.kasprintf failwith fmt +type visibility = + | Internal + | External + let define_ibinop name ret build_f = let typ = function_type ret [| i64_type; i64_type |] in let func = define_func name (Llvm.return_type typ) (Llvm.param_types typ) in @@ -17,12 +21,23 @@ let define_ibinop name ret build_f = let binop = build_f lhs rhs in build_ret binop |> ignore; | _ -> assert false); - Llvm_analysis.assert_valid_function func + Llvm_analysis.assert_valid_function func; + ( name, (func, typ, External) ) + +let declare_internal name ret params = + let f = declare_func name ret params in + let t = function_type ret params in + ( name, (f, t, Internal ) ) + +let declare_external name ret params = + let f = declare_func name ret params in + let t = function_type ret params in + ( name, (f, t, External ) ) let emit_builtins () = - declare_func "print_int" void_type [| i64_type |] |> ignore; - declare_func "create_closure" i64_type [| i64_type; i64_type; i64_type; i64_type |] |> ignore; - declare_func "closure_apply" i64_type [| i64_type; i64_type; i64_type |] |> ignore; + [ declare_external "print_int" void_type [| i64_type |]; + declare_internal "create_closure" i64_type [| i64_type; i64_type; i64_type; i64_type |]; + declare_internal "closure_apply" i64_type [| i64_type; i64_type; i64_type |]; define_ibinop "+" i64_type build_add; define_ibinop "-" i64_type build_sub; define_ibinop "*" i64_type build_mul; @@ -31,12 +46,12 @@ let emit_builtins () = define_ibinop ">" i1_type (build_icmp Llvm.Icmp.Sgt); define_ibinop "<=" i1_type (build_icmp Llvm.Icmp.Sle); define_ibinop ">=" i1_type (build_icmp Llvm.Icmp.Sge); - define_ibinop "=" i1_type (build_icmp Llvm.Icmp.Eq) + define_ibinop "=" i1_type (build_icmp Llvm.Icmp.Eq) ] |> Map.of_alist_exn -let emit_create_closure func args = +let emit_create_closure funcs func args = let arity = params func |> Array.length in let argc = List.length args in - let create_closure = lookup_func_exn "create_closure" in + let (create_closure, typ, _) = Map.find_exn funcs "create_closure" in let func = build_pointercast func i64_type ~name:"func_toi64_cast" in let argc_lv = const_int i64_type argc in let argv_lv = build_array_alloca ~name:"create_closure_argv" i64_type argc_lv in @@ -46,23 +61,25 @@ let emit_create_closure func args = build_store a el_ptr |> ignore); let argv_lv = build_pointercast argv_lv i64_type ~name:"args_arr_toi64_cast" in let arity_lv = const_int i64_type arity in - build_call (lookup_func_type_exn "create_closure") create_closure [ func; arity_lv; argc_lv; argv_lv ] + build_call typ create_closure [ func; arity_lv; argc_lv; argv_lv ] -let emit_immexpr binds = +let emit_immexpr binds funcs = function | Anf.ImmNum n -> const_int i64_type n | Anf.ImmId s -> (match Map.find binds s with | Some lv -> lv - | None -> (match lookup_func s with - | Some f -> emit_create_closure f [] + | None -> (match Map.find funcs s with + | Some (f, _, External) -> emit_create_closure funcs f [] + | Some _ | None -> failf "Unbound variable %s" s)) -let emit_capp binds name args = - let app_type = match lookup_func name with +let emit_capp binds funcs name args = + let app_type = match Map.find funcs name with (** binops are defined inside llvm ir and processed as regular functions they will be inlined **) - | Some func -> `Fun (func, params func |> Array.length) + | Some (func, typ, External) -> `Fun (func, typ, params func |> Array.length) + | Some _ | None -> (match Map.find binds name with | Some closure -> `Closure closure | None -> failf "Unbound application %s" name) @@ -70,18 +87,17 @@ let emit_capp binds name args = let argc = List.length args in match app_type with - | `Fun (func, arity) when argc == arity -> + | `Fun (func, typ, arity) when argc == arity -> let args_lv = args |> List.map (fun a -> - emit_immexpr binds a) + emit_immexpr binds funcs a) in - let typ = lookup_func_type_exn name in build_call typ func args_lv - | `Fun (func, arity) when argc < arity -> + | `Fun (func, _, arity) when argc < arity -> let args = args |> List.map - (fun a -> emit_immexpr binds a) in - emit_create_closure func args - | `Fun (_, arity) -> + (fun a -> emit_immexpr binds funcs a) in + emit_create_closure funcs func args + | `Fun (_, _, arity) -> failf "Too many arguments (%d) are passed for the function %s, expected %d" argc @@ -90,9 +106,9 @@ let emit_capp binds name args = | `Closure closure -> let args_lv = args |> List.map (fun a -> - emit_immexpr binds a) + emit_immexpr binds funcs a) in - let closure_apply = lookup_func_exn "closure_apply" in + let (closure_apply, typ, _) = Map.find_exn funcs "closure_apply" in let argc_lv = const_int i64_type argc in let argv_lv = build_array_alloca ~name:"closure_apply_argv" i64_type argc_lv in args_lv |> List.iteri @@ -101,14 +117,13 @@ let emit_capp binds name args = build_store a el_ptr |> ignore); let argv_lv = build_pointercast argv_lv i64_type ~name:"args_arr_toi64_cast" in let apply_args = [ closure; argc_lv; argv_lv ] in - let typ = lookup_func_type_exn "closure_apply" in build_call typ closure_apply apply_args -let rec emit_cexpr binds = +let rec emit_cexpr binds funcs = function - | Anf.CImm imm -> emit_immexpr binds imm + | Anf.CImm imm -> emit_immexpr binds funcs imm | Anf.CIte (cond_, then_, else_) -> - let cond_lv = emit_immexpr binds cond_ in + let cond_lv = emit_immexpr binds funcs cond_ in let zero = const_int i1_type 0 in build_icmp Llvm.Icmp.Ne cond_lv zero |> ignore; @@ -117,12 +132,12 @@ let rec emit_cexpr binds = let then_bb = append_block ~name:"then" the_function in position_at_end then_bb; - let then_lv = emit_aexpr binds then_ in + let then_lv = emit_aexpr binds funcs then_ in let new_then_bb = insertion_block () in let else_bb = append_block ~name:"else" the_function in position_at_end else_bb; - let else_lv = emit_aexpr binds else_ in + let else_lv = emit_aexpr binds funcs else_ in let new_else_bb = insertion_block () in let merge_bb = append_block ~name:"merge" the_function in @@ -141,44 +156,44 @@ let rec emit_cexpr binds = phi | Anf.CApp (name, args) -> - emit_capp binds name args + emit_capp binds funcs name args -and emit_aexpr binds = function - | Anf.AExpr expr -> emit_cexpr binds expr +and emit_aexpr binds funcs = function + | Anf.AExpr expr -> emit_cexpr binds funcs expr | Anf.ALet (pattern, bind, body) -> - let bind_lv = emit_cexpr binds bind in + let bind_lv = emit_cexpr binds funcs bind in let binds = Map.update binds pattern ~f:(fun _ -> bind_lv) in - emit_aexpr binds body - + emit_aexpr binds funcs body -let emit_decl (decl: Anf.decl) = +let emit_decl funcs (decl: Anf.decl) = match decl with | Anf.Decl (rec_flag, name, par, body) -> - (if has_toplevel_func name then failf "Function redefinition %s" name); - let declare () = List.map (fun _ -> i64_type) par |> Array.of_list |> declare_func name i64_type in - let f = (match rec_flag with - | Ast.Rec -> declare () - | Ast.NonRec -> failf "todo") in + (if Map.find funcs name != None then failf "Function redefinition %s" name); + let (_, funcs_entry) = List.map (fun _ -> i64_type) par |> Array.of_list |> declare_external name i64_type in + let (f, _, _) = funcs_entry in + let funcs = (match rec_flag with + | Ast.Rec -> funcs |> Map.add_exn ~key:name ~data:funcs_entry + | Ast.NonRec -> funcs) in let par_binds = par |> List.mapi (fun i a -> (i, a)) |> List.fold_left (fun acc (i, a) -> (a, (params f).(i)) :: acc) [ ] |> Map.of_alist_exn in let entry_bb = append_block ~name:"entry" f in position_at_end entry_bb; - let body = emit_aexpr par_binds body in - (match rec_flag with - | Ast.Rec -> () - | Ast.NonRec -> failf "todo"); + let body = emit_aexpr par_binds funcs body in build_ret body |> ignore; + let funcs = (match rec_flag with + | Ast.Rec -> funcs + | Ast.NonRec -> funcs |> Map.add_exn ~key:name ~data:funcs_entry) in Llvm_analysis.assert_valid_function f; - f + funcs ;; let emit_ir ?(triple = "x86_64-pc-linux-gnu") program = assert (Llvm_executionengine.initialize ()); Llvm.set_target_triple triple the_module; - emit_builtins (); - List.iter (fun d -> emit_decl d |> ignore) program; + let funcs = emit_builtins () in + List.fold_left (fun funcs d -> emit_decl funcs d) funcs program |> ignore; Llvm_all_backends.initialize (); the_module diff --git a/DOOML/lib/llvm_wrapper.ml b/DOOML/lib/llvm_wrapper.ml index fc6a24bc..73aa487d 100644 --- a/DOOML/lib/llvm_wrapper.ml +++ b/DOOML/lib/llvm_wrapper.ml @@ -15,12 +15,8 @@ module type S = sig val builder : Llvm.llbuilder val build_store : Llvm.llvalue -> Llvm.llvalue -> Llvm.llvalue val build_call : ?name:string -> lltype -> llvalue -> llvalue list -> llvalue - val lookup_func_exn : string -> llvalue - val lookup_func : string -> llvalue option - val lookup_func_type_exn : string -> lltype val define_func : string -> Llvm.lltype -> Llvm.lltype array -> Llvm.llvalue val declare_func : string -> Llvm.lltype -> Llvm.lltype array -> Llvm.llvalue - val has_toplevel_func : string -> bool val build_add : ?name:string -> llvalue -> llvalue -> llvalue val build_sub : ?name:string -> llvalue -> llvalue -> llvalue val build_mul : ?name:string -> llvalue -> llvalue -> llvalue @@ -43,10 +39,7 @@ module type S = sig (* ?? *) - val build_ptrtoint : ?name:string -> llvalue -> lltype -> llvalue - val build_inttoptr : ?name:string -> llvalue -> lltype -> llvalue val build_pointercast : ?name:string -> llvalue -> lltype -> llvalue - val build_intcast : ?name:string -> llvalue -> lltype -> llvalue val position_at_end : llbasicblock -> unit val append_block : ?name:string -> llvalue -> llbasicblock val insertion_block : unit -> llbasicblock @@ -69,44 +62,19 @@ let make context builder module_ = let context = context let builder = builder let module_ = module_ - let func_types : (string, Llvm.lltype) Hashtbl.t = Hashtbl.create 100 let build_store a b = Llvm.build_store a b builder let build_call ?(name = "") typ f args = build_call typ f (Array.of_list args) name builder ;; - let has_toplevel_func fname = - match lookup_function fname module_ with - | Some _ -> true - | None -> false - ;; - - let lookup_func_exn fname = - match lookup_function fname module_ with - | Some f -> f - | None -> failwith (sprintf "Function '%s' not found" fname) - ;; - - let add_func_type name typ = Hashtbl.add func_types name typ - - let lookup_func_type_exn name = - match Hashtbl.find_opt func_types name with - | Some t -> t - | None -> failwith (sprintf "Function '%s' not found" name) - ;; - - let lookup_func fname = lookup_function fname module_ - let declare_func name ret params = let typ = Llvm.function_type ret params in - add_func_type name typ; Llvm.declare_function name typ module_ ;; let define_func name ret params = let typ = Llvm.function_type ret params in - add_func_type name typ; Llvm.define_function name typ module_ ;; @@ -115,10 +83,7 @@ let make context builder module_ = let build_mul ?(name = "") l r = build_mul l r name builder let build_sdiv ?(name = "") l r = build_sdiv l r name builder let build_icmp ?(name = "") op l r = build_icmp op l r name builder - let build_ptrtoint ?(name = "") e typ = Llvm.build_ptrtoint e typ name builder - let build_inttoptr ?(name = "") e typ = Llvm.build_inttoptr e typ name builder let build_pointercast ?(name = "") f typ = Llvm.build_pointercast f typ name builder - let build_intcast ?(name = "") v typ = Llvm.build_intcast v typ name builder let build_ret v = build_ret v builder let build_br bb = build_br bb builder let build_cond_br c tb fb = build_cond_br c tb fb builder From a63f35edd9c8eddfe430884770b434b553692914 Mon Sep 17 00:00:00 2001 From: "ignat.sergeev" Date: Tue, 20 Jan 2026 02:27:26 +0300 Subject: [PATCH 23/49] rm unused open --- DOOML/lib/llvm_wrapper.ml | 1 - 1 file changed, 1 deletion(-) diff --git a/DOOML/lib/llvm_wrapper.ml b/DOOML/lib/llvm_wrapper.ml index 73aa487d..2d786121 100644 --- a/DOOML/lib/llvm_wrapper.ml +++ b/DOOML/lib/llvm_wrapper.ml @@ -7,7 +7,6 @@ [@@@ocaml.text "/*"] open Llvm -open Printf module type S = sig val context : Llvm.llcontext From 0f1d2c10aa945603cf1f0debf7375d7b1e52211f Mon Sep 17 00:00:00 2001 From: "ignat.sergeev" Date: Tue, 20 Jan 2026 03:56:35 +0300 Subject: [PATCH 24/49] report error on multiple params with same name --- DOOML/lib/codegen.ml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/DOOML/lib/codegen.ml b/DOOML/lib/codegen.ml index 01b665aa..1f3fb63a 100644 --- a/DOOML/lib/codegen.ml +++ b/DOOML/lib/codegen.ml @@ -177,7 +177,10 @@ let emit_decl funcs (decl: Anf.decl) = let par_binds = par |> List.mapi (fun i a -> (i, a)) |> List.fold_left (fun acc (i, a) -> - (a, (params f).(i)) :: acc) [ ] |> Map.of_alist_exn in + (a, (params f).(i)) :: acc) [ ] |> Map.of_alist in + let par_binds = match par_binds with + | `Duplicate_key k -> failf "Multiple parameters %s in fun %s" k name + | `Ok m -> m in let entry_bb = append_block ~name:"entry" f in position_at_end entry_bb; let body = emit_aexpr par_binds funcs body in From 5a49d403c5af1365e5aae0e80736fcbf009ce687 Mon Sep 17 00:00:00 2001 From: "ignat.sergeev" Date: Tue, 20 Jan 2026 03:26:50 +0300 Subject: [PATCH 25/49] add features to binary emition --- DOOML/lib/codegen.ml | 4 ++-- DOOML/lib/codegen.mli | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/DOOML/lib/codegen.ml b/DOOML/lib/codegen.ml index 1f3fb63a..36e5065e 100644 --- a/DOOML/lib/codegen.ml +++ b/DOOML/lib/codegen.ml @@ -215,11 +215,11 @@ let optimize_ir ?(triple = "x86_64-pc-linux-gnu") module_ = | Ok () -> ()); Llvm_passbuilder.dispose_passbuilder_options options -let emit_binary ?(triple = "x86_64-pc-linux-gnu") module_ file = +let emit_binary ?(triple = "x86_64-pc-linux-gnu") ?(features = "") module_ file = let target = Llvm_target.Target.by_triple triple in let machine = Llvm_target.TargetMachine.create - ~triple:triple target in + ~triple:triple ~features:features target in Llvm_target.TargetMachine.emit_to_file module_ Llvm_target.CodeGenFileType.ObjectFile file machine let pp_module ppf module_= diff --git a/DOOML/lib/codegen.mli b/DOOML/lib/codegen.mli index af009192..029535e5 100644 --- a/DOOML/lib/codegen.mli +++ b/DOOML/lib/codegen.mli @@ -1,3 +1,3 @@ val emit_ir : ?triple:string -> Anf.decl list -> Llvm.llmodule val optimize_ir : ?triple:string -> Llvm.llmodule -> unit -val emit_binary : ?triple:string -> Llvm.llmodule -> string -> unit +val emit_binary : ?triple:string -> ?features:string -> Llvm.llmodule -> string -> unit From 06fd7c249c9d6821f950fbfce3035fa41c979776 Mon Sep 17 00:00:00 2001 From: "ignat.sergeev" Date: Tue, 20 Jan 2026 04:11:17 +0300 Subject: [PATCH 26/49] fix codegen test --- DOOML/lib/codegen.ml | 66 ++++++++++++++++++++++---------------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/DOOML/lib/codegen.ml b/DOOML/lib/codegen.ml index 36e5065e..187be301 100644 --- a/DOOML/lib/codegen.ml +++ b/DOOML/lib/codegen.ml @@ -296,34 +296,28 @@ let%expect_test "basic" = source_filename = "main" target triple = "x86_64-pc-linux-gnu" - declare void @print_int(i64) - - declare i64 @create_closure(i64, i64, i64, i64) - - declare i64 @apply_closure(i64, i64, i64) - - define i64 @"+"(i64 %0, i64 %1) { + define i1 @"="(i64 %0, i64 %1) { entry: - %2 = add i64 %0, %1 - ret i64 %2 + %2 = icmp eq i64 %0, %1 + ret i1 %2 } - define i64 @-(i64 %0, i64 %1) { + define i1 @">="(i64 %0, i64 %1) { entry: - %2 = sub i64 %0, %1 - ret i64 %2 + %2 = icmp sge i64 %0, %1 + ret i1 %2 } - define i64 @"*"(i64 %0, i64 %1) { + define i1 @"<="(i64 %0, i64 %1) { entry: - %2 = mul i64 %0, %1 - ret i64 %2 + %2 = icmp sle i64 %0, %1 + ret i1 %2 } - define i64 @"/"(i64 %0, i64 %1) { + define i1 @">"(i64 %0, i64 %1) { entry: - %2 = sdiv i64 %0, %1 - ret i64 %2 + %2 = icmp sgt i64 %0, %1 + ret i1 %2 } define i1 @"<"(i64 %0, i64 %1) { @@ -332,33 +326,39 @@ let%expect_test "basic" = ret i1 %2 } - define i1 @">"(i64 %0, i64 %1) { + define i64 @"/"(i64 %0, i64 %1) { entry: - %2 = icmp sgt i64 %0, %1 - ret i1 %2 + %2 = sdiv i64 %0, %1 + ret i64 %2 } - define i1 @"<="(i64 %0, i64 %1) { + define i64 @"*"(i64 %0, i64 %1) { entry: - %2 = icmp sle i64 %0, %1 - ret i1 %2 + %2 = mul i64 %0, %1 + ret i64 %2 } - define i1 @">="(i64 %0, i64 %1) { + define i64 @-(i64 %0, i64 %1) { entry: - %2 = icmp sge i64 %0, %1 - ret i1 %2 + %2 = sub i64 %0, %1 + ret i64 %2 } - define i1 @"="(i64 %0, i64 %1) { + define i64 @"+"(i64 %0, i64 %1) { entry: - %2 = icmp eq i64 %0, %1 - ret i1 %2 + %2 = add i64 %0, %1 + ret i64 %2 } + declare i64 @closure_apply(i64, i64, i64) + + declare i64 @create_closure(i64, i64, i64, i64) + + declare void @print_int(i64) + define i64 @f(i64 %0) { entry: - %1 = call i1 @"="(i64 1, i64 %0) + %1 = call i1 @"="(i64 %0, i64 1) %2 = icmp ne i1 %1, false br i1 %1, label %then, label %else @@ -366,9 +366,9 @@ let%expect_test "basic" = br label %merge else: ; preds = %entry - %3 = call i64 @-(i64 1, i64 %0) + %3 = call i64 @-(i64 %0, i64 1) %4 = call i64 @f(i64 %3) - %5 = call i64 @"*"(i64 %0, i64 %4) + %5 = call i64 @"*"(i64 %4, i64 %0) br label %merge merge: ; preds = %else, %then From 09da99f177c931a544721135abdbf1c729059789 Mon Sep 17 00:00:00 2001 From: "ignat.sergeev" Date: Tue, 20 Jan 2026 05:10:57 +0300 Subject: [PATCH 27/49] extra ffi non arch specific runtime --- DOOML/lib/codegen.ml | 2 +- DOOML/lib/dune | 4 +- DOOML/lib/riscv-runtime.c | 216 ++++++++++++++++++++++++++++++++++++++ DOOML/lib/runtime.c | 134 ++++------------------- 4 files changed, 242 insertions(+), 114 deletions(-) create mode 100644 DOOML/lib/riscv-runtime.c diff --git a/DOOML/lib/codegen.ml b/DOOML/lib/codegen.ml index 187be301..d58c43a7 100644 --- a/DOOML/lib/codegen.ml +++ b/DOOML/lib/codegen.ml @@ -35,7 +35,7 @@ let declare_external name ret params = ( name, (f, t, External ) ) let emit_builtins () = - [ declare_external "print_int" void_type [| i64_type |]; + [ declare_external "print_int" i64_type [| i64_type |]; declare_internal "create_closure" i64_type [| i64_type; i64_type; i64_type; i64_type |]; declare_internal "closure_apply" i64_type [| i64_type; i64_type; i64_type |]; define_ibinop "+" i64_type build_add; diff --git a/DOOML/lib/dune b/DOOML/lib/dune index 5c9a31be..9333cb99 100644 --- a/DOOML/lib/dune +++ b/DOOML/lib/dune @@ -21,11 +21,11 @@ (targets runtime.so) (deps runtime.c) (action - (run gcc -fPIC -shared %{deps} -o %{targets}))) + (run gcc -fPIC -shared %{deps} -o %{targets} -lffi))) (rule (targets riscv64-runtime.o) - (deps runtime.c) + (deps riscv-runtime.c) (action (run riscv64-linux-gnu-gcc -o %{targets} -c %{deps}))) diff --git a/DOOML/lib/riscv-runtime.c b/DOOML/lib/riscv-runtime.c new file mode 100644 index 00000000..f28351f3 --- /dev/null +++ b/DOOML/lib/riscv-runtime.c @@ -0,0 +1,216 @@ +#include +#include +#include +#include + +#define MEM 65536 +#define STACK 16384 + +typedef long int64_t; + +__attribute__((aligned(16))) +int64_t mem[MEM]; +__attribute__((aligned(16))) +int64_t stack[STACK]; + +int64_t *stack_end = stack + STACK; + +static int64_t ptr = 0; +static int64_t *xmalloc(int64_t size) { + int64_t *res = &(mem[ptr]); + ptr += size; + return res; +} + +typedef int64_t *tuple_t; + +tuple_t create_tuple(int64_t size, int64_t init) { + tuple_t tuple = xmalloc(size + 1); + tuple[0] = size; + for (int64_t i = 0; i < size; i++) + tuple[i + 1] = ((int64_t*) init)[i]; + + return tuple; +} + +int64_t tuple_nth(int64_t tuple, int64_t i) { + return ((int64_t*) tuple)[i + 1]; +} + +typedef int64_t *closure_t; + +int64_t call_function(void *func, int64_t nargs, int64_t *args) { + int64_t result; + + if (nargs <= 8) { + asm volatile ( + "li t0, 0\n" + "beq %[nargs], t0, 1f\n" + "ld a0, 0(%[args])\n" + + "li t0, 1\n" + "beq %[nargs], t0, 1f\n" + "ld a1, 8(%[args])\n" + + "li t0, 2\n" + "beq %[nargs], t0, 1f\n" + "ld a2, 16(%[args])\n" + + "li t0, 3\n" + "beq %[nargs], t0, 1f\n" + "ld a3, 24(%[args])\n" + + "li t0, 4\n" + "beq %[nargs], t0, 1f\n" + "ld a4, 32(%[args])\n" + + "li t0, 5\n" + "beq %[nargs], t0, 1f\n" + "ld a5, 40(%[args])\n" + + "li t0, 6\n" + "beq %[nargs], t0, 1f\n" + "ld a6, 48(%[args])\n" + + "li t0, 7\n" + "beq %[nargs], t0, 1f\n" + "ld a7, 56(%[args])\n" + + "1:\n" + "jalr %[func]\n" + + "mv %[result], a0\n" + + : [result] "=r" (result) + : [func] "r" (func), [nargs] "r" (nargs), [args] "r" (args) + : "ra", "t0", "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", + "memory" + ); + } else { + int64_t stack_size = (nargs - 8) * 8; + + asm volatile ( + "addi sp, sp, -32\n" + "sd ra, 24(sp)\n" + "sd s1, 8(sp)\n" + + "mv s1, %[nargs]\n" + "mv s2, %[args]\n" + + "sub sp, sp, %[stack_size]\n" + + "ld a0, 0(s2)\n" + "ld a1, 8(s2)\n" + "ld a2, 16(s2)\n" + "ld a3, 24(s2)\n" + "ld a4, 32(s2)\n" + "ld a5, 40(s2)\n" + "ld a6, 48(s2)\n" + "ld a7, 56(s2)\n" + + "li t0, 64\n" + "add t1, s2, t0\n" + "mv t2, sp\n" + + "li t3, 8\n" + "sub t4, s1, t3\n" + + "copy_loop:\n" + "beqz t4, copy_done\n" + "ld t5, 0(t1)\n" + "sd t5, 0(t2)\n" + "addi t1, t1, 8\n" + "addi t2, t2, 8\n" + "addi t4, t4, -1\n" + "j copy_loop\n" + + "copy_done:\n" + "jalr %[func]\n" + + "mv %[result], a0\n" + + "add sp, sp, %[stack_size]\n" + + "ld s1, 8(sp)\n" + "ld ra, 24(sp)\n" + "addi sp, sp, 32\n" + : [result] "=r" (result) + : [func] "r" (func), [nargs] "r" (nargs), [args] "r" (args), + [stack_size] "r" (stack_size) + : "ra", "t0", "t1", "t2", "t3", "t4", "t5", + "s1", "s2", + "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", + "memory" + ); + } + + return result; +} + +#define debugf printf + +int64_t create_closure(int64_t callee, int64_t arity, int64_t argc, int64_t argv_) { + assert(argc < arity); + + debugf("> create_closure(%ld, %ld, %ld, %ld)\n", callee, arity, argc, argv_); + + int64_t *argv = (int64_t*) argv_; + + closure_t closure = xmalloc(arity + 3); + closure[0] = callee; + closure[1] = arity; + closure[2] = argc; + + for (int64_t i = 0; i < argc; i++) { + closure[i + 3] = argv[i]; + } + debugf("< create_closure() -> %ld\n", (int64_t) closure); + + return (int64_t) closure; +} + +int64_t copy_closure(int64_t closure_) { + debugf("> copy_closure(%ld)\n", closure_); + + closure_t closure = (closure_t) closure_; + int64_t arity = closure[1]; + + closure_t closure2 = xmalloc(arity + 3); + for (int64_t i = 0; i < arity + 3; i++) { + closure2[i] = closure[i]; + } + + debugf("< copy_closure() -> %ld\n", (int64_t) closure2); + + return (int64_t) closure2; +} + +int64_t closure_apply(int64_t closure_, int64_t argc, int64_t argv_) { + debugf("> closure_apply(%ld, %ld, %ld)\n", closure_, argc, argv_); + + int64_t *argv = (int64_t*) argv_; + closure_t closure = (closure_t) (copy_closure (closure_)); + debugf(" closure_apply: closure stats %ld %ld %ld\n", closure[0], closure[1], closure[2]); + int64_t current = closure[2]; + for (int64_t i = 0; i < argc; i++) { + debugf(" closure_apply: arg %ld %ld is %ld in the orig\n", i, argv[i], i + current); + closure[i + current + 3] = argv[i]; + closure[2]++; + } + + if (closure[2] >= closure[1]) { + debugf(" closure_apply: calling %ld %ld %ld\n", closure[0], closure[1], (int64_t) &(closure[3])); + return call_function((void*) closure[0], closure[1], &(closure[3])); + } else { + debugf(" closure_apply: returning a new closure %ld %ld %ld\n", closure[0], closure[1], (int64_t) &(closure[3])); + return (int64_t) closure; + } +} + +int64_t print_int(int64_t n) { + return printf("%ld\n", n); +} + +void exit2(void) { + exit(0); +} diff --git a/DOOML/lib/runtime.c b/DOOML/lib/runtime.c index 0c7c35cd..06316ec2 100644 --- a/DOOML/lib/runtime.c +++ b/DOOML/lib/runtime.c @@ -1,6 +1,7 @@ #include #include #include +#include #include #define MEM 65536 @@ -39,114 +40,6 @@ int64_t tuple_nth(int64_t tuple, int64_t i) { typedef int64_t *closure_t; -int64_t call_function(void *func, int64_t nargs, int64_t *args) { - int64_t result; - - if (nargs <= 8) { - asm volatile ( - "li t0, 0\n" - "beq %[nargs], t0, 1f\n" - "ld a0, 0(%[args])\n" - - "li t0, 1\n" - "beq %[nargs], t0, 1f\n" - "ld a1, 8(%[args])\n" - - "li t0, 2\n" - "beq %[nargs], t0, 1f\n" - "ld a2, 16(%[args])\n" - - "li t0, 3\n" - "beq %[nargs], t0, 1f\n" - "ld a3, 24(%[args])\n" - - "li t0, 4\n" - "beq %[nargs], t0, 1f\n" - "ld a4, 32(%[args])\n" - - "li t0, 5\n" - "beq %[nargs], t0, 1f\n" - "ld a5, 40(%[args])\n" - - "li t0, 6\n" - "beq %[nargs], t0, 1f\n" - "ld a6, 48(%[args])\n" - - "li t0, 7\n" - "beq %[nargs], t0, 1f\n" - "ld a7, 56(%[args])\n" - - "1:\n" - "jalr %[func]\n" - - "mv %[result], a0\n" - - : [result] "=r" (result) - : [func] "r" (func), [nargs] "r" (nargs), [args] "r" (args) - : "ra", "t0", "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", - "memory" - ); - } else { - int64_t stack_size = (nargs - 8) * 8; - - asm volatile ( - "addi sp, sp, -32\n" - "sd ra, 24(sp)\n" - "sd s1, 8(sp)\n" - - "mv s1, %[nargs]\n" - "mv s2, %[args]\n" - - "sub sp, sp, %[stack_size]\n" - - "ld a0, 0(s2)\n" - "ld a1, 8(s2)\n" - "ld a2, 16(s2)\n" - "ld a3, 24(s2)\n" - "ld a4, 32(s2)\n" - "ld a5, 40(s2)\n" - "ld a6, 48(s2)\n" - "ld a7, 56(s2)\n" - - "li t0, 64\n" - "add t1, s2, t0\n" - "mv t2, sp\n" - - "li t3, 8\n" - "sub t4, s1, t3\n" - - "copy_loop:\n" - "beqz t4, copy_done\n" - "ld t5, 0(t1)\n" - "sd t5, 0(t2)\n" - "addi t1, t1, 8\n" - "addi t2, t2, 8\n" - "addi t4, t4, -1\n" - "j copy_loop\n" - - "copy_done:\n" - "jalr %[func]\n" - - "mv %[result], a0\n" - - "add sp, sp, %[stack_size]\n" - - "ld s1, 8(sp)\n" - "ld ra, 24(sp)\n" - "addi sp, sp, 32\n" - : [result] "=r" (result) - : [func] "r" (func), [nargs] "r" (nargs), [args] "r" (args), - [stack_size] "r" (stack_size) - : "ra", "t0", "t1", "t2", "t3", "t4", "t5", - "s1", "s2", - "a0", "a1", "a2", "a3", "a4", "a5", "a6", "a7", - "memory" - ); - } - - return result; -} - #define debugf printf int64_t create_closure(int64_t callee, int64_t arity, int64_t argc, int64_t argv_) { @@ -200,15 +93,34 @@ int64_t closure_apply(int64_t closure_, int64_t argc, int64_t argv_) { if (closure[2] >= closure[1]) { debugf(" closure_apply: calling %ld %ld %ld\n", closure[0], closure[1], (int64_t) &(closure[3])); - return call_function((void*) closure[0], closure[1], &(closure[3])); + + ffi_cif func; + + int64_t arity = closure[1]; + ffi_type *args_t[arity]; + void *args_v[arity]; + for (int64_t i = 0; i < arity; i++) { + args_t[i] = &ffi_type_sint64; + args_v[i] = &closure[i + 3]; + } + + if (ffi_prep_cif(&func, FFI_DEFAULT_ABI, arity, &ffi_type_sint64, args_t) != FFI_OK) { + debugf("closure call failed"); + exit(1); + } + + ffi_sarg ret; + ffi_call(&func, FFI_FN(closure[0]), &ret, args_v); + + return ret; } else { debugf(" closure_apply: returning a new closure %ld %ld %ld\n", closure[0], closure[1], (int64_t) &(closure[3])); return (int64_t) closure; } } -void print_int(int64_t n) { - printf("%ld\n", n); +int64_t print_int(int64_t n) { + return printf("%ld\n", n); } void exit2(void) { From 55bf9a7da1fda80a983b146e41668b3909b341a3 Mon Sep 17 00:00:00 2001 From: "ignat.sergeev" Date: Tue, 20 Jan 2026 05:26:13 +0300 Subject: [PATCH 28/49] specify riscv features --- DOOML/bin/main.ml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/DOOML/bin/main.ml b/DOOML/bin/main.ml index 7be89440..5fce87af 100644 --- a/DOOML/bin/main.ml +++ b/DOOML/bin/main.ml @@ -21,13 +21,14 @@ let parse input = let () = match Array.to_list Sys.argv with | [ _exe; input; output ] -> - let triple = "riscv64-unknown-linux-gnu" in + let riscv_triple = "riscv64-unknown-linux-gnu" in + let riscv_features = "+d" in let module_ = match parse input with | Error msg -> failf "%s" msg - | Ok anf_list -> Codegen.emit_ir ~triple anf_list + | Ok anf_list -> Codegen.emit_ir ~triple:riscv_triple anf_list in - Codegen.optimize_ir ~triple module_; - Codegen.emit_binary ~triple module_ output + Codegen.optimize_ir ~triple:riscv_triple module_; + Codegen.emit_binary ~triple:riscv_triple ~features:riscv_features module_ output | _ -> exit 1 ;; From 2b6b2202d1e702ebedd6b4d5e8a613f8f1dc21fd Mon Sep 17 00:00:00 2001 From: "ignat.sergeev" Date: Tue, 20 Jan 2026 20:52:13 +0300 Subject: [PATCH 29/49] merge runtime files --- DOOML/lib/dune | 16 ++- DOOML/lib/runtime/any-call-runtime.c | 26 +++++ DOOML/lib/runtime/call-runtime.h | 3 + .../riscv-call-runtime.c} | 108 +----------------- DOOML/lib/{ => runtime}/runtime.c | 24 +--- 5 files changed, 44 insertions(+), 133 deletions(-) create mode 100644 DOOML/lib/runtime/any-call-runtime.c create mode 100644 DOOML/lib/runtime/call-runtime.h rename DOOML/lib/{riscv-runtime.c => runtime/riscv-call-runtime.c} (52%) rename DOOML/lib/{ => runtime}/runtime.c (84%) diff --git a/DOOML/lib/dune b/DOOML/lib/dune index 9333cb99..e5a7befd 100644 --- a/DOOML/lib/dune +++ b/DOOML/lib/dune @@ -19,18 +19,24 @@ (rule (targets runtime.so) - (deps runtime.c) + (deps runtime/runtime.c runtime/call-runtime.h runtime/any-call-runtime.c) (action (run gcc -fPIC -shared %{deps} -o %{targets} -lffi))) (rule - (targets riscv64-runtime.o) - (deps riscv-runtime.c) + (targets riscv-call-runtime.o) + (deps runtime/riscv-call-runtime.c runtime/call-runtime.h) (action - (run riscv64-linux-gnu-gcc -o %{targets} -c %{deps}))) + (run riscv64-linux-gnu-gcc -c %{deps}))) + +(rule + (targets runtime.o) + (deps runtime/runtime.c runtime/call-runtime.h) + (action + (run riscv64-linux-gnu-gcc -c %{deps}))) (rule (targets riscv64-runtime.a) - (deps riscv64-runtime.o) + (deps runtime.o riscv-call-runtime.o) (action (run riscv64-linux-gnu-ar rcs %{targets} %{deps}))) diff --git a/DOOML/lib/runtime/any-call-runtime.c b/DOOML/lib/runtime/any-call-runtime.c new file mode 100644 index 00000000..8c089c06 --- /dev/null +++ b/DOOML/lib/runtime/any-call-runtime.c @@ -0,0 +1,26 @@ +#include "call-runtime.h" +#include +#include +#include + +int64_t call_function(void *func, int64_t arity, int64_t *args) { + ffi_cif cif; + + ffi_type *args_t[arity]; + void *args_v[arity]; + for (int64_t i = 0; i < arity; i++) { + args_t[i] = &ffi_type_sint64; + args_v[i] = &args[i]; + } + + if (ffi_prep_cif(&cif, FFI_DEFAULT_ABI, arity, &ffi_type_sint64, args_t) != FFI_OK) { + fprintf(stderr, "closure call failed"); + exit(1); + } + + ffi_sarg ret; + ffi_call(&cif, FFI_FN(func), &ret, args_v); + + return ret; +} + diff --git a/DOOML/lib/runtime/call-runtime.h b/DOOML/lib/runtime/call-runtime.h new file mode 100644 index 00000000..6a2b9353 --- /dev/null +++ b/DOOML/lib/runtime/call-runtime.h @@ -0,0 +1,3 @@ +typedef long int64_t; + +int64_t call_function(void *func, int64_t nargs, int64_t *args); diff --git a/DOOML/lib/riscv-runtime.c b/DOOML/lib/runtime/riscv-call-runtime.c similarity index 52% rename from DOOML/lib/riscv-runtime.c rename to DOOML/lib/runtime/riscv-call-runtime.c index f28351f3..bcb8eb88 100644 --- a/DOOML/lib/riscv-runtime.c +++ b/DOOML/lib/runtime/riscv-call-runtime.c @@ -1,43 +1,4 @@ -#include -#include -#include -#include - -#define MEM 65536 -#define STACK 16384 - -typedef long int64_t; - -__attribute__((aligned(16))) -int64_t mem[MEM]; -__attribute__((aligned(16))) -int64_t stack[STACK]; - -int64_t *stack_end = stack + STACK; - -static int64_t ptr = 0; -static int64_t *xmalloc(int64_t size) { - int64_t *res = &(mem[ptr]); - ptr += size; - return res; -} - -typedef int64_t *tuple_t; - -tuple_t create_tuple(int64_t size, int64_t init) { - tuple_t tuple = xmalloc(size + 1); - tuple[0] = size; - for (int64_t i = 0; i < size; i++) - tuple[i + 1] = ((int64_t*) init)[i]; - - return tuple; -} - -int64_t tuple_nth(int64_t tuple, int64_t i) { - return ((int64_t*) tuple)[i + 1]; -} - -typedef int64_t *closure_t; +#include "call-runtime.h" int64_t call_function(void *func, int64_t nargs, int64_t *args) { int64_t result; @@ -147,70 +108,3 @@ int64_t call_function(void *func, int64_t nargs, int64_t *args) { return result; } -#define debugf printf - -int64_t create_closure(int64_t callee, int64_t arity, int64_t argc, int64_t argv_) { - assert(argc < arity); - - debugf("> create_closure(%ld, %ld, %ld, %ld)\n", callee, arity, argc, argv_); - - int64_t *argv = (int64_t*) argv_; - - closure_t closure = xmalloc(arity + 3); - closure[0] = callee; - closure[1] = arity; - closure[2] = argc; - - for (int64_t i = 0; i < argc; i++) { - closure[i + 3] = argv[i]; - } - debugf("< create_closure() -> %ld\n", (int64_t) closure); - - return (int64_t) closure; -} - -int64_t copy_closure(int64_t closure_) { - debugf("> copy_closure(%ld)\n", closure_); - - closure_t closure = (closure_t) closure_; - int64_t arity = closure[1]; - - closure_t closure2 = xmalloc(arity + 3); - for (int64_t i = 0; i < arity + 3; i++) { - closure2[i] = closure[i]; - } - - debugf("< copy_closure() -> %ld\n", (int64_t) closure2); - - return (int64_t) closure2; -} - -int64_t closure_apply(int64_t closure_, int64_t argc, int64_t argv_) { - debugf("> closure_apply(%ld, %ld, %ld)\n", closure_, argc, argv_); - - int64_t *argv = (int64_t*) argv_; - closure_t closure = (closure_t) (copy_closure (closure_)); - debugf(" closure_apply: closure stats %ld %ld %ld\n", closure[0], closure[1], closure[2]); - int64_t current = closure[2]; - for (int64_t i = 0; i < argc; i++) { - debugf(" closure_apply: arg %ld %ld is %ld in the orig\n", i, argv[i], i + current); - closure[i + current + 3] = argv[i]; - closure[2]++; - } - - if (closure[2] >= closure[1]) { - debugf(" closure_apply: calling %ld %ld %ld\n", closure[0], closure[1], (int64_t) &(closure[3])); - return call_function((void*) closure[0], closure[1], &(closure[3])); - } else { - debugf(" closure_apply: returning a new closure %ld %ld %ld\n", closure[0], closure[1], (int64_t) &(closure[3])); - return (int64_t) closure; - } -} - -int64_t print_int(int64_t n) { - return printf("%ld\n", n); -} - -void exit2(void) { - exit(0); -} diff --git a/DOOML/lib/runtime.c b/DOOML/lib/runtime/runtime.c similarity index 84% rename from DOOML/lib/runtime.c rename to DOOML/lib/runtime/runtime.c index 06316ec2..fe15ea61 100644 --- a/DOOML/lib/runtime.c +++ b/DOOML/lib/runtime/runtime.c @@ -1,7 +1,8 @@ +#include "call-runtime.h" + #include #include #include -#include #include #define MEM 65536 @@ -93,26 +94,7 @@ int64_t closure_apply(int64_t closure_, int64_t argc, int64_t argv_) { if (closure[2] >= closure[1]) { debugf(" closure_apply: calling %ld %ld %ld\n", closure[0], closure[1], (int64_t) &(closure[3])); - - ffi_cif func; - - int64_t arity = closure[1]; - ffi_type *args_t[arity]; - void *args_v[arity]; - for (int64_t i = 0; i < arity; i++) { - args_t[i] = &ffi_type_sint64; - args_v[i] = &closure[i + 3]; - } - - if (ffi_prep_cif(&func, FFI_DEFAULT_ABI, arity, &ffi_type_sint64, args_t) != FFI_OK) { - debugf("closure call failed"); - exit(1); - } - - ffi_sarg ret; - ffi_call(&func, FFI_FN(closure[0]), &ret, args_v); - - return ret; + return call_function((void*) closure[0], closure[1], &(closure[3])); } else { debugf(" closure_apply: returning a new closure %ld %ld %ld\n", closure[0], closure[1], (int64_t) &(closure[3])); return (int64_t) closure; From 28318ddb62f787ddc110c78098d5873f505221a0 Mon Sep 17 00:00:00 2001 From: "ignat.sergeev" Date: Fri, 23 Jan 2026 04:40:30 +0300 Subject: [PATCH 30/49] rm flake --- DOOML/flake.nix | 51 ------------------------------------------------- 1 file changed, 51 deletions(-) delete mode 100644 DOOML/flake.nix diff --git a/DOOML/flake.nix b/DOOML/flake.nix deleted file mode 100644 index 5f87d889..00000000 --- a/DOOML/flake.nix +++ /dev/null @@ -1,51 +0,0 @@ -{ - inputs = { - nixpkgs.url = "github:nixos/nixpkgs"; - flake-utils.url = "github:numtide/flake-utils"; - }; - - outputs = { self, nixpkgs, flake-utils }: - flake-utils.lib.eachDefaultSystem (system: - let - pkgs = nixpkgs.legacyPackages.x86_64-linux; - riscv-cross-pkgs = import nixpkgs { - localSystem = "${system}"; - crossSystem = { - config = "riscv64-linux-gnu"; - }; - }; - in { - packages.dooml = pkgs.stdenv.mkDerivation { - name = "dooml"; - version = "0.0"; - src = ./.; - buildInputs = with pkgs; [ - ]; - buildPhase = '' - ls - ''; - installPhase = ''mkdir $out''; - }; - - packages.default = self.packages.${system}.dooml; - devShell = pkgs.mkShell { - name = "dooml"; - packages = with pkgs; [ - # Lsp, treesitter-parsers and debugger - ocamlformat_0_28_1 - - # Build tools - opam - - # Dependencies - gmp - llvm_19 - zlib - libtinfo - ] ++ [ riscv-cross-pkgs.buildPackages.gcc ]; - shellHook = '' - eval $(opam env) - ''; - }; - }); -} From 15f8331fd0774773b94303371dba57d5018f2430 Mon Sep 17 00:00:00 2001 From: "ignat.sergeev" Date: Mon, 19 Jan 2026 19:08:49 +0300 Subject: [PATCH 31/49] basic tuple expr support --- DOOML/lib/anf.ml | 4 +++- DOOML/lib/ast.ml | 11 +++++++++-- DOOML/lib/cc.ml | 4 +++- DOOML/lib/fe.ml | 17 +++++++++++++---- DOOML/lib/ll.ml | 4 +++- 5 files changed, 31 insertions(+), 9 deletions(-) diff --git a/DOOML/lib/anf.ml b/DOOML/lib/anf.ml index cf320508..63385a86 100644 --- a/DOOML/lib/anf.ml +++ b/DOOML/lib/anf.ml @@ -107,7 +107,7 @@ let rec arg = function | Ast.PUnit -> return ("()", []) | Ast.Plug -> return ("_", []) | Ast.Ident name -> return (name, []) - | Ast.Tuple els -> + | Ast.PTuple els -> let* els = List.fold_right (fun a acc -> @@ -133,6 +133,8 @@ let rec anf (k : immexpr -> Ctx.t -> aexpr * Ctx.t) = function let* () = addsym s in let* ret = k (immid s) in return ret + | Tuple _ -> + failwith "todo tuples anf" | App _ as app -> let rec aux immexprs = function | Ast.Var s -> diff --git a/DOOML/lib/ast.ml b/DOOML/lib/ast.ml index 9700203f..e9f1ae70 100644 --- a/DOOML/lib/ast.ml +++ b/DOOML/lib/ast.ml @@ -20,14 +20,14 @@ type pattern = | PUnit (** () *) | Plug (** _ *) | Ident of ident - | Tuple of pattern list + | PTuple of pattern list [@@deriving variants] let rec pp_pattern ppf = function | PUnit -> Format.fprintf ppf "()" | Plug -> Format.fprintf ppf "_" | Ident s -> Format.fprintf ppf "%s" s - | Tuple ss -> + | PTuple ss -> Format.fprintf ppf "(%a)" (Format.pp_print_list ~pp_sep:pp_sep_quote pp_pattern) ss ;; @@ -41,6 +41,7 @@ type rec_flag = type expr = | Const of int | Var of ident + | Tuple of expr list | App of expr * expr | Let of rec_flag * pattern * expr * expr | Ite of expr * expr * expr @@ -55,6 +56,12 @@ let fun_ args = function let rec pp_expr ppf = function | Const c -> Format.fprintf ppf "%d" c | Var ident -> Format.fprintf ppf "%a" pp_ident ident + | Tuple exprs -> + Format.fprintf + ppf + "(%a)" + (Format.pp_print_list ~pp_sep:pp_sep_quote pp_expr) + exprs | App ((Fun _ as f), arg) -> Format.fprintf ppf "(%a) %a" pp_expr f pp_expr arg | App (f, (Const _ as arg)) | App (f, (Var _ as arg)) -> Format.fprintf ppf "%a %a" pp_expr f pp_expr arg diff --git a/DOOML/lib/cc.ml b/DOOML/lib/cc.ml index 7ef43863..fbfaf5d3 100644 --- a/DOOML/lib/cc.ml +++ b/DOOML/lib/cc.ml @@ -11,7 +11,7 @@ module Ctx = struct let rec of_pattern = function | Ast.PUnit | Plug -> Scope.empty | Ident s -> Scope.singleton s - | Tuple patterns -> + | PTuple patterns -> List.map of_pattern patterns |> List.fold_left Scope.union Scope.empty ;; @@ -96,6 +96,8 @@ let rec cc = function | Var name as v -> let* () = capture name in return v + | Tuple _ -> + failwith "todo tuples cc" | Fun (args', body') -> (* fix rec here *) let* ctx' = of_args args' in diff --git a/DOOML/lib/fe.ml b/DOOML/lib/fe.ml index 37472fa1..540283ac 100644 --- a/DOOML/lib/fe.ml +++ b/DOOML/lib/fe.ml @@ -111,18 +111,18 @@ let ident = let punit = (string "()" |> token) *> return Ast.punit let plug = (string "_" |> token) *> return Ast.plug -let tuple pattern = +let ptuple pattern = let tuple = let* fpattern = pattern in let* patterns = many (token (char ',') *> pattern) in - return (Ast.tuple (fpattern :: patterns)) + return (Ast.ptuple (fpattern :: patterns)) in parens tuple ;; let pattern = fix (fun pattern -> - punit <|> plug <|> tuple pattern <|> (ident >>= fun ident -> return (Ast.ident ident))) + punit <|> plug <|> ptuple pattern <|> (ident >>= fun ident -> return (Ast.ident ident))) ;; let const = @@ -200,6 +200,15 @@ let ite expr = Ast.ite cond then_ else_ |> return ;; +let tuple expr = + let tuple = + let* fexpr = expr in + let* exprs = many (token (char ',') *> expr) in + return (Ast.tuple (fexpr :: exprs)) + in + parens tuple +;; + let expr = fix (fun expr -> let expr' = @@ -207,7 +216,7 @@ let expr = match c with | Some '0' .. '9' -> const | Some '(' -> - let* r = parens expr <|> var in + let* r = parens expr <|> tuple expr <|> var in r |> return | _ -> var in diff --git a/DOOML/lib/ll.ml b/DOOML/lib/ll.ml index 9f706109..4df28a4d 100644 --- a/DOOML/lib/ll.ml +++ b/DOOML/lib/ll.ml @@ -21,7 +21,7 @@ module Ctx = struct let rec of_pattern = function | Ast.PUnit | Plug -> Scope.empty | Ident s -> Scope.singleton s - | Tuple patterns -> + | PTuple patterns -> List.map of_pattern patterns |> List.fold_left Scope.union Scope.empty ;; @@ -54,6 +54,8 @@ open State let rec ll = function | Ast.Const _ as c -> return c | Var _ as v -> return v + | Tuple _ -> + failwith "todo tuples ll" | Fun (args', body') -> let* body' = ll body' in let* f = lift args' body' in From 589906ab20731114933efb26112e194205648ea9 Mon Sep 17 00:00:00 2001 From: "ignat.sergeev" Date: Mon, 19 Jan 2026 22:14:14 +0300 Subject: [PATCH 32/49] tuples cc,ll,anf --- DOOML/lib/anf.ml | 20 +++++++++++++++++--- DOOML/lib/ast.ml | 8 ++------ DOOML/lib/cc.ml | 12 ++++++++++-- DOOML/lib/codegen.ml | 1 + DOOML/lib/ll.ml | 12 ++++++++++-- DOOML/lib/riscv.ml | 1 + 6 files changed, 41 insertions(+), 13 deletions(-) diff --git a/DOOML/lib/anf.ml b/DOOML/lib/anf.ml index 63385a86..5c1ee073 100644 --- a/DOOML/lib/anf.ml +++ b/DOOML/lib/anf.ml @@ -1,11 +1,18 @@ type immexpr = | ImmNum of int | ImmId of string + | ImmTuple of immexpr list [@@deriving variants] -let pp_immexpr ppf = function +let rec pp_immexpr ppf = function | ImmNum d -> Format.fprintf ppf "%d" d | ImmId s -> Format.fprintf ppf "%s" s + | ImmTuple s -> + Format.fprintf + ppf + "(%a)" + (Format.pp_print_list ~pp_sep:(fun ppf () -> Format.fprintf ppf ", ") pp_immexpr) + s ;; type cexpr = @@ -133,8 +140,15 @@ let rec anf (k : immexpr -> Ctx.t -> aexpr * Ctx.t) = function let* () = addsym s in let* ret = k (immid s) in return ret - | Tuple _ -> - failwith "todo tuples anf" + | Tuple exprs -> + let rec anf_list immexprs = function + | [] -> + let* tsym = gensym () in + let* expr = k (immid tsym) in + return (alet tsym (cimm (immtuple immexprs)) expr) + | hd :: tl -> anf (fun immhd -> anf_list (immhd :: immexprs) tl) hd + in + anf_list [] exprs | App _ as app -> let rec aux immexprs = function | Ast.Var s -> diff --git a/DOOML/lib/ast.ml b/DOOML/lib/ast.ml index e9f1ae70..a710421b 100644 --- a/DOOML/lib/ast.ml +++ b/DOOML/lib/ast.ml @@ -56,12 +56,8 @@ let fun_ args = function let rec pp_expr ppf = function | Const c -> Format.fprintf ppf "%d" c | Var ident -> Format.fprintf ppf "%a" pp_ident ident - | Tuple exprs -> - Format.fprintf - ppf - "(%a)" - (Format.pp_print_list ~pp_sep:pp_sep_quote pp_expr) - exprs + | Tuple exprs -> + Format.fprintf ppf "(%a)" (Format.pp_print_list ~pp_sep:pp_sep_quote pp_expr) exprs | App ((Fun _ as f), arg) -> Format.fprintf ppf "(%a) %a" pp_expr f pp_expr arg | App (f, (Const _ as arg)) | App (f, (Var _ as arg)) -> Format.fprintf ppf "%a %a" pp_expr f pp_expr arg diff --git a/DOOML/lib/cc.ml b/DOOML/lib/cc.ml index fbfaf5d3..8850ab12 100644 --- a/DOOML/lib/cc.ml +++ b/DOOML/lib/cc.ml @@ -96,8 +96,16 @@ let rec cc = function | Var name as v -> let* () = capture name in return v - | Tuple _ -> - failwith "todo tuples cc" + | Tuple exprs -> + let rec cc_list = function + | [] -> return [] + | hd :: tl -> + let* hd = cc hd in + let* tl = cc_list tl in + return (hd :: tl) + in + let* exprs = cc_list exprs in + return (Ast.tuple exprs) | Fun (args', body') -> (* fix rec here *) let* ctx' = of_args args' in diff --git a/DOOML/lib/codegen.ml b/DOOML/lib/codegen.ml index d58c43a7..4910fa76 100644 --- a/DOOML/lib/codegen.ml +++ b/DOOML/lib/codegen.ml @@ -73,6 +73,7 @@ let emit_immexpr binds funcs = | Some (f, _, External) -> emit_create_closure funcs f [] | Some _ | None -> failf "Unbound variable %s" s)) + | Anf.ImmTuple _ -> failf "todo" let emit_capp binds funcs name args = let app_type = match Map.find funcs name with diff --git a/DOOML/lib/ll.ml b/DOOML/lib/ll.ml index 4df28a4d..45fa0315 100644 --- a/DOOML/lib/ll.ml +++ b/DOOML/lib/ll.ml @@ -54,8 +54,16 @@ open State let rec ll = function | Ast.Const _ as c -> return c | Var _ as v -> return v - | Tuple _ -> - failwith "todo tuples ll" + | Tuple exprs -> + let rec ll_list = function + | [] -> return [] + | hd :: tl -> + let* hd = ll hd in + let* tl = ll_list tl in + return (hd :: tl) + in + let* exprs = ll_list exprs in + return (Ast.tuple exprs) | Fun (args', body') -> let* body' = ll body' in let* f = lift args' body' in diff --git a/DOOML/lib/riscv.ml b/DOOML/lib/riscv.ml index 8c744aca..af0e7c5b 100644 --- a/DOOML/lib/riscv.ml +++ b/DOOML/lib/riscv.ml @@ -407,6 +407,7 @@ let immexpr reg = function let* () = emit (ld reg offset bp) in return reg | None -> failf "Unknown variable %s" var) + | Anf.ImmTuple _ -> failf "todo" ;; let rec cexpr = From 6c33b73aa5e70e3834016c6b8bde871bdd4f12a0 Mon Sep 17 00:00:00 2001 From: "ignat.sergeev" Date: Mon, 19 Jan 2026 22:15:58 +0300 Subject: [PATCH 33/49] fix order --- DOOML/lib/anf.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DOOML/lib/anf.ml b/DOOML/lib/anf.ml index 5c1ee073..f059e506 100644 --- a/DOOML/lib/anf.ml +++ b/DOOML/lib/anf.ml @@ -145,7 +145,7 @@ let rec anf (k : immexpr -> Ctx.t -> aexpr * Ctx.t) = function | [] -> let* tsym = gensym () in let* expr = k (immid tsym) in - return (alet tsym (cimm (immtuple immexprs)) expr) + return (alet tsym (cimm (immtuple (List.rev immexprs))) expr) | hd :: tl -> anf (fun immhd -> anf_list (immhd :: immexprs) tl) hd in anf_list [] exprs From 3c8e7ee602406908928510514d34edab6028098e Mon Sep 17 00:00:00 2001 From: "ignat.sergeev" Date: Mon, 19 Jan 2026 22:39:19 +0300 Subject: [PATCH 34/49] tuples llvm --- DOOML/lib/anf.ml | 2 +- DOOML/lib/codegen.ml | 20 ++++++++++++++++++-- DOOML/lib/runtime/runtime.c | 4 ++-- 3 files changed, 21 insertions(+), 5 deletions(-) diff --git a/DOOML/lib/anf.ml b/DOOML/lib/anf.ml index f059e506..e60056f9 100644 --- a/DOOML/lib/anf.ml +++ b/DOOML/lib/anf.ml @@ -126,7 +126,7 @@ let rec arg = function in let* sym = gensym () in let lets = - List.mapi (fun i (name, _) -> name, capp "nth" [ immid sym; immnum i ]) els + List.mapi (fun i (name, _) -> name, capp "tuple_nth" [ immid sym; immnum i ]) els in let lets = lets @ List.concat_map snd els in return (sym, lets) diff --git a/DOOML/lib/codegen.ml b/DOOML/lib/codegen.ml index 4910fa76..753644a5 100644 --- a/DOOML/lib/codegen.ml +++ b/DOOML/lib/codegen.ml @@ -38,6 +38,8 @@ let emit_builtins () = [ declare_external "print_int" i64_type [| i64_type |]; declare_internal "create_closure" i64_type [| i64_type; i64_type; i64_type; i64_type |]; declare_internal "closure_apply" i64_type [| i64_type; i64_type; i64_type |]; + declare_internal "create_tuple" i64_type [| i64_type; i64_type |]; + declare_internal "tuple_nth" i64_type [| i64_type; i64_type |]; define_ibinop "+" i64_type build_add; define_ibinop "-" i64_type build_sub; define_ibinop "*" i64_type build_mul; @@ -63,7 +65,19 @@ let emit_create_closure funcs func args = let arity_lv = const_int i64_type arity in build_call typ create_closure [ func; arity_lv; argc_lv; argv_lv ] -let emit_immexpr binds funcs = +let emit_create_tuple funcs init = + let size = List.length init in + let size_lv = const_int i64_type size in + let init_lv = build_array_alloca ~name:"create_tuple_init" i64_type size_lv in + init |> List.iteri + (fun i a -> + let el_ptr = build_gep init_lv [| (const_int i64_type i) |] in + build_store a el_ptr |> ignore); + let init_lv = build_pointercast init_lv i64_type ~name:"init_arr_toi64_cast" in + let (create_tuple, typ, _) = Map.find_exn funcs "create_tuple" in + build_call typ create_tuple [ size_lv; init_lv ] + +let rec emit_immexpr binds funcs = function | Anf.ImmNum n -> const_int i64_type n | Anf.ImmId s -> @@ -73,7 +87,9 @@ let emit_immexpr binds funcs = | Some (f, _, External) -> emit_create_closure funcs f [] | Some _ | None -> failf "Unbound variable %s" s)) - | Anf.ImmTuple _ -> failf "todo" + | Anf.ImmTuple immexprs -> + let init = List.map (fun immexpr -> emit_immexpr binds funcs immexpr) immexprs in + emit_create_tuple funcs init let emit_capp binds funcs name args = let app_type = match Map.find funcs name with diff --git a/DOOML/lib/runtime/runtime.c b/DOOML/lib/runtime/runtime.c index fe15ea61..e0c4e0a9 100644 --- a/DOOML/lib/runtime/runtime.c +++ b/DOOML/lib/runtime/runtime.c @@ -26,13 +26,13 @@ static int64_t *xmalloc(int64_t size) { typedef int64_t *tuple_t; -tuple_t create_tuple(int64_t size, int64_t init) { +int64_t create_tuple(int64_t size, int64_t init) { tuple_t tuple = xmalloc(size + 1); tuple[0] = size; for (int64_t i = 0; i < size; i++) tuple[i + 1] = ((int64_t*) init)[i]; - return tuple; + return (int64_t) tuple; } int64_t tuple_nth(int64_t tuple, int64_t i) { From 2561d1d90c143049f01792b393e8f7a319c01bb4 Mon Sep 17 00:00:00 2001 From: "ignat.sergeev" Date: Tue, 20 Jan 2026 03:45:40 +0300 Subject: [PATCH 35/49] fix builtin tuple_nth name --- DOOML/lib/builtin.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DOOML/lib/builtin.ml b/DOOML/lib/builtin.ml index 6f41f648..a9e48ada 100644 --- a/DOOML/lib/builtin.ml +++ b/DOOML/lib/builtin.ml @@ -18,6 +18,6 @@ let all = ; { name = "&&"; arity = 2 } ; { name = "||"; arity = 2 } ; { name = "print_int"; arity = 1 } - ; { name = "nth"; arity = 2 } + ; { name = "tuple_nth"; arity = 2 } ] ;; From 0b0c0a4fe1464ae1435bc2d072cb96b00696cd7c Mon Sep 17 00:00:00 2001 From: "ignat.sergeev" Date: Tue, 20 Jan 2026 04:03:25 +0300 Subject: [PATCH 36/49] fix tuple_nth visibility --- DOOML/lib/codegen.ml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DOOML/lib/codegen.ml b/DOOML/lib/codegen.ml index 753644a5..261943ca 100644 --- a/DOOML/lib/codegen.ml +++ b/DOOML/lib/codegen.ml @@ -39,7 +39,7 @@ let emit_builtins () = declare_internal "create_closure" i64_type [| i64_type; i64_type; i64_type; i64_type |]; declare_internal "closure_apply" i64_type [| i64_type; i64_type; i64_type |]; declare_internal "create_tuple" i64_type [| i64_type; i64_type |]; - declare_internal "tuple_nth" i64_type [| i64_type; i64_type |]; + declare_external "tuple_nth" i64_type [| i64_type; i64_type |]; define_ibinop "+" i64_type build_add; define_ibinop "-" i64_type build_sub; define_ibinop "*" i64_type build_mul; From 2f228cf382ca3f66162405291db1803c9f443425 Mon Sep 17 00:00:00 2001 From: "ignat.sergeev" Date: Tue, 20 Jan 2026 04:10:56 +0300 Subject: [PATCH 37/49] fix anf test --- DOOML/lib/anf.ml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/DOOML/lib/anf.ml b/DOOML/lib/anf.ml index e60056f9..82136c15 100644 --- a/DOOML/lib/anf.ml +++ b/DOOML/lib/anf.ml @@ -378,28 +378,28 @@ let%expect_test "task 3" = {| let sup14 a sup0 = let b = - (nth) sup0 0 + (tuple_nth) sup0 0 in let c = - (nth) sup0 1 + (tuple_nth) sup0 1 in let sup1 = b in let b1 = - (nth) sup1 0 + (tuple_nth) sup1 0 in let b2 = - (nth) sup1 1 + (tuple_nth) sup1 1 in let sup4 = c in let c1 = - (nth) sup4 0 + (tuple_nth) sup4 0 in let c2 = - (nth) sup4 1 + (tuple_nth) sup4 1 in let sup11 = (+) b1 b2 @@ -413,8 +413,8 @@ let%expect_test "task 3" = sup13 ;; - let f = (nth) sup14 0;; + let f = (tuple_nth) sup14 0;; - let g = (nth) sup14 1;; + let g = (tuple_nth) sup14 1;; |}] ;; From 88232d1c768e8f810d2d96c3dd699fefa99ae0f2 Mon Sep 17 00:00:00 2001 From: "ignat.sergeev" Date: Tue, 20 Jan 2026 04:12:29 +0300 Subject: [PATCH 38/49] fix codegen test --- DOOML/lib/codegen.ml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/DOOML/lib/codegen.ml b/DOOML/lib/codegen.ml index 261943ca..1dcca43e 100644 --- a/DOOML/lib/codegen.ml +++ b/DOOML/lib/codegen.ml @@ -367,6 +367,10 @@ let%expect_test "basic" = ret i64 %2 } + declare i64 @tuple_nth(i64, i64) + + declare i64 @create_tuple(i64, i64) + declare i64 @closure_apply(i64, i64, i64) declare i64 @create_closure(i64, i64, i64, i64) From fe4e4de4bf1e3c318a6754f1ad9748766bbdcb26 Mon Sep 17 00:00:00 2001 From: "ignat.sergeev" Date: Tue, 20 Jan 2026 20:24:03 +0300 Subject: [PATCH 39/49] implement gc allocation --- DOOML/lib/runtime/runtime.c | 148 +++++++++++++++++++++++++++--------- 1 file changed, 112 insertions(+), 36 deletions(-) diff --git a/DOOML/lib/runtime/runtime.c b/DOOML/lib/runtime/runtime.c index e0c4e0a9..530df536 100644 --- a/DOOML/lib/runtime/runtime.c +++ b/DOOML/lib/runtime/runtime.c @@ -6,16 +6,96 @@ #include #define MEM 65536 -#define STACK 16384 typedef long int64_t; __attribute__((aligned(16))) int64_t mem[MEM]; -__attribute__((aligned(16))) -int64_t stack[STACK]; -int64_t *stack_end = stack + STACK; +typedef struct { + uint32_t size; + uint32_t allocated_size; + uint8_t bank_idx; + uint64_t runs; +} GCStats; + +typedef struct { + GCStats stats; + + int64_t *from_bank_start; + uint32_t from_bank_size; + int64_t *to_bank_start; + uint32_t to_bank_size; + + int64_t *free_space_start; +} GC; + +typedef enum { + Tuple, + Closure, + Forward, + ENUM_SIZE_FORCE_32BIT = UINT8_MAX +} GCObjTag; + +__attribute__((aligned(8))) +typedef struct { + uint32_t size; + GCObjTag tag; +} GCObjHeader; + +typedef struct { + GCObjHeader header; + int64_t callee; + int64_t arity; + int64_t argc; + int64_t *args; +} GCClosure; + +typedef struct { + GCObjHeader header; + int64_t size; + int64_t *fields; +} GCTuple; + +static const uint64_t GC_BANK_SIZE = MEM; +static GC gc; + +void init_gc() { + GCStats init_stats = { + .runs = 0, + .bank_idx = 0, + .size = 0, + .allocated_size = 0, + }; + GC init_gc = { + .stats = init_stats, + .from_bank_size = GC_BANK_SIZE, + .from_bank_start = malloc(GC_BANK_SIZE), + .to_bank_size = GC_BANK_SIZE, + .to_bank_start = malloc(GC_BANK_SIZE), + }; + gc.free_space_start = gc.from_bank_start; + gc = init_gc; +} + +int64_t *alloc_gc(uint32_t size, GCObjTag tag) { + int64_t *ptr = gc.free_space_start; + uint32_t taken_bytes = (ptr - gc.from_bank_start) / 8; + uint32_t free_space = gc.from_bank_size - taken_bytes; + + if (free_space < size) { + fprintf(stderr, "GC OOM\n"); + exit(1); + } + + gc.free_space_start += size * 8; + gc.stats.allocated_size += size; + gc.stats.size += size; + ((GCObjHeader *)ptr)->size = size; + ((GCObjHeader *)ptr)->tag = tag; + + return ptr; +} static int64_t ptr = 0; static int64_t *xmalloc(int64_t size) { @@ -24,23 +104,19 @@ static int64_t *xmalloc(int64_t size) { return res; } -typedef int64_t *tuple_t; - int64_t create_tuple(int64_t size, int64_t init) { - tuple_t tuple = xmalloc(size + 1); - tuple[0] = size; + GCTuple *tuple = (GCTuple *)alloc_gc(sizeof(GCTuple), Tuple); + tuple->size = size; for (int64_t i = 0; i < size; i++) - tuple[i + 1] = ((int64_t*) init)[i]; + tuple->fields[i] = ((int64_t*) init)[i]; return (int64_t) tuple; } int64_t tuple_nth(int64_t tuple, int64_t i) { - return ((int64_t*) tuple)[i + 1]; + return ((GCTuple*) tuple)->fields[i]; } -typedef int64_t *closure_t; - #define debugf printf int64_t create_closure(int64_t callee, int64_t arity, int64_t argc, int64_t argv_) { @@ -50,53 +126,53 @@ int64_t create_closure(int64_t callee, int64_t arity, int64_t argc, int64_t argv int64_t *argv = (int64_t*) argv_; - closure_t closure = xmalloc(arity + 3); - closure[0] = callee; - closure[1] = arity; - closure[2] = argc; + GCClosure *closure = (GCClosure *)alloc_gc(sizeof(GCClosure), Closure); + closure->callee = callee; + closure->arity = arity; + closure->argc = argc; for (int64_t i = 0; i < argc; i++) { - closure[i + 3] = argv[i]; + closure->args[i] = argv[i]; } debugf("< create_closure() -> %ld\n", (int64_t) closure); return (int64_t) closure; } -int64_t copy_closure(int64_t closure_) { - debugf("> copy_closure(%ld)\n", closure_); - - closure_t closure = (closure_t) closure_; - int64_t arity = closure[1]; +GCClosure *copy_closure(GCClosure *closure) { + debugf("> copy_closure(%ld)\n", (int64_t) closure); - closure_t closure2 = xmalloc(arity + 3); - for (int64_t i = 0; i < arity + 3; i++) { - closure2[i] = closure[i]; + GCClosure *closure2 = (GCClosure *)alloc_gc(sizeof(GCClosure), Closure); + closure2->callee = closure->callee; + closure2->arity = closure->arity; + closure2->argc = closure->argc; + for (int64_t i = 0; i < closure->argc; i++) { + closure2->args[i] = closure->args[i]; } debugf("< copy_closure() -> %ld\n", (int64_t) closure2); - return (int64_t) closure2; + return closure2; } int64_t closure_apply(int64_t closure_, int64_t argc, int64_t argv_) { debugf("> closure_apply(%ld, %ld, %ld)\n", closure_, argc, argv_); int64_t *argv = (int64_t*) argv_; - closure_t closure = (closure_t) (copy_closure (closure_)); - debugf(" closure_apply: closure stats %ld %ld %ld\n", closure[0], closure[1], closure[2]); - int64_t current = closure[2]; + GCClosure *closure = copy_closure ((GCClosure *)closure_); + debugf(" closure_apply: closure stats %ld %ld %ld\n", closure->callee, closure->arity, closure->argc); + int64_t current_argc = closure->argc; for (int64_t i = 0; i < argc; i++) { - debugf(" closure_apply: arg %ld %ld is %ld in the orig\n", i, argv[i], i + current); - closure[i + current + 3] = argv[i]; - closure[2]++; + debugf(" closure_apply: arg %ld %ld is %ld in the orig\n", i, argv[i], i + current_argc); + closure->args[i + current_argc] = argv[i]; + closure->argc++; } - if (closure[2] >= closure[1]) { - debugf(" closure_apply: calling %ld %ld %ld\n", closure[0], closure[1], (int64_t) &(closure[3])); - return call_function((void*) closure[0], closure[1], &(closure[3])); + if (closure->argc >= closure->arity) { + debugf(" closure_apply: calling %ld %ld %ld\n", closure->callee, closure->arity, (int64_t) closure->args); + return call_function((void*) closure->callee, closure->arity, closure->args); } else { - debugf(" closure_apply: returning a new closure %ld %ld %ld\n", closure[0], closure[1], (int64_t) &(closure[3])); + debugf(" closure_apply: returning a new closure %ld %ld %ld\n", closure->callee, closure->arity, (int64_t) closure->args); return (int64_t) closure; } } From 7344b75581932e2be5f1774452b27520e51774f0 Mon Sep 17 00:00:00 2001 From: "ignat.sergeev" Date: Tue, 20 Jan 2026 23:46:20 +0300 Subject: [PATCH 40/49] fix alloc sizes, add boxing runtime --- DOOML/lib/runtime/runtime.c | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/DOOML/lib/runtime/runtime.c b/DOOML/lib/runtime/runtime.c index 530df536..3f0c88e6 100644 --- a/DOOML/lib/runtime/runtime.c +++ b/DOOML/lib/runtime/runtime.c @@ -3,15 +3,13 @@ #include #include #include +#include #include #define MEM 65536 typedef long int64_t; -__attribute__((aligned(16))) -int64_t mem[MEM]; - typedef struct { uint32_t size; uint32_t allocated_size; @@ -48,15 +46,20 @@ typedef struct { int64_t callee; int64_t arity; int64_t argc; - int64_t *args; + int64_t args[]; } GCClosure; typedef struct { GCObjHeader header; int64_t size; - int64_t *fields; + int64_t fields[]; } GCTuple; +typedef struct { + GCObjHeader header; + int64_t ptr; +} GCForward; + static const uint64_t GC_BANK_SIZE = MEM; static GC gc; @@ -97,15 +100,20 @@ int64_t *alloc_gc(uint32_t size, GCObjTag tag) { return ptr; } -static int64_t ptr = 0; -static int64_t *xmalloc(int64_t size) { - int64_t *res = &(mem[ptr]); - ptr += size; - return res; +int64_t box_imm(int64_t n) { + return (n << 1) + 1; +} + +int64_t unbox_imm(int64_t n) { + return n >> 1; +} + +bool is_imm(int64_t n) { + return n | 1; } int64_t create_tuple(int64_t size, int64_t init) { - GCTuple *tuple = (GCTuple *)alloc_gc(sizeof(GCTuple), Tuple); + GCTuple *tuple = (GCTuple *)alloc_gc(sizeof(GCTuple) + size, Tuple); tuple->size = size; for (int64_t i = 0; i < size; i++) tuple->fields[i] = ((int64_t*) init)[i]; @@ -126,7 +134,7 @@ int64_t create_closure(int64_t callee, int64_t arity, int64_t argc, int64_t argv int64_t *argv = (int64_t*) argv_; - GCClosure *closure = (GCClosure *)alloc_gc(sizeof(GCClosure), Closure); + GCClosure *closure = (GCClosure *)alloc_gc(sizeof(GCClosure) + arity, Closure); closure->callee = callee; closure->arity = arity; closure->argc = argc; @@ -142,7 +150,7 @@ int64_t create_closure(int64_t callee, int64_t arity, int64_t argc, int64_t argv GCClosure *copy_closure(GCClosure *closure) { debugf("> copy_closure(%ld)\n", (int64_t) closure); - GCClosure *closure2 = (GCClosure *)alloc_gc(sizeof(GCClosure), Closure); + GCClosure *closure2 = (GCClosure *)alloc_gc(sizeof(GCClosure) + closure->arity, Closure); closure2->callee = closure->callee; closure2->arity = closure->arity; closure2->argc = closure->argc; From 388fd24f19d232fa0aed8558f4625ca725914e2b Mon Sep 17 00:00:00 2001 From: "ignat.sergeev" Date: Wed, 21 Jan 2026 00:53:43 +0300 Subject: [PATCH 41/49] boxing --- DOOML/lib/codegen.ml | 43 ++++++++++++++++++++++++++----------- DOOML/lib/runtime/runtime.c | 41 ++++++++++++++++++++++------------- 2 files changed, 57 insertions(+), 27 deletions(-) diff --git a/DOOML/lib/codegen.ml b/DOOML/lib/codegen.ml index 1dcca43e..6d4cb451 100644 --- a/DOOML/lib/codegen.ml +++ b/DOOML/lib/codegen.ml @@ -11,14 +11,24 @@ type visibility = | Internal | External -let define_ibinop name ret build_f = +let unbox funcs v = + let (f, typ, _) = Map.find_exn funcs "unbox" in + build_call typ f [ v ] + +let box_imm funcs imm = + let (f, typ, _) = Map.find_exn funcs "box_imm" in + build_call typ f [ imm ] + +let define_ibinop ?(box_ret = false) funcs name ret build_f = let typ = function_type ret [| i64_type; i64_type |] in let func = define_func name (Llvm.return_type typ) (Llvm.param_types typ) in let entry = entry_block func in position_at_end entry; (match params func with | [| lhs; rhs |] -> + let (lhs, rhs) = (unbox funcs lhs, unbox funcs rhs) in let binop = build_f lhs rhs in + let binop = if box_ret then box_imm funcs binop else binop in build_ret binop |> ignore; | _ -> assert false); Llvm_analysis.assert_valid_function func; @@ -35,20 +45,25 @@ let declare_external name ret params = ( name, (f, t, External ) ) let emit_builtins () = - [ declare_external "print_int" i64_type [| i64_type |]; + let rt = [ declare_external "print_int" i64_type [| i64_type |]; declare_internal "create_closure" i64_type [| i64_type; i64_type; i64_type; i64_type |]; declare_internal "closure_apply" i64_type [| i64_type; i64_type; i64_type |]; declare_internal "create_tuple" i64_type [| i64_type; i64_type |]; declare_external "tuple_nth" i64_type [| i64_type; i64_type |]; - define_ibinop "+" i64_type build_add; - define_ibinop "-" i64_type build_sub; - define_ibinop "*" i64_type build_mul; - define_ibinop "/" i64_type build_sdiv; - define_ibinop "<" i1_type (build_icmp Llvm.Icmp.Slt); - define_ibinop ">" i1_type (build_icmp Llvm.Icmp.Sgt); - define_ibinop "<=" i1_type (build_icmp Llvm.Icmp.Sle); - define_ibinop ">=" i1_type (build_icmp Llvm.Icmp.Sge); - define_ibinop "=" i1_type (build_icmp Llvm.Icmp.Eq) ] |> Map.of_alist_exn + declare_internal "unbox" i64_type [| i64_type |]; + declare_internal "box_imm" i64_type [| i64_type |]; + declare_internal "init_gc" void_type [| |]; + ] |> Map.of_alist_exn in + let binops =[ define_ibinop ~box_ret:true rt "+" i64_type build_add; + define_ibinop ~box_ret:true rt "-" i64_type build_sub; + define_ibinop ~box_ret:true rt "*" i64_type build_mul; + define_ibinop ~box_ret:true rt "/" i64_type build_sdiv; + define_ibinop rt "<" i1_type (build_icmp Llvm.Icmp.Slt); + define_ibinop rt ">" i1_type (build_icmp Llvm.Icmp.Sgt); + define_ibinop rt "<=" i1_type (build_icmp Llvm.Icmp.Sle); + define_ibinop rt ">=" i1_type (build_icmp Llvm.Icmp.Sge); + define_ibinop rt "=" i1_type (build_icmp Llvm.Icmp.Eq) ] |> Map.of_alist_exn in + Map.merge_disjoint_exn rt binops let emit_create_closure funcs func args = let arity = params func |> Array.length in @@ -79,7 +94,7 @@ let emit_create_tuple funcs init = let rec emit_immexpr binds funcs = function - | Anf.ImmNum n -> const_int i64_type n + | Anf.ImmNum n -> const_int i64_type n |> box_imm funcs | Anf.ImmId s -> (match Map.find binds s with | Some lv -> lv @@ -200,7 +215,11 @@ let emit_decl funcs (decl: Anf.decl) = | `Ok m -> m in let entry_bb = append_block ~name:"entry" f in position_at_end entry_bb; + (if name = "main" then + let (init, init_t, _) = Map.find_exn funcs "init_gc" in + build_call init_t init [ ] |> ignore); let body = emit_aexpr par_binds funcs body in + let body = if name = "main" then unbox funcs body else body in build_ret body |> ignore; let funcs = (match rec_flag with | Ast.Rec -> funcs diff --git a/DOOML/lib/runtime/runtime.c b/DOOML/lib/runtime/runtime.c index 3f0c88e6..9a18b377 100644 --- a/DOOML/lib/runtime/runtime.c +++ b/DOOML/lib/runtime/runtime.c @@ -63,6 +63,8 @@ typedef struct { static const uint64_t GC_BANK_SIZE = MEM; static GC gc; +#define debugf printf + void init_gc() { GCStats init_stats = { .runs = 0, @@ -77,14 +79,15 @@ void init_gc() { .to_bank_size = GC_BANK_SIZE, .to_bank_start = malloc(GC_BANK_SIZE), }; - gc.free_space_start = gc.from_bank_start; + init_gc.free_space_start = init_gc.from_bank_start; gc = init_gc; } int64_t *alloc_gc(uint32_t size, GCObjTag tag) { int64_t *ptr = gc.free_space_start; - uint32_t taken_bytes = (ptr - gc.from_bank_start) / 8; + uint32_t taken_bytes = ((uint32_t) (ptr - gc.from_bank_start)) / 8; uint32_t free_space = gc.from_bank_size - taken_bytes; + debugf("> alloc_gc(%u): had %u free space\n", size, free_space); if (free_space < size) { fprintf(stderr, "GC OOM\n"); @@ -104,29 +107,37 @@ int64_t box_imm(int64_t n) { return (n << 1) + 1; } -int64_t unbox_imm(int64_t n) { - return n >> 1; -} - bool is_imm(int64_t n) { return n | 1; } +int64_t unbox(int64_t n) { + if (!is_imm(n)) { + return n; + } + return n >> 1; +} + int64_t create_tuple(int64_t size, int64_t init) { - GCTuple *tuple = (GCTuple *)alloc_gc(sizeof(GCTuple) + size, Tuple); + GCTuple *tuple = (GCTuple *)alloc_gc(sizeof(GCTuple) + size * 8, Tuple); tuple->size = size; - for (int64_t i = 0; i < size; i++) + for (int64_t i = 0; i < size; i++) { tuple->fields[i] = ((int64_t*) init)[i]; + } return (int64_t) tuple; } int64_t tuple_nth(int64_t tuple, int64_t i) { - return ((GCTuple*) tuple)->fields[i]; + int64_t unboxed_i = unbox(i); + GCTuple *tuple_ptr = (GCTuple*) tuple; + if (unboxed_i >= tuple_ptr->size) { + fprintf(stderr, "tuple_nth: index is out of bounds\n"); + exit(1); + } + return tuple_ptr->fields[unboxed_i]; } -#define debugf printf - int64_t create_closure(int64_t callee, int64_t arity, int64_t argc, int64_t argv_) { assert(argc < arity); @@ -134,7 +145,7 @@ int64_t create_closure(int64_t callee, int64_t arity, int64_t argc, int64_t argv int64_t *argv = (int64_t*) argv_; - GCClosure *closure = (GCClosure *)alloc_gc(sizeof(GCClosure) + arity, Closure); + GCClosure *closure = (GCClosure *)alloc_gc(sizeof(GCClosure) + arity * 8, Closure); closure->callee = callee; closure->arity = arity; closure->argc = argc; @@ -150,7 +161,7 @@ int64_t create_closure(int64_t callee, int64_t arity, int64_t argc, int64_t argv GCClosure *copy_closure(GCClosure *closure) { debugf("> copy_closure(%ld)\n", (int64_t) closure); - GCClosure *closure2 = (GCClosure *)alloc_gc(sizeof(GCClosure) + closure->arity, Closure); + GCClosure *closure2 = (GCClosure *)alloc_gc(sizeof(GCClosure) + closure->arity * 8, Closure); closure2->callee = closure->callee; closure2->arity = closure->arity; closure2->argc = closure->argc; @@ -167,7 +178,7 @@ int64_t closure_apply(int64_t closure_, int64_t argc, int64_t argv_) { debugf("> closure_apply(%ld, %ld, %ld)\n", closure_, argc, argv_); int64_t *argv = (int64_t*) argv_; - GCClosure *closure = copy_closure ((GCClosure *)closure_); + GCClosure *closure = copy_closure((GCClosure *)closure_); debugf(" closure_apply: closure stats %ld %ld %ld\n", closure->callee, closure->arity, closure->argc); int64_t current_argc = closure->argc; for (int64_t i = 0; i < argc; i++) { @@ -186,7 +197,7 @@ int64_t closure_apply(int64_t closure_, int64_t argc, int64_t argv_) { } int64_t print_int(int64_t n) { - return printf("%ld\n", n); + return box_imm(printf("%ld\n", unbox(n))); } void exit2(void) { From 67a58bb9da4256da7cf5da00302be313dbfa0bb3 Mon Sep 17 00:00:00 2001 From: "ignat.sergeev" Date: Wed, 21 Jan 2026 13:46:36 +0300 Subject: [PATCH 42/49] collect runtime --- DOOML/lib/runtime/runtime.c | 143 ++++++++++++++++++++++++++++++++---- 1 file changed, 127 insertions(+), 16 deletions(-) diff --git a/DOOML/lib/runtime/runtime.c b/DOOML/lib/runtime/runtime.c index 9a18b377..33e187dc 100644 --- a/DOOML/lib/runtime/runtime.c +++ b/DOOML/lib/runtime/runtime.c @@ -62,10 +62,11 @@ typedef struct { static const uint64_t GC_BANK_SIZE = MEM; static GC gc; +static int64_t *initial_sp; #define debugf printf -void init_gc() { +void gc_init() { GCStats init_stats = { .runs = 0, .bank_idx = 0, @@ -83,15 +84,27 @@ void init_gc() { gc = init_gc; } -int64_t *alloc_gc(uint32_t size, GCObjTag tag) { +void sp_init(int64_t sp) { + initial_sp = (int64_t *)sp; +} + +void collect(); + +int64_t *gc_alloc(uint32_t size, GCObjTag tag) { int64_t *ptr = gc.free_space_start; uint32_t taken_bytes = ((uint32_t) (ptr - gc.from_bank_start)) / 8; uint32_t free_space = gc.from_bank_size - taken_bytes; debugf("> alloc_gc(%u): had %u free space\n", size, free_space); if (free_space < size) { - fprintf(stderr, "GC OOM\n"); - exit(1); + collect(); + ptr = gc.free_space_start; + taken_bytes = ((uint32_t) (ptr - gc.from_bank_start)) / 8; + free_space = gc.from_bank_size - taken_bytes; + if (free_space < size) { + fprintf(stderr, "GC OOM\n"); + exit(1); + } } gc.free_space_start += size * 8; @@ -103,6 +116,25 @@ int64_t *alloc_gc(uint32_t size, GCObjTag tag) { return ptr; } +GCClosure *gc_alloc_closure_base(int64_t callee, int64_t arity, int64_t argc) { + GCClosure *closure = (GCClosure *)gc_alloc(sizeof(GCClosure) + arity * 8, Closure); + closure->callee = callee; + closure->arity = arity; + closure->argc = argc; + return closure; +} + +GCTuple *gc_alloc_tuple_base(int64_t size) { + GCTuple *tuple = (GCTuple *)gc_alloc(sizeof(GCTuple) + size * 8, Tuple); + tuple->size = size; + return tuple; +} + +void gc_make_fwd(int64_t ptr, int64_t new_ptr) { + ((GCObjHeader *)ptr)->tag = Forward; + ((GCForward *)ptr)->ptr = new_ptr; +} + int64_t box_imm(int64_t n) { return (n << 1) + 1; } @@ -118,9 +150,95 @@ int64_t unbox(int64_t n) { return n >> 1; } +int64_t *get_sp() { + return (int64_t *)__builtin_frame_address(0); +} + +int64_t gc_scan_ptrs_on_stack(int64_t *range_start, int64_t *range_end, int64_t** ptrs) { + int64_t *sp = get_sp(); + int64_t ptrs_idx = 0; + // start from stack bottom to preserve order for minimizing forward ptr usages + for (int64_t i = 0; initial_sp + i < sp; i++) { + int64_t stack_val = initial_sp[i]; + if (!is_imm(stack_val) && stack_val > (int64_t)range_start && stack_val < (int64_t)range_end) { + ptrs[ptrs_idx] = initial_sp + i; + ptrs_idx += 1; + } + } + + return ptrs_idx; +} + +int64_t gc_mark_and_copy(int64_t ptr) { + GCObjTag tag = ((GCObjHeader *)ptr)->tag; + if (tag == Forward) { + return ((GCForward *)ptr)->ptr; + } + + GCObjTag size = ((GCObjHeader *)ptr)->tag; + + if (tag == Closure) { + GCClosure *closure = (GCClosure *)ptr; + GCClosure *closure2 = gc_alloc_closure_base(closure->callee, closure->arity, closure->argc); + + for (int64_t i = 0; i < closure2->argc; i++) { + int64_t arg = closure->args[i]; + if (is_imm(arg)) { + closure2->args[i] = arg; + } else { + closure2->args[i] = gc_mark_and_copy(arg); + } + } + + gc_make_fwd(ptr, (int64_t)closure2); + return (int64_t)closure2; + } + + if (tag == Tuple) { + GCTuple *tuple = (GCTuple *)ptr; + GCTuple *tuple2 = gc_alloc_tuple_base(tuple->size); + + for (int64_t i = 0; i < tuple2->size; i++) { + int64_t field = tuple->fields[i]; + if (is_imm(field)) { + tuple2->fields[i] = field; + } else { + tuple2->fields[i] = gc_mark_and_copy(field); + } + } + + gc_make_fwd(ptr, (int64_t)tuple2); + return (int64_t)tuple2; + } + + fprintf(stderr, "unknown gc tag %u\n", tag); + exit(1); +} + +void collect() { + int64_t *ptrs[UINT32_MAX]; + int64_t ptrs_size = gc_scan_ptrs_on_stack(gc.from_bank_start, gc.free_space_start, ptrs); + + int64_t *to_bank_start = gc.to_bank_start; + int64_t to_bank_size = gc.to_bank_size; + gc.to_bank_start = gc.from_bank_start; + gc.from_bank_start = to_bank_start; + gc.to_bank_size = gc.from_bank_size; + gc.from_bank_size = to_bank_size; + gc.free_space_start = gc.from_bank_start; + gc.stats.bank_idx = 1 - gc.stats.bank_idx; + gc.stats.size = 0; + + for (int64_t i = 0; i < ptrs_size; i++) { + int64_t *old_ptr_on_stack = ptrs[i]; + *old_ptr_on_stack = gc_mark_and_copy(*old_ptr_on_stack); + } + + gc.stats.runs += 1; +} + int64_t create_tuple(int64_t size, int64_t init) { - GCTuple *tuple = (GCTuple *)alloc_gc(sizeof(GCTuple) + size * 8, Tuple); - tuple->size = size; + GCTuple *tuple = gc_alloc_tuple_base(size); for (int64_t i = 0; i < size; i++) { tuple->fields[i] = ((int64_t*) init)[i]; } @@ -143,13 +261,9 @@ int64_t create_closure(int64_t callee, int64_t arity, int64_t argc, int64_t argv debugf("> create_closure(%ld, %ld, %ld, %ld)\n", callee, arity, argc, argv_); - int64_t *argv = (int64_t*) argv_; - - GCClosure *closure = (GCClosure *)alloc_gc(sizeof(GCClosure) + arity * 8, Closure); - closure->callee = callee; - closure->arity = arity; - closure->argc = argc; + GCClosure *closure = gc_alloc_closure_base(callee, arity, argc); + int64_t *argv = (int64_t*) argv_; for (int64_t i = 0; i < argc; i++) { closure->args[i] = argv[i]; } @@ -161,10 +275,7 @@ int64_t create_closure(int64_t callee, int64_t arity, int64_t argc, int64_t argv GCClosure *copy_closure(GCClosure *closure) { debugf("> copy_closure(%ld)\n", (int64_t) closure); - GCClosure *closure2 = (GCClosure *)alloc_gc(sizeof(GCClosure) + closure->arity * 8, Closure); - closure2->callee = closure->callee; - closure2->arity = closure->arity; - closure2->argc = closure->argc; + GCClosure *closure2 = gc_alloc_closure_base(closure->callee, closure->arity, closure->argc); for (int64_t i = 0; i < closure->argc; i++) { closure2->args[i] = closure->args[i]; } From 88cd1ff21504f167c9d5e90b885f09272f4a70e3 Mon Sep 17 00:00:00 2001 From: "ignat.sergeev" Date: Wed, 21 Jan 2026 14:10:53 +0300 Subject: [PATCH 43/49] collect codegen --- DOOML/lib/codegen.ml | 14 +++++++++++--- DOOML/lib/llvm_wrapper.ml | 2 ++ 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/DOOML/lib/codegen.ml b/DOOML/lib/codegen.ml index 6d4cb451..028718fd 100644 --- a/DOOML/lib/codegen.ml +++ b/DOOML/lib/codegen.ml @@ -52,7 +52,10 @@ let emit_builtins () = declare_external "tuple_nth" i64_type [| i64_type; i64_type |]; declare_internal "unbox" i64_type [| i64_type |]; declare_internal "box_imm" i64_type [| i64_type |]; - declare_internal "init_gc" void_type [| |]; + declare_internal "gc_init" void_type [| |]; + declare_internal "sp_init" void_type [| i64_type |]; + declare_internal "llvm.stacksave.p0" ptr_type [| |]; + declare_external "collect" void_type [| |]; ] |> Map.of_alist_exn in let binops =[ define_ibinop ~box_ret:true rt "+" i64_type build_add; define_ibinop ~box_ret:true rt "-" i64_type build_sub; @@ -216,8 +219,13 @@ let emit_decl funcs (decl: Anf.decl) = let entry_bb = append_block ~name:"entry" f in position_at_end entry_bb; (if name = "main" then - let (init, init_t, _) = Map.find_exn funcs "init_gc" in - build_call init_t init [ ] |> ignore); + let (gc_init, gc_init_t, _) = Map.find_exn funcs "gc_init" in + build_call gc_init_t gc_init [ ] |> ignore; + let (sp_init, sp_init_t, _) = Map.find_exn funcs "sp_init" in + let (ss, ss_t, _) = Map.find_exn funcs "llvm.stacksave.p0" in + let sp = build_call ss_t ss [ ] in + let sp = build_pointercast sp i64_type in + build_call sp_init_t sp_init [ sp ] |> ignore); let body = emit_aexpr par_binds funcs body in let body = if name = "main" then unbox funcs body else body in build_ret body |> ignore; diff --git a/DOOML/lib/llvm_wrapper.ml b/DOOML/lib/llvm_wrapper.ml index 2d786121..35976b8c 100644 --- a/DOOML/lib/llvm_wrapper.ml +++ b/DOOML/lib/llvm_wrapper.ml @@ -50,6 +50,7 @@ module type S = sig val entry_block : Llvm.llvalue -> Llvm.llbasicblock val i64_type : Llvm.lltype val i1_type : Llvm.lltype + val ptr_type : Llvm.lltype val function_type : lltype -> lltype array -> lltype val const_int : Llvm.lltype -> int -> Llvm.llvalue val params : Llvm.llvalue -> Llvm.llvalue array @@ -108,6 +109,7 @@ let make context builder module_ = let void_type = Llvm.void_type context let i64_type = Llvm.i64_type context let i1_type = Llvm.i1_type context + let ptr_type = Llvm.pointer_type context let function_type = Llvm.function_type let const_int = Llvm.const_int let params = Llvm.params From 4039ee57bf7d448c18b12e04117334b6bdbac3b6 Mon Sep 17 00:00:00 2001 From: "ignat.sergeev" Date: Fri, 23 Jan 2026 00:18:15 +0300 Subject: [PATCH 44/49] implement unit exprs --- DOOML/lib/anf.ml | 9 ++++++++- DOOML/lib/ast.ml | 13 +++++++++++-- DOOML/lib/builtin.ml | 1 + DOOML/lib/codegen.ml | 3 ++- DOOML/lib/fe.ml | 7 ++++--- DOOML/lib/riscv.ml | 1 + DOOML/lib/runtime/runtime.c | 10 +++++++--- 7 files changed, 34 insertions(+), 10 deletions(-) diff --git a/DOOML/lib/anf.ml b/DOOML/lib/anf.ml index 82136c15..34c0d8cc 100644 --- a/DOOML/lib/anf.ml +++ b/DOOML/lib/anf.ml @@ -1,11 +1,13 @@ type immexpr = | ImmNum of int + | ImmUnit | ImmId of string | ImmTuple of immexpr list [@@deriving variants] let rec pp_immexpr ppf = function | ImmNum d -> Format.fprintf ppf "%d" d + | ImmUnit -> Format.fprintf ppf "()" | ImmId s -> Format.fprintf ppf "%s" s | ImmTuple s -> Format.fprintf @@ -134,7 +136,12 @@ let rec arg = function let rec anf (k : immexpr -> Ctx.t -> aexpr * Ctx.t) = function | Ast.Const d -> - let* ret = k (immnum d) in + let imm = + match d with + | Ast.CInt d -> immnum d + | Ast.CUnit -> ImmUnit + in + let* ret = k imm in return ret | Var s -> let* () = addsym s in diff --git a/DOOML/lib/ast.ml b/DOOML/lib/ast.ml index a710421b..0c4870d4 100644 --- a/DOOML/lib/ast.ml +++ b/DOOML/lib/ast.ml @@ -38,8 +38,13 @@ type rec_flag = | NonRec [@@deriving variants] +type const = + | CInt of int + | CUnit +[@@deriving variants] + type expr = - | Const of int + | Const of const | Var of ident | Tuple of expr list | App of expr * expr @@ -53,8 +58,12 @@ let fun_ args = function | body -> fun_ args body ;; +let pp_const ppf = function + | CInt c -> Format.fprintf ppf "%d" c + | CUnit -> Format.fprintf ppf "()" + let rec pp_expr ppf = function - | Const c -> Format.fprintf ppf "%d" c + | Const c -> Format.fprintf ppf "%a" pp_const c | Var ident -> Format.fprintf ppf "%a" pp_ident ident | Tuple exprs -> Format.fprintf ppf "(%a)" (Format.pp_print_list ~pp_sep:pp_sep_quote pp_expr) exprs diff --git a/DOOML/lib/builtin.ml b/DOOML/lib/builtin.ml index a9e48ada..92dded8e 100644 --- a/DOOML/lib/builtin.ml +++ b/DOOML/lib/builtin.ml @@ -19,5 +19,6 @@ let all = ; { name = "||"; arity = 2 } ; { name = "print_int"; arity = 1 } ; { name = "tuple_nth"; arity = 2 } + ; { name = "collect"; arity = 1 } ] ;; diff --git a/DOOML/lib/codegen.ml b/DOOML/lib/codegen.ml index 028718fd..343acf41 100644 --- a/DOOML/lib/codegen.ml +++ b/DOOML/lib/codegen.ml @@ -55,7 +55,7 @@ let emit_builtins () = declare_internal "gc_init" void_type [| |]; declare_internal "sp_init" void_type [| i64_type |]; declare_internal "llvm.stacksave.p0" ptr_type [| |]; - declare_external "collect" void_type [| |]; + declare_external "collect" void_type [| i64_type |]; ] |> Map.of_alist_exn in let binops =[ define_ibinop ~box_ret:true rt "+" i64_type build_add; define_ibinop ~box_ret:true rt "-" i64_type build_sub; @@ -98,6 +98,7 @@ let emit_create_tuple funcs init = let rec emit_immexpr binds funcs = function | Anf.ImmNum n -> const_int i64_type n |> box_imm funcs + | Anf.ImmUnit -> const_int i64_type 0 |> box_imm funcs | Anf.ImmId s -> (match Map.find binds s with | Some lv -> lv diff --git a/DOOML/lib/fe.ml b/DOOML/lib/fe.ml index 540283ac..71f87ddb 100644 --- a/DOOML/lib/fe.ml +++ b/DOOML/lib/fe.ml @@ -126,8 +126,9 @@ let pattern = ;; let const = - let* v = take_while1 is_digit |> token in - v |> int_of_string |> Ast.const |> return + (string "()" |> token) *> return (Ast.cunit |> Ast.const) <|> + (let* v = take_while1 is_digit |> token in + v |> int_of_string |> Ast.cint |> Ast.const |> return) ;; let var = @@ -216,7 +217,7 @@ let expr = match c with | Some '0' .. '9' -> const | Some '(' -> - let* r = parens expr <|> tuple expr <|> var in + let* r = const <|> parens expr <|> tuple expr <|> var in r |> return | _ -> var in diff --git a/DOOML/lib/riscv.ml b/DOOML/lib/riscv.ml index af0e7c5b..b9f87703 100644 --- a/DOOML/lib/riscv.ml +++ b/DOOML/lib/riscv.ml @@ -400,6 +400,7 @@ let immexpr reg = function | Anf.ImmNum c -> let* () = emit (li reg c) in return reg + | Anf.ImmUnit -> failf "todo" | ImmId var -> let* offset = offset var in (match offset with diff --git a/DOOML/lib/runtime/runtime.c b/DOOML/lib/runtime/runtime.c index 33e187dc..c448db89 100644 --- a/DOOML/lib/runtime/runtime.c +++ b/DOOML/lib/runtime/runtime.c @@ -88,7 +88,7 @@ void sp_init(int64_t sp) { initial_sp = (int64_t *)sp; } -void collect(); +void gc_collect(); int64_t *gc_alloc(uint32_t size, GCObjTag tag) { int64_t *ptr = gc.free_space_start; @@ -97,7 +97,7 @@ int64_t *gc_alloc(uint32_t size, GCObjTag tag) { debugf("> alloc_gc(%u): had %u free space\n", size, free_space); if (free_space < size) { - collect(); + gc_collect(); ptr = gc.free_space_start; taken_bytes = ((uint32_t) (ptr - gc.from_bank_start)) / 8; free_space = gc.from_bank_size - taken_bytes; @@ -215,7 +215,7 @@ int64_t gc_mark_and_copy(int64_t ptr) { exit(1); } -void collect() { +void gc_collect() { int64_t *ptrs[UINT32_MAX]; int64_t ptrs_size = gc_scan_ptrs_on_stack(gc.from_bank_start, gc.free_space_start, ptrs); @@ -237,6 +237,10 @@ void collect() { gc.stats.runs += 1; } +void collect(int64_t unit) { + gc_collect(); +} + int64_t create_tuple(int64_t size, int64_t init) { GCTuple *tuple = gc_alloc_tuple_base(size); for (int64_t i = 0; i < size; i++) { From 485bcba173095ec45aaf4227bea2df63ab876400 Mon Sep 17 00:00:00 2001 From: "ignat.sergeev" Date: Fri, 23 Jan 2026 04:00:23 +0300 Subject: [PATCH 45/49] finalize gc runtime --- DOOML/lib/codegen.ml | 8 +- DOOML/lib/dune | 6 ++ DOOML/lib/runtime/runtime.c | 156 +++++++++++++++++++++++------------- 3 files changed, 108 insertions(+), 62 deletions(-) diff --git a/DOOML/lib/codegen.ml b/DOOML/lib/codegen.ml index 343acf41..36aa6938 100644 --- a/DOOML/lib/codegen.ml +++ b/DOOML/lib/codegen.ml @@ -53,8 +53,7 @@ let emit_builtins () = declare_internal "unbox" i64_type [| i64_type |]; declare_internal "box_imm" i64_type [| i64_type |]; declare_internal "gc_init" void_type [| |]; - declare_internal "sp_init" void_type [| i64_type |]; - declare_internal "llvm.stacksave.p0" ptr_type [| |]; + declare_internal "sp_init" void_type [| |]; declare_external "collect" void_type [| i64_type |]; ] |> Map.of_alist_exn in let binops =[ define_ibinop ~box_ret:true rt "+" i64_type build_add; @@ -223,10 +222,7 @@ let emit_decl funcs (decl: Anf.decl) = let (gc_init, gc_init_t, _) = Map.find_exn funcs "gc_init" in build_call gc_init_t gc_init [ ] |> ignore; let (sp_init, sp_init_t, _) = Map.find_exn funcs "sp_init" in - let (ss, ss_t, _) = Map.find_exn funcs "llvm.stacksave.p0" in - let sp = build_call ss_t ss [ ] in - let sp = build_pointercast sp i64_type in - build_call sp_init_t sp_init [ sp ] |> ignore); + build_call sp_init_t sp_init [ ] |> ignore); let body = emit_aexpr par_binds funcs body in let body = if name = "main" then unbox funcs body else body in build_ret body |> ignore; diff --git a/DOOML/lib/dune b/DOOML/lib/dune index e5a7befd..1b071717 100644 --- a/DOOML/lib/dune +++ b/DOOML/lib/dune @@ -23,6 +23,12 @@ (action (run gcc -fPIC -shared %{deps} -o %{targets} -lffi))) +(rule + (targets debug-runtime.so) + (deps runtime/runtime.c runtime/call-runtime.h runtime/any-call-runtime.c) + (action + (run gcc -fPIC -DDEBUG -shared %{deps} -o %{targets} -lffi))) + (rule (targets riscv-call-runtime.o) (deps runtime/riscv-call-runtime.c runtime/call-runtime.h) diff --git a/DOOML/lib/runtime/runtime.c b/DOOML/lib/runtime/runtime.c index c448db89..a493ceb7 100644 --- a/DOOML/lib/runtime/runtime.c +++ b/DOOML/lib/runtime/runtime.c @@ -64,7 +64,28 @@ static const uint64_t GC_BANK_SIZE = MEM; static GC gc; static int64_t *initial_sp; +#ifdef DEBUG #define debugf printf +#define debug_call(...) __VA_ARGS__ +#else +#define debugf(...) +#define debug_call(...) +#endif + +int64_t box_imm(int64_t n) { + return (n << 1) + 1; +} + +bool is_imm(int64_t n) { + return n & 1; +} + +int64_t unbox(int64_t n) { + if (!is_imm(n)) { + return n; + } + return n >> 1; +} void gc_init() { GCStats init_stats = { @@ -81,11 +102,61 @@ void gc_init() { .to_bank_start = malloc(GC_BANK_SIZE), }; init_gc.free_space_start = init_gc.from_bank_start; + debugf("> bank 0 ranges: %ld : %ld\n", (int64_t)init_gc.from_bank_start, (int64_t)(init_gc.from_bank_start + init_gc.from_bank_size)); + debugf("> bank 1 ranges: %ld : %ld\n", (int64_t)init_gc.to_bank_start, (int64_t)(init_gc.to_bank_start + init_gc.to_bank_size)); gc = init_gc; } -void sp_init(int64_t sp) { - initial_sp = (int64_t *)sp; +static inline int64_t *get_sp() { + return (int64_t *)__builtin_frame_address(0); +} + +void sp_init() { + initial_sp = get_sp(); +} + +void print_obj_helper(int64_t ptr) { + if (is_imm(ptr)) { + printf("int %ld", ptr); + return; + } + GCObjTag tag = ((GCObjHeader *)ptr)->tag; + if (tag == Forward) { + printf("Forward -> %ld: ", ((GCForward *)ptr)->ptr); + print_obj_helper(((GCForward *)ptr)->ptr); + } + if (tag == Closure) { + GCClosure *closure = (GCClosure *)ptr; + printf("Closure %ld(", closure->callee); + for (int i = 0; i < closure->arity; i++) { + if (i < closure->argc) { + print_obj_helper(closure->args[i]); + } else { + printf("..."); + } + if (i != closure->arity - 1) { + printf(", "); + } + } + printf(")"); + } + if (tag == Tuple) { + GCTuple *tuple = (GCTuple *)ptr; + printf("Tuple ("); + for (int i = 0; i < tuple->size; i++) { + print_obj_helper(tuple->fields[i]); + if (i != tuple->size - 1) { + printf(", "); + } + } + printf(")"); + } +} + +void debug_print_value(int64_t ptr) { + debug_call(printf("%ld: ", ptr)); + debug_call(print_obj_helper(ptr)); + debug_call(printf("\n")); } void gc_collect(); @@ -94,7 +165,6 @@ int64_t *gc_alloc(uint32_t size, GCObjTag tag) { int64_t *ptr = gc.free_space_start; uint32_t taken_bytes = ((uint32_t) (ptr - gc.from_bank_start)) / 8; uint32_t free_space = gc.from_bank_size - taken_bytes; - debugf("> alloc_gc(%u): had %u free space\n", size, free_space); if (free_space < size) { gc_collect(); @@ -117,16 +187,20 @@ int64_t *gc_alloc(uint32_t size, GCObjTag tag) { } GCClosure *gc_alloc_closure_base(int64_t callee, int64_t arity, int64_t argc) { - GCClosure *closure = (GCClosure *)gc_alloc(sizeof(GCClosure) + arity * 8, Closure); + int64_t size = sizeof(GCClosure) + arity * 8; + GCClosure *closure = (GCClosure *)gc_alloc(size, Closure); closure->callee = callee; closure->arity = arity; closure->argc = argc; + debugf("< alloc(%ld): closure %ld with %ld(%ld out of %ld)\n", size, (int64_t)closure, callee, argc, arity); return closure; } GCTuple *gc_alloc_tuple_base(int64_t size) { - GCTuple *tuple = (GCTuple *)gc_alloc(sizeof(GCTuple) + size * 8, Tuple); + int64_t obj_size = sizeof(GCClosure) + size * 8; + GCTuple *tuple = (GCTuple *)gc_alloc(obj_size, Tuple); tuple->size = size; + debugf("< alloc(%ld): tuple %ld with size %ld\n", obj_size, (int64_t)tuple, size); return tuple; } @@ -135,40 +209,6 @@ void gc_make_fwd(int64_t ptr, int64_t new_ptr) { ((GCForward *)ptr)->ptr = new_ptr; } -int64_t box_imm(int64_t n) { - return (n << 1) + 1; -} - -bool is_imm(int64_t n) { - return n | 1; -} - -int64_t unbox(int64_t n) { - if (!is_imm(n)) { - return n; - } - return n >> 1; -} - -int64_t *get_sp() { - return (int64_t *)__builtin_frame_address(0); -} - -int64_t gc_scan_ptrs_on_stack(int64_t *range_start, int64_t *range_end, int64_t** ptrs) { - int64_t *sp = get_sp(); - int64_t ptrs_idx = 0; - // start from stack bottom to preserve order for minimizing forward ptr usages - for (int64_t i = 0; initial_sp + i < sp; i++) { - int64_t stack_val = initial_sp[i]; - if (!is_imm(stack_val) && stack_val > (int64_t)range_start && stack_val < (int64_t)range_end) { - ptrs[ptrs_idx] = initial_sp + i; - ptrs_idx += 1; - } - } - - return ptrs_idx; -} - int64_t gc_mark_and_copy(int64_t ptr) { GCObjTag tag = ((GCObjHeader *)ptr)->tag; if (tag == Forward) { @@ -216,8 +256,9 @@ int64_t gc_mark_and_copy(int64_t ptr) { } void gc_collect() { - int64_t *ptrs[UINT32_MAX]; - int64_t ptrs_size = gc_scan_ptrs_on_stack(gc.from_bank_start, gc.free_space_start, ptrs); + int64_t *sp = get_sp(); + int64_t gc_bank_range_start = (int64_t)gc.from_bank_start; + int64_t gc_bank_range_end = (int64_t)gc.free_space_start; int64_t *to_bank_start = gc.to_bank_start; int64_t to_bank_size = gc.to_bank_size; @@ -229,12 +270,19 @@ void gc_collect() { gc.stats.bank_idx = 1 - gc.stats.bank_idx; gc.stats.size = 0; - for (int64_t i = 0; i < ptrs_size; i++) { - int64_t *old_ptr_on_stack = ptrs[i]; - *old_ptr_on_stack = gc_mark_and_copy(*old_ptr_on_stack); + debugf("> gc_collect\n"); + for (int64_t stack_cell = (int64_t)initial_sp; stack_cell >= (int64_t)sp; stack_cell--) { + int64_t obj_ptr = *(int64_t*)stack_cell; + if (!is_imm(obj_ptr) && obj_ptr >= gc_bank_range_start && obj_ptr < gc_bank_range_end) { + debugf(" gc_root: %ld\n", obj_ptr); + debug_print_value(obj_ptr); + *(int64_t*)stack_cell = gc_mark_and_copy(obj_ptr); + debugf(" new pointer on stack: %ld\n", *(int64_t*)stack_cell); + } } gc.stats.runs += 1; + debugf("< gc_collect\n"); } void collect(int64_t unit) { @@ -263,50 +311,46 @@ int64_t tuple_nth(int64_t tuple, int64_t i) { int64_t create_closure(int64_t callee, int64_t arity, int64_t argc, int64_t argv_) { assert(argc < arity); - debugf("> create_closure(%ld, %ld, %ld, %ld)\n", callee, arity, argc, argv_); - GCClosure *closure = gc_alloc_closure_base(callee, arity, argc); int64_t *argv = (int64_t*) argv_; for (int64_t i = 0; i < argc; i++) { closure->args[i] = argv[i]; } - debugf("< create_closure() -> %ld\n", (int64_t) closure); return (int64_t) closure; } GCClosure *copy_closure(GCClosure *closure) { - debugf("> copy_closure(%ld)\n", (int64_t) closure); - GCClosure *closure2 = gc_alloc_closure_base(closure->callee, closure->arity, closure->argc); for (int64_t i = 0; i < closure->argc; i++) { closure2->args[i] = closure->args[i]; } - debugf("< copy_closure() -> %ld\n", (int64_t) closure2); - return closure2; } int64_t closure_apply(int64_t closure_, int64_t argc, int64_t argv_) { - debugf("> closure_apply(%ld, %ld, %ld)\n", closure_, argc, argv_); + debugf("> closure_apply\n"); + debugf(" orig: "); + debug_print_value(closure_); int64_t *argv = (int64_t*) argv_; GCClosure *closure = copy_closure((GCClosure *)closure_); - debugf(" closure_apply: closure stats %ld %ld %ld\n", closure->callee, closure->arity, closure->argc); int64_t current_argc = closure->argc; for (int64_t i = 0; i < argc; i++) { - debugf(" closure_apply: arg %ld %ld is %ld in the orig\n", i, argv[i], i + current_argc); closure->args[i + current_argc] = argv[i]; closure->argc++; } + debugf(" applied: "); + debug_print_value((int64_t)closure); + if (closure->argc >= closure->arity) { - debugf(" closure_apply: calling %ld %ld %ld\n", closure->callee, closure->arity, (int64_t) closure->args); + debugf(" closure_apply: calling\n"); return call_function((void*) closure->callee, closure->arity, closure->args); } else { - debugf(" closure_apply: returning a new closure %ld %ld %ld\n", closure->callee, closure->arity, (int64_t) closure->args); + debugf(" closure_apply: returning a new closure"); return (int64_t) closure; } } From dd886093d5816f76531b7826c6661b1d0ea92869 Mon Sep 17 00:00:00 2001 From: "ignat.sergeev" Date: Fri, 23 Jan 2026 04:16:27 +0300 Subject: [PATCH 46/49] add gc status builtins --- DOOML/lib/codegen.ml | 3 +++ DOOML/lib/runtime/runtime.c | 17 +++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/DOOML/lib/codegen.ml b/DOOML/lib/codegen.ml index 36aa6938..e394ddc9 100644 --- a/DOOML/lib/codegen.ml +++ b/DOOML/lib/codegen.ml @@ -55,6 +55,9 @@ let emit_builtins () = declare_internal "gc_init" void_type [| |]; declare_internal "sp_init" void_type [| |]; declare_external "collect" void_type [| i64_type |]; + declare_external "get_heap_start" i64_type [| i64_type |]; + declare_external "get_heap_fin" i64_type [| i64_type |]; + declare_external "print_gc_status" void_type [| i64_type |]; ] |> Map.of_alist_exn in let binops =[ define_ibinop ~box_ret:true rt "+" i64_type build_add; define_ibinop ~box_ret:true rt "-" i64_type build_sub; diff --git a/DOOML/lib/runtime/runtime.c b/DOOML/lib/runtime/runtime.c index a493ceb7..975d5fd0 100644 --- a/DOOML/lib/runtime/runtime.c +++ b/DOOML/lib/runtime/runtime.c @@ -289,6 +289,23 @@ void collect(int64_t unit) { gc_collect(); } +int64_t get_heap_start(int64_t unit) { + return box_imm((int64_t)gc.from_bank_start); +} + +int64_t get_heap_fin(int64_t unit) { + return box_imm((int64_t)(gc.from_bank_start + gc.from_bank_size)); +} + +void print_gc_status(int64_t unit) { + printf("GC status\n"); + printf("Bank index: %u\n", gc.stats.bank_idx); + printf("Bank capacity: %u\n", gc.from_bank_size); + printf("Allocated: %u\n", gc.stats.size); + printf("Total allocated: %u\n", gc.stats.allocated_size); + printf("GC runs: %ld\n", gc.stats.runs); +} + int64_t create_tuple(int64_t size, int64_t init) { GCTuple *tuple = gc_alloc_tuple_base(size); for (int64_t i = 0; i < size; i++) { From 9667502a00f048354ba0472b2f50c6b9881e787f Mon Sep 17 00:00:00 2001 From: "ignat.sergeev" Date: Fri, 23 Jan 2026 05:03:55 +0300 Subject: [PATCH 47/49] unbox int when printing debug info --- DOOML/lib/runtime/runtime.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DOOML/lib/runtime/runtime.c b/DOOML/lib/runtime/runtime.c index 975d5fd0..7944908b 100644 --- a/DOOML/lib/runtime/runtime.c +++ b/DOOML/lib/runtime/runtime.c @@ -117,7 +117,7 @@ void sp_init() { void print_obj_helper(int64_t ptr) { if (is_imm(ptr)) { - printf("int %ld", ptr); + printf("int %ld", unbox(ptr)); return; } GCObjTag tag = ((GCObjHeader *)ptr)->tag; From d9072c97040abd8d90b8d7210b52316bb97b07e6 Mon Sep 17 00:00:00 2001 From: "ignat.sergeev" Date: Fri, 23 Jan 2026 05:09:47 +0300 Subject: [PATCH 48/49] add gc status builtins to middleend --- DOOML/lib/builtin.ml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/DOOML/lib/builtin.ml b/DOOML/lib/builtin.ml index 92dded8e..7d2137a0 100644 --- a/DOOML/lib/builtin.ml +++ b/DOOML/lib/builtin.ml @@ -20,5 +20,8 @@ let all = ; { name = "print_int"; arity = 1 } ; { name = "tuple_nth"; arity = 2 } ; { name = "collect"; arity = 1 } + ; { name = "get_heap_start"; arity = 1 } + ; { name = "get_heap_fin"; arity = 1 } + ; { name = "print_gc_status"; arity = 1 } ] ;; From 34b60dc37ff2d9a1aa82888c8b0375067bd110a3 Mon Sep 17 00:00:00 2001 From: "ignat.sergeev" Date: Fri, 23 Jan 2026 05:19:00 +0300 Subject: [PATCH 49/49] mark fwd before copying args or fields (because we can) --- DOOML/lib/runtime/runtime.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/DOOML/lib/runtime/runtime.c b/DOOML/lib/runtime/runtime.c index 7944908b..05655fa7 100644 --- a/DOOML/lib/runtime/runtime.c +++ b/DOOML/lib/runtime/runtime.c @@ -221,6 +221,9 @@ int64_t gc_mark_and_copy(int64_t ptr) { GCClosure *closure = (GCClosure *)ptr; GCClosure *closure2 = gc_alloc_closure_base(closure->callee, closure->arity, closure->argc); + // rewrites header and callee + gc_make_fwd(ptr, (int64_t)closure2); + for (int64_t i = 0; i < closure2->argc; i++) { int64_t arg = closure->args[i]; if (is_imm(arg)) { @@ -230,7 +233,6 @@ int64_t gc_mark_and_copy(int64_t ptr) { } } - gc_make_fwd(ptr, (int64_t)closure2); return (int64_t)closure2; } @@ -238,6 +240,9 @@ int64_t gc_mark_and_copy(int64_t ptr) { GCTuple *tuple = (GCTuple *)ptr; GCTuple *tuple2 = gc_alloc_tuple_base(tuple->size); + // rewrites header and size + gc_make_fwd(ptr, (int64_t)tuple2); + for (int64_t i = 0; i < tuple2->size; i++) { int64_t field = tuple->fields[i]; if (is_imm(field)) { @@ -247,7 +252,6 @@ int64_t gc_mark_and_copy(int64_t ptr) { } } - gc_make_fwd(ptr, (int64_t)tuple2); return (int64_t)tuple2; }