diff --git a/AML/.gitignore b/AML/.gitignore new file mode 100644 index 00000000..0e5f1e4b --- /dev/null +++ b/AML/.gitignore @@ -0,0 +1,3 @@ +/_build +/_coverage + diff --git a/AML/.ocamlformat b/AML/.ocamlformat new file mode 100644 index 00000000..04d5660b --- /dev/null +++ b/AML/.ocamlformat @@ -0,0 +1,2 @@ +version=0.27.0 +profile=janestreet diff --git a/AML/AML.opam b/AML/AML.opam new file mode 100644 index 00000000..1a3afca5 --- /dev/null +++ b/AML/AML.opam @@ -0,0 +1,40 @@ +# This file is generated by dune, edit dune-project instead +opam-version: "2.0" +synopsis: "A ML compiler" +maintainer: ["suvorovrain" "," "kinokotakenoko9"] +authors: ["suvorovrain" "," "kinokotakenoko9"] +license: ["LGPL-2.1-or-later" "WITH" "OCaml-LGPL-linking-exception"] +homepage: "https://github.com/suvorovrain/AML" +bug-reports: "https://github.com/suvorovrain/AML/issues" +depends: [ + "dune" {>= "3.8" & = "3.19.1"} + "base" + "angstrom" {= "0.16.0"} + "qcheck-core" + "ppx_inline_test" {with-test} + "ppx_deriving" + "ppx_deriving_qcheck" {= "0.6"} + "ppx_expect" + "bisect_ppx" + "odoc" {with-doc} +] +build: [ + ["dune" "subst"] {dev} + [ + "dune" + "build" + "-p" + name + "-j" + jobs + "@install" + "@runtest" {with-test} + "@doc" {with-doc} + ] +] +dev-repo: "git+https://github.com/suvorovrain/AML.git" +pin-depends: [ + ["ppx_deriving_qcheck.0.6" "git+https://github.com/c-cube/qcheck.git#42429bf06ba12373cad02b1404f50d0ad6238af5"] +] + +# Don't edit '*.opam' file manually. Use 'dune build @install' \ No newline at end of file diff --git a/AML/AML.opam.template b/AML/AML.opam.template new file mode 100644 index 00000000..1f0a4807 --- /dev/null +++ b/AML/AML.opam.template @@ -0,0 +1,5 @@ +pin-depends: [ + ["ppx_deriving_qcheck.0.6" "git+https://github.com/c-cube/qcheck.git#42429bf06ba12373cad02b1404f50d0ad6238af5"] +] + +# Don't edit '*.opam' file manually. Use 'dune build @install' \ No newline at end of file diff --git a/AML/Makefile b/AML/Makefile new file mode 100644 index 00000000..59f5bf0a --- /dev/null +++ b/AML/Makefile @@ -0,0 +1,30 @@ +.PHONY: all test +all: + dune build + +test: + dune test + +TEST_COV_D = /tmp/cov +COVERAGE_OPTS = --coverage-path $(TEST_COV_D) --expect lib/ + +fmt: + dune build @fmt --auto-promote + +lint: + dune build @lint --force + +release: + dune build --profile=release + dune runtest --profile=release + +.PHONY: test_coverage coverage +test_coverage: coverage +coverage: + $(RM) -r $(TEST_COV_D) + mkdir -p $(TEST_COV_D) + BISECT_FILE=$(TEST_COV_D)/language dune runtest --no-print-directory \ + --instrument-with bisect_ppx --force + bisect-ppx-report html $(COVERAGE_OPTS) + bisect-ppx-report summary $(COVERAGE_OPTS) + @echo "Use 'xdg-open _coverage/index.html' to see coverage report" diff --git a/AML/bin/AML.ml b/AML/bin/AML.ml new file mode 100644 index 00000000..151b6a41 --- /dev/null +++ b/AML/bin/AML.ml @@ -0,0 +1,47 @@ +(** Copyright 2025-2026, Rodion Suvorov, Dmitriy Chirkov*) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Codegen +open Format + +let usage_msg = "Usage: AML.exe " + +let read_file filename = + let ic = open_in filename in + let len = in_channel_length ic in + let s = really_input_string ic len in + close_in ic; + s +;; + +let write_file filename content = + let oc = open_out filename in + output_string oc content; + close_out oc +;; + +let parse_args = function + | [ input; output ] -> input, output + | _ -> + prerr_endline usage_msg; + exit 1 +;; + +let compile input_file output_file = + let src = read_file input_file in + let program = Parser.parse_str src in + let buf = Buffer.create 1024 in + let fmt = formatter_of_buffer buf in + codegen fmt program; + pp_print_flush fmt (); + write_file output_file (Buffer.contents buf); + Printf.printf "Generated: %s\n" output_file +;; + +let main input_file output_file = + let input_file, output_file = parse_args [ input_file; output_file ] in + compile input_file output_file +;; + +let () = main Sys.argv.(1) Sys.argv.(2) diff --git a/AML/bin/dune b/AML/bin/dune new file mode 100644 index 00000000..36f51a11 --- /dev/null +++ b/AML/bin/dune @@ -0,0 +1,3 @@ +(executable + (name AML) + (libraries stdio base Parser Inferencer Pprinter Codegen)) diff --git a/AML/dune-project b/AML/dune-project new file mode 100644 index 00000000..0a9c1f9a --- /dev/null +++ b/AML/dune-project @@ -0,0 +1,33 @@ +(lang dune 3.8) + +(name AML) + +(generate_opam_files true) + +(source + (github suvorovrain/AML)) + +(authors "suvorovrain" , "kinokotakenoko9") + +(maintainers "suvorovrain" , "kinokotakenoko9") + +(license LGPL-2.1-or-later WITH OCaml-LGPL-linking-exception) + +(package + (name AML) + (synopsis "A ML compiler") + (depends + (dune + (= "3.19.1")) + base + (angstrom + (= "0.16.0")) + qcheck-core + (ppx_inline_test :with-test) + ppx_deriving + (ppx_deriving_qcheck + (= "0.6")) + ppx_expect + bisect_ppx)) + +; See the complete stanza docs at https://dune.readthedocs.io/en/stable/dune-files.html#dune-project diff --git a/AML/lib/ast/ast.ml b/AML/lib/ast/ast.ml new file mode 100644 index 00000000..5deb8dc5 --- /dev/null +++ b/AML/lib/ast/ast.ml @@ -0,0 +1,245 @@ +(** Copyright 2025-2026, Rodion Suvorov, Dmitriy Chirkov*) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open QCheck +open Base +open Gen +open Stdlib + +type ident = string [@@deriving eq, show { with_path = false }] + +let gen_charc = map Char.chr (int_range (Char.code 'a') (Char.code 'z')) + +let is_not_keyword = function + | "let" + | "if" + | "then" + | "else" + | "in" + | "fun" + | "true" + | "false" + | "rec" + | "and" + | "function" + | "match" + | "with" + | "type" + | "of" -> false + | _ -> true +;; + +let rec gen_filtered_ident base_gen = + let open QCheck.Gen in + base_gen + >>= fun ident -> + if is_not_keyword ident then return ident else gen_filtered_ident base_gen +;; + +let gen_ident = + let base_gen = + map2 + (fun start_sym rest_sym -> Base.Char.to_string start_sym ^ rest_sym) + (oneof [ char_range 'A' 'Z'; char_range 'a' 'z'; return '_' ]) + (small_string + ~gen: + (oneof + [ char_range 'A' 'Z'; char_range 'a' 'z'; char_range '0' '9'; return '_' ])) + in + gen_filtered_ident base_gen +;; + +let gen_ident_uc = + let base_gen = + map2 + (fun start_sym rest_sym -> Base.Char.to_string start_sym ^ rest_sym) + (char_range 'A' 'Z') + (small_string + ~gen: + (oneof + [ char_range 'A' 'Z'; char_range 'a' 'z'; char_range '0' '9'; return '_' ])) + in + gen_filtered_ident base_gen +;; + +let gen_ident_lc include_us = + let start_sym = + if include_us then oneof [ char_range 'a' 'z'; return '_' ] else char_range 'a' 'z' + in + let base_gen = + map2 + (fun start_sym rest_sym -> Base.Char.to_string start_sym ^ rest_sym) + start_sym + (small_string + ~gen: + (oneof + [ char_range 'A' 'Z'; char_range 'a' 'z'; char_range '0' '9'; return '_' ])) + in + gen_filtered_ident base_gen +;; + +module List1 = struct + type 'a t = 'a * ('a list[@gen list_size (int_bound 5) gen_a]) + [@@deriving eq, show { with_path = false }, qcheck] +end + +module List2 = struct + type 'a t = 'a * 'a * ('a list[@gen list_size (int_bound 5) gen_a]) + [@@deriving eq, show { with_path = false }, qcheck] +end + +module Constant = struct + type t = + | Const_integer of (int[@gen small_nat]) (** integer as [52] *) + | Const_char of (char[@gen gen_charc]) (** char as ['w'] *) + | Const_string of (string[@gen small_string ~gen:gen_charc]) + (** string as ["Kakadu"] *) + [@@deriving eq, show { with_path = false }, qcheck] +end + +module TypeExpr = struct + type t = + | Type_arrow of t * t + (** [Type_arrow(T1, T2)] represents: + [T1 -> T2] *) + | Type_var of (ident[@gen gen_ident]) + | Type_tuple of t List2.t (** [Type_tuple([T1, T2, ... Tn])] *) + | Type_construct of ident * t list + (** [Type_construct(lident, l)] represents: + - [tconstr] when [l=[]], + - [T tconstr] when [l=[T]], + - [(T1, ..., Tn) tconstr] when [l=[T1 ; ... ; Tn]]. *) + [@@deriving eq, show { with_path = false }, qcheck] +end + +module Pattern = struct + type t = + | Pat_constraint of t * (TypeExpr.t[@gen TypeExpr.gen_sized (n / 2)]) + (** Pattern [(P : T)] *) + | Pat_any (** The pattern [_]. *) + | Pat_var of (ident[@gen gen_ident_lc false]) (** A variable pattern such as [x] *) + | Pat_constant of Constant.t (** Patterns such as [52], ['w'], ["uwu"] *) + | Pat_tuple of t List2.t (** Patterns [(P1, ..., Pn)]. *) + | Pat_construct of (ident[@gen gen_ident_uc]) * t option + (** [Pat_construct(C, args)] represents: + - [C] when [args] is [None], + - [C P] when [args] is [Some (P)] + - [C (P1, ..., Pn)] when [args] is + [Some (Pat_tuple [P1; ...; Pn])] *) + [@@deriving eq, show { with_path = false }, qcheck] +end + +module Expression = struct + type rec_flag = + | Nonrecursive + | Recursive + [@@deriving eq, show { with_path = false }, qcheck] + + type 'expr value_binding = + { pat : Pattern.t + ; expr : 'expr + } + [@@deriving eq, show { with_path = false }] + + let gen_value_binding gen_expr n = + map2 (fun pat expr -> { pat; expr }) (Pattern.gen_sized (n / 2)) (gen_expr (n / 2)) + ;; + + type 'expr case = + { first : Pattern.t + ; second : 'expr + } + [@@deriving eq, show { with_path = false }] + + let gen_case gen_expr n = + map2 + (fun first second -> { first; second }) + (Pattern.gen_sized (n / 2)) + (gen_expr (n / 2)) + ;; + + type t = + | Exp_ident of (ident[@gen gen_ident_lc true]) (** Identifiers such as [x] *) + | Exp_constant of Constant.t (** Expressions constant such as [1], ['a'], ["true"]**) + | Exp_tuple of t List2.t (** Expressions [(E1, E2, ..., En)] *) + | Exp_function of (t case[@gen gen_case gen_sized (n / 2)]) List1.t + (** [Exp_function (P1, [P2; ...; Pn])] represents + [function P1 | ... | Pn] *) + | Exp_fun of (Pattern.t[@gen Pattern.gen_sized (n / 2)]) List1.t * t + (**[Exp_fun (P1, [P2; ...; Pn], E)] represents: + [fun P1 ... Pn -> E] *) + | Exp_apply of t * t + (** [Pexp_apply(E0, E1)] + represents [E0 E1]*) + | Exp_match of t * (t case[@gen gen_case gen_sized (n / 2)]) List1.t + (** [match E0 with P1 -> E1 || Pn -> En] *) + | Exp_constraint of t * (TypeExpr.t[@gen TypeExpr.gen_sized (n / 2)]) (** [(E : T)] *) + | Exp_if of t * t * t option (** [if E1 then E2 else E3] *) + | Exp_let of + rec_flag * (t value_binding[@gen gen_value_binding gen_sized (n / 2)]) List1.t * t + (** [Exp_let(flag, [(P1,E1) ; ... ; (Pn,En)], E)] represents: + - [let P1 = E1 and ... and Pn = EN in E] + when [flag] is [Nonrecursive], + - [let rec P1 = E1 and ... and Pn = EN in E] + when [flag] is [Recursive]. *) + | Exp_construct of (ident[@gen gen_ident_uc]) * t option + (** [Exp_construct(C, exp)] represents: + - [C] when [exp] is [None], + - [C E] when [exp] is [Some E], + - [C (E1, ..., En)] when [exp] is [Some (Exp_tuple[E1;...;En])] *) + [@@deriving eq, show { with_path = false }, qcheck] +end + +module Structure = struct + type structure_item = + | Str_eval of Expression.t + | Str_value of Expression.rec_flag * Expression.t Expression.value_binding List1.t + (** [Str_value(rec, [(P1, E1 ; ... ; (Pn, En))])] represents: + - [let P1 = E1 and ... and Pn = EN] + when [rec] is [Nonrecursive], + - [let rec P1 = E1 and ... and Pn = EN ] + when [rec] is [Recursiv e ee]. *) + | Str_adt of ident list * ident * (ident * TypeExpr.t option) List1.t + (** [Str_type(C0, [(C1, [(T11; T12; ... ; T1n_1)]); (C2, [(T21;T22; ... ; T2n_2)]); ... ; + (Cm, [(Tm1;Tm2; ... ; Tmn_n)]) ])] represents: + + [type C0 = + | C1 of T11 * ... * T1n_1 + | ... + | Cm of Tm1 * ... * Tmn_n + ] + + n_i: [n_i >= 0] + Invariant: [m > 0] *) + [@@deriving eq, show { with_path = false }] + + let gen_structure_item n = + frequency + [ 0, map (fun expr -> Str_eval expr) (Expression.gen_sized (n / 2)) + ; ( 0 + , let* rec_flag = + oneof [ return Expression.Nonrecursive; return Expression.Recursive ] + in + let* bind1 = Expression.gen_value_binding Expression.gen_sized (n / 2) in + let* bindl = + small_list (Expression.gen_value_binding Expression.gen_sized (n / 2)) + in + return (Str_value (rec_flag, (bind1, bindl))) ) + ; ( 1 + , let* tparam = small_list (gen_ident_lc true) in + let* idt = gen_ident_lc true in + let* cons1 = Gen.pair gen_ident_uc (Gen.option (TypeExpr.gen_sized (n / 20))) in + let* consl = + small_list (Gen.pair gen_ident_uc (Gen.option (TypeExpr.gen_sized (n / 20)))) + in + return (Str_adt (tparam, idt, (cons1, consl))) ) + ] + ;; +end + +type program = Structure.structure_item list [@@deriving eq, show { with_path = false }] + +module Program = struct + let gen_program n = list_size (int_bound 6) (Structure.gen_structure_item (n / 2)) +end diff --git a/AML/lib/ast/ast.mli b/AML/lib/ast/ast.mli new file mode 100644 index 00000000..66604e6b --- /dev/null +++ b/AML/lib/ast/ast.mli @@ -0,0 +1,194 @@ +(** Copyright 2025-2026, Rodion Suvorov, Dmitriy Chirkov*) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +type ident = string + +val equal_ident : ident -> ident -> bool +val pp_ident : Format.formatter -> ident -> unit +val show_ident : ident -> string +val gen_charc : char QCheck.Gen.t +val is_not_keyword : string -> bool +val gen_filtered_ident : string QCheck.Gen.t -> string QCheck.Gen.t +val gen_ident : string QCheck.Gen.t +val gen_ident_uc : string QCheck.Gen.t +val gen_ident_lc : bool -> string QCheck.Gen.t + +module List1 : sig + type 'a t = 'a * 'a list + + val equal : ('a -> 'a -> bool) -> 'a t -> 'a t -> bool + val pp : (Format.formatter -> 'a -> unit) -> Format.formatter -> 'a t -> unit + val show : (Format.formatter -> 'a -> unit) -> 'a t -> ident + val gen : 'a QCheck.Gen.t -> ('a * 'a list) QCheck.Gen.t + val arb : 'a QCheck.Gen.t -> ('a * 'a list) QCheck.arbitrary +end + +module List2 : sig + type 'a t = 'a * 'a * 'a list + + val equal : ('a -> 'a -> bool) -> 'a t -> 'a t -> bool + val pp : (Format.formatter -> 'a -> unit) -> Format.formatter -> 'a t -> unit + val show : (Format.formatter -> 'a -> unit) -> 'a t -> ident + val gen : 'a QCheck.Gen.t -> ('a * 'a * 'a list) QCheck.Gen.t + val arb : 'a QCheck.Gen.t -> ('a * 'a * 'a list) QCheck.arbitrary +end + +module Constant : sig + type t = + | Const_integer of int (** Integer constant. *) + | Const_char of char (** Character constant. *) + | Const_string of ident (** String constant. *) + + val equal : t -> t -> bool + val pp : Format.formatter -> t -> unit + val show : t -> ident + val gen : t QCheck.Gen.t + val arb : t QCheck.arbitrary +end + +module TypeExpr : sig + type t = + | Type_arrow of t * t (** Represents a function type: [T1 -> T2]. *) + | Type_var of ident (** Represents a type variable: ['a]. *) + | Type_tuple of t List2.t (** Represents a tuple type: [(T1, T2, ..., Tn)]. *) + | Type_construct of ident * t list + (** Represents a type constructor with arguments: [C T1 ... Tn]. *) + + val equal : t -> t -> bool + val pp : Format.formatter -> t -> unit + val show : t -> ident + val gen_sized : int -> t QCheck.Gen.t + val gen : t QCheck.Gen.t + val arb_sized : int -> t QCheck.arbitrary + val arb : t QCheck.arbitrary +end + +module Pattern : sig + type t = + | Pat_constraint of t * TypeExpr.t (** A pattern with a type constraint: [(P : T)]. *) + | Pat_any (** The wildcard pattern [_]. *) + | Pat_var of ident (** A variable pattern, such as [x]. *) + | Pat_constant of Constant.t + (** A constant pattern, such as [1], ["text"], or ['t']. *) + | Pat_tuple of t List2.t (** A tuple pattern, such as [(P1, P2, ..., Pn)]. *) + | Pat_construct of ident * t option + (** A constructor pattern, such as [C] or [C P]. *) + + val equal : t -> t -> bool + val pp : Format.formatter -> t -> unit + val show : t -> ident + val gen_sized : int -> t QCheck.Gen.t + val gen : t QCheck.Gen.t + val arb_sized : int -> t QCheck.arbitrary + val arb : t QCheck.arbitrary +end + +module Expression : sig + type rec_flag = + | Nonrecursive (** zanuda is zanuda *) + | Recursive (** zanuda is zanuda *) + + val equal_rec_flag : rec_flag -> rec_flag -> bool + val pp_rec_flag : Format.formatter -> rec_flag -> unit + val show_rec_flag : rec_flag -> ident + val gen_rec_flag : rec_flag QCheck.Gen.t + val arb_rec_flag : rec_flag QCheck.arbitrary + + type 'expr value_binding = + { pat : Pattern.t + ; expr : 'expr + } + + val equal_value_binding + : ('expr -> 'expr -> bool) + -> 'expr value_binding + -> 'expr value_binding + -> bool + + val pp_value_binding + : (Format.formatter -> 'expr -> unit) + -> Format.formatter + -> 'expr value_binding + -> unit + + val show_value_binding + : (Format.formatter -> 'expr -> unit) + -> 'expr value_binding + -> ident + + val gen_value_binding : (int -> 'a QCheck.Gen.t) -> int -> 'a value_binding QCheck.Gen.t + + type 'expr case = + { first : Pattern.t + ; second : 'expr + } + + val equal_case : ('expr -> 'expr -> bool) -> 'expr case -> 'expr case -> bool + + val pp_case + : (Format.formatter -> 'expr -> unit) + -> Format.formatter + -> 'expr case + -> unit + + val show_case : (Format.formatter -> 'expr -> unit) -> 'expr case -> ident + val gen_case : (int -> 'a QCheck.Gen.t) -> int -> 'a case QCheck.Gen.t + + type t = + | Exp_ident of ident (** Identifiers such as [x] and [M.x]. *) + | Exp_constant of Constant.t + (** Expressions with constants such as [1], ['a'], ["true"]. *) + | Exp_tuple of t List2.t (** A tuple expression, such as [(E1, E2, ..., En)]. *) + | Exp_function of t case List1.t + (** A function with pattern matching, such as [function P1 -> E1 | ... | Pn -> En]. *) + | Exp_fun of Pattern.t List1.t * t + (** A function expression, such as [fun P1 ... Pn -> E]. *) + | Exp_apply of t * t (** Function application, such as [E0 E1]. *) + | Exp_match of t * t case List1.t + (** A match expression, such as [match E0 with P1 -> E1 | ... | Pn -> En]. *) + | Exp_constraint of t * TypeExpr.t (** A type constraint, such as [(E : T)]. *) + | Exp_if of t * t * t option + (** An if-then-else expression, such as [if E1 then E2 else E3]. *) + | Exp_let of rec_flag * t value_binding List1.t * t + (** A let-binding, such as [let P1 = E1 and ... and Pn = En in E]. *) + | Exp_construct of ident * t option + (** A constructor expression, such as [C] or [C E]. *) + + val equal : t -> t -> bool + val pp : Format.formatter -> t -> unit + val show : t -> ident + val gen_sized : int -> t QCheck.Gen.t + val gen : t QCheck.Gen.t + val arb_sized : int -> t QCheck.arbitrary + val arb : t QCheck.arbitrary +end + +module Structure : sig + type structure_item = + | Str_eval of Expression.t (** An evaluated expression, such as [E]. *) + | Str_value of Expression.rec_flag * Expression.t Expression.value_binding List1.t + (** A let-binding, such as: + - [let P1 = E1 and ... and Pn = En] + when [rec] is [rec_flag.Nonrecursive]. + - [let rec P1 = E1 and ... and Pn = En] + when [rec] is [rec_flag.Recursive]. *) + | Str_adt of ident list * ident * (ident * TypeExpr.t option) List1.t + (** A type declaration for an algebraic data type (ADT), + such as [type t1 = ... | ... | tn = ...]. *) + + val equal_structure_item : structure_item -> structure_item -> bool + val pp_structure_item : Format.formatter -> structure_item -> unit + val show_structure_item : structure_item -> ident + val gen_structure_item : int -> structure_item QCheck.Gen.t +end + +type program = Structure.structure_item list + +val equal_program : program -> program -> bool +val pp_program : Format.formatter -> program -> unit +val show_program : program -> string + +module Program : sig + val gen_program : int -> Structure.structure_item list QCheck.Gen.t +end diff --git a/AML/lib/ast/dune b/AML/lib/ast/dune new file mode 100644 index 00000000..10718e04 --- /dev/null +++ b/AML/lib/ast/dune @@ -0,0 +1,8 @@ +(library + (name Ast) + (public_name AML.Ast) + (libraries base qcheck-core qcheck-core.runner) + (preprocess + (pps ppx_deriving.show ppx_deriving.eq ppx_deriving_qcheck)) + (instrumentation + (backend bisect_ppx))) diff --git a/AML/lib/codegen/codegen.ml b/AML/lib/codegen/codegen.ml new file mode 100644 index 00000000..594f0390 --- /dev/null +++ b/AML/lib/codegen/codegen.ml @@ -0,0 +1,189 @@ +(** Copyright 2025-2026, Rodion Suvorov, Dmitriy Chirkov*) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Base +open Machine +open Ast +open Ast.Expression +open Ast.Pattern + +type location = + | Loc_reg of reg + | Loc_mem of reg + +type env = (ident, location, String.comparator_witness) Map.t + +type cg_state = + { env : env + ; frame_offset : int + ; label_id : int + } + +module Codegen = struct + type 'a t = Cg of (cg_state -> 'a * cg_state) + + let run (state : cg_state) (Cg f) : 'a * cg_state = f state + let return x = Cg (fun state -> x, state) + + let ( let* ) (Cg m) f = + Cg + (fun state -> + let res, new_state = m state in + let (Cg m') = f res in + m' new_state) + ;; + + let get = Cg (fun state -> state, state) +end + +open Codegen + +let get_state = Cg (fun state -> state, state) +let set_state new_state = Cg (fun _ -> (), new_state) + +let fresh_label prefix = + let* state = get_state in + let id = state.label_id in + let* () = set_state { state with label_id = id + 1 } in + return (Printf.sprintf ".L%s_%d" prefix id) +;; + +let allocate_local_var id = + let* state = get_state in + let new_offset = state.frame_offset + 8 in + let location = ROff (-new_offset, fp) in + let new_env = Map.set state.env ~key:id ~data:(Loc_mem location) in + let* () = set_state { state with frame_offset = new_offset; env = new_env } in + return location +;; + +let lookup_var id = + let* state = get_state in + match Map.find state.env id with + | Some loc -> return loc + | None -> failwith ("Unbound variable: " ^ id) +;; + +let gen_bin_op op dst r1 r2 = + match op with + | "+" -> emit add dst r1 r2 + | "-" -> emit sub dst r1 r2 + | "*" -> emit mul dst r1 r2 + | "<=" -> + emit slt dst t1 t0; + emit xori dst dst 1 + | _ -> failwith ("Unsupported binary operator: " ^ op) +;; + +let rec gen_expr (dst : reg) (expr : Ast.Expression.t) : unit Codegen.t = + match expr with + | Exp_constant (Const_integer i) -> return (emit li dst i) + | Exp_ident id -> + let* loc = lookup_var id in + return + (match loc with + | Loc_reg r -> emit mv dst r + | Loc_mem m -> emit ld dst m) + | Exp_apply (f, arg) -> + (match f, arg with + | Exp_ident op, Exp_tuple (arg1, arg2, []) when is_not_keyword op -> + let* () = gen_expr t0 arg1 in + let* () = gen_expr t1 arg2 in + return (gen_bin_op op dst t0 t1) + | Exp_ident fname, arg_exp -> + let* state = get in + let live_caller_regs = + Map.to_alist state.env + |> List.filter_map ~f:(fun (_, loc) -> + match loc with + | Loc_reg ((A _ | T _) as r) -> Some r + | _ -> None) + |> List.dedup_and_sort ~compare:Poly.compare + in + let* () = + return + (List.iter live_caller_regs ~f:(fun r -> + emit addi sp sp (-8); + emit sd r (ROff (0, sp)))) + in + let* () = gen_expr a0 arg_exp in + let* () = return (emit jal ra fname) in + let* () = if not (equal_reg dst a0) then return (emit mv dst a0) else return () in + let* () = + return + (List.iter (List.rev live_caller_regs) ~f:(fun r -> + emit ld r (ROff (0, sp)); + emit addi sp sp 8)) + in + return () + | _ -> failwith "TODO: general function application not done yet") + | Exp_let (_, ({ pat = Pat_var id; expr }, []), body) -> + let* () = gen_expr t0 expr in + let* loc = allocate_local_var id in + let* () = return (emit sd t0 loc) in + gen_expr dst body + | Exp_if (cond, then_exp, Some else_exp) -> + let* else_label = fresh_label "else" in + let* end_label = fresh_label "endif" in + let* () = gen_expr t0 cond in + let* () = return (emit beq t0 x0 else_label) in + let* () = gen_expr dst then_exp in + let* () = return (emit j end_label) in + let* () = return (emit label else_label) in + let* () = gen_expr dst else_exp in + return (emit label end_label) + | _ -> failwith "TODO: expr" +;; + +let rec count_local_vars = function + | Exp_let (_, _, body) -> 1 + count_local_vars body + | Exp_if (c, t, Some e) -> count_local_vars c + count_local_vars t + count_local_vars e + | Exp_apply (f, a) -> count_local_vars f + count_local_vars a + | _ -> 0 +;; + +let gen_func name args body = + let is_main = String.equal name "main" in + let func_label = if is_main then "_start" else name in + emit directive (Printf.sprintf ".globl %s" func_label); + emit directive (Printf.sprintf ".type %s, @function" func_label); + emit label func_label; + let locals_count = count_local_vars body in + let stack_size = 16 + (locals_count * 8) in + emit addi sp sp (-stack_size); + emit sd ra (ROff (stack_size - 8, sp)); + emit sd fp (ROff (stack_size - 16, sp)); + emit addi fp sp stack_size; + let f i env = function + | Pat_var id when i < 8 -> Map.set env ~key:id ~data:(Loc_reg (A i)) + | _ -> failwith "not yet" + in + let initial_env = List.foldi args ~init:(Map.empty (module String)) ~f in + let initial_cg_state = { env = initial_env; frame_offset = 16; label_id = 0 } in + let (), _final_state = Codegen.run initial_cg_state (gen_expr a0 body) in + let () = emit label (name ^ "_end") in + let () = emit ld ra (ROff (stack_size - 8, sp)) in + let () = emit ld fp (ROff (stack_size - 16, sp)) in + let () = emit addi sp sp stack_size in + let () = + if is_main + then ( + emit li (A 7) 93; + emit ecall) + else emit ret + in + () +;; + +let codegen ppf (s : Structure.structure_item list) = + let open Structure in + emit directive ".text"; + List.iter s ~f:(function + | Str_value (Recursive, ({ pat = Pat_var f; expr = Exp_fun ((p, ps), body) }, [])) -> + gen_func f (p :: ps) body + | Str_value (Nonrecursive, ({ pat = Pat_var f; expr = body }, [])) -> + gen_func f [] body + | _ -> failwith "Unsupported toplevel structure item "); + flush_queue ppf +;; diff --git a/AML/lib/codegen/codegen.mli b/AML/lib/codegen/codegen.mli new file mode 100644 index 00000000..842b5898 --- /dev/null +++ b/AML/lib/codegen/codegen.mli @@ -0,0 +1,7 @@ +(** Copyright 2025-2026, Rodion Suvorov, Dmitriy Chirkov*) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Ast + +val codegen : Format.formatter -> Structure.structure_item list -> unit diff --git a/AML/lib/codegen/dune b/AML/lib/codegen/dune new file mode 100644 index 00000000..cb24d664 --- /dev/null +++ b/AML/lib/codegen/dune @@ -0,0 +1,8 @@ +(library + (name Codegen) + (public_name AML.Codegen) + (libraries base Ast) + (preprocess + (pps ppx_deriving.eq)) + (instrumentation + (backend bisect_ppx))) diff --git a/AML/lib/codegen/machine.ml b/AML/lib/codegen/machine.ml new file mode 100644 index 00000000..7d9f13d9 --- /dev/null +++ b/AML/lib/codegen/machine.ml @@ -0,0 +1,117 @@ +(** Copyright 2025-2026, Rodion Suvorov, Dmitriy Chirkov*) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +type reg = + | X0 + | A of int + | RA + | SP + | T of int + | S of int + | ROff of int * reg +[@@deriving eq] + +let x0 = X0 +let ra = RA +let sp = SP +let fp = S 0 +let a0 = A 0 +let t0 = T 0 +let t1 = T 1 +let t2 = T 2 + +let rec pp_reg ppf = + let open Format in + function + | X0 -> fprintf ppf "x0" + | A n -> fprintf ppf "a%d" n + | T n -> fprintf ppf "t%d" n + | S n -> fprintf ppf "s%d" n + | RA -> fprintf ppf "ra" + | SP -> fprintf ppf "sp" + | ROff (n, r) -> fprintf ppf "%d(%a)" n pp_reg r +;; + +type instr = + | Addi of reg * reg * int + | Add of reg * reg * reg + | Sub of reg * reg * reg + | Mul of reg * reg * reg + | Slt of reg * reg * reg + | Xori of reg * reg * int + | Beq of reg * reg * string + | Bne of reg * reg * string + | Blt of reg * reg * string + | Jal of reg * string + | J of string + | Ret + | Ld of reg * reg + | Sd of reg * reg + | Li of reg * int + | Ecall + | Label of string + | Directive of string + +let pp_instr ppf = + let open Format in + function + | Addi (r1, r2, n) -> fprintf ppf "addi %a, %a, %d" pp_reg r1 pp_reg r2 n + | Add (r1, r2, r3) -> fprintf ppf "add %a, %a, %a" pp_reg r1 pp_reg r2 pp_reg r3 + | Sub (r1, r2, r3) -> fprintf ppf "sub %a, %a, %a" pp_reg r1 pp_reg r2 pp_reg r3 + | Mul (r1, r2, r3) -> fprintf ppf "mul %a, %a, %a" pp_reg r1 pp_reg r2 pp_reg r3 + | Slt (r1, r2, r3) -> fprintf ppf "slt %a, %a, %a" pp_reg r1 pp_reg r2 pp_reg r3 + | Xori (r1, r2, n) -> fprintf ppf "xori %a, %a, %d" pp_reg r1 pp_reg r2 n + | Beq (r1, r2, s) -> fprintf ppf "beq %a, %a, %s" pp_reg r1 pp_reg r2 s + | Bne (r1, r2, s) -> fprintf ppf "bne %a, %a, %s" pp_reg r1 pp_reg r2 s + | Blt (r1, r2, s) -> fprintf ppf "blt %a, %a, %s" pp_reg r1 pp_reg r2 s + | Jal (r1, s) -> fprintf ppf "jal %a, %s" pp_reg r1 s + | J s -> fprintf ppf "j %s" s + | Ret -> fprintf ppf "ret" + | Sd (r1, r2) -> fprintf ppf "sd %a, %a" pp_reg r1 pp_reg r2 + | Ld (r1, r2) -> fprintf ppf "ld %a, %a" pp_reg r1 pp_reg r2 + | Li (r1, n) -> fprintf ppf "li %a, %d" pp_reg r1 n + | Ecall -> fprintf ppf "ecall" + | Label s -> fprintf ppf "%s:" s + | Directive s -> fprintf ppf "%s" s +;; + +let addi k r1 r2 n = k @@ Addi (r1, r2, n) +let add k r1 r2 r3 = k @@ Add (r1, r2, r3) +let sub k r1 r2 r3 = k @@ Sub (r1, r2, r3) +let mul k r1 r2 r3 = k @@ Mul (r1, r2, r3) +let slt k r1 r2 r3 = k @@ Slt (r1, r2, r3) +let xori k r1 r2 n = k @@ Xori (r1, r2, n) +let beq k r1 r2 s = k @@ Beq (r1, r2, s) +let bne k r1 r2 s = k @@ Bne (r1, r2, s) +let blt k r1 r2 s = k @@ Blt (r1, r2, s) +let ecall k = k Ecall +let ret k = k Ret +let jal k r1 s = k @@ Jal (r1, s) +let j k s = k (J s) +let ld k rd rs = k (Ld (rd, rs)) +let sd k rs rd = k (Sd (rs, rd)) +let li k r n = k (Li (r, n)) +let label k s = k (Label s) +let directive k s = k (Directive s) +let mv k rd rs = k @@ Addi (rd, rs, 0) +let code : (instr * string) Queue.t = Queue.create () +let emit ?(comm = "") instr = instr (fun i -> Queue.add (i, comm) code) + +let rec flush_queue ppf = + if Queue.is_empty code + then () + else + let open Format in + let i, comm = Queue.pop code in + (match i with + | Label _ -> + fprintf ppf "%a" pp_instr i; + if comm <> "" then fprintf ppf " # %s" comm; + fprintf ppf "\n" + | _ -> + fprintf ppf " %a" pp_instr i; + if comm <> "" then fprintf ppf " # %s" comm; + fprintf ppf "\n"); + flush_queue ppf +;; diff --git a/AML/lib/codegen/machine.mli b/AML/lib/codegen/machine.mli new file mode 100644 index 00000000..4e418347 --- /dev/null +++ b/AML/lib/codegen/machine.mli @@ -0,0 +1,67 @@ +(** Copyright 2025-2026, Rodion Suvorov, Dmitriy Chirkov*) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +type reg = + | X0 + | A of int + | RA + | SP + | T of int + | S of int + | ROff of int * reg +[@@deriving eq] + +val x0 : reg +val ra : reg +val sp : reg +val fp : reg +val a0 : reg +val t0 : reg +val t1 : reg +val t2 : reg +val pp_reg : Format.formatter -> reg -> unit + +type instr = + | Addi of reg * reg * int + | Add of reg * reg * reg + | Sub of reg * reg * reg + | Mul of reg * reg * reg + | Slt of reg * reg * reg + | Xori of reg * reg * int + | Beq of reg * reg * string + | Bne of reg * reg * string + | Blt of reg * reg * string + | Jal of reg * string + | J of string + | Ret + | Ld of reg * reg + | Sd of reg * reg + | Li of reg * int + | Ecall + | Label of string + | Directive of string + +val pp_instr : Format.formatter -> instr -> unit +val addi : (instr -> 'a) -> reg -> reg -> int -> 'a +val add : (instr -> 'a) -> reg -> reg -> reg -> 'a +val sub : (instr -> 'a) -> reg -> reg -> reg -> 'a +val mul : (instr -> 'a) -> reg -> reg -> reg -> 'a +val slt : (instr -> 'a) -> reg -> reg -> reg -> 'a +val xori : (instr -> 'a) -> reg -> reg -> int -> 'a +val beq : (instr -> 'a) -> reg -> reg -> string -> 'a +val bne : (instr -> 'a) -> reg -> reg -> string -> 'a +val blt : (instr -> 'a) -> reg -> reg -> string -> 'a +val ecall : (instr -> 'a) -> 'a +val ret : (instr -> 'a) -> 'a +val jal : (instr -> 'a) -> reg -> string -> 'a +val j : (instr -> 'a) -> string -> 'a +val ld : (instr -> 'a) -> reg -> reg -> 'a +val sd : (instr -> 'a) -> reg -> reg -> 'a +val li : (instr -> 'a) -> reg -> int -> 'a +val label : (instr -> 'a) -> string -> 'a +val directive : (instr -> 'a) -> string -> 'a +val mv : (instr -> 'a) -> reg -> reg -> 'a +val code : (instr * string) Queue.t +val emit : ?comm:string -> ((instr -> unit) -> 'a) -> 'a +val flush_queue : Format.formatter -> unit diff --git a/AML/lib/codegen/test/CTest.ml b/AML/lib/codegen/test/CTest.ml new file mode 100644 index 00000000..5153c558 --- /dev/null +++ b/AML/lib/codegen/test/CTest.ml @@ -0,0 +1,107 @@ +(** Copyright 2025-2026, Rodion Suvorov, Dmitriy Chirkov*) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Parser +open Codegen +open Inferencer.Infer + +let run str = + match parse_str str with + | str -> + (match run_infer_program str env_with_things with + | Ok _ -> Format.printf "%a\n%!" codegen str + | Error _ -> Format.printf "Parsing error\n") +;; + +let%expect_test "binary operations" = + run + {| + let f = + let x = 52 + 52 in + let y = 52 - 52 in + let z = 52 * 52 in + let w = 52 <= 52 in + x + ;; + |}; + [%expect + {| + .text + .globl f + .type f, @function + f: + addi sp, sp, -48 + sd ra, 40(sp) + sd s0, 32(sp) + addi s0, sp, 48 + li t0, 52 + li t1, 52 + add t0, t0, t1 + sd t0, -24(s0) + li t0, 52 + li t1, 52 + sub t0, t0, t1 + sd t0, -32(s0) + li t0, 52 + li t1, 52 + mul t0, t0, t1 + sd t0, -40(s0) + li t0, 52 + li t1, 52 + slt t0, t1, t0 + xori t0, t0, 1 + sd t0, -48(s0) + ld a0, -24(s0) + f_end: + ld ra, 40(sp) + ld s0, 32(sp) + addi sp, sp, 48 + ret |}] +;; + +let%expect_test "some branches" = + run + {| + let f = + let x = 5 in + let y = 2 in + let z = 3 in + let w = 4 in + if x <= y then z else w + ;; + |}; + [%expect + {| + .text + .globl f + .type f, @function + f: + addi sp, sp, -48 + sd ra, 40(sp) + sd s0, 32(sp) + addi s0, sp, 48 + li t0, 5 + sd t0, -24(s0) + li t0, 2 + sd t0, -32(s0) + li t0, 3 + sd t0, -40(s0) + li t0, 4 + sd t0, -48(s0) + ld t0, -24(s0) + ld t1, -32(s0) + slt t0, t1, t0 + xori t0, t0, 1 + beq t0, x0, .Lelse_0 + ld a0, -40(s0) + j .Lendif_1 + .Lelse_0: + ld a0, -48(s0) + .Lendif_1: + f_end: + ld ra, 40(sp) + ld s0, 32(sp) + addi sp, sp, 48 + ret |}] +;; diff --git a/AML/lib/codegen/test/CTest.mli b/AML/lib/codegen/test/CTest.mli new file mode 100644 index 00000000..4375d184 --- /dev/null +++ b/AML/lib/codegen/test/CTest.mli @@ -0,0 +1,3 @@ +(** Copyright 2025-2026, Rodion Suvorov, Dmitriy Chirkov*) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) diff --git a/AML/lib/codegen/test/codegen.t b/AML/lib/codegen/test/codegen.t new file mode 100644 index 00000000..e6ba852f --- /dev/null +++ b/AML/lib/codegen/test/codegen.t @@ -0,0 +1,73 @@ +(** Copyright 2025-2026, Rodion Suvorov, Dmitriy Chirkov*) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + $ cat >fac.ml < let rec fac n = + > if n <= 1 + > then 1 + > else (let n1 = n-1 in + > let m = fac n1 in + > n*m) + > + > let main = fac 4 + > EOF + $ ../../../bin/AML.exe fac.ml fac.s + Generated: fac.s + $ cat fac.s + .text + .globl fac + .type fac, @function + fac: + addi sp, sp, -32 + sd ra, 24(sp) + sd s0, 16(sp) + addi s0, sp, 32 + addi t0, a0, 0 + li t1, 1 + slt t0, t1, t0 + xori t0, t0, 1 + beq t0, x0, .Lelse_0 + li a0, 1 + j .Lendif_1 + .Lelse_0: + addi t0, a0, 0 + li t1, 1 + sub t0, t0, t1 + sd t0, -24(s0) + addi sp, sp, -8 + sd a0, 0(sp) + ld a0, -24(s0) + jal ra, fac + addi t0, a0, 0 + ld a0, 0(sp) + addi sp, sp, 8 + sd t0, -32(s0) + addi t0, a0, 0 + ld t1, -32(s0) + mul a0, t0, t1 + .Lendif_1: + fac_end: + ld ra, 24(sp) + ld s0, 16(sp) + addi sp, sp, 32 + ret + .globl _start + .type _start, @function + _start: + addi sp, sp, -16 + sd ra, 8(sp) + sd s0, 0(sp) + addi s0, sp, 16 + li a0, 4 + jal ra, fac + main_end: + ld ra, 8(sp) + ld s0, 0(sp) + addi sp, sp, 16 + li a7, 93 + ecall + $ riscv64-linux-gnu-as -march=rv64gc fac.s -o fac.o + $ riscv64-linux-gnu-ld fac.o -o fac.elf + $ qemu-riscv64 fac.elf + [24] + diff --git a/AML/lib/codegen/test/dune b/AML/lib/codegen/test/dune new file mode 100644 index 00000000..bd2e23ef --- /dev/null +++ b/AML/lib/codegen/test/dune @@ -0,0 +1,12 @@ +(library + (name CodegenTest) + (libraries base stdio Ast Parser Pprinter Inferencer Codegen) + (preprocess + (pps ppx_expect)) + (inline_tests) + (instrumentation + (backend bisect_ppx))) + +(cram + (applies_to codegen) + (deps ../../../bin/AML.exe)) diff --git a/AML/lib/infer/dune b/AML/lib/infer/dune new file mode 100644 index 00000000..6ddcade9 --- /dev/null +++ b/AML/lib/infer/dune @@ -0,0 +1,6 @@ +(library + (name Inferencer) + (public_name AML.Inferencer) + (libraries Ast Pprinter) + (instrumentation + (backend bisect_ppx))) diff --git a/AML/lib/infer/infer.ml b/AML/lib/infer/infer.ml new file mode 100644 index 00000000..20e4a933 --- /dev/null +++ b/AML/lib/infer/infer.ml @@ -0,0 +1,856 @@ +(** Copyright 2025-2026, Rodion Suvorov, Dmitriy Chirkov*) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Ast.TypeExpr +open InferTypes + +module MInfer = struct + open Base + + type 'a t = int -> int * ('a, InferTypes.error) Result.t + + let ( >>= ) : 'a 'b. 'a t -> ('a -> 'b t) -> 'b t = + fun m f st -> + let last, r = m st in + match r with + | Result.Error x -> last, Error x + | Ok a -> f a last + ;; + + let fail e st = st, Result.fail e + let return x last = last, Result.return x + let bind x ~f = x >>= f + + let ( >>| ) : 'a 'b. 'a t -> ('a -> 'b) -> 'b t = + fun x f st -> + match x st with + | st, Ok x -> st, Ok (f x) + | st, Result.Error e -> st, Result.Error e + ;; + + module Syntax = struct + let ( let* ) x f = bind x ~f + end + + module RList = struct + let fold_left xs ~init ~f = + Base.List.fold_left xs ~init ~f:(fun acc x -> + let open Syntax in + let* acc = acc in + f acc x) + ;; + + let fold_left2 xs xl ~init ~f = + Base.List.fold2 + ~f:(fun acc x l -> + let open Syntax in + let* acc = acc in + f acc x l) + ~init + xs + xl + ;; + + let fold_right xs ~init ~f = + Base.List.fold_right xs ~init ~f:(fun x acc -> + let open Syntax in + let* acc = acc in + f x acc) + ;; + end + + let fresh : int t = fun last -> last + 1, Result.Ok last + let run m = snd (m 0) +end + +module Type = struct + type t = Ast.TypeExpr.t + + let rec occurs_check tvar = function + | Type_var binder -> binder = tvar + | Type_arrow (l, r) -> occurs_check tvar l || occurs_check tvar r + | Type_tuple (t1, t2, t) -> + List.fold_left (fun acc h -> acc || occurs_check tvar h) false (t1 :: t2 :: t) + | Type_construct (_, ty) -> + List.fold_left (fun acc h -> acc || occurs_check tvar h) false ty + ;; + + let free_vars = + let rec helper acc = function + | Type_var binder -> VarSet.add binder acc + | Type_arrow (l, r) -> helper (helper acc l) r + | Type_tuple (t1, t2, t) -> List.fold_left helper acc (t1 :: t2 :: t) + | Type_construct (_, ty) -> List.fold_left helper acc ty + in + helper VarSet.empty + ;; +end + +module Substitution = struct + open MInfer + open MInfer.Syntax + open Base + + type t = (string, Type.t, Base.String.comparator_witness) Base.Map.t + + let empty = Map.empty (module Base.String) + + let singleton k v = + match k, v with + | a, Type_var b when String.equal a b -> return (Base.Map.empty (module Base.String)) + | _ -> + if Type.occurs_check k v + then fail (Occurs_check (k, v)) + else return (Base.Map.singleton (module Base.String) k v) + ;; + + let remove = Map.remove + + let apply sub = + let rec helper = function + | Type_var b as typ -> + (match Map.find sub b with + | Some b -> b + | None -> typ) + | Type_arrow (l, r) -> Type_arrow (helper l, helper r) + | Type_tuple (t1, t2, t) -> Type_tuple (helper t1, helper t2, List.map t ~f:helper) + | Type_construct (id, ty) -> Type_construct (id, List.map ty ~f:helper) + in + helper + ;; + + let fold mp init f = + Map.fold mp ~init ~f:(fun ~key:k ~data:vm acc -> + let* acc = acc in + f k vm acc) + ;; + + let rec unify l r = + match l, r with + | Type_var a, Type_var b when String.equal a b -> return empty + | Type_var b, t | t, Type_var b -> singleton b t + | Type_arrow (l1, r1), Type_arrow (l2, r2) -> + let* subs1 = unify l1 l2 in + let* subs2 = unify (apply subs1 r1) (apply subs1 r2) in + compose subs1 subs2 + | Type_tuple (l11, l12, l1), Type_tuple (l21, l22, l2) -> + (match + Base.List.fold2 + (l11 :: l12 :: l1) + (l21 :: l22 :: l2) + ~init:(return empty) + ~f:(fun acc t1 t2 -> + let* sub1 = acc in + let* sub2 = unify (apply sub1 t1) (apply sub1 t2) in + compose sub1 sub2) + with + | Ok sub -> sub + | _ -> fail (Unification_failed (l, r))) + | Type_construct (id1, ty1), Type_construct (id2, ty2) when String.equal id1 id2 -> + let* subs = + match + Base.List.fold2 ty1 ty2 ~init:(return empty) ~f:(fun acc t1 t2 -> + let* sub1 = acc in + let* sub2 = unify (apply sub1 t1) (apply sub1 t2) in + compose sub1 sub2) + with + | Ok sub -> sub + | _ -> fail (Unification_failed (l, r)) + in + return subs + | _ -> fail (Unification_failed (l, r)) + + and extend k v s = + match Map.find s k with + | None -> + let v = apply s v in + let* s2 = singleton k v in + fold s (return s2) (fun k v acc -> + let* acc = return acc in + let v = apply s2 v in + return (Map.update acc k ~f:(fun _ -> v))) + | Some v2 -> + let* s2 = unify v v2 in + compose s s2 + + and compose s1 s2 = fold s2 (return s1) extend + and compose_all ss = RList.fold_left ss ~init:(return empty) ~f:compose +end + +module Scheme = struct + type t = scheme + + let free_vars = function + | Forall (bs, t) -> VarSet.diff (Type.free_vars t) bs + ;; + + let apply subst (Forall (binder_set, typ)) = + let s2 = VarSet.fold (fun k s -> Substitution.remove s k) binder_set subst in + Forall (binder_set, Substitution.apply s2 typ) + ;; + + let pp_scheme fmt = function + | Forall (st, typ) -> + if VarSet.is_empty st + then + Format.fprintf + fmt + "%a" + (pprint_type ~poly_names_map:(Base.Map.empty (module Base.String))) + typ + else + Format.fprintf + fmt + "%a. %a" + VarSet.pp + st + (pprint_type ~poly_names_map:(Base.Map.empty (module Base.String))) + typ + ;; +end + +module TypeEnv = struct + open Base + + type t = (string, scheme, String.comparator_witness) Map.t + + let extend env name scheme = Map.set env ~key:name ~data:scheme + let empty = Map.empty (module String) + let fold f init mp = Map.fold mp ~init ~f:(fun ~key:k ~data:v acc -> f k v acc) + + let free_vars : t -> VarSet.t = + fold (fun _ s acc -> VarSet.union acc (Scheme.free_vars s)) VarSet.empty + ;; + + let apply s env = Map.map env ~f:(Scheme.apply s) + let find name xs = Map.find xs name + let find_exn name xs = Map.find_exn xs name + let remove = Base.Map.remove + + let pp_env fmt environment = + Map.iteri environment ~f:(fun ~key ~data -> + Stdlib.Format.fprintf fmt "%S: %a\n" key Scheme.pp_scheme data) + ;; +end + +open MInfer +open MInfer.Syntax + +let fresh_var = fresh >>| fun n -> Type_var (Int.to_string n) + +let instantiate : scheme -> Ast.TypeExpr.t MInfer.t = + fun (Forall (bs, t)) -> + VarSet.fold + (fun name typ -> + let* typ = typ in + let* f1 = fresh_var in + let* s = Substitution.singleton name f1 in + return (Substitution.apply s typ)) + bs + (return t) +;; + +let generalize : TypeEnv.t -> Type.t -> Scheme.t = + fun env ty -> + let free = VarSet.diff (Type.free_vars ty) (TypeEnv.free_vars env) in + Forall (free, ty) +;; + +open Ast.Constant +open Ast.Expression +open Ast.Pattern + +let rec infer_pat ~debug pat env = + match pat with + | Pat_any -> + let* fresh = fresh_var in + return (env, fresh) + | Pat_var ident -> + let* fresh = fresh_var in + let new_env = TypeEnv.extend env ident (Forall (VarSet.empty, fresh)) in + return (new_env, fresh) + | Pat_constant const -> + (match const with + | Const_char _ -> return (env, Type_construct ("char", [])) + | Const_integer _ -> return (env, Type_construct ("int", [])) + | Const_string _ -> return (env, Type_construct ("string", []))) + | Pat_tuple (pat1, pat2, rest) -> + let* env1, typ1 = infer_pat ~debug pat1 env in + let* env2, typ2 = infer_pat ~debug pat2 env1 in + let* env3, typ3 = + RList.fold_right + ~f:(fun pat acc -> + let* env_acc, typ_list = return acc in + let* env, typ = infer_pat ~debug pat env_acc in + return (env, typ :: typ_list)) + ~init:(return (env2, [])) + rest + in + return (env3, Type_tuple (typ1, typ2, typ3)) + | Pat_construct (name, pat) -> + (match TypeEnv.find name env with + | None -> fail (Unbound_variable name) + | Some (Forall (x, Type_arrow (arg, adt))) -> + let* typ = instantiate (Forall (x, Type_arrow (arg, adt))) in + (match pat with + | Some const_pat -> + let* patenv, typepat = infer_pat ~debug const_pat env in + let* uni_sub = Substitution.unify arg typepat in + let new_env = TypeEnv.apply uni_sub patenv in + return (new_env, Substitution.apply uni_sub adt) + | None -> return (env, typ)) + | Some el -> + let* typ = instantiate el in + return (env, typ)) + | Pat_constraint (pat, typ) -> + let* pat_env, pat_typ = infer_pat ~debug pat env in + let* uni_sub = Substitution.unify pat_typ typ in + let new_env = TypeEnv.apply uni_sub pat_env in + return (new_env, Substitution.apply uni_sub pat_typ) +;; + +let rec extend_helper env pat (Forall (binder_set, typ) as scheme) = + match pat, typ with + | Pat_var name, _ -> TypeEnv.extend env name scheme + | Pat_tuple (p1, p2, prest), Type_tuple (t1, t2, trest) -> + let new_env = + Base.List.fold2 + ~init:env + ~f:(fun env pat typ -> extend_helper env pat (Forall (binder_set, typ))) + (p1 :: p2 :: prest) + (t1 :: t2 :: trest) + in + (match new_env with + | Ok new_env -> new_env + | _ -> env) + | _ -> env +;; + +let add_names_rec env vb_list = + RList.fold_right + ~f:(fun vb acc -> + match vb with + | { pat = Pat_var name; _ } | { pat = Pat_constraint (Pat_var name, _); _ } -> + let* env_acc, fresh_acc = return acc in + let* fresh = fresh_var in + let env_acc = TypeEnv.extend env_acc name (Forall (VarSet.empty, fresh)) in + return (env_acc, fresh :: fresh_acc) + | _ -> fail Wrong_rec) + vb_list + ~init:(return (env, [])) +;; + +let infer_rest_vb ~debug env_acc sub_acc sub typ pat = + let* comp_sub = Substitution.compose sub_acc sub in + let new_env = TypeEnv.apply comp_sub env_acc in + let new_scheme = generalize new_env (Substitution.apply comp_sub typ) in + let* pat_env, pat_typ = infer_pat ~debug pat new_env in + let new_env = extend_helper pat_env pat new_scheme in + let* uni_sub = Substitution.unify typ pat_typ in + let* res_sub = Substitution.compose comp_sub uni_sub in + let res_env = TypeEnv.apply res_sub new_env in + return (res_env, res_sub) +;; + +let infer_rec_rest_vb sub_acc env_acc fresh typ name new_sub = + let* uni_sub = Substitution.unify (Substitution.apply new_sub fresh) typ in + let* comp_sub = Substitution.compose_all [ new_sub; uni_sub; sub_acc ] in + let env_acc = TypeEnv.apply comp_sub env_acc in + let env_rm = TypeEnv.remove env_acc name in + let new_scheme = generalize env_rm (Substitution.apply comp_sub fresh) in + let env_acc = TypeEnv.extend env_acc name new_scheme in + return (env_acc, comp_sub) +;; + +let rec get_pat_names acc = function + | Pat_var id -> id :: acc + | Pat_tuple (pat1, pat2, rest) -> + Base.List.fold_left ~f:get_pat_names ~init:acc (pat1 :: pat2 :: rest) + | Pat_construct ("Some", Some pat) -> get_pat_names acc pat + | Pat_constraint (pat, _) -> get_pat_names acc pat + | _ -> acc +;; + +let rec infer_exp ~debug exp env = + match exp with + | Exp_ident varname -> + (match TypeEnv.find varname env with + | None -> fail (Unbound_variable varname) + | Some x -> + let* typ = instantiate x in + return (Substitution.empty, typ)) + | Exp_constant const -> + (match const with + | Const_char _ -> return (Substitution.empty, Type_construct ("char", [])) + | Const_integer _ -> return (Substitution.empty, Type_construct ("int", [])) + | Const_string _ -> return (Substitution.empty, Type_construct ("string", []))) + | Exp_apply (Exp_ident op, Exp_tuple (exp1, exp2, [])) -> + (match op with + | "*" | "/" | "+" | "-" | "<" | ">" | "=" | "<>" | "<=" | ">=" | "&&" | "||" -> + let* sub1, typ1 = infer_exp ~debug exp1 env in + let* sub2, typ2 = infer_exp ~debug exp2 (TypeEnv.apply sub1 env) in + let* arg_typ, res_typ = + match TypeEnv.find op env with + | Some (Forall (_, Type_arrow (Type_arrow (arg, _), res))) -> return (arg, res) + | _ -> fail @@ Unsupported_operator op + in + let* unif_sub1 = Substitution.unify (Substitution.apply sub2 typ1) arg_typ in + let* unif_sub2 = Substitution.unify (Substitution.apply unif_sub1 typ2) arg_typ in + let* comp_sub = Substitution.compose_all [ sub1; sub2; unif_sub1; unif_sub2 ] in + return (comp_sub, res_typ) + | _ -> + let* sub1, typ1 = infer_exp ~debug (Exp_ident op) env in + let* sub2, typ2 = + infer_exp ~debug (Exp_tuple (exp1, exp2, [])) (TypeEnv.apply sub1 env) + in + let* fresh = fresh_var in + let* unif_sub = + Substitution.unify (Substitution.apply sub2 typ1) (Type_arrow (typ2, fresh)) + in + let* comp_sub = Substitution.compose_all [ unif_sub; sub2; sub1 ] in + let res_typ = Substitution.apply comp_sub fresh in + return (comp_sub, res_typ)) + | Exp_apply (exp1, exp2) -> + (match exp1 with + | Exp_ident op when op = "+" || op = "-" -> + let* sub1, typ1 = infer_exp ~debug exp2 env in + let* unif_sub = Substitution.unify typ1 (Type_construct ("int", [])) in + let* comp_sub = Substitution.compose sub1 unif_sub in + return (comp_sub, Type_construct ("int", [])) + | _ -> + let* sub1, typ1 = infer_exp ~debug exp1 env in + let* sub2, typ2 = infer_exp ~debug exp2 (TypeEnv.apply sub1 env) in + let* fresh = fresh_var in + let* unif_sub = + Substitution.unify (Substitution.apply sub2 typ1) (Type_arrow (typ2, fresh)) + in + let* comp_sub = Substitution.compose_all [ unif_sub; sub2; sub1 ] in + let res_typ = Substitution.apply comp_sub fresh in + return (comp_sub, res_typ)) + | Exp_fun ((pattern, patterns), expr) -> + let* new_env, typ1 = infer_pat ~debug pattern env in + let* sub1, typ2 = + match patterns with + | hd :: tl -> infer_exp ~debug (Exp_fun ((hd, tl), expr)) new_env + | [] -> infer_exp ~debug expr new_env + in + return (sub1, Type_arrow (Substitution.apply sub1 typ1, typ2)) + | Exp_construct (name, Some expr) -> + let* ty, sub = infer_exp ~debug (Exp_apply (Exp_ident name, expr)) env in + return (ty, sub) + | Exp_construct (name, None) -> + let* ty, sub = infer_exp ~debug (Exp_ident name) env in + return (ty, sub) + | Exp_tuple (exp1, exp2, rest) -> + let* sub1, typ1 = infer_exp ~debug exp1 env in + let new_env = TypeEnv.apply sub1 env in + let* sub2, typ2 = infer_exp ~debug exp2 new_env in + let new_env = TypeEnv.apply sub2 new_env in + let* sub3, typ3 = + RList.fold_right + ~f:(fun exp acc -> + let* sub_acc, typ_list = return acc in + let new_env = TypeEnv.apply sub_acc new_env in + let* sub, typ = infer_exp ~debug exp new_env in + let* sub_acc = Substitution.compose sub_acc sub in + return (sub_acc, typ :: typ_list)) + ~init:(return (Substitution.empty, [])) + rest + in + let* fin_sub = Substitution.compose_all [ sub1; sub2; sub3 ] in + let typ1 = Substitution.apply fin_sub typ1 in + let typ2 = Substitution.apply fin_sub typ2 in + let typ3 = List.map (fun typ -> Substitution.apply fin_sub typ) typ3 in + return (fin_sub, Type_tuple (typ1, typ2, typ3)) + | Exp_if (ifexp, thenexp, Some elseexp) -> + let* sub1, typ1 = infer_exp ~debug ifexp env in + let* uni_sub1 = Substitution.unify typ1 (Type_construct ("bool", [])) in + let* sub2, typ2 = infer_exp ~debug thenexp env in + let* sub3, typ3 = infer_exp ~debug elseexp env in + let* uni_sub2 = Substitution.unify typ2 typ3 in + let* comp_sub = Substitution.compose_all [ sub1; uni_sub1; sub2; sub3; uni_sub2 ] in + return (comp_sub, typ3) + | Exp_if (ifexp, thenexp, None) -> + let* sub1, typ1 = infer_exp ~debug ifexp env in + let* uni_sub1 = Substitution.unify typ1 (Type_construct ("bool", [])) in + let* sub2, typ2 = infer_exp ~debug thenexp env in + let* comp_sub = Substitution.compose_all [ sub1; uni_sub1; sub2 ] in + return (comp_sub, typ2) + | Exp_match (expr, (case, rest)) -> + let* subexpr, typexpr = infer_exp ~debug expr env in + let new_env = TypeEnv.apply subexpr env in + let* fresh = fresh_var in + let* res_sub, res_typ = + RList.fold_left + (case :: rest) + ~init:(return (subexpr, fresh)) + ~f:(fun acc case -> + let* sub, typ = return acc in + let pat_names = get_pat_names [] case.first in + let* pat_env, pat_typ = infer_pat ~debug case.first new_env in + let* uni_sub = Substitution.unify pat_typ typexpr in + let* comp_sub = Substitution.compose sub uni_sub in + let pat_env = + Base.List.fold_left + ~f:(fun env name -> + let (Forall (_, typ)) = TypeEnv.find_exn name env in + let env = TypeEnv.remove env name in + TypeEnv.extend env name (generalize env typ)) + ~init:(TypeEnv.apply uni_sub pat_env) + pat_names + in + let* subexpr, typexpr = + infer_exp ~debug case.second (TypeEnv.apply comp_sub pat_env) + in + let* uni_sub2 = Substitution.unify typexpr typ in + let* res_sub = Substitution.compose_all [ uni_sub2; subexpr; comp_sub ] in + return (res_sub, Substitution.apply res_sub typ)) + in + return (res_sub, res_typ) + | Exp_function (case, rest) -> + let* fresh1 = fresh_var in + let* fresh2 = fresh_var in + let* res_sub, res_typ = + RList.fold_left + (case :: rest) + ~init:(return (Substitution.empty, fresh2)) + ~f:(fun acc case -> + let* sub, typ = return acc in + let* pat_env, pat_typ = infer_pat ~debug case.first env in + let* uni_sub1 = Substitution.unify pat_typ fresh1 in + let* sub1 = Substitution.compose uni_sub1 sub in + let new_env = TypeEnv.apply sub1 pat_env in + let* subexpr, typexpr = infer_exp ~debug case.second new_env in + let* uni_sub2 = Substitution.unify typ typexpr in + let* comp_sub = Substitution.compose_all [ uni_sub2; subexpr; sub1 ] in + return (comp_sub, Substitution.apply comp_sub typ)) + in + return (res_sub, Type_arrow (Substitution.apply res_sub fresh1, res_typ)) + | Exp_let (Nonrecursive, (value_binding, rest), exp) -> + let* new_env, sub, _ = + infer_value_binding_list ~debug (value_binding :: rest) env Substitution.empty + in + let* subb, typp = infer_exp ~debug exp new_env in + let* comp_sub = Substitution.compose sub subb in + return (comp_sub, typp) + | Exp_let (Recursive, (value_binding, rest), exp) -> + let* new_env, fresh_vars = add_names_rec env (value_binding :: rest) in + let* new_env, sub, _ = + infer_rec_value_binding_list + ~debug + (value_binding :: rest) + new_env + Substitution.empty + fresh_vars + in + let* subb, typp = infer_exp ~debug exp new_env in + let* comp_sub = Substitution.compose subb sub in + return (comp_sub, typp) + | Exp_constraint (expr, typ) -> + let* sub, typ1 = infer_exp ~debug expr env in + let* uni_sub = Substitution.unify typ1 typ in + let* comp_sub = Substitution.compose sub uni_sub in + return (comp_sub, typ1) + +and infer_value_binding_list ~debug vb_list env sub = + let* res_env, res_sub, names = + RList.fold_left + vb_list + ~init:(return (env, sub, [])) + ~f:(fun acc vb -> + let* env_acc, sub_acc, names = return acc in + match vb with + | { pat = Pat_constraint (pat, pat_typ); expr = Exp_fun ((fpat, fpatrest), exp) } + -> + let* sub, typ = + infer_exp + ~debug + (Exp_fun ((fpat, fpatrest), Exp_constraint (exp, pat_typ))) + env_acc + in + let* res_env, res_sub = infer_rest_vb ~debug env_acc sub_acc sub typ pat in + let name = get_pat_names names pat in + return (res_env, res_sub, names @ name) + | { pat = Pat_constraint (pat, pat_typ); expr = Exp_function _ as exp } -> + let* sub, typ = infer_exp ~debug (Exp_constraint (exp, pat_typ)) env_acc in + let* res_env, res_sub = infer_rest_vb ~debug env_acc sub_acc sub typ pat in + let name = get_pat_names names pat in + return (res_env, res_sub, names @ name) + | { pat; expr } -> + let* sub, typ = infer_exp ~debug expr env_acc in + let* res_env, res_sub = infer_rest_vb ~debug env_acc sub_acc sub typ pat in + let name = get_pat_names names pat in + return (res_env, res_sub, names @ name)) + in + return (res_env, res_sub, names) + +and infer_rec_value_binding_list ~debug vb_list env sub fresh_vars = + let* res_env, res_sub, names = + match + RList.fold_left2 + vb_list + fresh_vars + ~init:(return (env, sub, [])) + ~f:(fun acc vb fv -> + let* env_acc, sub_acc, names = return acc in + match vb, fv with + | ( ( { pat = Pat_var name; expr = Exp_fun _ as exp } + | { pat = Pat_var name; expr = Exp_function _ as exp } ) + , fresh ) -> + let* subexpr, typexpr = infer_exp ~debug exp env_acc in + let* res_env, res_sub = + infer_rec_rest_vb sub_acc env_acc fresh typexpr name subexpr + in + return (res_env, res_sub, names @ [ name ]) + | ( { pat = Pat_constraint (Pat_var name, pat_typ) + ; expr = Exp_fun ((pat, pat_list), expr) + } + , fresh ) -> + let* subexpr, typexpr = + infer_exp + ~debug + (Exp_fun ((pat, pat_list), Exp_constraint (expr, pat_typ))) + env + in + let* res_env, res_sub = + infer_rec_rest_vb sub_acc env_acc fresh typexpr name subexpr + in + return (res_env, res_sub, names @ [ name ]) + | { pat = Pat_var name; expr }, fresh -> + let* subexpr, typexpr = infer_exp ~debug expr env_acc in + (match typexpr with + | Type_arrow (_, _) -> + let new_fresh = Substitution.apply sub_acc fresh in + if typexpr = new_fresh + then fail Wrong_rec + else + let* res_env, res_sub = + infer_rec_rest_vb sub_acc env_acc fresh typexpr name subexpr + in + return (res_env, res_sub, names @ [ name ]) + | _ -> fail Wrong_rec) + | _ -> fail Wrong_rec) + with + | Ok result -> result + | Unequal_lengths -> fail Incorrect_list_lengths + in + return (res_env, res_sub, names) +;; + +open Ast.Structure + +let rec check_poly_types ~debug typ_list marity = function + | Type_var var when Base.List.mem typ_list var ~equal:String.equal -> return () + | Type_var name -> fail (Unbound_variable name) + | Type_construct (name, args) -> + let* arity = + Base.Map.find marity name + |> Base.Option.value_map ~f:return ~default:(fail (Undeclared_type name)) + in + if arity = Base.List.length args + then check_many ~debug typ_list marity args + else fail Arity_mismatch + | Type_arrow (l, r) -> + let* () = check_poly_types ~debug typ_list marity l in + check_poly_types ~debug typ_list marity r + | Type_tuple (t1, t2, rest) -> + let* () = check_poly_types ~debug typ_list marity t1 in + let* () = check_poly_types ~debug typ_list marity t2 in + check_many ~debug typ_list marity rest + +and check_many ~debug typ_list marity args = + let rec iter = function + | [] -> return () + | arg :: rest -> + let* () = check_poly_types ~debug typ_list marity arg in + iter rest + in + iter args +;; + +let ( ! ) fresh = Type_var fresh + +let infer_structure_item ~debug env item marity names = + match item with + | Str_eval exp -> + let* _, typ = infer_exp ~debug exp env in + let new_env = TypeEnv.extend env "-" (Forall (VarSet.empty, typ)) in + return (new_env, marity, names @ [ "-" ]) + | Str_value (Nonrecursive, (value_binding, rest)) -> + let* env, _, names = + infer_value_binding_list ~debug (value_binding :: rest) env Substitution.empty + in + return (env, marity, names) + | Str_value (Recursive, (value_binding, rest)) -> + let* new_env, fresh_vars = add_names_rec env (value_binding :: rest) in + let* new_env, _, names = + infer_rec_value_binding_list + ~debug + (value_binding :: rest) + new_env + Substitution.empty + fresh_vars + in + return (new_env, marity, names) + | Str_adt (poly, name, (variant, rest)) -> + let adt_type = Type_construct (name, Base.List.map poly ~f:( ! )) in + let type_arity = List.length poly in + let arity_map = Base.Map.set marity ~key:name ~data:type_arity in + let* constrs = + RList.fold_left + (variant :: rest) + ~init:(return env) + ~f:(fun acc (constr_name, constr_types) -> + let* env_acc = return acc in + let* fresh = fresh in + let* new_env = + match constr_types with + | None -> + return + (TypeEnv.extend + env_acc + constr_name + (Forall (VarSet.singleton (Int.to_string fresh), adt_type))) + | Some typ -> + let* () = check_poly_types ~debug poly arity_map typ in + return + (TypeEnv.extend + env_acc + constr_name + (Forall (VarSet.of_list poly, Type_arrow (typ, adt_type)))) + in + return new_env) + in + return (constrs, arity_map, names) +;; + +let infer_program ~debug program env = + let marity = Base.Map.empty (module Base.String) in + let marity = Base.Map.add_exn marity ~key:"int" ~data:0 in + let marity = Base.Map.add_exn marity ~key:"char" ~data:0 in + let marity = Base.Map.add_exn marity ~key:"string" ~data:0 in + let marity = Base.Map.add_exn marity ~key:"bool" ~data:0 in + let marity = Base.Map.add_exn marity ~key:"unit" ~data:0 in + let* env, _, names = + RList.fold_left + program + ~init:(return (env, marity, [])) + ~f:(fun acc item -> + let* env_acc, arr_acc, names = return acc in + let* env, arr, name = infer_structure_item ~debug env_acc item arr_acc names in + return (env, arr, names @ name)) + in + return (env, names) +;; + +let env_with_things = + let things_list = + [ ( "+" + , Forall + ( VarSet.empty + , Type_arrow + ( Type_arrow (Type_construct ("int", []), Type_construct ("int", [])) + , Type_construct ("int", []) ) ) ) + ; ( "-" + , Forall + ( VarSet.empty + , Type_arrow + ( Type_arrow (Type_construct ("int", []), Type_construct ("int", [])) + , Type_construct ("int", []) ) ) ) + ; ( "*" + , Forall + ( VarSet.empty + , Type_arrow + ( Type_arrow (Type_construct ("int", []), Type_construct ("int", [])) + , Type_construct ("int", []) ) ) ) + ; ( "/" + , Forall + ( VarSet.empty + , Type_arrow + ( Type_arrow (Type_construct ("int", []), Type_construct ("int", [])) + , Type_construct ("int", []) ) ) ) + ; ( "<" + , Forall + ( VarSet.singleton "a" + , Type_arrow + (Type_arrow (Type_var "a", Type_var "a"), Type_construct ("bool", [])) ) ) + ; ( ">" + , Forall + ( VarSet.singleton "a" + , Type_arrow + (Type_arrow (Type_var "a", Type_var "a"), Type_construct ("bool", [])) ) ) + ; ( "<>" + , Forall + ( VarSet.singleton "a" + , Type_arrow + (Type_arrow (Type_var "a", Type_var "a"), Type_construct ("bool", [])) ) ) + ; ( "<=" + , Forall + ( VarSet.singleton "a" + , Type_arrow + (Type_arrow (Type_var "a", Type_var "a"), Type_construct ("bool", [])) ) ) + ; ( ">=" + , Forall + ( VarSet.singleton "a" + , Type_arrow + (Type_arrow (Type_var "a", Type_var "a"), Type_construct ("bool", [])) ) ) + ; ( "=" + , Forall + ( VarSet.singleton "a" + , Type_arrow + (Type_arrow (Type_var "a", Type_var "a"), Type_construct ("bool", [])) ) ) + ; ( "||" + , Forall + ( VarSet.empty + , Type_arrow + ( Type_arrow (Type_construct ("bool", []), Type_construct ("bool", [])) + , Type_construct ("bool", []) ) ) ) + ; ( "&&" + , Forall + ( VarSet.empty + , Type_arrow + ( Type_arrow (Type_construct ("bool", []), Type_construct ("bool", [])) + , Type_construct ("bool", []) ) ) ) + ; ( "print_int" + , Forall + ( VarSet.empty + , Type_arrow (Type_construct ("int", []), Type_construct ("unit", [])) ) ) + ; ( "print_endline" + , Forall + ( VarSet.empty + , Type_arrow (Type_construct ("string", []), Type_construct ("unit", [])) ) ) + ; ( "print_char" + , Forall + ( VarSet.empty + , Type_arrow (Type_construct ("char", []), Type_construct ("unit", [])) ) ) + ; ( "print_bool" + , Forall + ( VarSet.empty + , Type_arrow (Type_construct ("bool", []), Type_construct ("unit", [])) ) ) + ; ( "Some" + , Forall + ( VarSet.singleton "a" + , Type_arrow (Type_var "a", Type_construct ("option", [ Type_var "a" ])) ) ) + ; "None", Forall (VarSet.singleton "a", Type_construct ("option", [ Type_var "a" ])) + ; ( "::" + , Forall + ( VarSet.singleton "a" + , Type_arrow + ( Type_tuple (Type_var "a", Type_construct ("list", [ Type_var "a" ]), []) + , Type_construct ("list", [ Type_var "a" ]) ) ) ) + ; "[]", Forall (VarSet.singleton "a", Type_construct ("list", [ Type_var "a" ])) + ; "()", Forall (VarSet.empty, Type_construct ("unit", [])) + ; "true", Forall (VarSet.empty, Type_construct ("bool", [])) + ; "false", Forall (VarSet.empty, Type_construct ("bool", [])) + ] + in + List.fold_left + (fun env (id, sch) -> TypeEnv.extend env id sch) + TypeEnv.empty + things_list +;; + +let run_infer_program ?(debug = false) (program : Ast.program) env = + run (infer_program ~debug program env) +;; diff --git a/AML/lib/infer/infer.mli b/AML/lib/infer/infer.mli new file mode 100644 index 00000000..fb165376 --- /dev/null +++ b/AML/lib/infer/infer.mli @@ -0,0 +1,30 @@ +(** Copyright 2025-2026, Rodion Suvorov, Dmitriy Chirkov*) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open InferTypes + +module Type : sig + type t = Ast.TypeExpr.t + + val occurs_check : string -> t -> bool + val free_vars : t -> VarSet.t +end + +module Substitution : sig + type t = (string, Type.t, Base.String.comparator_witness) Base.Map.t +end + +module TypeEnv : sig + type t = (string, scheme, Base.String.comparator_witness) Base.Map.t + + val pp_env : Format.formatter -> t -> unit +end + +val env_with_things : TypeEnv.t + +val run_infer_program + : ?debug:bool + -> Ast.program + -> TypeEnv.t + -> (TypeEnv.t * string list, InferTypes.error) Result.t diff --git a/AML/lib/infer/inferTypes.ml b/AML/lib/infer/inferTypes.ml new file mode 100644 index 00000000..24d80cc0 --- /dev/null +++ b/AML/lib/infer/inferTypes.ml @@ -0,0 +1,188 @@ +(** Copyright 2025-2026, Rodion Suvorov, Dmitriy Chirkov*) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Format +open Ast.TypeExpr +open Stdlib + +type binder = int [@@deriving show { with_path = false }] + +module VarSet = struct + include Set.Make (String) + + let pp ppf s = + Format.fprintf ppf "[ "; + iter (Format.fprintf ppf "%s; ") s; + Format.fprintf ppf "]" + ;; +end + +type binder_set = VarSet.t [@@deriving show { with_path = false }] +type scheme = Forall of binder_set * t [@@deriving show { with_path = false }] + +open Base + +(* get polymorphic type names from VarSet *) +let binder_to_list args = + let args = VarSet.elements args in + List.sort (List.map args ~f:Int.of_string) ~compare:Int.compare +;; + +(** turn ['2, '5, '1231, ...] (value is not important, only order) list of + names of polymorphic types into ['a, 'b, 'c ... ] + when english alphabet is out, turn values into ['aa, 'bb, ...] and etc.*) +let minimize dargs = + let counter = 0 in + let coef = 0 in + let m = Map.empty (module Base.String) in + List.fold_left dargs ~init:(m, coef, counter) ~f:(fun (m, coef, counter) el -> + let str = + let rec build coef counter str = + if coef = 0 + then str ^ Char.escaped (Stdlib.Char.chr (counter + 97)) + else build (coef - 1) counter (str ^ Char.escaped (Stdlib.Char.chr (counter + 97))) + in + build coef counter "" + in + let counter = counter + 1 in + let coef = coef + (counter / 26) in + let counter = counter % 26 in + let el = Stdlib.string_of_int el in + Base.Map.set m ~key:el ~data:str, coef, counter) +;; + +let rec pprint_type_tuple ?(poly_names_map = Map.empty (module String)) fmt = function + | [] -> () + | [ h ] -> + (match h with + | Type_arrow (_, _) -> fprintf fmt "(%a)" (pprint_type ~poly_names_map) h + | _ -> fprintf fmt "%a" (pprint_type ~poly_names_map) h) + | h :: tl -> + (match h with + | Type_arrow (_, _) -> + fprintf + fmt + "(%a) * %a" + (pprint_type ~poly_names_map) + h + (pprint_type_tuple ~poly_names_map) + tl + | _ -> + fprintf + fmt + "%a * %a" + (pprint_type ~poly_names_map) + h + (pprint_type_tuple ~poly_names_map) + tl) + +and pprint_type ?(poly_names_map = Map.empty (module String)) fmt = function + | Type_var num -> + (match Map.find poly_names_map num with + | Some k -> fprintf fmt "'%s" k + | None -> fprintf fmt "'%s" num) + | Type_arrow (ty1, ty2) -> + (match ty1, ty2 with + | Type_arrow (_, _), _ -> + fprintf + fmt + "(%a) -> %a" + (pprint_type ~poly_names_map) + ty1 + (pprint_type ~poly_names_map) + ty2 + | _ -> + fprintf + fmt + "%a -> %a" + (pprint_type ~poly_names_map) + ty1 + (pprint_type ~poly_names_map) + ty2) + | Type_tuple (t1, t2, ty_lst) -> + fprintf fmt "%a" (pprint_type_tuple ~poly_names_map) (t1 :: t2 :: ty_lst) + | Type_construct (name, []) -> fprintf fmt "%s" name + | Type_construct (name, ty_list) -> + fprintf fmt "%a %s" (pprint_type_list_with_parens ~poly_names_map) ty_list name + +and pprint_type_list_with_parens ?(poly_names_map = Map.empty (module String)) fmt ty_list + = + let rec print_types fmt = function + | [] -> () + | [ ty ] -> (pprint_type_with_parens_if_tuple ~poly_names_map) fmt ty + | ty :: rest -> + fprintf + fmt + "%a %a" + (pprint_type_with_parens_if_tuple ~poly_names_map) + ty + print_types + rest + in + print_types fmt ty_list + +and pprint_type_with_parens_if_tuple ?(poly_names_map = Map.empty (module String)) fmt ty = + match ty with + | Type_tuple _ -> fprintf fmt "(%a)" (pprint_type ~poly_names_map) ty + | _ -> (pprint_type ~poly_names_map) fmt ty +;; + +(*errors*) +type error = + | Occurs_check of string * Ast.TypeExpr.t + (** same polymotphic type occured while substitution apply ['a : 'a -> 'b]*) + | Unification_failed of Ast.TypeExpr.t * Ast.TypeExpr.t + | Unbound_variable of string + | Arity_mismatch + (** mismatch of types arity + [type 'a foo = Foo + type bar = Bar of foo] *) + | Undeclared_type of string + | Wrong_rec (** invalid right value in recursive let declaration *) + | Unsupported_operator of string (** for binary operators*) + | Incorrect_list_lengths + +let collect_type_vars typ = + let rec aux acc = function + | Type_var num -> num :: acc + | Type_arrow (t1, t2) -> aux (aux acc t1) t2 + | Type_tuple (t1, t2, tl) -> List.fold_left ~f:aux ~init:(aux (aux acc t1) t2) tl + | Type_construct (_, ty_list) -> List.fold_left ~f:aux ~init:acc ty_list + in + aux [] typ +;; + +let collect_vars_from_error = function + | Occurs_check (str, typ) -> str :: collect_type_vars typ + | Unification_failed (t1, t2) -> collect_type_vars t1 @ collect_type_vars t2 + | _ -> [] +;; + +let pp_inf_err fmt err = + let type_vars = collect_vars_from_error err in + let var_map, _, _ = minimize (List.map type_vars ~f:Stdlib.int_of_string) in + match err with + | Occurs_check (str, t) -> + fprintf + fmt + "Occurs_check: %a and %a\n" + (pprint_type ~poly_names_map:var_map) + (Type_var str) + (pprint_type ~poly_names_map:var_map) + t + | Unification_failed (typ1, typ2) -> + fprintf + fmt + "Unification_failed: %a # %a" + (pprint_type ~poly_names_map:var_map) + typ1 + (pprint_type ~poly_names_map:var_map) + typ2 + | Unbound_variable str -> fprintf fmt "Unbound_variable: %S" str + | Arity_mismatch -> fprintf fmt "Arity_mismatch" + | Undeclared_type str -> fprintf fmt "Undeclared_type: %S" str + | Wrong_rec -> fprintf fmt "Wrong right value in rec" + | Unsupported_operator op -> fprintf fmt "Operator %s is not supported" op + | Incorrect_list_lengths -> fprintf fmt "Lists have unequal lengths" +;; diff --git a/AML/lib/infer/inferTypes.mli b/AML/lib/infer/inferTypes.mli new file mode 100644 index 00000000..f66aced4 --- /dev/null +++ b/AML/lib/infer/inferTypes.mli @@ -0,0 +1,42 @@ +(** Copyright 2025-2026, Rodion Suvorov, Dmitriy Chirkov*) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Format +open Ast.TypeExpr +open Stdlib + +type binder = int [@@deriving show] + +module VarSet : sig + include Set.S with type elt = string + + val pp : formatter -> t -> unit +end + +type binder_set = VarSet.t [@@deriving show] +type scheme = Forall of binder_set * t [@@deriving show] + +val binder_to_list : binder_set -> int list + +val minimize + : int list + -> (string, string, Base.String.comparator_witness) Base.Map.t * int * int + +val pprint_type + : ?poly_names_map:(string, string, Base.String.comparator_witness) Base.Map.t + -> formatter + -> t + -> unit + +type error = + | Occurs_check of string * t + | Unification_failed of t * t + | Unbound_variable of string + | Arity_mismatch + | Undeclared_type of string + | Wrong_rec + | Unsupported_operator of string + | Incorrect_list_lengths + +val pp_inf_err : formatter -> error -> unit diff --git a/AML/lib/infer/test/ITest.ml b/AML/lib/infer/test/ITest.ml new file mode 100644 index 00000000..fbf14dee --- /dev/null +++ b/AML/lib/infer/test/ITest.ml @@ -0,0 +1,999 @@ +(** Copyright 2025-2026, Rodion Suvorov, Dmitriy Chirkov*) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Parser +open Format +open Inferencer.InferTypes +open Inferencer.Infer +open Base + +let filter names = + let rev_names = List.rev names in + List.fold_left rev_names ~init:[] ~f:(fun acc name -> + match List.find acc ~f:(fun a -> String.equal a name) with + | None -> acc @ [ name ] + | Some _ -> acc) +;; + +let pprint_result env (names : string list) = + let trash = [] in + List.iter + (List.rev @@ filter names) + ~f:(fun key -> + match Map.find env key with + | None -> printf "" + | Some typ -> + (match List.find trash ~f:(fun a -> String.equal a key) with + | Some _ -> printf "" + | None -> + (match typ with + | Forall (args, typ) -> + let m, _, _ = minimize (binder_to_list args) in + (match key with + | x when Stdlib.Char.code x.[0] >= 65 && Stdlib.Char.code x.[0] <= 90 -> + printf "" + | "-" -> printf "%s : %a\n" key (pprint_type ~poly_names_map:m) typ + | _ -> printf "val %s : %a\n" key (pprint_type ~poly_names_map:m) typ)))) +;; + +let parse_and_infer_result program = + match parse_str program with + | str -> + (match run_infer_program str env_with_things with + | Ok (env, names) -> pprint_result env names + (* | Ok env -> printf "%a\n" TypeEnv.pp_env env *) + | Error err -> printf "%a" pp_inf_err err) +;; + +(* + | ____ U ___ u _ _ U ___ u _____ _____ __ __ ____ U _____ u + | | _"\ \/"_ \/ | \ |"| \/"_ \/|_ " _| |_ " _| \ \ / /U| _"\ u\| ___"|/ + |/| | | | | | | | <| \| |> | | | | | | | | \ V / \| |_) |/ | _|' + |U| |_| |\.-,_| |_| | U| |\ |u.-,_| |_| | /| |\ /| |\ U_|"|_u | __/ | |___ + | |____/ u \_)-\___/ |_| \_| \_)-\___/ u |_|U u |_|U |_| |_| |_____| + | |||_ \\ || \\,-. \\ _// \\_ _// \\_.-,//|(_ ||>>_ << >> + | (__)_) (__) (_") (_/ (__) (__) (__) (__) (__)\_) (__)(__)__) (__) (__) +*) + +(*PASSED*) +let%expect_test "001" = + parse_and_infer_result + {| +let recfac n = if n<=1 then 1 else n * fac (n-1);;|}; + [%expect {| Unbound_variable: "fac" |}] +;; + +(*PASSED*) +let%expect_test "002" = + parse_and_infer_result + {| +let main = if true then 1 else false;;|}; + [%expect {| Unification_failed: int # bool |}] +;; + +(*PASSED*) +let%expect_test "003 " = + parse_and_infer_result + {| +let fix f = (fun x -> f (fun f -> x x f)) (fun x -> f (fun f -> x x f));;|}; + [%expect {| Occurs_check: 'c and 'c -> 'b |}] +;; + +(*PASSED*) +let%expect_test "004 " = + parse_and_infer_result + {| +let _1 = + (fun f -> (f 1, f true)) (fun x -> x);;|}; + [%expect {| Unification_failed: int # bool |}] +;; + +(*PASSED*) +let%expect_test "005 " = + parse_and_infer_result + {| +let _2 = function + | Some f -> let _ = f "42" in f 42 + | None -> 1;;|}; + [%expect {| Unification_failed: string # int |}] +;; + +(*PASSED*) +let%expect_test "015" = + parse_and_infer_result {|let rec (a,b) = (a,b);;|}; + [%expect {| Wrong right value in rec |}] +;; + +(*PASSED*) +let%expect_test "016" = + parse_and_infer_result {|let a, _ = 1, 2, 3;;|}; + [%expect {| Unification_failed: int * int * int # 'b * 'a |}] +;; + +(*PASSED*) +let%expect_test "091.1" = + parse_and_infer_result {|let [a] = (fun x -> x);;|}; + [%expect {| Unification_failed: 'b -> 'b # 'c list |}] +;; + +(*PASSED*) +let%expect_test "097.2" = + parse_and_infer_result {|let () = (fun x -> x);;|}; + [%expect {| Unification_failed: 'b -> 'b # unit |}] +;; + +(*PASSED*) +let%expect_test "098" = + parse_and_infer_result {|let rec x = x + 1;;|}; + [%expect {| Wrong right value in rec |}] +;; + +(*PASSED*) +let%expect_test "098" = + parse_and_infer_result {|let rec x::[] = [1];;|}; + [%expect {| Wrong right value in rec |}] +;; + +(* + | _____ __ __ ____ U _____ u ____ + | |_ " _| \ \ / /U| _"\ u\| ___"|/| _"\ + | | | \ V / \| |_) |/ | _|" /| | | | + | /| |\ U_|"|_u | __/ | |___ U| |_| |\ + | u |_|U |_| |_| |_____| |____/ u + | _// \\_.-,//|(_ ||>>_ << >> |||_ + |(__) (__)\_) (__)(__)__) (__) (__)(__)_) +*) + +(*PASSED*) +let%expect_test "001fact without builtin" = + parse_and_infer_result + {| +let rec fac n = if n<=1 then 1 else n * fac (n-1);;|}; + [%expect + {| + val fac : int -> int |}] +;; + +(*passed*) +let%expect_test "001fact with builtin" = + parse_and_infer_result + {| +let rec fac n = if n<=1 then 1 else n * fac (n-1);; + let main = + let () = print_int (fac 4) in + 0;;|}; + [%expect + {| + val fac : int -> int + val main : int |}] +;; + +(*PASSED*) +let%expect_test "002fact without builtin" = + parse_and_infer_result + {| +let rec fac_cps n k = + if n=1 then k 1 else + fac_cps (n-1) (fun p -> k (p*n));;|}; + [%expect + {| + val fac_cps : int -> (int -> 'a) -> 'a |}] +;; + +(*passed*) +let%expect_test "002fact builtin" = + parse_and_infer_result + {| +let rec fac_cps n k = + if n=1 then k 1 else + fac_cps (n-1) (fun p -> k (p*n));; let main = + let () = print_int (fac_cps 4 (fun print_int -> print_int)) in + 0;;|}; + [%expect + {| + val fac_cps : int -> (int -> 'a) -> 'a + val main : int |}] +;; + +(*PASSED*) +let%expect_test "003fib without builtin" = + parse_and_infer_result + {| +let rec fib_acc a b n = + if n=1 then b + else + let n1 = n-1 in + let ab = a+b in + fib_acc b ab n1;; + let rec fib n = + if n<2 + then n + else fib (n - 1) + fib (n - 2);;|}; + [%expect + {| + val fib_acc : int -> int -> int -> int + val fib : int -> int |}] +;; + +(*passed*) +let%expect_test "003fib builtin" = + parse_and_infer_result + {| +let rec fib_acc a b n = + if n=1 then b + else + let n1 = n-1 in + let ab = a+b in + fib_acc b ab n1;; + let rec fib n = + if n<2 + then n + else fib (n - 1) + fib (n - 2);; + let main = + let () = print_int (fib_acc 0 1 4) in + let () = print_int (fib 4) in + 0;;|}; + [%expect + {| + val fib_acc : int -> int -> int -> int + val fib : int -> int + val main : int |}] +;; + +(*PASSED*) +let%expect_test "004" = + parse_and_infer_result + {| +let wrap f = if 1 = 1 then f else f;; + +let test3 a b c = + let a = print_int a in + let b = print_int b in + let c = print_int c in + 0;; + +let test10 a b c d e f g h i j = a + b + c + d + e + f + g + h + i + j;; + +let main = + let rez = + (wrap test10 1 10 100 1000 10000 100000 1000000 10000000 100000000 + 1000000000) + in + let () = print_int rez in + let temp2 = wrap test3 1 10 100 in + 0;;|}; + [%expect + {| + val wrap : 'a -> 'a + val test3 : int -> int -> int -> int + val test10 : int -> int -> int -> int -> int -> int -> int -> int -> int -> int -> int + val main : int |}] +;; + +(*PASSED*) +let%expect_test "005" = + parse_and_infer_result + {| +let rec fix f x = f (fix f) x;; + +let fac self n = if n<=1 then 1 else n * self (n-1);;|}; + [%expect + {| + val fix : (('a -> 'b) -> 'a -> 'b) -> 'a -> 'b + val fac : (int -> int) -> int -> int |}] +;; + +(*PASSED*) +let%expect_test "005" = + parse_and_infer_result + {| +let rec fix f x = f (fix f) x;; + +let fac self n = if n<=1 then 1 else n * self (n-1);; + +let main = + let () = print_int (fix fac 6) in + 0;;|}; + [%expect + {| + val fix : (('a -> 'b) -> 'a -> 'b) -> 'a -> 'b + val fac : (int -> int) -> int -> int + val main : int |}] +;; + +(*PASSED*) +let%expect_test "006" = + parse_and_infer_result + {|let foo b = if b then (fun foo -> foo+2) else (fun foo -> foo*10);; + let foo x = foo true (foo false (foo true (foo false x)));; + let main = + let () = print_int (foo 11) in + 0;;|}; + [%expect + {| + val foo : int -> int + val main : int |}] +;; + +(*PASSED*) +let%expect_test "006.2" = + parse_and_infer_result + {|let foo a b c = + let () = print_int a in + let () = print_int b in + let () = print_int c in + a + b * c;; + +let main = + let foo = foo 1 in + let foo = foo 2 in + let foo = foo 3 in + let () = print_int foo in + 0;;|}; + [%expect + {| + val foo : int -> int -> int -> int + val main : int |}] +;; + +(*PASSED*) +let%expect_test "006.3" = + parse_and_infer_result + {|let foo a = + let () = print_int a in fun b -> + let () = print_int b in fun c -> + print_int c;; + +let main = + let () = foo 4 8 9 in + 0;;|}; + [%expect + {| + val foo : int -> int -> int -> unit + val main : int |}] +;; + +(*PASSED*) +let%expect_test "007" = + parse_and_infer_result + {|let _start () () a () b _c () d __ = + let () = print_int (a+b) in + let () = print_int __ in + a*b / _c + d;; + +let main = + print_int (_start (print_int 1) (print_int 2) 3 (print_int 4) 100 1000 (print_int (-1)) 10000 (-555555));;|}; + [%expect + {| + val _start : unit -> unit -> int -> unit -> int -> int -> unit -> int -> int -> int + val main : unit |}] +;; + +(*PASSED*) +let%expect_test "008" = + parse_and_infer_result + {| +let addi = fun f g x -> (f x (g x: bool) : int);; +let main = + let () = print_int (addi (fun x b -> if b then x+1 else x*2) (fun _start -> _start/2 = 0) 4) in + 0;;|}; + [%expect + {| + val addi : ('a -> bool -> int) -> ('a -> bool) -> 'a -> int + val main : int |}] +;; + +(*PASSED*) +let%expect_test "009" = + parse_and_infer_result + {| +let temp = + let f = fun x -> x in + (f 1, f true);;|}; + [%expect + {| + val temp : int * bool |}] +;; + +(*PASSED*) +let%expect_test "010" = + parse_and_infer_result + {| + + let _1 = fun x y (a, _) -> (x + y - a) = 1 + +let _2 = + let x, Some f = 1, Some ( "p1onerka was here" ) + in x + +let _3 = Some (1, "hi") + +let _4 = let rec f x = f 5 in f + +let _5 = + let id x = x in + match Some id with + | Some f -> let _ = f "42" in f 42 + | None -> 0 + +let _6 = fun arg -> match arg with Some x -> let y = x in y + +let int_of_option = function Some x -> x | None -> 0 + +let _42 = function 42 -> true | _ -> false + +let id1, id2 = let id x = x in (id, id) + |}; + [%expect + {| + val _1 : int -> int -> int * 'a -> bool + val _2 : int + val _3 : (int * string) option + val _4 : int -> 'a + val _5 : int + val _6 : 'a option -> 'a + val int_of_option : int option -> int + val _42 : int -> bool + val id2 : 'b -> 'b + val id1 : 'a -> 'a |}] +;; + +(*PASSED*) +let%expect_test "015" = + parse_and_infer_result + {| +let rec fix f x = f (fix f) x +let map f p = let (a,b) = p in (f a, f b) +let fixpoly l = + fix (fun self l -> map (fun li x -> li (self l) x) l) l +let feven p n = + let (e, o) = p in + if n = 0 then 1 else o (n - 1) +let fodd p n = + let (e, o) = p in + if n = 0 then 0 else e (n - 1) + let tie = fixpoly (feven, fodd) + let rec meven n = if n = 0 then 1 else modd (n - 1) +and modd n = if n = 0 then 1 else meven (n - 1) +let main = + let () = print_int (modd 1) in + let () = print_int (meven 2) in + let (even,odd) = tie in + let () = print_int (odd 3) in + let () = print_int (even 4) in + 0 +|}; + [%expect + {| + val fix : (('a -> 'b) -> 'a -> 'b) -> 'a -> 'b + val map : ('a -> 'b) -> 'a * 'a -> 'b * 'b + val fixpoly : (('a -> 'b) * ('a -> 'b) -> 'a -> 'b) * (('a -> 'b) * ('a -> 'b) -> 'a -> 'b) -> ('a -> 'b) * ('a -> 'b) + val feven : 'a * (int -> int) -> int -> int + val fodd : (int -> int) * 'a -> int -> int + val tie : (int -> int) * (int -> int) + val meven : int -> int + val modd : int -> int + val main : int |}] +;; + +(*PASSED*) +let%expect_test "016" = + parse_and_infer_result + {| +let rec length xs = + match xs with + | [] -> 0 + | h::tl -> 1 + length tl + +let length_tail = + let rec helper acc xs = + match xs with + | [] -> acc + | h::tl -> helper (acc + 1) tl + in + helper 0 + +let rec map f xs = + match xs with + | [] -> [] + | a::[] -> [f a] + | a::b::[] -> [f a; f b] + | a::b::c::[] -> [f a; f b; f c] + | a::b::c::d::tl -> f a :: f b :: f c :: f d :: map f tl + +let rec append xs ys = match xs with [] -> ys | x::xs -> x::(append xs ys) + +let concat = + let rec helper xs = + match xs with + | [] -> [] + | h::tl -> append h (helper tl) + in helper + +let rec iter f xs = match xs with [] -> () | h::tl -> let () = f h in iter f tl + +let rec cartesian xs ys = + match xs with + | [] -> [] + | h::tl -> append (map (fun a -> (h,a)) ys) (cartesian tl ys) + +let main = + let () = iter print_int [1;2;3] in + let () = print_int (length (cartesian [1;2] [1;2;3;4])) in + 0 + +|}; + [%expect + {| + val length : 'a list -> int + val length_tail : 'a list -> int + val map : ('a -> 'b) -> 'a list -> 'b list + val append : 'a list -> 'a list -> 'a list + val concat : 'a list list -> 'a list + val iter : ('a -> unit) -> 'a list -> unit + val cartesian : 'a list -> 'b list -> ('a * 'b) list + val main : int |}] +;; + +let%expect_test "zero" = + parse_and_infer_result {|5+5;;|}; + [%expect {| - : int |}] +;; + +let%expect_test "zero" = + parse_and_infer_result {|5+5;;|}; + [%expect {| - : int |}] +;; + +let%expect_test "zero" = + parse_and_infer_result {|5/5;;|}; + [%expect {| - : int |}] +;; + +let%expect_test "zero" = + parse_and_infer_result {|5-5;;|}; + [%expect {| - : int |}] +;; + +let%expect_test "zero" = + parse_and_infer_result {|5*5;;|}; + [%expect {| - : int |}] +;; + +let%expect_test "zero" = + parse_and_infer_result {|5>=5;;|}; + [%expect {| - : bool |}] +;; + +let%expect_test "zero" = + parse_and_infer_result {|5<=5;;|}; + [%expect {| - : bool |}] +;; + +let%expect_test "zero" = + parse_and_infer_result {|5>5;;|}; + [%expect {| - : bool |}] +;; + +let%expect_test "zero" = + parse_and_infer_result {|5<5;;|}; + [%expect {| - : bool |}] +;; + +let%expect_test "zero" = + parse_and_infer_result {|true || false;;|}; + [%expect {| - : bool |}] +;; + +let%expect_test "zero" = + parse_and_infer_result {|false && false;;|}; + [%expect {| - : bool |}] +;; + +let%expect_test "zero" = + parse_and_infer_result + {|let id x = x in +let homka = Some id in +match homka with +| Some f -> f 42, f "42";;|}; + [%expect {| - : int * string |}] +;; + +let%expect_test "zero" = + parse_and_infer_result {|function 5 -> 'c';;|}; + [%expect {| - : int -> char |}] +;; + +let%expect_test "zero" = + parse_and_infer_result {|function 5 -> 'c' | 22 -> 'k';;|}; + [%expect {| - : int -> char |}] +;; + +let%expect_test "zero" = + parse_and_infer_result {|function 5 -> 'c' | 22 -> 23;;|}; + [%expect {| Unification_failed: char # int |}] +;; + +let%expect_test "zero" = + parse_and_infer_result {|function 5 -> 'c' | 'c' -> 23;;|}; + [%expect {| Unification_failed: int # char |}] +;; + +let%expect_test "zero" = + parse_and_infer_result + {|let id x = x in +let homkaOBOLTUS = id in +match homkaOBOLTUS with +| f -> f 42, f "42";;|}; + [%expect {| - : int * string |}] +;; + +let%expect_test "zero" = + parse_and_infer_result {|if 5=5 then 1 else 5;;|}; + [%expect {| - : int |}] +;; + +let%expect_test "zero" = + parse_and_infer_result {|if 5=5 then "aboba";;|}; + [%expect {| - : string |}] +;; + +let%expect_test "zero" = + parse_and_infer_result {|if 5 then "aboba";;|}; + [%expect {| Unification_failed: int # bool |}] +;; + +let%expect_test "zero" = + parse_and_infer_result {|if true then "andreichik" else 7;;|}; + [%expect {| Unification_failed: string # int |}] +;; + +let%expect_test "zero" = + parse_and_infer_result {|(5,6,7);;|}; + [%expect {| - : int * int * int |}] +;; + +let%expect_test "zero" = + parse_and_infer_result {|(5,7);;|}; + [%expect {| - : int * int |}] +;; + +let%expect_test "zero" = + parse_and_infer_result {|('c',7,false,"andreichik");;|}; + [%expect {| - : char * int * bool * string |}] +;; + +let%expect_test "zero" = + parse_and_infer_result + {|function +5 -> 'c' +| 67 -> 'b' +| 68 -> 'h' +| 69 -> 's' +| 89 -> 'a';;|}; + [%expect {| - : int -> char |}] +;; + +let%expect_test "zero" = + parse_and_infer_result + {|match 9 with +|5 -> 5 +|6 -> 5 +|7 -> 7 +|7 -> 1 +|7 -> 1 +|7 -> 1 +| _ -> 3 +;;|}; + [%expect {| - : int |}] +;; + +let%expect_test "zero" = + parse_and_infer_result {|fun x -> fun y -> y+x;;|}; + [%expect {| - : int -> int -> int |}] +;; + +let%expect_test "zero" = + parse_and_infer_result {|fun x -> fun y -> fun z -> fun w -> y + x * z / w;;|}; + [%expect {| - : int -> int -> int -> int -> int |}] +;; + +let%expect_test "zero" = + parse_and_infer_result + {|let x = 1 +let y = 2 +let z = 3|}; + [%expect + {| + val x : int + val y : int + val z : int |}] +;; + +let%expect_test "zero" = + parse_and_infer_result {|let y = 5|}; + [%expect + {| + val y : int |}] +;; + +let%expect_test "zero" = + parse_and_infer_result {|let _ = (2,5) and y = ("a","b")|}; + [%expect + {| + val y : string * string |}] +;; + +let%expect_test "zero" = + parse_and_infer_result {|let x = (2,5) and y = ("a","b")|}; + [%expect + {| + val y : string * string + val x : int * int |}] +;; + +let%expect_test "zero" = + parse_and_infer_result + {|let (x,y) = (2,5) and z = ("a","b") + let f = x|}; + [%expect + {| + val z : string * string + val y : int + val x : int + val f : int |}] +;; + +let%expect_test "zero" = + parse_and_infer_result {|let (x,y) = (2,'c');;|}; + [%expect + {| + val y : char + val x : int |}] +;; + +let%expect_test "zero" = + parse_and_infer_result {|let x = 5=5;;|}; + [%expect + {| + val x : bool |}] +;; + +let%expect_test "zero" = + parse_and_infer_result {|let rec g () = g ();;|}; + [%expect + {| + val g : unit -> 'a |}] +;; + +let%expect_test "zero" = + parse_and_infer_result {|let x = 6 and y = 6 in x + y;;|}; + [%expect {| - : int |}] +;; + +let%expect_test "zero" = + parse_and_infer_result {|let rec f x = f x;;|}; + [%expect + {| + val f : 'a -> 'b |}] +;; + +let%expect_test "zero" = + parse_and_infer_result {|let f = fun x -> x;;|}; + [%expect + {| + val f : 'a -> 'a |}] +;; + +let%expect_test "zero" = + parse_and_infer_result + {| +let x = 5;; +let 5 = x;;|}; + [%expect + {| + val x : int |}] +;; + +let%expect_test "zero" = + parse_and_infer_result + {| +let x = (6: char);;|}; + [%expect {| Unification_failed: int # char |}] +;; + +let%expect_test "zero" = + parse_and_infer_result + {| +let x = ()|}; + [%expect + {| + val x : unit |}] +;; + +let%expect_test "zero" = + parse_and_infer_result + {| +let square x = x * x;; +let id = fun x -> x in (id square) (id 123);;|}; + [%expect + {| + val square : int -> int + - : int |}] +;; + +let%expect_test "zero" = + parse_and_infer_result + {| +let rec meven n = if n = 0 then 1 else modd (n - 1) + and modd n = if n = 0 then 1 else meven (n - 1) +;;|}; + [%expect + {| + val meven : int -> int + val modd : int -> int |}] +;; + +let%expect_test "zero" = + parse_and_infer_result {| let f (x: int) = x + x;;|}; + [%expect + {| + val f : int -> int |}] +;; + +let%expect_test "zero" = + parse_and_infer_result {| let f (x: int)(y: int) = x + y;;|}; + [%expect + {| + val f : int -> int -> int |}] +;; + +let%expect_test "zero" = + parse_and_infer_result {| let f (x: int) = x || x;;|}; + [%expect {| Unification_failed: int # bool |}] +;; + +let%expect_test "zero" = + parse_and_infer_result {| let f (x: int) = function 5 -> true | 6 -> false;;|}; + [%expect + {| + val f : int -> int -> bool |}] +;; + +let%expect_test "zero" = + parse_and_infer_result {| let (f: int -> bool) = function 5 -> true | 6 -> false;;|}; + [%expect + {| + val f : int -> bool |}] +;; + +let%expect_test "Simplest ADT" = + parse_and_infer_result + {| + type shape = Circle + let x = Circle +|}; + [%expect + {| + val x : shape |}] +;; + +let%expect_test "ADT of" = + parse_and_infer_result + {| + type shape = Circle of int ;; + type 'a koka = Circle of int ;; + let x = Circle 5 +|}; + [%expect + {| + val x : 'a koka |}] +;; + +let%expect_test "ADT of few" = + parse_and_infer_result + {| + type shape = + Circle of int +| Rectangle of char +| Triangle of int*int +;; +let x = 10;; +let Circle (5,5) = Circle x;; +|}; + [%expect {| Unification_failed: int # int * int |}] +;; + +let%expect_test "ADT with poly" = + parse_and_infer_result + {| + type 'a shape = Circle of int + | Rectangle of int * int + | Square of int +;; +let x = 10 +let y = Circle x +let (z: int shape) = Rectangle (2,5) +let q = Square 34985734895 +|}; + [%expect + {| + val x : int + val y : 'a shape + val z : int shape + val q : 'a shape |}] +;; + +let%expect_test "ADT with poly constraint" = + parse_and_infer_result + {| + type 'a shape = Circle of int + | Rectangle of char * int + | Square of int * 'a * 'a +;; +let (x: shape) = Circle 5;; +|}; + [%expect {| Unification_failed: 'a shape # shape |}] +;; + +let%expect_test "ADT with constraint" = + parse_and_infer_result + {| + type 'a shape = Circle of int + | Rectangle of char * int + | Square of int * 'a * 'a +;; +let (x: (int,int) shape) = Circle 5;; +|}; + [%expect {| Unification_failed: 'a shape # int int shape |}] +;; + +let%expect_test "ADT with constraint exp" = + parse_and_infer_result + {| + type 'a shape = Circle of int + | Rectangle of char * int + | Square of int * 'a * 'a +;; +let y = Circle 5;; +let (x: char shape) = y;; +|}; + [%expect + {| + val y : 'a shape + val x : char shape |}] +;; + +let%expect_test "ADT arity" = + parse_and_infer_result + {| +type 'a foo = Foo +type bar = Bar of foo + +|}; + [%expect + {| + Arity_mismatch |}] +;; + +let%expect_test "ADT arity" = + parse_and_infer_result + {| +type 'a foo = Foo +type 'a bar = Bar of 'a foo +let x = Bar Foo +|}; + [%expect + {| + val x : 'a bar |}] +;; + +let%expect_test "alot" = + parse_and_infer_result + {| +let f q w e r t y u i o p a s d g h j k l z x c v b n m qq ww ee rr tt yy uu ii oo pp aa ss dd ff gg hh jj kk ll zz xx cc vv = 5;;|}; + [%expect + {| + val f : 'a -> 'b -> 'c -> 'd -> 'e -> 'f -> 'g -> 'h -> 'i -> 'j -> 'k -> 'l -> 'm -> 'n -> 'o -> 'p -> 'q -> 'r -> 's -> 't -> 'u -> 'v -> 'w -> 'x -> 'y -> 'z -> 'aa -> 'bb -> 'cc -> 'dd -> 'ee -> 'ff -> 'gg -> 'hh -> 'ii -> 'jj -> 'kk -> 'll -> 'mm -> 'nn -> 'oo -> 'pp -> 'qq -> 'rr -> 'ss -> 'tt -> 'uu -> 'vv -> int |}] +;; diff --git a/AML/lib/infer/test/ITest.mli b/AML/lib/infer/test/ITest.mli new file mode 100644 index 00000000..33c7039e --- /dev/null +++ b/AML/lib/infer/test/ITest.mli @@ -0,0 +1,7 @@ +(** Copyright 2025-2026, Rodion Suvorov, Dmitriy Chirkov*) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Inferencer.Infer + +val pprint_result : TypeEnv.t -> string list -> unit diff --git a/AML/lib/infer/test/dune b/AML/lib/infer/test/dune new file mode 100644 index 00000000..ddd1f3ae --- /dev/null +++ b/AML/lib/infer/test/dune @@ -0,0 +1,8 @@ +(library + (name InferTest) + (libraries base stdio Ast Parser Pprinter Inferencer) + (preprocess + (pps ppx_expect)) + (inline_tests) + (instrumentation + (backend bisect_ppx))) diff --git a/AML/lib/parser/dune b/AML/lib/parser/dune new file mode 100644 index 00000000..4f845a3a --- /dev/null +++ b/AML/lib/parser/dune @@ -0,0 +1,6 @@ +(library + (name Parser) + (public_name AML.Parser) + (libraries base angstrom Ast) + (instrumentation + (backend bisect_ppx))) diff --git a/AML/lib/parser/parser.ml b/AML/lib/parser/parser.ml new file mode 100644 index 00000000..e6142d6a --- /dev/null +++ b/AML/lib/parser/parser.ml @@ -0,0 +1,623 @@ +(** Copyright 2025-2026, Rodion Suvorov, Dmitriy Chirkov*) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Ast +open Angstrom +open Base +open Char + +(* + | _ _ _ __ __ _ _ ____ __ __ + |U /"\ uU |"|u| | \ \/"/ ___ |"| ___ U /"\ uU | _'\ u \ \ / / + | \/ _ \/ \| |\| | /\ /\ |_"_| U | | u |_"_| \/ _ \/ \| |_) |/ \ V / + | / ___ \ | |_| |U / \ u | | \| |/__ | | / ___ \ | _ < U_|"|_u + |/_/ \_\ <<\___/ /_/\_\ U/| |\u |_____| U/| |\u /_/ \_\ |_| \_\ |_| + | \\ >>(__) )( ,-,>> \\_.-,_|___|_,-.// \\.-,_|___|_,-.\\ >> // \\_.-,//|(_ + |(__) (__) (__) \_) (__)\_)-' '-(_/(_")("_)\_)-' '-(_/(__) (__)(__) (__)\_) (__) +*) + +let is_whitespace = function + | ' ' | '\t' | '\n' | '\r' -> true + | _ -> false +;; + +let pass_ws = skip_while is_whitespace + +(** Parser that matches string literals an 's' skipping all whitespaces before *) +let pass_ws1 = skip is_whitespace *> pass_ws + +let token s = pass_ws *> string s +let pparenth stmt = token "(" *> stmt <* token ")" + +let ptowhitespace = function + | 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '_' -> true + | _ -> false +;; + +let pident_cap = + let first_char_str = + satisfy (function + | 'A' .. 'Z' -> true + | _ -> false) + in + lift2 (fun fc rs -> String.make 1 fc ^ rs) first_char_str (take_while ptowhitespace) + >>= fun ident -> + if Ast.is_not_keyword ident + then return ident + else fail "Found a keyword instead of an identifier" +;; + +let pident_lc = + let first_char_str = + satisfy (function + | 'a' .. 'z' | '_' -> true + | _ -> false) + in + lift2 (fun fc rs -> String.make 1 fc ^ rs) first_char_str (take_while ptowhitespace) + >>= fun ident -> + if Ast.is_not_keyword ident + then return ident + else fail "Found a keyword instead of an identifier" +;; + +(* + | ____ U ___ u _ _ ____ _____ _ _ _ _____ + |U /"___| \/"_ \/ | \ |"| / __"| u |_ " _| U /"\ u | \ |"| |_ " _| + |\| | u | | | |<| \| |><\___ \/ | | \/ _ \/ <| \| |> | | + | | |/__.-,_| |_| |U| |\ |u u___) | /| |\ / ___ \ U| |\ |u /| |\ + | \____|\_)-\___/ |_| \_| |____/>> u |_|U /_/ \_\ |_| \_| u |_|U + | _// \\ \\ || \\,-.)( (__)_// \\_ \\ >> || \\,-._// \\_ + |(__)(__) (__) (_") (_/(__) (__) (__)(__) (__)(_") (_/(__) (__) + + |U _____ u __ __ ____ ____ U _____ u ____ ____ U ___ u _ _ + |\| ___'|/ \ \/"/ U| _"\ uU | _"\ u \| ___"|// __"| u/ __"| u ___ \/"_ \/ | \ |"| + | | _|" /\ /\ \| |_) |/ \| |_) |/ | _|" <\___ \/<\___ \/ |_"_| | | | |<| \| |> + | | |___ U / \ u | __/ | _ < | |___ u___) | u___) | | | .-,_| |_| |U| |\ |u + | |_____| /_/\_\ |_| |_| \_\ |_____| |____/>>|____/>> U/| |\u\_)-\___/ |_| \_| + | << >>,-,>> \\_ ||>>_ // \\_ << >> )( (__))( (__).-,_|___|_,-. \\ || \\,-. + |(__) (__)\_) (__)(__)__) (__) (__)(__) (__)(__) (__) \_)-' '-(_/ (__) (_") (_/ +*) +let pconstint = + let* number = Int.of_string <$> take_while1 is_digit in + return (Constant.Const_integer number) +;; + +let pconstchar = + let* c = token "'" *> any_char <* token "'" in + return (Constant.Const_char c) +;; + +let pconststring = + token "\"" + *> lift + (fun str -> Constant.Const_string str) + (take_while (function + | '"' -> false + | _ -> true)) + <* token "\"" +;; + +let pconst = pconstchar <|> pconstint <|> pconststring + +let lchain p op = + let rec loop acc = + (let* f = op in + let* y = p in + loop (f acc y)) + <|> return acc + in + let* x = p in + loop x +;; + +let rchain p op = + let rec loop acc = + (let* f = op in + let* y = p in + let new_acc = f acc y in + loop new_acc) + <|> return acc + in + let* x = p in + loop x +;; + +(* + | _____ __ __ ____ U _____ u + | |_ " _| \ \ / /U| _"\ u\| ___'|/ + | | | \ V / \| |_) |/ | _|" + | /| |\ U_|"|_u | __/ | |___ + | u |_|U |_| |_| |_____| + | _// \\_.-,//|(_ ||>>_ << >> + |(__) (__)\_) (__)(__)__) (__) (__) + + |U _____ u __ __ ____ ____ U _____ u ____ ____ U ___ u _ _ + |\| ___'|/ \ \/"/ U| _"\ uU | _"\ u \| ___"|// __"| u/ __"| u ___ \/"_ \/ | \ |"| + | | _|" /\ /\ \| |_) |/ \| |_) |/ | _|" <\___ \/<\___ \/ |_"_| | | | |<| \| |> + | | |___ U / \ u | __/ | _ < | |___ u___) | u___) | | | .-,_| |_| |U| |\ |u + | |_____| /_/\_\ |_| |_| \_\ |_____| |____/>>|____/>> U/| |\u\_)-\___/ |_| \_| + | << >>,-,>> \\_ ||>>_ // \\_ << >> )( (__))( (__).-,_|___|_,-. \\ || \\,-. + |(__) (__)\_) (__)(__)__) (__) (__)(__) (__)(__) (__) \_)-' '-(_/ (__) (_") (_/ +*) + +let ptypearrow = pass_ws *> token "->" >>| fun _ lhs rhs -> TypeExpr.Type_arrow (lhs, rhs) + +let pmultiargsapp pty = + let* args = pparenth @@ sep_by1 (pass_ws *> char ',') pty in + let* id = pass_ws *> pident_lc in + return (TypeExpr.Type_construct (id, args)) +;; + +let ptypevar = + let* id = token "'" *> (pident_lc <|> pident_cap) in + return (TypeExpr.Type_var id) +;; + +let ptypetuple ptype = + let* el1 = ptype in + let* el2 = token "*" *> ptype in + let* rest = many (token "*" *> ptype) in + return (TypeExpr.Type_tuple (el1, el2, rest)) +;; + +let ptypeconstr = + pass_ws + *> fix (fun ptconstr -> + let* tparams = + pass_ws + *> option + [] + (pparenth (sep_by (token ",") ptypevar) + <|> (let* typevar = ptypevar in + return [ typevar ]) + <|> (let* ctuple = pparenth (ptypetuple ptconstr) in + return [ ctuple ]) + <|> + let* ttuple = pparenth (ptypetuple ptypevar) in + return [ ttuple ]) + in + let* tname = + option + None + (let* name = pass_ws *> (pident_lc <|> pident_cap) in + return (Some name)) + in + match tname, tparams with + | Some "", [] | None, [] | None, [ TypeExpr.Type_var _ ] -> + fail "Type constructor cannot have a single type parameter without a name" + | Some name, _ -> return (TypeExpr.Type_construct (name, tparams)) + | None, _ -> + (match tparams with + | x :: _ -> return x + | _ -> fail "Not enough elementts")) +;; + +let ptypeconstr_app = + let* base = ptypeconstr in + let* extra_args = sep_by (token " ") ptypeconstr in + match extra_args with + | [] -> return base + | _ -> + (match base with + | TypeExpr.Type_construct (name, args) -> + return (TypeExpr.Type_construct (name, args @ extra_args)) + | _ -> fail "Expected a type constructor, but found an incompatible expression") +;; + +let ptype = + pass_ws + *> fix (fun ptype -> + let ptvar = + pass_ws + *> choice + [ (pident_lc >>| fun id -> TypeExpr.Type_construct (id, [])) + ; ptypevar + ; pmultiargsapp ptype + ; pparenth ptype + ; ptypeconstr + ] + in + let pttuple = ptypetuple ptvar <|> ptvar in + let ptarr = rchain pttuple ptypearrow <|> pttuple in + let* arg = ptarr in + let rec pcons acc = + option + acc + (pass_ws1 *> pident_lc >>= fun id -> pcons (TypeExpr.Type_construct (id, [ acc ]))) + in + pcons arg) +;; + +let ptype_adt = pass_ws *> ptypeconstr_app <|> ptypevar + +(* + | ____ _ _____ _____ U _____ u ____ _ _ + |U| _"\ uU /"\ u |_ " _| |_ " _| \| ___"|/U | _"\ u | \ |'| + |\| |_) |/ \/ _ \/ | | | | | _|" \| |_) |/<| \| |> + | | __/ / ___ \ /| |\ /| |\ | |___ | _ < U| |\ |u + | |_| /_/ \_\ u |_|U u |_|U |_____| |_| \_\ |_| \_| + | ||>>_ \\ >> _// \\_ _// \\_ << >> // \\_ || \\,-. + |(__)__) (__) (__)(__) (__)(__) (__)(__) (__) (__) (__)(_") (_/ +*) + +let ppatlist ppat = + let* list = token "[" *> sep_by (token ";") ppat <* token "]" in + return + (Stdlib.List.fold_right + (fun x y -> + Ast.Pattern.Pat_construct ("::", Some (Ast.Pattern.Pat_tuple (x, y, [])))) + list + (Ast.Pattern.Pat_construct ("[]", None))) +;; + +let ppatcons ppat = + let rec consparser () = + let* pat = ppat in + token "::" + >>= (fun c -> + consparser () + >>= fun rest -> + return (Ast.Pattern.Pat_construct (c, Some (Ast.Pattern.Pat_tuple (pat, rest, []))))) + <|> return pat + in + consparser () +;; + +let pspecials = choice [ token "()"; token "true"; token "false"; token "None" ] + +let ppatconst = + let* const = pconst in + return (Pattern.Pat_constant const) +;; + +let ptuplepat ppattern = + let* el1 = ppattern in + let* el2 = token "," *> ppattern in + let* rest = many (token "," *> ppattern) in + return (Pattern.Pat_tuple (el1, el2, rest)) +;; + +let ppatvar = + let* id = pident_lc in + match id with + | "_" -> return Pattern.Pat_any + | _ -> return (Pattern.Pat_var id) +;; + +let ppatconstruct (ppattern : Pattern.t Angstrom.t) = + let* name = pident_cap in + let* arg = option None (ppattern >>| Option.some) in + return (Pattern.Pat_construct (name, arg)) +;; + +let ppatconstraint ppattern = + let* pat = token "(" *> ppattern in + let* pattype = token ":" *> pass_ws *> ptype <* token ")" in + return (Pattern.Pat_constraint (pat, pattype)) +;; + +let ppattern = + fix (fun ppattern -> + let poprnd = + fix (fun poprnd -> + pass_ws + *> choice + [ (pspecials >>| fun name -> Pattern.Pat_construct (name, None)) + ; ppatvar + ; ppatconst + ; ppatconstruct poprnd + ; pparenth ppattern + ; ppatconstraint ppattern + ]) + in + let plist = ppatlist poprnd <|> poprnd in + let pcons = ppatcons plist <|> plist in + ptuplepat pcons <|> pcons) +;; + +(* + |U _____ u __ __ ____ ____ U _____ u ____ ____ U ___ u _ _ + |\| ___'|/ \ \/"/ U| _"\ uU | _"\ u \| ___"|// __"| u/ __"| u ___ \/"_ \/ | \ |"| + | | _|" /\ /\ \| |_) |/ \| |_) |/ | _|" <\___ \/<\___ \/ |_"_| | | | |<| \| |> + | | |___ U / \ u | __/ | _ < | |___ u___) | u___) | | | .-,_| |_| |U| |\ |u + | |_____| /_/\_\ |_| |_| \_\ |_____| |____/>>|____/>> U/| |\u\_)-\___/ |_| \_| + | << >>,-,>> \\_ ||>>_ // \\_ << >> )( (__))( (__).-,_|___|_,-. \\ || \\,-. + |(__) (__)\_) (__)(__)__) (__) (__)(__) (__)(__) (__) \_)-' '-(_/ (__) (_") (_/ +*) + +let pexpcons expr = + let rec consparser () = + let* exp = expr in + token "::" + >>= (fun _ -> + consparser () + >>= fun rest -> + return + (Ast.Expression.Exp_construct ("::", Some (Ast.Expression.Exp_tuple (exp, rest, []))))) + <|> return exp + in + consparser () +;; + +let pexplist expr = + let* list = token "[" *> sep_by (token ";") expr <* token "]" in + return + (Base.List.fold_right + list + ~f:(fun x y -> + Ast.Expression.Exp_construct ("::", Some (Ast.Expression.Exp_tuple (x, y, [])))) + ~init:(Ast.Expression.Exp_construct ("[]", None))) +;; + +let pexprconst = + let* const = pconst in + return (Expression.Exp_constant const) +;; + +let pidentexpr = + pident_lc + >>= fun ident -> + if is_not_keyword ident + then return (Expression.Exp_ident ident) + else fail "Found a keyword instead of an identifier" +;; + +let pcase pexpr = + pass_ws + *> option () (token "|" *> return ()) + *> + let* first = pass_ws *> ppattern in + let* second = token "->" *> pass_ws *> pexpr in + return { Expression.first; second } +;; + +let pfunction pexpr = + token "function" + *> + let* first_case = pcase pexpr in + let* case_list = sep_by (token "|") (pcase pexpr) in + return (Ast.Expression.Exp_function (first_case, case_list)) +;; + +let pmatch pexpr = + let* exp = token "match" *> pexpr <* token "with" in + let* casefs = pcase pexpr in + let* case_list = sep_by (token "|") (pcase pexpr) in + return (Ast.Expression.Exp_match (exp, (casefs, case_list))) +;; + +let pletbinding pexpr = + let psimple = + let* pat = ppattern in + let* expr = token "=" *> pexpr in + return { Expression.pat; expr } + in + let pfun = + let* pat = pass_ws *> ppatvar in + let* parameterfs = ppattern in + let* parametertl = many ppattern in + let* exprw = token "=" *> pexpr in + let expr = Expression.Exp_fun ((parameterfs, parametertl), exprw) in + return { Expression.pat; expr } + in + choice [ psimple; pfun ] +;; + +let plethelper pexpr = + let precflag = + token "rec" *> pass_ws1 *> return Expression.Recursive + <|> return Expression.Nonrecursive + in + let* recflag = token "let" *> precflag in + let* bindingfs = pletbinding pexpr in + let* bindingtl = many (token "and" *> pletbinding pexpr) in + return (recflag, bindingfs, bindingtl) +;; + +let pletexpr pexpr = + let* recflag, bindingfs, bindingtl = plethelper pexpr in + let* expr = token "in" *> pass_ws *> pexpr in + return (Expression.Exp_let (recflag, (bindingfs, bindingtl), expr)) +;; + +let ptupleexpr pexpr = + let* el1 = pexpr in + let* el2 = token "," *> pexpr in + let* rest = many (token "," *> pexpr) in + return (Expression.Exp_tuple (el1, el2, rest)) +;; + +let pifexpr pexpr = + let* condition = token "if" *> pass_ws1 *> pexpr in + let* thenexpr = token "then" *> pass_ws1 *> pexpr in + let* elseexpr = + option None (pass_ws1 *> token "else" >>| Option.some) + >>= function + | None -> return None + | Some _ -> pexpr >>| Option.some + in + return (Expression.Exp_if (condition, thenexpr, elseexpr)) +;; + +let pfunexpr pexpr = + lift3 + (fun first_pattern rest_patterns body_expr -> + Expression.Exp_fun ((first_pattern, rest_patterns), body_expr)) + (token "fun" *> ppattern) + (many ppattern) + (token "->" *> pexpr) +;; + +let rec parseprefop pexpr pop = + (let* f = pop in + let* expr = parseprefop pexpr pop in + return @@ f expr) + <|> pexpr +;; + +let parsebinop binoptoken = + token binoptoken + *> return (fun e1 e2 -> + Expression.Exp_apply (Exp_ident binoptoken, Exp_tuple (e1, e2, []))) +;; + +let padd = parsebinop "+" +let psub = parsebinop "-" +let pdiv = parsebinop "/" +let pmul = parsebinop "*" + +let pcompops = + choice + [ parsebinop ">=" + ; parsebinop "<=" + ; parsebinop "<>" + ; parsebinop "<" + ; parsebinop ">" + ; parsebinop "=" + ] +;; + +let plogops = choice [ parsebinop "&&"; parsebinop "||" ] + +let pexprconstraint pexpr = + let* expr = token "(" *> pexpr in + let* exprtype = token ":" *> ptype <* token ")" in + return (Expression.Exp_constraint (expr, exprtype)) +;; + +let papplyexpr = + pass_ws + >>| fun _ lhs rhs -> + match lhs with + | Expression.Exp_construct (id, None) -> Expression.Exp_construct (id, Some rhs) + | _ -> Exp_apply (lhs, rhs) +;; + +let pexpr = + fix (fun pexpr -> + let poprnd = + pass_ws + *> choice + [ (pspecials >>| fun name -> Expression.Exp_construct (name, None)) + ; pparenth pexpr + ; pidentexpr + ; pexprconstraint pexpr + ; (pident_cap >>| fun id -> Expression.Exp_construct (id, None)) + ; pexprconst + ; pfunction pexpr + ; pfunexpr pexpr + ; pexplist pexpr + ; pletexpr pexpr + ; pifexpr pexpr + ; pmatch pexpr + ] + in + let pconstructor_apply = + let* constr = + pparenth (pident_cap >>| fun id -> Expression.Exp_construct (id, None)) + in + let* arg = poprnd in + return (Expression.Exp_apply (constr, arg)) + in + let papply = lchain (pconstructor_apply <|> poprnd) papplyexpr in + let prefop = + parseprefop + papply + (choice [ token "+"; token "-" ] + >>| fun id expr -> Expression.Exp_apply (Exp_ident id, expr)) + <|> papply + in + let pmuldiv = lchain prefop (pmul <|> pdiv) in + let paddsub = lchain pmuldiv (padd <|> psub) in + let pcompare = lchain paddsub pcompops in + let pexpcons = pexpcons pcompare <|> pcompare in + let plogop = rchain pexpcons plogops in + let ptuple = ptupleexpr plogop <|> plogop in + choice + [ pfunction pexpr; pfunexpr pexpr; pletexpr pexpr; pifexpr pexpr; pmatch pexpr ] + <|> ptuple) +;; + +(* + | ____ _____ ____ _ _ ____ _____ _ _ ____ U _____ u + | / __"| u |_ " _|U | _"\ uU |"|u| |U /"___| |_ " _|U |"|u| |U | _"\ u \| ___'|/ + |<\___ \/ | | \| |_) |/ \| |\| |\| | u | | \| |\| | \| |_) |/ | _|" + | u___) | /| |\ | _ < | |_| | | |/__ /| |\ | |_| | | _ < | |___ + | |____/>> u |_|U |_| \_\ <<\___/ \____| u |_|U <<\___/ |_| \_\ |_____| + | )( (__)_// \\_ // \\_(__) )( _// \\ _// \\_(__) )( // \\_ << >> + | (__) (__) (__)(__) (__) (__) (__)(__)(__) (__) (__) (__) (__)(__) (__) + + | _____ U _____ u __ __ ____ + | ___ |_ " _| \| ___"|/U|' \/ '|u / __"| u + | |_"_| | | | _|" \| |\/| |/<\___ \/ + | | | /| |\ | |___ | | | | u___) | + | U/| |\u u |_|U |_____| |_| |_| |____/>> + |.-,_|___|_,-._// \\_ << >> <<,-,,-. )( (__) + | \_)-' '-(_/(__) (__)(__) (__) (./ \.) (__) +*) + +let pseval = lift (fun expr -> Structure.Str_eval expr) pexpr + +let pstrlet = + let* recflag, bindingfs, bindingtl = plethelper pexpr in + return (Structure.Str_value (recflag, (bindingfs, bindingtl))) +;; + +let pstradt = + let* _ = token "type" in + let* type_param = + option + [] + (pparenth (sep_by (token ",") (token "'" *> pident_lc)) + <|> many (token "'" *> pident_lc)) + in + let* type_name = pass_ws *> pident_lc in + let var = + let* name = option None (pass_ws *> pident_cap >>= fun n -> return (Some n)) in + match name with + | Some x -> + (* Constructor case: Can have "of" *) + let* ctype = + option + None + (token "of" + *> let* types = sep_by (token "*") ptype_adt in + match types with + | x :: y :: xs -> return (Some (TypeExpr.Type_tuple (x, y, xs))) + | [ x ] -> return (Some x) + | [] -> fail "Expected type after 'of'") + in + return (x, ctype) + | None -> + (* Lowercase type alias case: Must have a type expression *) + let* ctype = + let* types = sep_by (token "*") ptype_adt in + match types with + | x :: y :: xs -> return (Some (TypeExpr.Type_tuple (x, y, xs))) (* Tuple case *) + | [ x ] -> return (Some x) (* Single type *) + | [] -> fail "Expected type definition" + in + return ("", ctype) + in + let* _ = token "=" in + let* fvar = + option + None + (option None (token "|" *> return None) *> (var >>= fun v -> return (Some v))) + in + let* varl = many (token "|" *> var) in + match fvar with + | Some fvar -> return (Structure.Str_adt (type_param, type_name, (fvar, varl))) + | None -> fail "Expected at least one variant" +;; + +let pstr_item = pseval <|> pstrlet <|> pstradt + +let pstructure = + let psemicolon = many (token ";;") in + sep_by psemicolon pstr_item <* psemicolon <* pass_ws +;; + +let parse str = parse_string ~consume:All pstructure str +let parse_str str = parse str |> Result.ok_or_failwith diff --git a/AML/lib/parser/parser.mli b/AML/lib/parser/parser.mli new file mode 100644 index 00000000..214f6413 --- /dev/null +++ b/AML/lib/parser/parser.mli @@ -0,0 +1,8 @@ +(** Copyright 2025-2026, Rodion Suvorov, Dmitriy Chirkov*) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Ast + +val parse : string -> (Structure.structure_item list, string) result +val parse_str : string -> Structure.structure_item list diff --git a/AML/lib/parser/test/PTest.ml b/AML/lib/parser/test/PTest.ml new file mode 100644 index 00000000..94fa9ebf --- /dev/null +++ b/AML/lib/parser/test/PTest.ml @@ -0,0 +1,4470 @@ +(** Copyright 2025-2026, Rodion Suvorov, Dmitriy Chirkov*) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Parser +open Ast + +(* open Angstrom *) +let test_program str = print_endline (show_program (parse_str str)) + +let%expect_test "negative int constant" = + test_program {|-1;;|}; + [%expect + {| [(Str_eval (Exp_apply ((Exp_ident "-"), (Exp_constant (Const_integer 1)))))] |}] +;; + +(*good*) +let%expect_test "positive int constant" = + test_program {|+1;;|}; + [%expect + {| [(Str_eval (Exp_apply ((Exp_ident "+"), (Exp_constant (Const_integer 1)))))] |}] +;; + +(*good*) +let%expect_test " nt constant" = + test_program {|1;;|}; + [%expect {| [(Str_eval (Exp_constant (Const_integer 1)))] |}] +;; + +(*good*) +let%expect_test "whitespace befor int constant" = + test_program {| 1;;|}; + [%expect {| [(Str_eval (Exp_constant (Const_integer 1)))] |}] +;; + +(*good*) +let%expect_test "negative zero" = + test_program {|-0;;|}; + [%expect + {| [(Str_eval (Exp_apply ((Exp_ident "-"), (Exp_constant (Const_integer 0)))))] |}] +;; + +(*good*) +let%expect_test "positive zero" = + test_program {|+0;;|}; + [%expect + {| [(Str_eval (Exp_apply ((Exp_ident "+"), (Exp_constant (Const_integer 0)))))] |}] +;; + +(*good*) +let%expect_test "char" = + test_program {|''';;|}; + [%expect {| [(Str_eval (Exp_constant (Const_char '\'')))] |}] +;; + +(*good*) +let%expect_test "zero" = + test_program {|0;;|}; + [%expect {| [(Str_eval (Exp_constant (Const_integer 0)))] |}] +;; + +(*good*) +let%expect_test "substraction" = + test_program {|5-11;;|}; + [%expect + {| + [(Str_eval + (Exp_apply ((Exp_ident "-"), + (Exp_tuple + ((Exp_constant (Const_integer 5)), + (Exp_constant (Const_integer 11)), [])) + ))) + ] |}] +;; + +(*good*) +let%expect_test "strange move" = + test_program {|5=5;;|}; + [%expect + {| + [(Str_eval + (Exp_apply ((Exp_ident "="), + (Exp_tuple + ((Exp_constant (Const_integer 5)), + (Exp_constant (Const_integer 5)), [])) + ))) + ] |}] +;; + +(*good*) +let%expect_test "(assignment)" = + test_program {|x = 52;;|}; + [%expect + {| + [(Str_eval + (Exp_apply ((Exp_ident "="), + (Exp_tuple ((Exp_ident "x"), (Exp_constant (Const_integer 52)), []))))) + ] |}] +;; + +(*good*) +let%expect_test "multiplication" = + test_program {|5*5;;|}; + [%expect + {| + [(Str_eval + (Exp_apply ((Exp_ident "*"), + (Exp_tuple + ((Exp_constant (Const_integer 5)), + (Exp_constant (Const_integer 5)), [])) + ))) + ] |}] +;; + +(*good*) +let%expect_test "operators with different priorities" = + test_program {|5-5*1;;|}; + [%expect + {| + [(Str_eval + (Exp_apply ((Exp_ident "-"), + (Exp_tuple + ((Exp_constant (Const_integer 5)), + (Exp_apply ((Exp_ident "*"), + (Exp_tuple + ((Exp_constant (Const_integer 5)), + (Exp_constant (Const_integer 1)), [])) + )), + [])) + ))) + ] |}] +;; + +(*good*) +let%expect_test "operators with different priorities" = + test_program {|5*5-1;;|}; + [%expect + {| + [(Str_eval + (Exp_apply ((Exp_ident "-"), + (Exp_tuple + ((Exp_apply ((Exp_ident "*"), + (Exp_tuple + ((Exp_constant (Const_integer 5)), + (Exp_constant (Const_integer 5)), [])) + )), + (Exp_constant (Const_integer 1)), [])) + ))) + ] |}] +;; + +(*good*) + +let%expect_test "parenthesis with operators with different priorities" = + test_program {|5*(5-1);;|}; + [%expect + {| + [(Str_eval + (Exp_apply ((Exp_ident "*"), + (Exp_tuple + ((Exp_constant (Const_integer 5)), + (Exp_apply ((Exp_ident "-"), + (Exp_tuple + ((Exp_constant (Const_integer 5)), + (Exp_constant (Const_integer 1)), [])) + )), + [])) + ))) + ] |}] +;; + +let%expect_test "parenthesis3" = + test_program {|(5);;|}; + [%expect {| [(Str_eval (Exp_constant (Const_integer 5)))] |}] +;; + +let%expect_test "parenthesis1" = + test_program {|(5*(5-1));;|}; + [%expect + {| + [(Str_eval + (Exp_apply ((Exp_ident "*"), + (Exp_tuple + ((Exp_constant (Const_integer 5)), + (Exp_apply ((Exp_ident "-"), + (Exp_tuple + ((Exp_constant (Const_integer 5)), + (Exp_constant (Const_integer 1)), [])) + )), + [])) + ))) + ] |}] +;; + +let%expect_test "parenthesis2" = + test_program {|105 * 64 / 27 - 2 * (5*(5-1)) + 47 / 64 - (56 * (57 *4) - 5);;|}; + [%expect + {| + [(Str_eval + (Exp_apply ((Exp_ident "-"), + (Exp_tuple + ((Exp_apply ((Exp_ident "+"), + (Exp_tuple + ((Exp_apply ((Exp_ident "-"), + (Exp_tuple + ((Exp_apply ((Exp_ident "/"), + (Exp_tuple + ((Exp_apply ((Exp_ident "*"), + (Exp_tuple + ((Exp_constant (Const_integer 105)), + (Exp_constant (Const_integer 64)), + [])) + )), + (Exp_constant (Const_integer 27)), [])) + )), + (Exp_apply ((Exp_ident "*"), + (Exp_tuple + ((Exp_constant (Const_integer 2)), + (Exp_apply ((Exp_ident "*"), + (Exp_tuple + ((Exp_constant (Const_integer 5)), + (Exp_apply ((Exp_ident "-"), + (Exp_tuple + ((Exp_constant (Const_integer 5)), + (Exp_constant (Const_integer 1)), + [])) + )), + [])) + )), + [])) + )), + [])) + )), + (Exp_apply ((Exp_ident "/"), + (Exp_tuple + ((Exp_constant (Const_integer 47)), + (Exp_constant (Const_integer 64)), [])) + )), + [])) + )), + (Exp_apply ((Exp_ident "-"), + (Exp_tuple + ((Exp_apply ((Exp_ident "*"), + (Exp_tuple + ((Exp_constant (Const_integer 56)), + (Exp_apply ((Exp_ident "*"), + (Exp_tuple + ((Exp_constant (Const_integer 57)), + (Exp_constant (Const_integer 4)), [])) + )), + [])) + )), + (Exp_constant (Const_integer 5)), [])) + )), + [])) + ))) + ] |}] +;; + +let%expect_test "parenthesis3" = + test_program {|1 + (2 + 3);;|}; + [%expect + {| + [(Str_eval + (Exp_apply ((Exp_ident "+"), + (Exp_tuple + ((Exp_constant (Const_integer 1)), + (Exp_apply ((Exp_ident "+"), + (Exp_tuple + ((Exp_constant (Const_integer 2)), + (Exp_constant (Const_integer 3)), [])) + )), + [])) + ))) + ] |}] +;; + +let%expect_test "logical ops + parenthesis" = + test_program + {| + ((3 * (9 - 12 / 4) < 7 && 1) || 1 && 5 < 6) || 20 - 100 / (4 + 16) && 10 < 12 ;; +|}; + [%expect + {| + [(Str_eval + (Exp_apply ((Exp_ident "&&"), + (Exp_tuple + ((Exp_apply ((Exp_ident "||"), + (Exp_tuple + ((Exp_apply ((Exp_ident "&&"), + (Exp_tuple + ((Exp_apply ((Exp_ident "||"), + (Exp_tuple + ((Exp_apply ((Exp_ident "&&"), + (Exp_tuple + ((Exp_apply ((Exp_ident "<"), + (Exp_tuple + ((Exp_apply ((Exp_ident "*"), + (Exp_tuple + ((Exp_constant + (Const_integer 3)), + (Exp_apply ( + (Exp_ident "-"), + (Exp_tuple + ((Exp_constant + (Const_integer + 9)), + (Exp_apply ( + (Exp_ident "/"), + (Exp_tuple + ((Exp_constant + (Const_integer + 12)), + (Exp_constant + (Const_integer + 4)), + [])) + )), + [])) + )), + [])) + )), + (Exp_constant (Const_integer 7)), + [])) + )), + (Exp_constant (Const_integer 1)), + [])) + )), + (Exp_constant (Const_integer 1)), [])) + )), + (Exp_apply ((Exp_ident "<"), + (Exp_tuple + ((Exp_constant (Const_integer 5)), + (Exp_constant (Const_integer 6)), [])) + )), + [])) + )), + (Exp_apply ((Exp_ident "-"), + (Exp_tuple + ((Exp_constant (Const_integer 20)), + (Exp_apply ((Exp_ident "/"), + (Exp_tuple + ((Exp_constant (Const_integer 100)), + (Exp_apply ((Exp_ident "+"), + (Exp_tuple + ((Exp_constant (Const_integer 4)), + (Exp_constant (Const_integer 16)), + [])) + )), + [])) + )), + [])) + )), + [])) + )), + (Exp_apply ((Exp_ident "<"), + (Exp_tuple + ((Exp_constant (Const_integer 10)), + (Exp_constant (Const_integer 12)), [])) + )), + [])) + ))) + ] |}] +;; + +let%expect_test "parenthesis4" = + test_program {|((5-1)*5);;|}; + [%expect + {| + [(Str_eval + (Exp_apply ((Exp_ident "*"), + (Exp_tuple + ((Exp_apply ((Exp_ident "-"), + (Exp_tuple + ((Exp_constant (Const_integer 5)), + (Exp_constant (Const_integer 1)), [])) + )), + (Exp_constant (Const_integer 5)), [])) + ))) + ] |}] +;; + +let%expect_test "whitespace befor int constant" = + test_program + {| let x = 10 in +if x > 5 then print_endline "> 5" +else print_endline "<= 5";; + 5+5;;|}; + [%expect + {| + [(Str_eval + (Exp_let (Nonrecursive, + ({ pat = (Pat_var "x"); expr = (Exp_constant (Const_integer 10)) }, []), + (Exp_if ( + (Exp_apply ((Exp_ident ">"), + (Exp_tuple + ((Exp_ident "x"), (Exp_constant (Const_integer 5)), [])) + )), + (Exp_apply ((Exp_ident "print_endline"), + (Exp_constant (Const_string "> 5")))), + (Some (Exp_apply ((Exp_ident "print_endline"), + (Exp_constant (Const_string "<= 5"))))) + )) + ))); + (Str_eval + (Exp_apply ((Exp_ident "+"), + (Exp_tuple + ((Exp_constant (Const_integer 5)), + (Exp_constant (Const_integer 5)), [])) + ))) + ] |}] +;; + +let%expect_test "parenthesis5" = + test_program {|(5*5-1);;|}; + [%expect + {| + [(Str_eval + (Exp_apply ((Exp_ident "-"), + (Exp_tuple + ((Exp_apply ((Exp_ident "*"), + (Exp_tuple + ((Exp_constant (Const_integer 5)), + (Exp_constant (Const_integer 5)), [])) + )), + (Exp_constant (Const_integer 1)), [])) + ))) + ] |}] +;; + +let%expect_test "parenthesis5" = + test_program {|(1-5*5);;|}; + [%expect + {| + [(Str_eval + (Exp_apply ((Exp_ident "-"), + (Exp_tuple + ((Exp_constant (Const_integer 1)), + (Exp_apply ((Exp_ident "*"), + (Exp_tuple + ((Exp_constant (Const_integer 5)), + (Exp_constant (Const_integer 5)), [])) + )), + [])) + ))) + ] |}] +;; + +(* +(+(1, 2), 3) *) + +let%expect_test "parenthesis2" = + test_program {|( 5-1 );;|}; + [%expect + {| + [(Str_eval + (Exp_apply ((Exp_ident "-"), + (Exp_tuple + ((Exp_constant (Const_integer 5)), + (Exp_constant (Const_integer 1)), [])) + ))) + ] |}] +;; + +(* good fr *) +let%expect_test "tuple" = + test_program {|(5,1,2,5);;|}; + [%expect + {| + [(Str_eval + (Exp_tuple + ((Exp_constant (Const_integer 5)), (Exp_constant (Const_integer 1)), + [(Exp_constant (Const_integer 2)); (Exp_constant (Const_integer 5))]))) + ] |}] +;; + +(* good fr *) +let%expect_test "int + a" = + test_program {|5+'a';;|}; + [%expect + {| + [(Str_eval + (Exp_apply ((Exp_ident "+"), + (Exp_tuple + ((Exp_constant (Const_integer 5)), (Exp_constant (Const_char 'a')), + [])) + ))) + ] |}] +;; + +let%expect_test "let assignment" = + test_program {|let x = 5 in 6;;|}; + [%expect + {| + [(Str_eval + (Exp_let (Nonrecursive, + ({ pat = (Pat_var "x"); expr = (Exp_constant (Const_integer 5)) }, []), + (Exp_constant (Const_integer 6))))) + ] |}] +;; + +let%expect_test "let assignment" = + test_program {|let reca = 1;;|}; + [%expect + {| + [(Str_value (Nonrecursive, + ({ pat = (Pat_var "reca"); expr = (Exp_constant (Const_integer 1)) }, []) + )) + ] |}] +;; + +let%expect_test "let assignment" = + test_program {|let Some None = Some 1;;|}; + [%expect + {| + [(Str_value (Nonrecursive, + ({ pat = (Pat_construct ("Some", (Some (Pat_construct ("None", None))))); + expr = + (Exp_construct ("Some", (Some (Exp_constant (Const_integer 1))))) }, + []) + )) + ] |}] +;; + +let%expect_test "let assignment none" = + test_program {|let Some Some Some Some Some None = 1;;|}; + [%expect + {| + [(Str_value (Nonrecursive, + ({ pat = + (Pat_construct ("Some", + (Some (Pat_construct ("Some", + (Some (Pat_construct ("Some", + (Some (Pat_construct ("Some", + (Some (Pat_construct ("Some", + (Some (Pat_construct ("None", + None))) + ))) + ))) + ))) + ))) + )); + expr = (Exp_constant (Const_integer 1)) }, + []) + )) + ] |}] +;; + +let%expect_test "let assignment none" = + test_program {|let Some Some Some Some Some None = 1;;|}; + [%expect + {| + [(Str_value (Nonrecursive, + ({ pat = + (Pat_construct ("Some", + (Some (Pat_construct ("Some", + (Some (Pat_construct ("Some", + (Some (Pat_construct ("Some", + (Some (Pat_construct ("Some", + (Some (Pat_construct ("None", + None))) + ))) + ))) + ))) + ))) + )); + expr = (Exp_constant (Const_integer 1)) }, + []) + )) + ] |}] +;; + +let%expect_test "let assignment with recursion" = + test_program {|let rec x = 5 in 6;;|}; + [%expect + {| + [(Str_eval + (Exp_let (Recursive, + ({ pat = (Pat_var "x"); expr = (Exp_constant (Const_integer 5)) }, []), + (Exp_constant (Const_integer 6))))) + ] |}] +;; + +let%expect_test "let assignment with recursion" = + test_program {|let rec x = 5 in 7;;|}; + [%expect + {| + [(Str_eval + (Exp_let (Recursive, + ({ pat = (Pat_var "x"); expr = (Exp_constant (Const_integer 5)) }, []), + (Exp_constant (Const_integer 7))))) + ] |}] +;; + +let%expect_test "apply without space" = + test_program {|f(x);;|}; + [%expect {| [(Str_eval (Exp_apply ((Exp_ident "f"), (Exp_ident "x"))))] |}] +;; + +let%expect_test "apply num to ident" = + test_program {|f (x-1);;|}; + [%expect + {| + [(Str_eval + (Exp_apply ((Exp_ident "f"), + (Exp_apply ((Exp_ident "-"), + (Exp_tuple ((Exp_ident "x"), (Exp_constant (Const_integer 1)), [])) + )) + ))) + ] |}] +;; + +let%expect_test "simple fun" = + test_program {|fun x -> y;;|}; + [%expect {| [(Str_eval (Exp_fun (((Pat_var "x"), []), (Exp_ident "y"))))] |}] +;; + +let%expect_test "multi pattern fun" = + test_program {|fun x -> y;;|}; + [%expect {| [(Str_eval (Exp_fun (((Pat_var "x"), []), (Exp_ident "y"))))] |}] +;; + +let%expect_test "multi pattern fun" = + test_program {|5>5;;|}; + [%expect + {| + [(Str_eval + (Exp_apply ((Exp_ident ">"), + (Exp_tuple + ((Exp_constant (Const_integer 5)), + (Exp_constant (Const_integer 5)), [])) + ))) + ] |}] +;; + +let%expect_test "multi fun" = + test_program {|fun p -> fun x -> z;;|}; + [%expect + {| + [(Str_eval + (Exp_fun (((Pat_var "p"), []), + (Exp_fun (((Pat_var "x"), []), (Exp_ident "z")))))) + ] |}] +;; + +let%expect_test "apply and subtraction" = + test_program {|f (x-1);;|}; + [%expect + {| + [(Str_eval + (Exp_apply ((Exp_ident "f"), + (Exp_apply ((Exp_ident "-"), + (Exp_tuple ((Exp_ident "x"), (Exp_constant (Const_integer 1)), [])) + )) + ))) + ] |}] +;; + +let%expect_test "exprlet and" = + test_program {|let x = 5 and y = 10;;|}; + [%expect + {| + [(Str_value (Nonrecursive, + ({ pat = (Pat_var "x"); expr = (Exp_constant (Const_integer 5)) }, + [{ pat = (Pat_var "y"); expr = (Exp_constant (Const_integer 10)) }]) + )) + ] |}] +;; + +let%expect_test "exprlet and" = + test_program {|let rec x x x x x x x = y and x = 20 in 5;;|}; + [%expect + {| + [(Str_eval + (Exp_let (Recursive, + ({ pat = (Pat_var "x"); + expr = + (Exp_fun ( + ((Pat_var "x"), + [(Pat_var "x"); (Pat_var "x"); (Pat_var "x"); (Pat_var "x"); + (Pat_var "x")]), + (Exp_ident "y"))) + }, + [{ pat = (Pat_var "x"); expr = (Exp_constant (Const_integer 20)) }]), + (Exp_constant (Const_integer 5))))) + ] |}] +;; + +let%expect_test "let and tuple" = + test_program {|let (a,b) = (b,a);;|}; + [%expect + {| + [(Str_value (Nonrecursive, + ({ pat = (Pat_tuple ((Pat_var "a"), (Pat_var "b"), [])); + expr = (Exp_tuple ((Exp_ident "b"), (Exp_ident "a"), [])) }, + []) + )) + ] |}] +;; + +let%expect_test "let and" = + test_program {|let rec x x x x x x x = y and x = 20;;|}; + [%expect + {| + [(Str_value (Recursive, + ({ pat = (Pat_var "x"); + expr = + (Exp_fun ( + ((Pat_var "x"), + [(Pat_var "x"); (Pat_var "x"); (Pat_var "x"); (Pat_var "x"); + (Pat_var "x")]), + (Exp_ident "y"))) + }, + [{ pat = (Pat_var "x"); expr = (Exp_constant (Const_integer 20)) }]) + )) + ] |}] +;; + +let%expect_test "multiplication and apply" = + test_program {|x * f x;;|}; + [%expect + {| + [(Str_eval + (Exp_apply ((Exp_ident "*"), + (Exp_tuple + ((Exp_ident "x"), (Exp_apply ((Exp_ident "f"), (Exp_ident "x"))), + [])) + ))) + ] |}] +;; + +let%expect_test "let and apply" = + test_program {|let f x = x;;|}; + [%expect + {| + [(Str_value (Nonrecursive, + ({ pat = (Pat_var "f"); + expr = (Exp_fun (((Pat_var "x"), []), (Exp_ident "x"))) }, + []) + )) + ] |}] +;; + +let%expect_test "pattern constraint" = + test_program {|let (x : int * int) = (x: int);;|}; + [%expect + {| + [(Str_value (Nonrecursive, + ({ pat = + (Pat_constraint ((Pat_var "x"), + (Type_tuple + ((Type_construct ("int", [])), (Type_construct ("int", [])), [])) + )); + expr = + (Exp_constraint ((Exp_ident "x"), (Type_construct ("int", [])))) }, + []) + )) + ] |}] +;; + +let%expect_test "pattern constraint" = + test_program {|let (x : int*int) = (x: int*int);;|}; + [%expect + {| + [(Str_value (Nonrecursive, + ({ pat = + (Pat_constraint ((Pat_var "x"), + (Type_tuple + ((Type_construct ("int", [])), (Type_construct ("int", [])), [])) + )); + expr = + (Exp_constraint ((Exp_ident "x"), + (Type_tuple + ((Type_construct ("int", [])), (Type_construct ("int", [])), [])) + )) + }, + []) + )) + ] |}] +;; + +let%expect_test "pattern constraint" = + test_program {|let (x : int->int) = (x: int->int);;|}; + [%expect + {| + [(Str_value (Nonrecursive, + ({ pat = + (Pat_constraint ((Pat_var "x"), + (Type_arrow ((Type_construct ("int", [])), + (Type_construct ("int", [])))) + )); + expr = + (Exp_constraint ((Exp_ident "x"), + (Type_arrow ((Type_construct ("int", [])), + (Type_construct ("int", [])))) + )) + }, + []) + )) + ] |}] +;; + +let%expect_test "let and apply" = + test_program {|let f x = g a b c;;|}; + [%expect + {| + [(Str_value (Nonrecursive, + ({ pat = (Pat_var "f"); + expr = + (Exp_fun (((Pat_var "x"), []), + (Exp_apply ( + (Exp_apply ((Exp_apply ((Exp_ident "g"), (Exp_ident "a"))), + (Exp_ident "b"))), + (Exp_ident "c"))) + )) + }, + []) + )) + ] |}] +;; + +let%expect_test "let and apply v2" = + test_program {|let fact x = fact(x-1);;|}; + [%expect + {| + [(Str_value (Nonrecursive, + ({ pat = (Pat_var "fact"); + expr = + (Exp_fun (((Pat_var "x"), []), + (Exp_apply ((Exp_ident "fact"), + (Exp_apply ((Exp_ident "-"), + (Exp_tuple + ((Exp_ident "x"), (Exp_constant (Const_integer 1)), [])) + )) + )) + )) + }, + []) + )) + ] |}] +;; + +let%expect_test "if then" = + test_program {|if 5 then 6;;|}; + [%expect + {| + [(Str_eval + (Exp_if ((Exp_constant (Const_integer 5)), + (Exp_constant (Const_integer 6)), None))) + ] |}] +;; + +let%expect_test "if statement. condition from fact" = + test_program {|if n = 0 then 1 else 7;;|}; + [%expect + {| + [(Str_eval + (Exp_if ( + (Exp_apply ((Exp_ident "="), + (Exp_tuple ((Exp_ident "n"), (Exp_constant (Const_integer 0)), [])) + )), + (Exp_constant (Const_integer 1)), + (Some (Exp_constant (Const_integer 7)))))) + ] |}] +;; + +let%expect_test "let and if" = + test_program {|let x = if n = 0 then 6 else 7 in 6;;|}; + [%expect + {| + [(Str_eval + (Exp_let (Nonrecursive, + ({ pat = (Pat_var "x"); + expr = + (Exp_if ( + (Exp_apply ((Exp_ident "="), + (Exp_tuple + ((Exp_ident "n"), (Exp_constant (Const_integer 0)), [])) + )), + (Exp_constant (Const_integer 6)), + (Some (Exp_constant (Const_integer 7))))) + }, + []), + (Exp_constant (Const_integer 6))))) + ] |}] +;; + +let%expect_test "factorial" = + test_program {|let rec fact n = if n = 0 then 1 else n * fact(n-1);;|}; + [%expect + {| + [(Str_value (Recursive, + ({ pat = (Pat_var "fact"); + expr = + (Exp_fun (((Pat_var "n"), []), + (Exp_if ( + (Exp_apply ((Exp_ident "="), + (Exp_tuple + ((Exp_ident "n"), (Exp_constant (Const_integer 0)), [])) + )), + (Exp_constant (Const_integer 1)), + (Some (Exp_apply ((Exp_ident "*"), + (Exp_tuple + ((Exp_ident "n"), + (Exp_apply ((Exp_ident "fact"), + (Exp_apply ((Exp_ident "-"), + (Exp_tuple + ((Exp_ident "n"), + (Exp_constant (Const_integer 1)), + [])) + )) + )), + [])) + ))) + )) + )) + }, + []) + )) + ] |}] +;; + +let%expect_test "factorial" = + test_program {|let (x: int->char->string -> x *x* x) = 1;;|}; + [%expect + {| + [(Str_value (Nonrecursive, + ({ pat = + (Pat_constraint ((Pat_var "x"), + (Type_arrow ( + (Type_arrow ( + (Type_arrow ((Type_construct ("int", [])), + (Type_construct ("char", [])))), + (Type_construct ("string", [])))), + (Type_tuple + ((Type_construct ("x", [])), (Type_construct ("x", [])), + [(Type_construct ("x", []))])) + )) + )); + expr = (Exp_constant (Const_integer 1)) }, + []) + )) + ] |}] +;; + +let%expect_test "factorial" = + test_program {|let rec a = 1;;|}; + [%expect + {| + [(Str_value (Recursive, + ({ pat = (Pat_var "a"); expr = (Exp_constant (Const_integer 1)) }, []))) + ] |}] +;; + +let%expect_test "factorial" = + test_program {|let rec a = 1;;|}; + [%expect + {| + [(Str_value (Recursive, + ({ pat = (Pat_var "a"); expr = (Exp_constant (Const_integer 1)) }, []))) + ] |}] +;; + +let%expect_test "factorial" = + test_program {|let reca = 1 in 5;;|}; + [%expect + {| + [(Str_eval + (Exp_let (Nonrecursive, + ({ pat = (Pat_var "reca"); expr = (Exp_constant (Const_integer 1)) }, + []), + (Exp_constant (Const_integer 5))))) + ] |}] +;; + +let%expect_test "factorial" = + test_program {|let reca = 1;;|}; + [%expect + {| + [(Str_value (Nonrecursive, + ({ pat = (Pat_var "reca"); expr = (Exp_constant (Const_integer 1)) }, []) + )) + ] |}] +;; + +let%expect_test "_" = + test_program {|let recgP6Tz_9 = zdghovr and _ = n_4p;;|}; + [%expect + {| + [(Str_value (Nonrecursive, + ({ pat = (Pat_var "recgP6Tz_9"); expr = (Exp_ident "zdghovr") }, + [{ pat = Pat_any; expr = (Exp_ident "n_4p") }]) + )) + ] |}] +;; + +(*good*) +let%expect_test "_" = + test_program {|(f : (int -> int -> int));;|}; + [%expect + {| + [(Str_eval + (Exp_constraint ((Exp_ident "f"), + (Type_arrow ( + (Type_arrow ((Type_construct ("int", [])), + (Type_construct ("int", [])))), + (Type_construct ("int", [])))) + ))) + ] |}] +;; + +let%expect_test "_" = + test_program {|let (f:(x)) = 5;;|}; + [%expect + {| + [(Str_value (Nonrecursive, + ({ pat = (Pat_constraint ((Pat_var "f"), (Type_construct ("x", [])))); + expr = (Exp_constant (Const_integer 5)) }, + []) + )) + ] |}] +;; + +let%expect_test "_" = + test_program + {| function +| "" -> 'a' +| "" -> "izvkvwcet" ;;|}; + [%expect + {| + [(Str_eval + (Exp_function + ({ first = (Pat_constant (Const_string "")); + second = (Exp_constant (Const_char 'a')) }, + [{ first = (Pat_constant (Const_string "")); + second = (Exp_constant (Const_string "izvkvwcet")) } + ]))) + ] |}] +;; + +(*good*) +let%expect_test "_" = + test_program + {|('v' : (sqEcf8boz* s58r6D_P_bX___yy_93GPH__04_r___d9Zc_1U2__c8XmN1n_F_WBqxl68h_8_TCGqp3B_5w_Y_53a6_d_6_H9845__c5__09s* sh__7ud_43* s_KKm_z3r5__jHMLw_qd1760R_G__nI6_J040__AB_6s0__D__d__e32Te6H_4__Ec_V_E__f_* o0_a_W_* f__LcPREH13__mY_CezffoI5_8_u_zU__ZncOnf_v4_L8_44Y72_3_A5_B758TViP_u_vyFU9_1* qD0* g4wp33A_W* e1V_gi_6y* x_Sv_PZ)) ;; |}; + [%expect + {| + [(Str_eval + (Exp_constraint ((Exp_constant (Const_char 'v')), + (Type_tuple + ((Type_construct ("sqEcf8boz", [])), + (Type_construct ( + "s58r6D_P_bX___yy_93GPH__04_r___d9Zc_1U2__c8XmN1n_F_WBqxl68h_8_TCGqp3B_5w_Y_53a6_d_6_H9845__c5__09s", + [])), + [(Type_construct ("sh__7ud_43", [])); + (Type_construct ( + "s_KKm_z3r5__jHMLw_qd1760R_G__nI6_J040__AB_6s0__D__d__e32Te6H_4__Ec_V_E__f_", + [])); + (Type_construct ("o0_a_W_", [])); + (Type_construct ( + "f__LcPREH13__mY_CezffoI5_8_u_zU__ZncOnf_v4_L8_44Y72_3_A5_B758TViP_u_vyFU9_1", + [])); + (Type_construct ("qD0", [])); + (Type_construct ("g4wp33A_W", [])); + (Type_construct ("e1V_gi_6y", [])); + (Type_construct ("x_Sv_PZ", []))])) + ))) + ] |}] +;; + +let%expect_test "not keyword" = + test_program {|(Kakadu_52) (fun x -> x);;|}; + [%expect + {| + [(Str_eval + (Exp_apply ((Exp_construct ("Kakadu_52", None)), + (Exp_fun (((Pat_var "x"), []), (Exp_ident "x")))))) + ] |}] +;; + +let%expect_test "adt v0" = + test_program {|type shape = Circle;;|}; + [%expect {| [(Str_adt ([], "shape", (("Circle", None), [])))] |}] +;; + +let%expect_test "adt v1" = + test_program {|type shape = Circle | Square of int;;|}; + [%expect + {| + [(Str_adt ([], "shape", + (("Circle", None), [("Square", (Some (Type_construct ("int", []))))]))) + ] |}] +;; + +let%expect_test "adt v2" = + test_program {|type shape = Circle | Square;;|}; + [%expect {| [(Str_adt ([], "shape", (("Circle", None), [("Square", None)])))] |}] +;; + +let%expect_test "adt v3" = + test_program {|type shape = Circle | Square of int * int;;|}; + [%expect + {| + [(Str_adt ([], "shape", + (("Circle", None), + [("Square", + (Some (Type_tuple + ((Type_construct ("int", [])), (Type_construct ("int", [])), + [])))) + ]) + )) + ] |}] +;; + +let%expect_test "adt with poly" = + test_program {|type 'a shape = Circle | Square of 'a * 'a ;;|}; + [%expect + {| + [(Str_adt (["a"], "shape", + (("Circle", None), + [("Square", (Some (Type_tuple ((Type_var "a"), (Type_var "a"), []))))]) + )) + ] |}] +;; + +let%expect_test "bad adt with poly (wrong types)" = + test_program {|type 'a shape = Circle | Square of 'b;;|}; + [%expect + {| + [(Str_adt (["a"], "shape", + (("Circle", None), [("Square", (Some (Type_var "b")))]))) + ] |}] +;; + +let%expect_test "adt with poly (not poly in variant)" = + test_program {|type 'a shape = Circle | Square of int;;|}; + [%expect + {| + [(Str_adt (["a"], "shape", + (("Circle", None), [("Square", (Some (Type_construct ("int", []))))]))) + ] |}] +;; + +let%expect_test "adt with poly v.easy" = + test_program {|type 'a shape = Circle;;|}; + [%expect {| [(Str_adt (["a"], "shape", (("Circle", None), [])))] |}] +;; + +let%expect_test "adt with multiple poly v1" = + test_program {|type ('a, 'b) shape = Circle | Square of 'a;;|}; + [%expect + {| + [(Str_adt (["a"; "b"], "shape", + (("Circle", None), [("Square", (Some (Type_var "a")))]))) + ] |}] +;; + +let%expect_test "adt with multiple poly v2" = + test_program {|type ('a, 'b) shape = Circle | Square of ('a,'b) shape;;|}; + [%expect + {| + [(Str_adt (["a"; "b"], "shape", + (("Circle", None), + [("Square", + (Some (Type_construct ("shape", [(Type_var "a"); (Type_var "b")]))))]) + )) + ] |}] +;; + +let%expect_test "just let (char)" = + test_program {|let x = '5';;|}; + [%expect + {| + [(Str_value (Nonrecursive, + ({ pat = (Pat_var "x"); expr = (Exp_constant (Const_char '5')) }, []))) + ] |}] +;; + +let%expect_test "string print_endline" = + test_program + {|let x = "51" in +print_endline x;;|}; + [%expect + {| + [(Str_eval + (Exp_let (Nonrecursive, + ({ pat = (Pat_var "x"); expr = (Exp_constant (Const_string "51")) }, + []), + (Exp_apply ((Exp_ident "print_endline"), (Exp_ident "x")))))) + ] |}] +;; + +let%expect_test "string print_endline" = + test_program {|x = "51";;|}; + [%expect + {| + [(Str_eval + (Exp_apply ((Exp_ident "="), + (Exp_tuple ((Exp_ident "x"), (Exp_constant (Const_string "51")), [])) + ))) + ] |}] +;; + +let%expect_test "match case" = + test_program + {|let classify n = + match n with + | 0 -> "zero" + | 1 -> "one" + | _ -> "other" +;;|}; + [%expect + {| + [(Str_value (Nonrecursive, + ({ pat = (Pat_var "classify"); + expr = + (Exp_fun (((Pat_var "n"), []), + (Exp_match ((Exp_ident "n"), + ({ first = (Pat_constant (Const_integer 0)); + second = (Exp_constant (Const_string "zero")) }, + [{ first = (Pat_constant (Const_integer 1)); + second = (Exp_constant (Const_string "one")) }; + { first = Pat_any; + second = (Exp_constant (Const_string "other")) } + ]) + )) + )) + }, + []) + )) + ] |}] +;; + +let%expect_test "adt with tuple in variant" = + test_program {|type shape = Circle | Square of int * int ;;|}; + [%expect + {| + [(Str_adt ([], "shape", + (("Circle", None), + [("Square", + (Some (Type_tuple + ((Type_construct ("int", [])), (Type_construct ("int", [])), + [])))) + ]) + )) + ] |}] +;; + +let%expect_test "adt with recursive poly variant" = + test_program {|type ('a, 'b) shape = Circle | Square of 'a shape;;|}; + [%expect + {| + [(Str_adt (["a"; "b"], "shape", + (("Circle", None), + [("Square", (Some (Type_construct ("shape", [(Type_var "a")]))))]) + )) + ] |}] +;; + +let%expect_test "adt list" = + test_program + {| +type 'a my_list = Nil | Cons of 'a * 'a my_list;; +|}; + [%expect + {| + [(Str_adt (["a"], "my_list", + (("Nil", None), + [("Cons", + (Some (Type_tuple + ((Type_var "a"), + (Type_construct ("my_list", [(Type_var "a")])), [])))) + ]) + )) + ] |}] +;; + +let%expect_test "adt nested type_construct in args" = + test_program + {| +type 'a nested_list = Nil +| Cons of 'a * 'a nested_list +| List of 'a nested_list;; + +|}; + [%expect + {| + [(Str_adt (["a"], "nested_list", + (("Nil", None), + [("Cons", + (Some (Type_tuple + ((Type_var "a"), + (Type_construct ("nested_list", [(Type_var "a")])), + [])))); + ("List", (Some (Type_construct ("nested_list", [(Type_var "a")]))))]) + )) + ] |}] +;; + +let%expect_test "adt nested type_construct in args" = + test_program + {| +type 'a nested_list = Nil +| Cons of 'a * 'a nested_list +| List of 'a nested_list nested_list;; +|}; + [%expect + {| + [(Str_adt (["a"], "nested_list", + (("Nil", None), + [("Cons", + (Some (Type_tuple + ((Type_var "a"), + (Type_construct ("nested_list", [(Type_var "a")])), + [])))); + ("List", + (Some (Type_construct ("nested_list", + [(Type_var "a"); (Type_construct ("nested_list", []))])))) + ]) + )) + ] |}] +;; + +let%expect_test "poly adt (tree)" = + test_program + {| +type 'a tree = Leaf + | Node of 'a * 'a tree * 'a tree +;; + |}; + [%expect + {| + [(Str_adt (["a"], "tree", + (("Leaf", None), + [("Node", + (Some (Type_tuple + ((Type_var "a"), (Type_construct ("tree", [(Type_var "a")])), + [(Type_construct ("tree", [(Type_var "a")]))])))) + ]) + )) + ] |}] +;; + +let%expect_test "adt list with pair" = + test_program + {| type ('a, 'b) pair_list = Nil + | Cons of ('a * 'b) * ('a, 'b) pair_list;; +|}; + [%expect + {| + [(Str_adt (["a"; "b"], "pair_list", + (("Nil", None), + [("Cons", + (Some (Type_tuple + ((Type_tuple ((Type_var "a"), (Type_var "b"), [])), + (Type_construct ("pair_list", + [(Type_var "a"); (Type_var "b")])), + [])))) + ]) + )) + ] |}] +;; + +let%expect_test "adt list with 2 el in node" = + test_program + {| type ('a, 'b) pair_list = Nil + | Cons of 'a * 'b * ('a, 'b) pair_list;; +|}; + [%expect + {| + [(Str_adt (["a"; "b"], "pair_list", + (("Nil", None), + [("Cons", + (Some (Type_tuple + ((Type_var "a"), (Type_var "b"), + [(Type_construct ("pair_list", + [(Type_var "a"); (Type_var "b")])) + ])))) + ]) + )) + ] |}] +;; + +let%expect_test "adt" = + test_program + {| +type shape = Point of int + | Circle of int * int + | Rect of int * int * int +;; +|}; + [%expect + {| + [(Str_adt ([], "shape", + (("Point", (Some (Type_construct ("int", [])))), + [("Circle", + (Some (Type_tuple + ((Type_construct ("int", [])), (Type_construct ("int", [])), + [])))); + ("Rect", + (Some (Type_tuple + ((Type_construct ("int", [])), (Type_construct ("int", [])), + [(Type_construct ("int", []))])))) + ]) + )) + ] |}] +;; + +let%expect_test "simple adt with pattern matching function (else case) + printing" = + test_program + {| +type shape = Circle of int + | Rectangle of (int*int) * int + | Square of int +;; +let area s = + match s with + | Square c -> 0 + | Circle c -> 3 + | Rectangle c -> 10 +;; +let x = Square 5 in +let y = area x in +print_int y +;; + + |}; + [%expect + {| + [(Str_adt ([], "shape", + (("Circle", (Some (Type_construct ("int", [])))), + [("Rectangle", + (Some (Type_tuple + ((Type_tuple + ((Type_construct ("int", [])), + (Type_construct ("int", [])), [])), + (Type_construct ("int", [])), [])))); + ("Square", (Some (Type_construct ("int", []))))]) + )); + (Str_value (Nonrecursive, + ({ pat = (Pat_var "area"); + expr = + (Exp_fun (((Pat_var "s"), []), + (Exp_match ((Exp_ident "s"), + ({ first = (Pat_construct ("Square", (Some (Pat_var "c")))); + second = (Exp_constant (Const_integer 0)) }, + [{ first = (Pat_construct ("Circle", (Some (Pat_var "c")))); + second = (Exp_constant (Const_integer 3)) }; + { first = + (Pat_construct ("Rectangle", (Some (Pat_var "c")))); + second = (Exp_constant (Const_integer 10)) } + ]) + )) + )) + }, + []) + )); + (Str_eval + (Exp_let (Nonrecursive, + ({ pat = (Pat_var "x"); + expr = + (Exp_construct ("Square", (Some (Exp_constant (Const_integer 5))) + )) + }, + []), + (Exp_let (Nonrecursive, + ({ pat = (Pat_var "y"); + expr = (Exp_apply ((Exp_ident "area"), (Exp_ident "x"))) }, + []), + (Exp_apply ((Exp_ident "print_int"), (Exp_ident "y"))))) + ))) + ] |}] +;; + +let%expect_test "rec fun (pow)" = + test_program + {| +let rec pow x y = if y = 0 then 1 else x * pow x (y - 1) in print_int (pow 5 6) +;; |}; + [%expect + {| + [(Str_eval + (Exp_let (Recursive, + ({ pat = (Pat_var "pow"); + expr = + (Exp_fun (((Pat_var "x"), [(Pat_var "y")]), + (Exp_if ( + (Exp_apply ((Exp_ident "="), + (Exp_tuple + ((Exp_ident "y"), (Exp_constant (Const_integer 0)), [])) + )), + (Exp_constant (Const_integer 1)), + (Some (Exp_apply ((Exp_ident "*"), + (Exp_tuple + ((Exp_ident "x"), + (Exp_apply ( + (Exp_apply ((Exp_ident "pow"), + (Exp_ident "x"))), + (Exp_apply ((Exp_ident "-"), + (Exp_tuple + ((Exp_ident "y"), + (Exp_constant (Const_integer 1)), + [])) + )) + )), + [])) + ))) + )) + )) + }, + []), + (Exp_apply ((Exp_ident "print_int"), + (Exp_apply ( + (Exp_apply ((Exp_ident "pow"), (Exp_constant (Const_integer 5)) + )), + (Exp_constant (Const_integer 6)))) + )) + ))) + ] |}] +;; + +let%expect_test "keyword" = + test_program {|let x = 5 and (z,v,c) = (5,6,7);;|}; + [%expect + {| + [(Str_value (Nonrecursive, + ({ pat = (Pat_var "x"); expr = (Exp_constant (Const_integer 5)) }, + [{ pat = (Pat_tuple ((Pat_var "z"), (Pat_var "v"), [(Pat_var "c")])); + expr = + (Exp_tuple + ((Exp_constant (Const_integer 5)), + (Exp_constant (Const_integer 6)), + [(Exp_constant (Const_integer 7))])) + } + ]) + )) + ] |}] +;; + +let%expect_test "keyword" = + test_program {|fun x -> x+x;;|}; + [%expect + {| + [(Str_eval + (Exp_fun (((Pat_var "x"), []), + (Exp_apply ((Exp_ident "+"), + (Exp_tuple ((Exp_ident "x"), (Exp_ident "x"), [])))) + ))) + ] |}] +;; + +let%expect_test "keyword" = + test_program + {|let main = + let () = print_int (fib 4) in + 0;;|}; + [%expect + {| + [(Str_value (Nonrecursive, + ({ pat = (Pat_var "main"); + expr = + (Exp_let (Nonrecursive, + ({ pat = (Pat_construct ("()", None)); + expr = + (Exp_apply ((Exp_ident "print_int"), + (Exp_apply ((Exp_ident "fib"), + (Exp_constant (Const_integer 4)))) + )) + }, + []), + (Exp_constant (Const_integer 0)))) + }, + []) + )) + ] |}] +;; + +let%expect_test "keyword" = + test_program {|let (x:char) = 20;;|}; + [%expect + {| + [(Str_value (Nonrecursive, + ({ pat = (Pat_constraint ((Pat_var "x"), (Type_construct ("char", [])))); + expr = (Exp_constant (Const_integer 20)) }, + []) + )) + ] |}] +;; + +let%expect_test "keyword" = + test_program {|let (x:(char*char)) = 20;;|}; + [%expect + {| + [(Str_value (Nonrecursive, + ({ pat = + (Pat_constraint ((Pat_var "x"), + (Type_tuple + ((Type_construct ("char", [])), (Type_construct ("char", [])), + [])) + )); + expr = (Exp_constant (Const_integer 20)) }, + []) + )) + ] |}] +;; + +let%expect_test "keyword" = + test_program {|let (x: int option) = 20;;|}; + [%expect + {| + [(Str_value (Nonrecursive, + ({ pat = + (Pat_constraint ((Pat_var "x"), + (Type_construct ("option", [(Type_construct ("int", []))])))); + expr = (Exp_constant (Const_integer 20)) }, + []) + )) + ] |}] +;; + +let%expect_test "keyword" = + test_program {||}; + [%expect {| [] |}] +;; + +let%expect_test "keyword" = + test_program {|let () = print_int 5;;|}; + [%expect + {| + [(Str_value (Nonrecursive, + ({ pat = (Pat_construct ("()", None)); + expr = + (Exp_apply ((Exp_ident "print_int"), (Exp_constant (Const_integer 5)) + )) + }, + []) + )) + ] |}] +;; + +let%expect_test "keyword" = + test_program {|let addi = fun f g x -> (f x (g x: bool) : int);;|}; + [%expect + {| + [(Str_value (Nonrecursive, + ({ pat = (Pat_var "addi"); + expr = + (Exp_fun (((Pat_var "f"), [(Pat_var "g"); (Pat_var "x")]), + (Exp_constraint ( + (Exp_apply ((Exp_apply ((Exp_ident "f"), (Exp_ident "x"))), + (Exp_constraint ( + (Exp_apply ((Exp_ident "g"), (Exp_ident "x"))), + (Type_construct ("bool", [])))) + )), + (Type_construct ("int", [])))) + )) + }, + []) + )) + ] |}] +;; + +let%expect_test "simple adt with pattern matching function + printing v3" = + test_program + {| +type 'a shape = Circle of int + | Rectangle of int * int + | Square of int +;; +let area s = + match s with + | Circle c -> 3 + | Square c -> 0 + | Rectangle (c1, c2) -> c1 * c2 +;; +let x = Rectangle (5, 10) in +let y = area x in +print_int y +;; + |}; + [%expect + {| + [(Str_adt (["a"], "shape", + (("Circle", (Some (Type_construct ("int", [])))), + [("Rectangle", + (Some (Type_tuple + ((Type_construct ("int", [])), (Type_construct ("int", [])), + [])))); + ("Square", (Some (Type_construct ("int", []))))]) + )); + (Str_value (Nonrecursive, + ({ pat = (Pat_var "area"); + expr = + (Exp_fun (((Pat_var "s"), []), + (Exp_match ((Exp_ident "s"), + ({ first = (Pat_construct ("Circle", (Some (Pat_var "c")))); + second = (Exp_constant (Const_integer 3)) }, + [{ first = (Pat_construct ("Square", (Some (Pat_var "c")))); + second = (Exp_constant (Const_integer 0)) }; + { first = + (Pat_construct ("Rectangle", + (Some (Pat_tuple ((Pat_var "c1"), (Pat_var "c2"), []))) + )); + second = + (Exp_apply ((Exp_ident "*"), + (Exp_tuple ((Exp_ident "c1"), (Exp_ident "c2"), [])))) + } + ]) + )) + )) + }, + []) + )); + (Str_eval + (Exp_let (Nonrecursive, + ({ pat = (Pat_var "x"); + expr = + (Exp_construct ("Rectangle", + (Some (Exp_tuple + ((Exp_constant (Const_integer 5)), + (Exp_constant (Const_integer 10)), []))) + )) + }, + []), + (Exp_let (Nonrecursive, + ({ pat = (Pat_var "y"); + expr = (Exp_apply ((Exp_ident "area"), (Exp_ident "x"))) }, + []), + (Exp_apply ((Exp_ident "print_int"), (Exp_ident "y"))))) + ))) + ] |}] +;; + +let%expect_test "simple adt with pattern matching function + printing v3" = + test_program + {| +type ('a,'b) shape = Circle of int + | Rectangle of int * int + | Square of 'a * 'b +;; + |}; + [%expect + {| + [(Str_adt (["a"; "b"], "shape", + (("Circle", (Some (Type_construct ("int", [])))), + [("Rectangle", + (Some (Type_tuple + ((Type_construct ("int", [])), (Type_construct ("int", [])), + [])))); + ("Square", (Some (Type_tuple ((Type_var "a"), (Type_var "b"), []))))]) + )) + ] |}] +;; + +let%expect_test "function assignment with bool operators" = + test_program {| let id = fun (x, y) -> x && y in print_bool (id true false) ;; |}; + [%expect + {| + [(Str_eval + (Exp_let (Nonrecursive, + ({ pat = (Pat_var "id"); + expr = + (Exp_fun (((Pat_tuple ((Pat_var "x"), (Pat_var "y"), [])), []), + (Exp_apply ((Exp_ident "&&"), + (Exp_tuple ((Exp_ident "x"), (Exp_ident "y"), [])))) + )) + }, + []), + (Exp_apply ((Exp_ident "print_bool"), + (Exp_apply ( + (Exp_apply ((Exp_ident "id"), (Exp_construct ("true", None)))), + (Exp_construct ("false", None)))) + )) + ))) + ] |}] +;; + +let%expect_test "function" = + test_program + {| + let f = function + | Some x -> x + | None -> 0 + in + f None, f (Some 42) + |}; + [%expect + {| + [(Str_eval + (Exp_let (Nonrecursive, + ({ pat = (Pat_var "f"); + expr = + (Exp_function + ({ first = (Pat_construct ("Some", (Some (Pat_var "x")))); + second = (Exp_ident "x") }, + [{ first = (Pat_construct ("None", None)); + second = (Exp_constant (Const_integer 0)) } + ])) + }, + []), + (Exp_tuple + ((Exp_apply ((Exp_ident "f"), (Exp_construct ("None", None)))), + (Exp_apply ((Exp_ident "f"), + (Exp_construct ("Some", + (Some (Exp_constant (Const_integer 42))))) + )), + [])) + ))) + ] |}] +;; + +let%expect_test "keyword" = + test_program + {| +let _6 = fun arg -> match arg with Some x -> let y = x in y;; + |}; + [%expect + {| + [(Str_value (Nonrecursive, + ({ pat = (Pat_var "_6"); + expr = + (Exp_fun (((Pat_var "arg"), []), + (Exp_match ((Exp_ident "arg"), + ({ first = (Pat_construct ("Some", (Some (Pat_var "x")))); + second = + (Exp_let (Nonrecursive, + ({ pat = (Pat_var "y"); expr = (Exp_ident "x") }, []), + (Exp_ident "y"))) + }, + []) + )) + )) + }, + []) + )) + ] |}] +;; + +let%expect_test "lists v1" = + test_program + {| +let x = [];; + |}; + [%expect + {| + [(Str_value (Nonrecursive, + ({ pat = (Pat_var "x"); expr = (Exp_construct ("[]", None)) }, []))) + ] |}] +;; + +let%expect_test "keyword" = + test_program + {|let rec fix f x = f (fix f) x;; +let map f p = let (a,b) = p in (f a, f b);; +let fixpoly l = + fix (fun self l -> map (fun li x -> li (self l) x) l) l;; +let feven p n = + let (e, o) = p in + if n = 0 then 1 else o (n - 1);; +let fodd p n = + let (e, o) = p in + if n = 0 then 0 else e (n - 1);; + let tie = fixpoly (feven, fodd);; |}; + [%expect + {| + [(Str_value (Recursive, + ({ pat = (Pat_var "fix"); + expr = + (Exp_fun (((Pat_var "f"), [(Pat_var "x")]), + (Exp_apply ( + (Exp_apply ((Exp_ident "f"), + (Exp_apply ((Exp_ident "fix"), (Exp_ident "f"))))), + (Exp_ident "x"))) + )) + }, + []) + )); + (Str_value (Nonrecursive, + ({ pat = (Pat_var "map"); + expr = + (Exp_fun (((Pat_var "f"), [(Pat_var "p")]), + (Exp_let (Nonrecursive, + ({ pat = (Pat_tuple ((Pat_var "a"), (Pat_var "b"), [])); + expr = (Exp_ident "p") }, + []), + (Exp_tuple + ((Exp_apply ((Exp_ident "f"), (Exp_ident "a"))), + (Exp_apply ((Exp_ident "f"), (Exp_ident "b"))), [])) + )) + )) + }, + []) + )); + (Str_value (Nonrecursive, + ({ pat = (Pat_var "fixpoly"); + expr = + (Exp_fun (((Pat_var "l"), []), + (Exp_apply ( + (Exp_apply ((Exp_ident "fix"), + (Exp_fun (((Pat_var "self"), [(Pat_var "l")]), + (Exp_apply ( + (Exp_apply ((Exp_ident "map"), + (Exp_fun (((Pat_var "li"), [(Pat_var "x")]), + (Exp_apply ( + (Exp_apply ((Exp_ident "li"), + (Exp_apply ((Exp_ident "self"), + (Exp_ident "l"))) + )), + (Exp_ident "x"))) + )) + )), + (Exp_ident "l"))) + )) + )), + (Exp_ident "l"))) + )) + }, + []) + )); + (Str_value (Nonrecursive, + ({ pat = (Pat_var "feven"); + expr = + (Exp_fun (((Pat_var "p"), [(Pat_var "n")]), + (Exp_let (Nonrecursive, + ({ pat = (Pat_tuple ((Pat_var "e"), (Pat_var "o"), [])); + expr = (Exp_ident "p") }, + []), + (Exp_if ( + (Exp_apply ((Exp_ident "="), + (Exp_tuple + ((Exp_ident "n"), (Exp_constant (Const_integer 0)), [])) + )), + (Exp_constant (Const_integer 1)), + (Some (Exp_apply ((Exp_ident "o"), + (Exp_apply ((Exp_ident "-"), + (Exp_tuple + ((Exp_ident "n"), + (Exp_constant (Const_integer 1)), [])) + )) + ))) + )) + )) + )) + }, + []) + )); + (Str_value (Nonrecursive, + ({ pat = (Pat_var "fodd"); + expr = + (Exp_fun (((Pat_var "p"), [(Pat_var "n")]), + (Exp_let (Nonrecursive, + ({ pat = (Pat_tuple ((Pat_var "e"), (Pat_var "o"), [])); + expr = (Exp_ident "p") }, + []), + (Exp_if ( + (Exp_apply ((Exp_ident "="), + (Exp_tuple + ((Exp_ident "n"), (Exp_constant (Const_integer 0)), [])) + )), + (Exp_constant (Const_integer 0)), + (Some (Exp_apply ((Exp_ident "e"), + (Exp_apply ((Exp_ident "-"), + (Exp_tuple + ((Exp_ident "n"), + (Exp_constant (Const_integer 1)), [])) + )) + ))) + )) + )) + )) + }, + []) + )); + (Str_value (Nonrecursive, + ({ pat = (Pat_var "tie"); + expr = + (Exp_apply ((Exp_ident "fixpoly"), + (Exp_tuple ((Exp_ident "feven"), (Exp_ident "fodd"), [])))) + }, + []) + )) + ] |}] +;; + +let%expect_test "keyword" = + test_program + {|type 'a foo = Foo;; +type bar = Bar of foo;; |}; + [%expect + {| + [(Str_adt (["a"], "foo", (("Foo", None), []))); + (Str_adt ([], "bar", (("Bar", (Some (Type_construct ("foo", [])))), [])))] |}] +;; + +let%expect_test "keyword" = + test_program {|let (x: (int*char) option) = Some 5;; |}; + [%expect + {| + [(Str_value (Nonrecursive, + ({ pat = + (Pat_constraint ((Pat_var "x"), + (Type_construct ("option", + [(Type_tuple + ((Type_construct ("int", [])), + (Type_construct ("char", [])), [])) + ] + )) + )); + expr = + (Exp_construct ("Some", (Some (Exp_constant (Const_integer 5))))) }, + []) + )) + ] |}] +;; + +(*lists*) +let%expect_test "list1" = + test_program + {|let rec length xs = + match xs with + | [] -> 0 + | h::tl -> 1 + length tl;; |}; + [%expect + {| + [(Str_value (Recursive, + ({ pat = (Pat_var "length"); + expr = + (Exp_fun (((Pat_var "xs"), []), + (Exp_match ((Exp_ident "xs"), + ({ first = (Pat_construct ("[]", None)); + second = (Exp_constant (Const_integer 0)) }, + [{ first = + (Pat_construct ("::", + (Some (Pat_tuple ((Pat_var "h"), (Pat_var "tl"), []))))); + second = + (Exp_apply ((Exp_ident "+"), + (Exp_tuple + ((Exp_constant (Const_integer 1)), + (Exp_apply ((Exp_ident "length"), (Exp_ident "tl"))), + [])) + )) + } + ]) + )) + )) + }, + []) + )) + ] |}] +;; + +let%expect_test "list2" = + test_program + {|let length_tail = + let rec helper acc xs = + match xs with + | [] -> acc + | h::tl -> helper (acc + 1) tl + in + helper 0 +;; |}; + [%expect + {| + [(Str_value (Nonrecursive, + ({ pat = (Pat_var "length_tail"); + expr = + (Exp_let (Recursive, + ({ pat = (Pat_var "helper"); + expr = + (Exp_fun (((Pat_var "acc"), [(Pat_var "xs")]), + (Exp_match ((Exp_ident "xs"), + ({ first = (Pat_construct ("[]", None)); + second = (Exp_ident "acc") }, + [{ first = + (Pat_construct ("::", + (Some (Pat_tuple + ((Pat_var "h"), (Pat_var "tl"), []))) + )); + second = + (Exp_apply ( + (Exp_apply ((Exp_ident "helper"), + (Exp_apply ((Exp_ident "+"), + (Exp_tuple + ((Exp_ident "acc"), + (Exp_constant (Const_integer 1)), + [])) + )) + )), + (Exp_ident "tl"))) + } + ]) + )) + )) + }, + []), + (Exp_apply ((Exp_ident "helper"), (Exp_constant (Const_integer 0)) + )) + )) + }, + []) + )) + ] |}] +;; + +let%expect_test "list3" = + test_program + {|let rec map f xs = + match xs with + | [] -> [] + | a::[] -> [f a] + | a::b::[] -> [f a; f b] + | a::b::c::[] -> [f a; f b; f c] + | a::b::c::d::tl -> f a :: f b :: f c :: f d :: map f tl +|}; + [%expect + {| + [(Str_value (Recursive, + ({ pat = (Pat_var "map"); + expr = + (Exp_fun (((Pat_var "f"), [(Pat_var "xs")]), + (Exp_match ((Exp_ident "xs"), + ({ first = (Pat_construct ("[]", None)); + second = (Exp_construct ("[]", None)) }, + [{ first = + (Pat_construct ("::", + (Some (Pat_tuple + ((Pat_var "a"), (Pat_construct ("[]", None)), []))) + )); + second = + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_apply ((Exp_ident "f"), (Exp_ident "a"))), + (Exp_construct ("[]", None)), []))) + )) + }; + { first = + (Pat_construct ("::", + (Some (Pat_tuple + ((Pat_var "a"), + (Pat_construct ("::", + (Some (Pat_tuple + ((Pat_var "b"), + (Pat_construct ("[]", None)), + []))) + )), + []))) + )); + second = + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_apply ((Exp_ident "f"), (Exp_ident "a"))), + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_apply ((Exp_ident "f"), + (Exp_ident "b"))), + (Exp_construct ("[]", None)), + []))) + )), + []))) + )) + }; + { first = + (Pat_construct ("::", + (Some (Pat_tuple + ((Pat_var "a"), + (Pat_construct ("::", + (Some (Pat_tuple + ((Pat_var "b"), + (Pat_construct ("::", + (Some (Pat_tuple + ((Pat_var "c"), + (Pat_construct ( + "[]", None)), + []))) + )), + []))) + )), + []))) + )); + second = + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_apply ((Exp_ident "f"), (Exp_ident "a"))), + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_apply ((Exp_ident "f"), + (Exp_ident "b"))), + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_apply ( + (Exp_ident "f"), + (Exp_ident "c"))), + (Exp_construct ( + "[]", None)), + []))) + )), + []))) + )), + []))) + )) + }; + { first = + (Pat_construct ("::", + (Some (Pat_tuple + ((Pat_var "a"), + (Pat_construct ("::", + (Some (Pat_tuple + ((Pat_var "b"), + (Pat_construct ("::", + (Some (Pat_tuple + ((Pat_var "c"), + (Pat_construct ( + "::", + (Some (Pat_tuple + (( + Pat_var + "d"), + (Pat_var + "tl"), + []))) + )), + []))) + )), + []))) + )), + []))) + )); + second = + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_apply ((Exp_ident "f"), (Exp_ident "a"))), + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_apply ((Exp_ident "f"), + (Exp_ident "b"))), + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_apply ( + (Exp_ident "f"), + (Exp_ident "c"))), + (Exp_construct ( + "::", + (Some (Exp_tuple + (( + Exp_apply ( + (Exp_ident + "f"), + (Exp_ident + "d"))), + (Exp_apply ( + (Exp_apply ( + (Exp_ident + "map"), + (Exp_ident + "f"))), + (Exp_ident + "tl"))), + []))) + )), + []))) + )), + []))) + )), + []))) + )) + } + ]) + )) + )) + }, + []) + )) + ] |}] +;; + +let%expect_test "list4" = + test_program + {|let rec append xs ys = match xs with [] -> ys | x::xs -> x::(append xs ys);; +|}; + [%expect + {| + [(Str_value (Recursive, + ({ pat = (Pat_var "append"); + expr = + (Exp_fun (((Pat_var "xs"), [(Pat_var "ys")]), + (Exp_match ((Exp_ident "xs"), + ({ first = (Pat_construct ("[]", None)); + second = (Exp_ident "ys") }, + [{ first = + (Pat_construct ("::", + (Some (Pat_tuple ((Pat_var "x"), (Pat_var "xs"), []))))); + second = + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_ident "x"), + (Exp_apply ( + (Exp_apply ((Exp_ident "append"), + (Exp_ident "xs"))), + (Exp_ident "ys"))), + []))) + )) + } + ]) + )) + )) + }, + []) + )) + ] |}] +;; + +let%expect_test "list5" = + test_program + {|let concat = + let rec helper xs = + match xs with + | [] -> [] + | h::tl -> append h (helper tl) + in helper +;; +|}; + [%expect + {| + [(Str_value (Nonrecursive, + ({ pat = (Pat_var "concat"); + expr = + (Exp_let (Recursive, + ({ pat = (Pat_var "helper"); + expr = + (Exp_fun (((Pat_var "xs"), []), + (Exp_match ((Exp_ident "xs"), + ({ first = (Pat_construct ("[]", None)); + second = (Exp_construct ("[]", None)) }, + [{ first = + (Pat_construct ("::", + (Some (Pat_tuple + ((Pat_var "h"), (Pat_var "tl"), []))) + )); + second = + (Exp_apply ( + (Exp_apply ((Exp_ident "append"), (Exp_ident "h"))), + (Exp_apply ((Exp_ident "helper"), (Exp_ident "tl") + )) + )) + } + ]) + )) + )) + }, + []), + (Exp_ident "helper"))) + }, + []) + )) + ] |}] +;; + +let%expect_test "list6" = + test_program + {|(1 :: 2) :: [] +;; +|}; + [%expect + {| + [(Str_eval + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_construct ("::", + (Some (Exp_tuple + ((Exp_constant (Const_integer 1)), + (Exp_constant (Const_integer 2)), []))) + )), + (Exp_construct ("[]", None)), []))) + ))) + ] |}] +;; + +let%expect_test "list7" = + test_program + {|let rec iter f xs = match xs with [] -> () | h::tl -> let () = f h in iter f tl;; +|}; + [%expect + {| + [(Str_value (Recursive, + ({ pat = (Pat_var "iter"); + expr = + (Exp_fun (((Pat_var "f"), [(Pat_var "xs")]), + (Exp_match ((Exp_ident "xs"), + ({ first = (Pat_construct ("[]", None)); + second = (Exp_construct ("()", None)) }, + [{ first = + (Pat_construct ("::", + (Some (Pat_tuple ((Pat_var "h"), (Pat_var "tl"), []))))); + second = + (Exp_let (Nonrecursive, + ({ pat = (Pat_construct ("()", None)); + expr = (Exp_apply ((Exp_ident "f"), (Exp_ident "h"))) + }, + []), + (Exp_apply ( + (Exp_apply ((Exp_ident "iter"), (Exp_ident "f"))), + (Exp_ident "tl"))) + )) + } + ]) + )) + )) + }, + []) + )) + ] |}] +;; + +let%expect_test "list8" = + test_program + {|let rec cartesian xs ys = + match xs with + | [] -> [] + | h::tl -> append (map (fun a -> (h,a)) ys) (cartesian tl ys);; +|}; + [%expect + {| + [(Str_value (Recursive, + ({ pat = (Pat_var "cartesian"); + expr = + (Exp_fun (((Pat_var "xs"), [(Pat_var "ys")]), + (Exp_match ((Exp_ident "xs"), + ({ first = (Pat_construct ("[]", None)); + second = (Exp_construct ("[]", None)) }, + [{ first = + (Pat_construct ("::", + (Some (Pat_tuple ((Pat_var "h"), (Pat_var "tl"), []))))); + second = + (Exp_apply ( + (Exp_apply ((Exp_ident "append"), + (Exp_apply ( + (Exp_apply ((Exp_ident "map"), + (Exp_fun (((Pat_var "a"), []), + (Exp_tuple + ((Exp_ident "h"), (Exp_ident "a"), [])) + )) + )), + (Exp_ident "ys"))) + )), + (Exp_apply ( + (Exp_apply ((Exp_ident "cartesian"), (Exp_ident "tl") + )), + (Exp_ident "ys"))) + )) + } + ]) + )) + )) + }, + []) + )) + ] |}] +;; + +let%expect_test "list9" = + test_program + {|let main = + let () = iter print_int [1;2;3] in + let () = print_int (length (cartesian [1;2] [1;2;3;4])) in + 0 +;; +|}; + [%expect + {| + [(Str_value (Nonrecursive, + ({ pat = (Pat_var "main"); + expr = + (Exp_let (Nonrecursive, + ({ pat = (Pat_construct ("()", None)); + expr = + (Exp_apply ( + (Exp_apply ((Exp_ident "iter"), (Exp_ident "print_int"))), + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_constant (Const_integer 1)), + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_constant (Const_integer 2)), + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_constant + (Const_integer 3)), + (Exp_construct ("[]", + None)), + []))) + )), + []))) + )), + []))) + )) + )) + }, + []), + (Exp_let (Nonrecursive, + ({ pat = (Pat_construct ("()", None)); + expr = + (Exp_apply ((Exp_ident "print_int"), + (Exp_apply ((Exp_ident "length"), + (Exp_apply ( + (Exp_apply ((Exp_ident "cartesian"), + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_constant (Const_integer 1)), + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_constant + (Const_integer 2)), + (Exp_construct ("[]", + None)), + []))) + )), + []))) + )) + )), + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_constant (Const_integer 1)), + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_constant + (Const_integer 2)), + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_constant + (Const_integer + 3)), + (Exp_construct ( + "::", + (Some ( + Exp_tuple + (( + Exp_constant + (Const_integer + 4)), + (Exp_construct ( + "[]", + None)), + []))) + )), + []))) + )), + []))) + )), + []))) + )) + )) + )) + )) + }, + []), + (Exp_constant (Const_integer 0)))) + )) + }, + []) + )) + ] |}] +;; + +let%expect_test "list9" = + test_program + {|type 'a list = + Cons of 'a * 'a list + | Nil;;|}; + [%expect + {| + [(Str_adt (["a"], "list", + (("Cons", + (Some (Type_tuple + ((Type_var "a"), (Type_construct ("list", [(Type_var "a")])), + [])))), + [("Nil", None)]) + )) + ] |}] +;; + +let%expect_test "list9" = + test_program + {| +let _1 = fun x y (a, _) -> (x + y - a) = 1 + +let _2 = + let x, Some f = 1, Some ( "p1onerka was here" ) + in x + +let _3 = Some (1, "hi") + +let _4 = let rec f x = f 5 in f + +let _5 = + let id x = x in + match Some id with + | Some f -> let _ = f "42" in f 42 + | None -> 0 + +let _6 = fun arg -> match arg with Some x -> let y = x in y;; + +let int_of_option = function +Some x -> x +| None -> 0 + +let _42 = function 42 -> true | _ -> false + +let id1, id2 = let id x = x in (id, id) + + + |}; + [%expect + {| + [(Str_value (Nonrecursive, + ({ pat = (Pat_var "_1"); + expr = + (Exp_fun ( + ((Pat_var "x"), + [(Pat_var "y"); (Pat_tuple ((Pat_var "a"), Pat_any, []))]), + (Exp_apply ((Exp_ident "="), + (Exp_tuple + ((Exp_apply ((Exp_ident "-"), + (Exp_tuple + ((Exp_apply ((Exp_ident "+"), + (Exp_tuple ((Exp_ident "x"), (Exp_ident "y"), [])) + )), + (Exp_ident "a"), [])) + )), + (Exp_constant (Const_integer 1)), [])) + )) + )) + }, + []) + )); + (Str_value (Nonrecursive, + ({ pat = (Pat_var "_2"); + expr = + (Exp_let (Nonrecursive, + ({ pat = + (Pat_tuple + ((Pat_var "x"), + (Pat_construct ("Some", (Some (Pat_var "f")))), [])); + expr = + (Exp_tuple + ((Exp_constant (Const_integer 1)), + (Exp_construct ("Some", + (Some (Exp_constant (Const_string "p1onerka was here"))) + )), + [])) + }, + []), + (Exp_ident "x"))) + }, + []) + )); + (Str_value (Nonrecursive, + ({ pat = (Pat_var "_3"); + expr = + (Exp_construct ("Some", + (Some (Exp_tuple + ((Exp_constant (Const_integer 1)), + (Exp_constant (Const_string "hi")), []))) + )) + }, + []) + )); + (Str_value (Nonrecursive, + ({ pat = (Pat_var "_4"); + expr = + (Exp_let (Recursive, + ({ pat = (Pat_var "f"); + expr = + (Exp_fun (((Pat_var "x"), []), + (Exp_apply ((Exp_ident "f"), + (Exp_constant (Const_integer 5)))) + )) + }, + []), + (Exp_ident "f"))) + }, + []) + )); + (Str_value (Nonrecursive, + ({ pat = (Pat_var "_5"); + expr = + (Exp_let (Nonrecursive, + ({ pat = (Pat_var "id"); + expr = (Exp_fun (((Pat_var "x"), []), (Exp_ident "x"))) }, + []), + (Exp_match ((Exp_construct ("Some", (Some (Exp_ident "id")))), + ({ first = (Pat_construct ("Some", (Some (Pat_var "f")))); + second = + (Exp_let (Nonrecursive, + ({ pat = Pat_any; + expr = + (Exp_apply ((Exp_ident "f"), + (Exp_constant (Const_string "42")))) + }, + []), + (Exp_apply ((Exp_ident "f"), + (Exp_constant (Const_integer 42)))) + )) + }, + [{ first = (Pat_construct ("None", None)); + second = (Exp_constant (Const_integer 0)) } + ]) + )) + )) + }, + []) + )); + (Str_value (Nonrecursive, + ({ pat = (Pat_var "_6"); + expr = + (Exp_fun (((Pat_var "arg"), []), + (Exp_match ((Exp_ident "arg"), + ({ first = (Pat_construct ("Some", (Some (Pat_var "x")))); + second = + (Exp_let (Nonrecursive, + ({ pat = (Pat_var "y"); expr = (Exp_ident "x") }, []), + (Exp_ident "y"))) + }, + []) + )) + )) + }, + []) + )); + (Str_value (Nonrecursive, + ({ pat = (Pat_var "int_of_option"); + expr = + (Exp_function + ({ first = (Pat_construct ("Some", (Some (Pat_var "x")))); + second = (Exp_ident "x") }, + [{ first = (Pat_construct ("None", None)); + second = (Exp_constant (Const_integer 0)) } + ])) + }, + []) + )); + (Str_value (Nonrecursive, + ({ pat = (Pat_var "_42"); + expr = + (Exp_function + ({ first = (Pat_constant (Const_integer 42)); + second = (Exp_construct ("true", None)) }, + [{ first = Pat_any; second = (Exp_construct ("false", None)) }])) + }, + []) + )); + (Str_value (Nonrecursive, + ({ pat = (Pat_tuple ((Pat_var "id1"), (Pat_var "id2"), [])); + expr = + (Exp_let (Nonrecursive, + ({ pat = (Pat_var "id"); + expr = (Exp_fun (((Pat_var "x"), []), (Exp_ident "x"))) }, + []), + (Exp_tuple ((Exp_ident "id"), (Exp_ident "id"), [])))) + }, + []) + )) + ] |}] +;; + +let%expect_test "list9" = + test_program + {| +let rec fix f x = f (fix f) x +let map f p = let (a,b) = p in (f a, f b) +let fixpoly l = + fix (fun self l -> map (fun li x -> li (self l) x) l) l +let feven p n = + let (e, o) = p in + if n = 0 then 1 else o (n - 1) +let fodd p n = + let (e, o) = p in + if n = 0 then 0 else e (n - 1) +let tie = fixpoly (feven, fodd) + +let rec meven n = if n = 0 then 1 else modd (n - 1) +and modd n = if n = 0 then 1 else meven (n - 1) +let main = + let () = print_int (modd 1) in + let () = print_int (meven 2) in + let (even,odd) = tie in + let () = print_int (odd 3) in + let () = print_int (even 4) in + 0 + + + + |}; + [%expect + {| + [(Str_value (Recursive, + ({ pat = (Pat_var "fix"); + expr = + (Exp_fun (((Pat_var "f"), [(Pat_var "x")]), + (Exp_apply ( + (Exp_apply ((Exp_ident "f"), + (Exp_apply ((Exp_ident "fix"), (Exp_ident "f"))))), + (Exp_ident "x"))) + )) + }, + []) + )); + (Str_value (Nonrecursive, + ({ pat = (Pat_var "map"); + expr = + (Exp_fun (((Pat_var "f"), [(Pat_var "p")]), + (Exp_let (Nonrecursive, + ({ pat = (Pat_tuple ((Pat_var "a"), (Pat_var "b"), [])); + expr = (Exp_ident "p") }, + []), + (Exp_tuple + ((Exp_apply ((Exp_ident "f"), (Exp_ident "a"))), + (Exp_apply ((Exp_ident "f"), (Exp_ident "b"))), [])) + )) + )) + }, + []) + )); + (Str_value (Nonrecursive, + ({ pat = (Pat_var "fixpoly"); + expr = + (Exp_fun (((Pat_var "l"), []), + (Exp_apply ( + (Exp_apply ((Exp_ident "fix"), + (Exp_fun (((Pat_var "self"), [(Pat_var "l")]), + (Exp_apply ( + (Exp_apply ((Exp_ident "map"), + (Exp_fun (((Pat_var "li"), [(Pat_var "x")]), + (Exp_apply ( + (Exp_apply ((Exp_ident "li"), + (Exp_apply ((Exp_ident "self"), + (Exp_ident "l"))) + )), + (Exp_ident "x"))) + )) + )), + (Exp_ident "l"))) + )) + )), + (Exp_ident "l"))) + )) + }, + []) + )); + (Str_value (Nonrecursive, + ({ pat = (Pat_var "feven"); + expr = + (Exp_fun (((Pat_var "p"), [(Pat_var "n")]), + (Exp_let (Nonrecursive, + ({ pat = (Pat_tuple ((Pat_var "e"), (Pat_var "o"), [])); + expr = (Exp_ident "p") }, + []), + (Exp_if ( + (Exp_apply ((Exp_ident "="), + (Exp_tuple + ((Exp_ident "n"), (Exp_constant (Const_integer 0)), [])) + )), + (Exp_constant (Const_integer 1)), + (Some (Exp_apply ((Exp_ident "o"), + (Exp_apply ((Exp_ident "-"), + (Exp_tuple + ((Exp_ident "n"), + (Exp_constant (Const_integer 1)), [])) + )) + ))) + )) + )) + )) + }, + []) + )); + (Str_value (Nonrecursive, + ({ pat = (Pat_var "fodd"); + expr = + (Exp_fun (((Pat_var "p"), [(Pat_var "n")]), + (Exp_let (Nonrecursive, + ({ pat = (Pat_tuple ((Pat_var "e"), (Pat_var "o"), [])); + expr = (Exp_ident "p") }, + []), + (Exp_if ( + (Exp_apply ((Exp_ident "="), + (Exp_tuple + ((Exp_ident "n"), (Exp_constant (Const_integer 0)), [])) + )), + (Exp_constant (Const_integer 0)), + (Some (Exp_apply ((Exp_ident "e"), + (Exp_apply ((Exp_ident "-"), + (Exp_tuple + ((Exp_ident "n"), + (Exp_constant (Const_integer 1)), [])) + )) + ))) + )) + )) + )) + }, + []) + )); + (Str_value (Nonrecursive, + ({ pat = (Pat_var "tie"); + expr = + (Exp_apply ((Exp_ident "fixpoly"), + (Exp_tuple ((Exp_ident "feven"), (Exp_ident "fodd"), [])))) + }, + []) + )); + (Str_value (Recursive, + ({ pat = (Pat_var "meven"); + expr = + (Exp_fun (((Pat_var "n"), []), + (Exp_if ( + (Exp_apply ((Exp_ident "="), + (Exp_tuple + ((Exp_ident "n"), (Exp_constant (Const_integer 0)), [])) + )), + (Exp_constant (Const_integer 1)), + (Some (Exp_apply ((Exp_ident "modd"), + (Exp_apply ((Exp_ident "-"), + (Exp_tuple + ((Exp_ident "n"), + (Exp_constant (Const_integer 1)), [])) + )) + ))) + )) + )) + }, + [{ pat = (Pat_var "modd"); + expr = + (Exp_fun (((Pat_var "n"), []), + (Exp_if ( + (Exp_apply ((Exp_ident "="), + (Exp_tuple + ((Exp_ident "n"), (Exp_constant (Const_integer 0)), [])) + )), + (Exp_constant (Const_integer 1)), + (Some (Exp_apply ((Exp_ident "meven"), + (Exp_apply ((Exp_ident "-"), + (Exp_tuple + ((Exp_ident "n"), + (Exp_constant (Const_integer 1)), [])) + )) + ))) + )) + )) + } + ]) + )); + (Str_value (Nonrecursive, + ({ pat = (Pat_var "main"); + expr = + (Exp_let (Nonrecursive, + ({ pat = (Pat_construct ("()", None)); + expr = + (Exp_apply ((Exp_ident "print_int"), + (Exp_apply ((Exp_ident "modd"), + (Exp_constant (Const_integer 1)))) + )) + }, + []), + (Exp_let (Nonrecursive, + ({ pat = (Pat_construct ("()", None)); + expr = + (Exp_apply ((Exp_ident "print_int"), + (Exp_apply ((Exp_ident "meven"), + (Exp_constant (Const_integer 2)))) + )) + }, + []), + (Exp_let (Nonrecursive, + ({ pat = (Pat_tuple ((Pat_var "even"), (Pat_var "odd"), [])); + expr = (Exp_ident "tie") }, + []), + (Exp_let (Nonrecursive, + ({ pat = (Pat_construct ("()", None)); + expr = + (Exp_apply ((Exp_ident "print_int"), + (Exp_apply ((Exp_ident "odd"), + (Exp_constant (Const_integer 3)))) + )) + }, + []), + (Exp_let (Nonrecursive, + ({ pat = (Pat_construct ("()", None)); + expr = + (Exp_apply ((Exp_ident "print_int"), + (Exp_apply ((Exp_ident "even"), + (Exp_constant (Const_integer 4)))) + )) + }, + []), + (Exp_constant (Const_integer 0)))) + )) + )) + )) + )) + }, + []) + )) + ] |}] +;; + +let%expect_test "list9" = + test_program + {| +let rec length xs = + match xs with + | [] -> 0 + | h::tl -> 1 + length tl + +let length_tail = + let rec helper acc xs = + match xs with + | [] -> acc + | h::tl -> helper (acc + 1) tl + in + helper 0 + +let rec map f xs = + match xs with + | [] -> [] + | a::[] -> [f a] + | a::b::[] -> [f a; f b] + | a::b::c::[] -> [f a; f b; f c] + | a::b::c::d::tl -> f a :: f b :: f c :: f d :: map f tl + +let rec append xs ys = match xs with [] -> ys | x::xs -> x::(append xs ys) + +let concat = + let rec helper xs = + match xs with + | [] -> [] + | h::tl -> append h (helper tl) + in helper + +let rec iter f xs = match xs with [] -> () | h::tl -> let () = f h in iter f tl + +let rec cartesian xs ys = + match xs with + | [] -> [] + | h::tl -> append (map (fun a -> (h,a)) ys) (cartesian tl ys) + +let main = + let () = iter print_int [1;2;3] in + let () = print_int (length (cartesian [1;2] [1;2;3;4])) in + 0 + + + |}; + [%expect + {| + [(Str_value (Recursive, + ({ pat = (Pat_var "length"); + expr = + (Exp_fun (((Pat_var "xs"), []), + (Exp_match ((Exp_ident "xs"), + ({ first = (Pat_construct ("[]", None)); + second = (Exp_constant (Const_integer 0)) }, + [{ first = + (Pat_construct ("::", + (Some (Pat_tuple ((Pat_var "h"), (Pat_var "tl"), []))))); + second = + (Exp_apply ((Exp_ident "+"), + (Exp_tuple + ((Exp_constant (Const_integer 1)), + (Exp_apply ((Exp_ident "length"), (Exp_ident "tl"))), + [])) + )) + } + ]) + )) + )) + }, + []) + )); + (Str_value (Nonrecursive, + ({ pat = (Pat_var "length_tail"); + expr = + (Exp_let (Recursive, + ({ pat = (Pat_var "helper"); + expr = + (Exp_fun (((Pat_var "acc"), [(Pat_var "xs")]), + (Exp_match ((Exp_ident "xs"), + ({ first = (Pat_construct ("[]", None)); + second = (Exp_ident "acc") }, + [{ first = + (Pat_construct ("::", + (Some (Pat_tuple + ((Pat_var "h"), (Pat_var "tl"), []))) + )); + second = + (Exp_apply ( + (Exp_apply ((Exp_ident "helper"), + (Exp_apply ((Exp_ident "+"), + (Exp_tuple + ((Exp_ident "acc"), + (Exp_constant (Const_integer 1)), + [])) + )) + )), + (Exp_ident "tl"))) + } + ]) + )) + )) + }, + []), + (Exp_apply ((Exp_ident "helper"), (Exp_constant (Const_integer 0)) + )) + )) + }, + []) + )); + (Str_value (Recursive, + ({ pat = (Pat_var "map"); + expr = + (Exp_fun (((Pat_var "f"), [(Pat_var "xs")]), + (Exp_match ((Exp_ident "xs"), + ({ first = (Pat_construct ("[]", None)); + second = (Exp_construct ("[]", None)) }, + [{ first = + (Pat_construct ("::", + (Some (Pat_tuple + ((Pat_var "a"), (Pat_construct ("[]", None)), + []))) + )); + second = + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_apply ((Exp_ident "f"), (Exp_ident "a"))), + (Exp_construct ("[]", None)), []))) + )) + }; + { first = + (Pat_construct ("::", + (Some (Pat_tuple + ((Pat_var "a"), + (Pat_construct ("::", + (Some (Pat_tuple + ((Pat_var "b"), + (Pat_construct ("[]", None)), + []))) + )), + []))) + )); + second = + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_apply ((Exp_ident "f"), (Exp_ident "a") + )), + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_apply ((Exp_ident "f"), + (Exp_ident "b"))), + (Exp_construct ("[]", None)), + []))) + )), + []))) + )) + }; + { first = + (Pat_construct ("::", + (Some (Pat_tuple + ((Pat_var "a"), + (Pat_construct ("::", + (Some (Pat_tuple + ((Pat_var "b"), + (Pat_construct ("::", + (Some (Pat_tuple + ((Pat_var "c"), + (Pat_construct ( + "[]", None)), + []))) + )), + []))) + )), + []))) + )); + second = + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_apply ((Exp_ident "f"), (Exp_ident "a") + )), + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_apply ((Exp_ident "f"), + (Exp_ident "b"))), + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_apply ( + (Exp_ident "f"), + (Exp_ident "c") + )), + (Exp_construct ( + "[]", None)), + []))) + )), + []))) + )), + []))) + )) + }; + { first = + (Pat_construct ("::", + (Some (Pat_tuple + ((Pat_var "a"), + (Pat_construct ("::", + (Some (Pat_tuple + ((Pat_var "b"), + (Pat_construct ("::", + (Some (Pat_tuple + ((Pat_var "c"), + (Pat_construct ( + "::", + (Some (Pat_tuple + (( + Pat_var + "d"), + (Pat_var + "tl"), + []))) + )), + []))) + )), + []))) + )), + []))) + )); + second = + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_apply ((Exp_ident "f"), (Exp_ident "a") + )), + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_apply ((Exp_ident "f"), + (Exp_ident "b"))), + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_apply ( + (Exp_ident "f"), + (Exp_ident "c") + )), + (Exp_construct ( + "::", + (Some (Exp_tuple + (( + Exp_apply ( + (Exp_ident + "f"), + (Exp_ident + "d"))), + (Exp_apply ( + (Exp_apply ( + (Exp_ident + "map"), + (Exp_ident + "f"))), + (Exp_ident + "tl"))), + []))) + )), + []))) + )), + []))) + )), + []))) + )) + } + ]) + )) + )) + }, + []) + )); + (Str_value (Recursive, + ({ pat = (Pat_var "append"); + expr = + (Exp_fun (((Pat_var "xs"), [(Pat_var "ys")]), + (Exp_match ((Exp_ident "xs"), + ({ first = (Pat_construct ("[]", None)); + second = (Exp_ident "ys") }, + [{ first = + (Pat_construct ("::", + (Some (Pat_tuple ((Pat_var "x"), (Pat_var "xs"), []))))); + second = + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_ident "x"), + (Exp_apply ( + (Exp_apply ((Exp_ident "append"), + (Exp_ident "xs"))), + (Exp_ident "ys"))), + []))) + )) + } + ]) + )) + )) + }, + []) + )); + (Str_value (Nonrecursive, + ({ pat = (Pat_var "concat"); + expr = + (Exp_let (Recursive, + ({ pat = (Pat_var "helper"); + expr = + (Exp_fun (((Pat_var "xs"), []), + (Exp_match ((Exp_ident "xs"), + ({ first = (Pat_construct ("[]", None)); + second = (Exp_construct ("[]", None)) }, + [{ first = + (Pat_construct ("::", + (Some (Pat_tuple + ((Pat_var "h"), (Pat_var "tl"), []))) + )); + second = + (Exp_apply ( + (Exp_apply ((Exp_ident "append"), (Exp_ident "h") + )), + (Exp_apply ((Exp_ident "helper"), (Exp_ident "tl") + )) + )) + } + ]) + )) + )) + }, + []), + (Exp_ident "helper"))) + }, + []) + )); + (Str_value (Recursive, + ({ pat = (Pat_var "iter"); + expr = + (Exp_fun (((Pat_var "f"), [(Pat_var "xs")]), + (Exp_match ((Exp_ident "xs"), + ({ first = (Pat_construct ("[]", None)); + second = (Exp_construct ("()", None)) }, + [{ first = + (Pat_construct ("::", + (Some (Pat_tuple ((Pat_var "h"), (Pat_var "tl"), []))))); + second = + (Exp_let (Nonrecursive, + ({ pat = (Pat_construct ("()", None)); + expr = (Exp_apply ((Exp_ident "f"), (Exp_ident "h"))) + }, + []), + (Exp_apply ( + (Exp_apply ((Exp_ident "iter"), (Exp_ident "f"))), + (Exp_ident "tl"))) + )) + } + ]) + )) + )) + }, + []) + )); + (Str_value (Recursive, + ({ pat = (Pat_var "cartesian"); + expr = + (Exp_fun (((Pat_var "xs"), [(Pat_var "ys")]), + (Exp_match ((Exp_ident "xs"), + ({ first = (Pat_construct ("[]", None)); + second = (Exp_construct ("[]", None)) }, + [{ first = + (Pat_construct ("::", + (Some (Pat_tuple ((Pat_var "h"), (Pat_var "tl"), []))))); + second = + (Exp_apply ( + (Exp_apply ((Exp_ident "append"), + (Exp_apply ( + (Exp_apply ((Exp_ident "map"), + (Exp_fun (((Pat_var "a"), []), + (Exp_tuple + ((Exp_ident "h"), (Exp_ident "a"), [])) + )) + )), + (Exp_ident "ys"))) + )), + (Exp_apply ( + (Exp_apply ((Exp_ident "cartesian"), (Exp_ident "tl") + )), + (Exp_ident "ys"))) + )) + } + ]) + )) + )) + }, + []) + )); + (Str_value (Nonrecursive, + ({ pat = (Pat_var "main"); + expr = + (Exp_let (Nonrecursive, + ({ pat = (Pat_construct ("()", None)); + expr = + (Exp_apply ( + (Exp_apply ((Exp_ident "iter"), (Exp_ident "print_int"))), + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_constant (Const_integer 1)), + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_constant (Const_integer 2)), + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_constant + (Const_integer 3)), + (Exp_construct ("[]", + None)), + []))) + )), + []))) + )), + []))) + )) + )) + }, + []), + (Exp_let (Nonrecursive, + ({ pat = (Pat_construct ("()", None)); + expr = + (Exp_apply ((Exp_ident "print_int"), + (Exp_apply ((Exp_ident "length"), + (Exp_apply ( + (Exp_apply ((Exp_ident "cartesian"), + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_constant (Const_integer 1)), + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_constant + (Const_integer 2)), + (Exp_construct ("[]", + None)), + []))) + )), + []))) + )) + )), + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_constant (Const_integer 1)), + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_constant + (Const_integer 2)), + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_constant + ( + Const_integer + 3)), + (Exp_construct ( + "::", + (Some ( + Exp_tuple + (( + Exp_constant + (Const_integer + 4)), + (Exp_construct ( + "[]", + None)), + []))))), + []))) + )), + []))) + )), + []))) + )) + )) + )) + )) + }, + []), + (Exp_constant (Const_integer 0)))) + )) + }, + []) + )) + ] |}] +;; + +let%expect_test "list6" = + test_program "(1 :: 2) :: []"; + [%expect + {| + [(Str_eval + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_construct ("::", + (Some (Exp_tuple + ((Exp_constant (Const_integer 1)), + (Exp_constant (Const_integer 2)), []))) + )), + (Exp_construct ("[]", None)), []))) + ))) + ] |}] +;; + +let%expect_test "list5" = + test_program + "let concat = let rec helper xs = match xs with | [] -> [] | h::tl -> append h \ + (helper tl) in helper"; + [%expect + {| + [(Str_value (Nonrecursive, + ({ pat = (Pat_var "concat"); + expr = + (Exp_let (Recursive, + ({ pat = (Pat_var "helper"); + expr = + (Exp_fun (((Pat_var "xs"), []), + (Exp_match ((Exp_ident "xs"), + ({ first = (Pat_construct ("[]", None)); + second = (Exp_construct ("[]", None)) }, + [{ first = + (Pat_construct ("::", + (Some (Pat_tuple + ((Pat_var "h"), (Pat_var "tl"), []))) + )); + second = + (Exp_apply ( + (Exp_apply ((Exp_ident "append"), (Exp_ident "h"))), + (Exp_apply ((Exp_ident "helper"), (Exp_ident "tl") + )) + )) + } + ]) + )) + )) + }, + []), + (Exp_ident "helper"))) + }, + []) + )) + ] |}] +;; + +let%expect_test "list_basic" = + test_program "let lst = 1 :: 2 :: 3 :: [] in lst"; + [%expect + {| + [(Str_eval + (Exp_let (Nonrecursive, + ({ pat = (Pat_var "lst"); + expr = + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_constant (Const_integer 1)), + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_constant (Const_integer 2)), + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_constant + (Const_integer 3)), + (Exp_construct ("[]", None)), + []))) + )), + []))) + )), + []))) + )) + }, + []), + (Exp_ident "lst")))) + ] |}] +;; + +let%expect_test "list_match" = + test_program "match 1 :: 2 :: 3 :: [] with | [] -> 0 | h :: _ -> h"; + [%expect + {| + [(Str_eval + (Exp_match ( + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_constant (Const_integer 1)), + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_constant (Const_integer 2)), + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_constant (Const_integer 3)), + (Exp_construct ("[]", None)), + []))) + )), + []))) + )), + []))) + )), + ({ first = (Pat_construct ("[]", None)); + second = (Exp_constant (Const_integer 0)) }, + [{ first = + (Pat_construct ("::", + (Some (Pat_tuple ((Pat_var "h"), Pat_any, []))))); + second = (Exp_ident "h") } + ]) + ))) + ] |}] +;; + +let%expect_test "list_append" = + test_program + "let append xs ys = match xs with | [] -> ys | h :: t -> h :: append t ys in append \ + [1; 2] [3; 4]"; + [%expect + {| + [(Str_eval + (Exp_let (Nonrecursive, + ({ pat = (Pat_var "append"); + expr = + (Exp_fun (((Pat_var "xs"), [(Pat_var "ys")]), + (Exp_match ((Exp_ident "xs"), + ({ first = (Pat_construct ("[]", None)); + second = (Exp_ident "ys") }, + [{ first = + (Pat_construct ("::", + (Some (Pat_tuple ((Pat_var "h"), (Pat_var "t"), []))) + )); + second = + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_ident "h"), + (Exp_apply ( + (Exp_apply ((Exp_ident "append"), + (Exp_ident "t"))), + (Exp_ident "ys"))), + []))) + )) + } + ]) + )) + )) + }, + []), + (Exp_apply ( + (Exp_apply ((Exp_ident "append"), + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_constant (Const_integer 1)), + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_constant (Const_integer 2)), + (Exp_construct ("[]", None)), + []))) + )), + []))) + )) + )), + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_constant (Const_integer 3)), + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_constant (Const_integer 4)), + (Exp_construct ("[]", None)), []))) + )), + []))) + )) + )) + ))) + ] |}] +;; + +let%expect_test "()" = + test_program + {| + let a = + let b = + let rec f = (let x = 3 in x) + 1 + in f + in ();; + let s = "string";; + |}; + [%expect + {| + [(Str_value (Nonrecursive, + ({ pat = (Pat_var "a"); + expr = + (Exp_let (Nonrecursive, + ({ pat = (Pat_var "b"); + expr = + (Exp_let (Recursive, + ({ pat = (Pat_var "f"); + expr = + (Exp_apply ((Exp_ident "+"), + (Exp_tuple + ((Exp_let (Nonrecursive, + ({ pat = (Pat_var "x"); + expr = (Exp_constant (Const_integer 3)) }, + []), + (Exp_ident "x"))), + (Exp_constant (Const_integer 1)), [])) + )) + }, + []), + (Exp_ident "f"))) + }, + []), + (Exp_construct ("()", None)))) + }, + []) + )); + (Str_value (Nonrecursive, + ({ pat = (Pat_var "s"); expr = (Exp_constant (Const_string "string")) }, + []) + )) + ] |}] +;; + +let%expect_test "()" = + test_program + {| + let rec iter f xs = match xs with [] -> () + |}; + [%expect + {| + [(Str_value (Recursive, + ({ pat = (Pat_var "iter"); + expr = + (Exp_fun (((Pat_var "f"), [(Pat_var "xs")]), + (Exp_match ((Exp_ident "xs"), + ({ first = (Pat_construct ("[]", None)); + second = (Exp_construct ("()", None)) }, + []) + )) + )) + }, + []) + )) + ] |}] +;; + +let%expect_test "()" = + test_program + {| +let rec length xs = + match xs with + | [] -> 0 + | h::tl -> 1 + length tl + +let length_tail = + let rec helper acc xs = + match xs with + | [] -> acc + | h::tl -> helper (acc + 1) tl + in + helper 0 + +let rec map f xs = + match xs with + | [] -> [] + | a::[] -> [f a] + | a::b::[] -> [f a; f b] + | a::b::c::[] -> [f a; f b; f c] + | a::b::c::d::tl -> f a :: f b :: f c :: f d :: map f tl + +let rec append xs ys = match xs with [] -> ys | x::xs -> x::(append xs ys) + +let concat = + let rec helper xs = + match xs with + | [] -> [] + | h::tl -> append h (helper tl) + in helper + +let rec iter f xs = match xs with [] -> () | h::tl -> let () = f h in iter f tl + +let rec cartesian xs ys = + match xs with + | [] -> [] + | h::tl -> append (map (fun a -> (h,a)) ys) (cartesian tl ys) + +let main = + let () = iter print_int [1;2;3] in + let () = print_int (length (cartesian [1;2] [1;2;3;4])) in + 0 + |}; + [%expect + {| + [(Str_value (Recursive, + ({ pat = (Pat_var "length"); + expr = + (Exp_fun (((Pat_var "xs"), []), + (Exp_match ((Exp_ident "xs"), + ({ first = (Pat_construct ("[]", None)); + second = (Exp_constant (Const_integer 0)) }, + [{ first = + (Pat_construct ("::", + (Some (Pat_tuple ((Pat_var "h"), (Pat_var "tl"), []))))); + second = + (Exp_apply ((Exp_ident "+"), + (Exp_tuple + ((Exp_constant (Const_integer 1)), + (Exp_apply ((Exp_ident "length"), (Exp_ident "tl"))), + [])) + )) + } + ]) + )) + )) + }, + []) + )); + (Str_value (Nonrecursive, + ({ pat = (Pat_var "length_tail"); + expr = + (Exp_let (Recursive, + ({ pat = (Pat_var "helper"); + expr = + (Exp_fun (((Pat_var "acc"), [(Pat_var "xs")]), + (Exp_match ((Exp_ident "xs"), + ({ first = (Pat_construct ("[]", None)); + second = (Exp_ident "acc") }, + [{ first = + (Pat_construct ("::", + (Some (Pat_tuple + ((Pat_var "h"), (Pat_var "tl"), []))) + )); + second = + (Exp_apply ( + (Exp_apply ((Exp_ident "helper"), + (Exp_apply ((Exp_ident "+"), + (Exp_tuple + ((Exp_ident "acc"), + (Exp_constant (Const_integer 1)), + [])) + )) + )), + (Exp_ident "tl"))) + } + ]) + )) + )) + }, + []), + (Exp_apply ((Exp_ident "helper"), (Exp_constant (Const_integer 0)) + )) + )) + }, + []) + )); + (Str_value (Recursive, + ({ pat = (Pat_var "map"); + expr = + (Exp_fun (((Pat_var "f"), [(Pat_var "xs")]), + (Exp_match ((Exp_ident "xs"), + ({ first = (Pat_construct ("[]", None)); + second = (Exp_construct ("[]", None)) }, + [{ first = + (Pat_construct ("::", + (Some (Pat_tuple + ((Pat_var "a"), (Pat_construct ("[]", None)), + []))) + )); + second = + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_apply ((Exp_ident "f"), (Exp_ident "a"))), + (Exp_construct ("[]", None)), []))) + )) + }; + { first = + (Pat_construct ("::", + (Some (Pat_tuple + ((Pat_var "a"), + (Pat_construct ("::", + (Some (Pat_tuple + ((Pat_var "b"), + (Pat_construct ("[]", None)), + []))) + )), + []))) + )); + second = + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_apply ((Exp_ident "f"), (Exp_ident "a") + )), + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_apply ((Exp_ident "f"), + (Exp_ident "b"))), + (Exp_construct ("[]", None)), + []))) + )), + []))) + )) + }; + { first = + (Pat_construct ("::", + (Some (Pat_tuple + ((Pat_var "a"), + (Pat_construct ("::", + (Some (Pat_tuple + ((Pat_var "b"), + (Pat_construct ("::", + (Some (Pat_tuple + ((Pat_var "c"), + (Pat_construct ( + "[]", None)), + []))) + )), + []))) + )), + []))) + )); + second = + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_apply ((Exp_ident "f"), (Exp_ident "a") + )), + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_apply ((Exp_ident "f"), + (Exp_ident "b"))), + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_apply ( + (Exp_ident "f"), + (Exp_ident "c") + )), + (Exp_construct ( + "[]", None)), + []))) + )), + []))) + )), + []))) + )) + }; + { first = + (Pat_construct ("::", + (Some (Pat_tuple + ((Pat_var "a"), + (Pat_construct ("::", + (Some (Pat_tuple + ((Pat_var "b"), + (Pat_construct ("::", + (Some (Pat_tuple + ((Pat_var "c"), + (Pat_construct ( + "::", + (Some (Pat_tuple + (( + Pat_var + "d"), + (Pat_var + "tl"), + []))) + )), + []))) + )), + []))) + )), + []))) + )); + second = + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_apply ((Exp_ident "f"), (Exp_ident "a") + )), + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_apply ((Exp_ident "f"), + (Exp_ident "b"))), + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_apply ( + (Exp_ident "f"), + (Exp_ident "c") + )), + (Exp_construct ( + "::", + (Some (Exp_tuple + (( + Exp_apply ( + (Exp_ident + "f"), + (Exp_ident + "d"))), + (Exp_apply ( + (Exp_apply ( + (Exp_ident + "map"), + (Exp_ident + "f"))), + (Exp_ident + "tl"))), + []))) + )), + []))) + )), + []))) + )), + []))) + )) + } + ]) + )) + )) + }, + []) + )); + (Str_value (Recursive, + ({ pat = (Pat_var "append"); + expr = + (Exp_fun (((Pat_var "xs"), [(Pat_var "ys")]), + (Exp_match ((Exp_ident "xs"), + ({ first = (Pat_construct ("[]", None)); + second = (Exp_ident "ys") }, + [{ first = + (Pat_construct ("::", + (Some (Pat_tuple ((Pat_var "x"), (Pat_var "xs"), []))))); + second = + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_ident "x"), + (Exp_apply ( + (Exp_apply ((Exp_ident "append"), + (Exp_ident "xs"))), + (Exp_ident "ys"))), + []))) + )) + } + ]) + )) + )) + }, + []) + )); + (Str_value (Nonrecursive, + ({ pat = (Pat_var "concat"); + expr = + (Exp_let (Recursive, + ({ pat = (Pat_var "helper"); + expr = + (Exp_fun (((Pat_var "xs"), []), + (Exp_match ((Exp_ident "xs"), + ({ first = (Pat_construct ("[]", None)); + second = (Exp_construct ("[]", None)) }, + [{ first = + (Pat_construct ("::", + (Some (Pat_tuple + ((Pat_var "h"), (Pat_var "tl"), []))) + )); + second = + (Exp_apply ( + (Exp_apply ((Exp_ident "append"), (Exp_ident "h") + )), + (Exp_apply ((Exp_ident "helper"), (Exp_ident "tl") + )) + )) + } + ]) + )) + )) + }, + []), + (Exp_ident "helper"))) + }, + []) + )); + (Str_value (Recursive, + ({ pat = (Pat_var "iter"); + expr = + (Exp_fun (((Pat_var "f"), [(Pat_var "xs")]), + (Exp_match ((Exp_ident "xs"), + ({ first = (Pat_construct ("[]", None)); + second = (Exp_construct ("()", None)) }, + [{ first = + (Pat_construct ("::", + (Some (Pat_tuple ((Pat_var "h"), (Pat_var "tl"), []))))); + second = + (Exp_let (Nonrecursive, + ({ pat = (Pat_construct ("()", None)); + expr = (Exp_apply ((Exp_ident "f"), (Exp_ident "h"))) + }, + []), + (Exp_apply ( + (Exp_apply ((Exp_ident "iter"), (Exp_ident "f"))), + (Exp_ident "tl"))) + )) + } + ]) + )) + )) + }, + []) + )); + (Str_value (Recursive, + ({ pat = (Pat_var "cartesian"); + expr = + (Exp_fun (((Pat_var "xs"), [(Pat_var "ys")]), + (Exp_match ((Exp_ident "xs"), + ({ first = (Pat_construct ("[]", None)); + second = (Exp_construct ("[]", None)) }, + [{ first = + (Pat_construct ("::", + (Some (Pat_tuple ((Pat_var "h"), (Pat_var "tl"), []))))); + second = + (Exp_apply ( + (Exp_apply ((Exp_ident "append"), + (Exp_apply ( + (Exp_apply ((Exp_ident "map"), + (Exp_fun (((Pat_var "a"), []), + (Exp_tuple + ((Exp_ident "h"), (Exp_ident "a"), [])) + )) + )), + (Exp_ident "ys"))) + )), + (Exp_apply ( + (Exp_apply ((Exp_ident "cartesian"), (Exp_ident "tl") + )), + (Exp_ident "ys"))) + )) + } + ]) + )) + )) + }, + []) + )); + (Str_value (Nonrecursive, + ({ pat = (Pat_var "main"); + expr = + (Exp_let (Nonrecursive, + ({ pat = (Pat_construct ("()", None)); + expr = + (Exp_apply ( + (Exp_apply ((Exp_ident "iter"), (Exp_ident "print_int"))), + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_constant (Const_integer 1)), + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_constant (Const_integer 2)), + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_constant + (Const_integer 3)), + (Exp_construct ("[]", + None)), + []))) + )), + []))) + )), + []))) + )) + )) + }, + []), + (Exp_let (Nonrecursive, + ({ pat = (Pat_construct ("()", None)); + expr = + (Exp_apply ((Exp_ident "print_int"), + (Exp_apply ((Exp_ident "length"), + (Exp_apply ( + (Exp_apply ((Exp_ident "cartesian"), + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_constant (Const_integer 1)), + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_constant + (Const_integer 2)), + (Exp_construct ("[]", + None)), + []))) + )), + []))) + )) + )), + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_constant (Const_integer 1)), + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_constant + (Const_integer 2)), + (Exp_construct ("::", + (Some (Exp_tuple + ((Exp_constant + ( + Const_integer + 3)), + (Exp_construct ( + "::", + (Some ( + Exp_tuple + (( + Exp_constant + (Const_integer + 4)), + (Exp_construct ( + "[]", + None)), + []))))), + []))) + )), + []))) + )), + []))) + )) + )) + )) + )) + }, + []), + (Exp_constant (Const_integer 0)))) + )) + }, + []) + )) + ] + + |}] +;; + +let%expect_test "simple adt with pattern matching function (else case) + printing" = + test_program + {| +type shape = Circle of int + | Rectangle of (int*int) * int +;; + |}; + [%expect + {| + [(Str_adt ([], "shape", + (("Circle", (Some (Type_construct ("int", [])))), + [("Rectangle", + (Some (Type_tuple + ((Type_tuple + ((Type_construct ("int", [])), + (Type_construct ("int", [])), [])), + (Type_construct ("int", [])), [])))) + ]) + )) + ] |}] +;; + +let%expect_test "one arg adt v2" = + test_program + {| +type ('a) shape = Circle of int + | Rectangle of (int*int) * int +;; + |}; + [%expect + {| + [(Str_adt (["a"], "shape", + (("Circle", (Some (Type_construct ("int", [])))), + [("Rectangle", + (Some (Type_tuple + ((Type_tuple + ((Type_construct ("int", [])), + (Type_construct ("int", [])), [])), + (Type_construct ("int", [])), [])))) + ]) + )) + ] |}] +;; + +let%expect_test "multiple args adt v2" = + test_program + {| + type ('a, 'b) s9CG0K = + | R + | F + | H of f +;; +|}; + [%expect + {| + [(Str_adt (["a"; "b"], "s9CG0K", + (("R", None), [("F", None); ("H", (Some (Type_construct ("f", []))))]))) + ] |}] +;; + +let%expect_test "multiple args adt v3" = + test_program + {| + type ('a, 'b, 'c, 'd) s9CG0K = + | R + | F + | H of f +;; +|}; + [%expect + {| + [(Str_adt (["a"; "b"; "c"; "d"], "s9CG0K", + (("R", None), [("F", None); ("H", (Some (Type_construct ("f", []))))]))) + ] |}] +;; + +let%expect_test "multiple args adt v4" = + test_program + {| + type '_3d f = + | J of _f + | K +;; +|}; + [%expect + {| + [(Str_adt (["_3d"], "f", + (("J", (Some (Type_construct ("_f", [])))), [("K", None)]))) + ] |}] +;; + +let%expect_test "multiple args adt v4 (capitalized idents in constr_args)" = + test_program + {| + type ('ot, '_a, 't, '_v) i_ = + | L_ of Z + | Dl of _f + | G of uG_ + | Egd of _a +;; +|}; + [%expect + {| + [(Str_adt (["ot"; "_a"; "t"; "_v"], "i_", + (("L_", (Some (Type_construct ("Z", [])))), + [("Dl", (Some (Type_construct ("_f", [])))); + ("G", (Some (Type_construct ("uG_", [])))); + ("Egd", (Some (Type_construct ("_a", []))))]) + )) + ] |}] +;; + +let%expect_test "adt from default types" = + test_program + {| +type point = int * int;; +|}; + [%expect + {| + [(Str_adt ([], "point", + (("", + (Some (Type_tuple + ((Type_construct ("int", [])), (Type_construct ("int", [])), + [])))), + []) + )) + ] + |}] +;; + +let%expect_test "adt match case (pat_any)" = + test_program + {| +let area s = + match s with + | Square c -> 0 + | Circle c -> 3 + | _ -> 10 +;; +|}; + [%expect + {| + [(Str_value (Nonrecursive, + ({ pat = (Pat_var "area"); + expr = + (Exp_fun (((Pat_var "s"), []), + (Exp_match ((Exp_ident "s"), + ({ first = (Pat_construct ("Square", (Some (Pat_var "c")))); + second = (Exp_constant (Const_integer 0)) }, + [{ first = (Pat_construct ("Circle", (Some (Pat_var "c")))); + second = (Exp_constant (Const_integer 3)) }; + { first = Pat_any; second = (Exp_constant (Const_integer 10)) + } + ]) + )) + )) + }, + []) + )) + ] + |}] +;; + +let%expect_test "006partial2" = + test_program + {| +let foo b = if b then (fun foo -> foo+2) else (fun foo -> foo*10) + +let foo x = foo true (foo false (foo true (foo false x))) +let main = + let () = print_int (foo 11) in + 0 +|}; + [%expect + {| + [(Str_value (Nonrecursive, + ({ pat = (Pat_var "foo"); + expr = + (Exp_fun (((Pat_var "b"), []), + (Exp_if ((Exp_ident "b"), + (Exp_fun (((Pat_var "foo"), []), + (Exp_apply ((Exp_ident "+"), + (Exp_tuple + ((Exp_ident "foo"), (Exp_constant (Const_integer 2)), + [])) + )) + )), + (Some (Exp_fun (((Pat_var "foo"), []), + (Exp_apply ((Exp_ident "*"), + (Exp_tuple + ((Exp_ident "foo"), + (Exp_constant (Const_integer 10)), [])) + )) + ))) + )) + )) + }, + []) + )); + (Str_value (Nonrecursive, + ({ pat = (Pat_var "foo"); + expr = + (Exp_fun (((Pat_var "x"), []), + (Exp_apply ( + (Exp_apply ((Exp_ident "foo"), (Exp_construct ("true", None)))), + (Exp_apply ( + (Exp_apply ((Exp_ident "foo"), + (Exp_construct ("false", None)))), + (Exp_apply ( + (Exp_apply ((Exp_ident "foo"), + (Exp_construct ("true", None)))), + (Exp_apply ( + (Exp_apply ((Exp_ident "foo"), + (Exp_construct ("false", None)))), + (Exp_ident "x"))) + )) + )) + )) + )) + }, + []) + )); + (Str_value (Nonrecursive, + ({ pat = (Pat_var "main"); + expr = + (Exp_let (Nonrecursive, + ({ pat = (Pat_construct ("()", None)); + expr = + (Exp_apply ((Exp_ident "print_int"), + (Exp_apply ((Exp_ident "foo"), + (Exp_constant (Const_integer 11)))) + )) + }, + []), + (Exp_constant (Const_integer 0)))) + }, + []) + )) + ] |}] +;; diff --git a/AML/lib/parser/test/PTest.mli b/AML/lib/parser/test/PTest.mli new file mode 100644 index 00000000..78f85d8f --- /dev/null +++ b/AML/lib/parser/test/PTest.mli @@ -0,0 +1,7 @@ +(** Copyright 2025-2026, Rodion Suvorov, Dmitriy Chirkov*) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Ast + +val test_program : ident -> unit diff --git a/AML/lib/parser/test/dune b/AML/lib/parser/test/dune new file mode 100644 index 00000000..0a0dbf43 --- /dev/null +++ b/AML/lib/parser/test/dune @@ -0,0 +1,16 @@ +(library + (name ParserTest) + (libraries + base + stdio + Ast + Parser + Pprinter + qcheck-core + qcheck-core.runner + Qchecker) + (preprocess + (pps ppx_expect)) + (inline_tests) + (instrumentation + (backend bisect_ppx))) diff --git a/AML/lib/printer/dune b/AML/lib/printer/dune new file mode 100644 index 00000000..bbd291ac --- /dev/null +++ b/AML/lib/printer/dune @@ -0,0 +1,6 @@ +(library + (name Pprinter) + (public_name AML.Pprinter) + (libraries base Ast Parser stdio) + (instrumentation + (backend bisect_ppx))) diff --git a/AML/lib/printer/pprinter.ml b/AML/lib/printer/pprinter.ml new file mode 100644 index 00000000..7b904f59 --- /dev/null +++ b/AML/lib/printer/pprinter.ml @@ -0,0 +1,290 @@ +(** Copyright 2025-2026, Rodion Suvorov, Dmitriy Chirkov*) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Base +open Angstrom +open Ast +open Stdlib.Format + +let get_op_pr id = + let open Expression in + match id with + | Exp_ident "&&" -> 3 + | Exp_ident "||" -> 2 + | Exp_ident ">" + | Exp_ident "<" + | Exp_ident ">=" + | Exp_ident "<=" + | Exp_ident "<>" + | Exp_ident "=" -> 4 + | Exp_ident "+" | Exp_ident "-" -> 5 + | Exp_ident "*" | Exp_ident "/" -> 6 + | Exp_if (_, _, _) -> 1 + | Exp_let (_, _, _) + | Exp_match (_, _) + | Exp_function _ + | Exp_fun (_, _) + | Exp_constant _ | Exp_ident _ -> 0 + | Exp_apply (_, _) | Exp_construct _ -> 7 + | _ -> 0 +;; + +let pprint_constant fmt = + let open Constant in + function + | Const_integer n -> fprintf fmt "%d" n + | Const_char c -> fprintf fmt "'%c'" c + | Const_string s -> fprintf fmt "%S" s +;; + +let rec pprint_type fmt = + let open TypeExpr in + function + | Type_arrow (tye1, tye2) -> fprintf fmt "(%a -> %a)" pprint_type tye1 pprint_type tye2 + | Type_var id -> fprintf fmt "'%s" id + | Type_tuple (tye1, tye2, tyel) -> + fprintf + fmt + "(%s)" + (String.concat + ~sep:" * " + (List.map (tye1 :: tye2 :: tyel) ~f:(fun t -> asprintf "%a" pprint_type t))) + | Type_construct (id, tyel) -> + let tyel_str = + String.concat + ~sep:", " + (List.map tyel ~f:(fun t -> + match t with + | Type_var tye -> asprintf "'%s" tye + | Type_tuple (t1, t2, rest) -> + let tuple_types = t1 :: t2 :: rest in + let tuple_str = String.concat ~sep:" * " (List.map tuple_types ~f:show) in + "(" ^ tuple_str ^ ")" + | _ -> show t)) + in + let tyel_strf = + match List.length tyel with + | 0 -> "" + | 1 -> tyel_str ^ " " + | _ -> "(" ^ tyel_str ^ ") " + in + fprintf fmt "%s%s" tyel_strf id +;; + +let rec pprint_pattern fmt = + let open Pattern in + function + | Pat_constraint (p, tye) -> fprintf fmt "(%a : %a)" pprint_pattern p pprint_type tye + | Pat_any -> fprintf fmt "_" + | Pat_var id -> fprintf fmt "%s" id + | Pat_constant c -> pprint_constant fmt c + | Pat_tuple (p1, p2, pl) -> + fprintf + fmt + "(%s)" + (String.concat + ~sep:", " + (List.map (p1 :: p2 :: pl) ~f:(fun p -> asprintf "%a" pprint_pattern p))) + | Pat_construct (id, None) -> fprintf fmt "(%s)" id + | Pat_construct (id, Some p) -> + (match p with + | Pat_tuple _ -> fprintf fmt "(%s (%a))" id pprint_pattern p + | _ -> fprintf fmt "%s %a" id pprint_pattern p) +;; + +let pprint_rec fmt = + let open Expression in + function + | Nonrecursive -> fprintf fmt "" + | Recursive -> fprintf fmt "rec " +;; + +let rec pprint_expression fmt n = + let open Expression in + function + | Exp_ident id -> fprintf fmt "%s" id + | Exp_constant ct -> pprint_constant fmt ct + | Exp_tuple (ex1, ex2, exl) -> + fprintf + fmt + "(%s)" + (String.concat + ~sep:", " + (List.map (ex1 :: ex2 :: exl) ~f:(fun ex -> + let op_pr_t = get_op_pr ex in + asprintf "%a" (fun fmt -> pprint_expression fmt (op_pr_t + 1)) ex))) + | Exp_function (cs1, csl) when n > 0 -> + fprintf fmt "(%a)" pprint_function_with_cases (cs1, csl, n + 1) + | Exp_function (cs1, csl) -> + fprintf fmt "%a" pprint_function_with_cases (cs1, csl, n + 1) + | Exp_fun ((pt1, ptl), exp) -> + let if_string = + asprintf + "fun%s -> %a" + (String.concat + ~sep:"" + (List.map (pt1 :: ptl) ~f:(fun p -> asprintf " %a" pprint_pattern p))) + (fun fmt -> pprint_expression fmt n) + exp + in + if n > 0 then fprintf fmt "(%s)" if_string else fprintf fmt "%s" if_string + | Exp_apply (ex1, ex2) -> + let op_pr = get_op_pr ex1 in + let format_apply = + match ex2 with + | Expression.Exp_tuple (first, second, _) + when List.mem [ 2; 3; 4; 5; 6 ] op_pr ~equal:Int.equal -> + let left_pr, right_pr = + if List.mem [ 2; 3 ] op_pr ~equal:Int.equal + then op_pr + 1, op_pr + else op_pr, op_pr + 1 + in + asprintf + "%a %a %a" + (fun fmt -> pprint_expression fmt left_pr) + first + (fun fmt -> pprint_expression fmt op_pr) + ex1 + (fun fmt -> pprint_expression fmt right_pr) + second + | _ -> + asprintf + "%a %a" + (fun fmt -> pprint_expression fmt (op_pr + 1)) + ex1 + (fun fmt -> pprint_expression fmt (op_pr + 1)) + ex2 + in + if n > op_pr then fprintf fmt "(%s)" format_apply else fprintf fmt "%s" format_apply + | Exp_match (ex, (cs, csl)) -> + let op_pr1 = get_op_pr ex in + let match_string = + asprintf + "match %a with\n | %s" + (fun fmt -> pprint_expression fmt (op_pr1 + 1)) + ex + (String.concat + ~sep:"\n | " + (List.map (cs :: csl) ~f:(fun cs -> + asprintf "%a" (fun fmt -> pprint_case fmt n) cs))) + in + if n > 0 then fprintf fmt "(%s)" match_string else fprintf fmt "%s" match_string + | Exp_constraint (ex, tye) -> + fprintf fmt "(%a : %a)" (fun fmt -> pprint_expression fmt (n + 1)) ex pprint_type tye + | Exp_if (ex1, ex2, None) -> + let if_string = + asprintf + "if %a\n then %a" + (fun fmt -> pprint_expression fmt (n + 1)) + ex1 + (fun fmt -> pprint_expression fmt (n + 1)) + ex2 + in + if n > 0 then fprintf fmt "(%s)" if_string else fprintf fmt "%s" if_string + | Exp_if (ex1, ex2, Some ex3) -> + let if_string = + asprintf + "if %a\n then %a\n else %a" + (fun fmt -> pprint_expression fmt (n + 1)) + ex1 + (fun fmt -> pprint_expression fmt (n + 1)) + ex2 + (fun fmt -> pprint_expression fmt (n + 1)) + ex3 + in + if n > 0 then fprintf fmt "(%s)" if_string else fprintf fmt "%s" if_string + | Exp_let (rec_fl, (vbind1, vbindl), ex) -> + let let_string = + asprintf + "let %a%s in %a" + pprint_rec + rec_fl + (String.concat + ~sep:" and " + (List.map (vbind1 :: vbindl) ~f:(fun vb -> + asprintf "%a" (fun fmt -> pprint_value_binding fmt n) vb))) + (fun fmt -> pprint_expression fmt (n + 1)) + ex + in + if n > 0 then fprintf fmt "(%s)" let_string else fprintf fmt "%s" let_string + | Exp_construct (id, None) -> fprintf fmt "(%s)" id + | Exp_construct (id, Some exp) -> + fprintf fmt "(%s (%a))" id (fun fmt -> pprint_expression fmt (n + 1)) exp + +and pprint_value_binding fmt n vb = + let open Expression in + fprintf + fmt + "%a = %a" + pprint_pattern + vb.pat + (fun fmt -> pprint_expression fmt (n + 1)) + vb.expr + +and pprint_case fmt n case = + let open Expression in + fprintf + fmt + "%a -> %a" + pprint_pattern + case.first + (fun fmt -> pprint_expression fmt (n + 1)) + case.second + +and pprint_function_with_cases fmt (cs, csl, n) = + fprintf + fmt + "function %s" + (String.concat + (List.map (cs :: csl) ~f:(fun c -> + asprintf "\n | %a" (fun fmt -> pprint_case fmt n) c))) +;; + +let pprint_structure_item fmt n = + let open Structure in + function + | Str_eval exp -> fprintf fmt "%a ;;\n" (fun fmt -> pprint_expression fmt n) exp + | Str_value (rec_flag, (vbind1, vbindl)) -> + let bindings_str = + match vbind1 :: vbindl with + | [] -> "" + | _ -> + String.concat + ~sep:" and\n " + (List.map (vbind1 :: vbindl) ~f:(fun vb -> + asprintf "%a" (fun fmt -> pprint_value_binding fmt n) vb)) + in + fprintf fmt "let %a%s;;\n\n" pprint_rec rec_flag bindings_str + | Str_adt (tparam, id, (constr1, constrl)) -> + let tparam_ident_str = + match List.length tparam with + | 0 -> "" + | 1 -> asprintf "'%s " (List.hd_exn tparam) + | _ -> + "('" + ^ String.concat ~sep:", '" (List.map tparam ~f:(fun param -> asprintf "%s" param)) + ^ ") " + in + let var_t_str = + match constr1 :: constrl with + | [] -> "" + | _ -> + " | " + ^ String.concat + ~sep:"\n | " + (List.map (constr1 :: constrl) ~f:(fun (id, typ) -> + match typ with + | Some t -> asprintf "%s of %a" id pprint_type t + | None -> asprintf "%s" id)) + in + fprintf fmt "type %s%s =\n%s\n;;\n\n" tparam_ident_str id var_t_str +;; + +let pprint_program fmt = List.iter ~f:(pprint_structure_item fmt 0) + +let pp printer parser str = + match parse_string ~consume:Angstrom.Consume.All parser str with + | Ok res -> printer std_formatter res + | Error _ -> Stdio.print_endline "Syntax error" +;; diff --git a/AML/lib/printer/pprinter.mli b/AML/lib/printer/pprinter.mli new file mode 100644 index 00000000..512c6f6b --- /dev/null +++ b/AML/lib/printer/pprinter.mli @@ -0,0 +1,22 @@ +(** Copyright 2025-2026, Rodion Suvorov, Dmitriy Chirkov*) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Ast + +val pprint_constant : Format.formatter -> Constant.t -> unit +val pprint_type : Format.formatter -> TypeExpr.t -> unit +val pprint_pattern : Format.formatter -> Pattern.t -> unit +val pprint_rec : Format.formatter -> Expression.rec_flag -> unit +val pprint_expression : Format.formatter -> int -> Expression.t -> unit + +val pprint_value_binding + : Format.formatter + -> int + -> Expression.t Expression.value_binding + -> unit + +val pprint_case : Format.formatter -> int -> Expression.t Expression.case -> unit +val pprint_structure_item : Format.formatter -> int -> Structure.structure_item -> unit +val pprint_program : Format.formatter -> Structure.structure_item list -> unit +val pp : (Format.formatter -> 'a -> unit) -> 'a Angstrom.t -> string -> unit diff --git a/AML/lib/shrinker/Qcheck.ml b/AML/lib/shrinker/Qcheck.ml new file mode 100644 index 00000000..8163d39e --- /dev/null +++ b/AML/lib/shrinker/Qcheck.ml @@ -0,0 +1,52 @@ +(** Copyright 2025-2026, Rodion Suvorov, Dmitriy Chirkov*) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Base +open Parser +open Ast +open Pprinter +open Stdlib.Format + +let arbitrary = + QCheck.make + ~print:(fun p -> asprintf "%a" pp_program p) + ~shrink:Shrinker.ShrinkQCheck.shrink_structure + (Program.gen_program 20) +;; + +let test_round_trip2 = + QCheck.Test.make + ~name:"round-trip parsing and pretty printing" + ~count:10 + arbitrary + (fun program -> + let program_ast = show_program program in + if String.equal program_ast "[]" + then ( + printf ""; + true) + else ( + let printed_program = asprintf "%a" pprint_program program in + match parse printed_program with + | Ok parsed_program -> + let result = equal_program parsed_program program in + if result + then () + else + printf + "Mismatch! Original: %s\nPprinted: %s\nParsed: %s\n" + (show_program program) + printed_program + (show_program parsed_program); + result + | Error err -> + printf "Generated program:\n%s\n\n" printed_program; + printf "Parsing failed with error: %s\n" err; + false)) +;; + +let () = + let _ : int = QCheck_base_runner.run_tests [ test_round_trip2 ] in + () +;; diff --git a/AML/lib/shrinker/Qcheck.mli b/AML/lib/shrinker/Qcheck.mli new file mode 100644 index 00000000..4375d184 --- /dev/null +++ b/AML/lib/shrinker/Qcheck.mli @@ -0,0 +1,3 @@ +(** Copyright 2025-2026, Rodion Suvorov, Dmitriy Chirkov*) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) diff --git a/AML/lib/shrinker/dune b/AML/lib/shrinker/dune new file mode 100644 index 00000000..b47f0b61 --- /dev/null +++ b/AML/lib/shrinker/dune @@ -0,0 +1,6 @@ +(library + (name Qchecker) + (public_name AML.Qchecker) + (libraries Pprinter) + (instrumentation + (backend bisect_ppx))) diff --git a/AML/lib/shrinker/shrinker.ml b/AML/lib/shrinker/shrinker.ml new file mode 100644 index 00000000..c81780c4 --- /dev/null +++ b/AML/lib/shrinker/shrinker.ml @@ -0,0 +1,165 @@ +(** Copyright 2025-2026, Rodion Suvorov, Dmitriy Chirkov*) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open QCheck.Iter +open QCheck.Shrink +open Ast + +module ShrinkQCheck = struct + let filter predicate iter = + iter >>= fun x -> if predicate x then QCheck.Iter.return x else QCheck.Iter.empty + ;; + + let shrink_list1 ~shrink_head ~shrink_tail (head, tail) = + match tail with + | [] -> shrink_head head >|= fun head' -> head', tail + | _ -> + let open QCheck.Iter in + shrink_head head + >|= (fun head' -> head', tail) + <+> (QCheck.Shrink.list ~shrink:shrink_tail tail >|= fun tail' -> head, tail') + |> filter (fun (_, t) -> + match t with + | [] -> false + | _ -> true) + ;; + + let shrink_list2 ~shrink_first ~shrink_second ~shrink_tail (first, second, tail) = + match tail with + | [] -> + let open QCheck.Iter in + shrink_first first + >|= (fun first' -> first', second, tail) + <+> (shrink_second second >|= fun second' -> first, second', tail) + | _ -> + let open QCheck.Iter in + shrink_first first + >|= (fun first' -> first', second, tail) + <+> (shrink_second second >|= fun second' -> first, second', tail) + <+> (QCheck.Shrink.list ~shrink:shrink_tail tail + >|= fun tail' -> first, second, tail') + |> filter (fun (_, _, t) -> + match t with + | [] -> false + | _ -> true) + ;; + + let rec shrink_pattern = function + | Pattern.Pat_any -> QCheck.Iter.return Pattern.Pat_any + | Pattern.Pat_var id -> string ~shrink:char id >|= fun id' -> Pattern.Pat_var id' + | Pattern.Pat_constant const -> + (match const with + | Constant.Const_integer i -> + int i >|= fun i' -> Pattern.Pat_constant (Constant.Const_integer i') + | Constant.Const_char ch -> + char ch >|= fun ch' -> Pattern.Pat_constant (Constant.Const_char ch') + | Constant.Const_string str -> + string ~shrink:char str + >|= fun str' -> Pattern.Pat_constant (Constant.Const_string str')) + | Pattern.Pat_tuple pats -> + shrink_list2 + ~shrink_first:shrink_pattern + ~shrink_second:shrink_pattern + ~shrink_tail:shrink_pattern + pats + >|= fun pats' -> Pattern.Pat_tuple pats' + | Pattern.Pat_construct (x, None) -> + QCheck.Iter.return (Pattern.Pat_construct (x, None)) + | Pattern.Pat_construct (id, Some pat) -> + shrink_pattern pat >|= fun pat' -> Pattern.Pat_construct (id, Some pat') + | Pattern.Pat_constraint (pat, core_type) -> + shrink_pattern pat >|= fun pat' -> Pattern.Pat_constraint (pat', core_type) + + and shrink_expression = function + | Expression.Exp_ident id -> + string ~shrink:char id >|= fun id' -> Expression.Exp_ident id' + | Expression.Exp_constant const -> + (match const with + | Constant.Const_integer i -> + int i >|= fun i' -> Expression.Exp_constant (Constant.Const_integer i') + | Constant.Const_char ch -> + char ch >|= fun ch' -> Expression.Exp_constant (Constant.Const_char ch') + | Constant.Const_string str -> + string ~shrink:char str + >|= fun str' -> Expression.Exp_constant (Constant.Const_string str')) + | Expression.Exp_tuple pats -> + shrink_list2 + ~shrink_first:shrink_expression + ~shrink_second:shrink_expression + ~shrink_tail:shrink_expression + pats + >|= fun pats' -> Expression.Exp_tuple pats' + | Expression.Exp_function cases -> + shrink_list1 ~shrink_head:shrink_case ~shrink_tail:shrink_case cases + >|= fun cases' -> Expression.Exp_function cases' + | Expression.Exp_fun (patterns, exp) -> + shrink_list1 ~shrink_head:shrink_pattern ~shrink_tail:shrink_pattern patterns + >|= (fun patterns' -> Expression.Exp_fun (patterns', exp)) + <+> shrink_expression exp + >|= fun exp' -> Expression.Exp_fun (patterns, exp') + | Expression.Exp_apply (exp1, exp2) -> + shrink_expression exp1 + >|= (fun exp1' -> Expression.Exp_apply (exp1', exp2)) + <+> shrink_expression exp2 + >|= fun exp2' -> Expression.Exp_apply (exp1, exp2') + | Expression.Exp_match (exp, cases) -> + shrink_expression exp + >>= fun exp' -> + shrink_list1 ~shrink_head:shrink_case ~shrink_tail:shrink_case cases + >>= fun cases' -> return (Expression.Exp_match (exp', cases')) + | Expression.Exp_let (rec_flag, bindings, exp) -> + shrink_list1 + ~shrink_head:shrink_value_binding + ~shrink_tail:shrink_value_binding + bindings + >|= (fun bindings' -> Expression.Exp_let (rec_flag, bindings', exp)) + <+> shrink_expression exp + >|= fun exp' -> Expression.Exp_let (rec_flag, bindings, exp') + | Expression.Exp_construct (_, None) -> empty + | Expression.Exp_construct (id, Some exp) -> + shrink_expression exp >|= fun exp' -> Expression.Exp_construct (id, Some exp') + | Expression.Exp_constraint (exp, core_type) -> + shrink_expression exp >|= fun exp' -> Expression.Exp_constraint (exp', core_type) + | Expression.Exp_if (cond, then_exp, None) -> + shrink_expression cond + >|= (fun cond' -> Expression.Exp_if (cond', then_exp, None)) + <+> shrink_expression then_exp + >|= fun then_exp' -> Expression.Exp_if (cond, then_exp', None) + | Expression.Exp_if (cond, then_exp, Some else_exp) -> + shrink_expression cond + >|= (fun cond' -> Expression.Exp_if (cond', then_exp, Some else_exp)) + <+> shrink_expression then_exp + >|= (fun then_exp' -> Expression.Exp_if (cond, then_exp', Some else_exp)) + <+> shrink_expression else_exp + >|= fun else_exp' -> Expression.Exp_if (cond, then_exp, Some else_exp') + + and shrink_value_binding value_binding = + let open Expression in + shrink_pattern value_binding.Expression.pat + >>= fun pat' -> + shrink_expression value_binding.Expression.expr + >>= fun expr' -> return { pat = pat'; expr = expr' } + + and shrink_case case = + let open Expression in + shrink_pattern case.Expression.first + >>= fun first' -> + shrink_expression case.Expression.second + >>= fun second' -> return { first = first'; second = second' } + ;; + + let shrink_structure_item = function + | Structure.Str_eval expr -> + shrink_expression expr >|= fun expr' -> Structure.Str_eval expr' + | Structure.Str_value (rec_flag, bindings) -> + shrink_list1 + ~shrink_head:shrink_value_binding + ~shrink_tail:shrink_value_binding + bindings + >|= fun (head', tail') -> Structure.Str_value (rec_flag, (head', tail')) + | Structure.Str_adt (r, b, a) -> return (Structure.Str_adt (r, b, a)) + ;; + + let shrink_structure = list ~shrink:shrink_structure_item +end diff --git a/AML/lib/shrinker/shrinker.mli b/AML/lib/shrinker/shrinker.mli new file mode 100644 index 00000000..003c2696 --- /dev/null +++ b/AML/lib/shrinker/shrinker.mli @@ -0,0 +1,30 @@ +(** Copyright 2025-2026, Rodion Suvorov, Dmitriy Chirkov*) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Ast +open QCheck.Iter + +module ShrinkQCheck : sig + val filter : ('a -> bool) -> 'a t -> 'a t + + val shrink_list1 + : shrink_head:('a -> 'a t) + -> shrink_tail:'b QCheck.Shrink.t + -> 'a * 'b list + -> ('a * 'b list) t + + val shrink_list2 + : shrink_first:('a -> 'a t) + -> shrink_second:('b -> 'b t) + -> shrink_tail:'c QCheck.Shrink.t + -> 'a * 'b * 'c list + -> ('a * 'b * 'c list) t + + val shrink_pattern : Pattern.t QCheck.Shrink.t + val shrink_expression : Expression.t QCheck.Shrink.t + val shrink_value_binding : Expression.t Expression.value_binding QCheck.Shrink.t + val shrink_case : Expression.t Expression.case QCheck.Shrink.t + val shrink_structure_item : Structure.structure_item -> Structure.structure_item t + val shrink_structure : Structure.structure_item list QCheck.Shrink.t +end diff --git a/AML/out.ll b/AML/out.ll new file mode 100644 index 00000000..9f73f8af --- /dev/null +++ b/AML/out.ll @@ -0,0 +1,12 @@ +; ModuleID = 'main' +source_filename = "main" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-linux-gnu" + +declare void @print_int(i64) + +define i64 @main() { +entry: + call void @print_int(i64 70) + ret i64 0 +} diff --git a/OMLet/.gitignore b/OMLet/.gitignore new file mode 100644 index 00000000..0e5f1e4b --- /dev/null +++ b/OMLet/.gitignore @@ -0,0 +1,3 @@ +/_build +/_coverage + diff --git a/OMLet/.ocamlformat b/OMLet/.ocamlformat new file mode 100644 index 00000000..25919d0e --- /dev/null +++ b/OMLet/.ocamlformat @@ -0,0 +1,3 @@ +version=0.27.0 +profile=janestreet + diff --git a/OMLet/Makefile b/OMLet/Makefile new file mode 100644 index 00000000..c0615678 --- /dev/null +++ b/OMLet/Makefile @@ -0,0 +1,20 @@ +.PHONY: all test +all: + dune build + +test: + dune test + +TEST_COV_D = /tmp/cov +COVERAGE_OPTS = --coverage-path $(TEST_COV_D) --expect bin/ + +.PHONY: test_coverage coverage +test_coverage: coverage +coverage: + $(RM) -r $(TEST_COV_D) + mkdir -p $(TEST_COV_D) + BISECT_FILE=$(TEST_COV_D)/language dune runtest --no-print-directory \ + --instrument-with bisect_ppx --force + bisect-ppx-report html $(COVERAGE_OPTS) + bisect-ppx-report summary $(COVERAGE_OPTS) + @echo "Use 'xdg-open _coverage/index.html' to see coverage report" diff --git a/OMLet/OMLet.opam b/OMLet/OMLet.opam new file mode 100644 index 00000000..f8ef509b --- /dev/null +++ b/OMLet/OMLet.opam @@ -0,0 +1,43 @@ +# This file is generated by dune, edit dune-project instead +opam-version: "2.0" +synopsis: "Will be later" +maintainer: ["Kotelnikova Ksenia" "Kozyreva Sofya"] +authors: ["Kotelnikova Ksenia" "Kozyreva Sofya"] +license: "LGPL-3.0-or-later WITH OCaml-LGPL-linking-exception" +homepage: "https://github.com/p1onerka/oMLet" +bug-reports: "https://github.com/p1onerka/oMLet/issues" +depends: [ + "ocaml" + "dune" {>= "3.8" & = "3.19.1"} + "angstrom" {= "0.16.0"} + "ppx_inline_test" {with-test} + "ppx_deriving" + "qcheck-core" + "bisect_ppx" + "ocamlformat" {build} + "llvm" {= "18-shared"} + "ppx_deriving_qcheck" {= "0.6"} + "odoc" {with-doc} +] +build: [ + ["dune" "subst"] {dev} + [ + "dune" + "build" + "-p" + name + "-j" + jobs + "@install" + "@runtest" {with-test} + "@doc" {with-doc} + ] +] +dev-repo: "git+https://github.com/p1onerka/oMLet.git" +depexts: [ + [ "llvm-18-dev" "clang-18" "gcc-riscv64-linux-gnu" "g++-riscv64-linux-gnu" "qemu-user"] {os-distribution = "ubuntu"} +] +pin-depends: [ + ["ppx_deriving_qcheck.0.6" "git+https://github.com/c-cube/qcheck.git#42429bf06ba12373cad02b1404f50d0ad6238af5"] +] +# Don't edit '*.opam' file manually. Use 'dune b @install' diff --git a/OMLet/OMLet.opam.template b/OMLet/OMLet.opam.template new file mode 100644 index 00000000..f4e537bf --- /dev/null +++ b/OMLet/OMLet.opam.template @@ -0,0 +1,7 @@ +depexts: [ + [ "llvm-18-dev" "clang-18" "gcc-riscv64-linux-gnu" "g++-riscv64-linux-gnu" "qemu-user"] {os-distribution = "ubuntu"} +] +pin-depends: [ + ["ppx_deriving_qcheck.0.6" "git+https://github.com/c-cube/qcheck.git#42429bf06ba12373cad02b1404f50d0ad6238af5"] +] +# Don't edit '*.opam' file manually. Use 'dune b @install' diff --git a/OMLet/bin/LL.ml b/OMLet/bin/LL.ml new file mode 100644 index 00000000..53331175 --- /dev/null +++ b/OMLet/bin/LL.ml @@ -0,0 +1,94 @@ +[@@@ocaml.text "/*"] + +(** Copyright 2023-2024, Kakadu and contributors *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +[@@@ocaml.text "/*"] + +open Llvm +open Printf + +module type S = sig + val context : Llvm.llcontext + val module_ : Llvm.llmodule + val builder : Llvm.llbuilder + val build_store : Llvm.llvalue -> Llvm.llvalue -> Llvm.llvalue + val build_call : lltype -> ?name:string -> llvalue -> llvalue list -> llvalue + val lookup_func_exn : string -> llvalue + val has_toplevel_func : string -> bool + val build_add : ?name:string -> llvalue -> llvalue -> llvalue + val build_sub : ?name:string -> llvalue -> llvalue -> llvalue + val build_mul : ?name:string -> llvalue -> llvalue -> llvalue + val build_sdiv : ?name:string -> llvalue -> llvalue -> llvalue [@@inline] + val build_icmp : ?name:string -> Icmp.t -> llvalue -> llvalue -> llvalue + + (** [set_metadata v kind fmt] sets metadata to value [v] of kind [k]. + Returns this value [v]. Useful for attaching debugging *) + val set_metadata + : llvalue + -> string + -> ('a, Format.formatter, unit, llvalue) format4 + -> 'a + + (* ?? *) + + val build_ptrtoint : ?name:string -> llvalue -> lltype -> llvalue + val build_inttoptr : ?name:string -> llvalue -> lltype -> llvalue + val build_pointercast : ?name:string -> llvalue -> lltype -> llvalue + + (** Just aliases *) + + val const_int : Llvm.lltype -> int -> Llvm.llvalue + val params : Llvm.llvalue -> Llvm.llvalue array + val pp_value : Format.formatter -> llvalue -> unit +end + +let make context builder module_ = + let module L : S = struct + let context = context + let builder = builder + let module_ = module_ + let build_store a b = Llvm.build_store a b builder + + let build_call typ ?(name = "") f args = + build_call typ f (Array.of_list args) name builder + ;; + + let has_toplevel_func fname = + match lookup_function fname module_ with + | Some _ -> true + | None -> false + ;; + + let lookup_func_exn fname = + match lookup_function fname module_ with + | Some f -> f + | None -> failwith (sprintf "Function '%s' not found" fname) + ;; + + let build_add ?(name = "") l r = build_add l r name builder + let build_sub ?(name = "") l r = build_sub l r name builder + let build_mul ?(name = "") l r = build_mul l r name builder + let build_sdiv ?(name = "") l r = build_sdiv l r name builder + let build_icmp ?(name = "") op l r = build_icmp op l r name builder + let build_ptrtoint ?(name = "") e typ = Llvm.build_ptrtoint e typ name builder + let build_inttoptr ?(name = "") e typ = Llvm.build_inttoptr e typ name builder + let build_pointercast ?(name = "") f typ = Llvm.build_pointercast f typ name builder + + let set_metadata v kind fmt = + Format.kasprintf + (fun s -> + Llvm.set_metadata v (Llvm.mdkind_id context kind) (Llvm.mdstring context s); + v) + fmt + ;; + + (* Aliases *) + let const_int = Llvm.const_int + let params = Llvm.params + let pp_value ppf x = Format.fprintf ppf "%s" (Llvm.string_of_llvalue x) + end + in + (module L : S) +;; diff --git a/OMLet/bin/dune b/OMLet/bin/dune new file mode 100644 index 00000000..e9e94f7b --- /dev/null +++ b/OMLet/bin/dune @@ -0,0 +1,22 @@ +(library + (name LL) + (public_name OMLet.LL) + (modules LL) + (wrapped false) + (libraries + llvm + llvm.analysis + llvm.executionengine + ; + )) + +(executable + (public_name OMLet) + (name main) + (modules main) + (libraries LL) + (instrumentation + (backend bisect_ppx))) + +(cram + (deps ./main.exe runtime.c)) diff --git a/OMLet/bin/main.ml b/OMLet/bin/main.ml new file mode 100644 index 00000000..37ad326d --- /dev/null +++ b/OMLet/bin/main.ml @@ -0,0 +1,60 @@ +[@@@ocaml.text "/*"] + +(** Copyright 2023-2025, Kakadu and contributors *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +[@@@ocaml.text "/*"] + +let () = + let context = Llvm.global_context () in + let builder = Llvm.builder context in + let () = assert (Llvm_executionengine.initialize ()) in + let the_module = Llvm.create_module context "main" in + Llvm.set_target_triple "x86_64-pc-linux-gnu" the_module; + let _the_execution_engine = Llvm_executionengine.create the_module in + let module LL = (val LL.make context builder the_module) in + let i64_type = Llvm.i64_type context in + let void_type = Llvm.void_type context in + let ptr_type = Llvm.pointer_type context in + let prepare_main () = + let ft = + (* TODO main has special args *) + let args = Array.make 0 ptr_type in + Llvm.function_type i64_type args + in + let the_function = Llvm.declare_function "main" ft the_module in + (* Create a new basic block to start insertion into. *) + let bb = Llvm.append_block context "entry" the_function in + Llvm.position_at_end bb builder; + (* Add all arguments to the symbol table and create their allocas. *) + (* Finish off the function. *) + let (_ : Llvm.llvalue) = + LL.build_call + (Llvm.function_type void_type [| i64_type |]) + LL.(lookup_func_exn "print_int") + [ Llvm.const_int i64_type 70 ] + in + let (_ : Llvm.llvalue) = Llvm.build_ret (Llvm.const_int i64_type 0) builder in + (* Validate the generated code, checking for consistency. *) + (match Llvm_analysis.verify_function the_function with + | true -> () + | false -> + Stdlib.Format.printf + "invalid function generated\n%s\n" + (Llvm.string_of_llvalue the_function); + Llvm_analysis.assert_valid_function the_function); + (* Optimize the function. *) + (* let (_ : bool) = Llvm.PassManager.run_function the_function the_fpm in *) + (* Llvm.dump_value the_function; *) + () + in + let _ = + Llvm.declare_function + "print_int" + (Llvm.function_type (Llvm.void_type context) [| i64_type |]) + the_module + in + prepare_main (); + Llvm.print_module "out.ll" the_module +;; diff --git a/OMLet/bin/run.t b/OMLet/bin/run.t new file mode 100644 index 00000000..c5569b68 --- /dev/null +++ b/OMLet/bin/run.t @@ -0,0 +1,20 @@ + $ clang-18 -c runtime.c -o runtime.o + $ ./main.exe + $ ls + main.exe + out.ll + runtime.c + runtime.o + $ cat out.ll | grep -E 'source_filename|target datalayout|ModuleID' --invert-match + target triple = "x86_64-pc-linux-gnu" + + declare void @print_int(i64) + + define i64 @main() { + entry: + call void @print_int(i64 70) + ret i64 0 + } + $ clang-18 out.ll runtime.o -o demo1.exe + $ echo "Press $(./demo1.exe) to pay respect" + Press F to pay respect diff --git a/OMLet/bin/runtime.c b/OMLet/bin/runtime.c new file mode 100644 index 00000000..9c00aa03 --- /dev/null +++ b/OMLet/bin/runtime.c @@ -0,0 +1,10 @@ +/* Copyright 2023-2024, Kakadu and contributors */ +/* SPDX-License-Identifier: LGPL-3.0-or-later */ + +#include +#include + +void print_int(int64_t n) { + putchar(n); + fflush(stdout); +} \ No newline at end of file diff --git a/OMLet/dune b/OMLet/dune new file mode 100644 index 00000000..98e54536 --- /dev/null +++ b/OMLet/dune @@ -0,0 +1,7 @@ +(env + (dev + (flags + (:standard -alert @deprecated -warn-error -A -w -3-9-32-34-58))) + (release + (flags + (:standard -alert @deprecated -warn-error +A -w +A-4-40-42-44-70)))) diff --git a/OMLet/dune-project b/OMLet/dune-project new file mode 100644 index 00000000..8f9521c6 --- /dev/null +++ b/OMLet/dune-project @@ -0,0 +1,35 @@ +(lang dune 3.8) + +(name OMLet) + +(generate_opam_files true) + +(source + (github p1onerka/oMLet)) + +(authors "Kotelnikova Ksenia" "Kozyreva Sofya") + +(maintainers "Kotelnikova Ksenia" "Kozyreva Sofya") + +(license "LGPL-3.0-or-later WITH OCaml-LGPL-linking-exception") + +(package + (name OMLet) + (synopsis "Will be later") + (depends + ocaml + (dune + (= "3.19.1")) + (angstrom + (= "0.16.0")) +(ppx_inline_test :with-test) + ppx_deriving + qcheck-core + bisect_ppx + (ocamlformat :build) + (llvm + (= "18-shared")) + (ppx_deriving_qcheck + (= "0.6")))) + +; See the complete stanza docs at https://dune.readthedocs.io/en/stable/dune-files.html#dune-project diff --git a/OMLet/lib/ast.ml b/OMLet/lib/ast.ml new file mode 100644 index 00000000..f5f505df --- /dev/null +++ b/OMLet/lib/ast.ml @@ -0,0 +1,194 @@ +(** Copyright 2024-2025, Ksenia Kotelnikova , Gleb Nasretdinov *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open KeywordChecker +open TypedTree +open TypesPp + +type ident = Ident of string (** identifier *) [@@deriving show { with_path = false }] + +let gen_char_of_range l r = + QCheck.Gen.(map Char.chr (int_range (Char.code l) (Char.code r))) +;; + +let gen_varname ~uppercase = + let open QCheck.Gen in + let loop = + let gen_first_char = + if uppercase + then frequency [ 26, gen_char_of_range 'A' 'Z'; 1, return '_' ] + else frequency [ 26, gen_char_of_range 'a' 'z'; 1, return '_' ] + in + let gen_next_char = + frequency [ 26 + 26 + 1, gen_first_char; 10, gen_char_of_range '0' '9' ] + in + map2 + (fun first rest -> String.make 1 first ^ Base.String.of_char_list rest) + gen_first_char + (list_size (1 -- 3) gen_next_char) + in + loop >>= fun name -> if is_keyword name then loop else return name +;; + +let gen_ident = QCheck.Gen.map (fun s -> Ident s) (gen_varname ~uppercase:false) +let gen_ident_uppercase = QCheck.Gen.map (fun s -> Ident s) (gen_varname ~uppercase:true) + +let gen_escape_sequence = + let open QCheck.Gen in + oneofl [ "\\\""; "\\\\"; "\\n"; "\\t" ] +;; + +let gen_string_of_regular_char = + let open QCheck.Gen in + let gen_int = + frequency + [ 33 - 32 + 1, int_range 32 33 + ; 91 - 35 + 1, int_range 35 91 + ; 126 - 93 + 1, int_range 93 126 + ] + in + map (fun c -> String.make 1 c) (map Char.chr gen_int) +;; + +let gen_string = + let open QCheck.Gen in + let atom = frequency [ 1, gen_escape_sequence; 30, gen_string_of_regular_char ] in + let+ atoms = list_size (0 -- 20) atom in + String.concat "" atoms +;; + +type literal = + | Int_lt of (int[@gen QCheck.Gen.pint]) (** [0], [1], [30] *) + | Bool_lt of bool (** [false], [true] *) + | String_lt of (string[@gen gen_string]) (** ["Hello world"] *) + | Unit_lt (** [Unit] *) +[@@deriving show { with_path = false }, qcheck] + +type binary_operator = + | Binary_equal (** [=] *) + | Binary_unequal (** [<>] *) + | Binary_less (** [<] *) + | Binary_less_or_equal (** [<=] *) + | Binary_greater (** [>] *) + | Binary_greater_or_equal (** [>=] *) + | Binary_add (** [+] *) + | Binary_subtract (** [-] *) + | Binary_multiply (** [*] *) + | Logical_or (** [||] *) + | Logical_and (** [&&] *) + | Binary_divide (** [/] *) + | Binary_or_bitwise (** [|||] *) + | Binary_xor_bitwise (** [^^^] *) + | Binary_and_bitwise (** [&&&] *) + | Binary_cons (** [::] *) +[@@deriving show { with_path = false }, qcheck] + +type unary_operator = + | Unary_minus (** unary [-] *) + | Unary_not (** unary [not] *) +[@@deriving show { with_path = false }, qcheck] + +type pattern = + | Wild (** [_] *) + | PList of + (pattern list[@gen QCheck.Gen.(list_size (0 -- 3) (gen_pattern_sized (n / 20)))]) + (**[ [], [1;2;3] ] *) + | PCons of pattern * pattern (**[ hd :: tl ] *) + | PTuple of + pattern + * pattern + * (pattern list[@gen QCheck.Gen.(list_size (0 -- 2) (gen_pattern_sized (n / 20)))]) + (** | [(a, b)] -> *) + | PConst of literal (** | [4] -> *) + | PVar of ident (** pattern identifier *) + | POption of pattern option + (*| Variant of (ident list[@gen gen_ident_small_list]) (** | [Blue, Green, Yellow] -> *) *) + | PConstraint of pattern * (typ[@gen gen_typ_primitive]) +[@@deriving show { with_path = false }, qcheck] + +type is_recursive = + | Nonrec (** let factorial n = ... *) + | Rec (** let rec factorial n = ... *) +[@@deriving show { with_path = false }, qcheck] + +type case = (pattern[@gen gen_pattern_sized n]) * (expr[@gen gen_expr_sized n]) +[@@deriving show { with_path = false }, qcheck] + +and expr = + | Const of literal (** [Int], [Bool], [String], [Unit], [Null] *) + | Tuple of + (expr[@gen gen_expr_sized (n / 4)]) + * (expr[@gen gen_expr_sized (n / 4)]) + * (expr list[@gen QCheck.Gen.(list_size (0 -- 2) (gen_expr_sized (n / 20)))]) + (** [(1, "Hello world", true)] *) + | List of (expr list[@gen QCheck.Gen.(list_size (0 -- 3) (gen_expr_sized (n / 20)))]) + (** [], [1;2;3] *) + | Variable of ident (** [x], [y] *) + | Unary_expr of unary_operator * expr (** -x *) + | Bin_expr of binary_operator * expr * expr (** [1 + 2], [3 ||| 12], hd :: tl *) + | If_then_else of + (expr[@gen gen_expr_sized (n / 4)]) + * (expr[@gen gen_expr_sized (n / 4)]) + * (expr option[@gen QCheck.Gen.option (gen_expr_sized (n / 4))]) + (** [if n % 2 = 0 then "Even" else "Odd"] *) + | Lambda of + (pattern[@gen gen_pattern_sized (n / 2)]) + * (pattern list[@gen QCheck.Gen.(list_size (0 -- 2) (gen_pattern_sized (n / 20)))]) + * expr (** fun x y -> x + y *) + | Apply of (expr[@gen gen_expr_sized (n / 4)]) * (expr[@gen gen_expr_sized (n / 4)]) + (** [sum 1 ] *) + | Function of + (case[@gen gen_case_sized (n / 4)]) + * (case list[@gen QCheck.Gen.(list_size (0 -- 2) (gen_case_sized (n / 20)))]) + (** [function | p1 -> e1 | p2 -> e2 | ... |]*) + | Match of + (expr[@gen gen_expr_sized (n / 4)]) + * (case[@gen gen_case_sized (n / 4)]) + * (case list[@gen QCheck.Gen.(list_size (0 -- 2) (gen_case_sized (n / 20)))]) + (** [match x with | p1 -> e1 | p2 -> e2 | ...] *) + | LetIn of + is_recursive + * let_bind + * (let_bind list + [@gen QCheck.Gen.(list_size (0 -- 2) (gen_let_bind_sized (n / 20)))]) + * expr (** [let rec f x = if (x <= 0) then x else g x and g x = f (x-2) in f 3] *) + | Option of expr option (** [int option] *) + | EConstraint of expr * (typ[@gen gen_typ_primitive]) +[@@deriving show { with_path = false }, qcheck] + +and let_bind = + | Let_bind of + (pattern[@gen gen_pattern_sized (n / 2)]) + * (pattern list[@gen QCheck.Gen.(list_size (0 -- 3) (gen_pattern_sized (n / 4)))]) + * expr (** [let sum n m = n + m] *) +[@@deriving show { with_path = false }, qcheck] + +let gen_expr = + QCheck.Gen.( + let* n = small_nat in + gen_expr_sized n) +;; + +let gen_let_bind = + QCheck.Gen.( + let* n = small_nat in + gen_let_bind_sized n) +;; + +type statement = + | Let of + is_recursive + * let_bind + * (let_bind list[@gen QCheck.Gen.(list_size (0 -- 2) gen_let_bind)]) + (** [let name = expr] *) +[@@deriving show { with_path = false }, qcheck] + +type construction = + | Expr of expr (** expression *) + | Statement of statement (** statement *) +[@@deriving show { with_path = false }, qcheck] + +type constructions = + (construction list[@gen QCheck.Gen.(list_size (1 -- 2) gen_construction)]) +[@@deriving show { with_path = false }, qcheck] diff --git a/OMLet/lib/codegen.ml b/OMLet/lib/codegen.ml new file mode 100644 index 00000000..41f57e40 --- /dev/null +++ b/OMLet/lib/codegen.ml @@ -0,0 +1,316 @@ +(** Copyright 2025, Ksenia Kotelnikova , Sofya Kozyreva *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Ast +open CodegenTypes + +(* for creating unique ite and function labels *) +let label_factory = ref 0 + +(* for giving some names to intermediate results stored in registers *) +let fresh_name = ref 0 + +(* for now, this is the only way to write _start label at suitable place and do it exactly once *) +let is_start_label_put = ref false +let start_label = "_start" +let t_regs = [ Temp 0; Temp 1; Temp 2; Temp 3; Temp 4; Temp 5; Temp 6 ] + +let s_regs = + [ Saved 0 + ; Saved 1 + ; Saved 2 + ; Saved 3 + ; Saved 4 + ; Saved 5 + ; Saved 6 + ; Saved 7 + ; Saved 8 + ; Saved 9 + ; Saved 10 + ; Saved 11 + ] +;; + +let a_regs = [ Arg 0; Arg 1; Arg 2; Arg 3; Arg 4; Arg 5; Arg 6; Arg 7 ] + +(* a_regs are used as a "bridge" to new values, so it is unstable to use them for storing *) +let regs = t_regs @ s_regs + +module PlacementMap = Map.Make (String) +module RegStackMap = Map.Make (Int) + +(* return current stack tail and increment it by size being allocated *) +let extend_stack stack size = + let cur_offset = stack in + stack + size, cur_offset +;; + +(* placement will probably be used later, when there is logic for pushing something into stack *) +let find_free_reg free_regs (*placement*) = + match free_regs with + | reg :: tail -> reg, tail + | [] -> failwith "Empty reg list!" +;; + +let make_label name = + let label = Printf.sprintf ".%s_%d" name !label_factory in + label_factory := !label_factory + 1; + label +;; + +let codegen_pat a_regs free_regs stack placement compiled = function + (* add information about pattern storage into placement *) + | PVar (Ident name) -> + let stack, cur_offset = extend_stack stack 8 in + let place = Offset cur_offset in + let placement = PlacementMap.add name place placement in + a_regs, free_regs, stack, placement, compiled + | _ -> failwith "Pattern is not yet implemented" +;; + +(* for every func arg, move them on stack and update placement via codegen_pat *) +let codegen_func_arg arg i (a_regs, free_regs, stack, placement, compiled) = + let _, _, stack, placement, compiled = + codegen_pat a_regs free_regs stack placement compiled arg + in + let compiled = True (StackType (SD, Arg i, Stack ((i + 1) * 8))) :: compiled in + a_regs, free_regs, stack, placement, compiled +;; + +(* used before function application to put all values in t* registers on stack to prevent overwriting *) +let add_unnamed_value_to_placement value_place placement = + let name = string_of_int !fresh_name in + fresh_name := !fresh_name + 1; + PlacementMap.add name value_place placement +;; + +let change_register old_r new_r placement = + let name = PlacementMap.find old_r placement in + PlacementMap.add name new_r placement +;; + +let rec codegen_expr a_regs free_regs stack placement compiled e = + match e with + (* load const into a* register *) + | Const (Int_lt n) -> + let instr = Pseudo (LI (List.hd a_regs, n)) in + a_regs, free_regs, stack, placement, instr :: compiled + (* if var is value on stack, load it into Arg 0, if var is function name (ergo has label), call it *) + | Variable (Ident name) -> + (match PlacementMap.find_opt name placement with + | None -> failwith "Panic: undefined var in codegen!" + | Some (Offset o) -> + let instr = True (StackType (LD, Arg 0, Stack o)) in + a_regs, free_regs, stack, placement, instr :: compiled + | Some (FuncLabel l) -> + let instr = Pseudo (CALL l) in + a_regs, free_regs, stack, placement, instr :: compiled + | Some (Register reg) -> + let instr = Pseudo (MV (Arg 0, reg)) in + a_regs, free_regs, stack, placement, instr :: compiled) + | Bin_expr (op, e1, e2) -> + (* codegen fst expr, move it into t* or s* register and add it to placement in case of application *) + let reg_fst = List.hd a_regs in + let _, _, stack, placement, compiled = + codegen_expr a_regs free_regs stack placement compiled e1 + in + let reg_fst_free, free_regs = find_free_reg free_regs in + let compiled = Pseudo (MV (reg_fst_free, reg_fst)) :: compiled in + let placement = add_unnamed_value_to_placement (Register reg_fst_free) placement in + let reg_snd = List.hd a_regs in + let _, _, stack, placement, compiled = + codegen_expr a_regs free_regs stack placement compiled e2 + in + let reg_snd_free, free_regs = find_free_reg free_regs in + let compiled = Pseudo (MV (reg_snd_free, reg_snd)) :: compiled in + let placement = add_unnamed_value_to_placement (Register reg_snd_free) placement in + (* codegen binop with two pre-codegened exprs *) + let compiled = + match op with + | Binary_add -> True (RType (ADD, Arg 0, reg_fst_free, reg_snd_free)) :: compiled + | Binary_subtract -> + True (RType (SUB, Arg 0, reg_fst_free, reg_snd_free)) :: compiled + | Binary_multiply -> + True (RType (MUL, Arg 0, reg_fst_free, reg_snd_free)) :: compiled + | Binary_less_or_equal -> + True (IType (XORI, Arg 0, Arg 0, 1)) + :: True (RType (SLT, Arg 0, reg_fst_free, reg_snd_free)) + :: compiled + | Binary_greater_or_equal -> + True (IType (XORI, Arg 0, Arg 0, 1)) + :: True (RType (SLT, Arg 0, reg_snd_free, reg_fst_free)) + :: compiled + | Binary_less -> True (RType (SLT, Arg 0, reg_fst_free, reg_snd_free)) :: compiled + | Binary_greater -> + True (RType (SLT, Arg 0, reg_snd_free, reg_fst_free)) :: compiled + | _ -> failwith "Binary op is not yet implemented" + in + a_regs, free_regs, stack, placement, compiled + | If_then_else (cond, thn, els) -> + (* codegen cond expr and store it into t* or s* register *) + let reg_hd = List.hd a_regs in + let _, _, stack, placement, compiled = + codegen_expr a_regs free_regs stack placement compiled cond + in + let reg_cond, free_regs = find_free_reg free_regs in + let compiled = Pseudo (MV (reg_cond, reg_hd)) :: compiled in + let placement = add_unnamed_value_to_placement (Register reg_cond) placement in + (match els with + | Some els -> + let label_else = make_label "else" in + let label_join = make_label "join" in + (* because we want to jump into else when beq Zero 0 => cond should be reversed *) + let compiled = True (IType (XORI, reg_cond, reg_cond, 1)) :: compiled in + let compiled = True (BType (BEQ, Zero, reg_cond, label_else)) :: compiled in + let _, _, stack, placement, compiled = + codegen_expr a_regs free_regs stack placement compiled thn + in + let compiled = Pseudo (J label_join) :: compiled in + let compiled = True (Label label_else) :: compiled in + let _, _, stack, placement, compiled = + codegen_expr a_regs free_regs stack placement compiled els + in + let compiled = True (Label label_join) :: compiled in + a_regs, free_regs, stack, placement, compiled + | None -> + let label_join = make_label "join" in + let compiled = True (BType (BEQ, Zero, reg_cond, label_join)) :: compiled in + let _, _, stack, placement, compiled = + codegen_expr a_regs free_regs stack placement compiled thn + in + let compiled = Pseudo (J label_join) :: compiled in + let compiled = True (Label label_join) :: compiled in + a_regs, free_regs, stack, placement, compiled) + | Apply (func, arg) -> + (* find all t* registers that should be stored *) + let used_temps = + PlacementMap.bindings placement + |> List.filter_map (fun (name, sp) -> + match sp with + | Register (Temp i) -> Some (name, Temp i) + | _ -> None) + in + (* store them on stack with mapping of who is who *) + let stack, compiled, save_map = + List.fold_left + (fun (stack, compiled, save_map) (name, reg) -> + let stack, cur_offset = extend_stack stack 8 in + let instr = True (StackType (SD, reg, Stack cur_offset)) in + stack, instr :: compiled, (name, (reg, cur_offset)) :: save_map) + (stack, compiled, []) + used_temps + in + (* codegen both argument and function exprs *) + let _, _, stack, placement, compiled = + codegen_expr a_regs free_regs stack placement compiled arg + in + let _, _, stack, placement, compiled = + codegen_expr a_regs free_regs stack placement compiled func + in + (* put all values back into corresponding registers, cleaning stack back *) + let stack, compiled = + List.fold_left + (fun (stack, compiled) (_, (reg, offset)) -> + let instr = True (StackType (LD, reg, Stack offset)) in + stack - 8, instr :: compiled) + (stack, compiled) + save_map + in + a_regs, free_regs, stack, placement, compiled + | LetIn (is_rec, let_b, [], expr) -> + let _, _, stack, placement, compiled = + codegen_let_bind a_regs free_regs stack placement compiled (is_rec, let_b) + in + let _, _, stack, placement, compiled = + codegen_expr a_regs free_regs stack placement compiled expr + in + a_regs, free_regs, stack, placement, compiled + | _ -> failwith "Expr is not yet codegened" + +and codegen_let_bind a_regs free_regs global_stack placement compiled = function + | _, Let_bind (PVar (Ident name), [], expr) -> + (*let compiled = if (not !is_start_label_put) then + True(Label(start_label)) :: compiled + else compiled + in + is_start_label_put := true;*) + + (*let required_stack = 64 in + let fresh_stack = global_stack in + let compiled = True(IType(ADDI, Sp, Sp, -required_stack)) :: compiled in*) + let _, _, global_stack, placement, compiled = + codegen_expr a_regs free_regs global_stack placement compiled expr + in + let global_stack, cur_offset = extend_stack global_stack 8 in + let placement = PlacementMap.add name (Offset cur_offset) placement in + let instr = True (StackType (SD, Arg 0, Stack cur_offset)) in + a_regs, free_regs, global_stack, placement, instr :: compiled + | _, Let_bind (PVar (Ident name), args, expr) -> + (* manage function label *) + let func_label = make_label name in + let compiled = True (Label func_label) :: compiled in + let placement = PlacementMap.add name (FuncLabel func_label) placement in + (* (number of args + ra placement) * 32 bytes -- temporary *) + let required_stack = (List.length args + 1) * 32 in + let fresh_stack = 0 in + let compiled = True (IType (ADDI, Sp, Sp, -required_stack)) :: compiled in + (* put ra and args on stack *) + let compiled = True (StackType (SD, Ra, Stack 0)) :: compiled in + let fresh_stack = fresh_stack + 8 in + let a_regs, free_regs, stack, new_placement, compiled = + List.fold_left + (fun acc (i, arg) -> codegen_func_arg arg i acc) + (a_regs, free_regs, fresh_stack, placement, compiled) + (List.mapi (fun i arg -> i, arg) args) + in + let _, _, _, _, compiled = + codegen_expr a_regs free_regs stack new_placement compiled expr + in + let compiled = True (StackType (LD, Ra, Stack 0)) :: compiled in + let compiled = True (IType (ADDI, Sp, Sp, required_stack)) :: compiled in + let compiled = Pseudo RET :: compiled in + a_regs, free_regs, global_stack, placement, compiled + | _ -> failwith "Let bind type is not yet implemented" +;; + +let codegen_statement a_regs free_regs global_stack placement compiled = function + | Let (is_rec, let_b, []) -> + let is_global, compiled = + match !is_start_label_put, let_b with + | false, Let_bind (PVar _, [], _) -> + is_start_label_put := true; + (* initializing global stack *) + true, True (IType (ADDI, Sp, Sp, -64)) :: True (Label start_label) :: compiled + | _, _ -> false, compiled + in + let _, _, global_stack, placement, compiled = + codegen_let_bind a_regs free_regs global_stack placement compiled (is_rec, let_b) + in + let compiled = + if is_global then True (IType (ADDI, Sp, Sp, 64)) :: compiled else compiled + in + a_regs, free_regs, global_stack, placement, compiled + | _ -> failwith "Statement is not yet implemented" +;; + +let codegen_construction a_regs free_regs global_stack placement compiled = function + | Expr e -> codegen_expr a_regs free_regs global_stack placement compiled e + | Statement s -> codegen_statement a_regs free_regs global_stack placement compiled s +;; + +let codegen cs = + let placement = PlacementMap.empty in + let placement = PlacementMap.add "print_int" (FuncLabel "print_int") placement in + let free_regs = regs in + let global_stack = 64 in + let _, _, _, _, instructions = + List.fold_left + (fun (a_regs, free_regs, stack, placement, compiled) c -> + codegen_construction a_regs free_regs stack placement compiled c) + (a_regs, free_regs, global_stack, placement, []) + cs + in + let instructions = True Ecall :: Pseudo (LI (Arg 7, 93)) :: instructions in + List.rev instructions +;; diff --git a/OMLet/lib/codegenTypes.ml b/OMLet/lib/codegenTypes.ml new file mode 100644 index 00000000..e9daa058 --- /dev/null +++ b/OMLet/lib/codegenTypes.ml @@ -0,0 +1,166 @@ +(** Copyright 2025, Ksenia Kotelnikova , Sofya Kozyreva *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Format + +type reg = + | Zero + | Ra + | Sp + | Stack of int (* offset from sp *) + | Temp of int + | Saved of int + | Arg of int + +(* for mapping names with the way they can be reached *) +type storage_place = + | Offset of int (* for variables on stack *) + | FuncLabel of string (* for functions with labels *) + | Register of reg (* for values (typically, patterns) in registers *) + +let temp i = Temp i +let saved i = Saved i +let arg i = Arg i + +(* small subset enough to codegen factorial *) +type rtype_op = + | ADD + | SUB + | MUL + | AND + | OR + | XOR + | SLT + +type itype_op = + | ADDI + (* | LI *) + | JALR + | SLTI + | XORI + +type stack_op = + | LW + | SW + | LD + | SD + +(* type stype_op = SW *) + +type btype_op = + | BEQ + | BNE + | BLE + +type utype_op = + | LUI + | AUIPC + +type jtype_op = JAL + +type pseudo_instr = + | LI of reg * int + | MV of reg * reg + | J of string + | RET + | CALL of string + +type true_instr = + | RType of rtype_op * reg * reg * reg (* op rd rs1 rs2 *) + | IType of itype_op * reg * reg * int (* op rd rs1 imm *) + | StackType of stack_op * reg * reg (* op rd offset(rs) *) + (*| SType of stype_op * reg * reg * int (* op rs2 rs1 imm *) *) + | BType of btype_op * reg * reg * string (* op rs1 rs2 imm/label offset *) + | UType of utype_op * reg * int (* op rd imm *) + | JType of jtype_op * reg * int (* op rd imm/label offset *) + | Label of string + | Ecall + +type instr = + | True of true_instr + | Pseudo of pseudo_instr + +let pp_reg fmt = function + | Zero -> fprintf fmt "x0" + | Ra -> fprintf fmt "ra" + | Sp -> fprintf fmt "sp" + | Stack o -> fprintf fmt "%d(sp)" o + | Temp i -> fprintf fmt "t%d" i + | Saved i -> fprintf fmt "s%d" i + | Arg i -> fprintf fmt "a%d" i +;; + +let pp_rtype_op fmt = function + | ADD -> fprintf fmt "add" + | SUB -> fprintf fmt "sub" + | MUL -> fprintf fmt "mul" + | AND -> fprintf fmt "and" + | OR -> fprintf fmt "or" + | XOR -> fprintf fmt "xor" + | SLT -> fprintf fmt "slt" +;; + +let pp_itype_op fmt = function + | ADDI -> fprintf fmt "addi" + (* | LI -> fprintf fmt "li" *) + | JALR -> fprintf fmt "jalr" + | SLTI -> fprintf fmt "slti" + | XORI -> fprintf fmt "xori" +;; + +let pp_stack_op fmt = function + | LW -> fprintf fmt "lw" + | SW -> fprintf fmt "sw" + | LD -> fprintf fmt "ld" + | SD -> fprintf fmt "sd" +;; + +(* let pp_stype_op fmt = function + | SW -> fprintf fmt "sw" +;; *) + +let pp_btype_op fmt = function + | BEQ -> fprintf fmt "beq" + | BNE -> fprintf fmt "bne" + | BLE -> fprintf fmt "ble" +;; + +let pp_utype_op fmt = function + | LUI -> fprintf fmt "lui" + | AUIPC -> fprintf fmt "auipc" +;; + +let pp_jtype_op fmt = function + | JAL -> fprintf fmt "jal" +;; + +let pp_pseudo_instr fmt = function + | LI (r, imm) -> fprintf fmt "@[\tli %a, %d@]@." pp_reg r imm + | MV (r1, r2) -> fprintf fmt "@[\tmv %a, %a@]@." pp_reg r1 pp_reg r2 + | J l -> fprintf fmt "@[\tj %s@]@." l + | RET -> fprintf fmt "@[\tret @]@." + | CALL l -> fprintf fmt "@[\tcall %s@]@." l +;; + +let pp_true_instr fmt = function + | RType (op, rd, rs1, rs2) -> + fprintf fmt "@[\t%a %a, %a, %a@]@." pp_rtype_op op pp_reg rd pp_reg rs1 pp_reg rs2 + | IType (op, rd, rs1, imm) -> + fprintf fmt "@[\t%a %a, %a, %d@]@." pp_itype_op op pp_reg rd pp_reg rs1 imm + | StackType (op, rd, rs) -> + fprintf fmt "@[\t%a %a, %a@]@." pp_stack_op op pp_reg rd pp_reg rs + (* | SType (op, rs2, rs1, imm) -> + fprintf fmt "@[\t%a %a, %d(%a)@]@." pp_stype_op op pp_reg rs2 imm pp_reg rs1 *) + | BType (op, rs1, rs2, l) -> + fprintf fmt "@[\t%a %a, %a, %s@]@." pp_btype_op op pp_reg rs1 pp_reg rs2 l + | UType (op, rd, imm) -> fprintf fmt "@[\t%a %a, %d@]@." pp_utype_op op pp_reg rd imm + | JType (op, rd, imm) -> fprintf fmt "@[\t%a %a, %d@]@." pp_jtype_op op pp_reg rd imm + | Label l -> fprintf fmt "%s:@." l + | Ecall -> fprintf fmt "ecall@." +;; + +let pp_instr = function + | True i -> pp_true_instr std_formatter i + | Pseudo i -> pp_pseudo_instr std_formatter i +;; diff --git a/OMLet/lib/dune b/OMLet/lib/dune new file mode 100644 index 00000000..4f304cb7 --- /dev/null +++ b/OMLet/lib/dune @@ -0,0 +1,18 @@ +(library + (name OMLet) + (public_name OMLet) + (modules + Ast + Parser + PrettyPrinter + KeywordChecker + TypesPp + TypedTree + Codegen + CodegenTypes) + (libraries base angstrom qcheck-core qcheck-core.runner) + (instrumentation + (backend bisect_ppx)) + (preprocess + (pps ppx_deriving.show ppx_inline_test ppx_expect ppx_deriving_qcheck)) + (inline_tests)) diff --git a/OMLet/lib/keywordChecker.ml b/OMLet/lib/keywordChecker.ml new file mode 100644 index 00000000..ff3687d1 --- /dev/null +++ b/OMLet/lib/keywordChecker.ml @@ -0,0 +1,27 @@ +(** Copyright 2024-2025, Ksenia Kotelnikova , Gleb Nasretdinov *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +let is_keyword = function + | "if" + | "then" + | "else" + | "let" + | "in" + | "not" + | "true" + | "false" + | "fun" + | "match" + | "with" + | "and" + | "Some" + | "None" + | "function" + | "->" + | "|" + | ":" + | "::" + | "_" -> true + | _ -> false +;; diff --git a/OMLet/lib/parser.ml b/OMLet/lib/parser.ml new file mode 100644 index 00000000..b7118cd2 --- /dev/null +++ b/OMLet/lib/parser.ml @@ -0,0 +1,435 @@ +(** Copyright 2024, Sofya Kozyreva, Maksim Shipilov *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Angstrom +open Base +open Ast +open TypedTree + +(*---------------------Control characters---------------------*) + +let pwhitespace = take_while Char.is_whitespace +let pws1 = take_while1 Char.is_whitespace +let pstoken s = pwhitespace *> string s +let ptoken s = pwhitespace *> s +let pparens p = pstoken "(" *> p <* pstoken ")" +let psqparens p = pstoken "[" *> p <* pstoken "]" + +(*------------------Prefix operators-----------------*) + +let ppref_op = + let pref_op = + ptoken + (let* first_char = + take_while1 (function + | '|' + | '~' + | '?' + | '<' + | '>' + | '!' + | '&' + | '*' + | '/' + | '=' + | '+' + | '-' + | '@' + | '^' -> true + | _ -> false) + in + let* rest = + take_while (function + | '.' + | ':' + | '|' + | '~' + | '?' + | '<' + | '>' + | '!' + | '&' + | '*' + | '/' + | '=' + | '+' + | '-' + | '@' + | '^' -> true + | _ -> false) + in + match first_char, rest with + | "|", "" -> fail "Prefix operator cannot be called | " + | "~", "" -> fail "Prefix operator cannot be called ~ " + | "?", "" -> fail "Prefix operator cannot be called ? " + | _ -> return (Ident (first_char ^ rest))) + in + pparens pref_op +;; + +let pEinf_op pexpr = + ppref_op + >>= fun inf_op -> + lift2 + (fun left right -> Apply (Apply (Variable inf_op, left), right)) + (pws1 *> pexpr) + (pwhitespace *> pexpr) +;; + +(* let pEinf_op = + pwhitespace *> pinf_op >>= fun inf_op -> return (fun e1 e2 -> Efun_application (Efun_application (Evar inf_op, e1), e2)) + ;; *) + +(*-------------------------Constants/Variables-------------------------*) + +let pint = + pwhitespace *> take_while1 Char.is_digit + >>= fun str -> + match Stdlib.int_of_string_opt str with + | Some n -> return (Int_lt n) + | None -> fail "Integer value exceeds the allowable range for the int type" +;; + +let pbool = + choice [ pstoken "true" *> return true; pstoken "false" *> return false ] + >>| fun x -> Bool_lt x +;; + +let pstr = + pwhitespace *> char '"' *> take_till (Char.equal '"') + <* char '"' + >>| fun x -> String_lt x +;; + +let punit = pstoken "()" *> return Unit_lt +let const = choice [ pint; pbool; pstr; punit ] + +let varname = + ptoken + (let* first_char = + take_while1 (fun ch -> Char.is_lowercase ch || Char.equal ch '_') + in + let* rest = + take_while (fun ch -> + Char.is_alpha ch || Char.is_digit ch || Char.equal ch '_' || Char.equal ch '\'') + in + match first_char, rest with + | _, _ when KeywordChecker.is_keyword (first_char ^ rest) -> + fail "Variable name conflicts with a keyword" + | "_", "" -> fail "Variable cannot be called _" + | _ -> return (first_char ^ rest)) +;; + +let patomic_type = + choice + [ pstoken "int" *> return (Primitive "int") + ; pstoken "string" *> return (Primitive "string") + ; pstoken "bool" *> return (Primitive "bool") + ; pstoken "unit" *> return (Primitive "unit") + ] +;; + +let plist_type ptype_opt = ptype_opt >>= fun t -> pstoken "list" *> return (Type_list t) + +let ptuple_type ptype_opt = + let star = pstoken "*" in + lift3 + (fun t1 t2 rest -> Type_tuple (t1, t2, rest)) + ptype_opt + (star *> ptype_opt) + (many (star *> ptype_opt)) +;; + +let rec pfun_type ptype_opt = + ptype_opt + >>= fun left -> + pstoken "->" *> pfun_type ptype_opt + >>= (fun right -> return (Arrow (left, right))) + <|> return left +;; + +let poption_type ptype_opt = ptype_opt >>= fun t -> pstoken "option" *> return (TOption t) +(* let precord_type = varname >>= fun t -> return (TRecord t) *) + +let ptype_helper = + fix (fun typ -> + (* let atom = patomic_type <|> pparens typ <|> precord_type in *) + let atom = patomic_type <|> pparens typ in + let list = plist_type atom <|> atom in + let option = poption_type list <|> list in + let tuple = ptuple_type option <|> option in + let func = pfun_type tuple <|> tuple in + func) +;; + +let ptype = + let t = ptype_helper in + pstoken ":" *> t +;; + +let pident = lift (fun t -> Ident t) varname <|> ppref_op +let pat_var = pident >>| fun x -> PVar x +let pat_const = const >>| fun x -> PConst x +let pat_any = pstoken "_" *> return Wild + +let pat_tuple pat = + let commas = pstoken "," in + let tuple = + lift3 + (fun p1 p2 rest -> PTuple (p1, p2, rest)) + pat + (commas *> pat) + (many (commas *> pat)) + <* pwhitespace + in + pparens tuple <|> tuple +;; + +let pat_list pat = + let semicols = pstoken ";" in + psqparens (sep_by semicols pat >>| fun patterns -> PList patterns) +;; + +let rec pat_cons pat = + let cons = + pat + >>= fun head -> + pstoken "::" *> pat_cons pat + >>= (fun tail -> return (PCons (head, tail))) + <|> return head + in + pparens cons <|> cons +;; + +let pat_option pat = + lift + (fun e -> POption e) + (pstoken "Some" *> pat >>| (fun e -> Some e) <|> (pstoken "None" >>| fun _ -> None)) +;; + +let pat_ty pat = + let ty_pat = lift2 (fun pat ty -> PConstraint (pat, ty)) pat ptype in + ty_pat <|> pparens ty_pat +;; + +let ppattern = + fix (fun pat -> + let patom = + pat_const <|> pat_var <|> pat_any <|> pparens pat <|> pparens (pat_ty pat) + in + let poption = pat_option patom <|> patom in + let pptuple = pat_tuple poption <|> poption in + let pplist = pat_list pptuple <|> pptuple in + let pcons = pat_cons pplist <|> pplist in + let pty = pat_ty pcons <|> pcons in + pty) +;; + +(*------------------Binary operators-----------------*) + +let pbinop op token = + pwhitespace *> pstoken token *> return (fun e1 e2 -> Bin_expr (op, e1, e2)) +;; + +let add = pbinop Binary_add "+" +let sub = pbinop Binary_subtract "-" +let mult = pbinop Binary_multiply "*" +let div = pbinop Binary_divide "/" + +let relation = + choice + [ pbinop Binary_equal "=" + ; pbinop Binary_unequal "<>" + ; pbinop Binary_less_or_equal "<=" + ; pbinop Binary_greater_or_equal ">=" + ; pbinop Binary_less "<" + ; pbinop Binary_greater ">" + ] +;; + +let logic = choice [ pbinop Logical_and "&&"; pbinop Logical_or "||" ] +let cons = pbinop Binary_cons "::" + +(*------------------Unary operators-----------------*) + +let punop op token = + pwhitespace *> pstoken token *> return (fun e1 -> Unary_expr (op, e1)) +;; + +let negation = punop Unary_not "not" <* pws1 +let neg_sign = punop Unary_minus "-" +(* let pos_sign = punop Positive "+" *) + +(*------------------------Expressions----------------------*) + +let chain e op = + let rec go acc = lift2 (fun f x -> f acc x) op e >>= go <|> return acc in + e >>= go +;; + +let rec chainr e op = + let* left = e in + (let* f = op in + let* right = chainr e op in + return (f left right)) + <|> return left +;; + +let un_chain e op = + fix (fun self -> op >>= (fun unop -> self >>= fun e -> return (unop e)) <|> e) +;; + +let rec pbody pexpr = + ppattern + >>= fun p -> + many ppattern + >>= fun patterns -> + pbody pexpr <|> (pstoken "=" *> pexpr >>| fun e -> Lambda (p, patterns, e)) +;; + +let p_let_bind p_expr = + let* name = ppattern <|> (pparens ppref_op >>| fun oper -> PVar oper) in + let* args = many ppattern in + let* body = pstoken "=" *> p_expr in + return (Let_bind (name, args, body)) +;; + +let plet pexpr = + pstoken "let" + *> lift4 + (fun rec_flag value_bindings and_bindings body -> + LetIn (rec_flag, value_bindings, and_bindings, body)) + (pstoken "rec" *> (pws1 *> return Rec) <|> return Nonrec) + (p_let_bind pexpr) + (many (pstoken "and" *> p_let_bind pexpr)) + (pstoken "in" *> pexpr) +;; + +let pEfun pexpr = + (* if there's only one argument, ascription without parentheses is possible *) + let single_arg = + lift2 + (fun arg body -> Lambda (arg, [], body)) + (pstoken "fun" *> pws1 *> ppattern) + (pstoken "->" *> pexpr) + in + let mult_args = + lift3 + (fun arg args body -> Lambda (arg, args, body)) + (pstoken "fun" *> pws1 *> ppattern) + (many ppattern) + (pstoken "->" *> pexpr) + in + single_arg <|> mult_args +;; + +let pElist pexpr = + let semicols = pstoken ";" in + psqparens (sep_by semicols pexpr <* (semicols <|> pwhitespace) >>| fun x -> List x) +;; + +let pEtuple pexpr = + let commas = pstoken "," in + let tuple = + lift3 + (fun e1 e2 rest -> Tuple (e1, e2, rest)) + (pexpr <* commas) + pexpr + (many (commas *> pexpr)) + <* pwhitespace + in + pparens tuple <|> tuple +;; + +let pEconst = const >>| fun x -> Const x +let pEvar = pident >>| fun x -> Variable x +let pEapp e = chain e (return (fun e1 e2 -> Apply (e1, e2))) + +let pEoption pexpr = + lift + (fun e -> Option e) + (pstoken "Some" *> pexpr >>| (fun e -> Some e) <|> (pstoken "None" >>| fun _ -> None)) +;; + +let pbranch pexpr = + lift3 + (fun e1 e2 e3 -> If_then_else (e1, e2, e3)) + (pstoken "if" *> pexpr) + (pstoken "then" *> pexpr) + (pstoken "else" *> pexpr >>| (fun e3 -> Some e3) <|> return None) +;; + +let pEmatch pexpr = + let parse_case = + lift2 (fun pat exp -> pat, exp) (ppattern <* pstoken "->") (pwhitespace *> pexpr) + in + let match_cases = + lift3 + (fun e case case_l -> Match (e, case, case_l)) + (pstoken "match" *> pexpr <* pstoken "with") + ((pstoken "|" <|> pwhitespace) *> parse_case) + (many (pstoken "|" *> parse_case)) + in + let function_cases = + lift2 + (fun case case_l -> Function (case, case_l)) + (pstoken "function" *> pstoken "|" *> parse_case + <|> pstoken "function" *> pwhitespace *> parse_case) + (many (pstoken "|" *> parse_case)) + in + function_cases <|> match_cases +;; + +let pEconstraint pexpr = lift2 (fun expr t -> EConstraint (expr, t)) pexpr ptype + +let pexpr = + fix (fun expr -> + let atom_expr = + choice + [ pEconst + ; pEvar + ; pparens expr + ; pElist expr + ; pEfun expr + ; pEoption expr + ; pEmatch expr (* ; pErecord expr *) + ; pparens (pEconstraint expr) + ] + in + let let_expr = plet expr in + let ite_expr = pbranch (expr <|> atom_expr) <|> atom_expr in + let inf_op = pEinf_op (ite_expr <|> atom_expr) <|> ite_expr in + let app_expr = pEapp (inf_op <|> atom_expr) <|> inf_op in + let un_expr = choice [ un_chain app_expr negation; un_chain app_expr neg_sign ] in + let factor_expr = chain un_expr (mult <|> div) in + let sum_expr = chain factor_expr (add <|> sub) in + let rel_expr = chain sum_expr relation in + let log_expr = chain rel_expr logic in + let tuple_expr = pEtuple log_expr <|> log_expr in + (* let field_expr = pEfield_access tuple_expr <|> tuple_expr in + let cons_expr = chainr field_expr cons in *) + let cons_expr = chainr tuple_expr cons in + choice [ let_expr; cons_expr ]) +;; + +let pconstruction = + let pseval = pexpr >>| fun e -> Expr e in + let psvalue = + pstoken "let" + *> lift3 + (fun r id id_list -> Let (r, id, id_list)) + (pstoken "rec" *> (pws1 *> return Rec) <|> return Nonrec) + (p_let_bind pexpr) + (many (pstoken "and" *> p_let_bind pexpr)) + >>| fun s -> Statement s + in + choice [ pseval; psvalue ] +;; + +let pconstructions = + let semicolons = many (pstoken ";;") in + sep_by semicolons pconstruction <* semicolons <* pwhitespace +;; + +let parse str = parse_string ~consume:All pconstructions str diff --git a/OMLet/lib/prettyPrinter.ml b/OMLet/lib/prettyPrinter.ml new file mode 100644 index 00000000..5233b403 --- /dev/null +++ b/OMLet/lib/prettyPrinter.ml @@ -0,0 +1,153 @@ +(** Copyright 2024-2025, Ksenia Kotelnikova , Gleb Nasretdinov *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Ast +open Format +open TypesPp + +let pp_bin_op fmt = function + | Binary_equal -> fprintf fmt "= " + | Binary_unequal -> fprintf fmt "<> " + | Binary_less -> fprintf fmt "< " + | Binary_less_or_equal -> fprintf fmt "<= " + | Binary_greater -> fprintf fmt "> " + | Binary_greater_or_equal -> fprintf fmt ">= " + | Binary_add -> fprintf fmt "+ " + | Binary_subtract -> fprintf fmt "- " + | Binary_multiply -> fprintf fmt "* " + | Logical_or -> fprintf fmt "|| " + | Logical_and -> fprintf fmt "&& " + | Binary_divide -> fprintf fmt "/ " + | Binary_or_bitwise -> fprintf fmt "||| " + | Binary_xor_bitwise -> fprintf fmt "^^^ " + | Binary_and_bitwise -> fprintf fmt "&&& " + | Binary_cons -> fprintf fmt "::" +;; + +let pp_unary_op fmt = function + | Unary_minus -> fprintf fmt "-" + | Unary_not -> fprintf fmt "not " +;; + +let pp_rec_flag fmt = function + | Rec -> fprintf fmt "rec" + | Nonrec -> () +;; + +let rec pp_pattern fmt = function + | Wild -> fprintf fmt "_ " + | PList l -> + fprintf fmt "["; + pp_print_list ~pp_sep:(fun fmt () -> fprintf fmt "; ") pp_pattern fmt l; + fprintf fmt "]" + | PCons (l, r) -> fprintf fmt "(%a) :: (%a) " pp_pattern l pp_pattern r + | PTuple (p1, p2, rest) -> + fprintf fmt "("; + pp_print_list + ~pp_sep:(fun fmt () -> fprintf fmt ", ") + pp_pattern + fmt + (p1 :: p2 :: rest); + fprintf fmt ")" + | PConst literal -> fprintf fmt "%a " pp_expr (Const literal) + | PVar (Ident name) -> fprintf fmt "%s " name + | POption p -> + (match p with + | None -> fprintf fmt "None " + | Some p -> fprintf fmt "Some (%a) " pp_pattern p) + | PConstraint (p, t) -> fprintf fmt "(%a : %a) " pp_pattern p pp_typ t + +and pp_expr fmt expr = + match expr with + | Const (Int_lt i) -> fprintf fmt "%d " i + | Const (Bool_lt b) -> fprintf fmt "%b " b + | Const (String_lt s) -> fprintf fmt "%S" s + | Const Unit_lt -> fprintf fmt "() " + | List l -> + fprintf fmt "["; + pp_print_list ~pp_sep:(fun fmt () -> fprintf fmt "; ") pp_expr fmt l; + fprintf fmt "]" + | Tuple (e1, e2, rest) -> + fprintf fmt "("; + pp_print_list + ~pp_sep:(fun fmt () -> fprintf fmt ", ") + pp_parens_expr + fmt + (e1 :: e2 :: rest); + fprintf fmt ")" + | Function ((pat1, expr1), cases) -> + fprintf fmt "function "; + List.iter + (fun (pat, expr) -> fprintf fmt "| %a -> (%a) \n" pp_pattern pat pp_expr expr) + ((pat1, expr1) :: cases) + | Match (value, (pat1, expr1), cases) -> + fprintf fmt "match (%a) with \n" pp_expr value; + List.iter + (fun (pat, expr) -> fprintf fmt "| %a -> (%a) \n" pp_pattern pat pp_expr expr) + ((pat1, expr1) :: cases) + | Variable (Ident name) -> fprintf fmt "%s " name + | Unary_expr (op, expr) -> fprintf fmt "%a (%a)" pp_unary_op op pp_expr expr + | Bin_expr (op, left, right) -> + fprintf fmt "(%a) %a (%a)" pp_expr left pp_bin_op op pp_expr right + | If_then_else (cond, then_body, else_body) -> + fprintf fmt "if (%a) then (%a) " pp_expr cond pp_expr then_body; + (match else_body with + | Some body -> fprintf fmt "else %a " pp_expr body + | None -> ()) + | Lambda (arg1, args, body) -> + fprintf fmt "fun "; + List.iter (fun pat -> fprintf fmt "(%a) " pp_pattern pat) (arg1 :: args); + fprintf fmt "-> %a " pp_expr body + | Apply (Apply (Variable (Ident op), left), right) + when String.for_all (fun c -> String.contains "!$%&*+-./:<=>?@^|~" c) op -> + fprintf fmt "(%a) %s (%a)" pp_expr left op pp_expr right + | Apply (func, arg) -> fprintf fmt "(%a) %a" pp_expr func pp_expr arg + | LetIn (rec_flag, let_bind, let_bind_list, in_expr) -> + fprintf fmt "let %a " pp_rec_flag rec_flag; + pp_print_list + ~pp_sep:(fun fmt () -> fprintf fmt "\n\nand ") + pp_let_bind + fmt + (let_bind :: let_bind_list); + fprintf fmt "in\n"; + fprintf fmt "%a " pp_expr in_expr + | Option e -> + (match e with + | None -> fprintf fmt "None " + | Some e -> fprintf fmt "Some (%a)" pp_expr e) + | EConstraint (e, t) -> fprintf fmt "(%a : %a) " pp_expr e pp_typ t + +and pp_args fmt args = + let open Format in + pp_print_list + ~pp_sep:pp_print_space + (fun fmt arg -> fprintf fmt "%a" pp_pattern arg) + fmt + args + +and pp_let_bind fmt = function + | Let_bind (name, args, body) -> + fprintf fmt "%a %a = %a " pp_pattern name pp_args args pp_expr body + +and pp_parens_expr fmt expr = fprintf fmt "(%a)" pp_expr expr + +let pp_statement fmt = function + | Let (rec_flag, let_bind, let_bind_list) -> + fprintf fmt "let %a " pp_rec_flag rec_flag; + pp_print_list + ~pp_sep:(fun fmt () -> fprintf fmt "\n\nand ") + pp_let_bind + fmt + (let_bind :: let_bind_list) +;; + +let pp_construction fmt = function + | Expr e -> fprintf fmt "%a\n" pp_expr e + | Statement s -> fprintf fmt "%a\n" pp_statement s +;; + +let pp_p_res fmt = function + | Some c -> pp_construction fmt c + | None -> fprintf fmt "Error occured\n" +;; diff --git a/OMLet/lib/typedTree.ml b/OMLet/lib/typedTree.ml new file mode 100644 index 00000000..cb70142f --- /dev/null +++ b/OMLet/lib/typedTree.ml @@ -0,0 +1,53 @@ +(** Copyright 2024-2025, Ksenia Kotelnikova , Gleb Nasretdinov *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +type binder = int [@@deriving show { with_path = false }, qcheck] + +type typ = + | Primitive of (string[@gen gen_primitive]) + | Type_var of binder + | Arrow of typ * typ + | Type_list of typ + | Type_tuple of typ * typ * typ list + | TOption of typ + | TActPat of string * typ (** [Even(int)] *) + | Choice of (string, typ, Base.String.comparator_witness) Base.Map.t + (** [Choice] *) +(* Map of Name/typ is Choice of , Name/typ is equiavalent to TActPat *) + +let choice_to_list ch = + Base.List.map (Base.Map.to_alist ch) ~f:(fun (name, typ) -> TActPat (name, typ)) +;; + +let choice_set_many map list = + Base.List.fold ~init:map list ~f:(fun map (name, typ) -> + Base.Map.set map ~key:name ~data:typ) +;; + +let gen_typ_primitive = + QCheck.Gen.(oneofl [ "string"; "int"; "unit"; "bool" ] >|= fun t -> Primitive t) +;; + +let arrow_of_types first_types last_type = + let open Base in + List.fold_right first_types ~init:last_type ~f:(fun left right -> Arrow (left, right)) +;; + +module VarSet = struct + include Stdlib.Set.Make (Int) + + let pp fmt s = + Format.fprintf fmt "[ "; + iter (Format.fprintf fmt "%d; ") s; + Format.fprintf fmt "]" + ;; +end + +type binder_set = VarSet.t [@@deriving show { with_path = false }] +type scheme = Scheme of binder_set * typ + +let int_typ = Primitive "int" +let bool_typ = Primitive "bool" +let string_typ = Primitive "string" +let unit_typ = Primitive "unit" diff --git a/OMLet/lib/typesPp.ml b/OMLet/lib/typesPp.ml new file mode 100644 index 00000000..e129bb56 --- /dev/null +++ b/OMLet/lib/typesPp.ml @@ -0,0 +1,35 @@ +(** Copyright 2024-2025, Ksenia Kotelnikova , Gleb Nasretdinov *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open TypedTree +open Format + +let rec pp_typ fmt = function + | Primitive s -> fprintf fmt "%s" s + | Type_var var -> fprintf fmt "'%d" var + | Arrow (fst, snd) -> + (match fst with + | Arrow _ -> fprintf fmt "(%a) -> %a" pp_typ fst pp_typ snd + | _ -> fprintf fmt "%a -> %a" pp_typ fst pp_typ snd) + | Type_list typ -> fprintf fmt "%a list" pp_typ typ + | Type_tuple (first, second, rest) -> + Format.pp_print_list + ~pp_sep:(fun fmt () -> fprintf fmt " * ") + (fun fmt typ -> + match typ with + | Arrow _ -> fprintf fmt "(%a)" pp_typ typ + | _ -> pp_typ fmt typ) + fmt + (first :: second :: rest) + | TOption t -> + (match t with + | Type_tuple _ | Arrow _ -> fprintf fmt "(%a) option" pp_typ t + | t -> fprintf fmt "%a option" pp_typ t) + | TActPat (name, t) -> fprintf fmt "%s (%a)" name pp_typ t + | Choice map -> + fprintf fmt "Choice<"; + (pp_print_list ~pp_sep:(fun fmt () -> fprintf fmt ", ") pp_typ fmt) + (choice_to_list map); + fprintf fmt ">" +;; diff --git a/OMLet/many_tests/.ocamlformat b/OMLet/many_tests/.ocamlformat new file mode 100644 index 00000000..e3346c16 --- /dev/null +++ b/OMLet/many_tests/.ocamlformat @@ -0,0 +1 @@ +disable=true diff --git a/OMLet/many_tests/typed/001fac.ml b/OMLet/many_tests/typed/001fac.ml new file mode 100644 index 00000000..62b91b94 --- /dev/null +++ b/OMLet/many_tests/typed/001fac.ml @@ -0,0 +1,6 @@ +let rec fac n = if n <= 1 then 1 else n * fac (n - 1) + +let main = + let () = print_int (fac 4) in + 0 +;; diff --git a/OMLet/many_tests/typed/dune b/OMLet/many_tests/typed/dune new file mode 100644 index 00000000..f66331bb --- /dev/null +++ b/OMLet/many_tests/typed/dune @@ -0,0 +1,2 @@ +(cram + (deps ./001fac.ml)) diff --git a/OMLet/many_tests/typed/typed.t b/OMLet/many_tests/typed/typed.t new file mode 100644 index 00000000..ab349b10 --- /dev/null +++ b/OMLet/many_tests/typed/typed.t @@ -0,0 +1,2 @@ + $ wc 001fac.ml + 6 30 105 001fac.ml diff --git a/OMLet/out.ll b/OMLet/out.ll new file mode 100644 index 00000000..9f73f8af --- /dev/null +++ b/OMLet/out.ll @@ -0,0 +1,12 @@ +; ModuleID = 'main' +source_filename = "main" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-linux-gnu" + +declare void @print_int(i64) + +define i64 @main() { +entry: + call void @print_int(i64 70) + ret i64 0 +} diff --git a/OMLet/repl/dune b/OMLet/repl/dune new file mode 100644 index 00000000..2d164fe1 --- /dev/null +++ b/OMLet/repl/dune @@ -0,0 +1,6 @@ +(executable + (name repl) + (public_name OMLet_main) + (libraries OMLet base stdio) + (instrumentation + (backend bisect_ppx))) diff --git a/OMLet/repl/input.txt b/OMLet/repl/input.txt new file mode 100644 index 00000000..e69de29b diff --git a/OMLet/repl/repl.ml b/OMLet/repl/repl.ml new file mode 100644 index 00000000..e621c79f --- /dev/null +++ b/OMLet/repl/repl.ml @@ -0,0 +1,68 @@ +(** Copyright 2024, Sofya Kozyreva, Maksim Shipilov *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open OMLet.Ast +open OMLet.Parser +open OMLet.Codegen +open OMLet.CodegenTypes +open Base +open Stdio + +type stop_after = + | SA_parsing + | SA_never + +type opts = + { mutable dump_parsetree : bool + ; mutable stop_after : stop_after + ; mutable input_file : string option + } + +let eval ast = + ignore (show_constructions ast); + () +;; + +let run_single dump_parsetree stop_after eval input_source = + let text = + match input_source with + | Some file_name -> In_channel.read_all file_name |> Stdlib.String.trim + | None -> In_channel.input_all stdin |> Stdlib.String.trim + in + match parse text with + | Error e -> Stdlib.Format.printf "Parsing error: %s\n%!" e + | Ok ast -> + if dump_parsetree then print_endline (show_constructions ast); + let instructions = codegen ast in + let _ = Stdlib.Format.fprintf Stdlib.Format.std_formatter ".global _start\n" in + let _ = Stdlib.List.iter pp_instr instructions in + (match stop_after with + | SA_parsing -> () + | SA_never -> eval ast) +;; + +let () = + let opts = { dump_parsetree = false; stop_after = SA_never; input_file = None } in + let () = + Stdlib.Arg.parse + [ ( "-dparsetree" + , Stdlib.Arg.Unit (fun () -> opts.dump_parsetree <- true) + , "Dump parse tree, don't evaluate anything" ) + ; ( "-stop-after" + , Stdlib.Arg.String + (function + | "parsing" -> opts.stop_after <- SA_parsing + | _ -> failwith "Bad argument for -stop-after") + , "Stop after parsing" ) + ; ( "-fromfile" + , Stdlib.Arg.String (fun filename -> opts.input_file <- Some filename) + , "Read code from the specified file" ) + ] + (fun _ -> + Stdlib.Format.eprintf "Positional arguments are not supported\n"; + Stdlib.exit 1) + "Read-Eval-Print-Loop for custom language" + in + run_single opts.dump_parsetree opts.stop_after (fun ast -> eval ast) opts.input_file +;; diff --git a/OMLet/test_qc/dune b/OMLet/test_qc/dune new file mode 100644 index 00000000..9ca61266 --- /dev/null +++ b/OMLet/test_qc/dune @@ -0,0 +1,13 @@ +(executable + (name test_qc) + (modules test_qc) + (libraries qcheck angstrom) + (preprocess + (pps ppx_deriving_qcheck ppx_deriving.show))) + +(rule + (alias runtest) + (deps + (:< test_qc.exe)) + (action + (run %{<}))) diff --git a/OMLet/test_qc/test_qc.ml b/OMLet/test_qc/test_qc.ml new file mode 100644 index 00000000..fa125b7f --- /dev/null +++ b/OMLet/test_qc/test_qc.ml @@ -0,0 +1,98 @@ +[@@@ocaml.text "/*"] + +(** Copyright 2023-2024, Kakadu and contributors *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +[@@@ocaml.text "/*"] + +(* run this test via `dune test --force` *) + +module AST = struct + type t = + | Const of (int[@gen QCheck.Gen.return 1]) + | Add of t * t + [@@deriving qcheck, show { with_path = false }] +end + +module PP = struct + let rec pp ppf = function + | AST.Const n -> Format.fprintf ppf "%d" n + (* | Add (l, r) -> Format.fprintf ppf "%a+%a" pp l pp r *) + | Add (l, r) -> Format.fprintf ppf "(%a+%a)" pp l pp r + ;; +end + +module Parser = struct + open Angstrom + + let prio expr table = + let len = Array.length table in + let rec helper level = + if level >= len + then expr + else ( + let xs = table.(level) in + return (List.fold_left (fun acc (op, r) -> op acc r)) + <*> helper (level + 1) + <*> many + (choice + (List.map + (fun (op, f) -> op *> helper (level + 1) >>= fun r -> return (f, r)) + xs))) + in + helper 0 + ;; + + let expr_small = + let code0 = Char.code '0' in + Angstrom.satisfy (function + | '0' .. '9' -> true + | _ -> false) + >>| fun c -> AST.Const (Char.code c - code0) + ;; + + let expr = + fix (fun self -> + let add a b = AST.Add (a, b) in + prio (expr_small <|> (char '(' *> self <* char ')')) [| [ char '+', add ] |]) + ;; +end + +let rec shrink_expr = + let open QCheck.Iter in + (* fun _ -> empty *) + function + | AST.Const _ -> empty + | Add (l, r) -> + of_list [ l; r ] + <+> (shrink_expr l >>= fun l -> return (AST.Add (l, r))) + <+> (shrink_expr r >>= fun r -> return (AST.Add (l, r))) +;; + +let arbitrary_expr = + (* let open QCheck.Iter in *) + QCheck.make AST.gen ~print:(Format.asprintf "%a" PP.pp) ~shrink:shrink_expr +;; + +let _ = + QCheck_runner.run_tests + [ QCheck.( + Test.make arbitrary_expr (fun l -> + match + Angstrom.parse_string + ~consume:Angstrom.Consume.All + Parser.expr + (Format.asprintf "%a" PP.pp l) + with + | Result.Ok after when after = l -> true + | Result.Ok after -> + Format.printf "before : %a\n%!" AST.pp l; + (* Format.printf " : `%a`\n%!" PP.pp l; *) + Format.printf "`%a`\n%!" AST.pp after; + false + | Result.Error _ -> + (* Format.printf "failed on : %a\n%!" Lam.pp l; *) + false)) + ] +;; diff --git a/OMLet/test_qc/test_qc.mli b/OMLet/test_qc/test_qc.mli new file mode 100644 index 00000000..a65c69d1 --- /dev/null +++ b/OMLet/test_qc/test_qc.mli @@ -0,0 +1,7 @@ +[@@@ocaml.text "/*"] + +(** Copyright 2023-2024, Kakadu and contributors *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +[@@@ocaml.text "/*"] diff --git a/OMLet/tests/compile.ml b/OMLet/tests/compile.ml new file mode 100644 index 00000000..2fbe8119 --- /dev/null +++ b/OMLet/tests/compile.ml @@ -0,0 +1,82 @@ +(** Copyright 2025, Ksenia Kotelnikova , Sofya Kozyreva *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) +open OMLet + +let () = + let source = + "let rec fac n =\n\ + \ if n <= 1\n\ + \ then 1\n\ + \ else let n1 = n-1 in\n\ + \ let m = fac n1 in\n\ + \ n*m\n\n\ + let main = fac 4" + in + match Parser.parse source with + | Error e -> Stdlib.Format.printf "Parsing error: %s\n%!" e + | Ok ast -> + Stdlib.Format.printf ".global _start\n"; + let asm = Codegen.codegen ast in + Stdlib.List.iter CodegenTypes.pp_instr asm +;; +(* let () = + let asm = + " +.global _start +.fac_0: + addi sp, sp, -64 + sd ra, 0(sp) + sd a0, 8(sp) + ld a0, 8(sp) + mv t0, a0 + li a0, 1 + mv t1, a0 + slt a0, t0, t1 + xori a0, a0, 1 + mv t0, a0 + xori t0, t0, 1 + beq x0, t0, .else_1 + li a0, 1 + j .join_2 +.else_1: + ld a0, 8(sp) + mv t1, a0 + li a0, 1 + mv t2, a0 + sub a0, t1, t2 + sd a0, 16(sp) + sd t0, 24(sp) + sd t1, 32(sp) + sd t0, 40(sp) + sd t1, 48(sp) + sd t2, 56(sp) + ld a0, 16(sp) + call .fac_0 + ld t2, 56(sp) + ld t1, 48(sp) + ld t0, 40(sp) + ld t1, 32(sp) + ld t0, 24(sp) + sd a0, 24(sp) + ld a0, 8(sp) + mv t1, a0 + ld a0, 24(sp) + mv t2, a0 + mul a0, t1, t2 +.join_2: + ld ra, 0(sp) + addi sp, sp, 64 + ret +_start: + addi sp, sp, -64 + li a0, 4 + call .fac_0 + sd a0, 64(sp) + addi sp, sp, 64 + li a7, 93 +ecall +" + in + print_string asm +;; *) diff --git a/OMLet/tests/dune b/OMLet/tests/dune new file mode 100644 index 00000000..076e23a6 --- /dev/null +++ b/OMLet/tests/dune @@ -0,0 +1,8 @@ +(executable + (name compile) + (modules compile) + (libraries OMLet)) + +(cram + (applies_to factorial) + (deps ./compile.exe)) diff --git a/OMLet/tests/factorial.t b/OMLet/tests/factorial.t new file mode 100644 index 00000000..698a311d --- /dev/null +++ b/OMLet/tests/factorial.t @@ -0,0 +1,7 @@ +Copyright 2025, Sofya Kozyreva, Maksim Shipilov +SPDX-License-Identifier: LGPL-3.0-or-later + + $ ./compile.exe | riscv64-linux-gnu-as -march=rv64gc -o temp.o - + $ riscv64-linux-gnu-ld temp.o -o factorial.exe + $ qemu-riscv64 -L /usr/riscv64-linux-gnu factorial.exe + [24] diff --git a/PudgeWithMoML/.gitignore b/PudgeWithMoML/.gitignore new file mode 100644 index 00000000..0cbd4201 --- /dev/null +++ b/PudgeWithMoML/.gitignore @@ -0,0 +1,5 @@ +/_build +/_coverage +a.exe +a.s +temp.o diff --git a/PudgeWithMoML/.ocamlformat b/PudgeWithMoML/.ocamlformat new file mode 100644 index 00000000..25919d0e --- /dev/null +++ b/PudgeWithMoML/.ocamlformat @@ -0,0 +1,3 @@ +version=0.27.0 +profile=janestreet + diff --git a/PudgeWithMoML/Makefile b/PudgeWithMoML/Makefile new file mode 100644 index 00000000..c0615678 --- /dev/null +++ b/PudgeWithMoML/Makefile @@ -0,0 +1,20 @@ +.PHONY: all test +all: + dune build + +test: + dune test + +TEST_COV_D = /tmp/cov +COVERAGE_OPTS = --coverage-path $(TEST_COV_D) --expect bin/ + +.PHONY: test_coverage coverage +test_coverage: coverage +coverage: + $(RM) -r $(TEST_COV_D) + mkdir -p $(TEST_COV_D) + BISECT_FILE=$(TEST_COV_D)/language dune runtest --no-print-directory \ + --instrument-with bisect_ppx --force + bisect-ppx-report html $(COVERAGE_OPTS) + bisect-ppx-report summary $(COVERAGE_OPTS) + @echo "Use 'xdg-open _coverage/index.html' to see coverage report" diff --git a/PudgeWithMoML/PudgeWithMoML.opam b/PudgeWithMoML/PudgeWithMoML.opam new file mode 100644 index 00000000..236250e2 --- /dev/null +++ b/PudgeWithMoML/PudgeWithMoML.opam @@ -0,0 +1,41 @@ +# This file is generated by dune, edit dune-project instead +opam-version: "2.0" +synopsis: "MiniML compiler" +maintainer: ["Gleb Nasretdinov" "Ilhom Kombaev"] +authors: ["Gleb Nasretdinov" "Ilhom Kombaev"] +license: "LGPL-3.0-or-later WITH OCaml-LGPL-linking-exception" +homepage: "https://github.com/Ycyken/PudgeWithMoML" +bug-reports: "https://github.com/Ycyken/PudgeWithMoML/issues" +depends: [ + "ocaml" + "dune" {>= "3.8" & = "3.19.1"} + "angstrom" {= "0.16.0"} + "qcheck" + "bisect_ppx" + "llvm" {= "18-shared"} + "qcheck" {with-tests} + "ppx_deriving_qcheck" {= "0.6"} + "odoc" {with-doc} +] +build: [ + ["dune" "subst"] {dev} + [ + "dune" + "build" + "-p" + name + "-j" + jobs + "@install" + "@runtest" {with-test} + "@doc" {with-doc} + ] +] +dev-repo: "git+https://github.com/Ycyken/PudgeWithMoML.git" +depexts: [ + [ "llvm-18-dev" "clang-18" "gcc-riscv64-linux-gnu" "g++-riscv64-linux-gnu" "qemu-user"] {os-distribution = "ubuntu"} +] +pin-depends: [ + ["ppx_deriving_qcheck.0.6" "git+https://github.com/c-cube/qcheck.git#42429bf06ba12373cad02b1404f50d0ad6238af5"] +] +# Don't edit '*.opam' file manually. Use 'dune b @install' diff --git a/PudgeWithMoML/PudgeWithMoML.opam.template b/PudgeWithMoML/PudgeWithMoML.opam.template new file mode 100644 index 00000000..f4e537bf --- /dev/null +++ b/PudgeWithMoML/PudgeWithMoML.opam.template @@ -0,0 +1,7 @@ +depexts: [ + [ "llvm-18-dev" "clang-18" "gcc-riscv64-linux-gnu" "g++-riscv64-linux-gnu" "qemu-user"] {os-distribution = "ubuntu"} +] +pin-depends: [ + ["ppx_deriving_qcheck.0.6" "git+https://github.com/c-cube/qcheck.git#42429bf06ba12373cad02b1404f50d0ad6238af5"] +] +# Don't edit '*.opam' file manually. Use 'dune b @install' diff --git a/PudgeWithMoML/bin/compiler.ml b/PudgeWithMoML/bin/compiler.ml new file mode 100644 index 00000000..d807814d --- /dev/null +++ b/PudgeWithMoML/bin/compiler.ml @@ -0,0 +1,40 @@ +(** Copyright 2025-2026, Gleb Nasretdinov, Ilhom Kombaev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open PudgeWithMoML.Frontend.Parser +open PudgeWithMoML.Riscv.Codegen +open Stdio + +type opts = + { mutable input_file : string + ; mutable output_file : string + } + +let compiler input_file output_file = + let input = In_channel.read_all input_file |> String.trim in + let program = parse input in + match program with + | Error e -> eprintf "Parsing error: %s\n" e + | Ok program -> + let oc = Out_channel.create output_file in + let fmt = Format.formatter_of_out_channel oc in + gen_program program fmt +;; + +let () = + let opts = { input_file = ""; output_file = "a.s" } in + let open Stdlib.Arg in + let speclist = + [ "-fromfile", String (fun filename -> opts.input_file <- filename), "Input file name" + ; "-o", String (fun filename -> opts.output_file <- filename), "Output file name" + ] + in + let anon_func _ = + Stdlib.Format.eprintf "Positioned arguments are not supported\n"; + Stdlib.exit 1 + in + let usage_msg = "Mini-ml to riscv compiler" in + let () = parse speclist anon_func usage_msg in + compiler opts.input_file opts.output_file +;; diff --git a/PudgeWithMoML/bin/dune b/PudgeWithMoML/bin/dune new file mode 100644 index 00000000..f35c3f2d --- /dev/null +++ b/PudgeWithMoML/bin/dune @@ -0,0 +1,10 @@ +(executable + (public_name compiler) + (name compiler) + (modules compiler) + (libraries PudgeWithMoML stdio) + (instrumentation + (backend bisect_ppx))) + +(cram + (deps ./compiler.exe fact)) diff --git a/PudgeWithMoML/bin/fact b/PudgeWithMoML/bin/fact new file mode 100644 index 00000000..22934529 --- /dev/null +++ b/PudgeWithMoML/bin/fact @@ -0,0 +1,8 @@ +let rec fac n = + if n <= 1 + then 1 + else let n1 = n-1 in + let m = fac n1 in + n*m + +let main = fac 4 diff --git a/PudgeWithMoML/bin/run.t b/PudgeWithMoML/bin/run.t new file mode 100644 index 00000000..06a4c596 --- /dev/null +++ b/PudgeWithMoML/bin/run.t @@ -0,0 +1,40 @@ + $ ./compiler.exe -fromfile fact + $ riscv64-linux-gnu-as -march=rv64gc a.s -o temp.o + $ riscv64-linux-gnu-ld temp.o -o a.exe + $ cat a.s + .text + .globl _start + fac: + addi sp, sp, -32 + sd ra, 0(sp) + sd a0, 8(sp) + ld t0, 8(sp) + li t1, 1 + slt t0, t1, t0 + xori t0, t0, 1 + beq t0, zero, L0 + li a0, 1 + j L1 + L0: + ld t0, 8(sp) + li t1, 1 + sub t0, t0, t1 + sd t0, 16(sp) + ld a0, 16(sp) + call fac + mv t0, a0 + sd t0, 24(sp) + ld t0, 8(sp) + ld t1, 24(sp) + mul a0, t0, t1 + L1: + ld ra, 0(sp) + addi sp, sp, 32 + ret + _start: + li a0, 4 + call fac + li a7, 94 + ecall + $ qemu-riscv64 -L /usr/riscv64-linux-gnu -cpu rv64 ./a.exe + [24] diff --git a/PudgeWithMoML/dune-project b/PudgeWithMoML/dune-project new file mode 100644 index 00000000..2a5e4c11 --- /dev/null +++ b/PudgeWithMoML/dune-project @@ -0,0 +1,33 @@ +(lang dune 3.8) + +(name PudgeWithMoML) + +(generate_opam_files true) + +(source + (github Ycyken/PudgeWithMoML)) + +(authors "Gleb Nasretdinov" "Ilhom Kombaev") + +(maintainers "Gleb Nasretdinov" "Ilhom Kombaev") + +(license "LGPL-3.0-or-later WITH OCaml-LGPL-linking-exception") + +(package + (name PudgeWithMoML) + (synopsis "MiniML compiler") + (depends + ocaml + (dune + (= "3.19.1")) + (angstrom + (= "0.16.0")) + qcheck + bisect_ppx + (llvm + (= "18-shared")) + (qcheck :with-tests) + (ppx_deriving_qcheck + (= "0.6")))) + +; See the complete stanza docs at https://dune.readthedocs.io/en/stable/dune-files.html#dune-project diff --git a/PudgeWithMoML/lib/dune b/PudgeWithMoML/lib/dune new file mode 100644 index 00000000..6ae8deb8 --- /dev/null +++ b/PudgeWithMoML/lib/dune @@ -0,0 +1,11 @@ +(library + (name PudgeWithMoML) + (public_name PudgeWithMoML) + (modules :standard) + (libraries base angstrom) + (preprocess + (pps ppx_deriving.show ppx_deriving.eq)) + (instrumentation + (backend bisect_ppx))) + +(include_subdirs qualified) diff --git a/PudgeWithMoML/lib/frontend/ast.ml b/PudgeWithMoML/lib/frontend/ast.ml new file mode 100644 index 00000000..a114a133 --- /dev/null +++ b/PudgeWithMoML/lib/frontend/ast.ml @@ -0,0 +1,73 @@ +(** Copyright 2025-2026, Gleb Nasretdinov, Ilhom Kombaev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open TypedTree + +type ident = string [@@deriving show { with_path = false }] + +type literal = + | Int_lt of int + | Bool_lt of bool + | Unit_lt +[@@deriving show { with_path = false }] + +type pattern = + | Wild (** [_] *) + | PList of pattern list (**[ [], [1;2;3] ] *) + | PCons of pattern * pattern (**[ hd :: tl ] *) + | PTuple of pattern * pattern * pattern list (** | [(a, b)] -> *) + | PConst of literal (** | [4] -> *) + | PVar of ident (** | [x] -> *) + | POption of pattern option + | PConstraint of pattern * typ +[@@deriving show { with_path = false }] + +type is_recursive = + | Nonrec + | Rec +[@@deriving show { with_path = false }] + +type expr = + | Const of literal + | Tuple of expr * expr * expr list + | List of expr list + | Variable of ident + | If_then_else of expr * expr * expr option + | Lambda of pattern * expr + | Apply of expr * expr + | Function of case * case list (** [function | p1 -> e1 | p2 -> e2 | ... |]*) + | Match of expr * case * case list (** [match x with | p1 -> e1 | p2 -> e2 | ...] *) + | Option of expr option + | EConstraint of expr * typ + | LetIn of is_recursive * binding list * expr +[@@deriving show { with_path = false }] + +and binding = pattern * expr [@@deriving show { with_path = false }] +and case = pattern * expr [@@deriving show { with_path = false }] + +type structure_item = is_recursive * binding list [@@deriving show { with_path = false }] +type program = structure_item list [@@deriving show { with_path = false }] + +let eapp func args = + Base.List.fold_left args ~init:func ~f:(fun acc arg -> Apply (acc, arg)) +;; + +let elambda func args = + Base.List.fold_right args ~init:func ~f:(fun arg acc -> Lambda (arg, acc)) +;; + +let eeq a b = eapp (Variable "=") [ a; b ] +let eneq a b = eapp (Variable "<>") [ a; b ] +let elt a b = eapp (Variable "<") [ a; b ] +let elte a b = eapp (Variable "<=") [ a; b ] +let egt a b = eapp (Variable ">") [ a; b ] +let egte a b = eapp (Variable ">=") [ a; b ] +let eadd a b = eapp (Variable "+") [ a; b ] +let esub a b = eapp (Variable "-") [ a; b ] +let emul a b = eapp (Variable "*") [ a; b ] +let ediv a b = eapp (Variable "/") [ a; b ] +let eland a b = eapp (Variable "&&") [ a; b ] +let elor a b = eapp (Variable "||") [ a; b ] +let econs a b = eapp (Variable "::") [ a; b ] +let euminus a = eapp (Variable "-") [ a ] diff --git a/PudgeWithMoML/lib/frontend/parser.ml b/PudgeWithMoML/lib/frontend/parser.ml new file mode 100644 index 00000000..45da1e5c --- /dev/null +++ b/PudgeWithMoML/lib/frontend/parser.ml @@ -0,0 +1,369 @@ +(** Copyright 2025-2026, Gleb Nasretdinov, Ilhom Kombaev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Angstrom +open Ast +open Base +open TypedTree + +(* TECHNICAL FUNCTIONS *) + +let is_ws = function + | ' ' -> true + | '\n' -> true + | '\t' -> true + | '\r' -> true + | _ -> false +;; + +let skip_ws = skip_while is_ws + +let peek_sep1 = + peek_char + >>= function + | None -> return None + | Some c -> + (match c with + | '(' | ')' | '[' | ']' | ';' | ':' | ',' -> return (Some c) + | _ -> if is_ws c then return (Some c) else fail "need a delimiter") +;; + +let skip_ws_sep1 = peek_sep1 *> skip_ws + +let chainl1 e op = + let rec go acc = lift2 (fun f x -> f acc x) op e >>= go <|> return acc in + e >>= go +;; + +let rec chainr1 e op = + let* left = e in + (let* f = op in + let* right = chainr1 e op in + return (f left right)) + <|> return left +;; + +let rec unary_chain op e = + op >>= (fun unexpr -> unary_chain op e >>= fun expr -> return (unexpr expr)) <|> e +;; + +let is_keyword = function + | "if" + | "then" + | "else" + | "let" + | "in" + | "not" + | "true" + | "false" + | "fun" + | "match" + | "with" + | "and" + | "Some" + | "None" + | "function" + | "->" + | "|" + | ":" + | "::" + | "_" -> true + | _ -> false +;; + +(* SIMPLE PARSERS *) +let expr_const_factory parser = parser >>| fun lit -> Const lit +let pat_const_factory parser = parser >>| fun lit -> PConst lit + +let p_int = + skip_ws + *> let* sign = string "+" <|> string "-" <|> string "" in + let* number = take_while1 Char.is_digit in + return (Int_lt (Int.of_string (sign ^ number))) +;; + +let p_int_expr = expr_const_factory p_int +let p_int_pat = pat_const_factory p_int + +let p_bool = + skip_ws *> string "true" + <|> skip_ws *> string "false" + >>| fun s -> Bool_lt (Bool.of_string s) +;; + +let p_bool_expr = expr_const_factory p_bool +let p_bool_pat = pat_const_factory p_bool +let p_unit = skip_ws *> string "(" *> skip_ws *> string ")" *> return Unit_lt +let p_unit_expr = expr_const_factory p_unit +let p_unit_pat = pat_const_factory p_unit + +let p_oper = + let* oper = + skip_ws + *> take_while1 (function + | '+' + | '-' + | '<' + | '>' + | '*' + | '|' + | '!' + | '$' + | '%' + | '&' + | '.' + | '/' + | ':' + | '=' + | '?' + | '@' + | '^' + | '~' -> true + | _ -> false) + in + if is_keyword oper + then fail "keywords are not allowed as variable names" + else return (PVar oper) +;; + +let p_ident = + let p_fst_letter = + take_while1 (function + | 'a' .. 'z' | '_' -> true + | _ -> false) + in + let* name = + skip_ws + *> lift2 + ( ^ ) + p_fst_letter + (take_while (function + | 'a' .. 'z' | 'A' .. 'Z' | '_' | '0' .. '9' -> true + | _ -> false)) + in + if is_keyword name + then fail "keywords are not allowed as variable names" + else return name +;; + +let p_type = skip_ws *> char ':' *> skip_ws *> p_ident >>| fun s -> Primitive s +let p_var_expr = p_ident >>| fun ident -> Variable ident +let p_var_pat = p_ident >>| fun ident -> PVar ident + +let p_semicolon_list p_elem = + skip_ws + *> string "[" + *> skip_ws + *> let+ list = + fix (fun p_semi_list -> + choice + [ (let* hd = p_elem <* skip_ws <* string ";" in + let* tl = p_semi_list in + return (hd :: tl)) + ; (let* hd = p_elem <* skip_ws <* string "]" in + return [ hd ]) + ; skip_ws *> string "]" *> return [] + ]) + in + list +;; + +let p_semicolon_list_expr p_expr = p_semicolon_list p_expr >>| fun l -> List l +let p_semicolon_list_pat p_pat = p_semicolon_list p_pat >>| fun l -> PList l + +let p_cons_list_pat p_pat = + chainr1 p_pat (skip_ws *> string "::" *> return (fun l r -> PCons (l, r))) +;; + +(* EXPR PARSERS *) +let p_parens p = skip_ws *> char '(' *> skip_ws *> p <* skip_ws <* char ')' +let uminus = skip_ws *> string "-" *> return euminus + +let p_tuple make p = + let tuple = + let* fst = p <* skip_ws <* string "," in + let* snd = p in + let* rest = many (skip_ws *> string "," *> p) in + return (make fst snd rest) + in + p_parens tuple <|> tuple +;; + +let make_tuple_expr e1 e2 rest = Tuple (e1, e2, rest) +let make_tuple_pat p1 p2 rest = PTuple (p1, p2, rest) +let p_tuple_pat p_pat = p_tuple make_tuple_pat p_pat + +let p_if p_expr = + lift3 + (fun cond th el -> If_then_else (cond, th, el)) + (skip_ws *> string "if" *> peek_sep1 *> p_expr) + (skip_ws *> string "then" *> peek_sep1 *> p_expr) + (skip_ws + *> string "else" + *> peek_sep1 + *> (p_expr <* peek_sep1 >>= fun e -> return (Some e)) + <|> return None) +;; + +let p_option p make_option = + skip_ws *> string "None" *> peek_sep1 *> return (make_option None) + <|> let+ inner = skip_ws *> string "Some" *> peek_sep1 *> p in + make_option (Some inner) +;; + +let make_option_expr expr = Option expr +let make_option_pat pat = POption pat +let p_wild_pat = skip_ws *> string "_" *> return Wild +let p_pat_const = choice [ p_int_pat; p_bool_pat; p_unit_pat; p_var_pat; p_wild_pat ] + +let p_constraint_pat p_pat = + let* pat = p_pat in + let* typ = p_type in + return (PConstraint (pat, typ)) +;; + +let p_pat = + skip_ws + *> fix (fun self -> + let atom = + choice + [ p_pat_const; p_parens p_oper; p_parens self; p_parens (p_constraint_pat self) ] + in + let semicolon_list = p_semicolon_list_pat (self <|> atom) <|> atom in + let opt = p_option semicolon_list make_option_pat <|> semicolon_list in + let cons = p_cons_list_pat opt in + let tuple = p_tuple_pat cons <|> cons in + tuple) +;; + +let p_binding p_expr : binding t = + let* name = p_pat in + let* args = many p_pat in + let* body = skip_ws *> string "=" *> p_expr in + return (name, elambda body args) +;; + +let p_letin p_expr = + skip_ws + *> string "let" + *> skip_ws_sep1 + *> + let* rec_flag = string "rec" *> peek_sep1 *> return Rec <|> return Nonrec in + let* bind1 = p_binding p_expr in + let* binds_rest = many (skip_ws *> string "and" *> peek_sep1 *> p_binding p_expr) in + let* inner_expr = skip_ws *> string "in" *> peek_sep1 *> p_expr in + return (LetIn (rec_flag, bind1 :: binds_rest, inner_expr)) +;; + +let p_apply p_expr = + chainl1 + (p_parens p_expr <|> (p_expr <* peek_sep1)) + (return (fun expr1 expr2 -> Apply (expr1, expr2))) +;; + +let p_lambda p_expr = + skip_ws + *> string "fun" + *> peek_sep1 + *> + let* arg1 = p_pat in + let* args = many p_pat <* skip_ws <* string "->" in + let* body = p_expr in + return (elambda body (arg1 :: args)) +;; + +let p_case p_expr = + let* pat = skip_ws *> string "|" *> p_pat <* skip_ws <* string "->" in + let* expr = p_expr in + return (pat, expr) +;; + +let p_first_case p_expr = + let* pat = skip_ws *> (string "|" *> p_pat <|> p_pat) <* skip_ws <* string "->" in + let* expr = p_expr in + return (pat, expr) +;; + +let p_match p_expr = + let* value = skip_ws *> string "match" *> p_expr <* skip_ws <* string "with" in + let* pat1, expr1 = p_first_case p_expr in + let* cases = many (p_case p_expr) in + return (Match (value, (pat1, expr1), cases)) +;; + +let p_function p_expr = + skip_ws + *> string "function" + *> + let* pat1, expr1 = p_first_case p_expr in + let* cases = many (p_case p_expr) in + return (Function ((pat1, expr1), cases)) +;; + +let p_constraint_expr p_expr = + let* expr = p_expr in + let* typ = p_type in + return (EConstraint (expr, typ)) +;; + +let infix_op (op, func) = skip_ws *> string op *> return func + +let infix_precedence_list = + [ [ "*", emul; "/", ediv ] + ; [ "+", eadd; "-", esub ] + ; [ "::", econs ] + ; [ "=", eeq; ">=", egte; ">", egt; "<=", elte; "<", elt; "<>", eneq ] + ; [ "&&", eland ] + ; [ "||", elor ] + ] + |> List.map ~f:(List.map ~f:infix_op) +;; + +let p_infix_expr p_expr = + fix (fun self -> + let atom = p_expr <|> p_parens self in + List.fold_left infix_precedence_list ~init:atom ~f:(fun acc ops -> + chainl1 acc (choice ops))) +;; + +let p_expr = + skip_ws + *> fix (fun self -> + let atom = + choice + [ p_var_expr + ; p_int_expr + ; p_unit_expr + ; p_bool_expr + ; p_parens self + ; p_semicolon_list_expr self + ; p_parens (p_constraint_expr self) + ] + in + let if_expr = p_if (self <|> atom) <|> atom in + let letin_expr = p_letin (self <|> if_expr) <|> if_expr in + let option = p_option letin_expr make_option_expr <|> letin_expr in + let apply = p_apply option <|> option in + let unary = unary_chain uminus apply in + let infix = p_infix_expr unary in + let tuple = p_tuple make_tuple_expr infix <|> infix in + let p_function = p_function (self <|> tuple) <|> tuple in + let ematch = p_match (self <|> p_function) <|> p_function in + let efun = p_lambda (self <|> ematch) <|> ematch in + efun) +;; + +let str_item : structure_item t = + skip_ws + *> string "let" + *> skip_ws_sep1 + *> + let* rec_flag = string "rec" *> peek_sep1 *> return Rec <|> return Nonrec in + let* bind1 = p_binding p_expr in + let* binds_rest = many (skip_ws *> string "and" *> peek_sep1 *> p_binding p_expr) in + return (rec_flag, bind1 :: binds_rest) +;; + +let program : program t = many1 str_item <* skip_ws +let parse (str : string) = parse_string ~consume:All program str diff --git a/PudgeWithMoML/lib/frontend/parser.mli b/PudgeWithMoML/lib/frontend/parser.mli new file mode 100644 index 00000000..6dee7f9a --- /dev/null +++ b/PudgeWithMoML/lib/frontend/parser.mli @@ -0,0 +1,7 @@ +(** Copyright 2025-2026, Gleb Nasretdinov, Ilhom Kombaev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Ast + +val parse : string -> (program, string) result diff --git a/PudgeWithMoML/lib/frontend/typedTree.ml b/PudgeWithMoML/lib/frontend/typedTree.ml new file mode 100644 index 00000000..6ebd658e --- /dev/null +++ b/PudgeWithMoML/lib/frontend/typedTree.ml @@ -0,0 +1,37 @@ +(** Copyright 2025-2026, Gleb Nasretdinov, Ilhom Kombaev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +type binder = int [@@deriving show { with_path = false }] + +type typ = + | Primitive of string + | Type_var of binder + | Arrow of typ * typ + | Type_list of typ + | Type_tuple of typ * typ * typ list + | TOption of typ +[@@deriving show { with_path = false }] + +let arrow_of_types first_types last_type = + let open Base in + List.fold_right first_types ~init:last_type ~f:(fun left right -> Arrow (left, right)) +;; + +module VarSet = struct + include Stdlib.Set.Make (Int) + + let pp fmt s = + Format.fprintf fmt "[ "; + iter (Format.fprintf fmt "%d; ") s; + Format.fprintf fmt "]" + ;; +end + +type binder_set = VarSet.t [@@deriving show { with_path = false }] +type scheme = Scheme of binder_set * typ + +let int_typ = Primitive "int" +let bool_typ = Primitive "bool" +let string_typ = Primitive "string" +let unit_typ = Primitive "unit" diff --git a/PudgeWithMoML/lib/riscv/codegen.ml b/PudgeWithMoML/lib/riscv/codegen.ml new file mode 100644 index 00000000..da5a1941 --- /dev/null +++ b/PudgeWithMoML/lib/riscv/codegen.ml @@ -0,0 +1,158 @@ +(** Copyright 2025-2026, Gleb Nasretdinov, Ilhom Kombaev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open Machine + +type location = + | Reg of reg + | Stack of int +[@@deriving eq] + +let word_size = 8 + +module M = struct + open Base + + type env = (string, location, String.comparator_witness) Map.t + + type state = + { env : env + ; stack_offset : int + ; fresh : int + } + + type 'a t = state -> 'a * state + + let return x st = x, st + + let bind m f st = + let x, st' = m st in + f x st' + ;; + + let ( let* ) = bind + + let run m = + let init = { env = Map.empty (module String); stack_offset = 0; fresh = 0 } in + m init + ;; + + let fresh : string t = + fun st -> "L" ^ Int.to_string st.fresh, { st with fresh = st.fresh + 1 } + ;; + + let get_env : env t = fun st -> st.env, st + let put_env env : unit t = fun st -> (), { st with env } + let modify_env f : unit t = fun st -> (), { st with env = f st.env } + + let alloc_stack_slot : int t = + fun st -> + let off = st.stack_offset + word_size in + off, { st with stack_offset = off } + ;; + + let get_stack_offset : int t = fun st -> st.stack_offset, st + let set_stack_offset (off : int) : unit t = fun st -> (), { st with stack_offset = off } + + let add_binding name loc : unit t = + modify_env (fun env -> Map.set env ~key:name ~data:loc) + ;; + + let put_on_stack name offset : unit t = add_binding name (Stack offset) + let lookup name : location option t = fun st -> Map.find st.env name, st +end + +open Frontend.Ast +open M + +let imm_of_literal = function + | Int_lt n -> n + | Bool_lt true -> 1 + | Bool_lt false -> 0 + | Unit_lt -> 1 +;; + +let rec gen_expr dst : expr -> instr list M.t = function + | Const lt -> + let imm = imm_of_literal lt in + M.return [ li dst imm ] + | Variable x -> + let* loc = M.lookup x in + (match loc with + | Some (Reg r) when r = dst -> M.return [] + | Some (Reg r) -> M.return [ mv dst r ] + | Some (Stack off) -> M.return [ ld dst off Sp ] + | _ -> failwith ("unbound variable: " ^ x)) + | If_then_else (c, th, Some el) -> + let* cond_code = gen_expr (T 0) c in + let* then_code = gen_expr dst th in + let* else_code = gen_expr dst el in + let* l_else = M.fresh in + let* l_end = M.fresh in + M.return + (cond_code + @ [ beq (T 0) Zero l_else ] + @ then_code + @ [ j l_end; label l_else ] + @ else_code + @ [ label l_end ]) + | Apply (Apply (Variable op, e1), e2) when List.mem op [ "<="; "+"; "-"; "*" ] -> + let* c1 = gen_expr (T 0) e1 in + let* c2 = gen_expr (T 1) e2 in + (match op with + | "<=" -> M.return (c1 @ c2 @ [ slt dst (T 1) (T 0); xori dst dst 1 ]) + | "+" -> M.return (c1 @ c2 @ [ add dst (T 0) (T 1) ]) + | "-" -> M.return (c1 @ c2 @ [ sub dst (T 0) (T 1) ]) + | "*" -> M.return (c1 @ c2 @ [ mul dst (T 0) (T 1) ]) + | _ -> failwith ("unsupported infix operator: " ^ op)) + | Apply (Variable f, arg) -> + let* arg_code = gen_expr (A 0) arg in + let instrs = arg_code @ [ Call f ] @ if dst = A 0 then [] else [ Mv (dst, A 0) ] in + M.return instrs + | LetIn (Nonrec, [ (PVar x, expr) ], inner_expr) -> + let* code1 = gen_expr (T 0) expr in + let* off = alloc_stack_slot in + let* () = M.add_binding x (Stack off) in + let* code2 = gen_expr dst inner_expr in + M.return (code1 @ [ sd (T 0) off Sp ] @ code2) + | _ -> failwith "gen_expr: not implemented" +;; + +let gen_structure_item : structure_item -> instr list M.t = function + | Rec, [ (PVar f, Lambda (PVar x, body)) ] -> + let* saved_off = M.get_stack_offset in + let* () = M.set_stack_offset 0 in + let* x_off = alloc_stack_slot in + let* () = M.add_binding x (Stack x_off) in + let* body_code = gen_expr (A 0) body in + let* locals = M.get_stack_offset in + let frame = locals + word_size in + (* for ra *) + let* () = M.set_stack_offset saved_off in + let prologue = [ addi Sp Sp (-frame); sd Ra 0 Sp; sd (A 0) x_off Sp ] in + let epilogue = [ ld Ra 0 Sp; addi Sp Sp frame; ret ] in + M.return ([ label f ] @ prologue @ body_code @ epilogue) + | Nonrec, [ (PVar "main", e) ] -> + let* body_code = gen_expr (A 0) e in + M.return ([ label "_start" ] @ body_code @ [ li (A 7) 94; ecall ]) + | _ -> failwith "unsupported structure item" +;; + +let rec gather : program -> instr list M.t = function + | [] -> M.return [] + | item :: rest -> + let* code1 = gen_structure_item item in + let* code2 = gather rest in + M.return (code1 @ code2) +;; + +let gen_program (pr : program) fmt = + let open Format in + fprintf fmt ".text\n"; + fprintf fmt ".globl _start\n"; + let code, _ = M.run (gather pr) in + Base.List.iter code ~f:(function + | Label l -> fprintf fmt "%s:\n" l + | i -> fprintf fmt " %a\n" pp_instr i) +;; diff --git a/PudgeWithMoML/lib/riscv/machine.ml b/PudgeWithMoML/lib/riscv/machine.ml new file mode 100644 index 00000000..a96f1861 --- /dev/null +++ b/PudgeWithMoML/lib/riscv/machine.ml @@ -0,0 +1,92 @@ +(** Copyright 2025-2026, Gleb Nasretdinov, Ilhom Kombaev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +type reg = + | Zero + | Ra + | Sp + | A of int + | S of int + | T of int +[@@deriving eq] + +let pp_reg fmt = + let open Format in + function + | Zero -> fprintf fmt "zero" + | Ra -> fprintf fmt "ra" + | Sp -> fprintf fmt "sp" + | A n when n >= 0 && n <= 7 -> fprintf fmt "a%d" n + | S n when n >= 1 && n <= 11 -> fprintf fmt "s%d" n + | S n when n = 0 -> fprintf fmt "fp" + | T n when n >= 0 && n <= 6 -> fprintf fmt "t%d" n + | _ -> failwith "invalid register" +;; + +type offset = int + +type instr = + | Addi of reg * reg * int + | Add of reg * reg * reg + | Sub of reg * reg * reg + | Mul of reg * reg * reg + | Li of reg * int + | Ld of reg * offset * reg + | Slt of reg * reg * reg + | Seqz of reg * reg + | Snez of reg * reg + | Mv of reg * reg + | Sd of reg * offset * reg + | Xori of reg * reg * int + | Beq of reg * reg * string + | Ble of reg * reg * string + | J of string + | Ecall + | Call of string + | Ret + | Label of string + +let pp_instr fmt = + let open Format in + function + | Addi (rd, rs, n) -> fprintf fmt "addi %a, %a, %d" pp_reg rd pp_reg rs n + | Add (rd, rs1, rs2) -> fprintf fmt "add %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Sub (rd, rs1, rs2) -> fprintf fmt "sub %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Mul (rd, rs1, rs2) -> fprintf fmt "mul %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Li (rd, n) -> fprintf fmt "li %a, %d" pp_reg rd n + | Ld (rd, offset, rs) -> fprintf fmt "ld %a, %d(%a)" pp_reg rd offset pp_reg rs + | Slt (rd, rs1, rs2) -> fprintf fmt "slt %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Seqz (rd, rs) -> fprintf fmt "seqz %a, %a" pp_reg rd pp_reg rs + | Snez (rd, rs) -> fprintf fmt "snez %a, %a" pp_reg rd pp_reg rs + | Mv (rd, rs) -> fprintf fmt "mv %a, %a" pp_reg rd pp_reg rs + | Sd (rs1, offset, rs2) -> fprintf fmt "sd %a, %d(%a)" pp_reg rs1 offset pp_reg rs2 + | Xori (rd, rs, n) -> fprintf fmt "xori %a, %a, %d" pp_reg rd pp_reg rs n + | Beq (rs1, rs2, label) -> fprintf fmt "beq %a, %a, %s" pp_reg rs1 pp_reg rs2 label + | Ble (rs1, rs2, label) -> fprintf fmt "ble %a, %a, %s" pp_reg rs1 pp_reg rs2 label + | J label -> fprintf fmt "j %s" label + | Ecall -> fprintf fmt "ecall" + | Call symbol -> fprintf fmt "call %s" symbol + | Ret -> fprintf fmt "ret" + | Label label -> fprintf fmt "%s:" label +;; + +let addi r1 r2 n = Addi (r1, r2, n) +let add r1 r2 r3 = Add (r1, r2, r3) +let sub r1 r2 r3 = Sub (r1, r2, r3) +let mul r1 r2 r3 = Mul (r1, r2, r3) +let li r n = Li (r, n) +let ld r off base = Ld (r, off, base) +let slt r1 r2 r3 = Slt (r1, r2, r3) +let seqz r1 r2 = Seqz (r1, r2) +let snez r1 r2 = Snez (r1, r2) +let mv r1 r2 = Mv (r1, r2) +let sd r1 off base = Sd (r1, off, base) +let xori r1 r2 n = Xori (r1, r2, n) +let beq r1 r2 label = Beq (r1, r2, label) +let ble r1 r2 label = Ble (r1, r2, label) +let j label = J label +let ecall = Ecall +let call symbol = Call symbol +let ret = Ret +let label l = Label l diff --git a/PudgeWithMoML/many_tests/.ocamlformat b/PudgeWithMoML/many_tests/.ocamlformat new file mode 100644 index 00000000..e3346c16 --- /dev/null +++ b/PudgeWithMoML/many_tests/.ocamlformat @@ -0,0 +1 @@ +disable=true diff --git a/PudgeWithMoML/test/dune b/PudgeWithMoML/test/dune new file mode 100644 index 00000000..a205df25 --- /dev/null +++ b/PudgeWithMoML/test/dune @@ -0,0 +1,7 @@ +(library + (name tests) + (inline_tests) + (libraries PudgeWithMoML) + (modules :standard) + (preprocess + (pps ppx_expect))) diff --git a/PudgeWithMoML/test/parser.ml b/PudgeWithMoML/test/parser.ml new file mode 100644 index 00000000..cb9d8ad7 --- /dev/null +++ b/PudgeWithMoML/test/parser.ml @@ -0,0 +1,51 @@ +(** Copyright 2025-2026, Gleb Nasretdinov, Ilhom Kombaev *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +open PudgeWithMoML.Frontend.Parser +open PudgeWithMoML.Frontend.Ast + +let%expect_test "fac" = + let input = + {| let rec fac n = + if n <= 1 + then 1 + else let n1 = n-1 in + let m = fac n1 in + n*m + +let main = fac 4 |} + in + let result = parse input in + let () = + match result with + | Error e -> print_endline e + | Ok s -> pp_program Format.std_formatter s + in + [%expect + {| + [(Rec, + [((PVar "fac"), + (Lambda ((PVar "n"), + (If_then_else ( + (Apply ((Apply ((Variable "<="), (Variable "n"))), + (Const (Int_lt 1)))), + (Const (Int_lt 1)), + (Some (LetIn (Nonrec, + [((PVar "n1"), + (Apply ((Apply ((Variable "-"), (Variable "n"))), + (Const (Int_lt 1))))) + ], + (LetIn (Nonrec, + [((PVar "m"), + (Apply ((Variable "fac"), (Variable "n1"))))], + (Apply ((Apply ((Variable "*"), (Variable "n"))), + (Variable "m"))) + )) + ))) + )) + ))) + ]); + (Nonrec, [((PVar "main"), (Apply ((Variable "fac"), (Const (Int_lt 4)))))]) + ] |}] +;; diff --git a/PudgeWithMoML/test/parser.mli b/PudgeWithMoML/test/parser.mli new file mode 100644 index 00000000..e69de29b diff --git a/akaML/.gitignore b/akaML/.gitignore new file mode 100644 index 00000000..d8001c82 --- /dev/null +++ b/akaML/.gitignore @@ -0,0 +1,4 @@ +/_build +/_coverage +.vscode/ +*.DS_Store diff --git a/akaML/.ocamlformat b/akaML/.ocamlformat new file mode 100644 index 00000000..04d5660b --- /dev/null +++ b/akaML/.ocamlformat @@ -0,0 +1,2 @@ +version=0.27.0 +profile=janestreet diff --git a/akaML/.zanuda b/akaML/.zanuda new file mode 100644 index 00000000..ea61ca87 --- /dev/null +++ b/akaML/.zanuda @@ -0,0 +1 @@ +forward mutability_check ignore akaML.ml,bin/akaML.ml diff --git a/akaML/Makefile b/akaML/Makefile new file mode 100644 index 00000000..0a42f400 --- /dev/null +++ b/akaML/Makefile @@ -0,0 +1,34 @@ +.PHONY: all test fmt lint clean + +all: + dune build + +test: + dune test + +clean: + @$(RM) -r _build _coverage + +fmt: + dune build @fmt --auto-promote + +lint: + dune build @lint --force + +release: + dune build --profile=release + dune runtest --profile=release + +TEST_COV_D = /tmp/cov +COVERAGE_OPTS = --coverage-path $(TEST_COV_D) --expect lib/ + +.PHONY: test_coverage coverage +test_coverage: coverage +coverage: + $(RM) -r $(TEST_COV_D) + mkdir -p $(TEST_COV_D) + BISECT_FILE=$(TEST_COV_D)/langauge dune runtest --no-print-directory \ + --instrument-with bisect_ppx --force + bisect-ppx-report html $(COVERAGE_OPTS) + bisect-ppx-report summary $(COVERAGE_OPTS) + @echo "Use 'xdg-open _coverage/index.html' to see coverage report" diff --git a/akaML/akaML.opam b/akaML/akaML.opam new file mode 100644 index 00000000..822c9a93 --- /dev/null +++ b/akaML/akaML.opam @@ -0,0 +1,47 @@ +# This file is generated by dune, edit dune-project instead +opam-version: "2.0" +synopsis: "A miniML compiler" +maintainer: [ + "Maxim Rodionov " + "Vladimir Zaikin " +] +authors: [ + "Maxim Rodionov " + "Vladimir Zaikin " +] +license: "LGPL-3.0-or-later WITH OCaml-LGPL-linking-exception" +homepage: "https://github.com/Friend-zva/akaML" +bug-reports: "https://github.com/Friend-zva/akaML/issues" +depends: [ + "dune" {>= "3.19" & = "3.19.1"} + "base" + "angstrom" {= "0.16.0"} + "qcheck-core" + "ppx_inline_test" {with-test} + "ppx_deriving" + "ppx_deriving_qcheck" {= "0.6"} + "ppx_expect" + "bisect_ppx" + "odoc" {with-doc} +] +build: [ + ["dune" "subst"] {dev} + [ + "dune" + "build" + "-p" + name + "-j" + jobs + "@install" + "@runtest" {with-test} + "@doc" {with-doc} + ] +] +dev-repo: "git+https://github.com/Friend-zva/akaML.git" +x-maintenance-intent: ["(latest)"] +pin-depends: [ + ["ppx_deriving_qcheck.0.6" "git+https://github.com/c-cube/qcheck.git#42429bf06ba12373cad02b1404f50d0ad6238af5"] +] + +# Don't edit '*.opam' file manually. Use 'dune build @install' diff --git a/akaML/akaML.opam.template b/akaML/akaML.opam.template new file mode 100644 index 00000000..12867d46 --- /dev/null +++ b/akaML/akaML.opam.template @@ -0,0 +1,5 @@ +pin-depends: [ + ["ppx_deriving_qcheck.0.6" "git+https://github.com/c-cube/qcheck.git#42429bf06ba12373cad02b1404f50d0ad6238af5"] +] + +# Don't edit '*.opam' file manually. Use 'dune build @install' diff --git a/akaML/bin/akaML.ml b/akaML/bin/akaML.ml new file mode 100644 index 00000000..1ac9d708 --- /dev/null +++ b/akaML/bin/akaML.ml @@ -0,0 +1,109 @@ +[@@@ocaml.text "/*"] + +(** Copyright 2025-2026, Friend-zva, RodionovMaxim05 *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +[@@@ocaml.text "/*"] + +open Stdio + +type opts = + { mutable dump_parsetree : bool + ; mutable inference : bool + ; mutable input_file : string option + ; mutable output_file : string option + } + +let pp_global_error ppf = function + | #Inferencer.error as e -> Inferencer.pp_error ppf e +;; + +let compiler dump_parsetree inference input_source output_file = + let run text env_infer out_channel = + let ast = Parser.parse text in + match ast with + | Error error -> + Out_channel.output_string out_channel (Format.asprintf "Parsing error: %s\n" error); + env_infer + | Ok ast -> + if dump_parsetree + then ( + Out_channel.output_string out_channel (Ast.show_structure ast ^ "\n"); + env_infer) + else ( + match Inferencer.run_inferencer env_infer ast with + | Error e_infer -> + Out_channel.output_string + out_channel + (Format.asprintf "Inferencer error: %a\n" pp_global_error e_infer); + env_infer + | Ok (env_infer, out_infer_list) -> + if inference + then ( + Base.List.iter out_infer_list ~f:(function + | Some id, type' -> + Out_channel.output_string + out_channel + (Format.asprintf "val %s : %a\n" id Pprinter.pp_core_type type') + | None, type' -> + Out_channel.output_string + out_channel + (Format.asprintf "- : %a\n" Pprinter.pp_core_type type')); + env_infer) + else ( + let ppf = Format.formatter_of_out_channel out_channel in + Format.fprintf ppf "%a\n%!" RiscV.Codegen.gen_structure ast; + env_infer)) + in + let env_infer = Inferencer.env_with_print_funs in + match input_source with + | Some file_name -> + let text = In_channel.read_all file_name |> String.trim in + (match output_file with + | Some out_name -> + Out_channel.with_file out_name ~f:(fun oc -> + let (_ : Inferencer.TypeEnv.t) = run text env_infer oc in + ()) + | None -> + let (_ : Inferencer.TypeEnv.t) = run text env_infer Out_channel.stdout in + ()) + | None -> + let input = In_channel.input_all stdin |> String.trim in + (match output_file with + | Some out_name -> + Out_channel.with_file out_name ~f:(fun oc -> + let (_ : Inferencer.TypeEnv.t) = run input env_infer oc in + ()) + | None -> + let (_ : Inferencer.TypeEnv.t) = run input env_infer Out_channel.stdout in + ()) +;; + +let () = + let options = + { dump_parsetree = false; inference = false; input_file = None; output_file = None } + in + let () = + let open Arg in + parse + [ ( "-dparsetree" + , Unit (fun () -> options.dump_parsetree <- true) + , "Dump parse tree, don't evaluate anything" ) + ; ( "-inference" + , Unit (fun () -> options.inference <- true) + , "Inference, don't evaluate anything" ) + ; ( "-fromfile" + , String (fun filename -> options.input_file <- Some filename) + , "Read code from the file" ) + ; ( "-o" + , String (fun filename -> options.output_file <- Some filename) + , "Write code to the file" ) + ] + (fun _ -> + Format.eprintf "Positional arguments are not supported\n"; + exit 1) + "Compiler for custom language" + in + compiler options.dump_parsetree options.inference options.input_file options.output_file +;; diff --git a/akaML/bin/akaML.mli b/akaML/bin/akaML.mli new file mode 100644 index 00000000..a460e7de --- /dev/null +++ b/akaML/bin/akaML.mli @@ -0,0 +1,7 @@ +[@@@ocaml.text "/*"] + +(** Copyright 2025-2026, Friend-zva, RodionovMaxim05 *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +[@@@ocaml.text "/*"] diff --git a/akaML/bin/dune b/akaML/bin/dune new file mode 100644 index 00000000..98f1544d --- /dev/null +++ b/akaML/bin/dune @@ -0,0 +1,3 @@ +(executable + (name akaML) + (libraries stdio base Parser Inferencer Pprinter RiscV)) diff --git a/akaML/dune b/akaML/dune new file mode 100644 index 00000000..e35ebd01 --- /dev/null +++ b/akaML/dune @@ -0,0 +1,7 @@ +(env + (dev + (flags + (:standard -warn-error -A -w -3-9-32-34-58))) + (release + (flags + (:standard -warn-error -A)))) diff --git a/akaML/dune-project b/akaML/dune-project new file mode 100644 index 00000000..08085b70 --- /dev/null +++ b/akaML/dune-project @@ -0,0 +1,37 @@ +(lang dune 3.19) + +(name akaML) + +(generate_opam_files true) + +(source + (github Friend-zva/akaML)) + +(authors + "Maxim Rodionov " + "Vladimir Zaikin ") + +(maintainers + "Maxim Rodionov " + "Vladimir Zaikin ") + +(license "LGPL-3.0-or-later WITH OCaml-LGPL-linking-exception") + +(package + (name akaML) + (synopsis "A miniML compiler") + (depends + (dune + (= "3.19.1")) + base + (angstrom + (= "0.16.0")) + qcheck-core + (ppx_inline_test :with-test) + ppx_deriving + (ppx_deriving_qcheck + (= "0.6")) + ppx_expect + bisect_ppx)) + +; See the complete stanza docs at https://dune.readthedocs.io/en/stable/reference/dune-project/index.html diff --git a/akaML/lib/ast/ast.ml b/akaML/lib/ast/ast.ml new file mode 100644 index 00000000..a171da78 --- /dev/null +++ b/akaML/lib/ast/ast.ml @@ -0,0 +1,264 @@ +[@@@ocaml.text "/*"] + +(** Copyright 2025-2026, Friend-zva, RodionovMaxim05 *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +[@@@ocaml.text "/*"] + +open QCheck.Gen + +let coef = 20 (* For the generator's speed. *) +let min_range = int_range 0 10 (* For the generator's speed. *) +let gen_string gen = string_size min_range ~gen +let gen_list gen = list_size min_range gen + +type 'a list_ = ('a list[@gen gen_list gen_a]) +[@@deriving show { with_path = false }, qcheck] + +let gen_char = + (* Exception quotation marks and backslash. *) + oneof [ return '!'; char_range '#' '&'; char_range '(' '['; char_range ']' '~' ] +;; + +let un_op_list = [ "~-" ] +let bin_op_list = [ "*"; "/"; "+"; "-"; ">="; "<="; "<>"; "="; ">"; "<"; "&&"; "||" ] +let is_unary_minus op = op = "~-" + +let is_keyword = function + | "and" + | "else" + | "false" + | "fun" + | "if" + | "in" + | "let" + | "function" + | "match" + | "rec" + | "then" + | "true" + | "with" + | "Some" + | "None" -> true + | _ -> false +;; + +let gen_ident = + let gen_id = + map2 + (fun fst_char rest_str -> + match Base.Char.to_string fst_char ^ rest_str with + | "_" -> "id" + | id -> id) + (oneof [ char_range 'a' 'z'; return '_' ]) + (gen_string + (oneof + [ char_range '0' '9' + ; char_range 'A' 'Z' + ; char_range 'a' 'z' + ; return '_' + ; return '\'' + ])) + in + gen_id >>= fun id -> if is_keyword id then gen_id else return id +;; + +type ident = (string[@gen gen_ident]) [@@deriving show { with_path = false }, qcheck] + +type rec_flag = + | Recursive + | Nonrecursive +[@@deriving show { with_path = false }, qcheck] + +type constant = + | Const_integer of (int[@gen nat]) + | Const_char of (char[@gen gen_char]) + | Const_string of (string[@gen gen_string gen_char]) +[@@deriving show { with_path = false }, qcheck] + +let gen_type_var = + let gen_type_var = + map3 + (fun fst_char snd_char rest_str -> + Printf.sprintf "%c%c%s" fst_char snd_char rest_str) + (oneof [ char_range 'a' 'z' ]) + (oneof [ char_range '0' '9'; char_range 'A' 'Z'; char_range 'a' 'z'; return '_' ]) + (gen_string + (oneof + [ char_range '0' '9' + ; char_range 'A' 'Z' + ; char_range 'a' 'z' + ; return '_' + ; return '\'' + ])) + in + gen_type_var + >>= fun type_var -> if is_keyword type_var then gen_type_var else return ("'" ^ type_var) +;; + +type core_type = + | Type_unit + | Type_char + | Type_int + | Type_string + | Type_bool + | Type_option of (core_type[@gen gen_core_type_sized (n / coef)]) + | Type_var of (ident[@gen gen_type_var]) + | Type_list of (core_type[@gen gen_core_type_sized (n / coef)]) + | Type_tuple of + (core_type[@gen gen_core_type_sized (n / coef)]) + * (core_type[@gen gen_core_type_sized (n / coef)]) + * (core_type[@gen gen_core_type_sized (n / coef)]) list_ + | Type_arrow of + (core_type[@gen gen_core_type_sized (n / coef)]) + * (core_type[@gen gen_core_type_sized (n / coef)]) +[@@deriving show { with_path = false }, qcheck] + +let gen_construct gen n tuple construct = + oneof + [ return ("()", None) + ; return ("true", None) + ; return ("false", None) + ; return ("None", None) + ; map (fun i -> "Some", Some i) (gen (n / coef)) + ; (let rec gen_list n = + if n = 0 + then return ("[]", None) + else ( + let element = gen 0 in + let tail = gen_list (n / coef) in + map2 (fun e t -> "::", Some (tuple (e, construct t, []))) element tail) + in + gen_list n) + ] +;; + +type pattern = + | Pat_any + | Pat_var of ident + | Pat_constant of constant + | Pat_tuple of + (pattern[@gen gen_pattern_sized (n / coef)]) + * (pattern[@gen gen_pattern_sized (n / coef)]) + * (pattern[@gen gen_pattern_sized (n / coef)]) list_ + | Pat_construct of + ((ident * pattern option) + [@gen + gen_construct + gen_pattern_sized + n + (fun (fst_pat, snd_pat, pat_list) -> Pat_tuple (fst_pat, snd_pat, pat_list)) + (fun (id, pat_opt) -> Pat_construct (id, pat_opt))]) + | Pat_constraint of (pattern[@gen gen_pattern_sized (n / coef)]) * core_type +[@@deriving show { with_path = false }, qcheck] + +type 'exp value_binding = + { pat : pattern + ; exp : 'exp + } +[@@deriving show { with_path = false }, qcheck] + +type 'exp case = + { left : pattern + ; right : 'exp + } +[@@deriving show { with_path = false }, qcheck] + +module Expression = struct + let gen_value_binding gen n fix_exp_fun = + oneof + [ map2 (fun var exp -> { pat = Pat_var var; exp }) gen_ident (gen (n / coef)) + ; map3 + (fun id type' exp -> { pat = Pat_constraint (Pat_var id, type'); exp }) + gen_ident + gen_core_type + (gen (n / coef)) + ; map2 (fun pat exp -> { pat; exp = fix_exp_fun exp }) gen_pattern (gen (n / coef)) + ] + ;; + + let gen_exp_apply gen n exp_ident exp_apply = + oneof + [ map2 (fun id arg -> exp_ident id, arg) gen_ident (gen (n / coef)) + ; map2 + (fun opr opn -> opr, opn) + (oneofl (List.map exp_ident un_op_list)) + (gen (n / coef)) + ; map3 + (fun opr opn1 opn2 -> exp_apply (opr, opn1), opn2) + (oneofl (List.map exp_ident bin_op_list)) + (gen (n / coef)) + (gen (n / coef)) + ] + ;; + + type value_binding_exp = + (t value_binding + [@gen + gen_value_binding + gen_sized + n + (let rec fix_exp_fun = function + | Exp_fun (_, _, exp) -> fix_exp_fun exp + | Exp_function ({ left = _; right = exp }, _) -> fix_exp_fun exp + | Exp_constraint (exp, type') -> Exp_constraint (fix_exp_fun exp, type') + | exp -> exp + in + fix_exp_fun)]) + + and case_exp = + (t case + [@gen map2 (fun left right -> { left; right }) gen_pattern (gen_sized (n / coef))]) + + and t = + | Exp_ident of ident + | Exp_constant of constant + | Exp_let of + rec_flag + * value_binding_exp + * value_binding_exp list_ + * (t[@gen gen_sized (n / coef)]) + | Exp_fun of pattern * pattern list_ * (t[@gen gen_sized (n / coef)]) + | Exp_apply of + ((t * t) + [@gen + gen_exp_apply + gen_sized + n + (fun id -> Exp_ident id) + (fun (opn1, opn2) -> Exp_apply (opn1, opn2))]) + | Exp_function of case_exp * case_exp list_ + | Exp_match of (t[@gen gen_sized (n / coef)]) * case_exp * case_exp list_ + | Exp_ifthenelse of + (t[@gen gen_sized (n / coef)]) + * (t[@gen gen_sized (n / coef)]) + * (t[@gen gen_sized (n / coef)]) option + | Exp_tuple of + (t[@gen gen_sized (n / coef)]) + * (t[@gen gen_sized (n / coef)]) + * (t[@gen gen_sized (n / coef)]) list_ + | Exp_construct of + ((ident * t option) + [@gen + gen_construct + gen_sized + n + (fun (fst_exp, snd_exp, exp_list) -> Exp_tuple (fst_exp, snd_exp, exp_list)) + (fun (id, exp_opt) -> Exp_construct (id, exp_opt))]) + | Exp_sequence of (t[@gen gen_sized (n / coef)]) * (t[@gen gen_sized (n / coef)]) + | Exp_constraint of (t[@gen gen_sized (n / coef)]) * core_type + [@@deriving show { with_path = false }, qcheck] +end + +let show_value_binding = Expression.show_value_binding_exp +let show_case = Expression.show_case_exp +let show_expression = Expression.show + +type structure_item = + | Struct_eval of Expression.t + | Struct_value of + rec_flag * Expression.value_binding_exp * Expression.value_binding_exp list_ +[@@deriving show { with_path = false }, qcheck] + +type structure = structure_item list_ [@@deriving show { with_path = false }, qcheck] diff --git a/akaML/lib/ast/ast.mli b/akaML/lib/ast/ast.mli new file mode 100644 index 00000000..b6cc62e4 --- /dev/null +++ b/akaML/lib/ast/ast.mli @@ -0,0 +1,149 @@ +[@@@ocaml.text "/*"] + +(** Copyright 2025-2026, Friend-zva, RodionovMaxim05 *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +[@@@ocaml.text "/*"] + +type 'a list_ = 'a list + +val show_list_ : (Format.formatter -> 'a -> unit) -> 'a list_ -> string + +(** Identifier *) +type ident = string + +val show_ident : ident -> string +val is_unary_minus : string -> bool +val is_keyword : ident -> bool + +type rec_flag = + | Recursive (** Recursive value binding. *) + | Nonrecursive (** Nonrecursive value binding. *) + +val show_rec_flag : rec_flag -> string + +type constant = + | Const_integer of int (** A constant integer such as [1]. *) + | Const_char of char (** A constant character such as ['a']. *) + | Const_string of string (** A constant string such as ["const"]. *) + +val show_constant : constant -> string + +type core_type = + | Type_unit (** The type [unit]. *) + | Type_char (** The type [char]. *) + | Type_int (** The type [int]. *) + | Type_string (** The type [string]. *) + | Type_bool (** The type [bool]. *) + | Type_option of core_type (** [Type_option(T)] represents [T option]. *) + | Type_var of ident (** [Type_var(T)] represents [T](a variable type such as ['a]). *) + | Type_list of core_type (** [Type_list(T)] represents [T list]. *) + | Type_tuple of core_type * core_type * core_type list_ + (** [Type_tuple(T1, T2, [T3; ... ; Tn])] represents [T1 * ... * Tn]. *) + | Type_arrow of core_type * core_type (** [Type_arrow(T1, T2)] represents [T1 -> T2]. *) + +val show_core_type : core_type -> string + +type pattern = + | Pat_any (** [Pat_any] represents [_]. *) + | Pat_var of ident (** [Pat_var(I)] represents [I](a variable pattern such as [x]). *) + | Pat_constant of constant + (** [Pat_constant(C)] represents [C](a pattern such as [1], ['a'], ["const"]). *) + | Pat_tuple of pattern * pattern * pattern list_ + (** [Pat_tuple(P1, P2, [P3; ... ; Pn])] represents [(P1, ... , Pn)]. *) + | Pat_construct of (ident * pattern option) + (** [Pat_construct(I, pat)] represents + - [()] when [I] is ["()"] and [pat] is [None], + - [false] when [I] is ["false"] and [pat] is [None], + - [true] when [I] is ["true"] and [pat] is [None], + - [None] when [I] is ["None"] and [pat] is [None], + - [Some P] when [I] is ["Some"] and [pat] is [Some P], + - [[]] when [I] is ["[]"] and [pat] is [None], + - [[ P ]] when [I] is ["::"] and [pat] is + [Some (Pat_tuple(P, Pat_construct("[]", None), []))], + - [[ P1; P2; ... ]] when [I] is ["::"] and [pat] is + [Some (Pat_tuple(P1, Pat_construct("::", Some (Pat_tuple(P2, Pat_construct("::", ...), []))), []))]. *) + | Pat_constraint of pattern * core_type + (** [Pat_constraint(P, T)] represents [P : T]. *) + +val show_pattern : pattern -> string + +(** [{P; E}] represents [let P = E]. *) +type 'exp value_binding = + { pat : pattern + ; exp : 'exp + } + +(** [{P; E}] represents [P -> E]. *) +type 'exp case = + { left : pattern + ; right : 'exp + } + +module Expression : sig + type value_binding_exp = t value_binding + and case_exp = t case + + and t = + | Exp_ident of ident (** [Exp_ident(I)] represents [I](an identifier such as [x]). *) + | Exp_constant of constant + (** [Exp_constant(C)] represents [C](an expression such as [1], ['a'], ["const"]). *) + | Exp_let of rec_flag * value_binding_exp * value_binding_exp list_ * t + (** [Exp_let(flag, {P1; E1}, [{P2; E2}; ... ; {Pn; En}], E)] represents + - [let P1 = E1 and P2 = E2 and ... and Pn = En in E] when [flag] is [Nonrecursive], + - [let rec P1 = E1 and P2 = E2 and ... and Pn = En in E] when [flag] is [Recursive]. *) + | Exp_fun of pattern * pattern list_ * t + (** [Exp_fun(P1, [P2; ... ; Pn], E)] represents [fun P1 ... Pn -> E] *) + | Exp_apply of (t * t) + (** [Exp_apply(Exp_ident(I), E)] represents [I E] when [I] from [un_op_list], + [Exp_apply(Exp_ident(I), Exp_apply(E1, E2))] represents [E1 I E2] when [I] from [bin_op_list], + [Exp_apply(Exp_ident(I), E)] represents [I E], + [Exp_apply(Exp_ident(I), Exp_apply(E1, Exp_apply(E2, ...)))] represents [I E1 E2 ... ]. *) + | Exp_function of case_exp * case_exp list_ + (** [Exp_function({P1; E1}, [{P2; E2}; ... ; {Pn; En}])] represents + [function P1 -> E1 | P2 -> E2 | ... | Pn -> En]. *) + | Exp_match of t * case_exp * case_exp list_ + (** [Exp_match(E, {P1; E1}, [{P2; E2}; ... ; {Pn; En}])] represents + [match E with P1 -> E1 | P2 -> E2 | ... | Pn -> En]. *) + | Exp_ifthenelse of t * t * t option + (** [Exp_ifthenelse(E1, E2, opt)] represents + - [if E1 then E2] when [opt] is [None], + - [if E1 then E2 else E3] when [opt] is [Some E3]. *) + | Exp_tuple of t * t * t list_ + (** [Exp_tuple(E1, E2, [E3; ... ; En])] represents [(E1, ... , En)]. *) + | Exp_construct of (ident * t option) + (** [Exp_construct(I, exp)] represents + - [()] when [I] is ["()"] and [exp] is [None], + - [false] when [I] is ["false"] and [exp] is [None], + - [true] when [I] is ["true"] and [exp] is [None], + - [None] when [I] is ["None"] and [exp] is [None], + - [Some E] when [I] is ["Some"] and [exp] is [Some E], + - [[]] when [I] is ["[]"] and [exp] is [None], + - [[ E ]] when [I] is ["::"] and [exp] is + [Some (Exp_tuple(E, Exp_construct("[]", None), []))], + - [[ E1; E2; ... ]] when [I] is ["::"] and [exp] is + [Some (Exp_tuple(E1, Exp_construct("::", Some (Exp_tuple(E2, Exp_construct("::", ...), []))), []))]. *) + | Exp_sequence of t * t (** [Exp_sequence(E1, E2)] represents [E1; E2]. *) + | Exp_constraint of t * core_type (** [Exp_constraint(E, T)] represents [(E : T)]. *) +end + +val show_value_binding : Expression.value_binding_exp -> string +val show_case : Expression.case_exp -> string +val show_expression : Expression.t -> ident + +type structure_item = + | Struct_eval of Expression.t (** [Struct_eval(E)] represents [E]. *) + | Struct_value of + rec_flag * Expression.value_binding_exp * Expression.value_binding_exp list_ + (** [Struct_value(flag, {P1; E1}, [{P2; E2}; ... ; {Pn; En}], E)] represents + - [let P1 = E1 and P2 = E2 and ... and Pn = En in E] when [flag] is [Nonrecursive], + - [let rec P1 = E1 and P2 = E2 and ... and Pn = En in E] when [flag] is [Recursive]. *) + +val show_structure_item : structure_item -> string + +type structure = structure_item list_ + +val show_structure : structure -> string +val gen_structure : structure QCheck.Gen.t +val arb_structure : structure QCheck.arbitrary diff --git a/akaML/lib/ast/dune b/akaML/lib/ast/dune new file mode 100644 index 00000000..42b42549 --- /dev/null +++ b/akaML/lib/ast/dune @@ -0,0 +1,8 @@ +(library + (name Ast) + (public_name akaML.Ast) + (libraries base qcheck-core qcheck-core.runner) + (preprocess + (pps ppx_deriving.show ppx_deriving_qcheck)) + (instrumentation + (backend bisect_ppx))) diff --git a/akaML/lib/inferencer/dune b/akaML/lib/inferencer/dune new file mode 100644 index 00000000..4fd598f8 --- /dev/null +++ b/akaML/lib/inferencer/dune @@ -0,0 +1,6 @@ +(library + (name Inferencer) + (public_name akaML.Inferencer) + (libraries Ast Pprinter) + (instrumentation + (backend bisect_ppx))) diff --git a/akaML/lib/inferencer/inferencer.ml b/akaML/lib/inferencer/inferencer.ml new file mode 100644 index 00000000..b32e988f --- /dev/null +++ b/akaML/lib/inferencer/inferencer.ml @@ -0,0 +1,842 @@ +[@@@ocaml.text "/*"] + +(** Copyright 2025-2026, Friend-zva, RodionovMaxim05 *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +[@@@ocaml.text "/*"] + +open Ast + +type error = + [ `No_variable_rec + | `No_arg_rec + | `Bound_several_times of string + | `Occurs_check of string * core_type + | `No_variable of string + | `Unification_failed of core_type * core_type + ] + +let pp_error ppf : error -> _ = function + | `No_variable_rec -> + Format.fprintf ppf "Only variables are allowed as left-hand side of `let rec'" + | `No_arg_rec -> + Format.fprintf + ppf + "This kind of expression is not allowed as right-hand side of `let rec'" + | `Bound_several_times id -> + Format.fprintf ppf "Variable '%s' is bound several times in the matching" id + | `Occurs_check (id, ty) -> + Format.fprintf + ppf + "Occurs check failed: the type variable %s occurs inside %a" + id + Pprinter.pp_core_type + ty + | `No_variable id -> Format.fprintf ppf "Undefined variable '%s'" id + | `Unification_failed (l, r) -> + Format.fprintf + ppf + "Unification failed on %a and %a" + Pprinter.pp_core_type + l + Pprinter.pp_core_type + r +;; + +module State = struct + open Base + + type 'a t = int -> int * ('a, error) Result.t + + let return x state = state, Result.return x + let fail e state = state, Result.fail e + + let ( >>= ) (monad : 'a t) (f : 'a -> 'b t) : 'b t = + fun state -> + match monad state with + | state, Result.Ok result -> f result state + | state, Result.Error e -> fail e state + ;; + + module Syntax = struct + let ( let* ) = ( >>= ) + end + + let ( >>| ) (monad : 'a t) (f : 'a -> 'b) : 'b t = + fun state -> + match monad state with + | state, Result.Ok result -> return (f result) state + | state, Result.Error e -> fail e state + ;; + + module RList = struct + let fold_left xs ~init ~f = + List.fold_left xs ~init ~f:(fun acc x -> + let open Syntax in + let* acc = acc in + f acc x) + ;; + + let fold_right xs ~init ~f = + List.fold_right xs ~init ~f:(fun x acc -> + let open Syntax in + let* acc = acc in + f x acc) + ;; + end + + module RMap = struct + let fold map ~init ~f = + Map.fold map ~init ~f:(fun ~key ~data acc -> + let open Syntax in + let* acc = acc in + f key data acc) + ;; + end + + let fresh state = state + 1, Result.Ok state + let run monad = snd (monad 0) +end + +module VarSet = struct + include Set.Make (String) + + let pp ppf set = + Format.fprintf ppf "[ "; + iter (Format.fprintf ppf "%s; ") set; + Format.fprintf ppf "]" + ;; +end + +type scheme = Scheme of VarSet.t * core_type + +let pp_scheme ppf = function + | Scheme (varset, ty) -> + Format.fprintf ppf "{ %a : %a }" VarSet.pp varset Pprinter.pp_core_type ty +;; + +module Type = struct + let rec occurs_in var = function + | Type_option ty | Type_list ty -> occurs_in var ty + | Type_var name -> name = var + | Type_tuple (fst_ty, snd_ty, ty_list) -> + List.exists (occurs_in var) (fst_ty :: snd_ty :: ty_list) + | Type_arrow (l, r) -> occurs_in var l || occurs_in var r + | _ -> false + ;; + + let free_vars = + let rec helper acc = function + | Type_option ty | Type_list ty -> helper acc ty + | Type_var name -> VarSet.add name acc + | Type_tuple (fst_ty, snd_ty, ty_list) -> + List.fold_left helper acc (fst_ty :: snd_ty :: ty_list) + | Type_arrow (l, r) -> helper (helper acc l) r + | _ -> acc + in + helper VarSet.empty + ;; +end + +module Subst = struct + open State + open State.Syntax + open Base + + let empty = Map.empty (module String) + let singleton1 = Map.singleton (module String) + + let singleton key value = + if Type.occurs_in key value + then fail (`Occurs_check (key, value)) + else return (Map.singleton (module String) key value) + ;; + + let remove = Map.remove + + let apply sub = + let rec helper = function + | Type_var name as ty -> + (match Map.find sub name with + | Some name -> name + | None -> ty) + | Type_option ty -> Type_option (helper ty) + | Type_list ty -> Type_list (helper ty) + | Type_tuple (fst_ty, snd_ty, ty_list) -> + Type_tuple (helper fst_ty, helper snd_ty, List.map ty_list ~f:helper) + | Type_arrow (l, r) -> Type_arrow (helper l, helper r) + | ty -> ty + in + helper + ;; + + let rec unify l r = + match l, r with + | Type_unit, Type_unit + | Type_int, Type_int + | Type_char, Type_char + | Type_string, Type_string + | Type_bool, Type_bool -> return empty + | Type_var l, Type_var r when String.equal l r -> return empty + | Type_var name, ty | ty, Type_var name -> singleton name ty + | Type_list ty1, Type_list ty2 | Type_option ty1, Type_option ty2 -> unify ty1 ty2 + | Type_tuple (fst1, snd1, list1), Type_tuple (fst2, snd2, list2) -> + (match + List.fold2 + (fst1 :: snd1 :: list1) + (fst2 :: snd2 :: list2) + ~init:(return empty) + ~f:(fun acc ty1 ty2 -> + let* sub_acc = acc in + let* unified_sub = unify (apply sub_acc ty1) (apply sub_acc ty2) in + compose sub_acc unified_sub) + with + | Ok res -> res + | _ -> fail (`Unification_failed (l, r))) + | Type_arrow (l1, r1), Type_arrow (l2, r2) -> + let* sub1 = unify l1 l2 in + let* sub2 = unify (apply sub1 r1) (apply sub1 r2) in + compose sub1 sub2 + | _ -> fail (`Unification_failed (l, r)) + + and extend key value sub = + match Map.find sub key with + | None -> + let value = apply sub value in + let* new_sub = singleton key value in + Map.fold sub ~init:(return new_sub) ~f:(fun ~key ~data acc -> + let* acc = acc in + let new_data = apply new_sub data in + return (Map.update acc key ~f:(fun _ -> new_data))) + | Some existing_value -> + let* new_sub = unify value existing_value in + compose sub new_sub + + and compose sub1 sub2 = RMap.fold sub2 ~init:(return sub1) ~f:extend + + let compose_all sub_list = RList.fold_left sub_list ~init:(return empty) ~f:compose + + let pp ppf sub = + Stdlib.Format.fprintf ppf "Subst:\n"; + Map.iteri sub ~f:(fun ~key:str ~data:ty -> + Stdlib.Format.fprintf ppf "%s <-> %a; " str Pprinter.pp_core_type ty); + Stdlib.Format.fprintf ppf "\n" + ;; +end + +module Scheme = struct + let free_vars (Scheme (bind_set, ty)) = VarSet.diff (Type.free_vars ty) bind_set + + let apply sub (Scheme (bind_set, ty)) = + let new_sub = VarSet.fold (fun key sub -> Subst.remove sub key) bind_set sub in + Scheme (bind_set, Subst.apply new_sub ty) + ;; +end + +module TypeEnv = struct + open Base + + type t = (ident, scheme, String.comparator_witness) Map.t + + let empty = Map.empty (module String) + let extend env key value = Map.update env key ~f:(fun _ -> value) + + let rec extend_with_pattern env_acc pat (Scheme (bind_set, ty) as scheme) = + match pat, ty with + | Pat_var id, _ -> extend env_acc id scheme + | Pat_tuple (fst_pat, snd_pat, pat_list), Type_tuple (fst_ty, snd_ty, ty_list) -> + let env = + List.fold2 + ~init:env_acc + ~f:(fun env pat ty -> extend_with_pattern env pat (Scheme (bind_set, ty))) + (fst_pat :: snd_pat :: pat_list) + (fst_ty :: snd_ty :: ty_list) + in + (match env with + | Ok env -> env + | _ -> env_acc) + | Pat_construct ("::", Some pat), Type_list ty -> + (match pat with + | Pat_tuple (head, tail, []) -> + let env_acc = extend_with_pattern env_acc head (Scheme (bind_set, ty)) in + extend_with_pattern env_acc tail scheme + | _ -> env_acc) + | Pat_construct ("Some", Some pat), Type_option ty -> + extend_with_pattern env_acc pat (Scheme (bind_set, ty)) + | _ -> env_acc + ;; + + let free_vars env = + Map.fold env ~init:VarSet.empty ~f:(fun ~key:_ ~data acc -> + VarSet.union acc (Scheme.free_vars data)) + ;; + + let apply sub env = Map.map env ~f:(Scheme.apply sub) + let find = Map.find + + let find_type_exn env key = + let (Scheme (_, ty)) = Map.find_exn env key in + ty + ;; + + let pp ppf env = + Stdlib.Format.fprintf ppf "TypeEnv:\n"; + Map.iteri env ~f:(fun ~key:str ~data:sch -> + Stdlib.Format.fprintf ppf "%s -> %a; " str pp_scheme sch); + Stdlib.Format.fprintf ppf "\n" + ;; +end + +module Infer = struct + open Ast.Expression + open State + open State.Syntax + + let unify = Subst.unify + let fresh_var = fresh >>| fun n -> Type_var ("'ty" ^ Int.to_string n) + + let instantiate (Scheme (bind_set, ty)) = + VarSet.fold + (fun name ty -> + let* ty = ty in + let* fresh = fresh_var in + let* sub = Subst.singleton name fresh in + return (Subst.apply sub ty)) + bind_set + (return ty) + ;; + + let generalize env ty ~remove_from_env id = + let env = + match remove_from_env, id with + | true, Some id -> Base.Map.remove env id + | _ -> env + in + let free = VarSet.diff (Type.free_vars ty) (TypeEnv.free_vars env) in + let new_free, new_ty, _ = + VarSet.fold + (fun str (temp_free, temp_ty, n) -> + let degree = n / 26 in + let new_str = + (* 97 - is number 'a' in ASCII-table *) + Printf.sprintf + "'%c%s" + (Char.chr (97 + (n mod 26))) + (if degree = 0 then "" else Int.to_string degree) + in + let sub = Subst.singleton1 str (Type_var new_str) in + let new_free = VarSet.add new_str temp_free in + let new_ty = Subst.apply sub temp_ty in + new_free, new_ty, n + 1) + free + (VarSet.empty, ty, 0) + in + Scheme (new_free, new_ty) + ;; + + let lookup_env id env = + match TypeEnv.find env id with + | Some scheme -> + let* ans = instantiate scheme in + return (Subst.empty, ans) + | None -> fail (`No_variable id) + ;; + + let rec infer_pattern env = function + | Pat_any -> + let* fresh = fresh_var in + return (env, fresh) + | Pat_var id -> + let* fresh = fresh_var in + let env = TypeEnv.extend env id (Scheme (VarSet.empty, fresh)) in + return (env, fresh) + | Pat_constant const -> + (match const with + | Const_integer _ -> return (env, Type_int) + | Const_string _ -> return (env, Type_string) + | Const_char _ -> return (env, Type_char)) + | Pat_tuple (fst_pat, snd_pat, pat_list) -> + let* env1, ty1 = infer_pattern env fst_pat in + let* env2, ty2 = infer_pattern env1 snd_pat in + let* env_rest, ty_list = + RList.fold_right + ~f:(fun pat acc -> + let* env_acc, ty_list = return acc in + let* env, ty = infer_pattern env_acc pat in + return (env, ty :: ty_list)) + ~init:(return (env2, [])) + pat_list + in + return (env_rest, Type_tuple (ty1, ty2, ty_list)) + | Pat_construct ("[]", None) -> + let* fresh = fresh_var in + return (env, Type_list fresh) + | Pat_construct ("::", Some (Pat_tuple (head, tail, []))) -> + let* fresh = fresh_var in + let* env, type_of_head = infer_pattern env head in + let* unified_sub = unify type_of_head fresh in + let env = TypeEnv.apply unified_sub env in + let rec infer_tail env sub_acc cur_pat = + let helper required_ty pat = + let* env, type_of_pat = infer_pattern env pat in + let* unified_sub = unify required_ty type_of_pat in + return (TypeEnv.apply unified_sub env, unified_sub) + in + match cur_pat with + | Pat_construct (_, None) -> return (env, sub_acc) + | Pat_construct (_, Some (Pat_tuple (next_head, next_tail, []))) -> + let* env, sub = helper fresh next_head in + let* env, final_sub = infer_tail env (sub :: sub_acc) next_tail in + return (env, final_sub) + | _ -> + let* env, sub = helper (Type_list fresh) cur_pat in + return (env, sub :: sub_acc) + in + let* env, sub_list = infer_tail env [ unified_sub ] tail in + let* final_sub = Subst.compose_all sub_list in + return (TypeEnv.apply final_sub env, Subst.apply final_sub (Type_list fresh)) + | Pat_construct (id, None) when id = "true" || id = "false" -> return (env, Type_bool) + | Pat_construct ("()", None) -> return (env, Type_unit) + | Pat_construct ("Some", Some pat) -> + let* env, ty = infer_pattern env pat in + return (env, Type_option ty) + | Pat_construct _ -> + let* fresh = fresh_var in + return (env, fresh) + | Pat_constraint (pat, c_ty) -> + let* env, ty = infer_pattern env pat in + let* unified_sub = unify ty c_ty in + return (TypeEnv.apply unified_sub env, Subst.apply unified_sub ty) + ;; + + let extend_env_with_bind_names env value_binding_list = + RList.fold_right + value_binding_list + ~init:(return (env, [])) + ~f:(fun let_bind acc -> + match let_bind with + | { pat = Pat_var id | Pat_constraint (Pat_var id, _); _ } -> + let* env, fresh_acc = return acc in + let* fresh = fresh_var in + let env = TypeEnv.extend env id (Scheme (VarSet.empty, fresh)) in + return (env, fresh :: fresh_acc) + | _ -> fail `No_variable_rec) + ;; + + let rec extract_names_from_pat func acc = function + | Pat_var id -> func acc id + | Pat_tuple (fst_pat, snd_pat, pat_list) -> + RList.fold_left + (fst_pat :: snd_pat :: pat_list) + ~init:(return acc) + ~f:(extract_names_from_pat func) + | Pat_construct ("::", Some exp) -> + (match exp with + | Pat_tuple (head, tail, []) -> + let* acc = extract_names_from_pat func acc head in + extract_names_from_pat func acc tail + | _ -> return acc) + | Pat_construct ("Some", Some pat) -> extract_names_from_pat func acc pat + | Pat_constraint (pat, _) -> extract_names_from_pat func acc pat + | _ -> return acc + ;; + + module StringSet = struct + include Set.Make (String) + + let add_id set value = + if mem value set then fail (`Bound_several_times value) else return (add value set) + ;; + end + + let check_names_from_let_binds = + RList.fold_left ~init:(return StringSet.empty) ~f:(fun set_acc { pat; _ } -> + extract_names_from_pat StringSet.add_id set_acc pat) + ;; + + let rec infer_expression env = function + | Exp_ident id -> lookup_env id env + | Exp_constant const -> + (match const with + | Const_integer _ -> return (Subst.empty, Type_int) + | Const_string _ -> return (Subst.empty, Type_string) + | Const_char _ -> return (Subst.empty, Type_char)) + | Exp_let (Nonrecursive, value_binding, value_binding_list, exp) -> + let* _ = check_names_from_let_binds (value_binding :: value_binding_list) in + let* env, sub1 = + infer_value_binding_list env Subst.empty (value_binding :: value_binding_list) + in + let* sub2, ty2 = infer_expression env exp in + let* composed_sub = Subst.compose sub2 sub1 in + return (composed_sub, ty2) + | Exp_let (Recursive, value_binding, value_binding_list, exp) -> + let* env, fresh_acc = + extend_env_with_bind_names env (value_binding :: value_binding_list) + in + let* env, sub1 = + infer_rec_value_binding_list + env + fresh_acc + Subst.empty + (value_binding :: value_binding_list) + in + let* sub2, ty2 = infer_expression env exp in + let* composed_sub = Subst.compose sub2 sub1 in + return (composed_sub, ty2) + | Exp_fun (pat, pat_list, exp) -> + let* env, ty1 = infer_pattern env pat in + let* sub, ty2 = + match pat_list with + | [] -> infer_expression env exp + | hd :: tl -> infer_expression env (Exp_fun (hd, tl, exp)) + in + return (sub, Type_arrow (Subst.apply sub ty1, ty2)) + | Exp_apply (exp1, exp2) -> + (match exp1 with + | Exp_ident opr when is_unary_minus opr -> + let* sub, ty = infer_expression env exp2 in + let* unified_sub = Subst.unify ty Type_int in + let* composed_sub = Subst.compose sub unified_sub in + return (composed_sub, Type_int) + | _ -> + let* sub1, ty1 = infer_expression env exp1 in + let* sub2, ty2 = infer_expression (TypeEnv.apply sub1 env) exp2 in + let* fresh = fresh_var in + let* sub3 = unify (Subst.apply sub2 ty1) (Type_arrow (ty2, fresh)) in + let* composed_sub = Subst.compose_all [ sub3; sub2; sub1 ] in + let final_ty = Subst.apply composed_sub fresh in + return (composed_sub, final_ty)) + | Exp_function (case, case_list) -> + let* fresh_for_matching = fresh_var in + let* fresh_for_result = fresh_var in + infer_match_exp + env + ~with_exp:false + Subst.empty + fresh_for_matching + fresh_for_result + (case :: case_list) + | Exp_match (exp, case, case_list) -> + let* exp_sub, exp_ty = infer_expression env exp in + let env = TypeEnv.apply exp_sub env in + let* fresh_for_result = fresh_var in + infer_match_exp + env + ~with_exp:true + exp_sub + exp_ty + fresh_for_result + (case :: case_list) + | Exp_tuple (fst_exp, snd_exp, exp_list) -> + let* sub1, ty1 = infer_expression env fst_exp in + let* sub2, ty2 = infer_expression (TypeEnv.apply sub1 env) snd_exp in + let env = TypeEnv.apply sub2 env in + let* sub_rest, ty_list = + RList.fold_right + ~f:(fun exp acc -> + let* sub_acc, ty_list = return acc in + let* sub, ty = infer_expression (TypeEnv.apply sub_acc env) exp in + let* sub_acc = Subst.compose sub_acc sub in + return (sub_acc, ty :: ty_list)) + ~init:(return (Subst.empty, [])) + exp_list + in + let* sub_result = Subst.compose_all [ sub1; sub2; sub_rest ] in + let ty1 = Subst.apply sub_result ty1 in + let ty2 = Subst.apply sub_result ty2 in + let ty_list = List.map (fun ty -> Subst.apply sub_result ty) ty_list in + return (sub_result, Type_tuple (ty1, ty2, ty_list)) + | Exp_construct ("[]", None) -> + let* fresh = fresh_var in + return (Subst.empty, Type_list fresh) + | Exp_construct ("::", Some (Exp_tuple (head, tail, []))) -> + let* fresh = fresh_var in + let* sub, ty = infer_expression env head in + let* unified_sub = unify fresh ty in + let* sub = Subst.compose sub unified_sub in + let rec infer_tail sub_acc cur_exp = + let helper required_ty exp = + let* sub_of_exp, type_of_exp = infer_expression env exp in + let* unified_sub = unify required_ty type_of_exp in + let* sub = Subst.compose sub_of_exp unified_sub in + return sub + in + match cur_exp with + | Exp_construct (_, None) -> return sub_acc + | Exp_construct (_, Some (Exp_tuple (next_head, next_tail, []))) -> + let* sub = helper fresh next_head in + let* final_sub = infer_tail (sub :: sub_acc) next_tail in + return final_sub + | _ -> + let* sub = helper (Type_list fresh) cur_exp in + return (sub :: sub_acc) + in + let* sub_list = infer_tail [ sub ] tail in + let* final_sub = Subst.compose_all sub_list in + return (final_sub, Subst.apply final_sub (Type_list fresh)) + | Exp_construct (id, None) when id = "true" || id = "false" -> + return (Subst.empty, Type_bool) + | Exp_construct ("()", None) -> return (Subst.empty, Type_unit) + | Exp_construct ("Some", Some pat) -> + let* sub, ty = infer_expression env pat in + return (sub, Type_option ty) + | Exp_construct _ -> + let* fresh = fresh_var in + return (Subst.empty, fresh) + | Exp_ifthenelse (if_exp, then_exp, Some else_exp) -> + let* sub1, ty1 = infer_expression env if_exp in + let* sub2, ty2 = infer_expression (TypeEnv.apply sub1 env) then_exp in + let* sub3, ty3 = infer_expression (TypeEnv.apply sub2 env) else_exp in + let* sub4 = unify ty1 Type_bool in + let* sub5 = unify ty2 ty3 in + let* final_sub = Subst.compose_all [ sub5; sub4; sub3; sub2; sub1 ] in + return (final_sub, Subst.apply final_sub ty2) + | Exp_ifthenelse (if_exp, then_exp, None) -> + let* sub1, ty1 = infer_expression env if_exp in + let* sub2, ty2 = infer_expression (TypeEnv.apply sub1 env) then_exp in + let* sub3 = unify ty1 Type_bool in + let* sub4 = unify ty2 Type_unit in + let* final_sub = Subst.compose_all [ sub4; sub3; sub2; sub1 ] in + return (final_sub, Subst.apply final_sub ty2) + | Exp_sequence (exp1, exp2) -> + let* sub1, ty1 = infer_expression env exp1 in + let* unified_sub = unify ty1 Type_unit in + let* sub2, ty2 = infer_expression (TypeEnv.apply sub1 env) exp2 in + let* final_sub = Subst.compose_all [ unified_sub; sub2; sub1 ] in + return (final_sub, ty2) + | Exp_constraint (exp, c_ty) -> + let* sub, ty = infer_expression env exp in + let* unified_sub = unify ty c_ty in + let* final_sub = Subst.compose unified_sub sub in + return (final_sub, Subst.apply unified_sub ty) + + and infer_match_exp env ~with_exp match_exp_sub match_exp_ty result_ty case_list = + let* cases_sub, case_ty = + RList.fold_left + case_list + ~init:(return (match_exp_sub, result_ty)) + ~f:(fun acc { left = pat; right = case_exp } -> + let* sub_acc, ty_acc = return acc in + let* env, pat_sub = + let* env, pat_ty = infer_pattern env pat in + let* unified_sub1 = unify match_exp_ty pat_ty in + let* pat_names = + extract_names_from_pat StringSet.add_id StringSet.empty pat + >>| StringSet.elements + in + if with_exp + then ( + let env = TypeEnv.apply unified_sub1 env in + let generalized_schemes = + Base.List.map pat_names ~f:(fun name -> + let ty = TypeEnv.find_type_exn env name in + let generalized_ty = + generalize env ty ~remove_from_env:true (Some name) + in + name, generalized_ty) + in + let env = + Base.List.fold generalized_schemes ~init:env ~f:(fun env (key, value) -> + TypeEnv.extend env key value) + in + return (env, unified_sub1)) + else return (env, unified_sub1) + in + let* composed_sub1 = Subst.compose sub_acc pat_sub in + let* case_exp_sub, case_exp_ty = + infer_expression (TypeEnv.apply composed_sub1 env) case_exp + in + let* unified_sub2 = unify ty_acc case_exp_ty in + let* composed_sub2 = + Subst.compose_all [ composed_sub1; case_exp_sub; unified_sub2 ] + in + return (composed_sub2, Subst.apply composed_sub2 ty_acc)) + in + let final_ty = + if with_exp + then case_ty + else Type_arrow (Subst.apply cases_sub match_exp_ty, case_ty) + in + return (cases_sub, final_ty) + + and infer_value_binding_list env sub let_binds = + let infer_vb new_sub env ty pat rest = + let* composed_sub = Subst.compose sub new_sub in + let env = TypeEnv.apply composed_sub env in + let generalized_ty = + generalize env (Subst.apply composed_sub ty) ~remove_from_env:false None + in + let* env, pat_ty = infer_pattern env pat in + let env = TypeEnv.extend_with_pattern env pat generalized_ty in + let* unified_sub = unify ty pat_ty in + let* final_sub = Subst.compose composed_sub unified_sub in + let env = TypeEnv.apply final_sub env in + infer_value_binding_list env final_sub rest + in + match let_binds with + | [] -> return (env, sub) + | { pat = Pat_constraint (pat, pat_ty); exp = Exp_fun (e_pat, e_pat_list, exp) } + :: rest -> + let* new_sub, ty = + infer_expression env (Exp_fun (e_pat, e_pat_list, Exp_constraint (exp, pat_ty))) + in + infer_vb new_sub env ty pat rest + | { pat = Pat_constraint (pat, pat_ty); exp = Exp_function _ as exp } :: rest -> + let* new_sub, ty = infer_expression env (Exp_constraint (exp, pat_ty)) in + infer_vb new_sub env ty pat rest + | { pat; exp } :: rest -> + let* new_sub, ty = infer_expression env exp in + infer_vb new_sub env ty pat rest + + and infer_rec_value_binding_list ?(debug = false) env fresh_acc sub let_binds = + let infer_rec_vb new_sub fresh ty id fresh_acc rest ~required_ty = + let* new_sub = + match required_ty with + | Some c_ty -> + let* unified_sub = unify ty c_ty in + Subst.compose unified_sub new_sub + | None -> return new_sub + in + let* unified_sub = unify (Subst.apply new_sub fresh) ty in + let* composed_sub = Subst.compose_all [ new_sub; unified_sub; sub ] in + if debug then Subst.pp Format.std_formatter composed_sub; + let env = TypeEnv.apply composed_sub env in + let generalized_ty = + generalize env (Subst.apply composed_sub fresh) ~remove_from_env:true (Some id) + in + if debug then pp_scheme Format.std_formatter generalized_ty; + let env = TypeEnv.extend env id generalized_ty in + infer_rec_value_binding_list ~debug env fresh_acc composed_sub rest + in + match let_binds, fresh_acc with + | [], _ -> return (env, sub) + | ( { pat = Pat_var id; exp = (Exp_fun _ | Exp_function _) as exp } :: rest + , fresh :: fresh_acc ) -> + let* new_sub, ty = infer_expression env exp in + infer_rec_vb new_sub fresh ty id fresh_acc rest ~required_ty:None + | ( { pat = Pat_constraint (Pat_var id, pat_ty); exp = Exp_fun (pat, pat_list, exp) } + :: rest + , fresh :: fresh_acc ) -> + let* new_sub, ty = + infer_expression env (Exp_fun (pat, pat_list, Exp_constraint (exp, pat_ty))) + in + infer_rec_vb new_sub fresh ty id fresh_acc rest ~required_ty:None + | ( { pat = Pat_constraint (Pat_var id, pat_ty); exp = Exp_function _ as exp } :: rest + , fresh :: fresh_acc ) -> + let* new_sub, ty = infer_expression env (Exp_constraint (exp, pat_ty)) in + infer_rec_vb new_sub fresh ty id fresh_acc rest ~required_ty:None + | { pat = Pat_var id; exp } :: rest, fresh :: fresh_acc -> + let* new_sub, ty = infer_expression env exp in + let update_fresh = Subst.apply new_sub fresh in + if ty = update_fresh + then fail `No_arg_rec + else infer_rec_vb new_sub fresh ty id fresh_acc rest ~required_ty:None + | { pat = Pat_constraint (Pat_var id, pat_ty); exp } :: rest, fresh :: fresh_acc -> + let* new_sub, ty = infer_expression env exp in + let update_fresh = Subst.apply new_sub fresh in + if ty = update_fresh + then fail `No_arg_rec + else infer_rec_vb new_sub fresh ty id fresh_acc rest ~required_ty:(Some pat_ty) + | _ -> fail `No_variable_rec + ;; + + let infer_structure_item ~debug (env, out_list) = + let get_names_from_let_binds env = + RList.fold_left ~init:(return []) ~f:(fun acc { pat; _ } -> + extract_names_from_pat + (fun acc id -> + return (List.rev_append [ Some id, TypeEnv.find_type_exn env id ] acc)) + acc + pat) + in + function + | Struct_eval exp -> + let* _, ty = infer_expression env exp in + return (env, (None, ty) :: out_list) + | Struct_value (Nonrecursive, value_binding, value_binding_list) -> + let value_binding_list = value_binding :: value_binding_list in + let* _ = check_names_from_let_binds value_binding_list in + let* env, _ = infer_value_binding_list env Subst.empty value_binding_list in + let* id_list_rev = get_names_from_let_binds env value_binding_list in + let id_list = List.rev id_list_rev in + if debug then TypeEnv.pp Format.std_formatter env; + return (env, List.rev_append id_list out_list) + | Struct_value (Recursive, value_binding, value_binding_list) -> + let value_binding_list = value_binding :: value_binding_list in + let* env, fresh_acc = extend_env_with_bind_names env value_binding_list in + let* env, _ = + infer_rec_value_binding_list env fresh_acc Subst.empty value_binding_list + in + let* id_list_rev = get_names_from_let_binds env value_binding_list in + let id_list = List.rev id_list_rev in + if debug then TypeEnv.pp Format.std_formatter env; + return (env, List.rev_append id_list out_list) + ;; + + let infer_srtucture ~debug env ast = + let* env, out_list_rev = + RList.fold_left ast ~init:(return (env, [])) ~f:(infer_structure_item ~debug) + in + let out_list = List.rev out_list_rev in + let remove_duplicates = + let fun_equal el1 el2 = + match el1, el2 with + | (Some id1, _), (Some id2, _) -> String.equal id1 id2 + | _ -> false + in + function + | x :: xs when not (Base.List.mem xs x ~equal:fun_equal) -> x :: xs + | _ :: xs -> xs + | [] -> [] + in + return (env, remove_duplicates out_list) + ;; +end + +let empty_env = TypeEnv.empty + +let bin_op_scheme = + Scheme (VarSet.empty, Type_arrow (Type_int, Type_arrow (Type_int, Type_int))) +;; + +let comp_op_scheme = + Scheme + (VarSet.singleton "a", Type_arrow (Type_var "a", Type_arrow (Type_var "a", Type_bool))) +;; + +let log_op_scheme = + Scheme (VarSet.empty, Type_arrow (Type_bool, Type_arrow (Type_bool, Type_bool))) +;; + +let env_with_print_funs = + let initial_env = + [ (* Printing *) + "print_int", Scheme (VarSet.empty, Type_arrow (Type_int, Type_unit)) + ; "print_endline", Scheme (VarSet.empty, Type_arrow (Type_string, Type_unit)) + ; (* Arithmetic *) + "+", bin_op_scheme + ; "-", bin_op_scheme + ; "*", bin_op_scheme + ; "/", bin_op_scheme + ; (* Comparisons *) + "=", comp_op_scheme + ; "<>", comp_op_scheme + ; "<", comp_op_scheme + ; ">", comp_op_scheme + ; "<=", comp_op_scheme + ; ">=", comp_op_scheme + ; (* Logics *) + "&&", log_op_scheme + ; "||", log_op_scheme + ] + in + List.fold_left + (fun env (id, sch) -> TypeEnv.extend env id sch) + TypeEnv.empty + initial_env +;; + +let run_inferencer ?(debug = false) env ast = + State.run (Infer.infer_srtucture ~debug env ast) +;; diff --git a/akaML/lib/inferencer/inferencer.mli b/akaML/lib/inferencer/inferencer.mli new file mode 100644 index 00000000..0a418f70 --- /dev/null +++ b/akaML/lib/inferencer/inferencer.mli @@ -0,0 +1,49 @@ +[@@@ocaml.text "/*"] + +(** Copyright 2025-2026, Friend-zva, RodionovMaxim05 *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +[@@@ocaml.text "/*"] + +type error = + [ `No_variable_rec + (** Represents an error where a recursive variable is not allowed because that would lead to infinite recursion. + E.g. [let rec x = x + 1] *) + | `No_arg_rec + (** Represents an error where the left-hand side of the recursive binding is not a var. + E.g. [let rec [ a; b ] = ..] *) + | `Bound_several_times of string + (** Represents an error where a pattern bound the variable multiple times. + E.g. [let x, x = ..] *) + | `Occurs_check of string * Ast.core_type + (** Represents an occurs check failure. + This occurs when attempting to unify types, and one type is found to occur within another in a way that violates the rules of type systems. + E.g. [let rec f x = f] *) + | `No_variable of string + (** Represents an error indicating that a variable could not be found in the current scope. *) + | `Unification_failed of Ast.core_type * Ast.core_type + (** Represents that type unification has failed. + This occurs when two types cannot made equivalent during type inference. *) + ] + +val pp_error : Format.formatter -> error -> unit + +module VarSet : sig + type t = Set.Make(String).t +end + +type scheme = Scheme of VarSet.t * Ast.core_type + +module TypeEnv : sig + type t = (Ast.ident, scheme, Base.String.comparator_witness) Base.Map.t +end + +val empty_env : TypeEnv.t +val env_with_print_funs : TypeEnv.t + +val run_inferencer + : ?debug:bool + -> TypeEnv.t + -> Ast.structure + -> (TypeEnv.t * (Ast.ident option * Ast.core_type) list, error) result diff --git a/akaML/lib/parser/dune b/akaML/lib/parser/dune new file mode 100644 index 00000000..0057fb96 --- /dev/null +++ b/akaML/lib/parser/dune @@ -0,0 +1,6 @@ +(library + (name Parser) + (public_name akaML.Parser) + (libraries base angstrom Ast) + (instrumentation + (backend bisect_ppx))) diff --git a/akaML/lib/parser/parser.ml b/akaML/lib/parser/parser.ml new file mode 100644 index 00000000..ace094fe --- /dev/null +++ b/akaML/lib/parser/parser.ml @@ -0,0 +1,611 @@ +[@@@ocaml.text "/*"] + +(** Copyright 2025-2026, Friend-zva, RodionovMaxim05 *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +[@@@ocaml.text "/*"] + +open Base +open Ast +open Ast.Expression +open Angstrom + +(* ==================== Utils ==================== *) + +let skip_whitespaces = skip_while Char.is_whitespace + +let parse_comments = + skip_whitespaces *> string "(*" *> many_till any_char (string "*)") *> return () +;; + +let ws = many parse_comments *> skip_whitespaces +let token str = ws *> string str + +let skip_parens parse = + token "(" *> parse <* (token ")" <|> fail "There is no closing bracket.") +;; + +let is_separator = function + | ')' + | '(' + | '<' + | '>' + | '@' + | ',' + | ';' + | ':' + | '\\' + | '"' + | '/' + | '[' + | ']' + | '?' + | '=' + | '{' + | '}' + | ' ' + | '\r' + | '\t' + | '\n' + | '*' + | '-' -> true + | _ -> false +;; + +let bin_op_list = [ "*"; "/"; "+"; "-"; "^"; ">="; "<="; "<>"; "="; ">"; "<"; "&&"; "||" ] +let is_operator opr = List.exists bin_op_list ~f:(fun str -> String.equal str opr) + +let keyword str = + token str + *> + let* is_space = + peek_char + >>| function + | Some c -> is_separator c + | None -> true + in + if is_space + then return str <* ws + else fail (Printf.sprintf "There is no separator after %S." str) +;; + +let safe_tl = function + | [] -> [] + | _ :: tail -> tail +;; + +let parse_chain_left_associative parse parse_fun = + let rec go acc = + (let* f = parse_fun in + let* elem = parse in + go (f acc elem)) + <|> return acc + in + let* elem = parse in + go elem +;; + +let parse_chain_right_associative parse parse_fun = + let rec go acc = + (let* f = parse_fun in + let* elem = parse in + let* next_elem = go elem in + return (f acc next_elem)) + <|> return acc + in + let* elem = parse in + go elem +;; + +(* ==================== Ident ==================== *) + +let parse_val_ident = + ws + *> + let* fst_char = + satisfy (function + | 'a' .. 'z' | '_' -> true + | _ -> false) + >>| String.of_char + in + let* rest_str = + take_while (function + | 'A' .. 'Z' | 'a' .. 'z' | '0' .. '9' | '_' | '\'' -> true + | _ -> false) + in + let id = fst_char ^ rest_str in + if is_keyword id then fail (Printf.sprintf "Impossible name: %S." id) else return id +;; + +let parse_operator op_list = + choice (List.map ~f:(fun opr -> token opr *> return opr) op_list) +;; + +let parse_op_ident = ws *> char '(' *> ws *> parse_operator bin_op_list <* ws <* char ')' +let parse_ident = parse_val_ident <|> parse_op_ident + +(* ==================== Rec_flag ==================== *) + +let parse_rec_flag = ws *> option Nonrecursive (keyword "rec" *> return Recursive) + +(* ==================== Constant ==================== *) + +let parse_const_int = + take_while1 Char.is_digit >>| fun int_value -> Const_integer (Int.of_string int_value) +;; + +let parse_const_char = + string "\'" *> any_char <* string "\'" >>| fun char_value -> Const_char char_value +;; + +let parse_const_string = + string "\"" *> take_till (Char.equal '\"') + <* string "\"" + >>| fun str_value -> Const_string str_value +;; + +let parse_constant = + ws *> choice [ parse_const_int; parse_const_char; parse_const_string ] +;; + +(* =================== Core_type =================== *) + +let parse_type_var = + token "'" + *> + let* fst_char = + satisfy (function + | 'a' .. 'z' -> true + | _ -> false) + >>| String.of_char + in + let* is_valid_snd_char = + peek_char + >>| function + | Some snd_char -> + (match snd_char with + | 'A' .. 'Z' | 'a' .. 'z' | '0' .. '9' | '_' -> true + | char' when is_separator char' -> true + | _ -> false) + | _ -> true + in + let* rest_str = + take_while (function + | 'A' .. 'Z' | 'a' .. 'z' | '0' .. '9' | '_' | '\'' -> true + | _ -> false) + in + let type_var = fst_char ^ rest_str in + if is_valid_snd_char && not (is_keyword type_var) + then return (Type_var ("'" ^ type_var)) + else fail (Printf.sprintf "Impossible type name: %S." type_var) +;; + +let parse_base_type = + choice + [ keyword "unit" *> return Type_unit + ; keyword "int" *> return Type_int + ; keyword "char" *> return Type_char + ; keyword "string" *> return Type_string + ; keyword "bool" *> return Type_bool + ; parse_type_var + ] +;; + +let parse_list_or_option_type parse_type = + let f acc_ty = function + | "list" -> Type_list acc_ty + | _ -> Type_option acc_ty + in + let chain_left_associative = + let rec go acc_ty = + (let* ty = keyword "list" <|> keyword "option" in + go (f acc_ty ty)) + <|> return acc_ty + in + let* fst_ty = parse_type in + go fst_ty + in + chain_left_associative +;; + +let parse_tuple_type parse_type = + let* fst_type = parse_type in + let* snd_type = token "*" *> parse_type in + let* type_list = many (token "*" *> parse_type) in + return (Type_tuple (fst_type, snd_type, type_list)) +;; + +let rec parse_arrow_type parse_type = + let* type1 = parse_type in + let* type2 = token "->" *> (parse_arrow_type parse_type <|> parse_type) in + return (Type_arrow (type1, type2)) +;; + +let parse_core_type = + ws + *> fix (fun parse_full_type -> + let parse_type = parse_base_type <|> skip_parens parse_full_type in + let parse_type = parse_list_or_option_type parse_type <|> parse_type in + let parse_type = parse_tuple_type parse_type <|> parse_type in + parse_arrow_type parse_type <|> parse_type) +;; + +(* ==================== Pattern & Expression ==================== *) + +let parse_construct_base_keyword = + choice [ keyword "true"; keyword "false"; keyword "None"; keyword "()" ] +;; + +let parse_construct_keyword_some parse = + let* tag = keyword "Some" in + let* opt = parse >>| Option.some in + return (tag, opt) +;; + +let parse_constraint parse = + let* elem = token "(" *> parse in + let* type' = token ":" *> parse_core_type <* token ")" in + return (elem, type') +;; + +let parse_tuple parse tuple = + let* fst = parse in + let* snd = token "," *> parse in + let* tail = many (token "," *> parse) in + return (tuple (fst, snd, tail)) +;; + +let parse_construct_list_1 parse construct func = + token "[" *> sep_by (token ";") parse + <* token "]" + >>| List.fold_right ~init:(construct ("[]", None)) ~f:func +;; + +let parse_construct_list_2 parse construct tuple = + parse_chain_right_associative + parse + (token "::" *> return (fun acc elem -> construct ("::", Some (tuple (acc, elem, []))))) +;; + +(* -------------------- Pattern -------------------- *) + +let parse_pat_any = keyword "_" *> return Pat_any +let parse_pat_var = parse_ident >>| fun var -> Pat_var var +let parse_pat_constant = parse_constant >>| fun const -> Pat_constant const + +let parse_pat_construct_base_keyword = + parse_construct_base_keyword >>| fun tag -> Pat_construct (tag, None) +;; + +let parse_base_pat = + choice + [ parse_pat_any; parse_pat_var; parse_pat_constant; parse_pat_construct_base_keyword ] +;; + +let parse_pat_construct_keyword_some parse_pat = + parse_construct_keyword_some (parse_base_pat <|> skip_parens parse_pat) + >>| fun (tag, pat_opt) -> Pat_construct (tag, pat_opt) +;; + +let parse_pat_constraint parse_pat = + parse_constraint parse_pat >>| fun (pat, type') -> Pat_constraint (pat, type') +;; + +let parse_pat_tuple parse_pat = + parse_tuple parse_pat (fun (fst_pat, snd_pat, pat_list) -> + Pat_tuple (fst_pat, snd_pat, pat_list)) +;; + +let parse_pat_construct_list_1 parse_pat = + parse_construct_list_1 + parse_pat + (fun (tag, pat_opt) -> Pat_construct (tag, pat_opt)) + (fun pat acc_pat -> Pat_construct ("::", Some (Pat_tuple (pat, acc_pat, [])))) +;; + +let parse_pat_construct_list_2 parse_pat = + parse_construct_list_2 + parse_pat + (fun (tag, pat_opt) -> Pat_construct (tag, pat_opt)) + (fun (fst_pat, snd_pat, pat_list) -> Pat_tuple (fst_pat, snd_pat, pat_list)) +;; + +let parse_pattern = + ws + *> fix (fun parse_full_pat -> + let parse_pat = + choice + [ parse_base_pat + ; parse_pat_construct_keyword_some parse_full_pat + ; parse_pat_constraint parse_full_pat + ; skip_parens parse_full_pat + ] + in + let parse_pat = parse_pat_construct_list_1 parse_pat <|> parse_pat in + let parse_pat = parse_pat_construct_list_2 parse_pat <|> parse_pat in + parse_pat_tuple parse_pat <|> parse_pat) +;; + +(* -------------------- Operator -------------------- *) + +let bin_op chain1 parse_exp parse_fun_op = + chain1 + parse_exp + (parse_fun_op + >>| fun opr exp1 exp2 -> Exp_apply (Exp_apply (Exp_ident opr, exp1), exp2)) +;; + +let parse_left_bin_op = bin_op parse_chain_left_associative +let parse_right_bin_op = bin_op parse_chain_right_associative +let mul_div = parse_operator [ "*"; "/" ] +let add_sub = parse_operator [ "+"; "-" ] +let cmp = parse_operator [ ">="; "<="; "<>"; "="; ">"; "<" ] +let and_ = parse_operator [ "&&" ] +let or_ = parse_operator [ "||" ] + +(* -------------------- Value_binding -------------------- *) + +let parse_constraint_vb parse_exp opr = + let* type' = token ":" *> parse_core_type in + let* exp = token opr *> parse_exp in + return (Exp_constraint (exp, type')) +;; + +let parse_fun_binding parse_exp = + let* pat_var = parse_pat_var in + let* pat_list = many1 parse_pattern in + choice + [ (let* exp = parse_constraint_vb parse_exp "=" in + match exp with + | Exp_constraint (exp, type') -> + return + { pat = Pat_constraint (pat_var, type') + ; exp = Exp_fun (List.hd_exn pat_list, safe_tl pat_list, exp) + } + | _ -> + return + { pat = pat_var; exp = Exp_fun (List.hd_exn pat_list, safe_tl pat_list, exp) }) + ; (let* exp = token "=" *> parse_exp in + return + { pat = pat_var; exp = Exp_fun (List.hd_exn pat_list, safe_tl pat_list, exp) }) + ] +;; + +let parse_simple_binding parse_exp = + let* pat = parse_pattern in + choice + [ (let* exp = parse_constraint_vb parse_exp "=" in + match exp with + | Exp_constraint (exp, type') -> return { pat = Pat_constraint (pat, type'); exp } + | _ -> return { pat; exp }) + ; (let* exp = token "=" *> parse_exp in + return { pat; exp }) + ] +;; + +let parse_value_binding_list parse_exp = + sep_by1 + (keyword "and") + (ws *> (parse_fun_binding parse_exp <|> parse_simple_binding parse_exp)) +;; + +(* -------------------- Case -------------------- *) + +let parse_case parse_exp = + ws + *> option () (token "|" *> return ()) + *> + let* pat = parse_pattern in + let* exp = token "->" *> parse_exp in + return { left = pat; right = exp } +;; + +(* -------------------- Expression -------------------- *) + +let parse_exp_ident = parse_ident >>| fun id -> Exp_ident id +let parse_exp_constant = parse_constant >>| fun const -> Exp_constant const + +let parse_exp_construct_base_keyword = + parse_construct_base_keyword >>| fun tag -> Exp_construct (tag, None) +;; + +let parse_base_exp = + choice [ parse_exp_ident; parse_exp_constant; parse_exp_construct_base_keyword ] +;; + +let parse_exp_sequence parse_exp = + parse_chain_left_associative + parse_exp + (token ";" *> return (fun exp1 exp2 -> Exp_sequence (exp1, exp2))) +;; + +let parse_exp_construct_keyword_some parse_exp = + parse_construct_keyword_some (parse_base_exp <|> skip_parens parse_exp) + >>| fun (tag, exp_opt) -> Exp_construct (tag, exp_opt) +;; + +let parse_exp_constraint parse_exp = + parse_constraint parse_exp >>| fun (exp, type') -> Exp_constraint (exp, type') +;; + +let parse_exp_tuple parse_exp = + parse_tuple parse_exp (fun (fst_exp, snd_exp, exp_list) -> + Exp_tuple (fst_exp, snd_exp, exp_list)) +;; + +let parse_exp_construct_list_1 parse_exp = + let parse_exp_sequence = + skip_parens (parse_exp_sequence parse_exp) >>| fun exp -> true, exp + in + let parse_exp_list = parse_exp >>| fun exp -> false, exp in + parse_construct_list_1 + (parse_exp_sequence <|> parse_exp_list) + (fun (tag, exp_opt) -> Exp_construct (tag, exp_opt)) + (fun opt_exp acc_exp -> + let rec fix_exp_sequence opt_exp acc_exp = + match opt_exp with + | false, Exp_sequence (exp1, (Exp_sequence _ as exp2)) -> + fix_exp_sequence (false, exp1) (fix_exp_sequence (true, exp2) acc_exp) + | false, Exp_sequence (exp1, exp2) -> + fix_exp_sequence (false, exp1) (fix_exp_sequence (false, exp2) acc_exp) + | _, exp -> Exp_construct ("::", Some (Exp_tuple (exp, acc_exp, []))) + in + fix_exp_sequence opt_exp acc_exp) +;; + +let parse_exp_construct_list_2 parse_exp = + parse_construct_list_2 + parse_exp + (fun (tag, exp_opt) -> Exp_construct (tag, exp_opt)) + (fun (fst_exp, snd_exp, exp_list) -> Exp_tuple (fst_exp, snd_exp, exp_list)) +;; + +let parse_exp_let parse_exp = + keyword "let" + *> + let* rec_flag = parse_rec_flag in + let* value_binding_list = parse_value_binding_list parse_exp <* keyword "in" in + let* exp = parse_exp in + return + (Exp_let (rec_flag, List.hd_exn value_binding_list, safe_tl value_binding_list, exp)) +;; + +let parse_exp_fun parse_exp = + keyword "fun" + *> + let* pat = parse_pattern in + let* pat_list = many parse_pattern in + choice + [ (let* exp = parse_constraint_vb parse_exp "->" in + match exp with + | Exp_constraint (exp, type') -> + return (Exp_fun (Pat_constraint (pat, type'), pat_list, exp)) + | _ -> return (Exp_fun (pat, pat_list, exp))) + ; (let* exp = token "->" *> parse_exp in + return (Exp_fun (pat, pat_list, exp))) + ] +;; + +let parse_exp_function parse_exp = + keyword "function" + *> + let* case_list = sep_by1 (token "|") (parse_case parse_exp) in + return (Exp_function (List.hd_exn case_list, safe_tl case_list)) +;; + +let parse_exp_match parse_exp = + let* exp = keyword "match" *> parse_exp <* keyword "with" in + let* case_list = sep_by1 (token "|") (parse_case parse_exp) in + return (Exp_match (exp, List.hd_exn case_list, safe_tl case_list)) +;; + +let parse_exp_ifthenelse parse_exp = + let* if_exp = keyword "if" *> parse_exp in + let* then_exp = keyword "then" *> parse_exp in + let* else_exp = + option None (keyword "else" >>| Option.some) + >>= function + | None -> return None + | Some _ -> parse_exp >>| Option.some + in + return (Exp_ifthenelse (if_exp, then_exp, else_exp)) +;; + +let parse_top_exp parse_exp = + choice + [ parse_exp_let parse_exp + ; parse_exp_fun parse_exp + ; parse_exp_function parse_exp + ; parse_exp_match parse_exp + ; parse_exp_ifthenelse parse_exp + ] +;; + +let parse_exp_apply_fun parse_exp = + parse_chain_left_associative + parse_exp + (return (fun exp1 exp2 -> Exp_apply (exp1, exp2))) +;; + +let parse_exp_apply_un_op parse_exp = + let is_not_space = function + | '(' | '[' | '_' | '\'' | '\"' -> true + | c -> Char.is_alphanum c + in + let string_un_op str = + token str + *> + let* char_value = peek_char_fail in + if is_not_space char_value + then return str + else fail (Printf.sprintf "There is no space after unary minus.") + in + string_un_op "-" *> parse_exp + >>| (fun exp -> Exp_apply (Exp_ident "~-", exp)) + <|> parse_exp +;; + +let parse_exp_apply_bin_op parse_exp = + let parse_exp = parse_left_bin_op parse_exp mul_div in + let parse_exp = parse_left_bin_op parse_exp add_sub in + let parse_exp = parse_left_bin_op parse_exp cmp in + let parse_exp = parse_right_bin_op parse_exp and_ in + parse_right_bin_op parse_exp or_ +;; + +let parse_exp_apply parse_exp = + let parse_exp = parse_exp_apply_fun parse_exp in + let parse_exp = parse_exp_apply_un_op parse_exp in + parse_exp_apply_bin_op parse_exp +;; + +let parse_expression = + ws + *> fix (fun parse_full_exp -> + let parse_exp = + choice + [ parse_base_exp + ; parse_exp_construct_keyword_some parse_full_exp + ; parse_exp_constraint parse_full_exp + ; parse_exp_construct_list_1 parse_full_exp + ; parse_top_exp parse_full_exp + ; skip_parens parse_full_exp + ] + in + let parse_exp = parse_exp_apply parse_exp <|> parse_exp in + let parse_exp = parse_exp_construct_list_2 parse_exp <|> parse_exp in + let parse_exp = parse_exp_tuple parse_exp <|> parse_exp in + let parse_exp = parse_exp_sequence parse_exp <|> parse_exp in + parse_top_exp parse_full_exp <|> parse_exp) +;; + +(* ==================== Structure ==================== *) + +let parse_struct_value = + keyword "let" + *> + let* rec_flag = parse_rec_flag in + let* value_binding_list = parse_value_binding_list parse_expression in + option + (Struct_value (rec_flag, List.hd_exn value_binding_list, safe_tl value_binding_list)) + (keyword "in" *> parse_expression + >>| fun exp -> + Struct_eval + (Exp_let (rec_flag, List.hd_exn value_binding_list, safe_tl value_binding_list, exp)) + ) +;; + +let parse_structure = + ws + *> + let parse_structure_item = + parse_struct_value <|> (parse_expression >>| fun exp -> Struct_eval exp) + in + let semicolons = many (token ";;") in + sep_by semicolons parse_structure_item <* semicolons <* ws +;; + +(* ==================== Execute ==================== *) + +let parse = parse_string ~consume:All parse_structure diff --git a/akaML/lib/parser/parser.mli b/akaML/lib/parser/parser.mli new file mode 100644 index 00000000..1337c45d --- /dev/null +++ b/akaML/lib/parser/parser.mli @@ -0,0 +1,10 @@ +[@@@ocaml.text "/*"] + +(** Copyright 2025-2026, Friend-zva, RodionovMaxim05 *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +[@@@ocaml.text "/*"] + +val is_operator : string -> bool +val parse : string -> (Ast.structure, string) result diff --git a/akaML/lib/pprinter/dune b/akaML/lib/pprinter/dune new file mode 100644 index 00000000..9b1ec060 --- /dev/null +++ b/akaML/lib/pprinter/dune @@ -0,0 +1,6 @@ +(library + (name Pprinter) + (public_name akaML.Pprinter) + (libraries base Ast Parser) + (instrumentation + (backend bisect_ppx))) diff --git a/akaML/lib/pprinter/pprinter.ml b/akaML/lib/pprinter/pprinter.ml new file mode 100644 index 00000000..d9e9004a --- /dev/null +++ b/akaML/lib/pprinter/pprinter.ml @@ -0,0 +1,279 @@ +[@@@ocaml.text "/*"] + +(** Copyright 2025-2026, Friend-zva, RodionovMaxim05 *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +[@@@ocaml.text "/*"] + +open Ast +open Ast.Expression +open Parser +open Format + +let pp_rec_flag ppf = function + | Recursive -> fprintf ppf "let rec " + | Nonrecursive -> fprintf ppf "let " +;; + +let pp_comma ppf () = fprintf ppf "@,, " + +let pp_and indent ppf () = + pp_force_newline ppf (); + pp_open_hovbox ppf indent; + fprintf ppf "and " +;; + +let pp_ident ppf = function + | op when is_operator op -> fprintf ppf "( %s )" op + | op -> fprintf ppf "%s" op +;; + +let pp_constant ppf = function + | Const_integer n -> fprintf ppf "%d" n + | Const_char c -> fprintf ppf "'%c'" c + | Const_string s -> fprintf ppf "%S" s +;; + +let rec pp_core_type_deep n ppf = function + | Type_unit -> fprintf ppf "unit" + | Type_int -> fprintf ppf "int" + | Type_char -> fprintf ppf "char" + | Type_string -> fprintf ppf "string" + | Type_bool -> fprintf ppf "bool" + | Type_option type' -> fprintf ppf "%a option" (pp_core_type_deep 2) type' + | Type_var type' -> pp_ident ppf type' + | Type_list type' -> fprintf ppf "%a list" (pp_core_type_deep 2) type' + | Type_tuple (fst_type, snd_type, type_list) -> + if n = 2 then fprintf ppf "("; + fprintf ppf "%a" (pp_core_type_deep 2) fst_type; + List.iter + (fun type' -> + fprintf ppf " * "; + fprintf ppf "%a" (pp_core_type_deep 2) type') + (snd_type :: type_list); + if n = 2 then fprintf ppf ")" + | Type_arrow (fst_type, snd_type) -> + if n <> 0 then fprintf ppf "("; + fprintf ppf "%a -> %a" (pp_core_type_deep 1) fst_type (pp_core_type_deep 0) snd_type; + if n <> 0 then fprintf ppf ")" +;; + +let pp_core_type = pp_core_type_deep 0 + +let rec pp_pattern_deep need_parens ppf = function + | Pat_any -> fprintf ppf "_" + | Pat_var var -> pp_ident ppf var + | Pat_constant const -> pp_constant ppf const + | Pat_tuple (fst_pat, snd_pat, pat_list) -> + pp_open_hvbox ppf 0; + if need_parens then fprintf ppf "( "; + fprintf + ppf + "%a@]" + (pp_print_list ~pp_sep:pp_comma (pp_pattern_deep true)) + (fst_pat :: snd_pat :: pat_list); + if need_parens then fprintf ppf " )" + | Pat_construct ("::", Some (Pat_tuple (head, tail, []))) -> + fprintf ppf "@[[ %a" (pp_pattern_deep true) head; + let rec pp_tail = function + | Pat_construct (_, None) -> fprintf ppf "@ ]@]" + | Pat_construct (_, Some (Pat_tuple (next_head, next_tail, []))) -> + fprintf ppf "@,; %a" (pp_pattern_deep true) next_head; + pp_tail next_tail + | Pat_construct (_, Some _) -> () + | pat -> fprintf ppf ";@ %a@ ]@]" (pp_pattern_deep true) pat + in + pp_tail tail + | Pat_construct (tag, None) -> fprintf ppf "%s" tag + | Pat_construct ("Some", Some pat) -> fprintf ppf "Some (%a)" (pp_pattern_deep true) pat + | Pat_construct _ -> () + | Pat_constraint (pat, core_type) -> + if need_parens then fprintf ppf "("; + fprintf ppf "@[%a@ :@ %a@]" (pp_pattern_deep true) pat pp_core_type core_type; + if need_parens then fprintf ppf ")" +;; + +let pp_pattern = pp_pattern_deep false + +let rec pp_expression_deep need_cut need_parens ppf = function + | Exp_ident id -> pp_ident ppf id + | Exp_constant const -> pp_constant ppf const + | Exp_let (rec_flag, fst_value_binding, value_binding_list, exp) -> + if need_parens then fprintf ppf "("; + pp_open_hvbox ppf 0; + (pp_value_binding_list 0) ppf (rec_flag, fst_value_binding :: value_binding_list); + fprintf ppf " in@ %a" (pp_expression_deep true true) exp; + if need_parens then fprintf ppf ")"; + pp_close_box ppf () + | Exp_fun (fst_pat, pat_list, exp) -> + if need_parens then fprintf ppf "("; + pp_open_box ppf 2; + fprintf + ppf + "fun@ %a@ " + (pp_print_list ~pp_sep:pp_print_space (pp_pattern_deep true)) + (fst_pat :: pat_list); + fprintf ppf "->@ %a" (pp_expression_deep false true) exp; + if need_parens then fprintf ppf ")"; + pp_close_box ppf () + | Exp_apply (exp1, exp2) -> + pp_open_box ppf 2; + (pp_exp_apply ~need_parens) ppf (exp1, exp2); + pp_close_box ppf () + | Exp_function (fst_case, case_list) -> + if need_cut then pp_force_newline ppf (); + if need_parens then fprintf ppf "("; + pp_open_vbox ppf 0; + fprintf ppf "function@ "; + fprintf ppf "%a" (pp_print_list pp_case) (fst_case :: case_list); + if need_parens then fprintf ppf ")"; + pp_close_box ppf () + | Exp_match (exp, fst_case, case_list) -> + if need_cut then pp_force_newline ppf (); + if need_parens then fprintf ppf "("; + pp_open_vbox ppf 0; + pp_open_hvbox ppf 0; + if need_parens then pp_open_vbox ppf 1 else pp_open_vbox ppf 2; + fprintf ppf "match %a@]@ with@]@ " (pp_expression_deep true false) exp; + fprintf ppf "%a" (pp_print_list pp_case) (fst_case :: case_list); + if need_parens then fprintf ppf ")"; + pp_close_box ppf () + | Exp_tuple (fst_exp, snd_exp, exp_list) -> + pp_open_hvbox ppf 0; + if need_parens then fprintf ppf "( "; + fprintf + ppf + "%a@]" + (pp_print_list ~pp_sep:pp_comma (pp_expression_deep false true)) + (fst_exp :: snd_exp :: exp_list); + if need_parens then fprintf ppf " )" + | Exp_construct ("::", Some (Exp_tuple (head, tail, []))) -> + fprintf ppf "@[[ %a" (pp_expression_deep false true) head; + let rec pp_tail = function + | Exp_construct (_, None) -> fprintf ppf "@ ]@]" + | Exp_construct (_, Some (Exp_tuple (next_head, next_tail, []))) -> + fprintf ppf "@,; %a" (pp_expression_deep false true) next_head; + pp_tail next_tail + | Exp_construct (_, Some _) -> () + | exp -> fprintf ppf ";@ %a@ ]@]" (pp_expression_deep false true) exp + in + pp_tail tail + | Exp_construct (tag, None) -> fprintf ppf "%s" tag + | Exp_construct ("Some", Some exp) -> + fprintf ppf "Some (%a)" (pp_expression_deep false true) exp + | Exp_construct _ -> () + | Exp_ifthenelse (exp1, exp2, None) -> + if need_parens then fprintf ppf "("; + pp_open_box ppf 0; + fprintf ppf "if %a@ " (pp_expression_deep false false) exp1; + fprintf ppf "@[then %a@]" (pp_expression_deep true true) exp2; + if need_parens then fprintf ppf ")"; + pp_close_box ppf () + | Exp_ifthenelse (exp1, exp2, Some exp3) -> + if need_parens then fprintf ppf "("; + pp_open_box ppf 0; + fprintf ppf "if %a@ " (pp_expression_deep false false) exp1; + fprintf ppf "@[then %a@]@ " (pp_expression_deep true true) exp2; + fprintf ppf "@[else %a@]" (pp_expression_deep true true) exp3; + if need_parens then fprintf ppf ")"; + pp_close_box ppf () + | Exp_sequence (exp1, exp2) -> + if need_parens then fprintf ppf "("; + pp_open_box ppf 0; + fprintf ppf "%a; " (pp_expression_deep need_cut true) exp1; + fprintf ppf "%a" (pp_expression_deep need_cut true) exp2; + if need_parens then fprintf ppf ")"; + pp_close_box ppf () + | Exp_constraint (exp, core_type) -> + fprintf + ppf + "@[(%a@ :@ %a)@]" + (pp_expression_deep false false) + exp + pp_core_type + core_type + +and pp_exp_apply ?(need_parens = false) ppf (exp1, exp2) = + match exp1 with + | Exp_ident exp_opr when is_unary_minus exp_opr -> + (match exp2 with + | Exp_ident _ | Exp_constant _ -> + fprintf ppf "-%a" (pp_expression_deep false need_parens) exp2 + | Exp_apply _ -> fprintf ppf "-(%a)" (pp_expression_deep false need_parens) exp2 + | _ -> fprintf ppf "-%a" (pp_expression_deep false true) exp2) + | _ -> + fprintf ppf "%a " (pp_expression_deep false true) exp1; + (match exp2 with + | Exp_apply _ -> fprintf ppf "(%a)" (pp_expression_deep false true) exp2 + | _ -> fprintf ppf "%a" (pp_expression_deep false true) exp2) + +and pp_value_binding ppf = + pp_open_hvbox ppf 0; + function + | { pat = pat_var; exp = Exp_fun (pat, pat_list, exp) } -> + let pp_pattern_arg () = + fprintf + ppf + "%a" + (pp_print_list ~pp_sep:pp_print_space (pp_pattern_deep true)) + (pat :: pat_list) + in + (match pat_var with + | Pat_constraint (pat, type') -> + fprintf ppf "%a@ " pp_pattern pat; + pp_pattern_arg (); + fprintf ppf "@ : %a" pp_core_type type' + | _ -> + fprintf ppf "%a@ " pp_pattern pat_var; + pp_pattern_arg ()); + fprintf ppf "@ =@]@ "; + fprintf ppf "@[%a@]@]" (pp_expression_deep false false) exp + | { pat; exp = Exp_let _ as exp } -> + fprintf ppf "%a =@]@\n" pp_pattern pat; + fprintf ppf "@[%a@]@]" (pp_expression_deep false false) exp + | { pat; exp } -> + fprintf ppf "%a =@]@ " pp_pattern pat; + fprintf ppf "@[%a@]@]" (pp_expression_deep false false) exp + +and pp_case ppf = function + | { left; right } -> + fprintf + ppf + "@[| %a ->@ %a@]" + (pp_pattern_deep true) + left + (pp_expression_deep true true) + right + +and pp_value_binding_list indent ppf = function + | rec_flag, value_binding_list -> + pp_open_hovbox ppf indent; + pp_rec_flag ppf rec_flag; + fprintf + ppf + "%a" + (pp_print_list ~pp_sep:(pp_and indent) pp_value_binding) + value_binding_list +;; + +let pp_expression = pp_expression_deep false false + +let pp_structure_item ppf = function + | Struct_eval exp -> + fprintf ppf "@[%a@];;" pp_expression exp; + pp_print_flush ppf () + | Struct_value (rec_flag, fst_value_binding, value_binding_list) -> + (pp_value_binding_list 2) ppf (rec_flag, fst_value_binding :: value_binding_list); + pp_print_if_newline ppf (); + pp_print_cut ppf (); + fprintf ppf ";;"; + pp_print_flush ppf () +;; + +let pp_structure ppf ast = + if Base.List.is_empty ast + then fprintf ppf ";;" + else fprintf ppf "@[%a@]" (pp_print_list ~pp_sep:pp_force_newline pp_structure_item) ast; + pp_print_flush ppf () +;; diff --git a/akaML/lib/pprinter/pprinter.mli b/akaML/lib/pprinter/pprinter.mli new file mode 100644 index 00000000..cdeaa59e --- /dev/null +++ b/akaML/lib/pprinter/pprinter.mli @@ -0,0 +1,18 @@ +[@@@ocaml.text "/*"] + +(** Copyright 2025-2026, Friend-zva, RodionovMaxim05 *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +[@@@ocaml.text "/*"] + +val pp_rec_flag : Format.formatter -> Ast.rec_flag -> unit +val pp_ident : Format.formatter -> string -> unit +val pp_constant : Format.formatter -> Ast.constant -> unit +val pp_core_type : Format.formatter -> Ast.core_type -> unit +val pp_pattern : Format.formatter -> Ast.pattern -> unit +val pp_expression : Format.formatter -> Ast.Expression.t -> unit +val pp_value_binding : Format.formatter -> Ast.Expression.value_binding_exp -> unit +val pp_case : Format.formatter -> Ast.Expression.case_exp -> unit +val pp_structure_item : Format.formatter -> Ast.structure_item -> unit +val pp_structure : Format.formatter -> Ast.structure -> unit diff --git a/akaML/lib/qchecker/dune b/akaML/lib/qchecker/dune new file mode 100644 index 00000000..cfb139bd --- /dev/null +++ b/akaML/lib/qchecker/dune @@ -0,0 +1,6 @@ +(library + (name Qchecker) + (public_name akaML.Qchecker) + (libraries Pprinter) + (instrumentation + (backend bisect_ppx))) diff --git a/akaML/lib/qchecker/qchecker.ml b/akaML/lib/qchecker/qchecker.ml new file mode 100644 index 00000000..d8c710ee --- /dev/null +++ b/akaML/lib/qchecker/qchecker.ml @@ -0,0 +1,51 @@ +[@@@ocaml.text "/*"] + +(** Copyright 2025-2026, Friend-zva, RodionovMaxim05 *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +[@@@ocaml.text "/*"] + +let failure ast = + Format.asprintf + "*** PPrinter ***@.%a@.*** Ast ***@.%s@.*** Parser ***@.%s@." + Pprinter.pp_structure + ast + (Ast.show_structure ast) + (match Parser.parse (Format.asprintf "%a" Pprinter.pp_structure ast) with + | Ok ast_parsed -> Ast.show_structure ast_parsed + | Error error -> error) +;; + +let rule_gen ?(show_passed = false) ?(show_shrinker = false) ast = + match Parser.parse (Format.asprintf "%a" Pprinter.pp_structure ast) with + | Ok ast_parsed -> + if ast = ast_parsed + then ( + if show_passed + then Format.printf "@.*** PPrinter ***@.%a@." Pprinter.pp_structure ast; + true) + else ( + if show_shrinker + then ( + Format.printf "@.*** Shrinker ***@.%a@." Pprinter.pp_structure ast; + Format.printf "@.*** AST ***@.%s@." (Ast.show_structure ast)); + false) + | Error _ -> + if show_shrinker + then Format.printf "@.*** Shrinker ***@.%a@." Pprinter.pp_structure ast; + false +;; + +let run_gen ?(show_passed = false) ?(show_shrinker = false) ?(count = 10) = + let gen type_gen = + QCheck.make type_gen ~print:failure ~shrink:Shrinker.shrink_structure + in + QCheck_base_runner.run_tests_main + [ QCheck.Test.make + ~count + ~name:"the auto generator" + (gen Ast.gen_structure) + (fun ast -> rule_gen ~show_passed ~show_shrinker ast) + ] +;; diff --git a/akaML/lib/qchecker/qchecker.mli b/akaML/lib/qchecker/qchecker.mli new file mode 100644 index 00000000..2a404ccf --- /dev/null +++ b/akaML/lib/qchecker/qchecker.mli @@ -0,0 +1,9 @@ +[@@@ocaml.text "/*"] + +(** Copyright 2025-2026, Friend-zva, RodionovMaxim05 *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +[@@@ocaml.text "/*"] + +val run_gen : ?show_passed:bool -> ?show_shrinker:bool -> ?count:int -> int diff --git a/akaML/lib/qchecker/shrinker.ml b/akaML/lib/qchecker/shrinker.ml new file mode 100644 index 00000000..2a1a4011 --- /dev/null +++ b/akaML/lib/qchecker/shrinker.ml @@ -0,0 +1,174 @@ +[@@@ocaml.text "/*"] + +(** Copyright 2025-2026, Friend-zva, RodionovMaxim05 *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +[@@@ocaml.text "/*"] + +open Ast +open Ast.Expression +open QCheck.Iter +open QCheck.Shrink + +let shrink_type_ident = function + | "'a" -> empty + | _ -> return "'a" +;; + +let shrink_ident = function + | "a" -> empty + | _ -> return "a" +;; + +let rec shrink_core_type = function + | Type_unit | Type_bool | Type_char | Type_int | Type_string -> empty + | Type_var id -> shrink_type_ident id >|= fun id' -> Type_var id' + | Type_list type' -> + return type' <+> (shrink_core_type type' >|= fun type'' -> Type_list type'') + | Type_option type' -> + return type' <+> (shrink_core_type type' >|= fun type'' -> Type_option type'') + | Type_tuple (fst_type, snd_type, type_list) -> + of_list [ fst_type; snd_type ] + <+> of_list type_list + <+> (shrink_core_type fst_type + >|= fun fst_type' -> Type_tuple (fst_type', snd_type, type_list)) + <+> (shrink_core_type snd_type + >|= fun snd_type' -> Type_tuple (fst_type, snd_type', type_list)) + <+> (list ~shrink:shrink_core_type type_list + >|= fun type_list' -> Type_tuple (fst_type, snd_type, type_list')) + | Type_arrow (fst_type, snd_type) -> + of_list [ fst_type; snd_type ] + <+> (shrink_core_type fst_type >|= fun fst_type' -> Type_arrow (fst_type', snd_type)) + <+> (shrink_core_type snd_type >|= fun snd_type' -> Type_arrow (fst_type, snd_type')) +;; + +let rec shrink_pattern = function + | Pat_any -> empty + | Pat_var var -> shrink_ident var >|= fun var' -> Pat_var var' + | Pat_constant const -> + (match const with + | Const_integer i -> int i >|= fun i' -> Pat_constant (Const_integer i') + | Const_char ch -> char ch >|= fun ch' -> Pat_constant (Const_char ch') + | Const_string str -> + shrink_ident str >|= fun str' -> Pat_constant (Const_string str')) + | Pat_tuple (fst_pat, snd_pat, pat_list) -> + of_list [ fst_pat; snd_pat ] + <+> of_list pat_list + <+> (shrink_pattern fst_pat >|= fun fst_pat' -> Pat_tuple (fst_pat', snd_pat, pat_list) + ) + <+> (shrink_pattern snd_pat >|= fun snd_pat' -> Pat_tuple (fst_pat, snd_pat', pat_list) + ) + <+> (list ~shrink:shrink_pattern pat_list + >|= fun pat_list' -> Pat_tuple (fst_pat, snd_pat, pat_list')) + | Pat_construct (_, None) -> empty + | Pat_construct (tag, Some pat) -> + return pat <+> (shrink_pattern pat >|= fun pat' -> Pat_construct (tag, Some pat')) + | Pat_constraint (pat, type') -> + return pat + <+> (shrink_pattern pat >|= fun pat' -> Pat_constraint (pat', type')) + <+> (shrink_core_type type' >|= fun type'' -> Pat_constraint (pat, type'')) +;; + +let rec shrink_expression = function + | Exp_ident id -> shrink_ident id >|= fun id' -> Exp_ident id' + | Exp_constant const -> + (match const with + | Const_integer i -> int i >|= fun i' -> Exp_constant (Const_integer i') + | Const_char ch -> char ch >|= fun ch' -> Exp_constant (Const_char ch') + | Const_string str -> + shrink_ident str >|= fun str' -> Exp_constant (Const_string str')) + | Exp_let (rec_flag, fst_value_binding, value_binding_list, exp) -> + return exp + <+> (shrink_expression exp + >|= fun exp' -> Exp_let (rec_flag, fst_value_binding, value_binding_list, exp')) + <+> (shrink_value_binding fst_value_binding + >|= fun fst_value_binding' -> + Exp_let (rec_flag, fst_value_binding', value_binding_list, exp)) + <+> (list ~shrink:shrink_value_binding value_binding_list + >|= fun value_binding_list' -> + Exp_let (rec_flag, fst_value_binding, value_binding_list', exp)) + <+> (shrink_expression exp + >|= fun exp' -> Exp_let (rec_flag, fst_value_binding, value_binding_list, exp')) + | Exp_fun (fst_pat, pat_list, exp) -> + return exp + <+> (shrink_pattern fst_pat >|= fun fst_pat' -> Exp_fun (fst_pat', pat_list, exp)) + <+> (list ~shrink:shrink_pattern pat_list + >|= fun pat_list' -> Exp_fun (fst_pat, pat_list', exp)) + <+> (shrink_expression exp >|= fun exp' -> Exp_fun (fst_pat, pat_list, exp')) + | Exp_apply (exp_fn, exp) -> + shrink_expression exp + >|= (fun exp_fn' -> Exp_apply (exp_fn', exp)) + <+> (shrink_expression exp >|= fun exp' -> Exp_apply (exp_fn, exp')) + | Exp_function (fst_case, case_list) -> + shrink_case fst_case + >|= (fun fst_case' -> Exp_function (fst_case', case_list)) + <+> (list ~shrink:shrink_case case_list + >|= fun case_list' -> Exp_function (fst_case, case_list')) + | Exp_match (exp, fst_case, case_list) -> + return exp + <+> (shrink_expression exp >|= fun exp' -> Exp_match (exp', fst_case, case_list)) + <+> (shrink_case fst_case >|= fun fst_case' -> Exp_match (exp, fst_case', case_list)) + <+> (list ~shrink:shrink_case case_list + >|= fun case_list' -> Exp_match (exp, fst_case, case_list')) + | Exp_tuple (fst_exp, snd_exp, exp_list) -> + of_list [ fst_exp; snd_exp ] + <+> of_list exp_list + <+> (shrink_expression fst_exp + >|= fun fst_exp' -> Exp_tuple (fst_exp', snd_exp, exp_list)) + <+> (shrink_expression snd_exp + >|= fun snd_exp' -> Exp_tuple (fst_exp, snd_exp', exp_list)) + <+> (list ~shrink:shrink_expression exp_list + >|= fun exp_list' -> Exp_tuple (fst_exp, snd_exp, exp_list')) + | Exp_construct (_, None) -> empty + | Exp_construct (tag, Some exp) -> + return exp <+> (shrink_expression exp >|= fun exp' -> Exp_construct (tag, Some exp')) + | Exp_ifthenelse (if_exp, then_exp, None) -> + of_list [ if_exp; then_exp ] + <+> (shrink_expression if_exp + >|= fun if_exp' -> Exp_ifthenelse (if_exp', then_exp, None)) + <+> (shrink_expression then_exp + >|= fun then_exp' -> Exp_ifthenelse (if_exp, then_exp', None)) + | Exp_ifthenelse (if_exp, then_exp, Some else_exp) -> + of_list [ if_exp; then_exp; else_exp ] + <+> (shrink_expression if_exp + >|= fun if_exp' -> Exp_ifthenelse (if_exp', then_exp, Some else_exp)) + <+> (shrink_expression then_exp + >|= fun then_exp' -> Exp_ifthenelse (if_exp, then_exp', Some else_exp)) + <+> (shrink_expression else_exp + >|= fun else_exp' -> Exp_ifthenelse (if_exp, then_exp, Some else_exp')) + | Exp_sequence (exp1, exp2) -> + of_list [ exp1; exp2 ] + <+> (shrink_expression exp1 >|= fun exp1' -> Exp_sequence (exp1', exp2)) + <+> (shrink_expression exp2 >|= fun exp2' -> Exp_sequence (exp1, exp2')) + | Exp_constraint (exp, type') -> + return exp + <+> (shrink_expression exp >|= fun exp' -> Exp_constraint (exp', type')) + <+> (shrink_core_type type' >|= fun type'' -> Exp_constraint (exp, type'')) + +and shrink_value_binding value_binding = + shrink_pattern value_binding.pat + >|= (fun pat' -> { value_binding with pat = pat' }) + <+> (shrink_expression value_binding.exp + >|= fun exp' -> { value_binding with exp = exp' }) + +and shrink_case case = + shrink_pattern case.left + >|= (fun left' -> { case with left = left' }) + <+> (shrink_expression case.right >|= fun right' -> { case with right = right' }) +;; + +let shrink_structure_item = function + | Struct_eval exp -> shrink_expression exp >|= fun exp' -> Struct_eval exp' + | Struct_value (rec_flag, fst_value_binding, value_binding_list) -> + return (Struct_value (rec_flag, fst_value_binding, [])) + <+> of_list (List.map (fun vb -> Struct_value (rec_flag, vb, [])) value_binding_list) + <+> (shrink_value_binding fst_value_binding + >|= fun fst_value_binding' -> + Struct_value (rec_flag, fst_value_binding', value_binding_list)) + <+> (list ~shrink:shrink_value_binding value_binding_list + >|= fun value_binding_list' -> + Struct_value (rec_flag, fst_value_binding, value_binding_list')) +;; + +let shrink_structure = list ~shrink:shrink_structure_item diff --git a/akaML/lib/qchecker/shrinker.mli b/akaML/lib/qchecker/shrinker.mli new file mode 100644 index 00000000..6d542bcb --- /dev/null +++ b/akaML/lib/qchecker/shrinker.mli @@ -0,0 +1,9 @@ +[@@@ocaml.text "/*"] + +(** Copyright 2025-2026, Friend-zva, RodionovMaxim05 *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +[@@@ocaml.text "/*"] + +val shrink_structure : Ast.structure QCheck.Shrink.t diff --git a/akaML/lib/ricsv/codegen.ml b/akaML/lib/ricsv/codegen.ml new file mode 100644 index 00000000..8697914e --- /dev/null +++ b/akaML/lib/ricsv/codegen.ml @@ -0,0 +1,334 @@ +[@@@ocaml.text "/*"] + +(** Copyright 2025-2026, Friend-zva, RodionovMaxim05 *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +[@@@ocaml.text "/*"] + +open Ast +open Ast.Expression +open Machine +open Base +open Stdlib.Format + +module Platform = struct + let arg_regs_count = 8 + let word_size = 8 +end + +(** Environment context: maps variables to registers or stack offsets *) +type location = + | Loc_reg of reg + | Loc_mem of offset + +let pp_location ppf = function + | Loc_reg r -> fprintf ppf "Reg(%a)" pp_reg r + | Loc_mem ofs -> fprintf ppf "Mem(%a)" pp_offset ofs +;; + +type env = (ident, location, String.comparator_witness) Map.t + +let pp_env ppf env = + let bindings = Map.to_alist env in + fprintf ppf "{"; + List.iteri bindings ~f:(fun i (name, loc) -> + if i > 0 then fprintf ppf "; "; + fprintf ppf "%s -> %a" name pp_location loc); + fprintf ppf "}" +;; + +type codegen_state = + { frame_offset : int + (* Stores the current offset from FP for local variables and some caller-regs *) + ; fresh_id : int + } + +module State = struct + type 'a t = codegen_state -> 'a * codegen_state + + let return x st = x, st + + let bind m f = + fun state -> + let x, st = m state in + f x st + ;; + + let ( let* ) = bind + let get st = st, st + let put st = fun _ -> (), st + + let modify_frame_offset f = + let* st = get in + put { st with frame_offset = f st.frame_offset } + ;; + + let modify_fresh_id f = + let* st = get in + put { st with fresh_id = f st.fresh_id } + ;; + + let fresh = + let* st = get in + let* () = modify_fresh_id Int.succ in + return st.fresh_id + ;; +end + +open State + +module Emission = struct + let code : (instr * string) Queue.t = Queue.create () + let emit ?(comm = "") instr = instr (fun i -> Queue.enqueue code (i, comm)) + + let flush_queue ppf = + while not (Queue.is_empty code) do + let i, comm = Queue.dequeue_exn code in + (match i with + | Label _ -> fprintf ppf "%a" pp_instr i + | _ -> fprintf ppf " %a" pp_instr i); + if String.(comm <> "") then fprintf ppf " # %s" comm; + fprintf ppf "\n" + done + ;; + + let emit_bin_op dst op r1 r2 = + match op with + | "+" -> emit add dst r1 r2 + | "-" -> emit sub dst r1 r2 + | "*" -> emit mul dst r1 r2 + | "<=" -> + emit slt dst r2 r1; + emit xori dst dst 1 + | ">=" -> + emit slt dst r1 r2; + emit xori dst dst 1 + | "=" -> + emit xor dst r1 r2; + emit seqz dst dst + | "<>" -> + emit xor dst r1 r2; + emit snez dst dst + | _ -> failwith ("unsupported binary operator: " ^ op) + ;; + + let emit_store ?(comm = "") reg = + let* () = modify_frame_offset (fun fr_ofs -> fr_ofs + Platform.word_size) in + let* state = get in + let ofs = -state.frame_offset in + emit sd reg (S 0, ofs) ~comm; + return (Loc_mem (S 0, ofs)) + ;; + + (* save 'live' registers from env to stack *) + let emit_save_caller_regs env = + let regs = + Map.to_alist env + |> List.filter_map ~f:(fun (name, loc) -> + match loc with + | Loc_reg r -> + (match r with + | A _ | T _ -> Some (name, r) + | _ -> None) + | _ -> None) + in + let spill_count = List.length regs in + let frame_size = spill_count * Platform.word_size in + if frame_size > 0 then emit addi SP SP (-frame_size) ~comm:"Saving 'live' regs"; + List.fold regs ~init:(return env) ~f:(fun acc (name, r) -> + let* env = acc in + let* new_loc = emit_store r in + return (Map.set env ~key:name ~data:new_loc)) + ;; + + let emit_fn_prologue name stack_size = + (* allocate space on stack, store RA, old FP (S0) and make a new FP *) + emit label name; + emit addi SP SP (-stack_size); + emit sd RA (SP, stack_size - Platform.word_size); + emit sd (S 0) (SP, stack_size - (2 * Platform.word_size)); + emit addi (S 0) SP (stack_size - (2 * Platform.word_size)) ~comm:"Prologue ends" + ;; + + let emit_fn_epilogue is_main = + (* restore SP, S0 and RA using FP (S0) as reference *) + emit addi SP (S 0) (2 * Platform.word_size) ~comm:"Epilogue starts"; + emit ld RA (S 0, Platform.word_size); + emit ld (S 0) (S 0, 0); + if is_main + then ( + emit li (A 7) 93; + emit ecall) + else emit ret + ;; +end + +open Emission + +let reg_is_used env r = + Map.exists env ~f:(function + | Loc_reg r' -> equal_reg r r' + | Loc_mem _ -> false) +;; + +(* If dst contains a live variable, it moves it to another location. *) +let ensure_reg_free env dst = + let relocate env ~(from : reg) ~(to_ : location) = + Map.map env ~f:(function + | Loc_reg r when equal_reg r from -> to_ + | loc -> loc) + in + if not (reg_is_used env dst) + then return env + else ( + let candidate_regs = List.init 8 ~f:(fun i -> A i) in + match List.find candidate_regs ~f:(fun r -> not (reg_is_used env r)) with + | Some new_reg -> + emit mv new_reg dst; + return (relocate env ~from:dst ~to_:(Loc_reg new_reg)) + | None -> + let* new_loc = emit_store dst in + return (relocate env ~from:dst ~to_:new_loc)) +;; + +let rec gen_exp env dst = function + | Exp_constant (Const_integer n) -> + emit li dst n; + return env + | Exp_ident x -> + (match Map.find env x with + | Some (Loc_reg r) -> + if equal_reg r dst + then return env + else ( + emit mv dst r; + return env) + | Some (Loc_mem ofs) -> + emit ld dst ofs; + return env + | None -> failwith ("unbound variable: " ^ x)) + | Exp_ifthenelse (cond, then_e, Some else_e) -> + let* env = gen_exp env (T 0) cond in + let* id = fresh in + let else_lbl = Printf.sprintf "else_%d" id + and end_lbl = Printf.sprintf "end_%d" id in + emit beq (T 0) Zero else_lbl; + (* then case *) + let* env = gen_exp env dst then_e in + emit j end_lbl; + (* else case *) + emit label else_lbl; + let* env = gen_exp env dst else_e in + emit label end_lbl; + return env + | Exp_apply _ as e -> + let rec collect_apps acc = function + | Exp_apply (f, arg) -> collect_apps (arg :: acc) f + | fn -> fn, acc + in + let fn, args = collect_apps [] e in + (match fn, args with + | Exp_ident op, [ a1; a2 ] when Parser.is_operator op -> + let* env = gen_exp env (T 0) a1 in + let* env = gen_exp env (T 1) a2 in + let* env = ensure_reg_free env dst in + emit_bin_op dst op (T 0) (T 1); + return env + | Exp_ident fname, args -> + let* env = + List.foldi args ~init:(return env) ~f:(fun i acc arg -> + let* env = acc in + if i < Platform.arg_regs_count + then gen_exp env (A i) arg + else failwith "too many args") + in + let* env = emit_save_caller_regs env in + emit call fname; + if not (equal_reg dst (A 0)) then emit mv dst (A 0); + return env + | _ -> failwith "unsupported function application") + | Exp_let (_, { pat = Pat_var id; exp }, [], exp_in) -> + let* env = gen_exp env (A 0) exp in + let* loc = emit_store (A 0) ~comm:id in + let env = Map.set env ~key:id ~data:loc in + gen_exp env dst exp_in + | _ -> failwith "expression not supported yet" +;; + +let rec count_local_vars = function + | Exp_ident _ | Exp_constant _ | Exp_construct (_, None) -> 0 + | Exp_let (_, vb, vb_list, body) -> + let count_one_vb { pat; exp } = + let count_vars_in_pat = + match pat with + | Pat_var _ -> 1 + | _ -> 0 + in + count_vars_in_pat + count_local_vars exp + in + List.fold_left (vb :: vb_list) ~init:0 ~f:(fun acc vb -> acc + count_one_vb vb) + + count_local_vars body + | Exp_fun (_, _, exp) | Exp_construct (_, Some exp) | Exp_constraint (exp, _) -> + count_local_vars exp + | Exp_apply (exp1, exp2) -> count_local_vars exp1 + count_local_vars exp2 + | Exp_ifthenelse (cond, then_exp, Some else_exp) -> + count_local_vars cond + count_local_vars then_exp + count_local_vars else_exp + | Exp_ifthenelse (cond, then_exp, None) -> + count_local_vars cond + count_local_vars then_exp + | Exp_function (case, case_list) -> + let count_case { left = _; right } = count_local_vars right in + List.fold_left (case :: case_list) ~init:0 ~f:(fun acc c -> acc + count_case c) + | Exp_match (scrut, case, case_list) -> + let count_case { left = _; right } = count_local_vars right in + count_local_vars scrut + + List.fold_left (case :: case_list) ~init:0 ~f:(fun acc c -> acc + count_case c) + | Exp_tuple (exp1, exp2, exp_list) -> + List.fold_left (exp1 :: exp2 :: exp_list) ~init:0 ~f:(fun acc e -> + acc + count_local_vars e) + | Exp_sequence (exp1, exp2) -> count_local_vars exp1 + count_local_vars exp2 +;; + +let gen_func f_id arg_list body_exp ppf state = + let f_id = if String.equal f_id "main" then "_start" else f_id in + fprintf ppf "\n .globl %s\n .type %s, @function\n" f_id f_id; + let arity = List.length arg_list in + let reg_params, stack_params = + List.split_n arg_list (min arity Platform.arg_regs_count) + in + let stack_size = (2 + count_local_vars body_exp) * Platform.word_size in + let env = Map.empty (module String) in + let env = + List.foldi reg_params ~init:env ~f:(fun i env -> function + | Pat_var name -> Map.set env ~key:name ~data:(Loc_reg (A i)) + | _ -> failwith "unsupported pattern") + in + let env = + List.foldi stack_params ~init:env ~f:(fun i env -> function + | Pat_var name -> + let offset = (i + 2) * Platform.word_size in + Map.set env ~key:name ~data:(Loc_mem (S 0, offset)) + | _ -> failwith "unsupported pattern") + in + emit_fn_prologue f_id stack_size; + let init_state = { state with frame_offset = 0 } in + let _, state = gen_exp env (A 0) body_exp init_state in + emit_fn_epilogue (String.equal f_id "_start"); + flush_queue ppf; + state +;; + +let gen_structure ppf ast = + fprintf ppf ".section .text"; + let init_state = { frame_offset = 0; fresh_id = 0 } in + let _ = + List.fold ast ~init:init_state ~f:(fun state -> function + | Struct_value + (Recursive, { pat = Pat_var f_id; exp = Exp_fun (p, p_list, body_exp) }, _) -> + gen_func f_id (p :: p_list) body_exp ppf state + | Struct_value (Nonrecursive, { pat = Pat_var f_id; exp = body_exp }, _) -> + gen_func f_id [] body_exp ppf state + | _ -> failwith "unsupported structure item") + in + pp_print_flush ppf () +;; diff --git a/akaML/lib/ricsv/codegen.mli b/akaML/lib/ricsv/codegen.mli new file mode 100644 index 00000000..25051c72 --- /dev/null +++ b/akaML/lib/ricsv/codegen.mli @@ -0,0 +1,9 @@ +[@@@ocaml.text "/*"] + +(** Copyright 2025-2026, Friend-zva, RodionovMaxim05 *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +[@@@ocaml.text "/*"] + +val gen_structure : Format.formatter -> Ast.structure -> unit diff --git a/akaML/lib/ricsv/dune b/akaML/lib/ricsv/dune new file mode 100644 index 00000000..fbd859f5 --- /dev/null +++ b/akaML/lib/ricsv/dune @@ -0,0 +1,8 @@ +(library + (name RiscV) + (public_name akaML.RiscV) + (libraries base Ast Parser) + (preprocess + (pps ppx_deriving.eq)) + (instrumentation + (backend bisect_ppx))) diff --git a/akaML/lib/ricsv/machine.ml b/akaML/lib/ricsv/machine.ml new file mode 100644 index 00000000..d47a8a32 --- /dev/null +++ b/akaML/lib/ricsv/machine.ml @@ -0,0 +1,96 @@ +[@@@ocaml.text "/*"] + +(** Copyright 2025-2026, Friend-zva, RodionovMaxim05 *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +[@@@ocaml.text "/*"] + +type reg = + | Zero + | RA + | SP + | A of int + | T of int + | S of int +[@@deriving eq] + +type offset = reg * int + +let pp_reg ppf = + let open Format in + function + | Zero -> fprintf ppf "zero" + | RA -> fprintf ppf "ra" + | SP -> fprintf ppf "sp" + | A n -> fprintf ppf "a%d" n + | T n -> fprintf ppf "t%d" n + | S n -> fprintf ppf "s%d" n +;; + +let pp_offset ppf offset = Format.fprintf ppf "%d(%a)" (snd offset) pp_reg (fst offset) + +type instr = + | Addi of reg * reg * int + | Add of reg * reg * reg + | Sub of reg * reg * reg + | Mul of reg * reg * reg + | Xori of reg * reg * int + | Xor of reg * reg * reg + | Slt of reg * reg * reg + | Seqz of reg * reg + | Snez of reg * reg + | Li of reg * int + | Mv of reg * reg + | Ld of reg * offset + | Sd of reg * offset + | Beq of reg * reg * string + | J of string + | Label of string + | Call of string + | Ret + | Ecall + +let pp_instr ppf = + let open Format in + function + | Addi (rd, rs, imm) -> fprintf ppf "addi %a, %a, %d" pp_reg rd pp_reg rs imm + | Add (rd, rs1, rs2) -> fprintf ppf "add %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Sub (rd, rs1, rs2) -> fprintf ppf "sub %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Mul (rd, rs1, rs2) -> fprintf ppf "mul %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Xori (rd, rs1, imm) -> fprintf ppf "xori %a, %a, %d" pp_reg rd pp_reg rs1 imm + | Xor (rd, rs1, rs2) -> fprintf ppf "xor %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Slt (rd, rs1, rs2) -> fprintf ppf "slt %a, %a, %a" pp_reg rd pp_reg rs1 pp_reg rs2 + | Seqz (rd, rs) -> fprintf ppf "seqz %a, %a" pp_reg rd pp_reg rs + | Snez (rd, rs) -> fprintf ppf "snez %a, %a" pp_reg rd pp_reg rs + | Li (rd, imm) -> fprintf ppf "li %a, %d" pp_reg rd imm + | Mv (rd, rs) -> fprintf ppf "mv %a, %a" pp_reg rd pp_reg rs + | Ld (rd, ofs) -> fprintf ppf "ld %a, %a" pp_reg rd pp_offset ofs + | Sd (rs, ofs) -> fprintf ppf "sd %a, %a" pp_reg rs pp_offset ofs + | Beq (rs1, rs2, s) -> fprintf ppf "beq %a, %a, %s" pp_reg rs1 pp_reg rs2 s + | J s -> fprintf ppf "j %s" s + | Label s -> fprintf ppf "%s:" s + | Call s -> fprintf ppf "call %s" s + | Ret -> fprintf ppf "ret" + | Ecall -> fprintf ppf "ecall" +;; + +let addi k rd rs imm = k @@ Addi (rd, rs, imm) +let add k rd rs1 rs2 = k @@ Add (rd, rs1, rs2) +let sub k rd rs1 rs2 = k @@ Sub (rd, rs1, rs2) +let mul k rd rs1 rs2 = k @@ Mul (rd, rs1, rs2) +let xori k rd rs1 imm = k @@ Xori (rd, rs1, imm) +let xor k rd rs1 rs2 = k @@ Xor (rd, rs1, rs2) +let slt k rd rs1 rs2 = k @@ Slt (rd, rs1, rs2) +let seqz k rd rs = k (Seqz (rd, rs)) +let snez k rd rs = k (Snez (rd, rs)) +let li k rd imm = k (Li (rd, imm)) +let mv k rd rs = k (Mv (rd, rs)) +let ld k rd ofs = k (Ld (rd, ofs)) +let sd k rd ofs = k (Sd (rd, ofs)) +let beq k rs1 rs2 s = k @@ Beq (rs1, rs2, s) +let j k s = k (J s) +let label k s = k (Label s) +let call k s = k (Call s) +let ret k = k Ret +let ecall k = k Ecall diff --git a/akaML/lib/ricsv/machine.mli b/akaML/lib/ricsv/machine.mli new file mode 100644 index 00000000..281357d6 --- /dev/null +++ b/akaML/lib/ricsv/machine.mli @@ -0,0 +1,84 @@ +[@@@ocaml.text "/*"] + +(** Copyright 2025-2026, Friend-zva, RodionovMaxim05 *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +[@@@ocaml.text "/*"] + +type reg = + | Zero (** Always zero *) + | RA (** Return address *) + | SP (** Stack pointer *) + | A of int (** Arguments A0..A7 *) + | T of int (** Temporary T0..T6 *) + | S of int (** Saved S0..S11 *) + +val equal_reg : reg -> reg -> bool + +type offset = reg * int + +val pp_reg : Format.formatter -> reg -> unit +val pp_offset : Format.formatter -> reg * int -> unit + +type instr = + (* === Arithmetic instructions === *) + | Addi of reg * reg * int + (** [addi rd, rs, imm] Adds register [rs] and immediate [imm], result in [rd] *) + | Add of reg * reg * reg + (** [add rd, rs1, rs2] Adds registers [rs1] and [rs2], result in [rd] *) + | Sub of reg * reg * reg + (** [sub rd, rs1, rs2] Subtracts [rs2] from [rs1], result in [rd] *) + | Mul of reg * reg * reg + (** [mul rd, rs1, rs2] Multiplies [rs1] by [rs2], result in [rd] *) + (* === Logical / bitwise instructions === *) + | Xori of reg * reg * int + (** [xori rd, rs, imm] Bitwise exclusive-or of [rs] and immediate [imm], result in [rd] *) + | Xor of reg * reg * reg + (** [xor rd, rs1, rs2] Bitwise exclusive-or of [rs1] and [rs2], result in [rd] *) + | Slt of reg * reg * reg + (** [slt rd, rs1, rs2] Sets [rd] = 1 if [rs1] < [rs2], else [rd] = 0 *) + | Seqz of reg * reg (** [seqz rd, rs] Sets [rd] = 1 if [rs] == 0, else [rd] = 0 *) + | Snez of reg * reg (** [snez rd, rs] Sets [rd] = 1 if [rs] != 0, else [rd] = 0 *) + (* === Immediate loading === *) + | Li of reg * int (** [li rd, imm] Loads immediate [imm] into register [rd] *) + | Mv of reg * reg + (** [mv rd, rs] Copies the value from register [rs] into register [rd] *) + (* === Memory access === *) + | Ld of reg * offset + (** [ld rd, offset(base)] Loads value from memory [base + offset] into register [rd] *) + | Sd of reg * offset + (** [sd rs, offset(base)] Stores value of register [rs] into memory [base + offset] *) + (* === Control flow: jumps and branches === *) + | Beq of reg * reg * string + (** [beq rs1, rs2, label] Jumps to [label] if [rs1] == [rs2] *) + | J of string (** [j label] Unconditional jump to [label] *) + | Label of string (** [label:] Declares a label for jumps and function calls *) + (* === Function calls and system calls === *) + | Call of string + (** [call label] Calls a function at [label], return address stored in RA *) + | Ret (** [ret] Returns from function to address stored in RA *) + | Ecall + (** [ecall] Environment call (system call). + The syscall number is passed in A7, arguments in A0–A6, result in A0. *) + +val pp_instr : Format.formatter -> instr -> unit +val addi : (instr -> 'a) -> reg -> reg -> int -> 'a +val add : (instr -> 'a) -> reg -> reg -> reg -> 'a +val sub : (instr -> 'a) -> reg -> reg -> reg -> 'a +val mul : (instr -> 'a) -> reg -> reg -> reg -> 'a +val xori : (instr -> 'a) -> reg -> reg -> int -> 'a +val xor : (instr -> 'a) -> reg -> reg -> reg -> 'a +val slt : (instr -> 'a) -> reg -> reg -> reg -> 'a +val seqz : (instr -> 'a) -> reg -> reg -> 'a +val snez : (instr -> 'a) -> reg -> reg -> 'a +val li : (instr -> 'a) -> reg -> int -> 'a +val mv : (instr -> 'a) -> reg -> reg -> 'a +val ld : (instr -> 'a) -> reg -> offset -> 'a +val sd : (instr -> 'a) -> reg -> offset -> 'a +val beq : (instr -> 'a) -> reg -> reg -> string -> 'a +val j : (instr -> 'a) -> string -> 'a +val label : (instr -> 'a) -> string -> 'a +val call : (instr -> 'a) -> string -> 'a +val ret : (instr -> 'a) -> 'a +val ecall : (instr -> 'a) -> 'a diff --git a/akaML/tests/codegen.t b/akaML/tests/codegen.t new file mode 100644 index 00000000..429463a5 --- /dev/null +++ b/akaML/tests/codegen.t @@ -0,0 +1,68 @@ +Copyright 2025-2026, Friend-zva, RodionovMaxim05 +SPDX-License-Identifier: LGPL-3.0-or-later + + $ ../bin/akaML.exe -o factorial.s < let rec fac n = + > if n <= 1 + > then 1 + > else (let n1 = n-1 in + > let m = fac n1 in + > n*m) + > + > let main = fac 4 + + $ cat factorial.s + .section .text + .globl fac + .type fac, @function + fac: + addi sp, sp, -32 + sd ra, 24(sp) + sd s0, 16(sp) + addi s0, sp, 16 # Prologue ends + mv t0, a0 + li t1, 1 + slt t0, t1, t0 + xori t0, t0, 1 + beq t0, zero, else_0 + li a0, 1 + j end_0 + else_0: + mv t0, a0 + li t1, 1 + mv a1, a0 + sub a0, t0, t1 + sd a0, -8(s0) # n1 + ld a0, -8(s0) + addi sp, sp, -8 # Saving 'live' regs + sd a1, -16(s0) + call fac + sd a0, -24(s0) # m + ld t0, -16(s0) + ld t1, -24(s0) + mul a0, t0, t1 + end_0: + addi sp, s0, 16 # Epilogue starts + ld ra, 8(s0) + ld s0, 0(s0) + ret + + .globl _start + .type _start, @function + _start: + addi sp, sp, -16 + sd ra, 8(sp) + sd s0, 0(sp) + addi s0, sp, 0 # Prologue ends + li a0, 4 + call fac + addi sp, s0, 16 # Epilogue starts + ld ra, 8(s0) + ld s0, 0(s0) + li a7, 93 + ecall + + $ riscv64-linux-gnu-as -march=rv64gc factorial.s -o temp.o + $ riscv64-linux-gnu-ld temp.o -o file.exe + $ qemu-riscv64 -L /usr/riscv64-linux-gnu -cpu rv64 ./file.exe + [24] diff --git a/akaML/tests/dune b/akaML/tests/dune new file mode 100644 index 00000000..d8fdae66 --- /dev/null +++ b/akaML/tests/dune @@ -0,0 +1,54 @@ +(library + (name tests) + (libraries + akaML.Ast + akaML.Parser + akaML.Pprinter + akaML.Inferencer + akaML.RiscV) + (modules Test_parser Test_pprinter Test_inferencer Test_riscv) + (preprocess + (pps ppx_expect)) + (inline_tests) + (instrumentation + (backend bisect_ppx))) + +(cram + (applies_to tests inference codegen) + (deps + ../bin/akaML.exe + manytests/do_not_type/001.ml + manytests/do_not_type/002if.ml + manytests/do_not_type/003occurs.ml + manytests/do_not_type/004let_poly.ml + manytests/do_not_type/015tuples.ml + manytests/do_not_type/099.ml + manytests/typed/001fac.ml + manytests/typed/002fac.ml + manytests/typed/003fib.ml + manytests/typed/004manyargs.ml + manytests/typed/005fix.ml + manytests/typed/006partial.ml + manytests/typed/006partial2.ml + manytests/typed/006partial3.ml + manytests/typed/007order.ml + manytests/typed/008ascription.ml + manytests/typed/009let_poly.ml + manytests/typed/010fac_anf.ml + manytests/typed/010sukharev.ml + manytests/typed/011mapcps.ml + manytests/typed/012fibcps.ml + manytests/typed/013foldfoldr.ml + manytests/typed/015tuples.ml + manytests/typed/016lists.ml)) + +(executable + (name run_qchecker) + (modules run_qchecker) + (libraries akaML.Qchecker) + (instrumentation + (backend bisect_ppx))) + +(cram + (applies_to qchecker) + (deps run_qchecker.exe)) diff --git a/akaML/tests/inference.t b/akaML/tests/inference.t new file mode 100644 index 00000000..653caabe --- /dev/null +++ b/akaML/tests/inference.t @@ -0,0 +1,111 @@ +do_not_type: + + $ ../bin/akaML.exe -inference -fromfile manytests/do_not_type/001.ml + Inferencer error: Undefined variable 'fac' + + $ ../bin/akaML.exe -inference -fromfile manytests/do_not_type/002if.ml + Inferencer error: Unification failed on int and bool + + $ ../bin/akaML.exe -inference -fromfile manytests/do_not_type/003occurs.ml + Inferencer error: Occurs check failed: the type variable 'ty1 occurs inside 'ty1 -> 'ty3 + + $ ../bin/akaML.exe -inference -fromfile manytests/do_not_type/004let_poly.ml + Inferencer error: Unification failed on int and bool + + $ ../bin/akaML.exe -inference -fromfile manytests/do_not_type/015tuples.ml + Inferencer error: Only variables are allowed as left-hand side of `let rec' + + $ ../bin/akaML.exe -inference -fromfile manytests/do_not_type/099.ml + Inferencer error: Only variables are allowed as left-hand side of `let rec' + +typed: + + $ ../bin/akaML.exe -inference -fromfile manytests/typed/001fac.ml + val fac : int -> int + val main : int + + $ ../bin/akaML.exe -inference -fromfile manytests/typed/002fac.ml + val fac_cps : int -> (int -> 'a) -> 'a + val main : int + + $ ../bin/akaML.exe -inference -fromfile manytests/typed/003fib.ml + val fib_acc : int -> int -> int -> int + val fib : int -> int + val main : int + + $ ../bin/akaML.exe -inference -fromfile manytests/typed/004manyargs.ml + val wrap : 'a -> 'a + val test3 : int -> int -> int -> int + val test10 : int -> int -> int -> int -> int -> int -> int -> int -> int -> int -> int + val main : int + + $ ../bin/akaML.exe -inference -fromfile manytests/typed/005fix.ml + val fix : (('a -> 'b) -> 'a -> 'b) -> 'a -> 'b + val fac : (int -> int) -> int -> int + val main : int + + $ ../bin/akaML.exe -inference -fromfile manytests/typed/006partial.ml + val foo : int -> int + val main : int + + $ ../bin/akaML.exe -inference -fromfile manytests/typed/006partial2.ml + val foo : int -> int -> int -> int + val main : int + + $ ../bin/akaML.exe -inference -fromfile manytests/typed/006partial3.ml + val foo : int -> int -> int -> unit + val main : int + + $ ../bin/akaML.exe -inference -fromfile manytests/typed/007order.ml + val _start : unit -> unit -> int -> unit -> int -> int -> unit -> int -> int -> int + val main : unit + + $ ../bin/akaML.exe -inference -fromfile manytests/typed/008ascription.ml + val addi : ('a -> bool -> int) -> ('a -> bool) -> 'a -> int + val main : int + + $ ../bin/akaML.exe -inference -fromfile manytests/typed/009let_poly.ml + val temp : int * bool + + $ ../bin/akaML.exe -inference -fromfile manytests/typed/010fac_anf.ml + val fac : int -> int + val main : int + + $ ../bin/akaML.exe -inference -fromfile manytests/typed/010sukharev.ml + Inferencer error: Unification failed on int * int * int and 'ty18 * 'ty19 + + $ ../bin/akaML.exe -inference -fromfile manytests/typed/011mapcps.ml + val map : ('c -> 'a) -> 'c list -> ('a list -> 'b) -> 'b + val iter : ('a -> 'b) -> 'a list -> unit + val main : unit + + $ ../bin/akaML.exe -inference -fromfile manytests/typed/012fibcps.ml + val fib : int -> (int -> 'a) -> 'a + val main : unit + + $ ../bin/akaML.exe -inference -fromfile manytests/typed/013foldfoldr.ml + val id : 'a -> 'a + val fold_right : ('b -> 'a -> 'a) -> 'a -> 'b list -> 'a + val foldl : ('b -> 'a -> 'b) -> 'b -> 'a list -> 'b + val main : unit + + $ ../bin/akaML.exe -inference -fromfile manytests/typed/015tuples.ml + val fix : (('a -> 'b) -> 'a -> 'b) -> 'a -> 'b + val map : ('b -> 'a) -> 'b * 'b -> 'a * 'a + val fixpoly : (('a -> 'b) * ('a -> 'b) -> 'a -> 'b) * (('a -> 'b) * ('a -> 'b) -> 'a -> 'b) -> ('a -> 'b) * ('a -> 'b) + val feven : 'a * (int -> int) -> int -> int + val fodd : (int -> int) * 'a -> int -> int + val tie : (int -> int) * (int -> int) + val meven : int -> int + val modd : int -> int + val main : int + + $ ../bin/akaML.exe -inference -fromfile manytests/typed/016lists.ml + val length : 'a list -> int + val length_tail : 'a list -> int + val map : ('a -> 'b) -> 'a list -> 'b list + val append : 'a list -> 'a list -> 'a list + val concat : 'a list list -> 'a list + val iter : ('a -> unit) -> 'a list -> unit + val cartesian : 'a list -> 'b list -> ('a * 'b) list + val main : int diff --git a/akaML/tests/manytests b/akaML/tests/manytests new file mode 120000 index 00000000..274b7705 --- /dev/null +++ b/akaML/tests/manytests @@ -0,0 +1 @@ +../../manytests/ \ No newline at end of file diff --git a/akaML/tests/qchecker.t b/akaML/tests/qchecker.t new file mode 100644 index 00000000..b0dde893 --- /dev/null +++ b/akaML/tests/qchecker.t @@ -0,0 +1,7 @@ +Copyright 2025-2026, Friend-zva, RodionovMaxim05 +SPDX-License-Identifier: LGPL-3.0-or-later + + $ ./run_qchecker.exe -s 20242025 + seed: 20242025 + ================================================================================ + success (ran 1 tests) diff --git a/akaML/tests/run_qchecker.ml b/akaML/tests/run_qchecker.ml new file mode 100644 index 00000000..cb86a067 --- /dev/null +++ b/akaML/tests/run_qchecker.ml @@ -0,0 +1,14 @@ +[@@@ocaml.text "/*"] + +(** Copyright 2025-2026, Friend-zva, RodionovMaxim05 *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +[@@@ocaml.text "/*"] + +open Qchecker + +let () = + let _ : int = run_gen ~show_passed:false ~show_shrinker:false ~count:10 in + () +;; diff --git a/akaML/tests/run_qchecker.mli b/akaML/tests/run_qchecker.mli new file mode 100644 index 00000000..a460e7de --- /dev/null +++ b/akaML/tests/run_qchecker.mli @@ -0,0 +1,7 @@ +[@@@ocaml.text "/*"] + +(** Copyright 2025-2026, Friend-zva, RodionovMaxim05 *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +[@@@ocaml.text "/*"] diff --git a/akaML/tests/test_inferencer.ml b/akaML/tests/test_inferencer.ml new file mode 100644 index 00000000..1b475252 --- /dev/null +++ b/akaML/tests/test_inferencer.ml @@ -0,0 +1,430 @@ +[@@@ocaml.text "/*"] + +(** Copyright 2025-2026, Friend-zva, RodionovMaxim05 *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +[@@@ocaml.text "/*"] + +open Parser +open Pprinter +open Inferencer + +let run str = + match parse str with + | Ok ast -> + (match run_inferencer env_with_print_funs ast with + | Ok (_, out_list) -> + List.iter + (function + | Some id, type' -> Format.printf "val %s : %a\n" id pp_core_type type' + | None, type' -> Format.printf "- : %a\n" pp_core_type type') + out_list + | Error e -> Format.printf "Inferencer error: %a\n" pp_error e) + | Error _ -> Format.printf "Parsing error\n" +;; + +let%expect_test "parsing error" = + run + {| + let a = ;; + |}; + [%expect + {| + Parsing error + |}] +;; + +let%expect_test "type check undefined variable" = + run + {| + let a = b + |}; + [%expect + {| + Inferencer error: Undefined variable 'b' + |}] +;; + +let%expect_test "type check negative expression" = + run + {| + let f a q = -(if a then q else -q) + |}; + [%expect + {| + val f : bool -> int -> int + |}] +;; + +let%expect_test "type check definition tuple" = + run + {| + let (a, b) = (1, 2);; + |}; + [%expect + {| + val a : int + val b : int + |}] +;; + +let%expect_test "type check definition variable" = + run + {| + let a = 5 + |}; + [%expect + {| + val a : int + |}] +;; + +let%expect_test "type check several definition variable" = + run + {| + let f = 1 and r = "qwe";; let q = 2 + |}; + [%expect + {| + val f : int + val r : string + val q : int + |}] +;; + +let%expect_test "type check several definition variable and executable them" = + run + {| + let f a = a and a = 1;; + f "hello";; + a;; + |}; + [%expect + {| + val f : 'a -> 'a + val a : int + - : string + - : int + |}] +;; + +let%expect_test "type check several recursive definition" = + run + {| + let rec f1 a = a + 1 and f2 b = f1 b;; + |}; + [%expect + {| + val f1 : int -> int + val f2 : int -> int + |}] +;; + +let%expect_test "type check definition function" = + run + {| + let f a b c = if a then b else c + |}; + [%expect + {| + val f : bool -> 'a -> 'a -> 'a + |}] +;; + +let%expect_test "type check definition construct" = + run + {| + let (a :: b :: []) = [ 1; 2 ] + |}; + [%expect + {| + val a : int + val b : int + |}] +;; + +let%expect_test "type check simple recursive let expression" = + run + {| + let rec x : int = 1;; + |}; + [%expect + {| + val x : int + |}] +;; + +let%expect_test "type check error in recursive let expression" = + run + {| + let rec x = x + 1;; + |}; + [%expect + {| + Inferencer error: This kind of expression is not allowed as right-hand side of `let rec' + |}] +;; + +let%expect_test "type check recursive let expression" = + run + {| + let prime n = + let rec check_zero x d = + match d with + | 1 -> true + | _ -> x + d <> 0 && check_zero x (d - 1) + in + match n with + | 0 -> false + | 1 -> false + | _ -> check_zero n (n - 1) + ;; + |}; + [%expect + {| + val prime : int -> bool + |}] +;; + +let%expect_test "type check of operators" = + run + {| + let f x y z = if x + 1 = 0 && y = 1 || z >= 'w' then 2 else 26;; + |}; + [%expect + {| + val f : int -> int -> char -> int + |}] +;; + +let%expect_test "type check pattern matching" = + run + {| + let f a b = match a b with 1 -> 'q' | 2 -> 'w' | _ -> 'e' + |}; + [%expect + {| + val f : ('a -> int) -> 'a -> char + |}] +;; + +let%expect_test "type check pattern bound the variable multiple times" = + run + {| + let f = function + | x, x -> true + | _ -> false + |}; + [%expect + {| + Inferencer error: Variable 'x' is bound several times in the matching + |}] +;; + +let%expect_test "type check of expression list" = + run + {| + let f a = [a; true] + |}; + [%expect + {| + val f : bool -> bool list + |}] +;; + +let%expect_test "type check invalid expression list" = + run + {| + let f a = [true; a; 2] + |}; + [%expect + {| + Inferencer error: Unification failed on bool and int + |}] +;; + +let%expect_test "type check pattern and expression list construct" = + run + {| + let f p = + let list = 1 :: 2 :: p in + match list with + | 1 :: 2 :: [ 3; 4 ] -> true + | [ 1; 2 ] -> true + | _ -> false + |}; + [%expect + {| + val f : int list -> bool + |}] +;; + +let%expect_test "type check pattern-matching" = + run + {| + let fmap f xs = + match xs with + | a :: [] -> [ f a ] + | a :: b :: [] -> [ f a; f b ] + | a :: b :: c :: [] -> [ f a; f b; f c ] + | _ -> [] + ;; + |}; + [%expect + {| + val fmap : ('b -> 'a) -> 'b list -> 'a list + |}] +;; + +let%expect_test "type check of pattern list" = + run + {| + let f a = match a with | [q; 1] -> q | [w; _] -> w + |}; + [%expect + {| + val f : int list -> int + |}] +;; + +let%expect_test "type check Some and None" = + run + {| + let f a = + match a with + | Some (_) -> Some ('a') + | None -> None + ;; + |}; + [%expect + {| + val f : 'a option -> char option + |}] +;; + +let%expect_test "type check definition function" = + run + {| + let f = function + | Some (a) -> (a) + | None -> false + |}; + [%expect + {| + val f : bool option -> bool + |}] +;; + +let%expect_test "type check expression constraint" = + run + {| + let f a b = (b a : int) + |}; + [%expect + {| + val f : 'a -> ('a -> int) -> int + |}] +;; + +let%expect_test "type check pattern constraint" = + run + {| + let f (q : int -> 'a option) (x : int) = q x + |}; + [%expect + {| + val f : (int -> 'a option) -> int -> 'a option + |}] +;; + +let%expect_test "type check pattern constraint with type var" = + run + {| + let f1 (q : 'a -> 'b) (x : 'a) = q x;; + let f2 (q : 'a -> 'b) (x : 'b) = q x;; + let f3 (q : 'a -> 'b) (x : 'c) = q x;; + |}; + [%expect + {| + val f1 : ('a -> 'b) -> 'a -> 'b + val f2 : ('a -> 'a) -> 'a -> 'a + val f3 : ('a -> 'b) -> 'a -> 'b + |}] +;; + +let%expect_test "type check recursive struct value" = + run + {| + let rec factorial n = if n <= 1 then 1 else n * factorial (n - 1) + and strange_factorial k = if k <= 1 then 1 else k + strange_factorial (k - 1) + |}; + [%expect + {| + val factorial : int -> int + val strange_factorial : int -> int + |}] +;; + +let%expect_test "type check polymorphism" = + run + {| + let rec f1 x = x;; + let foo1 = f1 1;; + let foo2 = f1 'a';; + let foo3 = f1 foo1;; + + let f2 x = x;; + let foo4 = f2 1;; + let foo5 = f2 'a';; + let foo6 = f2 foo5;; + |}; + [%expect + {| + val f1 : 'a -> 'a + val foo1 : int + val foo2 : char + val foo3 : int + val f2 : 'a -> 'a + val foo4 : int + val foo5 : char + val foo6 : char + |}] +;; + +let%expect_test "type check expression constraint" = + run + {| + let f a b = (b a : int) + |}; + [%expect + {| + val f : 'a -> ('a -> int) -> int + |}] +;; + +let%expect_test "type check sequence" = + run + {| + let a = print_int 21; 22 + |}; + [%expect + {| + val a : int + |}] +;; + +let%expect_test "type check operator redefinition" = + run + {| + let foo1 x y = x + y + let (+) = (&&) + let foo2 x y = x + y + |}; + [%expect + {| + val foo1 : int -> int -> int + val + : bool -> bool -> bool + val foo2 : bool -> bool -> bool + |}] +;; diff --git a/akaML/tests/test_inferencer.mli b/akaML/tests/test_inferencer.mli new file mode 100644 index 00000000..a460e7de --- /dev/null +++ b/akaML/tests/test_inferencer.mli @@ -0,0 +1,7 @@ +[@@@ocaml.text "/*"] + +(** Copyright 2025-2026, Friend-zva, RodionovMaxim05 *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +[@@@ocaml.text "/*"] diff --git a/akaML/tests/test_parser.ml b/akaML/tests/test_parser.ml new file mode 100644 index 00000000..a2f6be0a --- /dev/null +++ b/akaML/tests/test_parser.ml @@ -0,0 +1,307 @@ +[@@@ocaml.text "/*"] + +(** Copyright 2025-2026, Friend-zva, RodionovMaxim05 *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +[@@@ocaml.text "/*"] + +open Parser +open Pprinter + +let run str = + match parse str with + | Ok ast -> Format.printf "%a \n" pp_structure ast + | Error error -> Format.printf "%s" error +;; + +let%expect_test "parsing error" = + run + {| + let a = ;; + |}; + [%expect + {| + : end_of_input + |}] +;; + +let%expect_test "parsing factorial with `match'" = + run + {| + let rec factorial n = + match n with + | 0 -> 1 + | 1 -> 1 + | _ -> n * factorial (n - 1) + ;; + |}; + [%expect + {| + let rec factorial n = + match n with + | 0 -> 1 + | 1 -> 1 + | _ -> ( * ) n (factorial (( - ) n 1)) + ;; + |}] +;; + +let%expect_test "parsing expression with `fun'" = + run + {| + let sum1 = fun x y -> (x + y) + let sum2 = fun x -> (fun y -> x + y) + |}; + [%expect + {| + let sum1 x y = ( + ) x y;; + let sum2 x = fun y -> ( + ) x y;; + |}] +;; + +let%expect_test "parsing pattern and expression tuples" = + run + {| + let a, b = 1, 2 + let a, b, c = -1, 2 + 3, f d + |}; + [%expect + {| + let a, b = 1, 2;; + let a, b, c = -1, ( + ) 2 3, f d;; + |}] +;; + +let%expect_test "parsing pattern and expression list" = + run + {| + let list [ a; b; c ] = [ a; b; c ];; + let foo1 = f [ a; b ];; + let foo2 = [ f a; f b ];; + let foo3 = f [ f a; f b ];; + let foo4 = f [ f a; [ f a; f b ] ];; + let foo5 = f [ [ f a; [ f a; f b ] ] ];; + [] + [];; + [ 1 + 2; -3; f a ] + [ f a; f b ];; + [ [ [] + []; -3; f a ] ] + [ [ f a; f b ] ] + |}; + [%expect + {| + let list [ a; b; c ] = [ a; b; c ];; + let foo1 = f [ a; b ];; + let foo2 = [ f a; f b ];; + let foo3 = f [ f a; f b ];; + let foo4 = f [ f a; [ f a; f b ] ];; + let foo5 = f [ [ f a; [ f a; f b ] ] ];; + ( + ) [] [];; + ( + ) [ ( + ) 1 2; -3; f a ] [ f a; f b ];; + ( + ) [ [ ( + ) [] []; -3; f a ] ] [ [ f a; f b ] ];; + |}] +;; + +let%expect_test "parsing option and bool types" = + run + {| + let f = function + | Some (_) -> true + | None -> false + ;; + Some true;; + Some (Some true) + |}; + [%expect + {| + let f = function + | Some (_) -> true + | None -> false;; + Some (true);; + Some (Some (true));; + |}] +;; + +let%expect_test "parsing expression with `let'" = + run + {| + 1 + let two = 2 in two * 3 + |}; + [%expect + {| + ( + ) 1 (let two = 2 in ( * ) two 3);; + |}] +;; + +let%expect_test "parsing several structure items" = + run + {| + let squared x = x * x;; + squared 5 + |}; + [%expect + {| + let squared x = ( * ) x x;; + squared 5;; + |}] +;; + +let%expect_test "parsing expression sequence" = + run + {| + let a = (1, 2, ((); 3));; + [ (a; b) ];; + [ f a; [ () ]; ((); []) ];; + let a = [ ( (); 1); ( ( (); 2)); ( ((); (); 3) ); (((); 4); 5)] + |}; + [%expect + {| + let a = 1, 2, ((); 3);; + [ (a; b) ];; + [ f a; [ () ]; ((); []) ];; + let a = [ ((); 1); ((); 2); (((); ()); 3); (((); 4); 5) ];; + |}] +;; + +let%expect_test "parsing identifiers with explicitly assigned types 1" = + run + {| + let f : int list = [ 1; 2; 3 ];; + |}; + [%expect + {| + let f : int list = [ 1; 2; 3 ];; + |}] +;; + +let%expect_test "parsing identifiers with explicitly assigned types 2" = + run + {| + let f : int * char * string list = (1, 'a', ["first"; "second"; "third"]);; + |}; + [%expect + {| + let f : int * char * string list = 1, 'a', [ "first"; "second"; "third" ];; + |}] +;; + +let%expect_test "parsing identifiers with explicitly assigned types 3" = + run + {| + let f (a : int) (b : int) : int = a + b;; + |}; + [%expect + {| + let f (a : int) (b : int) : int = ( + ) a b;; + |}] +;; + +let%expect_test "parsing identifiers with explicitly assigned types 4" = + run + {| + let (a : int -> (char -> int) -> int) = 1 + (x : char -> int);; + |}; + [%expect + {| + let a : int -> (char -> int) -> int = ( + ) 1 (x : char -> int);; + |}] +;; + +let%expect_test "parsing chain right associative" = + run + {| + let f x y z = if x && (y || z && (y || x) || y) then true else false;; + let list (a :: b :: [ c ]) = a :: b :: [ c + 1 ] + |}; + [%expect + {| + let f x y z = + if ( && ) x (( || ) y (( || ) (( && ) z (( || ) y x)) y)) then true + else false + ;; + let list [ a; b; c ] = [ a; b; ( + ) c 1 ];; + |}] +;; + +let%expect_test "parsing chain left associative" = + run + {| + 8 / 800 - 555 * (35 + 35);; + let f x y z = if x = (y >= z && (y <= x) = y) then true else false;; + let f a b c = g a (b + c) b (a * b);; + let f a b c = a; b a; c [ a ];; + let f a : (int option list * unit option -> bool list option list) * string option option = a + |}; + [%expect + {| + ( - ) (( / ) 8 800) (( * ) 555 (( + ) 35 35));; + let f x y z = + if ( = ) x (( && ) (( >= ) y z) (( = ) (( <= ) y x) y)) then true + else false + ;; + let f a b c = g a (( + ) b c) b (( * ) a b);; + let f a b c = (a; b a); c [ a ];; + let f + a + : (int option list * unit option -> bool list option list) * string option option + = a + ;; + |}] +;; + +let%expect_test "parsing expression with priority" = + run + {| + 1 + 2 + 3;; + (1 + 2) - 3;; + (1 + 2) * 3;; + 3 * (1 + 2);; + (1 + 2) * (3 + 4);; + 1 * 2 * (3 + 4);; + (1 + 2) * 3 * 4;; + 1 / 2 - 3 * 4;; + g * f a (b + c) (d e) + |}; + [%expect + {| + ( + ) (( + ) 1 2) 3;; + ( - ) (( + ) 1 2) 3;; + ( * ) (( + ) 1 2) 3;; + ( * ) 3 (( + ) 1 2);; + ( * ) (( + ) 1 2) (( + ) 3 4);; + ( * ) (( * ) 1 2) (( + ) 3 4);; + ( * ) (( * ) (( + ) 1 2) 3) 4;; + ( - ) (( / ) 1 2) (( * ) 3 4);; + ( * ) g (f a (( + ) b c) (d e));; + |}] +;; + +let%expect_test "parsing negative expressions" = + run + {| + -2 + 1;; + -(2 + -2);; + -(-1 + 1);; + let f a = -a;; + let f a = -(if a then -1 else 2);; + g * f (-a) (-b + c) (d (-e)) + |}; + [%expect + {| + ( + ) (-2) 1;; + -(( + ) 2 (-2));; + -(( + ) (-1) 1);; + let f a = -a;; + let f a = -(if a then -1 else 2);; + ( * ) g (f (-a) (( + ) (-b) c) (d (-e)));; + |}] +;; + +let%expect_test "parsing operator redefinition" = + run + {| + let (+) = fun x y -> x - y;; + |}; + [%expect + {| + let ( + ) x y = ( - ) x y;; + |}] +;; diff --git a/akaML/tests/test_parser.mli b/akaML/tests/test_parser.mli new file mode 100644 index 00000000..5c1c0384 --- /dev/null +++ b/akaML/tests/test_parser.mli @@ -0,0 +1,9 @@ +[@@@ocaml.text "/*"] + +(** Copyright 2025-2026, Friend-zva, RodionovMaxim05 *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +[@@@ocaml.text "/*"] + +val run : string -> unit diff --git a/akaML/tests/test_pprinter.ml b/akaML/tests/test_pprinter.ml new file mode 100644 index 00000000..53f1d42e --- /dev/null +++ b/akaML/tests/test_pprinter.ml @@ -0,0 +1,239 @@ +[@@@ocaml.text "/*"] + +(** Copyright 2025-2026, Friend-zva, RodionovMaxim05 *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +[@@@ocaml.text "/*"] + +open Test_parser + +let%expect_test "parsing and pretty printing" = + run + {| +if true then 1 else 0;; + +let a b = if true then 1 else 0;; + +match + function + | _ -> true +with +| b -> true +;; + +if match b with + | b -> true +then ( + match b with + | b -> true) +else ( + match b with + | b -> true) +;; + +let a b = + match b with + | b -> + (match + function + | _ -> true + with + | b -> true) +;; + +let a b = + if match b with + | b -> true + then ( + match b with + | b -> true) + else ( + match b with + | b -> true) +;; + +let f a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a = 1;; + +match b with +| [ a; a; a; a; a; a; a; a; a; a; a; a; a; a; a; a; a; a; a; a; a; a; a; a; a; a; a; a; a; a; a; a; a; a; a; a; a; a; a; a; a; a; a; a ] -> true +| [ a; a; a; a; a ] -> false +| a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a -> true +| a, a, a, a, a, a -> false +| _ -> false +;; + |}; + [%expect + {| +if true then 1 else 0;; +let a b = if true then 1 else 0;; +match + function + | _ -> true with +| b -> true;; +if match b with + | b -> true +then + (match b with + | b -> true) +else + (match b with + | b -> true);; +let a b = match b with + | b -> + (match + function + | _ -> true with + | b -> true);; +let a b = + if match b with + | b -> true + then + (match b with + | b -> true) + else + (match b with + | b -> true) +;; +let f + a + a + a + a + a + a + a + a + a + a + a + a + a + a + a + a + a + a + a + a + a + a + a + a + a + a + a + a + a + a + a + a + a + a + a + a + a + a + a + a + a + a + a + a + = 1 +;; +match b with +| [ a + ; a + ; a + ; a + ; a + ; a + ; a + ; a + ; a + ; a + ; a + ; a + ; a + ; a + ; a + ; a + ; a + ; a + ; a + ; a + ; a + ; a + ; a + ; a + ; a + ; a + ; a + ; a + ; a + ; a + ; a + ; a + ; a + ; a + ; a + ; a + ; a + ; a + ; a + ; a + ; a + ; a + ; a + ; a + ] -> true +| [ a; a; a; a; a ] -> false +| ( a + , a + , a + , a + , a + , a + , a + , a + , a + , a + , a + , a + , a + , a + , a + , a + , a + , a + , a + , a + , a + , a + , a + , a + , a + , a + , a + , a + , a + , a + , a + , a + , a + , a + , a + , a + , a + , a + , a + , a + , a + , a + , a + , a ) -> true +| ( a, a, a, a, a, a ) -> false +| _ -> false;; + |}] +;; diff --git a/akaML/tests/test_pprinter.mli b/akaML/tests/test_pprinter.mli new file mode 100644 index 00000000..a460e7de --- /dev/null +++ b/akaML/tests/test_pprinter.mli @@ -0,0 +1,7 @@ +[@@@ocaml.text "/*"] + +(** Copyright 2025-2026, Friend-zva, RodionovMaxim05 *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +[@@@ocaml.text "/*"] diff --git a/akaML/tests/test_riscv.ml b/akaML/tests/test_riscv.ml new file mode 100644 index 00000000..7bce1149 --- /dev/null +++ b/akaML/tests/test_riscv.ml @@ -0,0 +1,78 @@ +[@@@ocaml.text "/*"] + +(** Copyright 2025-2026, Friend-zva, RodionovMaxim05 *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +[@@@ocaml.text "/*"] + +open Parser + +let run str = + match parse str with + | Ok ast -> Format.printf "%a\n%!" RiscV.Codegen.gen_structure ast + | Error _ -> Format.printf "Parsing error\n" +;; + +let%expect_test "codegen bin op" = + run + {| + let foo = + let a = 1 + 2 in + let b = 3 - 4 in + let c = 5 * 6 in + let d = 7 <= 8 in + let e = 9 >= 10 in + let f = 11 = 12 in + let g = 13 <> 14 in + a + ;; + |}; + [%expect + {| + .section .text + .globl foo + .type foo, @function + foo: + addi sp, sp, -72 + sd ra, 64(sp) + sd s0, 56(sp) + addi s0, sp, 56 # Prologue ends + li t0, 1 + li t1, 2 + add a0, t0, t1 + sd a0, -8(s0) # a + li t0, 3 + li t1, 4 + sub a0, t0, t1 + sd a0, -16(s0) # b + li t0, 5 + li t1, 6 + mul a0, t0, t1 + sd a0, -24(s0) # c + li t0, 7 + li t1, 8 + slt a0, t1, t0 + xori a0, a0, 1 + sd a0, -32(s0) # d + li t0, 9 + li t1, 10 + slt a0, t0, t1 + xori a0, a0, 1 + sd a0, -40(s0) # e + li t0, 11 + li t1, 12 + xor a0, t0, t1 + seqz a0, a0 + sd a0, -48(s0) # f + li t0, 13 + li t1, 14 + xor a0, t0, t1 + snez a0, a0 + sd a0, -56(s0) # g + ld a0, -8(s0) + addi sp, s0, 16 # Epilogue starts + ld ra, 8(s0) + ld s0, 0(s0) + ret |}] +;; diff --git a/akaML/tests/test_riscv.mli b/akaML/tests/test_riscv.mli new file mode 100644 index 00000000..a460e7de --- /dev/null +++ b/akaML/tests/test_riscv.mli @@ -0,0 +1,7 @@ +[@@@ocaml.text "/*"] + +(** Copyright 2025-2026, Friend-zva, RodionovMaxim05 *) + +(** SPDX-License-Identifier: LGPL-3.0-or-later *) + +[@@@ocaml.text "/*"] diff --git a/akaML/tests/tests.t b/akaML/tests/tests.t new file mode 100644 index 00000000..be5dad1f --- /dev/null +++ b/akaML/tests/tests.t @@ -0,0 +1,46 @@ + $ ../bin/akaML.exe -dparsetree < let rec fac n = + > if n <= 1 + > then 1 + > else (let n1 = n-1 in + > let m = fac n1 in + > n*m) + > + > let main = fac 4 + [(Struct_value (Recursive, + { pat = (Pat_var "fac"); + exp = + (Exp_fun ((Pat_var "n"), [], + (Exp_ifthenelse ( + (Exp_apply + ((Exp_apply ((Exp_ident "<="), (Exp_ident "n"))), + (Exp_constant (Const_integer 1)))), + (Exp_constant (Const_integer 1)), + (Some (Exp_let (Nonrecursive, + { pat = (Pat_var "n1"); + exp = + (Exp_apply + ((Exp_apply ((Exp_ident "-"), (Exp_ident "n"))), + (Exp_constant (Const_integer 1)))) + }, + [], + (Exp_let (Nonrecursive, + { pat = (Pat_var "m"); + exp = + (Exp_apply ((Exp_ident "fac"), (Exp_ident "n1"))) }, + [], + (Exp_apply + ((Exp_apply ((Exp_ident "*"), (Exp_ident "n"))), + (Exp_ident "m"))) + )) + ))) + )) + )) + }, + [])); + (Struct_value (Nonrecursive, + { pat = (Pat_var "main"); + exp = + (Exp_apply ((Exp_ident "fac"), (Exp_constant (Const_integer 4)))) }, + [])) + ] diff --git a/pairing.md b/pairing.md index 48448952..e715831c 100644 --- a/pairing.md +++ b/pairing.md @@ -3,4 +3,10 @@ | AML | Суворов | Чирков | MyLittleML | Кочергин | Кузнецов | akaML | [Заикин](https://github.com/Friend-zva) | [Родионов](https://github.com/RodionovMaxim05) +| ZOVML | Орешин | Дмитриевцев +| DOOML | [Сергеев](https://github.com/IgnatSergeev) | [Белянин](https://github.com/georgiy-belyanin) | +| PudgeWithMoML | [Насретдинов](https://github.com/Ycyken) | [Комбаев](https://github.com/homka122/) +| XML | [Гавриленко](https://github.com/qrutyy) | [Руднев-Степанян](https://github.com/Dabzelos) +| ZondbiML | [Власенко](https://github.com/spisladqo) | [Шакиров](https://github.com/shakareem) +| oMLet | [Котельникова](https://github.com/p1onerka) | [Козырева](https://github.com/sofyak0zyreva) | demo | [@Kakadu](http://github.com/Kakadu) | BOSS | diff --git a/tasks.md b/tasks.md index 7e837a09..94ae758e 100644 --- a/tasks.md +++ b/tasks.md @@ -2,11 +2,28 @@ Задания [парные](/pairing.md). Ожидается, что вы будете свободно знать код напарника. Практикуйте парное программирование и т.п. -| | Название | Дедлайн | Баллы | +| | Название | Дедлайн | | +| - | --------------- | ------- | ------ | +| 1 | Факториал в ANF | 4.10 | | +| 2 | ANF | 18.10 | | +| 3 | LL + CC | 25.10 | | +| 4 | Tuples | 1.11 | | +| 5 | GC | 8.11 | | + + #### 1. Факториал @@ -16,5 +33,24 @@ * Ответ можно выдавать как exit code, либо встроить в язык стандартную функцию `exit: int -> 'a` * Разумеется нужны cram тесты +#### 2. ANF + +* Реализовать ANF-преобразование, чтобы уместь писать человеческие вычисления факториалов и чисел Фибоначчи +* Реализовать в рантайме функцию, которая позволит печатать числа. +* Тесты. Факториал/фибоначчи + про печать. + + ``` + $ cat > test.ml << EOF + > let large x = if 0<>x then print 0 else print 1 + > let main = + > let x = if (if (if 0 + > then 0 else (let t42 = print 42 in 1)) + > then 0 else 1) + > then 0 else 1 in + > large x + > EOF + ``` + +#### 3. #### ?