From 9acfa09dba61e59419aacadf94a91f472b65bb26 Mon Sep 17 00:00:00 2001 From: Sorawee Porncharoenwase Date: Sun, 26 Nov 2023 23:02:45 -0800 Subject: [PATCH] first commit --- .gitignore | 5 + CHANGES.md | 3 + LICENSE.md | 15 ++ README.md | 4 + doc/dune | 2 + doc/index.mld | 403 ++++++++++++++++++++++++++++++++++++++ dune-project | 24 +++ lib/dune | 4 + lib/printer.ml | 364 ++++++++++++++++++++++++++++++++++ lib/printer.mli | 51 +++++ lib/signature.mli | 368 ++++++++++++++++++++++++++++++++++ lib/util.ml | 5 + lib/util.mli | 11 ++ pretty_expressive.opam | 32 +++ test/dune | 3 + test/pretty_expressive.ml | 137 +++++++++++++ 16 files changed, 1431 insertions(+) create mode 100644 .gitignore create mode 100644 CHANGES.md create mode 100644 LICENSE.md create mode 100644 README.md create mode 100644 doc/dune create mode 100644 doc/index.mld create mode 100644 dune-project create mode 100644 lib/dune create mode 100644 lib/printer.ml create mode 100644 lib/printer.mli create mode 100644 lib/signature.mli create mode 100644 lib/util.ml create mode 100644 lib/util.mli create mode 100644 pretty_expressive.opam create mode 100644 test/dune create mode 100644 test/pretty_expressive.ml diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..157baf0 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +.DS_Store +.idea +*.log +tmp/ +_build/ diff --git a/CHANGES.md b/CHANGES.md new file mode 100644 index 0000000..3d2c9cb --- /dev/null +++ b/CHANGES.md @@ -0,0 +1,3 @@ +## 0.1 (2023-11-26) + +* Initial release diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..4122591 --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,15 @@ +## ISC License + +Copyright (c) 2023 Sorawee Porncharoenwase + +Permission to use, copy, modify, and distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..230793e --- /dev/null +++ b/README.md @@ -0,0 +1,4 @@ +# Pretty_expressive + +An implementation of [A Pretty Expressive Printer](https://dl.acm.org/doi/abs/10.1145/3622837) in OCaml. +View the documentation at https://sorawee.github.io/pretty-expressive-ocaml/pretty_expressive/index.html diff --git a/doc/dune b/doc/dune new file mode 100644 index 0000000..1904dec --- /dev/null +++ b/doc/dune @@ -0,0 +1,2 @@ +(documentation + (package pretty_expressive)) diff --git a/doc/index.mld b/doc/index.mld new file mode 100644 index 0000000..2b6983f --- /dev/null +++ b/doc/index.mld @@ -0,0 +1,403 @@ +{0:index The [pretty_expressive] library} + +This library implements a pretty expressive printer, following the algorithm presented in {{: https://dl.acm.org/doi/abs/10.1145/3622837 }A Pretty Expressive Printer (OOPSLA'23)}. +The pretty printer is expressive, provably optimal, and practically efficient. + +{1 Getting Started} + +General-purpose pretty printing is a process that produces human readable text from structured data. +Users encode the structured data together with styling choices in an abstract {i document} {{!Pretty_expressive.Signature.PrinterT.doc} [doc]}. This document contains printing instructions: things like text, newlines, and indentation. +It can also contain {i choices} ({{!Pretty_expressive.Signature.PrinterT.(<|>)}[<|>]}) between two or more alternatives, resulting in many possible layouts for a document. +{!Pretty_expressive}’s job is to pick a prettiest layout (according to a specified optimality objective) +from among all of the choices. +E.g., the one that minimizes the number of lines while not exceeding the page width limit. + +Here’s a simple example of pretty printing a document encoding a fragment of code. + +{[ +open Pretty_expressive + +(** Prints the example document [d] with the page width limit [w] *) +let print_doc (w : int): unit = + let cf = Printer.default_cost_factory ~page_width:w () in + let module P = Printer.Make (val cf) in + let open P in + + let d = text "while (true) {" <> + nest 4 + (nl <> text "f();" <> nl <> text "if (done())" <> + (let exit_d = text "exit();" in + (space <> exit_d) <|> nest 4 (nl <> exit_d))) <> + nl <> text "}" + in + pretty_print d +]} + +There is a lot of code above, so let's unpack it. + +{2 Document Construction} + +In the above code, [let d = text "while (true) {" <> ....] defines a document [d]. + +{ul {- {{!Pretty_expressive.Signature.PrinterT.text}[text]} prints a string.} + {- {{!Pretty_expressive.Signature.PrinterT.(<>)}[<>]} prints a concatenation of two documents.} + {- {{!Pretty_expressive.Signature.PrinterT.nl}[nl]} prints a newline.} + {- {{!Pretty_expressive.Signature.PrinterT.nest}[nest]} adds indentation level so that + {{!Pretty_expressive.Signature.PrinterT.nl}[nl]} adds indentation spaces.} + {- {{!Pretty_expressive.Signature.PrinterT.(<|>)}[<|>]} creates a choice.}} + +As a result, the above document [d] encodes two possible layouts: + +{v +while (true) { + f(); + if (done()) exit(); +} +v} + +and + +{v +while (true) { + f(); + if (done()) + exit(); +} +v} + +{2 Printer Construction} + +Most pretty printers are parameterized by a {i page width limit}, which indicates +the number of characters that each line should not exceed. +{!Pretty_expressive} is instead parameterized by a {{!factory} {i cost factory}}, +which can control not only the page width limit, but also other aspects of prettiness. +For the sake of simplicity, we will for now use a pre-defined cost factory +{{!Pretty_expressive.Printer.default_cost_factory}[Printer.default_cost_factory]}, which only allows the page width limit adjustment through +the labeled argument [~page_width]. Thus, +[let cf = Printer.default_cost_factory ~page_width:w ()] creates a +({{: https://dev.realworldocaml.org/first-class-modules.html}first-class module}) +cost factory [cf] that sets the page width limit to [w]. + +The pretty printer can then be instantiated by using {{!Pretty_expressive.Printer.Make}[Printer.Make]}. +It is a functor that consumes a {{!Pretty_expressive.Signature.CostFactory} [CostFactory]} module. +Here, [let module P = Printer.Make (val cf)] creates a pretty printer [P] +with the above cost factory. + +We then [let open P] so that combinators like {{!Pretty_expressive.Signature.PrinterT.text}[text]}, {{!Pretty_expressive.Signature.PrinterT.(<>)}[<>]}, and the pretty printing function {{!Pretty_expressive.Signature.PrinterT.pretty_print}[pretty_print]} +are in scope. + +{2 Putting them all together} + +With the above setup, we can pretty-print [d] with the cost factory [cf] by calling [pretty_print d]. +This returns a string that we can then put on screen. + +Let's now actually use the pretty printer. + +{[let () = print_doc 80 |> print_endline]} + +would output: + +{v +while (true) { + f(); + if (done()) exit(); +} +v} + +because the layout fits the page width limit, while having fewer lines. +By contrast: + +{[let () = print_doc 20 |> print_endline]} + +would output: + +{v +while (true) { + f(); + if (done()) + exit(); +} +v} + +because the other layout does not fit the page width limit, +leaving this output layout as the only option. + +{2 Alternative Document Construction} + +There are many ways to construct a document that encodes the same set of layouts. +Some may be easier than the other. + +For example, another way to construct a document for the above example could be: + +{[ + let d = text "while (true) {" <> + nest 4 + (nl <> text "f();" <> nl <> text "if (done())" <> + group (nest 4 (nl <> text "exit();"))) <> + nl <> text "}" +]} + +Here, the {{!Pretty_expressive.Signature.PrinterT.group}[group]} combinator is used. +It creates a choice: whether to collapse {{!Pretty_expressive.Signature.PrinterT.nl}[nl]} +to a single space or not. + +See {!Pretty_expressive.Signature.PrinterT} for the full list of combinators that we provide. +Since combinators are simply regular functions, users may also compose the existing combinators together +to create new user-defined combinators. + +{1 API Reference} + +See the documentation for the module {!pretty_expressive}. + +{1 Explainers} + +In this section, we explain some important concepts. +The full design consideration of {!Pretty_expressive} can be found in {{: https://dl.acm.org/doi/abs/10.1145/3622837 }the paper}. + +{2:bestpractice Best Practice for Document Construction} + +While we can put arbitrary sub-documents as the operands of the choice combinator {{!Pretty_expressive.Signature.PrinterT.(<|>)}[<|>]}, +we should {i share} sub-documents across choices. +This matters because the performance of {!Pretty_expressive} depends on the {i DAG size} of the input document. +Without sharing, the input document would get unfolded into a tree, whose size could be exponentially large +compared to the possible DAG size. + +As an example, say we want to pretty print an S-expression with three possible styles for each list node: +the horizontal style, the vertical style, and the argument list style. That is: + +{v +(a b c d) +v} + +could be printed as itself or + +{v +(a + b + c + d) +v} + +or + +{v +(a b + c + d) +v} + +We can construct a function to convert an S-expression to a document as follows: + +{[ +type sexp = Atom of string | List of sexp list + +let print_sexp (s : sexp) (w : int) = + let cf = Printer.default_cost_factory ~page_width:w () in + let module P = Printer.Make (val cf) in + let open P in + + let acat = fold_doc (fun x y -> x <+> space <+> y) in + + let rec pretty (s : sexp) = + match s with + | Atom s -> text s + | List [] -> lparen <+> rparen + | List [x] -> lparen <+> pretty x <+> rparen + | List (x :: xs) -> + let x_d = pretty x in + let xs_d = List.map pretty xs in + lparen <+> + (acat (x_d :: xs_d) <|> (* the horizontal style *) + vcat (x_d :: xs_d) <|> (* the vertical style *) + (x_d <+> space <+> vcat xs_d)) <+> (* the argument list style *) + rparen + in + pretty_print (pretty s) +]} + +The important point is that we reuse [x_d] and [xs_d] across [<|>]. +Had we written the following code instead, +document construction could take exponential time, and the resulting +document whose DAG size is very large would also cause pretty-printing +to be inefficient. + +{[ + lparen <+> + (acat (pretty x :: List.map pretty xs) <|> (* the horizontal style *) + vcat (pretty x :: List.map pretty xs) <|> (* the vertical style *) + (pretty x <+> space <+> vcat List.map pretty xs)) <+> (* the argument list style *) + rparen +]} + +{2:complimit The Computation Width Limit} + +Unlike other pretty printers, {!Pretty_expressive} needs an additional parameter: +the {i computation width limit}. +Regular users should not need to concern much with this parameter +({{!Pretty_expressive.Printer.default_cost_factory}[default_cost_factory]} will already provide a sensible value for the parameter), +but advanced users who need a fine-grained control may want to adjust this parameter. + +The parameter is used by the pretty printer to bound the computation. +On the flip side, the pretty printer is only guaranteed to return an optimal layout among layout printings that do not {i exceed} the parameter. +As a result, if the parameter is too high, the performance could be negatively impacted, +but if the parameter is too low, the output layout quality could be negatively impacted. + +{!Pretty_expressive} employs various heuristics to make the output layout pretty even when the computation width limit is exceeded, however. +In most applications, the value of {m 1.2 \times w} should suffice, where {m w} is the page width limit. + +{3 Technical Notes} + +A layout printing {i exceeds} the computation width limit when either the column position or indentation level exceeds the limit. For example: + +{[ +text "Racket" +]} + +exceeds the computation width limit of 5, since there are 6 characters in a line. Similarly: + +{[ +nest 6 (text "Rack") +]} + +exceeds the computation width limit of 5, since the indentation level exceeds 5 (even though this indentation level is completely unused). + +When all possible layout printing due to a document exceeds the computation width limit, +{!Pretty_expressive} will still output a layout, with no guarantee that the layout is optimal. +In such case, we say that the output layout is {i tainted}. The {{!Pretty_expressive.Util.info}[info]} record and the {{!Pretty_expressive.Signature.PrinterT.print}[print]} function can be used to find if the output layout is tainted or not. + +{2:factory Cost Factory} + +Pretty printers choose an optimal layout from a document by minimizing an {i optimality objective}. +Unlike other pretty printers, which have built-in optimality objectives, +{!pretty_expressive} allows users to customize an optimality objective via the {i cost factory}. + +The cost factory interface is given in {{!Pretty_expressive.Signature.CostFactory}[CostFactory]}. +A valid cost factory must also satisfy the interface, as well as various contracts specified in +the documentation of {{!Pretty_expressive.Signature.CostFactory}[CostFactory]}. +These contracts ensure that the concept of a cost for a layout is well-defined, +and make it possible to efficiently find a layout with minimal cost. + +See {{!Pretty_expressive.Printer.default_cost_factory}[default_cost_factory]} for an example of a cost factory. + +{3 Custom Cost Factory} + +Consider the example in {!bestpractice}. Each list node can be rendered with three possible styles: +the horizontal style, the vertical style, and the argument list style. + +{[ +# let example_sexp = + List [Atom "abc"; Atom "def"; List [Atom "ghi"; Atom "jkl"; Atom "mno"]];; +val example_sexp : sexp = + List [Atom "abc"; Atom "def"; List [Atom "ghi"; Atom "jkl"; Atom "mno"]] +# print_sexp example_sexp 15 |> print_endline;; +(abc + def + (ghi jkl mno)) +- : unit = () +]} + +Indeed, this is an optimal layout according to {{!Pretty_expressive.Printer.default_cost_factory}[default_cost_factory]}, +because it does not have any badness, and two newlines are minimal. + +However, let’s say that we consider the vertical style to be not as pretty. +The vertical style should still be a possibility however, +since it can help us avoid going over the page width limit and minimize the number of newlines in many situations. +We simply would prefer other styles when all else is equal. In this case, we would prefer the output: + +{v +(abc def + (ghi jkl + mno)) +v} + +To address this issue, we construct a new cost factory that is similar to +{{!Pretty_expressive.Printer.default_cost_factory}[default_cost_factory]}, +but with an extra component: the {i style cost}. + +{[ +let my_cost_factory ~page_width ?computation_width () = + (module struct + type t = int * int * int + let limit = match computation_width with + | None -> (float_of_int page_width) *. 1.2 |> int_of_float + | Some computation_width -> computation_width + + let text pos len = + let stop = pos + len in + if stop > page_width then + let maxwc = max page_width pos in + let a = maxwc - page_width in + let b = stop - maxwc in + (b * (2*a + b), 0, 0) + else + (0, 0, 0) + + let newline _ = (0, 1, 0) + + let combine (o1, h1, s1) (o2, h2, s2) = + (o1 + o2, h1 + h2, s1 + s2) + + let le (o1, h1, s1) (o2, h2, s2) = + if o1 = o2 then + if h1 = h2 then s1 <= s2 + else h1 < h2 + else o1 < o2 + + let debug (o, h, s) = Printf.sprintf "(%d %d %d)" o h s + + end: Signature.CostFactory with type t = int * int * int) +]} + +We now construct a function to convert an S-expression into a document, +and utilize the {{!Pretty_expressive.Signature.PrinterT.val-cost}[cost]} construct +to add a style cost to the vertical style document, thus discouraging it. + +{[ +let revised_print_sexp (s : sexp) (w : int) = + let cf = my_cost_factory ~page_width:w () in + let module P = Printer.Make (val cf) in + let open P in + + let acat = fold_doc (fun x y -> x <+> space <+> y) in + + let rec pretty (s : sexp) = + match s with + | Atom s -> text s + | List [] -> lparen <+> rparen + | List [x] -> lparen <+> pretty x <+> rparen + | List (x :: xs) -> + let x_d = pretty x in + let xs_d = List.map pretty xs in + lparen <+> + (acat (x_d :: xs_d) <|> (* the horizontal style *) + (cost (0, 0, 1) (vcat (x_d :: xs_d))) <|> (* the vertical style -- penalized *) + (x_d <+> space <+> vcat xs_d)) <+> (* the argument list style *) + rparen + in + pretty_print (pretty s) +]} + +Now we can pretty print as we desired: + +{[ +# revised_print_sexp example_sexp 15 |> print_endline;; +(abc def + (ghi jkl + mno)) +- : unit = () +]} + +This does not mean that the vertical style won't ever be used, however. +It is simply discouraged. +With an even lower page width limit, +the vertical style is the only way to avoid overflow, so it is employed. + +{[ +# revised_print_sexp example_sexp 10 |> print_endline;; +(abc + def + (ghi + jkl + mno)) +- : unit = () +]} diff --git a/dune-project b/dune-project new file mode 100644 index 0000000..230f80a --- /dev/null +++ b/dune-project @@ -0,0 +1,24 @@ +(lang dune 3.10) + +(name pretty_expressive) + +(generate_opam_files true) + +(source + (github sorawee/pretty-expressive-ocaml)) + +(authors "Sorawee Porncharoenwase ") + +(maintainers "Sorawee Porncharoenwase ") + +(license ISC) + +(documentation https://sorawee.github.io/pretty-expressive-ocaml/) + +(package + (name pretty_expressive) + (synopsis "A pretty expressive printer") + (description "A pretty printer implementation of 'A Pretty Expressive Printer' (OOPSLA'23), with an emphasis on expressiveness and optimality.") + (depends (ocaml (>= 4.05)) + dune + (alcotest :with-test))) diff --git a/lib/dune b/lib/dune new file mode 100644 index 0000000..206f202 --- /dev/null +++ b/lib/dune @@ -0,0 +1,4 @@ +(library + (public_name pretty_expressive) + (name pretty_expressive) + (modules_without_implementation signature)) diff --git a/lib/printer.ml b/lib/printer.ml new file mode 100644 index 0000000..706997b --- /dev/null +++ b/lib/printer.ml @@ -0,0 +1,364 @@ +(* magic numbers *) +let param_memo_limit = 7 + +type 's treeof = + | One of 's + | Cons of 's treeof * 's treeof + +let tree_flatten (t: 's treeof): 's list = + let rec loop (t: 's treeof) (acc: 's list) = + match t with + | One v -> v :: acc + | Cons (x, y) -> loop x (loop y acc) + in loop t [] + +module Core (C : Signature.CostFactory) = struct + let global_id = ref 0 + let next_id () = + let id = !global_id in + global_id := id + 1; + id + + type measure = { last: int; cost: C.t; layout: string list -> string list } + + let (<==) (m1 : measure) (m2 : measure): bool = + m1.last <= m2.last && C.le m1.cost m2.cost + + type measure_set = + | MeasureSet of measure list (* sorted by last in the decreasing order *) + | Tainted of (unit -> measure) + + type doc = + { dc: doc_case; + id: int; + memo_w: int; + nl_cnt: int; + table: ((int, measure_set) Hashtbl.t) option } + and doc_case = + | Text of string treeof * int + | Newline of string option + | Concat of doc * doc + | Choice of doc * doc + | Nest of int * doc + | Align of doc + | Reset of doc + | Cost of C.t * doc + | Fail + + type cost = C.t + + let init_memo_w = param_memo_limit - 1 + let calc_weight (d : doc) = if d.memo_w = 0 then init_memo_w else d.memo_w - 1 + let init_table (w : int) = if w = 0 then Some (Hashtbl.create 5) else None + + let fail = { dc = Fail; + id = next_id (); + memo_w = init_memo_w; + nl_cnt = 0; + table = None } + + let newline v = + { dc = Newline v; + id = next_id (); + memo_w = init_memo_w; + nl_cnt = 1; + table = None } + + let make_text s l = { dc = Text (s, l); + id = next_id (); + memo_w = init_memo_w; + nl_cnt = 0; + table = None } + + let text s = make_text (One s) (String.length s) + + let rec cost c d = + match d.dc with + | Fail -> fail + | Cost (c2, d) -> cost (C.combine c c2) d + | _ -> + let memo_w = calc_weight d in + { dc = Cost (c, d); + id = next_id (); + memo_w = memo_w; + nl_cnt = d.nl_cnt; + table = init_table memo_w } + + let rec (<>) (d1 : doc) (d2 : doc) = + match (d1.dc, d2.dc) with + | (Fail, _) | (_, Fail) -> fail + | (Text (_, 0), _) -> d2 + | (_, Text (_, 0)) -> d1 + | (Text (s1, l1), Text (s2, l2)) -> make_text (Cons (s1, s2)) (l1 + l2) + | (_, Cost (c, d2)) -> cost c (d1 <> d2) + | (Cost (c, d1), _) -> cost c (d1 <> d2) + | _ -> + let memo_w = min (calc_weight d1) (calc_weight d2) in + { dc = Concat (d1, d2); + id = next_id (); + memo_w = memo_w; + nl_cnt = d1.nl_cnt + d2.nl_cnt; + table = init_table memo_w } + + let rec nest (n : int) (d : doc) = + match d.dc with + | Fail | Align _ | Reset _ | Text _ -> d + | Cost (c, d) -> cost c (nest n d) + | _ -> + let memo_w = calc_weight d in + { dc = Nest (n, d); + id = next_id (); + memo_w = memo_w; + nl_cnt = d.nl_cnt; + table = init_table memo_w } + + let rec reset (d : doc) = + match d.dc with + | Fail | Align _ | Reset _ | Text _ -> d + | Cost (c, d) -> cost c (reset d) + | _ -> + let memo_w = calc_weight d in + { dc = Reset d; + id = next_id (); + memo_w = memo_w; + nl_cnt = d.nl_cnt; + table = init_table memo_w } + + let rec align d = + match d.dc with + | Fail | Align _ | Reset _ | Text _ -> d + | Cost (c, d) -> cost c (align d) + | _ -> + let memo_w = calc_weight d in + { dc = Align d; + id = next_id (); + memo_w = memo_w; + nl_cnt = d.nl_cnt; + table = init_table memo_w } + + let (<|>) d1 d2 = + if d1 == fail then d2 + else if d2 == fail then d1 + else + let memo_w = min (calc_weight d1) (calc_weight d2) in + { dc = Choice (d1, d2); + id = next_id (); + memo_w = memo_w; + nl_cnt = max d1.nl_cnt d2.nl_cnt; + table = init_table memo_w } + + let merge (ml1 : measure_set) (ml2 : measure_set): measure_set = + match (ml1, ml2) with + | (_, Tainted _) -> ml1 + | (Tainted _, _) -> ml2 + | (MeasureSet ms1, MeasureSet ms2) -> + let rec loop ms1 ms2 = match (ms1, ms2) with + | ([], _) -> ms2 + | (_, []) -> ms1 + | (m1 :: ms1p, m2 :: ms2p) -> + if m1 <== m2 then loop ms1 ms2p + else if m2 <== m1 then loop ms1p ms2 + else if m1.last > m2.last then m1 :: loop ms1p ms2 + else (* m2.last < m1.last *) m2 :: loop ms1 ms2p + in MeasureSet (loop ms1 ms2) + + let (++) (m1 : measure) (m2 : measure): measure = + { last = m2.last; + cost = C.combine m1.cost m2.cost; + layout = fun ss -> m1.layout (m2.layout ss) } + + let process_concat + (process_left : measure -> measure_set) + (ml1 : measure_set) = + match ml1 with + | Tainted mt1 -> + Tainted (fun () -> + let m1 = mt1 () in + match process_left m1 with + | Tainted mt2 -> m1 ++ mt2 () + | MeasureSet (m2 :: _) -> m1 ++ m2 + | _ -> failwith "impossible") + | MeasureSet ms1 -> + let do_one (m1 : measure): measure_set = + let rec loop ms2 result current_best = + match ms2 with + | [] -> List.rev (current_best :: result) + | m2 :: ms2 -> + let current = m1 ++ m2 in + if C.le current.cost current_best.cost then loop ms2 result current + else loop ms2 (current_best :: result) current + in match process_left m1 with + | Tainted m2 -> Tainted (fun () -> m1 ++ m2 ()) + | MeasureSet (m2 :: ms2) -> MeasureSet (loop ms2 [] (m1 ++ m2)) + | _ -> failwith "unreachable" in + let rec fold_right (ms: measure list): measure_set = + match ms with + | [] -> failwith "unreachable" + | [m] -> do_one m + | m :: ms -> merge (do_one m) (fold_right ms) + in fold_right ms1 + + let memoize f: doc -> int -> int -> measure_set = + let all_slots = C.limit + 1 in + let rec g ({ memo_w; table; _ } as d) (c : int) (i : int) = + if c <= C.limit && i <= C.limit && memo_w = 0 then + let key = i * all_slots + c in + match table with + | None -> failwith "unreachable" + | Some tbl -> + if Hashtbl.mem tbl key then Hashtbl.find tbl key + else + let ml = f g d c i in + Hashtbl.add tbl key ml; + ml + else f g d c i + in g + + let print ?(init_c = 0) (d : doc): Util.info = + let resolve self { dc; _ } (c : int) (i : int) : measure_set = + let core () = + match dc with + | Text (s, len_s) -> + MeasureSet [{ last = c + len_s; + cost = C.text c len_s; + layout = fun ss -> (tree_flatten s) @ ss }] + | Newline _ -> + MeasureSet [{ last = i; + cost = C.newline i; + layout = fun ss -> "\n" :: String.make i ' ' :: ss }] + | Concat (d1, d2) -> + process_concat (fun (m1 : measure) -> self d2 m1.last i) (self d1 c i) + | Choice (d1, d2) -> + if d1.nl_cnt < d2.nl_cnt then merge (self d2 c i) (self d1 c i) + else merge (self d1 c i) (self d2 c i) + | Nest (n, d) -> self d c (i + n) + | Align d -> self d c c + | Reset d -> self d c 0 + | Cost (co, d) -> + let add_cost (m : measure) = { m with cost = C.combine co m.cost } in + (match self d c i with + | MeasureSet ms -> MeasureSet (List.map add_cost ms) + | Tainted mt -> Tainted (fun () -> add_cost (mt ()))) + | Fail -> failwith "fails to render" + in + let exceeds = match dc with + | Text (_, len) -> (c + len > C.limit) || (i > C.limit) + | _ -> (c > C.limit) || (i > C.limit) in + if exceeds then + Tainted (fun () -> + match core () with + | Tainted mt -> mt () + | MeasureSet (m :: _) -> m + | _ -> failwith "impossible") + else core () in + let (m, is_tainted) = match memoize resolve d init_c 0 with + | MeasureSet (m :: _) -> (m, false) + | Tainted m -> (m (), true) + | _ -> failwith "impossible" in + + (* In Racket, a doc can be printed with many cost factories, *) + (* so the memoization tables should be cleared. *) + (* However, in OCaml, there is no need to do the same, *) + (* since a doc is tied to a cost factory. *) + + { out = String.concat "" (m.layout []); + is_tainted = is_tainted; + cost = C.debug m.cost } + + let pretty_print ?(init_c = 0) (d : doc): string = + (print ~init_c:init_c d).out +end + +(* ----------------------------------------------------------------------0---- *) + +module Make (C : Signature.CostFactory): (Signature.PrinterT with type cost = C.t) = struct + include Core (C) + + (* Constants *) + + let comma = text "," + let lbrack = text "[" + let rbrack = text "]" + let lbrace = text "{" + let rbrace = text "}" + let lparen = text "(" + let rparen = text ")" + let dquote = text "\"" + let empty = text "" + let space = text " " + + let nl = newline (Some " ") + let break = newline (Some "") + let hard_nl = newline None + + let flatten : doc -> doc = + let cache = Hashtbl.create 1000 in + let rec flatten ({ dc = dc; id = id; _ } as d) = + if Hashtbl.mem cache id then + Hashtbl.find cache id + else + let out = match dc with + | Fail | Text _ -> d + | Newline None -> fail + | Newline (Some s) -> text s + | Concat (({ id = a_id; _ } as a), ({ id = b_id; _ } as b)) -> + let { id = a_idp; _ } as ap = flatten a in + let { id = b_idp; _ } as bp = flatten b in + if a_idp = a_id && b_idp = b_id then d else ap <> bp + | Choice (({ id = a_id; _ } as a), ({ id = b_id; _ } as b)) -> + let { id = a_idp; _ } as ap = flatten a in + let { id = b_idp; _ } as bp = flatten b in + if a_idp = a_id && b_idp = b_id then d else ap <|> bp + | Nest (_, d) | Align d | Reset d -> flatten d + | Cost (c, ({ id = id; _ } as d)) -> + let { id = idp; _ } as dp = flatten d in + if idp = id then d else cost c dp + in + Hashtbl.add cache id out; + out + in flatten + + let (<+>) d1 d2 = d1 <> align d2 + let (<$>) d1 d2 = d1 <> hard_nl <> d2 + let group d = d <|> (flatten d) + + let (<->) x y = (flatten x) <+> y + + let fold_doc f ds = + match ds with + | [] -> empty + | x :: xs -> List.fold_left f x xs + + let hcat = fold_doc (<->) + let vcat = fold_doc (<$>) + +end + +let default_cost_factory ~page_width ?computation_width () = + (module struct + type t = int * int + let limit = match computation_width with + | None -> (float_of_int page_width) *. 1.2 |> int_of_float + | Some computation_width -> computation_width + + let text pos len = + let stop = pos + len in + if stop > page_width then + let maxwc = max page_width pos in + let a = maxwc - page_width in + let b = stop - maxwc in + (b * (2*a + b), 0) + else + (0, 0) + + let newline _ = (0, 1) + + let combine (o1, h1) (o2, h2) = + (o1 + o2, h1 + h2) + + let le (o1, h1) (o2, h2) = + if o1 = o2 then h1 <= h2 else o1 < o2 + + let debug (o, h) = Printf.sprintf "(%d %d)" o h + + end: Signature.CostFactory with type t = int * int) diff --git a/lib/printer.mli b/lib/printer.mli new file mode 100644 index 0000000..1e4c1ab --- /dev/null +++ b/lib/printer.mli @@ -0,0 +1,51 @@ +(** This module provides a pretty expressive printer. *) + +module Make(C : Signature.CostFactory): (Signature.PrinterT with type cost = C.t) +(** The pretty printer and doc combinators, parameterized by a cost factory. + See {!Signature.PrinterT} for details. *) + +val default_cost_factory : page_width:int -> ?computation_width:int -> unit -> + (module Signature.CostFactory with type t = int * int) +(** The default cost factory, parameterized by the page width limit [page_width], + and optionally {{!page-index.complimit}the computation width limit} + [computation_width]. + When the computation width limit is not specified, it is set to + [1.2 * page_width]. + + In this cost factory, the cost type [t] is a pair of natural numbers, + where the first component is {i badness}, + which is roughly speaking the sum of squared overflows, + and the second component is the height (number of newlines). + + Internally, [default_cost_factory] is defined as: + + {[ +let default_cost_factory ~page_width ?computation_width () = + (module struct + type t = int * int + let limit = match computation_width with + | None -> (float_of_int page_width) *. 1.2 |> int_of_float + | Some computation_width -> computation_width + + let text pos len = + let stop = pos + len in + if stop > page_width then + let maxwc = max page_width pos in + let a = maxwc - page_width in + let b = stop - maxwc in + (b * (2*a + b), 0) + else + (0, 0) + + let newline _ = (0, 1) + + let combine (o1, h1) (o2, h2) = + (o1 + o2, h1 + h2) + + let le (o1, h1) (o2, h2) = + if o1 = o2 then h1 <= h2 else o1 < o2 + + let debug (o, h) = Printf.sprintf "(%d %d)" o h + + end: Signature.CostFactory with type t = int * int) +]} *) diff --git a/lib/signature.mli b/lib/signature.mli new file mode 100644 index 0000000..1dce12e --- /dev/null +++ b/lib/signature.mli @@ -0,0 +1,368 @@ +(** This module defines types for the pretty printer. *) + +module type CostFactory = +sig + (** The cost factory interface. + + A valid cost factory should also satisfy the following contracts. + + {ul {- [le] is a total ordering.} + {- If [le a b] and [le c d] then [le (combine a c) (combine b d)]} + {- If [a] <= [b], then [le (text a l) (text b l)]} + {- If [a] <= [b], then [le (newline a) (newline b)]} + {- [text c (a + b) = combine (text c a) (text (c + a) b)]} + {- [combine] is associative and has the identity equal to [text 0 0]} + {- [text c 0] = [text 0 0] for any [c]}} + + See {{!Printer.default_cost_factory}[default_cost_factory]}, + {{!page-index.factory}the cost factory section}, + and the paper for examples of cost factories. *) + + type t + (** A type for cost *) + + val text : int -> int -> t + (** [text c l] calculates a cost for a text placement at column position [c] + with length [l] *) + + val newline : int -> t + (** [newline i] calculates a cost for a newline and indentation at level [i] *) + + val combine : t -> t -> t + (** [combine x y] combines the costs [x] and [y] together *) + + val le : t -> t -> bool + (** [le x y] tests if the cost [x] is less than or equal to the cost [y]. *) + + val limit: int + (** [limit] is {{!page-index.complimit}the computation width limit}. *) + + val debug : t -> string + (** [debug c] produces a string representation of a cost [c] *) + +end + +module type PrinterT = +sig + (** A pretty expressive printer. The rest of this section assumes that + the program begins with + + {[ + open Pretty_expressive + + let cf = Printer.default_cost_factory ~page_width:10 () + module P = Printer.Make (val cf) + open P + ]} + *) + + type doc + (** The [doc] type *) + + type cost + (** The [cost] type *) + + (** {2 Text document} *) + + val text : string -> doc + (** [text s] is a document for textual content [s]; + [s] must not contain a newline. + + {5 Examples:} + {[ +# pretty_print (text "Portal") |> print_endline;; +Portal +- : unit = () + ]} *) + + (** {2 Newline documents} *) + + val newline : (string option) -> doc + (** [newline s] is a document for a newline. + When [s] is [None], it {!flatten}s to {!fail}. + When [s] is not [None], it {!flatten}s to [text s]. + See {!flatten} for more details. *) + + val nl : doc + (** [nl] is a document for a newline that {!flatten}s to a single space. *) + + val break : doc + (** [break] is a document for a newline that {!flatten}s to an empty string. *) + + val hard_nl : doc + (** [hard_nl] is a document for a newline that {!fail}s to {!flatten}. *) + + (** {2 Choice document} *) + + val (<|>) : doc -> doc -> doc + (** [a <|> b] is a document for a choice between document [a] and [b]. + + {[ +# let print_doc w = + let cf = Printer.default_cost_factory ~page_width:w () in + let module P = Printer.Make (val cf) in + let open P in + pretty_print (text "Chrono Trigger" <|> + (text "Octopath" <> nl <> text "Traveler")) |> print_endline;; +val print_doc : int -> unit = +# print_doc 10;; +Octopath +Traveler +- : unit = () +# print_doc 15;; +Chrono Trigger +- : unit = () + ]} + + See also {{!page-index.bestpractice}Best Practice for Document Construction} *) + + (** {2 Concatenation document} *) + + val (<>) : doc -> doc -> doc + (** [a <> b] is a document for concatenation of documents [a] and [b] + {i without} alignment. It's also known as the {i unaligned concatenation}, + which is widely used in traditional pretty printers. + + {5 Examples:} + {[ +# let left_doc = text "Splatoon" <> nl <> text "Nier";; +val left_doc : doc = +# let right_doc = text "Automata" <> nl <> text "FEZ";; +val right_doc : doc = +# pretty_print (left_doc <> right_doc) |> print_endline;; +Splatoon +NierAutomata +FEZ +- : unit = () + ]} + + By "without alignment," we mean that the right document is not treated as + as box with a rigid structure. This makes it easy to format code in + C-like languages, whose array expression, function call, and curly braces + should not be rigid. + *) + + (** {2 Indentation documents} *) + + val align : doc -> doc + (** [align d] is a document that aligns [d] at the column position. + + {5 Examples:} + {[ +# pretty_print (left_doc <> align right_doc) |> print_endline;; +Splatoon +NierAutomata + FEZ +- : unit = () + ]} + + The aligned concatenation operator {!(<+>)} is a derived combinator that + composes {!(<>)} and [align] together. It is especially useful for + languages that uses the the box model for code styling. *) + + val nest : int -> doc -> doc + (** [nest n d] is a document that increments the indentation level by [n] + when rendering [d]. + + {5 Examples:} + {[ +# pretty_print (text "when 1 = 2:" <> nest 4 (nl <> text "print 'oh no!'")) + |> print_endline;; +when 1 = 2: + print 'oh no!' +- : unit = () + ]} + + The increment does not affect content on the current line. + In the following example, [when 1 = 2:] is not further indented. + + {[ +# pretty_print (nest 4 (text "when 1 = 2:" <> nl <> text "print 'oh no!'")) + |> print_endline;; +when 1 = 2: + print 'oh no!' +- : unit = () + ]} *) + + val reset : doc -> doc + (** [reset d] is a document that resets indentation level to 0 in [d]. + This is especially useful for formatting multi-line strings and + multi-line comments. + + {5 Examples:} + {[ +# let s_d = reset (text "#< nl <> + text "Zelda" <> nl <> + text "Baba is you" <> nl <> + text "EOF");; +val s_d : doc = +# pretty_print (text "when 1 = 2:" <> nest 4 (nl <> text "print " <> s_d)) + |> print_endline;; +when 1 = 2: + print #< doc -> doc + (** [cost c d] is a document that artificially adds cost [c] to [d]. + + In the below example, we artificially adds overflow to [text "Chrono Trigger"], + making it a non-optimal choice, even though [text "Chrono Trigger"] would have + been the optimal choice had [cost] not been used. + + {5 Examples:} + {[ +# pretty_print (cost (1, 0) (text "CrossCode") <|> + (text "Final" <> nl <> text "Fantasy")) |> print_endline;; +Final +Fantasy +- : unit = () +# pretty_print (text "CrossCode" <|> + (text "Final" <> nl <> text "Fantasy")) |> print_endline;; +CrossCode +- : unit = () + ]} + + [cost] is especially useful in combination with + {{!page-index.factory}a custom cost factory}. + See the section for further details. *) + + (** {2 Failure document} *) + + val fail : doc + (** A document that always fails. It interacts with {!(<|>)}: + failing branches are pruned away. + + {5 Examples:} + {[ +# pretty_print (text "Sea of Stars" <> fail) |> print_endline;; +Exception: Failure "fails to render". +# pretty_print ((text "Sea of Stars" <> fail) <|> text "Hades") |> print_endline;; +Hades +- : unit = () + ]} *) + + (** {2 Pretty printing functions} *) + + val print : ?init_c:int -> doc -> Util.info + (** [print d] prints the document [d] to an [info] record. + The optional [~init_c] can be used to indicate that the printing begins + at a non-zero column position. *) + + val pretty_print : ?init_c:int -> doc -> string + (** [pretty_print d] prints the document [d] to a string. + The optional [~init_c] can be used to indicate that the printing begins + at a non-zero column position. + + {5 Examples:} + {[ +# print_string "Languages: "; + pretty_print (align (text "Racket" <> nl <> + text "OCaml" <> nl <> + text "Pyret")) |> print_endline;; +Languages: Racket +OCaml +Pyret +- : unit = () +# print_string "Languages: "; + pretty_print ~init_c:11 + (align (text "Racket" <> nl <> + text "OCaml" <> nl <> + text "Pyret")) |> print_endline;; +Languages: Racket + OCaml + Pyret +- : unit = () + ]} *) + + (** {2 Other derived combinators} *) + + val flatten : doc -> doc + (** [flatten d] is a document that replaces newlines and indentation spaces + with what's specified in [newline] when rendering [d]. + + {5 Examples:} + {[ +# pretty_print (flatten (text "Fire Emblem" <> nl <> text "Awakening")) + |> print_endline;; +Fire Emblem Awakening +- : unit = () +# pretty_print (flatten (text "Mario + Rabbids" <> break <> text "Kingdom Battle")) + |> print_endline;; +Mario + RabbidsKingdom Battle +- : unit = () +# pretty_print (flatten (text "XCOM 2" <> hard_nl <> text "War of the Chosen")) + |> print_endline;; +Exception: Failure "fails to render". +# pretty_print (flatten (text "Tactics Ogre" <> + newline (Some ": ") <> + text "Reborn")) + |> print_endline;; +Tactics Ogre: Reborn +- : unit = () + ]} *) + + val group : doc -> doc + (** [group d] is a shorthand for [d <|> flatten d]. + This combinator is a part of most traditional pretty printers. *) + + val (<+>) : doc -> doc -> doc + (** [a <+> b] is a shorthand for [a <> align b]. + It is also known as the {i aligned concatenation}. *) + + val (<$>) : doc -> doc -> doc + (** [a <$> b] is a shorthand for [a <> hard_nl <> b]. *) + + val (<->) : doc -> doc -> doc + (** [a <-> b] is a shorthand for [flatten a <+> b]. + This is especially useful when combined with {!hard_nl} and {!(<|>)}: + it can be used when we want to do aligned concatenation, + but don't want the left part to have multiple lines. *) + + val fold_doc : (doc -> doc -> doc) -> doc list -> doc + (** [fold_doc (++) ds] is a shorthand for [d_1 ++ d_2 ++ ... ++ d_n] + where [d_1 d_2 ... d_n] are drawn from [ds]. *) + + val vcat : doc list -> doc + (** [vcat ds] is a shorthand for [d_1 <$> d_2 <$> ... <$> d_n] + where [d_1 d_2 ... d_n] are drawn from [ds]. *) + + val hcat : doc list -> doc + (** [vcat ds] is a shorthand for [d_1 <-> d_2 <-> ... <-> d_n] + where [d_1 d_2 ... d_n] are drawn from [ds]. *) + + val empty : doc + (** Equivalent to [text ""] *) + + val space : doc + (** Equivalent to [text " "] *) + + val comma : doc + (** Equivalent to [text ","] *) + + val lbrack : doc + (** Equivalent to [text "\["] *) + + val rbrack: doc + (** Equivalent to [text "\]"] *) + + val lbrace : doc + (** Equivalent to [text "{"] *) + + val rbrace : doc + (** Equivalent to [text "}"] *) + + val lparen : doc + (** Equivalent to [text "("] *) + + val rparen : doc + (** Equivalent to [text ")"] *) + + val dquote : doc + (** Equivalent to [text "\""] *) +end diff --git a/lib/util.ml b/lib/util.ml new file mode 100644 index 0000000..4201047 --- /dev/null +++ b/lib/util.ml @@ -0,0 +1,5 @@ +type info = { + out : string; + is_tainted : bool; + cost : string +} diff --git a/lib/util.mli b/lib/util.mli new file mode 100644 index 0000000..f1f345b --- /dev/null +++ b/lib/util.mli @@ -0,0 +1,11 @@ +(** This module provides utilities. *) + +type info = { + out : string; + (** Output of the pretty printer *) + is_tainted : bool; + (** Taintedness status *) + cost : string + (** Cost of the output layout *) +} +(** An [info] record, returned from the pretty printer. *) diff --git a/pretty_expressive.opam b/pretty_expressive.opam new file mode 100644 index 0000000..c8a36e5 --- /dev/null +++ b/pretty_expressive.opam @@ -0,0 +1,32 @@ +# This file is generated by dune, edit dune-project instead +opam-version: "2.0" +synopsis: "A pretty expressive printer" +description: + "A pretty printer implementation of 'A Pretty Expressive Printer' (OOPSLA'23), with an emphasis on expressiveness and optimality." +maintainer: ["Sorawee Porncharoenwase "] +authors: ["Sorawee Porncharoenwase "] +license: "ISC" +homepage: "https://github.com/sorawee/pretty-expressive-ocaml" +doc: "https://sorawee.github.io/pretty-expressive-ocaml/" +bug-reports: "https://github.com/sorawee/pretty-expressive-ocaml/issues" +depends: [ + "ocaml" {>= "4.05"} + "dune" {>= "3.10"} + "alcotest" {with-test} + "odoc" {with-doc} +] +build: [ + ["dune" "subst"] {dev} + [ + "dune" + "build" + "-p" + name + "-j" + jobs + "@install" + "@runtest" {with-test} + "@doc" {with-doc} + ] +] +dev-repo: "git+https://github.com/sorawee/pretty-expressive-ocaml.git" diff --git a/test/dune b/test/dune new file mode 100644 index 0000000..a6a2382 --- /dev/null +++ b/test/dune @@ -0,0 +1,3 @@ +(test + (name pretty_expressive) + (libraries pretty_expressive alcotest)) diff --git a/test/pretty_expressive.ml b/test/pretty_expressive.ml new file mode 100644 index 0000000..4456dbf --- /dev/null +++ b/test/pretty_expressive.ml @@ -0,0 +1,137 @@ +open Pretty_expressive + +let print_doc_choice (w : int) = + let cf = Printer.default_cost_factory ~page_width:w () in + let module P = Printer.Make (val cf) in + let open P in + + let d = text "while (true) {" <> + nest 4 + (nl <> text "f();" <> nl <> text "if (done())" <> + (let exit_d = text "exit();" in + (space <> exit_d) <|> nest 4 (nl <> exit_d))) <> + nl <> text "}" + in + pretty_print d + +let print_doc_group (w : int) = + let cf = Printer.default_cost_factory ~page_width:w () in + let module P = Printer.Make (val cf) in + let open P in + + let d = text "while (true) {" <> + nest 4 + (nl <> text "f();" <> nl <> text "if (done())" <> + group (nest 4 (nl <> text "exit();"))) <> + nl <> text "}" + in + pretty_print d + +let test_choice_doc_80 () = + Alcotest.(check string) "same string" + (String.concat "\n" + [ "while (true) {" + ; " f();" + ; " if (done()) exit();" + ; "}" + ]) + (print_doc_choice 80) + +let test_choice_doc_20 () = + Alcotest.(check string) "same string" + (String.concat "\n" + [ "while (true) {" + ; " f();" + ; " if (done())" + ; " exit();" + ; "}" + ]) + (print_doc_choice 20) + +let test_group_doc_80 () = + Alcotest.(check string) "same string" + (String.concat "\n" + [ "while (true) {" + ; " f();" + ; " if (done()) exit();" + ; "}" + ]) + (print_doc_group 80) + +let test_group_doc_20 () = + Alcotest.(check string) "same string" + (String.concat "\n" + [ "while (true) {" + ; " f();" + ; " if (done())" + ; " exit();" + ; "}" + ]) + (print_doc_group 20) + +(******************************************************************************) + +type sexp = Atom of string | List of sexp list + +let print_sexp (s : sexp) (w : int) = + let cf = Printer.default_cost_factory ~page_width:w () in + let module P = Printer.Make (val cf) in + let open P in + + let acat = fold_doc (fun x y -> x <+> space <+> y) in + + let rec pretty (s : sexp) = + match s with + | Atom s -> text s + | List [] -> lparen <+> rparen + | List [x] -> lparen <+> pretty x <+> rparen + | List (x :: xs) -> + let x_d = pretty x in + let xs_d = List.map pretty xs in + lparen <+> + (acat (x_d :: xs_d) <|> (* the horizontal style *) + vcat (x_d :: xs_d) <|> (* the vertical style *) + (x_d <+> space <+> vcat xs_d)) <+> (* the argument list style *) + rparen + in + pretty_print (pretty s) + +let example_sexp = List [Atom "a"; Atom "b"; Atom "c"; Atom "d"] + +let test_sexp_4 () = + Alcotest.(check string) "same string" + (String.concat "\n" + [ "(a" + ; " b" + ; " c" + ; " d)" + ]) + (print_sexp example_sexp 4) + +let test_sexp_6 () = + Alcotest.(check string) "same string" + (String.concat "\n" + [ "(a b" + ; " c" + ; " d)" + ]) + (print_sexp example_sexp 6) + +let test_sexp_10 () = + Alcotest.(check string) "same string" + (String.concat "\n" + [ "(a b c d)" ]) + (print_sexp example_sexp 10) + +let suite = + [ "choice; w = 80", `Quick, test_choice_doc_80 + ; "choice; w = 20", `Quick, test_choice_doc_20 + ; "group; w = 80", `Quick, test_group_doc_80 + ; "group; w = 20", `Quick, test_group_doc_20 + ; "sexp; w = 4", `Quick, test_sexp_4 + ; "sexp; w = 6", `Quick, test_sexp_6 + ; "sexp; w = 10", `Quick, test_sexp_10 + ] + +let () = + Alcotest.run "pretty expressive" [ "example doc", suite ]