diff --git a/CHANGES.md b/CHANGES.md index 666c1bd37..9b97125ee 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -15,6 +15,9 @@ ## Features +- Compatibility with Odoc 2.3.0, with support for the introduced syntax: tables, + and "codeblock output" (#1184) + - Display text of references in doc strings (#1166) - Add mark/remove unused actions for open, types, for loop indexes, modules, diff --git a/dune-project b/dune-project index 7b59875c6..34c04542f 100644 --- a/dune-project +++ b/dune-project @@ -56,7 +56,8 @@ possible and does not make any assumptions about IO. ordering dune-build-info spawn - (odoc-parser (and (>= 2.0.0) (< 2.3.0))) + astring + camlp-streams (ppx_expect (and (>= v0.15.0) :with-test)) (ocamlformat (and :with-test (= 0.24.1))) (ocamlc-loc (>= 3.7.0)) diff --git a/flake.nix b/flake.nix index a19027481..c1ca7f14e 100644 --- a/flake.nix +++ b/flake.nix @@ -92,7 +92,8 @@ ppx_yojson_conv_lib uutf lsp - odoc-parser + astring + camlp-streams merlin-lib ]; doCheck = false; @@ -110,7 +111,8 @@ duneVersion = "3"; buildInputs = with pkgs.ocamlPackages; [ ocamlc-loc - odoc-parser + astring + camlp-streams dune-build-info re dune-rpc diff --git a/ocaml-lsp-server.opam b/ocaml-lsp-server.opam index 3288109c9..411352105 100644 --- a/ocaml-lsp-server.opam +++ b/ocaml-lsp-server.opam @@ -32,7 +32,8 @@ depends: [ "ordering" "dune-build-info" "spawn" - "odoc-parser" {>= "2.0.0" & < "2.3.0"} + "astring" + "camlp-streams" "ppx_expect" {>= "v0.15.0" & with-test} "ocamlformat" {with-test & = "0.24.1"} "ocamlc-loc" {>= "3.7.0"} diff --git a/ocaml-lsp-server/src/doc_to_md.ml b/ocaml-lsp-server/src/doc_to_md.ml index 93bb192a7..774dd4b9c 100644 --- a/ocaml-lsp-server/src/doc_to_md.ml +++ b/ocaml-lsp-server/src/doc_to_md.ml @@ -106,6 +106,41 @@ let rec nestable_block_element_to_block let paragraph = Block.Paragraph.make inline in let meta = loc_to_meta location in Block.Paragraph (paragraph, meta) + | { value = `Table ((grid, alignment), _); location } -> + let meta = loc_to_meta location in + let cell + ((c, _) : Odoc_parser.Ast.nestable_block_element Odoc_parser.Ast.cell) = + let c = nestable_block_element_list_to_inlines c in + (c, (" ", " ") (* Initial and trailing blanks *)) + in + let header_row + (row : Odoc_parser.Ast.nestable_block_element Odoc_parser.Ast.row) = + let row = List.map ~f:cell row in + ((`Header row, Meta.none), "") + in + let data_row + (row : Odoc_parser.Ast.nestable_block_element Odoc_parser.Ast.row) = + let row = List.map ~f:cell row in + ((`Data row, Meta.none), "") + in + let alignment_row = + match alignment with + | None -> [] + | Some alignment -> + let alignment = + List.map + ~f:(fun x -> ((x, 1 (* nb of separator *)), Meta.none)) + alignment + in + [ ((`Sep alignment, Meta.none), "") ] + in + let rows = + match grid with + | [] -> assert false + | h :: t -> (header_row h :: alignment_row) @ List.map ~f:data_row t + in + let tbl = Block.Table.make rows in + Block.Ext_table (tbl, meta) | { value = `List (kind, style, xs); location } -> let type' = match kind with @@ -140,19 +175,31 @@ let rec nestable_block_element_to_block let l = Block.List'.make ~tight type' list_items in let meta = loc_to_meta location in Block.List (l, meta) - | { value = `Code_block (metadata, { value = code; location = code_loc }) + | { value = + `Code_block + { meta = metadata + ; delimiter = _ + ; content = { value = code; location = code_loc } + ; output + } ; location } -> let info_string = match metadata with | None -> Some ("ocaml", loc_to_meta code_loc) - | Some ({ value = lang; location = lang_log }, _env) -> + | Some { language = { value = lang; location = lang_log }; tags = _ } -> Some (lang, loc_to_meta lang_log) in let block_line = Block_line.list_of_string code in let code_block = Block.Code_block.make ?info_string block_line in let meta = loc_to_meta location in - Block.Code_block (code_block, meta) + let main_block = Block.Code_block (code_block, meta) in + let output_block = + match output with + | None -> [] + | Some output -> [ nestable_block_element_list_to_block output ] + in + Block.Blocks (main_block :: output_block, meta) | { value = `Verbatim code; location } -> let info_string = Some ("verb", Meta.none) in let block_line = Block_line.list_of_string code in @@ -165,6 +212,68 @@ let rec nestable_block_element_to_block let meta = loc_to_meta location in Block.Ext_math_block (code_block, meta) +and nestable_block_element_to_inlines + (nestable : + Odoc_parser.Ast.nestable_block_element Odoc_parser.Loc.with_location) = + match nestable with + | { value = `Paragraph text; location = _ } -> + inline_element_list_to_inlines text + | { value = `Table ((grid, _), _); location } -> + let meta = loc_to_meta location in + let cell + ((c, _) : Odoc_parser.Ast.nestable_block_element Odoc_parser.Ast.cell) = + nestable_block_element_list_to_inlines c + in + let row (row : Odoc_parser.Ast.nestable_block_element Odoc_parser.Ast.row) = + let sep = Inline.Text (" | ", Meta.none) in + sep :: List.concat_map ~f:(fun c -> [ cell c; sep ]) row + in + let rows = List.concat_map ~f:row grid in + Inline.Inlines (rows, meta) + | { value = `List (_, _, xs); location } -> + let meta = loc_to_meta location in + let item i = nestable_block_element_list_to_inlines i in + let items = + let sep = Inline.Text (" - ", Meta.none) in + List.concat_map ~f:(fun i -> [ sep; item i ]) xs + in + Inline.Inlines (items, meta) + | { value = `Modules modules; location } -> + let meta = loc_to_meta location in + let s = List.map ~f:(fun x -> x.Odoc_parser.Loc.value) modules in + Inline.Text ("modules: " ^ String.concat ~sep:" " s, meta) + | { value = + `Code_block + { meta = _ + ; delimiter = _ + ; content = { value = code; location = code_loc } + ; output = _ + } + ; location + } -> + let meta = loc_to_meta location in + let meta_code = loc_to_meta code_loc in + let code_span = + Inline.Code_span.make ~backtick_count:1 [ ("", (code, meta_code)) ] + in + Inline.Code_span (code_span, meta) + | { value = `Verbatim code; location } -> + let meta = loc_to_meta location in + let code_span = + Inline.Code_span.make ~backtick_count:1 [ ("", (code, Meta.none)) ] + in + Inline.Code_span (code_span, meta) + | { value = `Math_block code; location } -> + let meta = loc_to_meta location in + let code_span = + Inline.Math_span.make ~display:true [ ("", (code, Meta.none)) ] + in + Inline.Ext_math_span (code_span, meta) + +and nestable_block_element_list_to_inlines l = + let inlines = List.map ~f:nestable_block_element_to_inlines l in + Inline.Inlines (inlines, Meta.none) + and nestable_block_element_list_to_block nestables = let blocks = List.map ~f:nestable_block_element_to_block nestables in Block.Blocks (blocks, Meta.none) @@ -261,6 +370,7 @@ let tag_to_block ~meta (tag : Odoc_parser.Ast.tag) = | `Inline -> format_tag_empty "@inline" | `Open -> format_tag_empty "@open" | `Closed -> format_tag_empty "@closed" + | `Hidden -> format_tag_empty "@hidden" let rec block_element_to_block (block_element : @@ -280,6 +390,7 @@ let rec block_element_to_block | `Modules _ | `Code_block _ | `Verbatim _ + | `Table _ | `Math_block _ ) ; location = _ } as nestable -> nestable_block_element_to_block nestable diff --git a/ocaml-lsp-server/src/dune b/ocaml-lsp-server/src/dune index c62a1507e..eb776adbb 100644 --- a/ocaml-lsp-server/src/dune +++ b/ocaml-lsp-server/src/dune @@ -25,7 +25,7 @@ merlin-lib.utils merlin-lib.extend cmarkit - odoc-parser + odoc_parser ppx_yojson_conv_lib re stdune diff --git a/ocaml-lsp-server/test/e2e-new/doc_to_md.ml b/ocaml-lsp-server/test/e2e-new/doc_to_md.ml index 6ac01860f..2aa4632f2 100644 --- a/ocaml-lsp-server/test/e2e-new/doc_to_md.ml +++ b/ocaml-lsp-server/test/e2e-new/doc_to_md.ml @@ -15,3 +15,44 @@ let%expect_test "subscript" = translate doc |> print_doc; [%expect {| a\_{b} |}] + +let%expect_test "table" = + let doc = + {| {table {tr {td some content} {td some other content}} {tr {td in another} {td row}}} |} + in + + translate doc |> print_doc; + [%expect + {| + | some content | some other content | + | in another | row | |}] + +let%expect_test "table2" = + let doc = {| +{t | z | f | + |:-----|---:| + | fse | e | } + |} in + + translate doc |> print_doc; + [%expect {| + | z | f | + |:-|-:| + | fse | e | |}] + +let%expect_test "problematic_translation" = + let doc = {| {table {tr {td {ul {li first item} {li second item}}}} } |} in + + translate doc |> print_doc; + [%expect {| + | - first item - second item | |}] + +let%expect_test "code_with_output" = + let doc = {| {@ocaml[foo][output {b foo}]} |} in + + translate doc |> print_doc; + [%expect {| + ```ocaml + foo + ``` + output **foo** |}] diff --git a/vendor/odoc-parser/LICENSE b/vendor/odoc-parser/LICENSE new file mode 100644 index 000000000..039fd7bf3 --- /dev/null +++ b/vendor/odoc-parser/LICENSE @@ -0,0 +1,267 @@ +Copyright (c) 2016 Thomas Refis +Copyright (c) 2014, 2015 Leo White +Copyright (c) 2015 David Sheets + +Permission to use, copy, modify, and distribute this software for any +purpose with or without fee is hereby granted, provided that the above +copyright notice and this permission notice appear in all copies. + +THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + +# Licenses for the support files used by the generated HTML + +## src/html_support_files/highlight.pack.js + +BSD 3-Clause License + +Copyright (c) 2006, Ivan Sagalaev. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +## src/html_support_files/katex.min.js, katex.min.css and fonts/KaTeX_* + +The MIT License (MIT) + +Copyright (c) 2013-2020 Khan Academy and other contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +## src/html_support_files/fonts/fira-mono-* and fonts/fira-sans-* + +Digitized data copyright (c) 2012-2015, The Mozilla Foundation and Telefonica S.A. + +This Font Software is licensed under the SIL Open Font License, Version 1.1. +This license is copied below, and is also available with a FAQ at: +http://scripts.sil.org/OFL + + +----------------------------------------------------------- +SIL OPEN FONT LICENSE Version 1.1 - 26 February 2007 +----------------------------------------------------------- + +PREAMBLE +The goals of the Open Font License (OFL) are to stimulate worldwide +development of collaborative font projects, to support the font creation +efforts of academic and linguistic communities, and to provide a free and +open framework in which fonts may be shared and improved in partnership +with others. + +The OFL allows the licensed fonts to be used, studied, modified and +redistributed freely as long as they are not sold by themselves. The +fonts, including any derivative works, can be bundled, embedded, +redistributed and/or sold with any software provided that any reserved +names are not used by derivative works. The fonts and derivatives, +however, cannot be released under any other type of license. The +requirement for fonts to remain under this license does not apply +to any document created using the fonts or their derivatives. + +DEFINITIONS +"Font Software" refers to the set of files released by the Copyright +Holder(s) under this license and clearly marked as such. This may +include source files, build scripts and documentation. + +"Reserved Font Name" refers to any names specified as such after the +copyright statement(s). + +"Original Version" refers to the collection of Font Software components as +distributed by the Copyright Holder(s). + +"Modified Version" refers to any derivative made by adding to, deleting, +or substituting -- in part or in whole -- any of the components of the +Original Version, by changing formats or by porting the Font Software to a +new environment. + +"Author" refers to any designer, engineer, programmer, technical +writer or other person who contributed to the Font Software. + +PERMISSION & CONDITIONS +Permission is hereby granted, free of charge, to any person obtaining +a copy of the Font Software, to use, study, copy, merge, embed, modify, +redistribute, and sell modified and unmodified copies of the Font +Software, subject to the following conditions: + +1) Neither the Font Software nor any of its individual components, +in Original or Modified Versions, may be sold by itself. + +2) Original or Modified Versions of the Font Software may be bundled, +redistributed and/or sold with any software, provided that each copy +contains the above copyright notice and this license. These can be +included either as stand-alone text files, human-readable headers or +in the appropriate machine-readable metadata fields within text or +binary files as long as those fields can be easily viewed by the user. + +3) No Modified Version of the Font Software may use the Reserved Font +Name(s) unless explicit written permission is granted by the corresponding +Copyright Holder. This restriction only applies to the primary font name as +presented to the users. + +4) The name(s) of the Copyright Holder(s) or the Author(s) of the Font +Software shall not be used to promote, endorse or advertise any +Modified Version, except to acknowledge the contribution(s) of the +Copyright Holder(s) and the Author(s) or with their explicit written +permission. + +5) The Font Software, modified or unmodified, in part or in whole, +must be distributed entirely under this license, and must not be +distributed under any other license. The requirement for fonts to +remain under this license does not apply to any document created +using the Font Software. + +TERMINATION +This license becomes null and void if any of the above conditions are +not met. + +DISCLAIMER +THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT +OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL THE +COPYRIGHT HOLDER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL +DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM +OTHER DEALINGS IN THE FONT SOFTWARE. + +## src/html_support_files/fonts/noticia-* + +Copyright (c) 2011, JM Sole (http://jmsole.cl|info@jmsole.cl), +with Reserved Font Name "Noticia Text". + +This Font Software is licensed under the SIL Open Font License, Version 1.1. +This license is copied below, and is also available with a FAQ at: +http://scripts.sil.org/OFL + + +----------------------------------------------------------- +SIL OPEN FONT LICENSE Version 1.1 - 26 February 2007 +----------------------------------------------------------- + +PREAMBLE +The goals of the Open Font License (OFL) are to stimulate worldwide +development of collaborative font projects, to support the font creation +efforts of academic and linguistic communities, and to provide a free and +open framework in which fonts may be shared and improved in partnership +with others. + +The OFL allows the licensed fonts to be used, studied, modified and +redistributed freely as long as they are not sold by themselves. The +fonts, including any derivative works, can be bundled, embedded, +redistributed and/or sold with any software provided that any reserved +names are not used by derivative works. The fonts and derivatives, +however, cannot be released under any other type of license. The +requirement for fonts to remain under this license does not apply +to any document created using the fonts or their derivatives. + +DEFINITIONS +"Font Software" refers to the set of files released by the Copyright +Holder(s) under this license and clearly marked as such. This may +include source files, build scripts and documentation. + +"Reserved Font Name" refers to any names specified as such after the +copyright statement(s). + +"Original Version" refers to the collection of Font Software components as +distributed by the Copyright Holder(s). + +"Modified Version" refers to any derivative made by adding to, deleting, +or substituting -- in part or in whole -- any of the components of the +Original Version, by changing formats or by porting the Font Software to a +new environment. + +"Author" refers to any designer, engineer, programmer, technical +writer or other person who contributed to the Font Software. + +PERMISSION & CONDITIONS +Permission is hereby granted, free of charge, to any person obtaining +a copy of the Font Software, to use, study, copy, merge, embed, modify, +redistribute, and sell modified and unmodified copies of the Font +Software, subject to the following conditions: + +1) Neither the Font Software nor any of its individual components, +in Original or Modified Versions, may be sold by itself. + +2) Original or Modified Versions of the Font Software may be bundled, +redistributed and/or sold with any software, provided that each copy +contains the above copyright notice and this license. These can be +included either as stand-alone text files, human-readable headers or +in the appropriate machine-readable metadata fields within text or +binary files as long as those fields can be easily viewed by the user. + +3) No Modified Version of the Font Software may use the Reserved Font +Name(s) unless explicit written permission is granted by the corresponding +Copyright Holder. This restriction only applies to the primary font name as +presented to the users. + +4) The name(s) of the Copyright Holder(s) or the Author(s) of the Font +Software shall not be used to promote, endorse or advertise any +Modified Version, except to acknowledge the contribution(s) of the +Copyright Holder(s) and the Author(s) or with their explicit written +permission. + +5) The Font Software, modified or unmodified, in part or in whole, +must be distributed entirely under this license, and must not be +distributed under any other license. The requirement for fonts to +remain under this license does not apply to any document created +using the Font Software. + +TERMINATION +This license becomes null and void if any of the above conditions are +not met. + +DISCLAIMER +THE FONT SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO ANY WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT +OF COPYRIGHT, PATENT, TRADEMARK, OR OTHER RIGHT. IN NO EVENT SHALL THE +COPYRIGHT HOLDER BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +INCLUDING ANY GENERAL, SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL +DAMAGES, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +FROM, OUT OF THE USE OR INABILITY TO USE THE FONT SOFTWARE OR FROM +OTHER DEALINGS IN THE FONT SOFTWARE. diff --git a/vendor/odoc-parser/src/ast.ml b/vendor/odoc-parser/src/ast.ml new file mode 100644 index 000000000..85c38931f --- /dev/null +++ b/vendor/odoc-parser/src/ast.ml @@ -0,0 +1,95 @@ +(** Abstract syntax tree representing ocamldoc comments *) + +(** This is a syntactic representation of ocamldoc comments. See + {{:https://ocaml.org/releases/4.12/htmlman/ocamldoc.html}The manual} for a detailed + description of the syntax understood. Note that there is no attempt at semantic + analysis, and hence these types are capable of representing values that will + be rejected by further stages, for example, invalid references or headings that + are out of range. *) + +type 'a with_location = 'a Loc.with_location +type style = [ `Bold | `Italic | `Emphasis | `Superscript | `Subscript ] +type alignment = [ `Left | `Center | `Right ] + +type reference_kind = [ `Simple | `With_text ] +(** References in doc comments can be of two kinds: [{!simple}] or [{{!ref}With text}]. *) + +type inline_element = + [ `Space of string + | `Word of string + | `Code_span of string + | `Raw_markup of string option * string + | `Styled of style * inline_element with_location list + | `Reference of + reference_kind * string with_location * inline_element with_location list + | `Link of string * inline_element with_location list + | `Math_span of string (** @since 2.0.0 *) ] +(** Inline elements are equivalent to what would be found in a [span] in HTML. + Mostly these are straightforward. The [`Reference] constructor takes a triple + whose second element is the reference itself, and the third the replacement + text. Similarly the [`Link] constructor has the link itself as first parameter + and the second is the replacement text. *) + +type 'a cell = 'a with_location list * [ `Header | `Data ] +type 'a row = 'a cell list +type 'a grid = 'a row list +type 'a abstract_table = 'a grid * alignment option list option + +type code_block_meta = { + language : string with_location; + tags : string with_location option; +} + +type code_block = { + meta : code_block_meta option; + delimiter : string option; + content : string with_location; + output : nestable_block_element with_location list option; +} + +and nestable_block_element = + [ `Paragraph of inline_element with_location list + | `Code_block of code_block + | `Verbatim of string + | `Modules of string with_location list + | `List of + [ `Unordered | `Ordered ] + * [ `Light | `Heavy ] + * nestable_block_element with_location list list + | `Table of table + | `Math_block of string (** @since 2.0.0 *) ] +(** Some block elements may be nested within lists or tags, but not all. + The [`List] constructor has a parameter of type [\[`Light | `Heavy\]]. + This corresponds to the syntactic constructor used (see the + {{:https://ocaml.org/releases/4.12/htmlman/ocamldoc.html#sss:ocamldoc-list}manual}). + *) + +and table = nestable_block_element abstract_table * [ `Light | `Heavy ] + +type internal_tag = + [ `Canonical of string with_location | `Inline | `Open | `Closed | `Hidden ] +(** Internal tags are used to exercise fine control over the output of odoc. They + are never rendered in the output *) + +type ocamldoc_tag = + [ `Author of string + | `Deprecated of nestable_block_element with_location list + | `Param of string * nestable_block_element with_location list + | `Raise of string * nestable_block_element with_location list + | `Return of nestable_block_element with_location list + | `See of + [ `Url | `File | `Document ] + * string + * nestable_block_element with_location list + | `Since of string + | `Before of string * nestable_block_element with_location list + | `Version of string ] +(** ocamldoc tags are those that are specified in the {{:https://ocaml.org/releases/4.12/htmlman/ocamldoc.html#ss:ocamldoc-tags}manual}) *) + +type tag = [ ocamldoc_tag | internal_tag ] +type heading = int * string option * inline_element with_location list + +type block_element = + [ nestable_block_element | `Heading of heading | `Tag of tag ] + +type t = block_element with_location list diff --git a/vendor/odoc-parser/src/compat.ml b/vendor/odoc-parser/src/compat.ml new file mode 100644 index 000000000..a7b535d18 --- /dev/null +++ b/vendor/odoc-parser/src/compat.ml @@ -0,0 +1,32 @@ +module Option = struct + type 'a t = 'a option = None | Some of 'a + + let is_some = function None -> false | Some _ -> true + let value ~default = function None -> default | Some x -> x + + let join_list l = + let rec loop acc = function + | [] -> Some (List.rev acc) + | Some a :: q -> loop (a :: acc) q + | None :: _ -> None + in + loop [] l +end + +module Char = struct + include Char + + let equal (x : char) y = x = y +end + +module String = struct + include String + + let for_all f str = + let rec aux i = + if i >= String.length str then true + else if f (String.get str i) then aux (i + 1) + else false + in + aux 0 +end diff --git a/vendor/odoc-parser/src/compat.mli b/vendor/odoc-parser/src/compat.mli new file mode 100644 index 000000000..0959145c2 --- /dev/null +++ b/vendor/odoc-parser/src/compat.mli @@ -0,0 +1,26 @@ +(** @since 4.08 *) +module Option : sig + type 'a t = 'a option = None | Some of 'a + + val is_some : 'a option -> bool + (** [is_some o] is [true] if and only if [o] is [Some o]. *) + + val value : default:'a -> 'a option -> 'a + val join_list : 'a option list -> 'a list option +end + +module Char : sig + include module type of Char + + val equal : t -> t -> bool + (** The equal function for chars. + @since 4.03.0 *) +end + +module String : sig + include module type of String + + val for_all : (char -> bool) -> string -> bool + (** [for_all p s] checks if all characters in [s] satisfy the preficate [p]. + @since 4.13.0 *) +end diff --git a/vendor/odoc-parser/src/dune b/vendor/odoc-parser/src/dune new file mode 100644 index 000000000..b72423c26 --- /dev/null +++ b/vendor/odoc-parser/src/dune @@ -0,0 +1,5 @@ +(ocamllex lexer) + +(library + (name odoc_parser) + (libraries astring camlp-streams)) diff --git a/vendor/odoc-parser/src/lexer.mli b/vendor/odoc-parser/src/lexer.mli new file mode 100644 index 000000000..ce053b495 --- /dev/null +++ b/vendor/odoc-parser/src/lexer.mli @@ -0,0 +1,10 @@ +(* Internal module, not exposed *) + +type input = { + file : string; + offset_to_location : int -> Loc.point; + warnings : Warning.t list ref; + lexbuf : Lexing.lexbuf; +} + +val token : input -> Lexing.lexbuf -> Token.t Loc.with_location diff --git a/vendor/odoc-parser/src/lexer.mll b/vendor/odoc-parser/src/lexer.mll new file mode 100644 index 000000000..0cde0b434 --- /dev/null +++ b/vendor/odoc-parser/src/lexer.mll @@ -0,0 +1,762 @@ +{ + +let unescape_word : string -> string = fun s -> + (* The common case is that there are no escape sequences. *) + match String.index s '\\' with + | exception Not_found -> s + | _ -> + let buffer = Buffer.create (String.length s) in + let rec scan_word index = + if index >= String.length s then + () + else + let c = s.[index] in + let c, increment = + match c with + | '\\' -> + if index + 1 < String.length s then + match s.[index + 1] with + | '{' | '}' | '[' | ']' | '@' as c -> c, 2 + | _ -> c, 1 + else c, 1 + | _ -> c, 1 + in + Buffer.add_char buffer c; + scan_word (index + increment) + in + scan_word 0; + Buffer.contents buffer + +type math_kind = + Inline | Block + +let math_constr kind x = + match kind with + | Inline -> `Math_span x + | Block -> `Math_block x + +(* This is used for code and verbatim blocks. It can be done with a regular + expression, but the regexp gets quite ugly, so a function is easier to + understand. *) +let trim_leading_blank_lines : string -> string = fun s -> + let rec scan_for_last_newline : int -> int -> int = + fun index trim_until -> + if index >= String.length s then + String.length s + else + match s.[index] with + | ' ' | '\t' | '\r' -> scan_for_last_newline (index + 1) trim_until + | '\n' -> scan_for_last_newline (index + 1) (index + 1) + | _ -> trim_until + in + let trim_until = scan_for_last_newline 0 0 in + String.sub s trim_until (String.length s - trim_until) + +let trim_trailing_blank_lines : string -> string = fun s -> + let rec scan_for_last_newline : int -> int option -> int option = + fun index trim_from -> + if index < 0 then + Some 0 + else + match s.[index] with + | ' ' | '\t' | '\r' -> scan_for_last_newline (index - 1) trim_from + | '\n' -> scan_for_last_newline (index - 1) (Some index) + | _ -> trim_from + in + let last = String.length s - 1 in + match scan_for_last_newline last None with + | None -> + s + | Some trim_from -> + let trim_from = + if trim_from > 0 && s.[trim_from - 1] = '\r' then + trim_from - 1 + else + trim_from + in + String.sub s 0 trim_from + +(** Returns [None] for an empty, [Some ident] for an indented line. *) +let trim_leading_whitespace : first_line_offset:int -> string -> string = + fun ~first_line_offset s -> + let count_leading_whitespace line = + let rec count_leading_whitespace' index len = + if index = len then None + else + match line.[index] with + | ' ' | '\t' -> count_leading_whitespace' (index + 1) len + | _ -> Some index + in + let len = String.length line in + (* '\r' may remain because we only split on '\n' below. This is important + for the first line, which would be considered not empty without this check. *) + let len = if len > 0 && line.[len - 1] = '\r' then len - 1 else len in + count_leading_whitespace' 0 len + in + + let lines = Astring.String.cuts ~sep:"\n" s in + + let least_amount_of_whitespace = + List.fold_left (fun least_so_far line -> + match (count_leading_whitespace line, least_so_far) with + | (Some _ as n', None) -> n' + | (Some n as n', Some least) when n < least -> n' + | _ -> least_so_far) + in + + let first_line_max_drop, least_amount_of_whitespace = + match lines with + | [] -> 0, None + | first_line :: tl -> + begin match count_leading_whitespace first_line with + | Some n -> + n, least_amount_of_whitespace (Some (first_line_offset + n)) tl + | None -> + 0, least_amount_of_whitespace None tl + end + in + + match least_amount_of_whitespace with + | None -> + s + | Some least_amount_of_whitespace -> + let drop n line = + (* Since blank lines were ignored when calculating + [least_amount_of_whitespace], their length might be less than the + amount. *) + if String.length line < n then line + else String.sub line n (String.length line - n) + in + let lines = + match lines with + | [] -> [] + | first_line :: tl -> + drop (min first_line_max_drop least_amount_of_whitespace) first_line + :: List.map (drop least_amount_of_whitespace) tl + in + String.concat "\n" lines + +type input = { + file : string; + offset_to_location : int -> Loc.point; + warnings : Warning.t list ref; + lexbuf : Lexing.lexbuf; +} + +let with_location_adjustments + k input ?start_offset ?adjust_start_by ?end_offset ?adjust_end_by value = + + let start = + match start_offset with + | None -> Lexing.lexeme_start input.lexbuf + | Some s -> s + in + let start = + match adjust_start_by with + | None -> start + | Some s -> start + String.length s + in + let end_ = + match end_offset with + | None -> Lexing.lexeme_end input.lexbuf + | Some e -> e + in + let end_ = + match adjust_end_by with + | None -> end_ + | Some s -> end_ - String.length s + in + let location = { + Loc.file = input.file; + start = input.offset_to_location start; + end_ = input.offset_to_location end_; + } + in + k input location value + +let emit = + with_location_adjustments (fun _ -> Loc.at) + +let warning = + with_location_adjustments (fun input location error -> + input.warnings := (error location) :: !(input.warnings)) + +let reference_token start target = + match start with + | "{!" -> `Simple_reference target + | "{{!" -> `Begin_reference_with_replacement_text target + | "{:" -> `Simple_link target + | "{{:" -> `Begin_link_with_replacement_text target + | _ -> assert false + +let trim_leading_space_or_accept_whitespace input start_offset text = + match text.[0] with + | ' ' -> String.sub text 1 (String.length text - 1) + | '\t' | '\r' | '\n' -> text + | exception Invalid_argument _ -> "" + | _ -> + warning + input + ~start_offset + ~end_offset:(start_offset + 2) + Parse_error.no_leading_whitespace_in_verbatim; + text + +let trim_trailing_space_or_accept_whitespace text = + match text.[String.length text - 1] with + | ' ' -> String.sub text 0 (String.length text - 1) + | '\t' | '\r' | '\n' -> text + | _ -> text + | exception Invalid_argument _ -> text + +let emit_verbatim input start_offset buffer = + let t = Buffer.contents buffer in + let t = trim_trailing_space_or_accept_whitespace t in + let t = trim_leading_space_or_accept_whitespace input start_offset t in + let t = trim_leading_blank_lines t in + let t = trim_trailing_blank_lines t in + emit input (`Verbatim t) ~start_offset + +(* The locations have to be treated carefully in this function. We need to ensure that + the []`Code_block] location matches the entirety of the block including the terminator, + and the content location is precicely the location of the text of the code itself. + Note that the location reflects the content _without_ stripping of whitespace, whereas + the value of the content in the tree has whitespace stripped from the beginning, + and trailing empty lines removed. *) +let emit_code_block ~start_offset content_offset input metadata delim terminator c has_results = + let c = Buffer.contents c |> trim_trailing_blank_lines in + let content_location = input.offset_to_location content_offset in + let c = + with_location_adjustments + (fun _ _location c -> + let first_line_offset = content_location.column in + trim_leading_whitespace ~first_line_offset c) + input c + in + let c = trim_leading_blank_lines c in + let c = with_location_adjustments ~adjust_end_by:terminator ~start_offset:content_offset (fun _ -> Loc.at) input c in + emit ~start_offset input (`Code_block (metadata, delim, c, has_results)) + +let heading_level input level = + if String.length level >= 2 && level.[0] = '0' then begin + warning + input ~start_offset:1 (Parse_error.leading_zero_in_heading_level level) + end; + int_of_string level + +let buffer_add_lexeme buffer lexbuf = + Buffer.add_string buffer (Lexing.lexeme lexbuf) + +} + +let markup_char = + ['{' '}' '[' ']' '@' '|'] +let space_char = + [' ' '\t' '\n' '\r'] +let bullet_char = + ['-' '+'] + +let word_char = + (_ # markup_char # space_char # bullet_char) | ('\\' markup_char) + +let horizontal_space = + [' ' '\t'] +let newline = + '\n' | "\r\n" + +let reference_start = + "{!" | "{{!" | "{:" | "{{:" + +let raw_markup = + ([^ '%'] | '%'+ [^ '%' '}'])* '%'* + +let raw_markup_target = + ([^ ':' '%'] | '%'+ [^ ':' '%' '}'])* '%'* + +let language_tag_char = + ['a'-'z' 'A'-'Z' '0'-'9' '_' '-' ] + +let delim_char = + ['a'-'z' 'A'-'Z' '0'-'9' '_' ] + +rule reference_paren_content input start ref_offset start_offset depth_paren + buffer = + parse + | '(' + { + buffer_add_lexeme buffer lexbuf ; + reference_paren_content input start ref_offset start_offset + (depth_paren + 1) buffer lexbuf } + | ')' + { + buffer_add_lexeme buffer lexbuf ; + if depth_paren = 0 then + reference_content input start ref_offset buffer lexbuf + else + reference_paren_content input start ref_offset start_offset + (depth_paren - 1) buffer lexbuf } + | eof + { warning + input + ~start_offset + (Parse_error.unclosed_bracket ~bracket:"(") ; + Buffer.contents buffer } + | _ + { + buffer_add_lexeme buffer lexbuf ; + reference_paren_content input start ref_offset start_offset depth_paren + buffer lexbuf } + +and reference_content input start start_offset buffer = parse + | '}' + { + Buffer.contents buffer + } + | '(' + { + buffer_add_lexeme buffer lexbuf ; + reference_paren_content input start start_offset + (Lexing.lexeme_start lexbuf) 0 buffer lexbuf + } + | '"' [^ '"']* '"' + { + buffer_add_lexeme buffer lexbuf ; + reference_content input start start_offset buffer lexbuf + } + | eof + { warning + input + ~start_offset + (Parse_error.unclosed_bracket ~bracket:start) ; + Buffer.contents buffer } + | _ + { + buffer_add_lexeme buffer lexbuf ; + reference_content input start start_offset buffer lexbuf } + +and token input = parse + | horizontal_space* eof + { emit input `End } + + | ((horizontal_space* newline as prefix) + horizontal_space* ((newline horizontal_space*)+ as suffix) as ws) + { emit input (`Blank_line ws) ~adjust_start_by:prefix ~adjust_end_by:suffix } + + | (horizontal_space* newline horizontal_space* as ws) + { emit input (`Single_newline ws) } + + | (horizontal_space+ as ws) + { emit input (`Space ws) } + + | (horizontal_space* (newline horizontal_space*)? as p) '}' + { emit input `Right_brace ~adjust_start_by:p } + + | '|' + { emit input `Bar } + + | word_char (word_char | bullet_char | '@')* + | bullet_char (word_char | bullet_char | '@')+ as w + { emit input (`Word (unescape_word w)) } + + | '[' + { code_span + (Buffer.create 1024) 0 (Lexing.lexeme_start lexbuf) input lexbuf } + + | '-' + { emit input `Minus } + + | '+' + { emit input `Plus } + + | "{b" + { emit input (`Begin_style `Bold) } + + | "{i" + { emit input (`Begin_style `Italic) } + + | "{e" + { emit input (`Begin_style `Emphasis) } + + | "{L" + { emit input (`Begin_paragraph_style `Left) } + + | "{C" + { emit input (`Begin_paragraph_style `Center) } + + | "{R" + { emit input (`Begin_paragraph_style `Right) } + + | "{^" + { emit input (`Begin_style `Superscript) } + + | "{_" + { emit input (`Begin_style `Subscript) } + + | "{math" space_char + { math Block (Buffer.create 1024) 0 (Lexing.lexeme_start lexbuf) input lexbuf } + + | "{m" horizontal_space + { math Inline (Buffer.create 1024) 0 (Lexing.lexeme_start lexbuf) input lexbuf } + + + | "{!modules:" ([^ '}']* as modules) '}' + { emit input (`Modules modules) } + + | (reference_start as start) + { + let start_offset = Lexing.lexeme_start lexbuf in + let target = + reference_content input start start_offset (Buffer.create 16) lexbuf + in + let token = (reference_token start target) in + emit ~start_offset input token } + + | "{[" + { code_block (Lexing.lexeme_start lexbuf) (Lexing.lexeme_end lexbuf) None (Buffer.create 256) "" input lexbuf } + + | (("{" (delim_char* as delim) "@" horizontal_space*) as prefix) (language_tag_char+ as lang_tag_) + { + let start_offset = Lexing.lexeme_start lexbuf in + let lang_tag = + with_location_adjustments ~adjust_start_by:prefix (fun _ -> Loc.at) input lang_tag_ + in + let emit_truncated_code_block () = + let empty_content = with_location_adjustments (fun _ -> Loc.at) input "" in + emit ~start_offset input (`Code_block (Some (lang_tag, None), delim, empty_content, false)) + in + match code_block_metadata_tail input lexbuf with + | `Ok metadata -> code_block start_offset (Lexing.lexeme_end lexbuf) (Some (lang_tag, metadata)) (Buffer.create 256) delim input lexbuf + | `Eof -> + warning input ~start_offset Parse_error.truncated_code_block_meta; + emit_truncated_code_block () + | `Invalid_char c -> + warning input ~start_offset + (Parse_error.language_tag_invalid_char lang_tag_ c); + code_block start_offset (Lexing.lexeme_end lexbuf) (Some (lang_tag, None)) (Buffer.create 256) delim input lexbuf + } + + | "{@" horizontal_space* '[' + { + warning input Parse_error.no_language_tag_in_meta; + code_block (Lexing.lexeme_start lexbuf) (Lexing.lexeme_end lexbuf) None (Buffer.create 256) "" input lexbuf + } + + | "{v" + { verbatim + (Buffer.create 1024) None (Lexing.lexeme_start lexbuf) input lexbuf } + + | "{%" ((raw_markup_target as target) ':')? (raw_markup as s) + ("%}" | eof as e) + { let token = `Raw_markup (target, s) in + if e <> "%}" then + warning + input + ~start_offset:(Lexing.lexeme_end lexbuf) + (Parse_error.not_allowed + ~what:(Token.describe `End) + ~in_what:(Token.describe token)); + emit input token } + + | "{ul" + { emit input (`Begin_list `Unordered) } + + | "{ol" + { emit input (`Begin_list `Ordered) } + + | "{li" + { emit input (`Begin_list_item `Li) } + + | "{-" + { emit input (`Begin_list_item `Dash) } + + | "{table" + { emit input (`Begin_table_heavy) } + + | "{t" + { emit input (`Begin_table_light) } + + | "{tr" + { emit input `Begin_table_row } + + | "{th" + { emit input (`Begin_table_cell `Header) } + + | "{td" + { emit input (`Begin_table_cell `Data) } + + | '{' (['0'-'9']+ as level) ':' (([^ '}'] # space_char)* as label) + { emit + input (`Begin_section_heading (heading_level input level, Some label)) } + + | '{' (['0'-'9']+ as level) + { emit input (`Begin_section_heading (heading_level input level, None)) } + + | "@author" ((horizontal_space+ [^ '\r' '\n']*)? as author) + { emit input (`Tag (`Author author)) } + + | "@deprecated" + { emit input (`Tag `Deprecated) } + + | "@param" horizontal_space+ ((_ # space_char)+ as name) + { emit input (`Tag (`Param name)) } + + | ("@raise" | "@raises") horizontal_space+ ((_ # space_char)+ as name) + { emit input (`Tag (`Raise name)) } + + | ("@return" | "@returns") + { emit input (`Tag `Return) } + + | "@see" horizontal_space* '<' ([^ '>']* as url) '>' + { emit input (`Tag (`See (`Url, url))) } + + | "@see" horizontal_space* '\'' ([^ '\'']* as filename) '\'' + { emit input (`Tag (`See (`File, filename))) } + + | "@see" horizontal_space* '"' ([^ '"']* as name) '"' + { emit input (`Tag (`See (`Document, name))) } + + | "@since" ((horizontal_space+ [^ '\r' '\n']*)? as version) + { emit input (`Tag (`Since version)) } + + | "@before" horizontal_space+ ((_ # space_char)+ as version) + { emit input (`Tag (`Before version)) } + + | "@version" ((horizontal_space+ [^ '\r' '\n']*)? as version) + { emit input (`Tag (`Version version)) } + + | "@canonical" ((horizontal_space+ [^ '\r' '\n']*)? as identifier) + { emit input (`Tag (`Canonical identifier)) } + + | "@inline" + { emit input (`Tag `Inline) } + + | "@open" + { emit input (`Tag `Open) } + + | "@closed" + { emit input (`Tag `Closed) } + + | "@hidden" + { emit input (`Tag `Hidden) } + + | "]}" + { emit input `Right_code_delimiter} + + | '{' + { try bad_markup_recovery (Lexing.lexeme_start lexbuf) input lexbuf + with Failure _ -> + warning + input + (Parse_error.bad_markup + "{" ~suggestion:"escape the brace with '\\{'."); + emit input (`Word "{") } + + | ']' + { warning input Parse_error.unpaired_right_bracket; + emit input (`Word "]") } + + | "@param" + { warning input Parse_error.truncated_param; + emit input (`Tag (`Param "")) } + + | ("@raise" | "@raises") as tag + { warning input (Parse_error.truncated_raise tag); + emit input (`Tag (`Raise "")) } + + | "@before" + { warning input Parse_error.truncated_before; + emit input (`Tag (`Before "")) } + + | "@see" + { warning input Parse_error.truncated_see; + emit input (`Word "@see") } + + | '@' ['a'-'z' 'A'-'Z']+ as tag + { warning input (Parse_error.unknown_tag tag); + emit input (`Word tag) } + + | '@' + { warning input Parse_error.stray_at; + emit input (`Word "@") } + + | '\r' + { warning input Parse_error.stray_cr; + token input lexbuf } + + | "{!modules:" ([^ '}']* as modules) eof + { warning + input + ~start_offset:(Lexing.lexeme_end lexbuf) + (Parse_error.not_allowed + ~what:(Token.describe `End) + ~in_what:(Token.describe (`Modules ""))); + emit input (`Modules modules) } + +and code_span buffer nesting_level start_offset input = parse + | ']' + { if nesting_level = 0 then + emit input (`Code_span (Buffer.contents buffer)) ~start_offset + else begin + Buffer.add_char buffer ']'; + code_span buffer (nesting_level - 1) start_offset input lexbuf + end } + + | '[' + { Buffer.add_char buffer '['; + code_span buffer (nesting_level + 1) start_offset input lexbuf } + + | '\\' ('[' | ']' as c) + { Buffer.add_char buffer c; + code_span buffer nesting_level start_offset input lexbuf } + + | newline newline + { warning + input + (Parse_error.not_allowed + ~what:(Token.describe (`Blank_line "\n\n")) + ~in_what:(Token.describe (`Code_span ""))); + Buffer.add_char buffer '\n'; + code_span buffer nesting_level start_offset input lexbuf } + + | eof + { warning + input + (Parse_error.not_allowed + ~what:(Token.describe `End) + ~in_what:(Token.describe (`Code_span ""))); + emit input (`Code_span (Buffer.contents buffer)) ~start_offset } + + | _ as c + { Buffer.add_char buffer c; + code_span buffer nesting_level start_offset input lexbuf } + +and math kind buffer nesting_level start_offset input = parse + | '}' + { if nesting_level == 0 then + emit input (math_constr kind (Buffer.contents buffer)) ~start_offset + else begin + Buffer.add_char buffer '}'; + math kind buffer (nesting_level - 1) start_offset input lexbuf + end + } + | '{' + { Buffer.add_char buffer '{'; + math kind buffer (nesting_level + 1) start_offset input lexbuf } + | ("\\{" | "\\}") as s + { Buffer.add_string buffer s; + math kind buffer nesting_level start_offset input lexbuf } + | (newline) as s + { + match kind with + | Inline -> + warning + input + (Parse_error.not_allowed + ~what:(Token.describe (`Blank_line "\n")) + ~in_what:(Token.describe (math_constr kind ""))); + Buffer.add_char buffer '\n'; + math kind buffer nesting_level start_offset input lexbuf + | Block -> + Buffer.add_string buffer s; + math kind buffer nesting_level start_offset input lexbuf + } + | eof + { warning + input + (Parse_error.not_allowed + ~what:(Token.describe `End) + ~in_what:(Token.describe (math_constr kind ""))); + emit input (math_constr kind (Buffer.contents buffer)) ~start_offset } + | _ as c + { Buffer.add_char buffer c; + math kind buffer nesting_level start_offset input lexbuf } + +and verbatim buffer last_false_terminator start_offset input = parse + | (space_char as c) "v}" + { Buffer.add_char buffer c; + emit_verbatim input start_offset buffer } + + | "v}" + { Buffer.add_string buffer "v}"; + verbatim + buffer (Some (Lexing.lexeme_start lexbuf)) start_offset input lexbuf } + + | eof + { begin match last_false_terminator with + | None -> + warning + input + (Parse_error.not_allowed + ~what:(Token.describe `End) + ~in_what:(Token.describe (`Verbatim ""))) + | Some location -> + warning + input + ~start_offset:location + ~end_offset:(location + 2) + Parse_error.no_trailing_whitespace_in_verbatim + end; + emit_verbatim input start_offset buffer } + + | _ as c + { Buffer.add_char buffer c; + verbatim buffer last_false_terminator start_offset input lexbuf } + + + +and bad_markup_recovery start_offset input = parse + | [^ '}']+ as text '}' as rest + { let suggestion = + Printf.sprintf "did you mean '{!%s}' or '[%s]'?" text text in + warning + input + ~start_offset + (Parse_error.bad_markup ("{" ^ rest) ~suggestion); + emit input (`Code_span text) ~start_offset} + +(* The second field of the metadata. + This rule keeps whitespaces and newlines in the 'metadata' field except the + ones just before the '['. *) +and code_block_metadata_tail input = parse + | (space_char+ as prefix) + ((space_char* (_ # space_char # ['['])+)+ as meta) + ((space_char* '[') as suffix) + { + let meta = + with_location_adjustments ~adjust_start_by:prefix ~adjust_end_by:suffix (fun _ -> Loc.at) input meta + in + `Ok (Some meta) + } + | (newline | horizontal_space)* '[' + { `Ok None } + | _ as c + { `Invalid_char c } + | eof + { `Eof } + +and code_block start_offset content_offset metadata prefix delim input = parse + | ("]" (delim_char* as delim') "[") as terminator + { if delim = delim' + then emit_code_block ~start_offset content_offset input metadata delim terminator prefix true + else + (Buffer.add_string prefix terminator; + code_block start_offset content_offset metadata prefix delim input lexbuf) } + | ("]" (delim_char* as delim') "}") as terminator + { + if delim = delim' + then emit_code_block ~start_offset content_offset input metadata delim terminator prefix false + else ( + Buffer.add_string prefix terminator; + code_block start_offset content_offset metadata prefix delim input lexbuf + ) + } + | eof + { + warning input ~start_offset Parse_error.truncated_code_block; + emit_code_block ~start_offset content_offset input metadata delim "" prefix false + } + | (_ as c) + { + Buffer.add_char prefix c; + code_block start_offset content_offset metadata prefix delim input lexbuf + } diff --git a/vendor/odoc-parser/src/loc.ml b/vendor/odoc-parser/src/loc.ml new file mode 100644 index 000000000..0316fa270 --- /dev/null +++ b/vendor/odoc-parser/src/loc.ml @@ -0,0 +1,32 @@ +type point = { line : int; column : int } +type span = { file : string; start : point; end_ : point } +type +'a with_location = { location : span; value : 'a } + +let at location value = { location; value } +let location { location; _ } = location +let value { value; _ } = value +let map f annotated = { annotated with value = f annotated.value } +let same annotated value = { annotated with value } + +let span spans = + match spans with + | [] -> + { + file = "_none_"; + start = { line = 1; column = 0 }; + end_ = { line = 1; column = 0 }; + } + | first :: spans -> + let last = List.fold_left (fun _ span -> span) first spans in + { file = first.file; start = first.start; end_ = last.end_ } + +let nudge_start offset span = + { span with start = { span.start with column = span.start.column + offset } } + +let spans_multiple_lines = function + | { + location = + { start = { line = start_line; _ }; end_ = { line = end_line; _ }; _ }; + _; + } -> + end_line > start_line diff --git a/vendor/odoc-parser/src/loc.mli b/vendor/odoc-parser/src/loc.mli new file mode 100644 index 000000000..135ba0358 --- /dev/null +++ b/vendor/odoc-parser/src/loc.mli @@ -0,0 +1,45 @@ +(** Locations in files. *) + +(** This module concerns locations in source files, both points indicating a specific + character and spans between two points. *) + +(** {2 Basic types} *) + +type point = { line : int; column : int } +(** A specific character *) + +type span = { file : string; start : point; end_ : point } +(** A range of characters between [start] and [end_] in a particular file *) + +val span : span list -> span +(** [span spans] takes a list of spans and returns a single {!type-span} starting + at the start of the first span and ending at the end of the final span *) + +val nudge_start : int -> span -> span +(** This adjusts only the column number, implicitly assuming that the offset does + not move the location across a newline character. *) + +(** {2 Located values} *) + +type +'a with_location = { location : span; value : 'a } +(** Describes values located at a particular span *) + +val at : span -> 'a -> 'a with_location +(** Constructor for {!with_location} *) + +val location : 'a with_location -> span +(** Returns the location of a located value *) + +val value : 'a with_location -> 'a +(** Returns the value of a located value *) + +val map : ('a -> 'b) -> 'a with_location -> 'b with_location +(** Map over a located value without changing its location *) + +val same : _ with_location -> 'b -> 'b with_location +(** [same x y] retuns the value y wrapped in a {!with_location} whose + location is that of [x] *) + +val spans_multiple_lines : _ with_location -> bool +(** [spans_multiple_lines x] checks to see whether [x] is located + on a single line or whether it covers more than one. *) diff --git a/vendor/odoc-parser/src/odoc_parser.ml b/vendor/odoc-parser/src/odoc_parser.ml new file mode 100644 index 000000000..2d9fdd5de --- /dev/null +++ b/vendor/odoc-parser/src/odoc_parser.ml @@ -0,0 +1,121 @@ +module Ast = Ast +module Loc = Loc +module Warning = Warning + +type t = { + ast : Ast.t; + warnings : Warning.t list; + reversed_newlines : (int * int) list; + original_pos : Lexing.position; +} + +(* odoc uses an ocamllex lexer. The "engine" for such lexers is the standard + [Lexing] module. + + As the [Lexing] module reads the input, it keeps track of only the byte + offset into the input. It is normally the job of each particular lexer + implementation to decide which character sequences count as newlines, and + keep track of line/column locations. This is usually done by writing several + extra regular expressions, and calling [Lexing.new_line] at the right time. + + Keeping track of newlines like this makes the odoc lexer somewhat too + diffiult to read, however. To factor the aspect of keeping track of newlines + fully out of the odoc lexer, instead of having it keep track of newlines as + it's scanning the input, the input is pre-scanned before feeding it into the + lexer. A table of all the newlines is assembled, and used to convert offsets + into line/column pairs after the lexer emits tokens. + + [reversed_newlines ~input ~comment_location offset] returns a list of pairs + of (line number * offset), allowing the easy conversion from the byte + [offset], relative to the beginning of a comment, into a location, relative + to the beginning of the file containing the comment. This can then be used + to convert from byte offset to line number / column number - a Loc.point, + and additionally for converting back from a Loc.point to a Lexing.position. +*) + +let reversed_newlines : input:string -> (int * int) list = + fun ~input -> + let rec find_newlines line_number input_index newlines_accumulator = + if input_index >= String.length input then newlines_accumulator + else if + (* This is good enough to detect CR-LF also. *) + input.[input_index] = '\n' + then + find_newlines (line_number + 1) (input_index + 1) + ((line_number + 1, input_index + 1) :: newlines_accumulator) + else find_newlines line_number (input_index + 1) newlines_accumulator + in + find_newlines 1 0 [ (1, 0) ] + +(* [offset_to_location] converts from an offset within the comment text, where + [reversed_newlines] is the result of the above function and [comment_location] + is the location of the comment within its file. The function is meant to be + partially applied to its first two arguments, at which point it is passed to + the lexer, so it can apply the table to its emitted tokens. *) + +let offset_to_location : + reversed_newlines:(int * int) list -> + comment_location:Lexing.position -> + int -> + Loc.point = + fun ~reversed_newlines ~comment_location byte_offset -> + let rec scan_to_last_newline reversed_newlines_prefix = + match reversed_newlines_prefix with + | [] -> assert false + | (line_in_comment, line_start_offset) :: prefix -> + if line_start_offset > byte_offset then scan_to_last_newline prefix + else + let column_in_comment = byte_offset - line_start_offset in + let line_in_file = + line_in_comment + comment_location.Lexing.pos_lnum - 1 + in + let column_in_file = + if line_in_comment = 1 then + column_in_comment + comment_location.Lexing.pos_cnum + - comment_location.Lexing.pos_bol + else column_in_comment + in + { Loc.line = line_in_file; column = column_in_file } + in + scan_to_last_newline reversed_newlines + +(* Given a Loc.point and the result of [parse_comment], this function returns + a valid Lexing.position *) +let position_of_point : t -> Loc.point -> Lexing.position = + fun v point -> + let { reversed_newlines; original_pos; _ } = v in + let line_in_comment = point.Loc.line - original_pos.pos_lnum + 1 in + let rec find_pos_bol reversed_newlines_prefix = + match reversed_newlines_prefix with + | [] -> assert false + | [ _ ] -> original_pos.pos_bol + | (line_number, line_start_offset) :: prefix -> + if line_number > line_in_comment then find_pos_bol prefix + else line_start_offset + original_pos.pos_cnum + in + let pos_bol = find_pos_bol reversed_newlines in + let pos_lnum = point.Loc.line in + let pos_cnum = point.column + pos_bol in + let pos_fname = original_pos.pos_fname in + { Lexing.pos_bol; pos_lnum; pos_cnum; pos_fname } + +(* The main entry point for this module *) +let parse_comment ~location ~text = + let warnings = ref [] in + let reversed_newlines = reversed_newlines ~input:text in + let token_stream = + let lexbuf = Lexing.from_string text in + let offset_to_location = + offset_to_location ~reversed_newlines ~comment_location:location + in + let input : Lexer.input = + { file = location.Lexing.pos_fname; offset_to_location; warnings; lexbuf } + in + Stream.from (fun _token_index -> Some (Lexer.token input lexbuf)) + in + let ast, warnings = Syntax.parse warnings token_stream in + { ast; warnings; reversed_newlines; original_pos = location } + +(* Accessor functions, as [t] is opaque *) +let warnings t = t.warnings +let ast t = t.ast diff --git a/vendor/odoc-parser/src/odoc_parser.mli b/vendor/odoc-parser/src/odoc_parser.mli new file mode 100644 index 000000000..5dbd4a081 --- /dev/null +++ b/vendor/odoc-parser/src/odoc_parser.mli @@ -0,0 +1,46 @@ +(** Parser for ocamldoc formatted comments. *) + +type t +(** [type t] is the result of parsing. *) + +val parse_comment : location:Lexing.position -> text:string -> t +(** [parse_comment ~location ~text] parses [text] as an ocamldoc formatted + string. The parser will try to recover from any invalid syntax encountered, + and therefore this will always produce a result without raising exceptions + with zero or more warnings. The location passed in should represent the + start of the {i content} of the documentation comment - so for a line such + as + {[ + (** A comment starting in the first column (0) *) + ]} + the location should represent the space immediately before the [A], so the + in the 4th column (e.g. [{... pos_bol=0; pos_cnum=3 }]) *) + +module Ast = Ast +module Loc = Loc + +(** Warnings produced during parsing. *) +module Warning : sig + type t = Warning.t = { location : Loc.span; message : string } + (** Warnings are represented as record containing the human-readable text + of the warning alongside the location of the offending text in the source *) + + val pp : Format.formatter -> t -> unit + (** Pretty printer for {!t} *) + + val to_string : t -> string + (** [to_string] will format the location and warning as text to be + printed. *) +end + +val warnings : t -> Warning.t list +(** Extract any warnings from the parser result. *) + +val ast : t -> Ast.t +(** Extract the {!Ast.t} from the parser result. *) + +val position_of_point : t -> Loc.point -> Lexing.position +(** Helper function to turn the internal representation of positions back into + the usual representation in the Lexing module. Note that this relies on + the information passed in {!parse_comment}, and hence requires the result + of that call in addition to the {!Loc.point} being converted. *) diff --git a/vendor/odoc-parser/src/parse_error.ml b/vendor/odoc-parser/src/parse_error.ml new file mode 100644 index 000000000..4ee22c470 --- /dev/null +++ b/vendor/odoc-parser/src/parse_error.ml @@ -0,0 +1,83 @@ +let capitalize_ascii = Astring.String.Ascii.capitalize + +let bad_markup : ?suggestion:string -> string -> Loc.span -> Warning.t = + fun ?suggestion -> Warning.make ?suggestion "'%s': bad markup." + +let leading_zero_in_heading_level : string -> Loc.span -> Warning.t = + Warning.make "'%s': leading zero in heading level." + +let should_not_be_empty : what:string -> Loc.span -> Warning.t = + fun ~what -> Warning.make "%s should not be empty." (capitalize_ascii what) + +let markup_should_not_be_used : what:string -> Loc.span -> Warning.t = + fun ~what -> + Warning.make "%s should not be used because it has no effect." + (capitalize_ascii what) + +let should_begin_on_its_own_line : what:string -> Loc.span -> Warning.t = + fun ~what -> + Warning.make "%s should begin on its own line." (capitalize_ascii what) + +let should_be_followed_by_whitespace : what:string -> Loc.span -> Warning.t = + fun ~what -> + Warning.make "%s should be followed by space, a tab, or a new line." + (capitalize_ascii what) + +let not_allowed : + ?suggestion:string -> what:string -> in_what:string -> Loc.span -> Warning.t + = + fun ?suggestion ~what ~in_what -> + Warning.make ?suggestion "%s is not allowed in %s." (capitalize_ascii what) + in_what + +let unclosed_bracket : + ?suggestion:string -> bracket:string -> Loc.span -> Warning.t = + fun ?suggestion ~bracket -> + Warning.make ?suggestion "Open bracket '%s' is never closed." bracket + +let no_leading_whitespace_in_verbatim : Loc.span -> Warning.t = + Warning.make "'{v' should be followed by whitespace." + +let no_trailing_whitespace_in_verbatim : Loc.span -> Warning.t = + Warning.make "'v}' should be preceded by whitespace." + +let stray_at : Loc.span -> Warning.t = Warning.make "Stray '@'." + +let stray_cr : Loc.span -> Warning.t = + Warning.make "Stray '\\r' (carriage return character)." + +let truncated_before : Loc.span -> Warning.t = + Warning.make "'@before' expects version number on the same line." + +let truncated_param : Loc.span -> Warning.t = + Warning.make "'@param' expects parameter name on the same line." + +let truncated_raise : string -> Loc.span -> Warning.t = + Warning.make "'%s' expects exception constructor on the same line." + +let truncated_see : Loc.span -> Warning.t = + Warning.make + "'@see' should be followed by , 'file', or \"document title\"." + +let unknown_tag : string -> Loc.span -> Warning.t = + Warning.make "Unknown tag '%s'." + +let unpaired_right_brace : Loc.span -> Warning.t = + Warning.make ~suggestion:"try '\\}'." "Unpaired '}' (end of markup)." + +let unpaired_right_bracket : Loc.span -> Warning.t = + Warning.make ~suggestion:"try '\\]'." "Unpaired ']' (end of code)." + +let no_language_tag_in_meta : Loc.span -> Warning.t = + Warning.make ~suggestion:"try '{[ ... ]}' or '{@ocaml[ ... ]}'." + "'{@' should be followed by a language tag." + +let language_tag_invalid_char lang_tag : char -> Loc.span -> Warning.t = + let suggestion = "try '{@" ^ lang_tag ^ "[ ... ]}'." in + Warning.make ~suggestion "Invalid character '%c' in language tag." + +let truncated_code_block_meta : Loc.span -> Warning.t = + Warning.make ~suggestion:"try '{@ocaml[ ... ]}'." "Missing end of code block." + +let truncated_code_block : Loc.span -> Warning.t = + Warning.make ~suggestion:"add ']}'." "Missing end of code block." diff --git a/vendor/odoc-parser/src/syntax.ml b/vendor/odoc-parser/src/syntax.ml new file mode 100644 index 000000000..d8ecb87b5 --- /dev/null +++ b/vendor/odoc-parser/src/syntax.ml @@ -0,0 +1,1463 @@ +(* This module is a recursive descent parser for the ocamldoc syntax. The parser + consumes a token stream of type [Token.t Stream.t], provided by the lexer, + and produces a comment AST of the type defined in [Parser_.Ast]. + + The AST has two main levels: inline elements, which can appear inside + paragraphs, and are spaced horizontally when presented, and block elements, + such as paragraphs and lists, which are spaced vertically when presented. + Block elements contain inline elements, but not vice versa. + + Corresponding to this, the parser has three "main" functions: + + - [delimited_inline_element_list] parses a run of inline elements that is + delimited by curly brace markup ([{...}]). + - [paragraph] parses a run of inline elements that make up a paragraph, and + is not explicitly delimited with curly braces. + - [block_element_list] parses a sequence of block elements. A comment is a + sequence of block elements, so [block_element_list] is the top-level + parser. It is also used for list item and tag content. *) + +open! Compat + +type 'a with_location = 'a Loc.with_location + +(* {2 Input} *) + +type input = { + tokens : Token.t Loc.with_location Stream.t; + warnings : Warning.t list ref; +} + +(* {2 Output} *) + +let add_warning input warning = input.warnings := warning :: !(input.warnings) +let junk input = Stream.junk input.tokens + +let peek input = + match Stream.peek input.tokens with + | Some token -> token + | None -> assert false + +module Table = struct + module Light_syntax = struct + let valid_align = function + | [ { Loc.value = `Word w; _ } ] -> ( + match String.length w with + | 0 -> `Valid None + | 1 -> ( + match w with + | "-" -> `Valid None + | ":" -> `Valid (Some `Center) + | _ -> `Invalid) + | len -> + if String.for_all (Char.equal '-') (String.sub w 1 (len - 2)) then + match (String.get w 0, String.get w (len - 1)) with + | ':', ':' -> `Valid (Some `Center) + | ':', '-' -> `Valid (Some `Left) + | '-', ':' -> `Valid (Some `Right) + | '-', '-' -> `Valid None + | _ -> `Invalid + else `Invalid) + | _ -> `Invalid + + let valid_align_row lx = + let rec loop acc = function + | [] -> Some (List.rev acc) + | x :: q -> ( + match valid_align x with + | `Invalid -> None + | `Valid alignment -> loop (alignment :: acc) q) + in + loop [] lx + + let create ~grid ~align : Ast.table = + let cell_to_block (x, k) = + let whole_loc = Loc.span (List.map (fun x -> x.Loc.location) x) in + match x with + | [] -> ([], k) + | _ -> ([ Loc.at whole_loc (`Paragraph x) ], k) + in + let row_to_block = List.map cell_to_block in + let grid_to_block = List.map row_to_block in + ((grid_to_block grid, align), `Light) + + let with_kind kind : 'a with_location list list -> 'a Ast.row = + List.map (fun c -> (c, kind)) + + let from_raw_data grid : Ast.table = + match grid with + | [] -> create ~grid:[] ~align:None + | row1 :: rows2_N -> ( + match valid_align_row row1 with + (* If the first line is the align row, everything else is data. *) + | Some _ as align -> + create ~grid:(List.map (with_kind `Data) rows2_N) ~align + | None -> ( + match rows2_N with + (* Only 1 line, if this is not the align row this is data. *) + | [] -> create ~grid:[ with_kind `Data row1 ] ~align:None + | row2 :: rows3_N -> ( + match valid_align_row row2 with + (* If the second line is the align row, the first one is the + header and the rest is data. *) + | Some _ as align -> + let header = with_kind `Header row1 in + let data = List.map (with_kind `Data) rows3_N in + create ~grid:(header :: data) ~align + (* No align row in the first 2 lines, everything is considered + data. *) + | None -> + create ~grid:(List.map (with_kind `Data) grid) ~align:None + ))) + end + + module Heavy_syntax = struct + let create ~grid : Ast.table = ((grid, None), `Heavy) + let from_grid grid : Ast.table = create ~grid + end +end + +module Reader = struct + let until_rbrace input acc = + let rec consume () = + let next_token = peek input in + match next_token.value with + | `Right_brace -> + junk input; + `End (acc, next_token.location) + | `Space _ | `Single_newline _ | `Blank_line _ -> + junk input; + consume () + | _ -> `Token next_token + in + consume () + + module Infix = struct + let ( >>> ) consume if_token = + match consume with + | `End (ret, loc) -> (ret, loc) + | `Token t -> if_token t + end +end + +open Reader.Infix + +(* The last token in the stream is always [`End], and it is never consumed by + the parser, so the [None] case is impossible. *) + +let npeek n input = Stream.npeek n input.tokens + +(* {2 Non-link inline elements} *) +type style = [ `Bold | `Italic | `Emphasis | `Superscript | `Subscript ] + +(* Convenient abbreviation for use in patterns. *) +type token_that_always_begins_an_inline_element = + [ `Word of string + | `Code_span of string + | `Raw_markup of string option * string + | `Begin_style of style + | `Simple_reference of string + | `Begin_reference_with_replacement_text of string + | `Simple_link of string + | `Begin_link_with_replacement_text of string + | `Math_span of string ] + +(* Check that the token constructors above actually are all in [Token.t]. *) +let _check_subset : token_that_always_begins_an_inline_element -> Token.t = + fun t -> (t :> Token.t) + +(* Consumes tokens that make up a single non-link inline element: + + - a horizontal space ([`Space], significant in inline elements), + - a word (see [word]), + - a code span ([...], [`Code_span _]), or + - styled text ({e ...}). + + The latter requires a recursive call to [delimited_inline_element_list], + defined below. + + This should be part of [delimited_inline_element_list]; however, it is also + called by function [paragraph]. As a result, it is factored out, and made + mutually-recursive with [delimited_inline_element_list]. + + This is called only when it is known that the first token in the list is the + beginning of an inline element. In the case of [`Minus] and [`Plus], that + means the caller has determined that they are not a list bullet (i.e., not + the first non-whitespace tokens on their line). + + This function consumes exactly the tokens that make up the element. *) +let rec inline_element : + input -> Loc.span -> _ -> Ast.inline_element with_location = + fun input location next_token -> + match next_token with + | `Space _ as token -> + junk input; + Loc.at location token + | `Word _ as token -> + junk input; + Loc.at location token + (* This is actually the same memory representation as the token, complete + with location, and is probably the most common case. Perhaps the token + can be reused somehow. The same is true of [`Space], [`Code_span]. *) + | `Minus -> + junk input; + Loc.at location (`Word "-") + | `Plus -> + junk input; + Loc.at location (`Word "+") + | `Bar -> + junk input; + Loc.at location (`Word "|") + | (`Code_span _ | `Math_span _ | `Raw_markup _) as token -> + junk input; + Loc.at location token + | `Begin_style s as parent_markup -> + junk input; + + let requires_leading_whitespace = + match s with + | `Bold | `Italic | `Emphasis -> true + | `Superscript | `Subscript -> false + in + let content, brace_location = + delimited_inline_element_list ~parent_markup + ~parent_markup_location:location ~requires_leading_whitespace input + in + + let location = Loc.span [ location; brace_location ] in + + if content = [] then + Parse_error.should_not_be_empty + ~what:(Token.describe parent_markup) + location + |> add_warning input; + + Loc.at location (`Styled (s, content)) + | `Simple_reference r -> + junk input; + + let r_location = Loc.nudge_start (String.length "{!") location in + let r = Loc.at r_location r in + + Loc.at location (`Reference (`Simple, r, [])) + | `Begin_reference_with_replacement_text r as parent_markup -> + junk input; + + let r_location = Loc.nudge_start (String.length "{{!") location in + let r = Loc.at r_location r in + + let content, brace_location = + delimited_inline_element_list ~parent_markup + ~parent_markup_location:location ~requires_leading_whitespace:false + input + in + + let location = Loc.span [ location; brace_location ] in + + if content = [] then + Parse_error.should_not_be_empty + ~what:(Token.describe parent_markup) + location + |> add_warning input; + + Loc.at location (`Reference (`With_text, r, content)) + | `Simple_link u -> + junk input; + + let u = String.trim u in + + if u = "" then + Parse_error.should_not_be_empty + ~what:(Token.describe next_token) + location + |> add_warning input; + + Loc.at location (`Link (u, [])) + | `Begin_link_with_replacement_text u as parent_markup -> + junk input; + + let u = String.trim u in + + if u = "" then + Parse_error.should_not_be_empty + ~what:(Token.describe parent_markup) + location + |> add_warning input; + + let content, brace_location = + delimited_inline_element_list ~parent_markup + ~parent_markup_location:location ~requires_leading_whitespace:false + input + in + + `Link (u, content) |> Loc.at (Loc.span [ location; brace_location ]) + +(* Consumes tokens that make up a sequence of inline elements that is ended by + a '}', a [`Right_brace] token. The brace token is also consumed. + + The sequences are also preceded by some markup like '{b'. Some of these + markup tokens require whitespace immediately after the token, and others not. + The caller indicates which way that is through the + [~requires_leading_whitespace] argument. + + Whitespace is significant in inline element lists. In particular, "foo [bar]" + is represented as [`Word "foo"; `Space; `Code_span "bar"], while "foo[bar]" + is [`Word "foo"; `Code_span "bar"]. It doesn't matter how much whitespace is + there, just whether it is present or not. Single newlines and horizontal + space in any amount are allowed. Blank lines are not, as these are separators + for {e block} elements. + + In correct input, the first and last elements emitted will not be [`Space], + i.e. [`Space] appears only between other non-link inline elements. In + incorrect input, there might be [`Space] followed immediately by something + like an @author tag. + + The [~parent_markup] and [~parent_markup_location] arguments are used for + generating error messages. *) +and delimited_inline_element_list : + parent_markup:[< Token.t ] -> + parent_markup_location:Loc.span -> + requires_leading_whitespace:bool -> + input -> + Ast.inline_element with_location list * Loc.span = + fun ~parent_markup ~parent_markup_location ~requires_leading_whitespace input -> + (* [~at_start_of_line] is used to interpret [`Minus] and [`Plus]. These are + word tokens if not the first non-whitespace tokens on their line. Then, + they are allowed in a non-link element list. *) + let rec consume_elements : + at_start_of_line:bool -> + Ast.inline_element with_location list -> + Ast.inline_element with_location list * Loc.span = + fun ~at_start_of_line acc -> + let next_token = peek input in + match next_token.value with + | `Right_brace -> + junk input; + (List.rev acc, next_token.location) + (* The [`Space] token is not space at the beginning or end of line, because + that is combined into [`Single_newline] or [`Blank_line] tokens. It is + also not at the beginning of markup (after e.g. '{b'), because that is + handled separately before calling + [consume_non_link_inline_elements], and not immediately before '}', + because that is combined into the [`Right_brace] token by the lexer. So, + it is an internal space, and we want to add it to the non-link inline + element list. *) + | (`Space _ | #token_that_always_begins_an_inline_element) as token -> + let acc = inline_element input next_token.location token :: acc in + consume_elements ~at_start_of_line:false acc + | `Single_newline ws -> + junk input; + let element = Loc.same next_token (`Space ws) in + consume_elements ~at_start_of_line:true (element :: acc) + | `Blank_line ws as blank -> + Parse_error.not_allowed ~what:(Token.describe blank) + ~in_what:(Token.describe parent_markup) + next_token.location + |> add_warning input; + + junk input; + let element = Loc.same next_token (`Space ws) in + consume_elements ~at_start_of_line:true (element :: acc) + | `Bar as token -> + let acc = inline_element input next_token.location token :: acc in + consume_elements ~at_start_of_line:false acc + | (`Minus | `Plus) as bullet -> + (if at_start_of_line then + let suggestion = + Printf.sprintf "move %s so it isn't the first thing on the line." + (Token.print bullet) + in + Parse_error.not_allowed ~what:(Token.describe bullet) + ~in_what:(Token.describe parent_markup) + ~suggestion next_token.location + |> add_warning input); + + let acc = inline_element input next_token.location bullet :: acc in + consume_elements ~at_start_of_line:false acc + | other_token -> + Parse_error.not_allowed + ~what:(Token.describe other_token) + ~in_what:(Token.describe parent_markup) + next_token.location + |> add_warning input; + + let last_location = + match acc with + | last_token :: _ -> last_token.location + | [] -> parent_markup_location + in + + (List.rev acc, last_location) + in + + let first_token = peek input in + match first_token.value with + | `Space _ -> + junk input; + consume_elements ~at_start_of_line:false [] + (* [~at_start_of_line] is [false] here because the preceding token was some + some markup like '{b', and we didn't move to the next line, so the next + token will not be the first non-whitespace token on its line. *) + | `Single_newline _ -> + junk input; + consume_elements ~at_start_of_line:true [] + | `Blank_line _ as blank -> + (* In case the markup is immediately followed by a blank line, the error + message printed by the catch-all case below can be confusing, as it will + suggest that the markup must be followed by a newline (which it is). It + just must not be followed by two newlines. To explain that clearly, + handle that case specifically. *) + Parse_error.not_allowed ~what:(Token.describe blank) + ~in_what:(Token.describe parent_markup) + first_token.location + |> add_warning input; + + junk input; + consume_elements ~at_start_of_line:true [] + | `Right_brace -> + junk input; + ([], first_token.location) + | _ -> + if requires_leading_whitespace then + Parse_error.should_be_followed_by_whitespace + ~what:(Token.print parent_markup) + parent_markup_location + |> add_warning input; + consume_elements ~at_start_of_line:false [] + +(* {2 Paragraphs} *) + +(* Consumes tokens that make up a paragraph. + + A paragraph is a sequence of inline elements that ends on a blank line, or + explicit block markup such as a verbatim block on a new line. + + Because of the significance of newlines, paragraphs are parsed line-by-line. + The function [paragraph] is called only when the current token is the first + non-whitespace token on its line, and begins an inline element. [paragraph] + then parses a line of inline elements. Afterwards, it looks ahead to the next + line. If that line also begins with an inline element, it parses that line, + and so on. *) +let paragraph : input -> Ast.nestable_block_element with_location = + fun input -> + (* Parses a single line of a paragraph, consisting of inline elements. The + only valid ways to end a paragraph line are with [`End], [`Single_newline], + [`Blank_line], and [`Right_brace]. Everything else either belongs in the + paragraph, or signifies an attempt to begin a block element inside a + paragraph line, which is an error. These errors are caught elsewhere; the + paragraph parser just stops. *) + let rec paragraph_line : + Ast.inline_element with_location list -> + Ast.inline_element with_location list = + fun acc -> + let next_token = peek input in + match next_token.value with + | ( `Space _ | `Minus | `Plus | `Bar + | #token_that_always_begins_an_inline_element ) as token -> + let element = inline_element input next_token.location token in + paragraph_line (element :: acc) + | _ -> acc + in + + (* After each line is parsed, decides whether to parse more lines. *) + let rec additional_lines : + Ast.inline_element with_location list -> + Ast.inline_element with_location list = + fun acc -> + match npeek 2 input with + | { value = `Single_newline ws; location } + :: { value = #token_that_always_begins_an_inline_element | `Bar; _ } + :: _ -> + junk input; + let acc = Loc.at location (`Space ws) :: acc in + let acc = paragraph_line acc in + additional_lines acc + | _ -> List.rev acc + in + + let elements = paragraph_line [] |> additional_lines in + `Paragraph elements |> Loc.at (Loc.span (List.map Loc.location elements)) + +(* {2 Block elements} *) + +(* {3 Helper types} *) + +(* The interpretation of tokens in the block parser depends on where on a line + each token appears. The six possible "locations" are: + + - [`At_start_of_line], when only whitespace has been read on the current + line. + - [`After_tag], when a valid tag token, such as [@deprecated], has been read, + and only whitespace has been read since. + - [`After_shorthand_bullet], when a valid shorthand list item bullet, such as + [-], has been read, and only whitespace has been read since. + - [`After_explicit_list_bullet], when a valid explicit bullet, such as [{li], + has been read, and only whitespace has been read since. + - [`After_table_cell], when a table cell opening markup ('{th' or '{td') has been read. + - [`After_text], when any other valid non-whitespace token has already been + read on the current line. + + Here are some examples of how this affects the interpretation of tokens: + + - A paragraph can start anywhere except [`After_text] (two paragraphs cannot + be on the same line, but paragraphs can be nested in just about anything). + - [`Minus] is interpreted as a list item bullet [`At_start_of_line], + [`After_tag], and [`After_explicit_list_bullet]. + - Tags are only allowed [`At_start_of_line]. + + To track the location accurately, the functions that make up the block parser + pass explicit [where_in_line] values around and return them. + + In a few cases, [where_in_line] can be inferred from what helper was called. + For example, the [paragraph] parser always stops on the same line as the last + significant token that is in the paragraph it consumed, so the location must + be [`After_text]. *) +type where_in_line = + [ `At_start_of_line + | `After_tag + | `After_shorthand_bullet + | `After_explicit_list_bullet + | `After_table_cell + | `After_text ] + +(* The block parsing loop, function [block_element_list], stops when it + encounters certain tokens. + + When it is called for the whole comment, or for in explicit list item + ([{li foo}]), it can only stop on end of input or a right brace. + + When it is called inside a shorthand list item ([- foo]), it stops on end of + input, right brace, a blank line (indicating end of shorthand list), plus or + minus (indicating the start of the next liste item), or a section heading or + tag, which cannot be nested in list markup. + + The block parser [block_element_list] explicitly returns the token that + stopped it, with a type more precise than [Token.t stream_head]: if it was + called for the whole comment or an explicit list item, the stop token will + have type [stops_at_delimiters stream_head], and if it was called for a + shorthand list item, the stop token will have type + [implicit_stop stream_head]. This allows the calling parsers to write precise + cases for exactly the tokens that might be at the front of the stream after + the block parser returns. *) +type stops_at_delimiters = [ `End | `Right_brace ] +type code_stop = [ `End | `Right_code_delimiter ] + +type stopped_implicitly = + [ `End + | `Blank_line of string + | `Right_brace + | `Minus + | `Plus + | Token.section_heading + | Token.tag ] + +(* Ensure that the above two types are really subsets of [Token.t]. *) +let _check_subset : stops_at_delimiters -> Token.t = fun t -> (t :> Token.t) +let _check_subset : stopped_implicitly -> Token.t = fun t -> (t :> Token.t) + +(* The different contexts in which the block parser [block_element_list] can be + called. The block parser's behavior depends somewhat on the context. For + example, while paragraphs are allowed anywhere, shorthand lists are not + allowed immediately inside other shorthand lists, while tags are not allowed + anywhere except at the comment top level. + + Besides telling the block parser how to behave, each context also carries two + types, which determine the return type of the block parser: + + - The type of blocks the parser returns. Note that [nestable_block_element] + is included in [block_element]. However, the extra block kinds in + [block_element] are only allowed at the comment top level. + - The type of token that the block parser stops at. See discussion above. *) +type ('block, 'stops_at_which_tokens) context = + | Top_level : (Ast.block_element, stops_at_delimiters) context + | In_shorthand_list : (Ast.nestable_block_element, stopped_implicitly) context + | In_explicit_list : (Ast.nestable_block_element, stops_at_delimiters) context + | In_table_cell : (Ast.nestable_block_element, stops_at_delimiters) context + | In_code_results : (Ast.nestable_block_element, code_stop) context + | In_tag : (Ast.nestable_block_element, Token.t) context + +(* This is a no-op. It is needed to prove to the type system that nestable block + elements are acceptable block elements in all contexts. *) +let accepted_in_all_contexts : + type block stops_at_which_tokens. + (block, stops_at_which_tokens) context -> + Ast.nestable_block_element -> + block = + fun context block -> + match context with + | Top_level -> (block :> Ast.block_element) + | In_shorthand_list -> block + | In_explicit_list -> block + | In_table_cell -> block + | In_code_results -> block + | In_tag -> block + +(* Converts a tag to a series of words. This is used in error recovery, when a + tag cannot be generated. *) +let tag_to_words = function + | `Author s -> [ `Word "@author"; `Space " "; `Word s ] + | `Before s -> [ `Word "@before"; `Space " "; `Word s ] + | `Canonical s -> [ `Word "@canonical"; `Space " "; `Word s ] + | `Deprecated -> [ `Word "@deprecated" ] + | `Inline -> [ `Word "@inline" ] + | `Open -> [ `Word "@open" ] + | `Closed -> [ `Word "@closed" ] + | `Hidden -> [ `Word "@hidden" ] + | `Param s -> [ `Word "@param"; `Space " "; `Word s ] + | `Raise s -> [ `Word "@raise"; `Space " "; `Word s ] + | `Return -> [ `Word "@return" ] + | `See (`Document, s) -> [ `Word "@see"; `Space " "; `Word ("\"" ^ s ^ "\"") ] + | `See (`File, s) -> [ `Word "@see"; `Space " "; `Word ("'" ^ s ^ "'") ] + | `See (`Url, s) -> [ `Word "@see"; `Space " "; `Word ("<" ^ s ^ ">") ] + | `Since s -> [ `Word "@since"; `Space " "; `Word s ] + | `Version s -> [ `Word "@version"; `Space " "; `Word s ] + +(* {3 Block element lists} *) + +(* Consumes tokens making up a sequence of block elements. These are: + + - paragraphs, + - code blocks, + - verbatim text blocks, + - tables, + - lists, and + - section headings. *) +let rec block_element_list : + type block stops_at_which_tokens. + (block, stops_at_which_tokens) context -> + parent_markup:[< Token.t | `Comment ] -> + input -> + block with_location list + * stops_at_which_tokens with_location + * where_in_line = + fun context ~parent_markup input -> + let rec consume_block_elements : + parsed_a_tag:bool -> + where_in_line -> + block with_location list -> + block with_location list + * stops_at_which_tokens with_location + * where_in_line = + fun ~parsed_a_tag where_in_line acc -> + let describe token = + match token with + | #token_that_always_begins_an_inline_element -> "paragraph" + | _ -> Token.describe token + in + + let warn_if_after_text { Loc.location; value = token } = + if where_in_line = `After_text then + Parse_error.should_begin_on_its_own_line ~what:(describe token) location + |> add_warning input + in + + let warn_if_after_tags { Loc.location; value = token } = + if parsed_a_tag then + let suggestion = + Printf.sprintf "move %s before any tags." (Token.describe token) + in + Parse_error.not_allowed ~what:(describe token) + ~in_what:"the tags section" ~suggestion location + |> add_warning input + in + + let warn_because_not_at_top_level { Loc.location; value = token } = + let suggestion = + Printf.sprintf "move %s outside of any other markup." + (Token.print token) + in + Parse_error.not_allowed ~what:(Token.describe token) + ~in_what:(Token.describe parent_markup) + ~suggestion location + |> add_warning input + in + + match peek input with + (* Terminators: the two tokens that terminate anything. *) + | { value = `End; _ } as next_token -> ( + match context with + | Top_level -> (List.rev acc, next_token, where_in_line) + | In_shorthand_list -> (List.rev acc, next_token, where_in_line) + | In_explicit_list -> (List.rev acc, next_token, where_in_line) + | In_tag -> (List.rev acc, next_token, where_in_line) + | In_table_cell -> (List.rev acc, next_token, where_in_line) + | In_code_results -> (List.rev acc, next_token, where_in_line)) + | { value = `Right_brace; _ } as next_token -> ( + (* This little absurdity is needed to satisfy the type system. Without it, + OCaml is unable to prove that [stream_head] has the right type for all + possible values of [context]. *) + match context with + | Top_level -> (List.rev acc, next_token, where_in_line) + | In_shorthand_list -> (List.rev acc, next_token, where_in_line) + | In_explicit_list -> (List.rev acc, next_token, where_in_line) + | In_table_cell -> (List.rev acc, next_token, where_in_line) + | In_tag -> (List.rev acc, next_token, where_in_line) + | In_code_results -> + junk input; + consume_block_elements ~parsed_a_tag where_in_line acc) + | { value = `Right_code_delimiter; _ } as next_token -> ( + match context with + | In_code_results -> (List.rev acc, next_token, where_in_line) + | _ -> + junk input; + consume_block_elements ~parsed_a_tag where_in_line acc) + (* Whitespace. This can terminate some kinds of block elements. It is also + necessary to track it to interpret [`Minus] and [`Plus] correctly, as + well as to ensure that all block elements begin on their own line. *) + | { value = `Space _; _ } -> + junk input; + consume_block_elements ~parsed_a_tag where_in_line acc + | { value = `Single_newline _; _ } -> + junk input; + consume_block_elements ~parsed_a_tag `At_start_of_line acc + | { value = `Blank_line _; _ } as next_token -> ( + match context with + (* Blank lines terminate shorthand lists ([- foo]). They also terminate + paragraphs, but the paragraph parser is aware of that internally. *) + | In_shorthand_list -> (List.rev acc, next_token, where_in_line) + (* Otherwise, blank lines are pretty much like single newlines. *) + | _ -> + junk input; + consume_block_elements ~parsed_a_tag `At_start_of_line acc) + (* Explicit list items ([{li ...}] and [{- ...}]) can never appear directly + in block content. They can only appear inside [{ul ...}] and [{ol ...}]. + So, catch those. *) + | { value = `Begin_list_item _ as token; location } -> + let suggestion = + Printf.sprintf "move %s into %s, or use %s." (Token.print token) + (Token.describe (`Begin_list `Unordered)) + (Token.describe `Minus) + in + Parse_error.not_allowed ~what:(Token.describe token) + ~in_what:(Token.describe parent_markup) + ~suggestion location + |> add_warning input; + + junk input; + consume_block_elements ~parsed_a_tag where_in_line acc + (* Table rows ([{tr ...}]) can never appear directly + in block content. They can only appear inside [{table ...}]. *) + | { value = `Begin_table_row as token; location } -> + let suggestion = + Printf.sprintf "move %s into %s." (Token.print token) + (Token.describe `Begin_table_heavy) + in + Parse_error.not_allowed ~what:(Token.describe token) + ~in_what:(Token.describe parent_markup) + ~suggestion location + |> add_warning input; + junk input; + consume_block_elements ~parsed_a_tag where_in_line acc + (* Table cells ([{th ...}] and [{td ...}]) can never appear directly + in block content. They can only appear inside [{tr ...}]. *) + | { value = `Begin_table_cell _ as token; location } -> + let suggestion = + Printf.sprintf "move %s into %s." (Token.print token) + (Token.describe `Begin_table_row) + in + Parse_error.not_allowed ~what:(Token.describe token) + ~in_what:(Token.describe parent_markup) + ~suggestion location + |> add_warning input; + junk input; + consume_block_elements ~parsed_a_tag where_in_line acc + (* Tags. These can appear at the top level only. Also, once one tag is seen, + the only top-level elements allowed are more tags. *) + | { value = `Tag tag as token; location } as next_token -> ( + let recover_when_not_at_top_level context = + warn_because_not_at_top_level next_token; + junk input; + let words = List.map (Loc.at location) (tag_to_words tag) in + let paragraph = + `Paragraph words + |> accepted_in_all_contexts context + |> Loc.at location + in + consume_block_elements ~parsed_a_tag `At_start_of_line + (paragraph :: acc) + in + + match context with + (* Tags cannot make sense in an explicit list ([{ul {li ...}}]). *) + | In_explicit_list -> recover_when_not_at_top_level context + (* If a tag starts at the beginning of a line, it terminates the preceding + tag and/or the current shorthand list. In this case, return to the + caller, and let the caller decide how to interpret the tag token. *) + | In_shorthand_list -> + if where_in_line = `At_start_of_line then + (List.rev acc, next_token, where_in_line) + else recover_when_not_at_top_level context + | In_table_cell -> recover_when_not_at_top_level context + | In_tag -> + if where_in_line = `At_start_of_line then + (List.rev acc, next_token, where_in_line) + else recover_when_not_at_top_level context + | In_code_results -> recover_when_not_at_top_level context + (* If this is the top-level call to [block_element_list], parse the + tag. *) + | Top_level -> ( + if where_in_line <> `At_start_of_line then + Parse_error.should_begin_on_its_own_line + ~what:(Token.describe token) location + |> add_warning input; + + junk input; + + match tag with + | (`Author s | `Since s | `Version s | `Canonical s) as tag -> + let s = String.trim s in + if s = "" then + Parse_error.should_not_be_empty ~what:(Token.describe token) + location + |> add_warning input; + let tag = + match tag with + | `Author _ -> `Author s + | `Since _ -> `Since s + | `Version _ -> `Version s + | `Canonical _ -> + (* TODO The location is only approximate, as we need lexer + cooperation to get the real location. *) + let r_location = + Loc.nudge_start (String.length "@canonical ") location + in + `Canonical (Loc.at r_location s) + in + + let tag = Loc.at location (`Tag tag) in + consume_block_elements ~parsed_a_tag:true `After_text + (tag :: acc) + | (`Deprecated | `Return) as tag -> + let content, _stream_head, where_in_line = + block_element_list In_tag ~parent_markup:token input + in + let tag = + match tag with + | `Deprecated -> `Deprecated content + | `Return -> `Return content + in + let location = + location :: List.map Loc.location content |> Loc.span + in + let tag = Loc.at location (`Tag tag) in + consume_block_elements ~parsed_a_tag:true where_in_line + (tag :: acc) + | (`Param _ | `Raise _ | `Before _) as tag -> + let content, _stream_head, where_in_line = + block_element_list In_tag ~parent_markup:token input + in + let tag = + match tag with + | `Param s -> `Param (s, content) + | `Raise s -> `Raise (s, content) + | `Before s -> `Before (s, content) + in + let location = + location :: List.map Loc.location content |> Loc.span + in + let tag = Loc.at location (`Tag tag) in + consume_block_elements ~parsed_a_tag:true where_in_line + (tag :: acc) + | `See (kind, target) -> + let content, _next_token, where_in_line = + block_element_list In_tag ~parent_markup:token input + in + let location = + location :: List.map Loc.location content |> Loc.span + in + let tag = `Tag (`See (kind, target, content)) in + let tag = Loc.at location tag in + consume_block_elements ~parsed_a_tag:true where_in_line + (tag :: acc) + | (`Inline | `Open | `Closed | `Hidden) as tag -> + let tag = Loc.at location (`Tag tag) in + consume_block_elements ~parsed_a_tag:true `After_text + (tag :: acc))) + | ( { value = #token_that_always_begins_an_inline_element; _ } + | { value = `Bar; _ } ) as next_token -> + warn_if_after_tags next_token; + warn_if_after_text next_token; + + let block = paragraph input in + let block = Loc.map (accepted_in_all_contexts context) block in + let acc = block :: acc in + consume_block_elements ~parsed_a_tag `After_text acc + | { value = `Verbatim s as token; location } as next_token -> + warn_if_after_tags next_token; + warn_if_after_text next_token; + if s = "" then + Parse_error.should_not_be_empty ~what:(Token.describe token) location + |> add_warning input; + + junk input; + let block = accepted_in_all_contexts context token in + let block = Loc.at location block in + let acc = block :: acc in + consume_block_elements ~parsed_a_tag `After_text acc + | { value = `Math_block s as token; location } as next_token -> + warn_if_after_tags next_token; + warn_if_after_text next_token; + if s = "" then + Parse_error.should_not_be_empty ~what:(Token.describe token) location + |> add_warning input; + + junk input; + let block = accepted_in_all_contexts context token in + let block = Loc.at location block in + let acc = block :: acc in + consume_block_elements ~parsed_a_tag `After_text acc + | { + value = + `Code_block (meta, delim, { value = s; location = v_loc }, has_outputs) + as token; + location; + } as next_token -> + warn_if_after_tags next_token; + warn_if_after_text next_token; + junk input; + let delimiter = if delim = "" then None else Some delim in + let output, location = + if not has_outputs then (None, location) + else + let content, next_token, _where_in_line = + block_element_list In_code_results ~parent_markup:token input + in + junk input; + let locations = + location :: List.map (fun content -> content.Loc.location) content + in + let location = Loc.span locations in + let location = { location with end_ = next_token.location.end_ } in + (Some content, location) + in + + if s = "" then + Parse_error.should_not_be_empty ~what:(Token.describe token) location + |> add_warning input; + + let meta = + match meta with + | None -> None + | Some (language, tags) -> Some { Ast.language; tags } + in + let block = + accepted_in_all_contexts context + (`Code_block + { + Ast.meta; + delimiter; + content = { value = s; location = v_loc }; + output; + }) + in + let block = Loc.at location block in + let acc = block :: acc in + consume_block_elements ~parsed_a_tag `After_text acc + | { value = `Modules s as token; location } as next_token -> + warn_if_after_tags next_token; + warn_if_after_text next_token; + + junk input; + + (* TODO Use some library for a splitting function, or move this out into a + Util module. *) + let split_string delimiters s = + let rec scan_delimiters acc index = + if index >= String.length s then List.rev acc + else if String.contains delimiters s.[index] then + scan_delimiters acc (index + 1) + else scan_word acc index (index + 1) + and scan_word acc start_index index = + if index >= String.length s then + let word = String.sub s start_index (index - start_index) in + List.rev (word :: acc) + else if String.contains delimiters s.[index] then + let word = String.sub s start_index (index - start_index) in + scan_delimiters (word :: acc) (index + 1) + else scan_word acc start_index (index + 1) + in + + scan_delimiters [] 0 + in + + (* TODO Correct locations await a full implementation of {!modules} + parsing. *) + let modules = + split_string " \t\r\n" s |> List.map (fun r -> Loc.at location r) + in + + if modules = [] then + Parse_error.should_not_be_empty ~what:(Token.describe token) location + |> add_warning input; + + let block = accepted_in_all_contexts context (`Modules modules) in + let block = Loc.at location block in + let acc = block :: acc in + consume_block_elements ~parsed_a_tag `After_text acc + | { value = `Begin_list kind as token; location } as next_token -> + warn_if_after_tags next_token; + warn_if_after_text next_token; + + junk input; + + let items, brace_location = + explicit_list_items ~parent_markup:token input + in + if items = [] then + Parse_error.should_not_be_empty ~what:(Token.describe token) location + |> add_warning input; + + let location = Loc.span [ location; brace_location ] in + let block = `List (kind, `Heavy, items) in + let block = accepted_in_all_contexts context block in + let block = Loc.at location block in + let acc = block :: acc in + consume_block_elements ~parsed_a_tag `After_text acc + | { value = (`Begin_table_light | `Begin_table_heavy) as token; location } + as next_token -> + warn_if_after_tags next_token; + warn_if_after_text next_token; + junk input; + let block, brace_location = + let parent_markup = token in + let parent_markup_location = location in + match token with + | `Begin_table_light -> + light_table input ~parent_markup ~parent_markup_location + | `Begin_table_heavy -> + heavy_table input ~parent_markup ~parent_markup_location + in + let location = Loc.span [ location; brace_location ] in + let block = accepted_in_all_contexts context (`Table block) in + let block = Loc.at location block in + let acc = block :: acc in + consume_block_elements ~parsed_a_tag `After_text acc + | { value = (`Minus | `Plus) as token; location } as next_token -> ( + (match where_in_line with + | `After_text | `After_shorthand_bullet -> + Parse_error.should_begin_on_its_own_line + ~what:(Token.describe token) location + |> add_warning input + | _ -> ()); + + warn_if_after_tags next_token; + + match context with + | In_shorthand_list -> (List.rev acc, next_token, where_in_line) + | _ -> + let items, where_in_line = + shorthand_list_items next_token where_in_line input + in + let kind = + match token with `Minus -> `Unordered | `Plus -> `Ordered + in + let location = + location :: List.map Loc.location (List.flatten items) |> Loc.span + in + let block = `List (kind, `Light, items) in + let block = accepted_in_all_contexts context block in + let block = Loc.at location block in + let acc = block :: acc in + consume_block_elements ~parsed_a_tag where_in_line acc) + | { value = `Begin_section_heading (level, label) as token; location } as + next_token -> ( + warn_if_after_tags next_token; + + let recover_when_not_at_top_level context = + warn_because_not_at_top_level next_token; + junk input; + let content, brace_location = + delimited_inline_element_list ~parent_markup:token + ~parent_markup_location:location ~requires_leading_whitespace:true + input + in + let location = Loc.span [ location; brace_location ] in + let paragraph = + `Paragraph content + |> accepted_in_all_contexts context + |> Loc.at location + in + consume_block_elements ~parsed_a_tag `At_start_of_line + (paragraph :: acc) + in + + match context with + | In_shorthand_list -> + if where_in_line = `At_start_of_line then + (List.rev acc, next_token, where_in_line) + else recover_when_not_at_top_level context + | In_explicit_list -> recover_when_not_at_top_level context + | In_table_cell -> recover_when_not_at_top_level context + | In_tag -> recover_when_not_at_top_level context + | In_code_results -> recover_when_not_at_top_level context + | Top_level -> + if where_in_line <> `At_start_of_line then + Parse_error.should_begin_on_its_own_line + ~what:(Token.describe token) location + |> add_warning input; + + let label = + match label with + | Some "" -> + Parse_error.should_not_be_empty ~what:"heading label" location + |> add_warning input; + None + | _ -> label + in + + junk input; + + let content, brace_location = + delimited_inline_element_list ~parent_markup:token + ~parent_markup_location:location + ~requires_leading_whitespace:true input + in + if content = [] then + Parse_error.should_not_be_empty ~what:(Token.describe token) + location + |> add_warning input; + + let location = Loc.span [ location; brace_location ] in + let heading = `Heading (level, label, content) in + let heading = Loc.at location heading in + let acc = heading :: acc in + consume_block_elements ~parsed_a_tag `After_text acc) + | { value = `Begin_paragraph_style _ as token; location } -> + junk input; + let content, brace_location = + delimited_inline_element_list ~parent_markup:token + ~parent_markup_location:location ~requires_leading_whitespace:true + input + in + let location = Loc.span [ location; brace_location ] in + + Parse_error.markup_should_not_be_used ~what:(Token.describe token) + location + |> add_warning input; + + let paragraph = + `Paragraph content + |> accepted_in_all_contexts context + |> Loc.at location + in + consume_block_elements ~parsed_a_tag `At_start_of_line (paragraph :: acc) + in + + let where_in_line = + match context with + | Top_level -> `At_start_of_line + | In_shorthand_list -> `After_shorthand_bullet + | In_explicit_list -> `After_explicit_list_bullet + | In_table_cell -> `After_table_cell + | In_code_results -> `After_tag + | In_tag -> `After_tag + in + + consume_block_elements ~parsed_a_tag:false where_in_line [] + +(* {3 Lists} *) + +(* Consumes a sequence of implicit list items. Each one consists of a [`Minus] + or [`Plus] token, followed by block elements until: + + - a blank line, or + - a list bullet of the opposite kind (e.g. [`Plus] for a [`Minus] list). + + This function is called when the next token is known to be [`Minus] or + [`Plus]. It consumes that token, and calls the block element parser (see + above). That parser returns to [implicit_list_items] only on [`Blank_line], + [`End], [`Minus] or [`Plus] at the start of a line, or [`Right_brace]. *) +and shorthand_list_items : + [ `Minus | `Plus ] with_location -> + where_in_line -> + input -> + Ast.nestable_block_element with_location list list * where_in_line = + fun first_token where_in_line input -> + let bullet_token = first_token.value in + + let rec consume_list_items : + [> ] with_location -> + where_in_line -> + Ast.nestable_block_element with_location list list -> + Ast.nestable_block_element with_location list list * where_in_line = + fun next_token where_in_line acc -> + match next_token.value with + | `End | `Right_brace | `Blank_line _ | `Tag _ | `Begin_section_heading _ -> + (List.rev acc, where_in_line) + | (`Minus | `Plus) as bullet -> + if bullet = bullet_token then ( + junk input; + + let content, stream_head, where_in_line = + block_element_list In_shorthand_list ~parent_markup:bullet input + in + if content = [] then + Parse_error.should_not_be_empty ~what:(Token.describe bullet) + next_token.location + |> add_warning input; + + let acc = content :: acc in + consume_list_items stream_head where_in_line acc) + else (List.rev acc, where_in_line) + in + + consume_list_items + (first_token :> stopped_implicitly with_location) + where_in_line [] + +(* Consumes a sequence of explicit list items (starting with '{li ...}' and + '{-...}', which are represented by [`Begin_list_item _] tokens). + + This function is called immediately after '{ul' or '{ol' ([`Begin_list _]) is + read. The only "valid" way to exit is by reading a [`Right_brace] token, + which is consumed. + + Whitespace inside the list, but outside list items, is not significant – this + parsing function consumes all of it. Otherwise, only list item start tokens + are accepted. Everything else is an error. *) +and explicit_list_items : + parent_markup:[< Token.t ] -> + input -> + Ast.nestable_block_element with_location list list * Loc.span = + fun ~parent_markup input -> + let rec consume_list_items : + Ast.nestable_block_element with_location list list -> + Ast.nestable_block_element with_location list list * Loc.span = + fun acc -> + let next_token = peek input in + match next_token.value with + | `End -> + Parse_error.not_allowed next_token.location ~what:(Token.describe `End) + ~in_what:(Token.describe parent_markup) + |> add_warning input; + (List.rev acc, next_token.location) + | `Right_brace -> + junk input; + (List.rev acc, next_token.location) + | `Space _ | `Single_newline _ | `Blank_line _ -> + junk input; + consume_list_items acc + | `Begin_list_item kind as token -> + junk input; + + (* '{li', represented by [`Begin_list_item `Li], must be followed by + whitespace. *) + (if kind = `Li then + match (peek input).value with + | `Space _ | `Single_newline _ | `Blank_line _ | `Right_brace -> + () + (* The presence of [`Right_brace] above requires some explanation: + + - It is better to be silent about missing whitespace if the next + token is [`Right_brace], because the error about an empty list + item will be generated below, and that error is more important to + the user. + - The [`Right_brace] token also happens to include all whitespace + before it, as a convenience for the rest of the parser. As a + result, not ignoring it could be wrong: there could in fact be + whitespace in the concrete syntax immediately after '{li', just + it is not represented as [`Space], [`Single_newline], or + [`Blank_line]. *) + | _ -> + Parse_error.should_be_followed_by_whitespace next_token.location + ~what:(Token.print token) + |> add_warning input); + + let content, token_after_list_item, _where_in_line = + block_element_list In_explicit_list ~parent_markup:token input + in + + if content = [] then + Parse_error.should_not_be_empty next_token.location + ~what:(Token.describe token) + |> add_warning input; + + (match token_after_list_item.value with + | `Right_brace -> junk input + | `End -> + Parse_error.not_allowed token_after_list_item.location + ~what:(Token.describe `End) ~in_what:(Token.describe token) + |> add_warning input); + + let acc = content :: acc in + consume_list_items acc + | token -> + let suggestion = + match token with + | `Begin_section_heading _ | `Tag _ -> + Printf.sprintf "move %s outside the list." (Token.describe token) + | _ -> + Printf.sprintf "move %s into a list item, %s or %s." + (Token.describe token) + (Token.print (`Begin_list_item `Li)) + (Token.print (`Begin_list_item `Dash)) + in + Parse_error.not_allowed next_token.location ~what:(Token.describe token) + ~in_what:(Token.describe parent_markup) + ~suggestion + |> add_warning input; + + junk input; + consume_list_items acc + in + + consume_list_items [] + +(* Consumes a sequence of table rows that might start with [`Bar]. + + This function is called immediately after '{t' ([`Begin_table `Light]) is + read. The only "valid" way to exit is by reading a [`Right_brace] token, + which is consumed. *) +and light_table ~parent_markup ~parent_markup_location input = + let rec consume_rows acc ~last_loc = + Reader.until_rbrace input acc >>> fun next_token -> + match next_token.Loc.value with + | `Bar | #token_that_always_begins_an_inline_element -> ( + let next, row, last_loc = + light_table_row ~parent_markup ~last_loc input + in + match next with + | `Continue -> consume_rows (row :: acc) ~last_loc + | `Stop -> (row :: acc, last_loc)) + | other_token -> + Parse_error.not_allowed next_token.location + ~what:(Token.describe other_token) + ~in_what:(Token.describe parent_markup) + |> add_warning input; + junk input; + consume_rows acc ~last_loc + in + let rows, brace_location = consume_rows [] ~last_loc:parent_markup_location in + let grid = List.rev rows in + (Table.Light_syntax.from_raw_data grid, brace_location) + +(* Consumes a table row that might start with [`Bar]. *) +and light_table_row ~parent_markup ~last_loc input = + let rec consume_row acc_row acc_cell acc_space ~new_line ~last_loc = + let push_cells row cell = + match cell with [] -> row | _ -> List.rev cell :: row + in + let return row cell = List.rev (push_cells row cell) in + let next_token = peek input in + match next_token.value with + | `Right_brace -> + junk input; + (`Stop, return acc_row acc_cell, next_token.location) + | `Space _ as token -> + junk input; + let i = Loc.at next_token.location token in + consume_row acc_row acc_cell (i :: acc_space) ~new_line ~last_loc + | `Single_newline _ | `Blank_line _ -> + junk input; + (`Continue, return acc_row acc_cell, last_loc) + | `Bar -> + junk input; + let acc_row = if new_line then [] else List.rev acc_cell :: acc_row in + consume_row acc_row [] [] ~new_line:false ~last_loc + | #token_that_always_begins_an_inline_element as token -> + let i = inline_element input next_token.location token in + if Loc.spans_multiple_lines i then + Parse_error.not_allowed + ~what:(Token.describe (`Single_newline "")) + ~in_what:(Token.describe `Begin_table_light) + i.location + |> add_warning input; + let acc_cell = + if acc_cell = [] then [ i ] else (i :: acc_space) @ acc_cell + in + consume_row acc_row acc_cell [] ~new_line:false + ~last_loc:next_token.location + | other_token -> + Parse_error.not_allowed next_token.location + ~what:(Token.describe other_token) + ~in_what:(Token.describe parent_markup) + |> add_warning input; + junk input; + consume_row acc_row acc_cell acc_space ~new_line ~last_loc + in + consume_row [] [] [] ~new_line:true ~last_loc + +(* Consumes a sequence of table rows (starting with '{tr ...}', which are + represented by [`Begin_table_row] tokens). + + This function is called immediately after '{table' ([`Begin_table `Heavy]) is + read. The only "valid" way to exit is by reading a [`Right_brace] token, + which is consumed. *) +and heavy_table ~parent_markup ~parent_markup_location input = + let rec consume_rows acc ~last_loc = + Reader.until_rbrace input acc >>> fun next_token -> + match next_token.Loc.value with + | `Begin_table_row as token -> + junk input; + let items, last_loc = heavy_table_row ~parent_markup:token input in + consume_rows (List.rev items :: acc) ~last_loc + | token -> + Parse_error.not_allowed next_token.location ~what:(Token.describe token) + ~in_what:(Token.describe parent_markup) + ~suggestion:"Move outside of {table ...}, or inside {tr ...}" + |> add_warning input; + junk input; + consume_rows acc ~last_loc + in + let rows, brace_location = consume_rows [] ~last_loc:parent_markup_location in + let grid = List.rev rows in + (Table.Heavy_syntax.from_grid grid, brace_location) + +(* Consumes a sequence of table cells (starting with '{th ...}' or '{td ... }', + which are represented by [`Begin_table_cell] tokens). + + This function is called immediately after '{tr' ([`Begin_table_row]) is + read. The only "valid" way to exit is by reading a [`Right_brace] token, + which is consumed. *) +and heavy_table_row ~parent_markup input = + let rec consume_cell_items acc = + Reader.until_rbrace input acc >>> fun next_token -> + match next_token.Loc.value with + | `Begin_table_cell kind as token -> + junk input; + let content, token_after_list_item, _where_in_line = + block_element_list In_table_cell ~parent_markup:token input + in + (match token_after_list_item.value with + | `Right_brace -> junk input + | `End -> + Parse_error.not_allowed token_after_list_item.location + ~what:(Token.describe `End) ~in_what:(Token.describe token) + |> add_warning input); + consume_cell_items ((content, kind) :: acc) + | token -> + Parse_error.not_allowed next_token.location ~what:(Token.describe token) + ~in_what:(Token.describe parent_markup) + ~suggestion: + "Move outside of {table ...}, or inside {td ...} or {th ...}" + |> add_warning input; + junk input; + consume_cell_items acc + in + consume_cell_items [] + +(* {2 Entry point} *) + +let parse warnings tokens = + let input : input = { tokens; warnings } in + + let rec parse_block_elements () = + let elements, last_token, _where_in_line = + block_element_list Top_level ~parent_markup:`Comment input + in + + match last_token.value with + | `End -> elements + | `Right_brace -> + Parse_error.unpaired_right_brace last_token.location + |> add_warning input; + + let block = + Loc.same last_token (`Paragraph [ Loc.same last_token (`Word "}") ]) + in + + junk input; + elements @ (block :: parse_block_elements ()) + in + let ast = parse_block_elements () in + (ast, List.rev !(input.warnings)) diff --git a/vendor/odoc-parser/src/syntax.mli b/vendor/odoc-parser/src/syntax.mli new file mode 100644 index 000000000..a40b69841 --- /dev/null +++ b/vendor/odoc-parser/src/syntax.mli @@ -0,0 +1,6 @@ +(* Internal module, not exposed *) + +val parse : + Warning.t list ref -> + Token.t Loc.with_location Stream.t -> + Ast.t * Warning.t list diff --git a/vendor/odoc-parser/src/token.ml b/vendor/odoc-parser/src/token.ml new file mode 100644 index 000000000..83181fe45 --- /dev/null +++ b/vendor/odoc-parser/src/token.ml @@ -0,0 +1,194 @@ +(* This module contains the token type, emitted by the lexer, and consumed by + the comment syntax parser. It also contains two functions that format tokens + for error messages. *) + +type section_heading = [ `Begin_section_heading of int * string option ] +type style = [ `Bold | `Italic | `Emphasis | `Superscript | `Subscript ] +type paragraph_style = [ `Left | `Center | `Right ] + +type tag = + [ `Tag of + [ `Author of string + | `Deprecated + | `Param of string + | `Raise of string + | `Return + | `See of [ `Url | `File | `Document ] * string + | `Since of string + | `Before of string + | `Version of string + | `Canonical of string + | `Inline + | `Open + | `Closed + | `Hidden ] ] + +type t = + [ (* End of input. *) + `End + | (* Runs of whitespace. [Blank_line] is any run of whitespace that contains two + or more newline characters. [Single_newline] is any run of whitespace that + contains exactly one newline character. [Space] is any run of whitespace + that contains no newline characters. + + It is an important invariant in the parser that no adjacent whitespace + tokens are emitted by the lexer. Otherwise, there would be the need for + unbounded lookahead, a (co-?)ambiguity between + [Single_newline Single_newline] and [Blank_line], and other problems. *) + `Space of + string + | `Single_newline of string + | `Blank_line of string + | (* A right curly brace ([}]), i.e. end of markup. *) + `Right_brace + | `Right_code_delimiter + | (* Words are anything that is not whitespace or markup. Markup symbols can be + be part of words if escaped. + + Words can contain plus and minus symbols, but those are emitted as [Plus] + and [Minus] tokens. The parser combines plus and minus into words, except + when they appear first on a line, in which case the tokens are list item + bullets. *) + `Word of + string + | `Code_span of string + | `Raw_markup of string option * string + | `Math_span of string + | `Math_block of string + | `Begin_style of style + | `Begin_paragraph_style of paragraph_style + | (* Other inline element markup. *) + `Simple_reference of string + | `Begin_reference_with_replacement_text of string + | `Simple_link of string + | `Begin_link_with_replacement_text of string + | (* Leaf block element markup. *) + `Code_block of + (string Loc.with_location * string Loc.with_location option) option + * string + * string Loc.with_location + * bool + | `Verbatim of string + | `Modules of string + | (* List markup. *) + `Begin_list of [ `Unordered | `Ordered ] + | `Begin_list_item of [ `Li | `Dash ] + | (* Table markup. *) + `Begin_table_light + | `Begin_table_heavy + | `Begin_table_row + | `Begin_table_cell of [ `Header | `Data ] + | `Minus + | `Plus + | `Bar + | section_heading + | tag ] + +let print : [< t ] -> string = function + | `Begin_paragraph_style `Left -> "'{L'" + | `Begin_paragraph_style `Center -> "'{C'" + | `Begin_paragraph_style `Right -> "'{R'" + | `Begin_style `Bold -> "'{b'" + | `Begin_style `Italic -> "'{i'" + | `Begin_style `Emphasis -> "'{e'" + | `Begin_style `Superscript -> "'{^'" + | `Begin_style `Subscript -> "'{_'" + | `Begin_reference_with_replacement_text _ -> "'{{!'" + | `Begin_link_with_replacement_text _ -> "'{{:'" + | `Begin_list_item `Li -> "'{li ...}'" + | `Begin_list_item `Dash -> "'{- ...}'" + | `Begin_table_light -> "{t" + | `Begin_table_heavy -> "{table" + | `Begin_table_row -> "'{tr'" + | `Begin_table_cell `Header -> "'{th'" + | `Begin_table_cell `Data -> "'{td'" + | `Minus -> "'-'" + | `Plus -> "'+'" + | `Bar -> "'|'" + | `Begin_section_heading (level, label) -> + let label = match label with None -> "" | Some label -> ":" ^ label in + Printf.sprintf "'{%i%s'" level label + | `Tag (`Author _) -> "'@author'" + | `Tag `Deprecated -> "'@deprecated'" + | `Tag (`Param _) -> "'@param'" + | `Tag (`Raise _) -> "'@raise'" + | `Tag `Return -> "'@return'" + | `Tag (`See _) -> "'@see'" + | `Tag (`Since _) -> "'@since'" + | `Tag (`Before _) -> "'@before'" + | `Tag (`Version _) -> "'@version'" + | `Tag (`Canonical _) -> "'@canonical'" + | `Tag `Inline -> "'@inline'" + | `Tag `Open -> "'@open'" + | `Tag `Closed -> "'@closed'" + | `Tag `Hidden -> "'@hidden" + | `Raw_markup (None, _) -> "'{%...%}'" + | `Raw_markup (Some target, _) -> "'{%" ^ target ^ ":...%}'" + +(* [`Minus] and [`Plus] are interpreted as if they start list items. Therefore, + for error messages based on [Token.describe] to be accurate, formatted + [`Minus] and [`Plus] should always be plausibly list item bullets. *) +let describe : [< t | `Comment ] -> string = function + | `Word w -> Printf.sprintf "'%s'" w + | `Code_span _ -> "'[...]' (code)" + | `Raw_markup _ -> "'{%...%}' (raw markup)" + | `Begin_paragraph_style `Left -> "'{L ...}' (left alignment)" + | `Begin_paragraph_style `Center -> "'{C ...}' (center alignment)" + | `Begin_paragraph_style `Right -> "'{R ...}' (right alignment)" + | `Begin_style `Bold -> "'{b ...}' (boldface text)" + | `Begin_style `Italic -> "'{i ...}' (italic text)" + | `Begin_style `Emphasis -> "'{e ...}' (emphasized text)" + | `Begin_style `Superscript -> "'{^...}' (superscript)" + | `Begin_style `Subscript -> "'{_...}' (subscript)" + | `Math_span _ -> "'{m ...}' (math span)" + | `Math_block _ -> "'{math ...}' (math block)" + | `Simple_reference _ -> "'{!...}' (cross-reference)" + | `Begin_reference_with_replacement_text _ -> + "'{{!...} ...}' (cross-reference)" + | `Simple_link _ -> "'{:...} (external link)'" + | `Begin_link_with_replacement_text _ -> "'{{:...} ...}' (external link)" + | `End -> "end of text" + | `Space _ -> "whitespace" + | `Single_newline _ -> "line break" + | `Blank_line _ -> "blank line" + | `Right_brace -> "'}'" + | `Right_code_delimiter -> "']}'" + | `Code_block _ -> "'{[...]}' (code block)" + | `Verbatim _ -> "'{v ... v}' (verbatim text)" + | `Modules _ -> "'{!modules ...}'" + | `Begin_list `Unordered -> "'{ul ...}' (bulleted list)" + | `Begin_list `Ordered -> "'{ol ...}' (numbered list)" + | `Begin_list_item `Li -> "'{li ...}' (list item)" + | `Begin_list_item `Dash -> "'{- ...}' (list item)" + | `Begin_table_light -> "'{t ...}' (table)" + | `Begin_table_heavy -> "'{table ...}' (table)" + | `Begin_table_row -> "'{tr ...}' (table row)" + | `Begin_table_cell `Header -> "'{th ... }' (table header cell)" + | `Begin_table_cell `Data -> "'{td ... }' (table data cell)" + | `Minus -> "'-' (bulleted list item)" + | `Plus -> "'+' (numbered list item)" + | `Bar -> "'|'" + | `Begin_section_heading (level, _) -> + Printf.sprintf "'{%i ...}' (section heading)" level + | `Tag (`Author _) -> "'@author'" + | `Tag `Deprecated -> "'@deprecated'" + | `Tag (`Param _) -> "'@param'" + | `Tag (`Raise _) -> "'@raise'" + | `Tag `Return -> "'@return'" + | `Tag (`See _) -> "'@see'" + | `Tag (`Since _) -> "'@since'" + | `Tag (`Before _) -> "'@before'" + | `Tag (`Version _) -> "'@version'" + | `Tag (`Canonical _) -> "'@canonical'" + | `Tag `Inline -> "'@inline'" + | `Tag `Open -> "'@open'" + | `Tag `Closed -> "'@closed'" + | `Tag `Hidden -> "'@hidden" + | `Comment -> "top-level text" + +let describe_element = function + | `Reference (`Simple, _, _) -> describe (`Simple_reference "") + | `Reference (`With_text, _, _) -> + describe (`Begin_reference_with_replacement_text "") + | `Link _ -> describe (`Begin_link_with_replacement_text "") + | `Heading (level, _, _) -> describe (`Begin_section_heading (level, None)) diff --git a/vendor/odoc-parser/src/warning.ml b/vendor/odoc-parser/src/warning.ml new file mode 100644 index 000000000..61dd987f8 --- /dev/null +++ b/vendor/odoc-parser/src/warning.ml @@ -0,0 +1,29 @@ +type t = { location : Loc.span; message : string } + +let to_string e = + let { location; message } = e in + let location_string = + if location.start.line = location.end_.line then + Printf.sprintf "line %i, characters %i-%i" location.start.line + location.start.column location.end_.column + else + Printf.sprintf "line %i, character %i to line %i, character %i" + location.start.line location.start.column location.end_.line + location.end_.column + in + Printf.sprintf "File \"%s\", %s:\n%s" location.file location_string message + +let pp fmt v = Format.fprintf fmt "%s" (to_string v) + +let kasprintf k fmt = + Format.(kfprintf (fun _ -> k (flush_str_formatter ())) str_formatter fmt) + +let kmake k ?suggestion = + kasprintf (fun message -> + match suggestion with + | None -> k message + | Some suggestion -> k (message ^ "\nSuggestion: " ^ suggestion)) + +let make ?suggestion format = + let k message location = { location; message } in + kmake k ?suggestion format diff --git a/vendor/update-odoc-parser.sh b/vendor/update-odoc-parser.sh new file mode 100755 index 000000000..364dcfa61 --- /dev/null +++ b/vendor/update-odoc-parser.sh @@ -0,0 +1,26 @@ +#!/bin/bash + +version=v2.3.0 + +set -e -o pipefail + +TMP="$(mktemp -d)" +trap "rm -rf $TMP" EXIT + +rm -rf odoc-parser +mkdir -p odoc-parser/src + +( + cd $TMP + git clone https://github.com/ocaml/odoc.git + cd odoc + git checkout $version +) + +SRC=$TMP/odoc + +cp -v $SRC/src/parser/*.{ml,mli,mll} odoc-parser/src +cp -v $SRC/LICENSE odoc-parser/ + +git checkout odoc-parser/src/dune +git add -A .