Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Compatibility with odoc-parser.2.3.0 #1184

Merged
merged 4 commits into from
Oct 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@

## Features

- Compatibility with Odoc 2.3.0, with support for the introduced syntax: tables,
and "codeblock output" (#1184)

- Display text of references in doc strings (#1166)

- Add mark/remove unused actions for open, types, for loop indexes, modules,
Expand Down
3 changes: 2 additions & 1 deletion dune-project
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,8 @@ possible and does not make any assumptions about IO.
ordering
dune-build-info
spawn
(odoc-parser (and (>= 2.0.0) (< 2.3.0)))
astring
camlp-streams
(ppx_expect (and (>= v0.15.0) :with-test))
(ocamlformat (and :with-test (= 0.24.1)))
(ocamlc-loc (>= 3.7.0))
Expand Down
6 changes: 4 additions & 2 deletions flake.nix
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,8 @@
ppx_yojson_conv_lib
uutf
lsp
odoc-parser
astring
camlp-streams
merlin-lib
];
doCheck = false;
Expand All @@ -110,7 +111,8 @@
duneVersion = "3";
buildInputs = with pkgs.ocamlPackages; [
ocamlc-loc
odoc-parser
astring
camlp-streams
dune-build-info
re
dune-rpc
Expand Down
3 changes: 2 additions & 1 deletion ocaml-lsp-server.opam
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ depends: [
"ordering"
"dune-build-info"
"spawn"
"odoc-parser" {>= "2.0.0" & < "2.3.0"}
"astring"
"camlp-streams"
"ppx_expect" {>= "v0.15.0" & with-test}
"ocamlformat" {with-test & = "0.24.1"}
"ocamlc-loc" {>= "3.7.0"}
Expand Down
117 changes: 114 additions & 3 deletions ocaml-lsp-server/src/doc_to_md.ml
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,41 @@ let rec nestable_block_element_to_block
let paragraph = Block.Paragraph.make inline in
let meta = loc_to_meta location in
Block.Paragraph (paragraph, meta)
| { value = `Table ((grid, alignment), _); location } ->
let meta = loc_to_meta location in
let cell
((c, _) : Odoc_parser.Ast.nestable_block_element Odoc_parser.Ast.cell) =
let c = nestable_block_element_list_to_inlines c in
(c, (" ", " ") (* Initial and trailing blanks *))
in
let header_row
(row : Odoc_parser.Ast.nestable_block_element Odoc_parser.Ast.row) =
let row = List.map ~f:cell row in
((`Header row, Meta.none), "")
in
let data_row
(row : Odoc_parser.Ast.nestable_block_element Odoc_parser.Ast.row) =
let row = List.map ~f:cell row in
((`Data row, Meta.none), "")
in
let alignment_row =
match alignment with
| None -> []
| Some alignment ->
let alignment =
List.map
~f:(fun x -> ((x, 1 (* nb of separator *)), Meta.none))
alignment
in
[ ((`Sep alignment, Meta.none), "") ]
in
let rows =
match grid with
| [] -> assert false
| h :: t -> (header_row h :: alignment_row) @ List.map ~f:data_row t
in
let tbl = Block.Table.make rows in
Block.Ext_table (tbl, meta)
| { value = `List (kind, style, xs); location } ->
let type' =
match kind with
Expand Down Expand Up @@ -140,19 +175,31 @@ let rec nestable_block_element_to_block
let l = Block.List'.make ~tight type' list_items in
let meta = loc_to_meta location in
Block.List (l, meta)
| { value = `Code_block (metadata, { value = code; location = code_loc })
| { value =
`Code_block
{ meta = metadata
; delimiter = _
; content = { value = code; location = code_loc }
; output
}
; location
} ->
let info_string =
match metadata with
| None -> Some ("ocaml", loc_to_meta code_loc)
| Some ({ value = lang; location = lang_log }, _env) ->
| Some { language = { value = lang; location = lang_log }; tags = _ } ->
Some (lang, loc_to_meta lang_log)
in
let block_line = Block_line.list_of_string code in
let code_block = Block.Code_block.make ?info_string block_line in
let meta = loc_to_meta location in
Block.Code_block (code_block, meta)
let main_block = Block.Code_block (code_block, meta) in
let output_block =
match output with
| None -> []
| Some output -> [ nestable_block_element_list_to_block output ]
in
Block.Blocks (main_block :: output_block, meta)
| { value = `Verbatim code; location } ->
let info_string = Some ("verb", Meta.none) in
let block_line = Block_line.list_of_string code in
Expand All @@ -165,6 +212,68 @@ let rec nestable_block_element_to_block
let meta = loc_to_meta location in
Block.Ext_math_block (code_block, meta)

and nestable_block_element_to_inlines
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This function might require a lot of maintenance, and it hurts the content.
Would it be reasonable to use raw HTML tags for tables ?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree that this function is not ideal...

An advantage of using proper tables rather than raw html is that the output might be more readable even without translating it to html. Also, we get cmarkit's type guarantees.

Raw html tags could be an idea, but their start/end semantics are not that simple (https://spec.commonmark.org/0.30/#html-blocks).

In particular, it seems to break out of the containing block. For instance,

- first item
  + first subitem
  + second subitem

renders by github as:

  • first item
    • first subitem
    • second subitem

while with html inside (the blank line is the end condition for the uninterpreted html)

- first item
  + first subitem
<table><tr><td>

useless table
</td></tr></table>

  + second subitem

renders as

  • first item
    • first subitem

useless table

  • second subitem

@voodoos would you like me to investigate the "raw html tag" solution?

Copy link
Collaborator

@voodoos voodoos Oct 16, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In particular, it seems to break out of the containing block

So in the case of tables the content would break out of the table ?
It looks like raw html blocks are only intended to be used at the top level...

Would using only raw html (for the entire table, not only the cell content) be an option ?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So in the case of tables the content would break out of the table ?

What I meant is that the whole table would break out of its container block (recall that all of this is because, in odoc, tables are allowed to be nested in blocks, while it is not allowed in markdown).

For instance, in the example above, I had in mind a table inside a list (which is possible in odoc).

Julow suggested (IIUC) to render in this case the table, not by using markdown tables, but by embedding raw <table>/<tr>/<td> before the table/line/cell, and the corresponding closing tag after.
The markdown example above was thus the translation of a (nested) list containing a table, using this method.

However, we can see, from how github renders it, that in fact the table is not interpreted as inside the list, but "toplevel", and thus also breaks the rest of the list.
(I included a nested list to make it more visible that the included html breaks the flow of the list interpretation, but it might have created noise!)

It seems that cmarkit's type system won't enforce constraints on raw html embedding, which is frightening!

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So that's not suitable since it would also break more visibly things like numbered lists (see below).
So far I think the most convincing workaround is your current proposal.

- first item
  1. first subitem
<table><tr><td>

useless table
</td></tr></table>

  2. second subitem
  • first item
    1. first subitem

useless table

  1. second subitem

Copy link
Collaborator Author

@panglesd panglesd Oct 17, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Update: The example was wrong, it is in fact possible to have html inside list items in commonmark.

(With the help of cmarkit) I could generate the right markdown for the example above:

So:

1. first item
   1. first subitem
      <table><tr><td>
      
      useless table
      </td></tr></table>
      
   2. second subitem

renders as

  1. first item
    1. first subitem

      useless table

    2. second subitem

So I think the suggestion to use html is still valid, and the discussion open again. Speaking of tables, let me use one to summarize the differences:

| | Current approach | Html embedding |
|---|---|---|
| Source | 🟠 Source is not very readable due to tables not properly formatted, and blocks turned into inlines | 🟠 Source is not very readable, due to html tags and blank lines added all over the place |
| Faithful rendering | 🔴 The content is modified | 🟢 The content is preserved |
| Compatibility | 🟠 The renderer needs to know about tables, which is not in the commonmark standard | 🟠 The parser needs to follow the commonmark standard, which is different from the original one if I understand correctly |
| Rendering target | 🟢 No constraint | 🔴 Has to be HTML/markdown, otherwise (eg in latex) embedded html will certainly be ignored |
| Scariness | 🟢 Not scary, a problem won't leak outside of the table | 🟠 Html embedding is a little bit scary as making a mistake can break the whole output... |
| Maintainability | 🔴 Adds maintenance | 🟢 Does not add maintenance |

If the "rendering target" can handle embedded html, maybe the html embedding approach is better? The "content is preserved" is quite a strong benefit, I guess...

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

According to the specification, LSP clients that have the capability to parse markdown should follow the Github Flavored Markdown Specification.

That plays in favor of Html but they also warn that:

Please Note that clients might sanitize the return markdown. A client could
decide to remove HTML from the markdown to avoid script execution.

Copy link
Collaborator

@voodoos voodoos Oct 17, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And there is more: the client can advertise which tags will actually be allowed in markdown with the "MarkdownClientCapabilities.allowedTags" capability.

If we want to respect the protocol we need to have at least two fallbacks:

  • A text-only version when markdown is not allowed. (Right now ocaml-lsp simply prints the raw comment which is okay)
  • A markdown without html when required tags are not allowed

And we could do better by using raw html when the client accepts it.

In any case we need to keep the current version, and I'm not sure having the additional html one is really worth it ?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In any case we need to keep the current version, and I'm not sure having the additional html one is really worth it ?

I agree with this analysis, and I do not think it is worth it: in the current approach, the content is only hurt in the case of tables/lists inside tables, which I think will happen very seldom.

If it turns out to be a problem, we can add the html approach later.

(* Flattens one located nestable block element into a single cmarkit inline.
   Block structure is not kept: tables and lists are joined with textual
   separators, and code/verbatim content becomes a code span. *)
(nestable :
Odoc_parser.Ast.nestable_block_element Odoc_parser.Loc.with_location) =
match nestable with
(* A paragraph is already inline content; its own location is not used. *)
| { value = `Paragraph text; location = _ } ->
inline_element_list_to_inlines text
(* A table is flattened row by row. Only the grid is read; the other
   components of the table payload are ignored. *)
| { value = `Table ((grid, _), _); location } ->
let meta = loc_to_meta location in
let cell
((c, _) : Odoc_parser.Ast.nestable_block_element Odoc_parser.Ast.cell) =
nestable_block_element_list_to_inlines c
in
let row (row : Odoc_parser.Ast.nestable_block_element Odoc_parser.Ast.row) =
(* Each row is rendered as " | " followed by every cell, each cell being
   followed by " | ". *)
let sep = Inline.Text (" | ", Meta.none) in
sep :: List.concat_map ~f:(fun c -> [ cell c; sep ]) row
in
(* Rows are concatenated directly, with no line break between them. *)
let rows = List.concat_map ~f:row grid in
Inline.Inlines (rows, meta)
(* A list is flattened by prefixing every item with " - "; the first two
   components of the list payload are ignored. *)
| { value = `List (_, _, xs); location } ->
let meta = loc_to_meta location in
let item i = nestable_block_element_list_to_inlines i in
let items =
let sep = Inline.Text (" - ", Meta.none) in
List.concat_map ~f:(fun i -> [ sep; item i ]) xs
in
Inline.Inlines (items, meta)
(* Rendered as the text "modules: " followed by the space-separated values. *)
| { value = `Modules modules; location } ->
let meta = loc_to_meta location in
let s = List.map ~f:(fun x -> x.Odoc_parser.Loc.value) modules in
Inline.Text ("modules: " ^ String.concat ~sep:" " s, meta)
(* Only the code content is kept, as a single-backtick code span; the [meta],
   [delimiter] and [output] fields are dropped. *)
| { value =
`Code_block
{ meta = _
; delimiter = _
; content = { value = code; location = code_loc }
; output = _
}
; location
} ->
let meta = loc_to_meta location in
let meta_code = loc_to_meta code_loc in
let code_span =
Inline.Code_span.make ~backtick_count:1 [ ("", (code, meta_code)) ]
in
Inline.Code_span (code_span, meta)
(* Verbatim text also becomes a single-backtick code span. *)
| { value = `Verbatim code; location } ->
let meta = loc_to_meta location in
let code_span =
Inline.Code_span.make ~backtick_count:1 [ ("", (code, Meta.none)) ]
in
Inline.Code_span (code_span, meta)
(* A math block becomes a math span built with [~display:true]. *)
| { value = `Math_block code; location } ->
let meta = loc_to_meta location in
let code_span =
Inline.Math_span.make ~display:true [ ("", (code, Meta.none)) ]
in
Inline.Ext_math_span (code_span, meta)

(* Converts every element of [l] with [nestable_block_element_to_inlines] and
   groups the results, in order, under one [Inline.Inlines] node that carries
   no location metadata. *)
and nestable_block_element_list_to_inlines l =
  Inline.Inlines (List.map l ~f:nestable_block_element_to_inlines, Meta.none)

(* Converts every element of [nestables] with
   [nestable_block_element_to_block] and groups the results, in order, under
   one [Block.Blocks] node that carries no location metadata. *)
and nestable_block_element_list_to_block nestables =
  Block.Blocks
    (List.map nestables ~f:nestable_block_element_to_block, Meta.none)
Expand Down Expand Up @@ -261,6 +370,7 @@ let tag_to_block ~meta (tag : Odoc_parser.Ast.tag) =
| `Inline -> format_tag_empty "@inline"
| `Open -> format_tag_empty "@open"
| `Closed -> format_tag_empty "@closed"
| `Hidden -> format_tag_empty "@hidden"

let rec block_element_to_block
(block_element :
Expand All @@ -280,6 +390,7 @@ let rec block_element_to_block
| `Modules _
| `Code_block _
| `Verbatim _
| `Table _
| `Math_block _ )
; location = _
} as nestable -> nestable_block_element_to_block nestable
Expand Down
2 changes: 1 addition & 1 deletion ocaml-lsp-server/src/dune
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
merlin-lib.utils
merlin-lib.extend
cmarkit
odoc-parser
odoc_parser
ppx_yojson_conv_lib
re
stdune
Expand Down
41 changes: 41 additions & 0 deletions ocaml-lsp-server/test/e2e-new/doc_to_md.ml
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,44 @@ let%expect_test "subscript" =

translate doc |> print_doc;
[%expect {| a\_{b} |}]

(* Heavy table syntax: two rows of [td] cells are printed as two
   pipe-delimited rows. *)
let%expect_test "table" =
  let source =
{| {table {tr {td some content} {td some other content}} {tr {td in another} {td row}}} |}
  in
  print_doc (translate source);
  [%expect
{|
| some content | some other content |
| in another | row | |}]

(* Light table syntax with an alignment row: the alignment markers are kept in
   the output, each with a single dash. *)
let%expect_test "table2" =
  let source = {|
{t | z | f |
|:-----|---:|
| fse | e | }
|} in
  let markdown = translate source in
  print_doc markdown;
  [%expect {|
| z | f |
|:-|-:|
| fse | e | |}]

(* A list nested in a table cell is flattened onto the cell's single line,
   each item introduced by " - ". *)
let%expect_test "problematic_translation" =
  let source = {| {table {tr {td {ul {li first item} {li second item}}}} } |} in
  print_doc (translate source);
  [%expect {|
| - first item - second item | |}]

(* A code block with an output section prints the fenced code first, then the
   translated output content. *)
let%expect_test "code_with_output" =
  let source = {| {@ocaml[foo][output {b foo}]} |} in
  print_doc (translate source);
  [%expect {|
```ocaml
foo
```
output **foo** |}]
Loading
Loading