From a536d90eefb1e9055d2df077c15ae8faab6a3a75 Mon Sep 17 00:00:00 2001 From: Johannes Kaufmann Date: Fri, 2 Jul 2021 16:47:53 +0200 Subject: [PATCH] fix nested lists --- commonmark.go | 39 +++-- from.go | 11 ++ .../TestCommonmark/link/output.inlined.golden | 4 +- .../link/output.referenced_collapsed.golden | 4 +- .../link/output.referenced_full.golden | 4 +- .../link/output.referenced_shortcut.golden | 4 +- .../link/output.relative.golden | 4 +- testdata/TestCommonmark/list/goldmark.golden | 45 +++++- testdata/TestCommonmark/list/input.html | 35 +++++ .../list/output.asterisks.golden | 53 +++++-- .../TestCommonmark/list/output.dash.golden | 53 +++++-- .../TestCommonmark/list/output.plus.golden | 53 +++++-- .../list_nested/goldmark.golden | 83 +++++++++- .../TestCommonmark/list_nested/input.html | 97 ++++++++++++ .../list_nested/output.asterisks.golden | 62 +++++++- .../list_nested/output.dash.golden | 62 +++++++- .../list_nested/output.plus.golden | 62 +++++++- testdata/TestPlugins/checkbox/goldmark.golden | 7 +- .../checkbox/output.default.golden | 8 +- .../output.emphasis_asterisks.golden | 2 +- .../output.emphasis_underscores.golden | 2 +- .../blog.golang.org/output.inlined.golden | 2 +- .../output.referenced_collapsed.golden | 2 +- .../output.referenced_full.golden | 2 +- .../output.referenced_shortcut.golden | 2 +- .../bonnerruderverein.de/goldmark.golden | 14 +- .../output.default.golden | 36 ++--- .../snippets/nav_nested_list/goldmark.golden | 6 +- .../nav_nested_list/output.default.golden | 30 ++-- utils.go | 146 ++++++++++++++++++ 30 files changed, 783 insertions(+), 151 deletions(-) diff --git a/commonmark.go b/commonmark.go index f42be76..3a79f81 100644 --- a/commonmark.go +++ b/commonmark.go @@ -2,11 +2,11 @@ package md import ( "fmt" + "unicode" "regexp" "strconv" "strings" - "unicode" "unicode/utf8" "github.com/JohannesKaufmann/html-to-markdown/escape" @@ -23,7 +23,7 @@ var commonmark = []Rule{ // we have a nested list, were the ul/ol is inside a list item // -> based on work done by @requilence from @anytypeio - if parent.Is("li") && parent.Children().Last().IsSelection(selec) { + if (parent.Is("li") || parent.Is("ul") || parent.Is("ol")) && parent.Children().Last().IsSelection(selec) { // add a line break prefix if the parent's text node doesn't have it. // that makes sure that every list item is on its on line lastContentTextNode := strings.TrimRight(parent.Nodes[0].FirstChild.Data, " \t") @@ -49,21 +49,28 @@ var commonmark = []Rule{ return nil } - parent := selec.Parent() - index := selec.Index() - - var prefix string - if parent.Is("ol") { - prefix = strconv.Itoa(index+1) + ". " - } else { - prefix = opt.BulletListMarker + " " - } // remove leading newlines content = leadingNewlinesR.ReplaceAllString(content, "") // replace trailing newlines with just a single one content = trailingNewlinesR.ReplaceAllString(content, "\n") - // indent - content = indentR.ReplaceAllString(content, "\n ") + // remove leading spaces + content = strings.TrimLeft(content, " ") + + prefix := selec.AttrOr(attrListPrefix, "") + + // `prefixCount` is not nessesarily the length of the empty string `prefix` + // but how much space is reserved for the prefixes of the siblings. + prefixCount, previousPrefixCounts := countListParents(opt, selec) + + // if the prefix is not needed, balance it by adding the usual prefix spaces + if prefix == "" { + prefix = strings.Repeat(" ", prefixCount) + } + // indent the prefix so that the nested links are represented + indent := strings.Repeat(" ", previousPrefixCounts) + prefix = indent + prefix + + content = IndentMultiLineListItem(opt, content, prefixCount+previousPrefixCounts) return String(prefix + content + "\n") }, @@ -82,6 +89,12 @@ var commonmark = []Rule{ text = multipleSpacesR.ReplaceAllString(text, " ") text = escape.MarkdownCharacters(text) + + // if its inside a list, trim the spaces to not mess up the indentation + if IndexWithText(selec) == 0 && (selec.Parent().Is("li") || selec.Parent().Is("ol") || selec.Parent().Is("ul")) { + text = strings.Trim(text, ` `) + } + return &text }, }, diff --git a/from.go b/from.go index 9243b8a..b61f3f2 100644 --- a/from.go +++ b/from.go @@ -93,6 +93,10 @@ func validateOptions(opt Options) error { return nil } +var ( + attrListPrefix = "data-converter-list-prefix" +) + // NewConverter initializes a new converter and holds all the rules. // - `domain` is used for links and images to convert relative urls ("/image.png") to absolute urls. // - CommonMark is the default set of rules. Set enableCommonmark to false if you want @@ -111,6 +115,13 @@ func NewConverter(domain string, enableCommonmark bool, options *Options) *Conve s.SetAttr("data-index", strconv.Itoa(i+1)) }) }) + conv.before = append(conv.before, func(selec *goquery.Selection) { + selec.Find("li").Each(func(i int, s *goquery.Selection) { + prefix := getListPrefix(options, s) + + s.SetAttr(attrListPrefix, prefix) + }) + }) conv.after = append(conv.after, func(markdown string) string { markdown = strings.TrimSpace(markdown) markdown = multipleNewLinesRegex.ReplaceAllString(markdown, "\n\n") diff --git a/testdata/TestCommonmark/link/output.inlined.golden b/testdata/TestCommonmark/link/output.inlined.golden index 620e0a5..b60a5f4 100644 --- a/testdata/TestCommonmark/link/output.inlined.golden +++ b/testdata/TestCommonmark/link/output.inlined.golden @@ -15,8 +15,8 @@ Second Text](http://multi.org/) - [First Text\ - \ - Second Text](http://list.org/) + \ + Second Text](http://list.org/) [GitHub](https://github.com "GitHub") diff --git a/testdata/TestCommonmark/link/output.referenced_collapsed.golden b/testdata/TestCommonmark/link/output.referenced_collapsed.golden index 4545b88..8979a92 100644 --- a/testdata/TestCommonmark/link/output.referenced_collapsed.golden +++ b/testdata/TestCommonmark/link/output.referenced_collapsed.golden @@ -15,8 +15,8 @@ Second Text][] - [First Text\ - \ - Second Text][] + \ + Second Text][] [GitHub][] diff --git a/testdata/TestCommonmark/link/output.referenced_full.golden b/testdata/TestCommonmark/link/output.referenced_full.golden index 2906bd8..d092b13 100644 --- a/testdata/TestCommonmark/link/output.referenced_full.golden +++ b/testdata/TestCommonmark/link/output.referenced_full.golden @@ -15,8 +15,8 @@ Second Text][7] - [First Text\ - \ - Second Text][8] + \ + Second Text][8] [GitHub][9] diff --git a/testdata/TestCommonmark/link/output.referenced_shortcut.golden b/testdata/TestCommonmark/link/output.referenced_shortcut.golden index d65f8f2..30a6b6b 100644 --- a/testdata/TestCommonmark/link/output.referenced_shortcut.golden +++ b/testdata/TestCommonmark/link/output.referenced_shortcut.golden @@ -15,8 +15,8 @@ Second Text] - [First Text\ - \ - Second Text] + \ + Second Text] [GitHub] diff --git a/testdata/TestCommonmark/link/output.relative.golden b/testdata/TestCommonmark/link/output.relative.golden index 7a8b335..1a4a957 100644 --- a/testdata/TestCommonmark/link/output.relative.golden +++ b/testdata/TestCommonmark/link/output.relative.golden @@ -15,8 +15,8 @@ Second Text](http://multi.org/) - [First Text\ - \ - Second Text](http://list.org/) + \ + Second Text](http://list.org/) [GitHub](https://github.com "GitHub") diff --git a/testdata/TestCommonmark/list/goldmark.golden b/testdata/TestCommonmark/list/goldmark.golden index 02e7de9..963e2ff 100644 --- a/testdata/TestCommonmark/list/goldmark.golden +++ b/testdata/TestCommonmark/list/goldmark.golden @@ -55,15 +55,41 @@

15

  • -

    First Thing

    +

    16

    +
  • +
  • +

    17

    +
  • +
  • +

    18

    +
  • +
  • +

    19

    +
  • +
  • +

    20

    +
  • +
  • +

    +
  • +
  • +

    22

    +
  • + + +
      +
    1. First Thing
      • Some Thing
      • Another Thing
    2. -
    3. -

      Second Thing

      -
    4. +
    5. Second Thing

    header1