Skip to content

Commit

Permalink
fix nested lists
Browse files Browse the repository at this point in the history
  • Loading branch information
JohannesKaufmann committed Jul 2, 2021
1 parent 23e6644 commit a536d90
Show file tree
Hide file tree
Showing 30 changed files with 783 additions and 151 deletions.
39 changes: 26 additions & 13 deletions commonmark.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@ package md

import (
"fmt"
"unicode"

"regexp"
"strconv"
"strings"
"unicode"
"unicode/utf8"

"github.com/JohannesKaufmann/html-to-markdown/escape"
Expand All @@ -23,7 +23,7 @@ var commonmark = []Rule{

// we have a nested list, where the ul/ol is inside a list item
// -> based on work done by @requilence from @anytypeio
if parent.Is("li") && parent.Children().Last().IsSelection(selec) {
if (parent.Is("li") || parent.Is("ul") || parent.Is("ol")) && parent.Children().Last().IsSelection(selec) {
// add a line break prefix if the parent's text node doesn't have it.
// that makes sure that every list item is on its own line
lastContentTextNode := strings.TrimRight(parent.Nodes[0].FirstChild.Data, " \t")
Expand All @@ -49,21 +49,28 @@ var commonmark = []Rule{
return nil
}

parent := selec.Parent()
index := selec.Index()

var prefix string
if parent.Is("ol") {
prefix = strconv.Itoa(index+1) + ". "
} else {
prefix = opt.BulletListMarker + " "
}
// remove leading newlines
content = leadingNewlinesR.ReplaceAllString(content, "")
// replace trailing newlines with just a single one
content = trailingNewlinesR.ReplaceAllString(content, "\n")
// indent
content = indentR.ReplaceAllString(content, "\n ")
// remove leading spaces
content = strings.TrimLeft(content, " ")

prefix := selec.AttrOr(attrListPrefix, "")

// `prefixCount` is not necessarily the length of the empty string `prefix`
// but how much space is reserved for the prefixes of the siblings.
prefixCount, previousPrefixCounts := countListParents(opt, selec)

// if the prefix is not needed, balance it by adding the usual prefix spaces
if prefix == "" {
prefix = strings.Repeat(" ", prefixCount)
}
// indent the prefix so that the nested lists are represented
indent := strings.Repeat(" ", previousPrefixCounts)
prefix = indent + prefix

content = IndentMultiLineListItem(opt, content, prefixCount+previousPrefixCounts)

return String(prefix + content + "\n")
},
Expand All @@ -82,6 +89,12 @@ var commonmark = []Rule{
text = multipleSpacesR.ReplaceAllString(text, " ")

text = escape.MarkdownCharacters(text)

// if it's inside a list, trim the spaces to not mess up the indentation
if IndexWithText(selec) == 0 && (selec.Parent().Is("li") || selec.Parent().Is("ol") || selec.Parent().Is("ul")) {
text = strings.Trim(text, ` `)
}

return &text
},
},
Expand Down
11 changes: 11 additions & 0 deletions from.go
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,10 @@ func validateOptions(opt Options) error {
return nil
}

// attrListPrefix names the attribute that a "before" hook sets on every <li>
// to hold its list prefix; the list item rule reads it back via AttrOr.
var attrListPrefix = "data-converter-list-prefix"

// NewConverter initializes a new converter and holds all the rules.
// - `domain` is used for links and images to convert relative urls ("/image.png") to absolute urls.
// - CommonMark is the default set of rules. Set enableCommonmark to false if you want
Expand All @@ -111,6 +115,13 @@ func NewConverter(domain string, enableCommonmark bool, options *Options) *Conve
s.SetAttr("data-index", strconv.Itoa(i+1))
})
})
conv.before = append(conv.before, func(selec *goquery.Selection) {
selec.Find("li").Each(func(i int, s *goquery.Selection) {
prefix := getListPrefix(options, s)

s.SetAttr(attrListPrefix, prefix)
})
})
conv.after = append(conv.after, func(markdown string) string {
markdown = strings.TrimSpace(markdown)
markdown = multipleNewLinesRegex.ReplaceAllString(markdown, "\n\n")
Expand Down
4 changes: 2 additions & 2 deletions testdata/TestCommonmark/link/output.inlined.golden
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
Second Text](http://multi.org/)

- [First Text\
\
Second Text](http://list.org/)
\
Second Text](http://list.org/)

[GitHub](https://github.com "GitHub")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
Second Text][]

- [First Text\
\
Second Text][]
\
Second Text][]

[GitHub][]

Expand Down
4 changes: 2 additions & 2 deletions testdata/TestCommonmark/link/output.referenced_full.golden
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
Second Text][7]

- [First Text\
\
Second Text][8]
\
Second Text][8]

[GitHub][9]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
Second Text]

- [First Text\
\
Second Text]
\
Second Text]

[GitHub]

Expand Down
4 changes: 2 additions & 2 deletions testdata/TestCommonmark/link/output.relative.golden
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,8 @@
Second Text](http://multi.org/)

- [First Text\
\
Second Text](http://list.org/)
\
Second Text](http://list.org/)

[GitHub](https://github.com "GitHub")

Expand Down
45 changes: 41 additions & 4 deletions testdata/TestCommonmark/list/goldmark.golden
Original file line number Diff line number Diff line change
Expand Up @@ -55,15 +55,41 @@
<p>15</p>
</li>
<li>
<p>First Thing</p>
<p>16</p>
</li>
<li>
<p>17</p>
</li>
<li>
<p>18</p>
</li>
<li>
<p>19</p>
</li>
<li>
<p>20</p>
</li>
<li>
<p><img src="http://example.com/example.png" alt=""></p>
</li>
<li>
<p>22</p>
</li>
</ol>
<ul>
<li>Link:<a href="https://example.com">example</a> works</li>
<li>Link:
<a href="https://example.com">example</a>
works</li>
</ul>
<ol>
<li>First Thing
<ul>
<li>Some Thing</li>
<li>Another Thing</li>
</ul>
</li>
<li>
<p>Second Thing</p>
</li>
<li>Second Thing</li>
</ol>
<ul>
<li>
Expand Down Expand Up @@ -92,3 +118,14 @@
<p>- Not List</p>
<p>1. Not List 1. Not List
1. Not List</p>
<ol>
<li>
<p>A paragraph
with two lines.</p>
<pre><code>indented code
</code></pre>
<blockquote>
<p>A block quote.</p>
</blockquote>
</li>
</ol>
35 changes: 35 additions & 0 deletions testdata/TestCommonmark/list/input.html
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,29 @@
<li>13</li>
<li>14</li>
<li>15</li>
<li></li>
<li> </li>
<li> <p> </p> </li>
<li>16</li>
<li>17</li>
<li>18</li>
<li>19</li>
<li>20</li>
<li><img src="/example.png" /></li>
<li>22</li>
</ol>


<!--list with link inside-->
<ul>
<li>Link: <a href="https://example.com" target="_blank">example</a> works</li>
<li>
Link:
<a href="https://example.com" target="_blank">example</a>
works
</li>
</ul>

<!--ol with a ul inside-->
<ol>
<li>
Expand Down Expand Up @@ -78,3 +98,18 @@

<p>1. Not List 1. Not List
1. Not List</p>


<!--with other whitespace aware elements-->
<ol>
<li>
<p>A paragraph
with two lines.</p>

<pre><code>indented code</code></pre>

<blockquote>
<p>A block quote.</p>
</blockquote>
</li>
</ol>
53 changes: 39 additions & 14 deletions testdata/TestCommonmark/list/output.asterisks.golden
Original file line number Diff line number Diff line change
Expand Up @@ -4,25 +4,38 @@
1. First Thing
2. Second Thing

1. 1
2. 2
3. 3
4. 4
5. 5
6. 6
7. 7
8. 8
9. 9
01. 1
02. 2
03. 3
04. 4
05. 5
06. 6
07. 7
08. 8
09. 9
10. 10
11. 11
12. 12
13. 13
14. 14
15. 15
19. 16
20. 17
21. 18
22. 19
23. 20
24. ![](http://example.com/example.png)
25. 22

* Link:[example](https://example.com) works
* Link:
[example](https://example.com)
works


1. First Thing
* Some Thing
* Another Thing
* Some Thing
* Another Thing
2. Second Thing

* foo
Expand All @@ -32,13 +45,25 @@
* Ending with
* A space

* Indent First Thing
* Indent First Thing

Second Thing
Second Thing

* Third Thing

\- Not List

1\. Not List 1. Not List
1\. Not List
1\. Not List

1. A paragraph
with two lines.


```
indented code
```



> A block quote.
Loading

0 comments on commit a536d90

Please sign in to comment.