Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature text enrichment #29

Merged
merged 4 commits into from
Sep 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 99 additions & 0 deletions lib/converter.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
package lib

import (
"strings"
"regexp"
)

// Parser state carried over between successive calls of the line converters
// in this file.
var (
	// markdownUlListActive is true while an unordered list is open.
	markdownUlListActive bool
	// markdownOlIndex is non-zero while an ordered list is open.
	markdownOlIndex int64
	// codeBlockActive is true between the opening and closing fence of a code block.
	codeBlockActive bool
	// codeBlockAggregate collects the HTML of the code block that is currently open.
	codeBlockAggregate string
)

// ConvertReferenceLink rewrites every match of markdownLinks in text as an
// HTML anchor and wraps the whole line in a paragraph. Capture group 2 of the
// pattern becomes the href and capture group 1 the visible label.
func ConvertReferenceLink(text string, markdownLinks *regexp.Regexp) string {
	const anchorTemplate = "<a href='$2'>$1</a>"
	linked := markdownLinks.ReplaceAllString(text, anchorTemplate)
	return "<p>" + linked + "</p>"
}

// ConvertEnclosure turns a markdown link of the form [mime/type](url) into an
// RSS <enclosure> element.
//
// Capture group 1 of markdownLinks is used as the type attribute and capture
// group 2 as the url attribute. The length attribute is whatever FileSizeUrl
// reports for that URL.
//
// The captured groups are read directly instead of substituting them into the
// whole line, so text surrounding the link can no longer leak into the URL or
// into the generated element.
//
// It returns an empty string when text contains no link or when the size
// lookup fails; a lookup failure is logged through Error.
func ConvertEnclosure(text string, markdownLinks *regexp.Regexp) string {
	match := markdownLinks.FindStringSubmatch(text)
	if len(match) < 3 {
		// No link on this line (or a pattern without the two expected groups).
		return ""
	}
	mimeType, url := match[1], match[2]

	size, fileSizeErr := FileSizeUrl(url)
	if fileSizeErr != nil {
		Error.Println(fileSizeErr)
		return ""
	}
	return "<enclosure url='" + url + "' type='" + mimeType + "' length='" + size + "' />"
}

// ConvertUnorderedlList converts one line that belongs to, or directly
// follows, an unordered list.
//
// A line matching markdownUnorderedLists becomes an <li> element, preceded by
// <ul> when no list is open yet. Only the leading whitespace is cut from the
// item text. Any other line closes the list and is emitted as a paragraph.
// The open/closed state lives in markdownUlListActive.
func ConvertUnorderedlList(text string, markdownUnorderedLists *regexp.Regexp) string {
	if !markdownUnorderedLists.MatchString(text) {
		markdownUlListActive = false
		return "</ul><p>" + text + "</p>"
	}

	opening := ""
	if !markdownUlListActive {
		markdownUlListActive = true
		opening = "<ul>"
	}
	return opening + "<li>" + text[getLeadingWhitespace(text):] + "</li>"
}


// textEnrichmentRule pairs an inline markdown pattern with the HTML tags that
// replace its delimiters. Capture group 2 of pattern is the text to keep.
type textEnrichmentRule struct {
	pattern  *regexp.Regexp
	openTag  string
	closeTag string
}

// textEnrichmentRules lists the inline rewrites in the order they are applied.
// Longer delimiters come before their shorter prefixes (~~ before ~, *** before
// ** before *) so the short forms do not consume them first. The patterns are
// compiled once here instead of on every call.
//
// NOTE(review): the class [^\*^_] also excludes a literal '^'; confirm whether
// that is intended.
var textEnrichmentRules = []textEnrichmentRule{
	// Strikethrough
	{regexp.MustCompile(`(~~)([^~~]+)(~~)`), "<s>", "</s>"},
	// Subscript
	{regexp.MustCompile(`(~)([^~]+)(~)`), "<sub>", "</sub>"},
	// Superscript
	{regexp.MustCompile(`(\^)([^\^]+)(\^)`), "<sup>", "</sup>"},
	// Strong emphasis; tags are closed in reverse order of opening.
	{regexp.MustCompile(`(\*\*\*|___)([^\*^_]+)(\*\*\*|___)`), "<b><i>", "</i></b>"},
	// Bold
	{regexp.MustCompile(`(\*\*|__)([^\*^_]+)(\*\*|__)`), "<b>", "</b>"},
	// Italic
	{regexp.MustCompile(`(\*|_)([^\*^_]+)(\*|_)`), "<i>", "</i>"},
	// Inline code
	{regexp.MustCompile(`(\x60)([^\x60]+)(\x60)`), "<code>", "</code>"},
}

// ConvertTextEnrichment replaces inline markdown markup in text
// (strikethrough, subscript, superscript, strong emphasis, bold, italic and
// inline code) with the corresponding HTML tags and returns the result.
func ConvertTextEnrichment(text string) string {
	for _, rule := range textEnrichmentRules {
		text = inlineRewrap(text, rule.pattern, rule.openTag, rule.closeTag)
	}
	return text
}

func ConvertLink(text string, markdownLinks *regexp.Regexp) string {
if strings.Contains(text, "audio/mpeg") {
return ConvertEnclosure(text, markdownLinks)
} else {
return ConvertReferenceLink(text, markdownLinks)
}
}

// ConvertCodeblock handles one line of a fenced code block, using
// codeBlockActive and codeBlockAggregate as state between calls.
//
//   - With no block open, the line is taken as the opening fence: everything
//     after its first three bytes is stored as a <sup> label and the opening
//     <pre><code> tags are returned.
//   - With a block open and the line matching fencedCodeBlock, the collected
//     content is returned followed by the closing tags, and the state is reset.
//   - Otherwise the line is appended to the aggregate, separated by <br>, and
//     an empty string is returned.
func ConvertCodeblock(text string, fencedCodeBlock *regexp.Regexp) string {
	switch {
	case !codeBlockActive:
		codeBlockActive = true
		codeBlockAggregate = "<sup>" + text[3:] + "</sup><br>"
		return `<pre style="word-wrap: break-word;"><code>`
	case fencedCodeBlock.MatchString(text):
		collected := codeBlockAggregate
		codeBlockAggregate = ""
		codeBlockActive = false
		return collected + "</code></pre>"
	}

	if codeBlockAggregate != "" {
		codeBlockAggregate += "<br>"
	}
	codeBlockAggregate += text
	return ""
}

// ConvertOrderedlList wraps the text of one ordered-list entry in <li> tags
// after cutting its leading whitespace. When no ordered list is open yet
// (markdownOlIndex is zero) the list is marked open and <ol> is emitted first.
func ConvertOrderedlList(text string) string {
	opening := "<li>"
	if markdownOlIndex == 0 {
		markdownOlIndex = 1
		opening = "<ol><li>"
	}
	return opening + text[getLeadingWhitespace(text):] + "</li>"
}

// ConvertOrderedLists converts one line that belongs to, or directly follows,
// an ordered list.
//
// When the line matches markdownOrderedLists, capture group 4 (the entry
// text) is passed to ConvertOrderedlList. Any other line closes the list:
// markdownOlIndex is reset and the line is converted again through
// ConvertMarkdownToRSS, prefixed with </ol>.
func ConvertOrderedLists(text string, markdownOrderedLists *regexp.Regexp) string {
	if groups := markdownOrderedLists.FindStringSubmatch(text); groups != nil {
		return ConvertOrderedlList(groups[4])
	}
	markdownOlIndex = 0
	return "</ol>" + ConvertMarkdownToRSS(text)
}

108 changes: 26 additions & 82 deletions lib/markdown.go
Original file line number Diff line number Diff line change
@@ -1,19 +1,14 @@
package lib

import (
"os"
"bufio"
"errors"
"os"
"regexp"
"strconv"
"strings"
"unicode"
)

var markdownUlListActive bool
var markdownOlIndex int64
var codeBlockAggregate, codeBlockOpen = "", false

func checkMarkdownTitle(text string) bool {
if len(text) > 0 {
return string(text[0]) == string("#")
Expand All @@ -30,88 +25,37 @@ func getLeadingWhitespace(text string) int {
return 0
}

// convertMarkdownLink replaces each match of markdownLinks in text with an
// HTML anchor (group 2 as href, group 1 as label) and returns the line wrapped
// in <p> tags.
func convertMarkdownLink(text string, markdownLinks *regexp.Regexp) string {
	anchored := markdownLinks.ReplaceAllString(text, "<a href='$2'>$1</a>")
	paragraph := "<p>" + anchored + "</p>"
	return paragraph
}

// convertMarkdownEnclosure turns a markdown link of the form [mime/type](url)
// into an RSS <enclosure> element.
//
// Capture group 1 of markdownLinks supplies the type attribute, capture group
// 2 the url attribute, and FileSizeUrl the length attribute. The groups are
// taken from the match itself rather than substituted into the whole line, so
// text around the link cannot end up inside the URL or the element.
//
// It returns an empty string when text contains no link or when the size
// lookup fails; a lookup failure is logged through Error.
func convertMarkdownEnclosure(text string, markdownLinks *regexp.Regexp) string {
	match := markdownLinks.FindStringSubmatch(text)
	if len(match) < 3 {
		// Nothing to convert on this line.
		return ""
	}
	mimeType, url := match[1], match[2]

	size, fileSizeErr := FileSizeUrl(url)
	if fileSizeErr != nil {
		Error.Println(fileSizeErr)
		return ""
	}
	return "<enclosure url='" + url + "' type='" + mimeType + "' length='" + size + "' />"
}

// convertMarkdownUlList renders text as an unordered-list item with its
// leading whitespace removed. The first item of a list (markdownUlListActive
// still false) also opens the <ul> element and marks the list as active.
func convertMarkdownUlList(text string) string {
	opening := "<li>"
	if !markdownUlListActive {
		markdownUlListActive = true
		opening = "<ul><li>"
	}
	return opening + text[getLeadingWhitespace(text):] + "</li>"
}

// convertMarkdownOlList renders text as an ordered-list item with its leading
// whitespace removed. The first item of a list (markdownOlIndex still zero)
// also opens the <ol> element and marks the list as started. The index
// argument is accepted but not used.
func convertMarkdownOlList(text string, index int64) string {
	opening := "<li>"
	if markdownOlIndex == 0 {
		markdownOlIndex = 1
		opening = "<ol><li>"
	}
	return opening + text[getLeadingWhitespace(text):] + "</li>"
}

func ConvertMarkdownToRSS(text string) string {
markdownLinks := regexp.MustCompile(`\[(.*)\]\((.*)\)`)
markdownUnorderedLists := regexp.MustCompile(`^(\s*)(-|\*|\+)[\s](.*)`)
markdownOrderedLists := regexp.MustCompile(`^(\s*)(-?\d+)(\.\s+)(.*)$`)
fencedCodeBlock := regexp.MustCompile("^\x60\x60\x60")
inlineCodeBlock := regexp.MustCompile(`([\x60]+)([^\x60]+)([\x60]+)`)

text = ConvertTextEnrichment(text)
switch {
case codeBlockOpen && !fencedCodeBlock.MatchString(text):
if codeBlockAggregate != "" {
codeBlockAggregate += "<br>"
}
codeBlockAggregate += text
return ""
// links
case markdownLinks.MatchString(text):
if strings.Contains(text, "audio/mpeg") {
return convertMarkdownEnclosure(text, markdownLinks)
} else {
return convertMarkdownLink(text, markdownLinks)
}
case markdownUnorderedLists.MatchString(text):
return convertMarkdownUlList(text)
case markdownUlListActive:
markdownUlListActive = false
return "</ul><p>" + text + "</p>"
case markdownOrderedLists.MatchString(text):
entryIndex, entryErr := strconv.ParseInt(markdownOrderedLists.FindStringSubmatch(text)[2], 10, 64)
entryText := markdownOrderedLists.FindStringSubmatch(text)[4]
if entryErr != nil {
return "<p>" + text + "</p>"
}
return convertMarkdownOlList(entryText, entryIndex)
case markdownOlIndex != 0:
markdownOlIndex = 0
return "</ol>" + ConvertMarkdownToRSS(text)
case fencedCodeBlock.MatchString(text):
if !codeBlockOpen {
codeBlockOpen = true
codeBlockAggregate = "<sup>" + text[3:] + "</sup><br>"
return "" + "<pre style=\"word-wrap: break-word;\"><code>"
} else {
out := codeBlockAggregate
codeBlockAggregate, codeBlockOpen = "", false
return out + "</code></pre>"
}
case inlineCodeBlock.Match([]byte(text)):
out := inlineCodeBlock.ReplaceAllFunc([]byte(text), func(b []byte) []byte {
return []byte("<code>" + inlineCodeBlock.FindStringSubmatch(string(b))[2] + "</code>")
})
return string(out)
default:
return "<p>" + text + "</p>"
return ConvertLink(text, markdownLinks)
// lists
case markdownUnorderedLists.MatchString(text) || markdownUlListActive:
return ConvertUnorderedlList(text, markdownUnorderedLists)
case markdownOrderedLists.MatchString(text) || markdownOlIndex != 0:
return ConvertOrderedLists(text, markdownOrderedLists)
//code blocks
case fencedCodeBlock.MatchString(text) || codeBlockActive:
return ConvertCodeblock(text, fencedCodeBlock)
default:
return "<p>" + text + "</p>"
}
}

// inlineRewrap replaces every match of pattern in text with capture group 2
// of that match, wrapped in prefix and postfix. Text without a match is
// returned unchanged. The pattern must define at least two capture groups.
func inlineRewrap(text string, pattern *regexp.Regexp, prefix string, postfix string) string {
	if !pattern.MatchString(text) {
		return text
	}
	return pattern.ReplaceAllStringFunc(text, func(hit string) string {
		inner := pattern.FindStringSubmatch(hit)[2]
		return prefix + inner + postfix
	})
}

Expand Down Expand Up @@ -142,7 +86,7 @@ func ReadMarkdown(config Config, articles []Article) []Article {
for scanner.Scan() {
if checkMarkdownTitle(scanner.Text()) && len(articles[index].Title) == 0 {
articles[index].Title = scanner.Text()[2:len(scanner.Text())]
} else if len(scanner.Text()) > 0 || codeBlockOpen {
} else if len(scanner.Text()) > 0 || codeBlockActive {
articles[index].Description += ConvertMarkdownToRSS(scanner.Text())
}
}
Expand Down
17 changes: 17 additions & 0 deletions lib/types.go
Original file line number Diff line number Diff line change
@@ -1,9 +1,26 @@
package lib

import (
"regexp"
"time"
)

// Markdown is a document represented as a sequence of Line values.
type Markdown struct {
// Content holds the Line values that make up the document.
Content []Line
}

// Line groups four compiled regular expressions (link, unordered list,
// ordered list, code block) with three boolean state flags.
// NOTE(review): nothing in the visible code uses this type yet; the fields
// presumably mirror the patterns and package-level state used by the
// converter functions — confirm against the callers.
type Line struct {
// Link is a compiled pattern; by its name, the one for markdown links.
Link *regexp.Regexp
// UnorderedList is a compiled pattern; by its name, the one for unordered list entries.
UnorderedList *regexp.Regexp
// OrderedList is a compiled pattern; by its name, the one for ordered list entries.
OrderedList *regexp.Regexp
// CodeBlock is a compiled pattern; by its name, the one for code block fences.
CodeBlock *regexp.Regexp

// optional fields
// These flags default to false when left unset.
CodeBlockOpen bool
UnorderedListActive bool
OrderedListActive bool
}

type Article struct {
Id int
Title string
Expand Down
13 changes: 10 additions & 3 deletions test/another-article.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,14 @@ Timo

And here goes some text...

[link](https://timokats.xyz)
**bold** text, __however you like it__.
*italic* too? _Of course_!
Sometimes ***strong emphasis*** is needed to get across a ___point___.
~~You can always strike through a bad idea~~,
Make something unique with a ~subscript~,
Or power up with a ^superscript^!

A [link](https://timokats.xyz)

And this is a list:
- hello
Expand All @@ -36,6 +43,8 @@ fencedCodeBlock := regexp.MustCompile("^```")
// Show off your tabwidth in style!
```

Can I write text in between the code blocks?

```C++
int main() {
int n, t1 = 0, t2 = 1, nextTerm = 0;
Expand Down Expand Up @@ -68,5 +77,3 @@ int main() {
This feature also works `inline` as well!

And back to text again


Loading