diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
new file mode 100644
index 0000000..ef7d38f
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -0,0 +1,30 @@
+---
+name: Bug report
+about: Create a report to help us improve
+title: "\U0001F41B Bug"
+labels: bug
+assignees: ''
+
+---
+
+**Describe the bug**
+A clear and concise description of what the bug is.
+
+**HTML Input**
+```html
+<h1>Title</h1>
+```
+
+
+**Generated Markdown**
+````markdown
+# Title
+````
+
+**Expected Markdown**
+````markdown
+# Title!!!
+````
+
+**Additional context**
+Add any other context about the problem here. For example, if you changed the default options or used a plugin. Also adding the version from the `go.mod` is helpful.
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
new file mode 100644
index 0000000..0c13a07
--- /dev/null
+++ b/.github/dependabot.yml
@@ -0,0 +1,9 @@
+# Please see the documentation for all configuration options:
+# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
+
+version: 2
+updates:
+ - package-ecosystem: "gomod"
+ directory: "/"
+ schedule:
+ interval: "weekly"
diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml
new file mode 100644
index 0000000..fbb64b7
--- /dev/null
+++ b/.github/workflows/go.yml
@@ -0,0 +1,58 @@
+name: Go
+
+on:
+ push:
+ branches: [ main ]
+ pull_request:
+ branches: [ main ]
+
+jobs:
+
+ # Test the latest go version
+ # and upload the test coverage.
+ test_latest:
+ name: Go latest stable
+
+ runs-on: ubuntu-latest
+ steps:
+ - name: Setup Go
+ uses: actions/setup-go@v5
+ with:
+ go-version: 'stable'
+ check-latest: true
+
+ - name: Checkout code
+ uses: actions/checkout@v4
+
+ - name: Build
+ run: go build -v .
+
+ - name: Test
+ run: go test ./... -v -race -coverprofile=coverage.txt -covermode=atomic
+
+ # - uses: codecov/codecov-action@v4
+ # with:
+ # files: ./coverage.txt
+ # token: ${{ secrets.CODECOV_TOKEN }}
+
+ # Test the pinned Go versions from the matrix
+ # on different operating systems.
+ test_versions:
+ strategy:
+ matrix:
+ go: ['1.22']
+ os: [ubuntu-latest, macos-latest, windows-latest]
+ name: Go ${{ matrix.go }} on ${{ matrix.os }}
+
+ runs-on: ${{ matrix.os }}
+ steps:
+ - name: Setup Go
+ uses: actions/setup-go@v5
+ with:
+ go-version: ${{ matrix.go }}
+
+ - name: Checkout code
+ uses: actions/checkout@v4
+
+ - name: Test
+ run: go test ./... -v -race -cover
\ No newline at end of file
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
new file mode 100644
index 0000000..c2941be
--- /dev/null
+++ b/.github/workflows/release.yml
@@ -0,0 +1,36 @@
+name: goreleaser
+
+on:
+ push:
+ tags:
+ - '*'
+
+permissions:
+ contents: write
+
+jobs:
+ goreleaser:
+ runs-on: ubuntu-latest
+ steps:
+ - name: Setup Go
+ uses: actions/setup-go@v5
+ with:
+ go-version: 'stable'
+ check-latest: true
+
+ - name: Checkout
+ uses: actions/checkout@v4
+ with:
+ fetch-depth: 0
+
+ - name: Run GoReleaser
+ uses: goreleaser/goreleaser-action@v6
+ with:
+ distribution: goreleaser
+ version: 'latest'
+ args: release --clean
+ env:
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+
+
+
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..9a3b10d
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,27 @@
+
+# - - - - - General - - - - - #
+
+# Binaries for programs and plugins
+*.exe
+*.exe~
+*.dll
+*.so
+*.dylib
+
+# Test binary, build with `go test -c`
+*.test
+
+# Output of the go coverage tool, specifically when used with LiteIDE
+*.out
+
+.DS_Store
+
+
+
+# - - - - - Project Specific - - - - - #
+
+NOTES.md
+.tmp
+
+
+dist/
diff --git a/.goreleaser.yaml b/.goreleaser.yaml
new file mode 100644
index 0000000..29eded3
--- /dev/null
+++ b/.goreleaser.yaml
@@ -0,0 +1,46 @@
+
+# The lines below are called `modelines`. See `:help modeline`
+# Feel free to remove those if you don't want/need to use them.
+# yaml-language-server: $schema=https://goreleaser.com/static/schema.json
+# vim: set ts=2 sw=2 tw=0 fo=cnqoj
+
+version: 2
+
+before:
+ hooks:
+ - go mod tidy
+
+builds:
+ - env:
+ - CGO_ENABLED=0
+ goos:
+ - linux
+ - windows
+ - darwin
+
+ # Note: We only use goreleaser for the CLI,
+ # so we have to go into the "cli" directory.
+ dir: cli
+ binary: html2markdown
+
+archives:
+ - format: tar.gz
+ # this name template makes the OS and Arch compatible with the results of `uname`.
+ name_template: >-
+ {{ .ProjectName }}_
+ {{- title .Os }}_
+ {{- if eq .Arch "amd64" }}x86_64
+ {{- else if eq .Arch "386" }}i386
+ {{- else }}{{ .Arch }}{{ end }}
+ {{- if .Arm }}v{{ .Arm }}{{ end }}
+ # use zip for windows archives
+ format_overrides:
+ - goos: windows
+ format: zip
+
+changelog:
+ sort: asc
+ filters:
+ exclude:
+ - "^docs:"
+ - "^test:"
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000..e69de29
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..142f95b
--- /dev/null
+++ b/README.md
@@ -0,0 +1,120 @@
+# html-to-markdown
+
+> [!WARNING]
+> This is an **early experimental version** of the library.
+>
+> We encourage testing and bug reporting. However, please note:
+>
+> - Not production-ready
+> - Default options are well-tested, but custom configurations have limited coverage
+> - Functionality is currently restricted
+> - Focus is on stabilization and core features
+> - No compatibility guarantee
+> - Only use `htmltomarkdown.ConvertString()` and `htmltomarkdown.ConvertNode()` from the root package. They are _unlikely_ to change.
+> - Other functions and nested packages are _very likely_ to change.
+
+---
+
+## Golang Library
+
+```go
+package main
+
+import (
+ "fmt"
+ "log"
+
+ htmltomarkdown "github.com/JohannesKaufmann/html-to-markdown/v2"
+)
+
+func main() {
+ input := `<strong>Bold Text</strong>`
+
+ markdown, err := htmltomarkdown.ConvertString(input)
+ if err != nil {
+ log.Fatal(err)
+ }
+ fmt.Println(markdown)
+ // Output: **Bold Text**
+}
+```
+
+- 🧑‍💻 [Example code, basics](/examples/basics/main.go)
+
+The function `htmltomarkdown.ConvertString()` is just a small wrapper around `converter.NewConverter()` and `commonmark.NewCommonmarkPlugin()`. If you want more control, use the following:
+
+```go
+package main
+
+import (
+ "fmt"
+ "log"
+
+ "github.com/JohannesKaufmann/html-to-markdown/v2/converter"
+ "github.com/JohannesKaufmann/html-to-markdown/v2/plugin/commonmark"
+)
+
+func main() {
+ input := `<strong>Bold Text</strong>`
+
+ conv := converter.NewConverter(
+ converter.WithPlugins(
+ commonmark.NewCommonmarkPlugin(
+ commonmark.WithStrongDelimiter("__"),
+ // ...additional configurations for the plugin
+ ),
+ ),
+ )
+
+ markdown, err := conv.ConvertString(input)
+ if err != nil {
+ log.Fatal(err)
+ }
+ fmt.Println(markdown)
+ // Output: __Bold Text__
+}
+```
+
+- 🧑‍💻 [Example code, options](/examples/options/main.go)
+
+> [!NOTE]
+> If you use `NewConverter` directly make sure to also **register the commonmark plugin**.
+
+---
+
+---
+
+## CLI - Using it on the command line
+
+Using the Golang library provides the most customization, while the CLI is the simplest way to get started.
+
+### Installation
+
+Download the pre-compiled binaries from the [releases page](https://github.com/JohannesKaufmann/html-to-markdown/releases) and copy them to the desired location.
+
+```bash
+html2markdown --version
+```
+
+> [!NOTE]
+> Make sure that `--version` prints `2.X.X` as there is a different CLI for V2 of the converter.
+
+### Usage
+
+```bash
+$ echo "<strong>important</strong>" | html2markdown
+
+**important**
+```
+
+```text
+$ curl --no-progress-meter http://example.com | html2markdown
+
+# Example Domain
+
+This domain is for use in illustrative examples in documents. You may use this domain in literature without prior coordination or asking for permission.
+
+[More information...](https://www.iana.org/domains/example)
+```
+
+_(The cli does not support every option yet. Over time more customization will be added)_
diff --git a/SECURITY.md b/SECURITY.md
new file mode 100644
index 0000000..0a028f6
--- /dev/null
+++ b/SECURITY.md
@@ -0,0 +1,6 @@
+# Security Policy
+
+## Reporting a Vulnerability
+
+Please report (suspected) security vulnerabilities to johannes@joina.de with the subject _"Security html-to-markdown"_ and you will receive a response within 48 hours.
+
diff --git a/cli/cmd/cli_run.go b/cli/cmd/cli_run.go
new file mode 100644
index 0000000..4963c26
--- /dev/null
+++ b/cli/cmd/cli_run.go
@@ -0,0 +1,30 @@
+package cmd
+
+// Run builds a CLI from the given streams, arguments and release information,
+// initializes it and executes it.
+//
+// osArgs are the raw process arguments; the program name is expected at
+// index 0. If the initialization fails, the error is printed to stderr and
+// OsExiter is called with status code 1 (instead of panicking).
+func Run(
+	stdin ReadWriterWithStat,
+	stdout ReadWriterWithStat,
+	stderr ReadWriterWithStat,
+
+	osArgs []string,
+
+	release Release,
+) {
+	cli := CLI{
+		Stdin:  stdin,
+		Stdout: stdout,
+		Stderr: stderr,
+
+		OsArgs: osArgs,
+
+		Release: release,
+	}
+
+	// - - - - - init - - - - - //
+	if err := cli.Init(); err != nil {
+		// Report the failure like every other CLI error. A panic would show
+		// a stack trace to the user of the command line tool.
+		cli.PrintErr(err)
+		OsExiter(1)
+
+		// OsExiter can be replaced by a function that returns (e.g. in
+		// tests), so make sure that an uninitialized CLI is never executed.
+		return
+	}
+
+	// - - - - - exec - - - - - //
+	cli.Execute()
+}
diff --git a/cli/cmd/cmd_convert.go b/cli/cmd/cmd_convert.go
new file mode 100644
index 0000000..fc3891d
--- /dev/null
+++ b/cli/cmd/cmd_convert.go
@@ -0,0 +1,48 @@
+package cmd
+
+import (
+ "bytes"
+ "fmt"
+
+ "github.com/JohannesKaufmann/html-to-markdown/v2/converter"
+ "github.com/JohannesKaufmann/html-to-markdown/v2/plugin/commonmark"
+)
+
+// overrideValidationError rewrites a validation error of the commonmark plugin
+// so that it names the command line flag instead of the plugin's option key.
+//
+// The error is modified in place and returned: a known Key is replaced by the
+// name of the corresponding flag, and KeyWithValue is rebuilt in the form
+// --key="value".
+func overrideValidationError(e *commonmark.ValidateConfigError) error {
+	// TODO: Maybe OptionFunc should already validate and return an error?
+	//       Then it would be easier to override the Key since we have one
+	//       place to assemble the []OptionFunc and directly treat the errors...
+
+	if e.Key == "StrongDelimiter" {
+		e.Key = "opt-strong-delimiter"
+	}
+
+	e.KeyWithValue = fmt.Sprintf("--%s=%q", e.Key, e.Value)
+	return e
+}
+
+// convert converts the HTML input to markdown using the commonmark plugin
+// (configured with the strong delimiter from the flags) and writes the
+// result, followed by a newline, to cli.Stdout.
+//
+// The first return value carries warnings and is currently always nil.
+// A *commonmark.ValidateConfigError is rewritten to mention the command line
+// flag. A failed write to stdout is returned as an error as well, so that
+// the caller can report it.
+func (cli *CLI) convert(input []byte) ([]error, error) {
+	conv := converter.NewConverter(
+		converter.WithPlugins(
+			commonmark.NewCommonmarkPlugin(
+				commonmark.WithStrongDelimiter(cli.config.strongDelimiter),
+			),
+		),
+	)
+
+	markdown, err := conv.ConvertReader(bytes.NewReader(input))
+	if err != nil {
+		if e, ok := err.(*commonmark.ValidateConfigError); ok {
+			return nil, overrideValidationError(e)
+		}
+		return nil, err
+	}
+
+	// Do not silently drop the output, e.g. when the reading side of the
+	// pipe has already been closed.
+	if _, err := fmt.Fprintln(cli.Stdout, string(markdown)); err != nil {
+		return nil, fmt.Errorf("writing the markdown to stdout: %w", err)
+	}
+	return nil, nil
+}
diff --git a/cli/cmd/cmd_help.go b/cli/cmd/cmd_help.go
new file mode 100644
index 0000000..4957535
--- /dev/null
+++ b/cli/cmd/cmd_help.go
@@ -0,0 +1,108 @@
+package cmd
+
+import (
+ "flag"
+ "fmt"
+ "io"
+ "sort"
+ "strings"
+ "text/template"
+)
+
+// usageTemplate is the text/template source of the usage text.
+// It is rendered by initUsageText, which passes the fields "Version" and
+// "Flags", and it uses the "indent" function from templateFuncs.
+var usageTemplate = `
+# html2markdown - convert html to markdown [version {{ .Version }}]
+
+Convert HTML to Markdown. Even works with entire websites!
+
+## Basics
+
+By default the "Commonmark" Plugin will be enabled. You can customize the options,
+for example changing the appearance of bold with --opt-strong-delimiter="__"
+
+Other Plugins can also be enabled. For example "GitHub Flavored Markdown" (GFM)
+extends Commonmark with more features.
+
+
+## Escaping
+
+Some characters have a special meaning in markdown. The library escapes these — if necessary.
+See the documentation for more info.
+
+
+## Security
+
+Once you convert this markdown *back* to HTML you need to be careful of malicious content.
+Use a HTML sanitizer before displaying the HTML in the browser!
+
+
+## Examples
+
+ echo "important" | html2markdown
+
+ curl --no-progress-meter http://example.com | html2markdown
+
+
+## Flags
+
+ -v, --version
+ show the version of html2markdown and exit
+
+ --help
+
+{{ range .Flags }}
+ --{{ .Name }}{{ with .Usage }}
+{{ . | indent 8 }}{{ end }}
+{{ end }}
+
+
+For more information visit the documentation:
+https://github.com/Johanneskaufmann/html-to-markdown
+
+`
+
+// templateFuncs are the helper functions that are registered on the usage
+// template.
+var templateFuncs = template.FuncMap{
+	// indent puts the given number of spaces in front of every line of v.
+	"indent": func(spaces int, v string) string {
+		pad := strings.Repeat(" ", spaces)
+		lines := strings.Split(v, "\n")
+		return pad + strings.Join(lines, "\n"+pad)
+	},
+}
+
+// tmpl parses text as a template named "usage" (with templateFuncs
+// registered) and executes it with data, writing the output to w.
+// It returns the parse error or the execution error, if any.
+func tmpl(w io.Writer, text string, data interface{}) error {
+	parsed, err := template.New("usage").Funcs(templateFuncs).Parse(text)
+	if err != nil {
+		return err
+	}
+	return parsed.Execute(w, data)
+}
+
+// initUsageText renders usageTemplate into cli.usageText.
+//
+// The template receives the release version and all registered flags, sorted
+// by name. The flags "v" and "version" are left out because the template
+// mentions them by hand.
+func (cli *CLI) initUsageText() error {
+	var listed []*flag.Flag
+	cli.flags.VisitAll(func(f *flag.Flag) {
+		switch f.Name {
+		case "v", "version":
+			// We manually mention these in the usage
+		default:
+			listed = append(listed, f)
+		}
+	})
+	sort.Slice(listed, func(a, b int) bool {
+		return listed[a].Name < listed[b].Name
+	})
+
+	return tmpl(&cli.usageText, usageTemplate, map[string]any{
+		"Version": cli.Release.Version,
+		"Flags":   listed,
+	})
+}
+
+// printUsage writes the usage text that was rendered by initUsageText to
+// cli.Stdout.
+//
+// It uses a pointer receiver like Init, Execute and run, so that the CLI
+// (and the usage buffer inside of it) is not copied for every call.
+func (cli *CLI) printUsage() {
+	fmt.Fprint(cli.Stdout, cli.usageText.String())
+}
diff --git a/cli/cmd/cmd_version.go b/cli/cmd/cmd_version.go
new file mode 100644
index 0000000..bd292c7
--- /dev/null
+++ b/cli/cmd/cmd_version.go
@@ -0,0 +1,11 @@
+package cmd
+
+import "fmt"
+
+// printVersion writes the name of the binary, followed by the version, the
+// commit and the build date of the release, to cli.Stdout.
+//
+// It uses a pointer receiver like the other unexported methods of CLI, so
+// that the CLI is not copied for every call.
+func (cli *CLI) printVersion() {
+	release := cli.Release
+
+	fmt.Fprintf(cli.Stdout, "%s\n\n", projectBinary)
+
+	fmt.Fprintf(cli.Stdout, "GitVersion: %s\n", release.Version)
+	fmt.Fprintf(cli.Stdout, "GitCommit: %s\n", release.Commit)
+	fmt.Fprintf(cli.Stdout, "BuildDate: %s\n", release.Date)
+}
diff --git a/cli/cmd/errors.go b/cli/cmd/errors.go
new file mode 100644
index 0000000..3b13481
--- /dev/null
+++ b/cli/cmd/errors.go
@@ -0,0 +1,66 @@
+package cmd
+
+import (
+ "fmt"
+ "io"
+
+ "github.com/muesli/termenv"
+)
+
+// CLIError is an error that carries additional printers. PrintDetails prints
+// the cause in a colored box, followed by the output of every printer.
+type CLIError struct {
+ // cause is the underlying error; its message is the message of the CLIError.
+ cause error
+ // printers are printed, in order, after the error itself.
+ printers []Printer
+}
+
+// extractCLIError returns the CLIError behind err and true if err is a
+// *CLIError. Every other error is wrapped into a new CLIError without any
+// printers, and false is returned.
+func extractCLIError(err error) (CLIError, bool) {
+	cliErr, ok := err.(*CLIError)
+	if !ok {
+		return CLIError{cause: err}, false
+	}
+	return *cliErr, true
+}
+
+// NewCLIError returns an error (a *CLIError) for the cause. The printers are
+// printed after the error message by PrintDetails.
+func NewCLIError(cause error, printers ...Printer) error {
+	e := CLIError{
+		cause:    cause,
+		printers: printers,
+	}
+	return &e
+}
+
+// Error returns the message of the cause.
+func (e CLIError) Error() string {
+	cause := e.cause
+	return cause.Error()
+}
+
+// PrintDetails writes the error to w: first the message of the cause in a
+// colored box with the prefix "error", then the output of every printer.
+// Each element is preceded by a newline, and a final newline is written at
+// the end.
+func (e CLIError) PrintDetails(w io.Writer) {
+	newline := []byte("\n")
+
+	// The error itself always comes first.
+	all := make([]Printer, 0, len(e.printers)+1)
+	all = append(all, ColoredBox("error", e.cause.Error()))
+	all = append(all, e.printers...)
+
+	for _, p := range all {
+		w.Write(newline)
+		p.Print(w)
+	}
+	w.Write(newline)
+}
+
+// PrintErr prints the details of err to cli.Stderr. An error that is not a
+// *CLIError is printed without any additional printers. A nil error prints
+// nothing.
+func (cli CLI) PrintErr(err error) {
+	if err != nil {
+		details, _ := extractCLIError(err)
+		details.PrintDetails(cli.Stderr)
+	}
+}
+
+// PrintWarn prints err as a warning to cli.Stderr: the label "warning:"
+// (styled with a yellow background and a bright white foreground) followed by
+// the message (styled with a yellow foreground), surrounded by blank lines.
+// A nil error prints nothing.
+func (cli CLI) PrintWarn(err error) {
+	if err == nil {
+		return
+	}
+
+	out := termenv.NewOutput(cli.Stderr)
+
+	label := out.String("warning:").
+		Background(termenv.ANSIYellow).
+		Foreground(termenv.ANSIBrightWhite)
+	text := out.String(err.Error()).Foreground(termenv.ANSIYellow)
+
+	fmt.Fprintf(cli.Stderr, "\n%s %s\n\n", label.String(), text.String())
+}
diff --git a/cli/cmd/exec.go b/cli/cmd/exec.go
new file mode 100644
index 0000000..1d65eaa
--- /dev/null
+++ b/cli/cmd/exec.go
@@ -0,0 +1,147 @@
+package cmd
+
+import (
+ "bytes"
+ "flag"
+ "fmt"
+ "io"
+ "os"
+ "strings"
+)
+
+var (
+ // projectBinary is the name of the binary, as printed by printVersion.
+ projectBinary = "html2markdown"
+)
+
+// OsExiter is the function that Execute calls with the exit code once the
+// CLI is done. It defaults to os.Exit and can be replaced, for example in
+// tests, to keep the process alive.
+var OsExiter = os.Exit
+
+// - - - - - - - - - - - - - //
+
+// Config holds the values that were parsed from the command line.
+type Config struct {
+ // args are the positional (non-flag) command-line arguments.
+ args []string
+
+ // version is set by the -v / --version flag.
+ version bool
+
+ // - - - - //
+
+ // strongDelimiter is the value of the --opt-strong-delimiter flag.
+ // It is passed to the commonmark plugin.
+ strongDelimiter string
+
+ // plugins collects the comma separated values of the --plugins flag.
+ plugins []string
+}
+
+// Release holds the information (from the 3 ldflags) that goreleaser sets.
+// The values are printed by the version command, and Version is also shown
+// in the usage text.
+type Release struct {
+ // Current Git tag (the v prefix is stripped)
+ Version string
+
+ // Current git commit SHA
+ Commit string
+
+ // Date in the RFC3339 format
+ Date string
+}
+
+// CLI is the command line application. The exported fields have to be set by
+// the caller; Init then prepares the unexported fields and Execute runs the
+// application.
+type CLI struct {
+ // Stdin, Stdout and Stderr are the streams that the CLI reads from and
+ // writes to.
+ Stdin ReadWriterWithStat
+ Stdout ReadWriterWithStat
+ Stderr ReadWriterWithStat
+
+ // OsArgs are the raw arguments; index 0 is used as the program name and
+ // the remaining elements are parsed as flags and arguments.
+ OsArgs []string
+
+ Release Release
+
+ // Whether the corresponding stream is a pipe (determined by Init).
+ isStdinPipe bool
+ isStdoutPipe bool
+ isStderrPipe bool
+
+ // flags is the flag set that parses into config.
+ flags *flag.FlagSet
+ config Config
+
+ // usageText is the rendered usage text (see initUsageText).
+ usageText bytes.Buffer
+}
+
+// Init prepares the CLI for Execute: it detects which of the three streams
+// are pipes, registers the flags (using OsArgs[0] as the program name) and
+// renders the usage text.
+//
+// It returns an error if OsArgs is empty, if one of the streams cannot be
+// inspected or if the usage text cannot be rendered.
+func (cli *CLI) Init() error {
+	// Both Init and run index into OsArgs, so an empty slice would cause an
+	// "index out of range" panic.
+	if len(cli.OsArgs) == 0 {
+		return fmt.Errorf("the os arguments should at least contain the program name")
+	}
+
+	var err error
+	cli.isStdinPipe, err = isPipe(cli.Stdin)
+	if err != nil {
+		return fmt.Errorf("error while checking stdin for is pipe: %w", err)
+	}
+	cli.isStdoutPipe, err = isPipe(cli.Stdout)
+	if err != nil {
+		return fmt.Errorf("error while checking stdout for is pipe: %w", err)
+	}
+	cli.isStderrPipe, err = isPipe(cli.Stderr)
+	if err != nil {
+		return fmt.Errorf("error while checking stderr for is pipe: %w", err)
+	}
+
+	cli.initFlags(cli.OsArgs[0])
+
+	if err := cli.initUsageText(); err != nil {
+		return fmt.Errorf("error while initializing the usage text: %w", err)
+	}
+
+	return nil
+}
+
+// Execute runs the CLI, prints the warnings and finishes by calling OsExiter:
+// with 0 after a successful run or after printing the usage (flag.ErrHelp),
+// and with 1 after printing any other error.
+func (cli *CLI) Execute() {
+	warnings, err := cli.run()
+	for _, w := range warnings {
+		cli.PrintWarn(w)
+	}
+
+	switch {
+	case err == flag.ErrHelp:
+		cli.printUsage()
+		OsExiter(0)
+	case err != nil:
+		cli.PrintErr(err)
+		OsExiter(1) // General Error
+	default:
+		OsExiter(0)
+	}
+}
+
+// run parses the flags (everything after the program name) and then either
+// prints the version or converts the HTML from stdin.
+//
+// Positional arguments are rejected, and stdin has to be a pipe for the
+// conversion. The first return value carries warnings, the second the error
+// that stopped the run.
+func (cli *CLI) run() ([]error, error) {
+	if err := cli.parseFlags(cli.OsArgs[1:]); err != nil {
+		return nil, err
+	}
+
+	if extra := cli.config.args; len(extra) != 0 {
+		return nil, NewCLIError(
+			fmt.Errorf("unknown arguments: %s", strings.Join(extra, " ")),
+			Paragraph("Here is how you can use the CLI:"),
+			CodeBlock(`echo "important" | html2markdown`),
+		)
+	}
+
+	if cli.config.version {
+		cli.printVersion()
+		return nil, nil
+	}
+
+	if !cli.isStdinPipe {
+		return nil, NewCLIError(
+			fmt.Errorf("the html input should be piped into the cli"),
+			Paragraph("Here is how you can use the CLI:"),
+			CodeBlock(`echo "important" | html2markdown`),
+		)
+	}
+
+	html, err := io.ReadAll(cli.Stdin)
+	if err != nil {
+		return nil, err
+	}
+	return cli.convert(html)
+}
diff --git a/cli/cmd/exec_test.go b/cli/cmd/exec_test.go
new file mode 100644
index 0000000..32d3328
--- /dev/null
+++ b/cli/cmd/exec_test.go
@@ -0,0 +1,319 @@
+package cmd
+
+import (
+ "bytes"
+ "fmt"
+ "io/fs"
+ "os"
+ "path/filepath"
+ "strings"
+ "testing"
+ "time"
+
+ "github.com/sebdah/goldie/v2"
+)
+
+// init replaces OsExiter, so that the code under test prints the exit code
+// to the standard output of the test binary instead of calling os.Exit.
+func init() {
+ OsExiter = func(code int) {
+ fmt.Println("OS_EXITER_CALLED", code)
+ }
+}
+
+// MockFileInfo is a file info for the tests where only the mode is
+// configurable; all other methods return fixed placeholder values.
+type MockFileInfo struct {
+ mode os.FileMode
+}
+
+func (info MockFileInfo) Name() string { return "" }
+func (info MockFileInfo) Size() int64 { return 1 }
+func (info MockFileInfo) Mode() os.FileMode { return info.mode }
+func (info MockFileInfo) ModTime() time.Time { return time.Now() }
+func (info MockFileInfo) IsDir() bool { return false }
+func (info MockFileInfo) Sys() interface{} { return nil }
+
+// FakeFile is an in-memory stand-in for stdin, stdout or stderr: the content
+// lives in the embedded buffer and Stat reports the configured mode.
+type FakeFile struct {
+ bytes.Buffer
+ mode os.FileMode
+}
+
+// Stat returns a MockFileInfo with the mode of the fake file. It never fails.
+func (f FakeFile) Stat() (fs.FileInfo, error) {
+ return &MockFileInfo{mode: f.mode}, nil
+}
+
+// File modes of the fake streams, spelled out with the mode bits of io/fs.
+const (
+	// modePipe is the mode of a named pipe: "prw-rw----".
+	modePipe = fs.ModeNamedPipe | 0o660
+	// modeTerminal is the mode of a character device: "Dcrw--w----".
+	modeTerminal = fs.ModeDevice | fs.ModeCharDevice | 0o620
+)
+
+// CLIInput describes one invocation of the CLI in the tests.
+type CLIInput struct {
+ // The file modes that the fake stdin, stdout and stderr report.
+ modeStdin os.FileMode
+ modeStdout os.FileMode
+ modeStderr os.FileMode
+
+ // inputStdin is written to the fake stdin (nothing is written if nil).
+ inputStdin []byte
+ // inputArgs are the os arguments, starting with the program name.
+ inputArgs []string
+}
+
+func cliTester(t *testing.T, input CLIInput) {
+ if input.modeStdin == modeTerminal && input.inputStdin != nil {
+ t.Fatal("invalid test: cannot provide stdin without pipe mode")
+ }
+
+ stdin := &FakeFile{mode: input.modeStdin}
+ stdout := &FakeFile{mode: input.modeStdout}
+ stderr := &FakeFile{mode: input.modeStderr}
+
+ if input.inputStdin != nil {
+ stdin.Write(input.inputStdin)
+ }
+
+ release := Release{
+ Version: "2.3.4-test",
+ Commit: "ca82a6dff817ec66f44342007202690a93763949",
+ Date: "2024-08-18T13:03:43Z",
+ }
+
+ Run(stdin, stdout, stderr, input.inputArgs, release)
+
+ if len(stdout.Bytes()) == 0 && len(stderr.Bytes()) == 0 {
+ t.Fatal("neither stdout nor stderr have any content")
+ }
+
+ g := goldie.New(t)
+ g.Assert(t, filepath.Join(t.Name(), "stdout"), stdout.Bytes())
+ g.Assert(t, filepath.Join(t.Name(), "stderr"), stderr.Bytes())
+}
+
+// TestExecute runs the CLI end to end for every test case and compares what
+// was written to stdout and stderr with the golden files of that case.
+func TestExecute(t *testing.T) {
+	// html is the document that is piped into the CLI. It has to contain the
+	// <strong> element, otherwise the converted output cannot be the bold
+	// text that the golden files of the "[convert]" cases expect.
+	html := []byte("<strong>text</strong>")
+
+	// terminal describes an invocation where all three streams are
+	// terminals, so nothing can be provided on stdin.
+	terminal := func(args ...string) CLIInput {
+		return CLIInput{
+			modeStdin:  modeTerminal,
+			modeStdout: modeTerminal,
+			modeStderr: modeTerminal,
+
+			inputArgs: args,
+		}
+	}
+	// piped describes an invocation where all three streams are pipes.
+	// With a nil stdin nothing is written to the pipe.
+	piped := func(stdin []byte, args ...string) CLIInput {
+		return CLIInput{
+			modeStdin:  modePipe,
+			modeStdout: modePipe,
+			modeStderr: modePipe,
+
+			inputStdin: stdin,
+			inputArgs:  args,
+		}
+	}
+
+	testCases := []struct {
+		desc  string
+		input CLIInput
+	}{
+		// - - - - - flag: version / help - - - - - //
+		{desc: "[general] version terminal", input: terminal("html2markdown", "--version")},
+		{desc: "[general] version pipe", input: piped(nil, "html2markdown", "--version")},
+		{desc: "[general] help terminal", input: terminal("html2markdown", "--help")},
+		{desc: "[general] help pipe", input: piped(nil, "html2markdown", "--help")},
+
+		// - - - - - no content - - - - - //
+		{desc: "[general] no content", input: terminal("html2markdown")},
+
+		// - - - - - arguments - - - - - //
+		{desc: "[argument unknown] version", input: terminal("html2markdown", `version`)},
+		{desc: "[argument unknown] html", input: terminal("html2markdown", `"text"`)},
+		{desc: "[argument unknown] long string", input: terminal("html2markdown", strings.Repeat("12456789", 40))},
+		{
+			desc: "[argument unknown] list of files",
+
+			// This is what the CLI receives if an unquoted ** is used as the
+			// value of the flag and the shell expands it as a file glob: the
+			// first file name becomes the value of the flag, the remaining
+			// file names are positional arguments.
+			input: terminal(
+				"html2markdown", "--opt-strong-delimiter",
+				"CONTRIBUTING.md", "README.md", "SECURITY.md",
+				"a.html", "b.html", "c.html", "d.html", "e.html", "f.html",
+			),
+		},
+
+		// - - - - - flags - - - - - //
+		{desc: "[flag unknown] with pipe", input: piped(nil, "html2markdown", "--this-does-not-exist")},
+		{desc: "[flag unknown] with terminal", input: terminal("html2markdown", "--this-does-not-exist")},
+		{
+			desc: "[flag misspelled] underscore",
+
+			// Someone accidentally used underscores instead of dashes
+			input: piped(html, "html2markdown", "--opt_strong_delimiter="),
+		},
+
+		// - - - - - converting - - - - - //
+		{desc: "[convert] strong default", input: piped(html, "html2markdown")},
+		{
+			desc: "[convert] strong equal underscore",
+
+			// Note: We don't test the quoted version "__" since the quotes
+			// are already removed by bash/go
+			input: piped(html, "html2markdown", `--opt-strong-delimiter=__`),
+		},
+		{desc: "[convert] strong space underscore", input: piped(html, "html2markdown", `--opt-strong-delimiter`, `__`)},
+
+		// - - - - - validation of options - - - - - //
+		{desc: "[validation] no value", input: piped(html, "html2markdown", `--opt-strong-delimiter=`)},
+		{desc: "[validation] invalid value", input: piped(html, "html2markdown", `--opt-strong-delimiter=1234`)},
+		{desc: "[validation] discouraged value", input: piped(html, "html2markdown", `--opt-strong-delimiter=*`)},
+	}
+	for _, tC := range testCases {
+		t.Run(tC.desc, func(t *testing.T) {
+			cliTester(t, tC.input)
+		})
+	}
+}
diff --git a/cli/cmd/flags.go b/cli/cmd/flags.go
new file mode 100644
index 0000000..11d0024
--- /dev/null
+++ b/cli/cmd/flags.go
@@ -0,0 +1,80 @@
+package cmd
+
+import (
+ "flag"
+ "fmt"
+ "io"
+ "strings"
+ "unicode"
+)
+
+// FlagString is a string that can be scanned with the fmt package.
+type FlagString string
+
+// Scan implements fmt.Scanner. It skips leading space and then reads the
+// longest run of letters and dashes into a. The verb is ignored.
+func (a *FlagString) Scan(state fmt.ScanState, verb rune) error {
+	isFlagRune := func(r rune) bool {
+		return r == '-' || unicode.IsLetter(r)
+	}
+
+	token, err := state.Token(true, isFlagRune)
+	if err != nil {
+		return err
+	}
+
+	*a = FlagString(token)
+	return nil
+}
+
+// flagStringSlice returns a function (usable with flag.FlagSet.Func) that
+// splits the raw value of a flag at every comma and appends the parts to
+// elems. The parts are trimmed and empty parts are skipped. The returned
+// function never returns an error.
+func flagStringSlice(elems *[]string) func(string) error {
+	return func(raw string) error {
+		for _, part := range strings.Split(raw, ",") {
+			if trimmed := strings.TrimSpace(part); trimmed != "" {
+				*elems = append(*elems, trimmed)
+			}
+		}
+		return nil
+	}
+}
+
+// initFlags creates the flag set of the CLI and registers all flags so that
+// they are parsed into cli.config.
+//
+// The flag set continues on errors and discards its own output, because the
+// errors and the usage are printed by the CLI itself.
+func (cli *CLI) initFlags(progname string) {
+	set := flag.NewFlagSet(progname, flag.ContinueOnError)
+	set.SetOutput(io.Discard)
+	cli.flags = set
+
+	// - - - //
+
+	// The long and the short form share the same variable.
+	for _, name := range []string{"version", "v"} {
+		set.BoolVar(&cli.config.version, name, false, "display the version")
+	}
+
+	// set.BoolVar(&cli.config.help, "help", false, "display help")
+
+	set.StringVar(
+		&cli.config.strongDelimiter,
+		"opt-strong-delimiter",
+		"**",
+		`Make bold text. Should be indicated by two asterisks or two underscores?
+"**" or "__" (default: "**")`,
+	)
+
+	// Not available yet:
+	// set.StringVar(&cli.config.strongDelimiter, "opt-heading-style", "", "")
+	// set.StringVar(&cli.config.strongDelimiter, "opt-horizontal-rule", "", "")
+	// set.StringVar(&cli.config.strongDelimiter, "opt-bullet-list-marker", "", "")
+
+	// TODO: how to disable commonmark plugin?
+	//    --plugin_commonmark=false
+	//    --plugin.commonmark=false
+	//    --no-plugin="cm" / --disable-plugin="cm"
+	// But what if we have conflicting flags???
+	set.Func("plugins", "which plugins should be enabled?", flagStringSlice(&cli.config.plugins))
+}
+
+// parseFlags parses args with the flag set of the CLI and stores the
+// remaining positional arguments in cli.config.args. A parse error is
+// returned after it went through categorizeFlagError.
+func (cli *CLI) parseFlags(args []string) error {
+	if err := cli.flags.Parse(args); err != nil {
+		return cli.categorizeFlagError(err)
+	}
+
+	cli.config.args = cli.flags.Args()
+	return nil
+}
diff --git a/cli/cmd/flags_categorize.go b/cli/cmd/flags_categorize.go
new file mode 100644
index 0000000..e49e3d1
--- /dev/null
+++ b/cli/cmd/flags_categorize.go
@@ -0,0 +1,68 @@
+package cmd
+
+import (
+ "flag"
+ "fmt"
+ "strings"
+ "unicode/utf8"
+
+ "github.com/agnivade/levenshtein"
+)
+
+// flagProvidedButNotDefinedErr is the message prefix by which
+// categorizeFlagError recognizes the error for an undefined flag; the name of
+// the flag is expected directly after the prefix.
+// NOTE(review): presumably this mirrors the wording of the flag package;
+// confirm when upgrading Go.
+const flagProvidedButNotDefinedErr = "flag provided but not defined: -"
+
+// formatFlag returns the name of the flag with leading dashes: a single dash
+// for names that are one byte long, two dashes for all other names.
+func formatFlag(name string) string {
+	dashes := "--"
+	if len(name) == 1 {
+		dashes = "-"
+	}
+	return dashes + name
+}
+
+// getAlternativeFlag returns the name of the registered flag that is closest
+// to unknownFlag, measured by the Levenshtein distance.
+//
+// It returns an empty string if no flag is close enough: the distance has to
+// be smaller than the number of runes in unknownFlag and at most 4.
+func (cli *CLI) getAlternativeFlag(unknownFlag string) string {
+	// Start with a distance that is larger than any realistic distance.
+	closestDistance := 10000
+	var closestFlag string
+
+	cli.flags.VisitAll(func(f *flag.Flag) {
+		distance := levenshtein.ComputeDistance(f.Name, unknownFlag)
+
+		if distance < closestDistance {
+			closestDistance = distance
+			closestFlag = f.Name
+		}
+	})
+
+	// Note: Nothing is printed here. A debug print to the standard output
+	// of the process would bypass cli.Stdout and end up in the output that
+	// the user receives.
+
+	if closestDistance >= utf8.RuneCountInString(unknownFlag) {
+		// Every character would have to be changed; that is not a typo.
+		return ""
+	}
+	if closestDistance > 4 {
+		return ""
+	}
+	return closestFlag
+}
+
+// categorizeFlagError turns the error for an undefined flag into a CLIError
+// with the message "unknown flag: ..." and, if a similar flag exists, a
+// "Did you mean" suggestion. Every other error (including nil) is returned
+// unchanged.
+func (cli *CLI) categorizeFlagError(err error) error {
+	if err == nil {
+		return nil
+	}
+
+	message := err.Error()
+	if !strings.HasPrefix(message, flagProvidedButNotDefinedErr) {
+		return err
+	}
+
+	flagName := strings.TrimPrefix(message, flagProvidedButNotDefinedErr)
+	unknownErr := fmt.Errorf("unknown flag: %s", formatFlag(flagName))
+
+	if alternative := cli.getAlternativeFlag(flagName); alternative != "" {
+		suggestion := fmt.Sprintf("Did you mean %s instead?", formatFlag(alternative))
+		return NewCLIError(unknownErr, Paragraph(suggestion))
+	}
+	return NewCLIError(unknownErr)
+}
diff --git a/cli/cmd/flags_test.go b/cli/cmd/flags_test.go
new file mode 100644
index 0000000..65bf230
--- /dev/null
+++ b/cli/cmd/flags_test.go
@@ -0,0 +1,47 @@
+package cmd
+
+import (
+ "reflect"
+ "testing"
+)
+
+// TestFlagStringSlice checks that the values of a flag that is passed one or
+// multiple times are split at the commas, trimmed and collected in order.
+func TestFlagStringSlice(t *testing.T) {
+	testCases := []struct {
+		desc     string
+		inputs   []string
+		expected []string
+	}{
+		{
+			desc:     "simple flag",
+			inputs:   []string{"a"},
+			expected: []string{"a"},
+		},
+		{
+			desc:     "two flags",
+			inputs:   []string{"a,b", "c"},
+			expected: []string{"a", "b", "c"},
+		},
+		{
+			desc:     "with separator",
+			inputs:   []string{"a,", ",b"},
+			expected: []string{"a", "b"},
+		},
+		{
+			desc:     "with spaces",
+			inputs:   []string{"a, ,b", " ,c"},
+			expected: []string{"a", "b", "c"},
+		},
+	}
+	for _, tC := range testCases {
+		t.Run(tC.desc, func(t *testing.T) {
+			var result []string
+			parse := flagStringSlice(&result)
+
+			// Every input represents one occurrence of the flag.
+			for _, input := range tC.inputs {
+				if err := parse(input); err != nil {
+					t.Fatalf("unexpected error for input %q: %v", input, err)
+				}
+			}
+
+			if !reflect.DeepEqual(result, tC.expected) {
+				t.Errorf("expected %v but got %v", tC.expected, result)
+			}
+		})
+	}
+}
diff --git a/cli/cmd/print.go b/cli/cmd/print.go
new file mode 100644
index 0000000..189e62f
--- /dev/null
+++ b/cli/cmd/print.go
@@ -0,0 +1,59 @@
+package cmd
+
+import (
+ "fmt"
+ "io"
+
+ "github.com/muesli/termenv"
+)
+
+// Printer is a piece of output (for example a paragraph or a code block)
+// that writes itself to w.
+type Printer interface {
+ Print(w io.Writer)
+}
+
+// - - - - - - - //
+
+// coloredBox is a Printer for a highlighted label that is followed by a text.
+type coloredBox struct {
+	prefix string
+	text   string
+}
+
+// ColoredBox returns a Printer that writes a single line: the prefix with a
+// colon appended (styled with a red background and a bright white
+// foreground), a space and the text (styled with a red foreground).
+func ColoredBox(prefix string, text string) Printer {
+	return &coloredBox{prefix: prefix, text: text}
+}
+
+// Print writes the box to w.
+func (p coloredBox) Print(w io.Writer) {
+	out := termenv.NewOutput(w)
+
+	label := out.String(p.prefix + ":").
+		Background(termenv.ANSIRed).
+		Foreground(termenv.ANSIBrightWhite)
+	body := out.String(p.text).Foreground(termenv.ANSIRed)
+
+	fmt.Fprintf(w, "%s %s\n", label.String(), body.String())
+}
+
+// - - - - - - - //
+
+// paragraph is a Printer for a line of plain text.
+type paragraph struct {
+	text string
+}
+
+// Paragraph returns a Printer that writes the text followed by a newline.
+func Paragraph(text string) Printer {
+	return &paragraph{text: text}
+}
+
+// Print writes the paragraph to w.
+func (p paragraph) Print(w io.Writer) {
+	fmt.Fprintln(w, p.text)
+}
+
+// - - - - - - - //
+
+// codeBlock is a Printer for a piece of code, e.g. an example command.
+type codeBlock struct {
+	code string
+}
+
+// CodeBlock returns a Printer that writes the code, indented and followed by
+// a newline.
+func CodeBlock(code string) Printer {
+	block := codeBlock{code: code}
+	return &block
+}
+
+// Print writes the code block to w. Only the first line is indented.
+func (cb codeBlock) Print(w io.Writer) {
+	// TODO: what about indenting multiline?
+	const layout = " %s\n"
+	fmt.Fprintf(w, layout, cb.code)
+}
diff --git a/cli/cmd/testdata/.gitattributes b/cli/cmd/testdata/.gitattributes
new file mode 100644
index 0000000..a8d2daa
--- /dev/null
+++ b/cli/cmd/testdata/.gitattributes
@@ -0,0 +1,4 @@
+
+# Leave the files untouched. Otherwise they might be
+# changed when cloning the repo on Windows...
+* -text
diff --git a/cli/cmd/testdata/TestExecute/[argument_unknown]_html/stderr.golden b/cli/cmd/testdata/TestExecute/[argument_unknown]_html/stderr.golden
new file mode 100644
index 0000000..48312bb
--- /dev/null
+++ b/cli/cmd/testdata/TestExecute/[argument_unknown]_html/stderr.golden
@@ -0,0 +1,7 @@
+
+error: unknown arguments: "text"
+
+Here is how you can use the CLI:
+
+ echo "important" | html2markdown
+
diff --git a/cli/cmd/testdata/TestExecute/[argument_unknown]_html/stdout.golden b/cli/cmd/testdata/TestExecute/[argument_unknown]_html/stdout.golden
new file mode 100644
index 0000000..e69de29
diff --git a/cli/cmd/testdata/TestExecute/[argument_unknown]_list_of_files/stderr.golden b/cli/cmd/testdata/TestExecute/[argument_unknown]_list_of_files/stderr.golden
new file mode 100644
index 0000000..9b86664
--- /dev/null
+++ b/cli/cmd/testdata/TestExecute/[argument_unknown]_list_of_files/stderr.golden
@@ -0,0 +1,7 @@
+
+error: unknown arguments: README.md SECURITY.md a.html b.html c.html d.html e.html f.html
+
+Here is how you can use the CLI:
+
+ echo "important" | html2markdown
+
diff --git a/cli/cmd/testdata/TestExecute/[argument_unknown]_list_of_files/stdout.golden b/cli/cmd/testdata/TestExecute/[argument_unknown]_list_of_files/stdout.golden
new file mode 100644
index 0000000..e69de29
diff --git a/cli/cmd/testdata/TestExecute/[argument_unknown]_long_string/stderr.golden b/cli/cmd/testdata/TestExecute/[argument_unknown]_long_string/stderr.golden
new file mode 100644
index 0000000..245fc1c
--- /dev/null
+++ b/cli/cmd/testdata/TestExecute/[argument_unknown]_long_string/stderr.golden
@@ -0,0 +1,7 @@
+
+error: unknown arguments: 12456789124567891245678912456789124567891245678912456789124567891245678912456789124567891245678912456789124567891245678912456789124567891245678912456789124567891245678912456789124567891245678912456789124567891245678912456789124567891245678912456789124567891245678912456789124567891245678912456789124567891245678912456789
+
+Here is how you can use the CLI:
+
+ echo "important" | html2markdown
+
diff --git a/cli/cmd/testdata/TestExecute/[argument_unknown]_long_string/stdout.golden b/cli/cmd/testdata/TestExecute/[argument_unknown]_long_string/stdout.golden
new file mode 100644
index 0000000..e69de29
diff --git a/cli/cmd/testdata/TestExecute/[argument_unknown]_version/stderr.golden b/cli/cmd/testdata/TestExecute/[argument_unknown]_version/stderr.golden
new file mode 100644
index 0000000..8070be6
--- /dev/null
+++ b/cli/cmd/testdata/TestExecute/[argument_unknown]_version/stderr.golden
@@ -0,0 +1,7 @@
+
+error: unknown arguments: version
+
+Here is how you can use the CLI:
+
+ echo "important" | html2markdown
+
diff --git a/cli/cmd/testdata/TestExecute/[argument_unknown]_version/stdout.golden b/cli/cmd/testdata/TestExecute/[argument_unknown]_version/stdout.golden
new file mode 100644
index 0000000..e69de29
diff --git a/cli/cmd/testdata/TestExecute/[convert]_strong_default/stderr.golden b/cli/cmd/testdata/TestExecute/[convert]_strong_default/stderr.golden
new file mode 100644
index 0000000..e69de29
diff --git a/cli/cmd/testdata/TestExecute/[convert]_strong_default/stdout.golden b/cli/cmd/testdata/TestExecute/[convert]_strong_default/stdout.golden
new file mode 100644
index 0000000..e5180c8
--- /dev/null
+++ b/cli/cmd/testdata/TestExecute/[convert]_strong_default/stdout.golden
@@ -0,0 +1 @@
+**text**
diff --git a/cli/cmd/testdata/TestExecute/[convert]_strong_equal_underscore/stderr.golden b/cli/cmd/testdata/TestExecute/[convert]_strong_equal_underscore/stderr.golden
new file mode 100644
index 0000000..e69de29
diff --git a/cli/cmd/testdata/TestExecute/[convert]_strong_equal_underscore/stdout.golden b/cli/cmd/testdata/TestExecute/[convert]_strong_equal_underscore/stdout.golden
new file mode 100644
index 0000000..25a055e
--- /dev/null
+++ b/cli/cmd/testdata/TestExecute/[convert]_strong_equal_underscore/stdout.golden
@@ -0,0 +1 @@
+__text__
diff --git a/cli/cmd/testdata/TestExecute/[convert]_strong_space_underscore/stderr.golden b/cli/cmd/testdata/TestExecute/[convert]_strong_space_underscore/stderr.golden
new file mode 100644
index 0000000..e69de29
diff --git a/cli/cmd/testdata/TestExecute/[convert]_strong_space_underscore/stdout.golden b/cli/cmd/testdata/TestExecute/[convert]_strong_space_underscore/stdout.golden
new file mode 100644
index 0000000..25a055e
--- /dev/null
+++ b/cli/cmd/testdata/TestExecute/[convert]_strong_space_underscore/stdout.golden
@@ -0,0 +1 @@
+__text__
diff --git a/cli/cmd/testdata/TestExecute/[flag_misspelled]_underscore/stderr.golden b/cli/cmd/testdata/TestExecute/[flag_misspelled]_underscore/stderr.golden
new file mode 100644
index 0000000..664e5bb
--- /dev/null
+++ b/cli/cmd/testdata/TestExecute/[flag_misspelled]_underscore/stderr.golden
@@ -0,0 +1,5 @@
+
+error: unknown flag: --opt_strong_delimiter
+
+Did you mean --opt-strong-delimiter instead?
+
diff --git a/cli/cmd/testdata/TestExecute/[flag_misspelled]_underscore/stdout.golden b/cli/cmd/testdata/TestExecute/[flag_misspelled]_underscore/stdout.golden
new file mode 100644
index 0000000..e69de29
diff --git a/cli/cmd/testdata/TestExecute/[flag_unknown]_with_pipe/stderr.golden b/cli/cmd/testdata/TestExecute/[flag_unknown]_with_pipe/stderr.golden
new file mode 100644
index 0000000..3043673
--- /dev/null
+++ b/cli/cmd/testdata/TestExecute/[flag_unknown]_with_pipe/stderr.golden
@@ -0,0 +1,3 @@
+
+error: unknown flag: --this-does-not-exist
+
diff --git a/cli/cmd/testdata/TestExecute/[flag_unknown]_with_pipe/stdout.golden b/cli/cmd/testdata/TestExecute/[flag_unknown]_with_pipe/stdout.golden
new file mode 100644
index 0000000..e69de29
diff --git a/cli/cmd/testdata/TestExecute/[flag_unknown]_with_terminal/stderr.golden b/cli/cmd/testdata/TestExecute/[flag_unknown]_with_terminal/stderr.golden
new file mode 100644
index 0000000..3043673
--- /dev/null
+++ b/cli/cmd/testdata/TestExecute/[flag_unknown]_with_terminal/stderr.golden
@@ -0,0 +1,3 @@
+
+error: unknown flag: --this-does-not-exist
+
diff --git a/cli/cmd/testdata/TestExecute/[flag_unknown]_with_terminal/stdout.golden b/cli/cmd/testdata/TestExecute/[flag_unknown]_with_terminal/stdout.golden
new file mode 100644
index 0000000..e69de29
diff --git a/cli/cmd/testdata/TestExecute/[general]_help_pipe/stderr.golden b/cli/cmd/testdata/TestExecute/[general]_help_pipe/stderr.golden
new file mode 100644
index 0000000..e69de29
diff --git a/cli/cmd/testdata/TestExecute/[general]_help_pipe/stdout.golden b/cli/cmd/testdata/TestExecute/[general]_help_pipe/stdout.golden
new file mode 100644
index 0000000..83daf0a
--- /dev/null
+++ b/cli/cmd/testdata/TestExecute/[general]_help_pipe/stdout.golden
@@ -0,0 +1,53 @@
+
+# html2markdown - convert html to markdown [version 2.3.4-test]
+
+Convert HTML to Markdown. Even works with entire websites!
+
+## Basics
+
+By default the "Commonmark" Plugin will be enabled. You can customize the options,
+for example changing the appearance of bold with --opt-strong-delimiter="__"
+
+Other Plugins can also be enabled. For example "GitHub Flavored Markdown" (GFM)
+extends Commonmark with more features.
+
+
+## Escaping
+
+Some characters have a special meaning in markdown. The library escapes these — if necessary.
+See the documentation for more info.
+
+
+## Security
+
+Once you convert this markdown *back* to HTML you need to be careful of malicious content.
+Use a HTML sanitizer before displaying the HTML in the browser!
+
+
+## Examples
+
+ echo "important" | html2markdown
+
+ curl --no-progress-meter http://example.com | html2markdown
+
+
+## Flags
+
+ -v, --version
+ show the version of html2markdown and exit
+
+ --help
+
+
+ --opt-strong-delimiter
+ Make bold text. Should be indicated by two asterisks or two underscores?
+ "**" or "__" (default: "**")
+
+ --plugins
+ which plugins should be enabled?
+
+
+
+For more information visit the documentation:
+https://github.com/Johanneskaufmann/html-to-markdown
+
diff --git a/cli/cmd/testdata/TestExecute/[general]_help_terminal/stderr.golden b/cli/cmd/testdata/TestExecute/[general]_help_terminal/stderr.golden
new file mode 100644
index 0000000..e69de29
diff --git a/cli/cmd/testdata/TestExecute/[general]_help_terminal/stdout.golden b/cli/cmd/testdata/TestExecute/[general]_help_terminal/stdout.golden
new file mode 100644
index 0000000..83daf0a
--- /dev/null
+++ b/cli/cmd/testdata/TestExecute/[general]_help_terminal/stdout.golden
@@ -0,0 +1,53 @@
+
+# html2markdown - convert html to markdown [version 2.3.4-test]
+
+Convert HTML to Markdown. Even works with entire websites!
+
+## Basics
+
+By default the "Commonmark" Plugin will be enabled. You can customize the options,
+for example changing the appearance of bold with --opt-strong-delimiter="__"
+
+Other Plugins can also be enabled. For example "GitHub Flavored Markdown" (GFM)
+extends Commonmark with more features.
+
+
+## Escaping
+
+Some characters have a special meaning in markdown. The library escapes these — if necessary.
+See the documentation for more info.
+
+
+## Security
+
+Once you convert this markdown *back* to HTML you need to be careful of malicious content.
+Use a HTML sanitizer before displaying the HTML in the browser!
+
+
+## Examples
+
+ echo "important" | html2markdown
+
+ curl --no-progress-meter http://example.com | html2markdown
+
+
+## Flags
+
+ -v, --version
+ show the version of html2markdown and exit
+
+ --help
+
+
+ --opt-strong-delimiter
+ Make bold text. Should be indicated by two asterisks or two underscores?
+ "**" or "__" (default: "**")
+
+ --plugins
+ which plugins should be enabled?
+
+
+
+For more information visit the documentation:
+https://github.com/Johanneskaufmann/html-to-markdown
+
diff --git a/cli/cmd/testdata/TestExecute/[general]_no_content/stderr.golden b/cli/cmd/testdata/TestExecute/[general]_no_content/stderr.golden
new file mode 100644
index 0000000..69d0a90
--- /dev/null
+++ b/cli/cmd/testdata/TestExecute/[general]_no_content/stderr.golden
@@ -0,0 +1,7 @@
+
+error: the html input should be piped into the cli
+
+Here is how you can use the CLI:
+
+ echo "important" | html2markdown
+
diff --git a/cli/cmd/testdata/TestExecute/[general]_no_content/stdout.golden b/cli/cmd/testdata/TestExecute/[general]_no_content/stdout.golden
new file mode 100644
index 0000000..e69de29
diff --git a/cli/cmd/testdata/TestExecute/[general]_version_pipe/stderr.golden b/cli/cmd/testdata/TestExecute/[general]_version_pipe/stderr.golden
new file mode 100644
index 0000000..e69de29
diff --git a/cli/cmd/testdata/TestExecute/[general]_version_pipe/stdout.golden b/cli/cmd/testdata/TestExecute/[general]_version_pipe/stdout.golden
new file mode 100644
index 0000000..56ec1e8
--- /dev/null
+++ b/cli/cmd/testdata/TestExecute/[general]_version_pipe/stdout.golden
@@ -0,0 +1,5 @@
+html2markdown
+
+GitVersion: 2.3.4-test
+GitCommit: ca82a6dff817ec66f44342007202690a93763949
+BuildDate: 2024-08-18T13:03:43Z
diff --git a/cli/cmd/testdata/TestExecute/[general]_version_terminal/stderr.golden b/cli/cmd/testdata/TestExecute/[general]_version_terminal/stderr.golden
new file mode 100644
index 0000000..e69de29
diff --git a/cli/cmd/testdata/TestExecute/[general]_version_terminal/stdout.golden b/cli/cmd/testdata/TestExecute/[general]_version_terminal/stdout.golden
new file mode 100644
index 0000000..56ec1e8
--- /dev/null
+++ b/cli/cmd/testdata/TestExecute/[general]_version_terminal/stdout.golden
@@ -0,0 +1,5 @@
+html2markdown
+
+GitVersion: 2.3.4-test
+GitCommit: ca82a6dff817ec66f44342007202690a93763949
+BuildDate: 2024-08-18T13:03:43Z
diff --git a/cli/cmd/testdata/TestExecute/[validation]_discouraged_value/stderr.golden b/cli/cmd/testdata/TestExecute/[validation]_discouraged_value/stderr.golden
new file mode 100644
index 0000000..c043047
--- /dev/null
+++ b/cli/cmd/testdata/TestExecute/[validation]_discouraged_value/stderr.golden
@@ -0,0 +1,3 @@
+
+error: invalid value for --opt-strong-delimiter="*" must be exactly 2 characters of "**" or "__"
+
diff --git a/cli/cmd/testdata/TestExecute/[validation]_discouraged_value/stdout.golden b/cli/cmd/testdata/TestExecute/[validation]_discouraged_value/stdout.golden
new file mode 100644
index 0000000..e69de29
diff --git a/cli/cmd/testdata/TestExecute/[validation]_invalid_value/stderr.golden b/cli/cmd/testdata/TestExecute/[validation]_invalid_value/stderr.golden
new file mode 100644
index 0000000..4cf6826
--- /dev/null
+++ b/cli/cmd/testdata/TestExecute/[validation]_invalid_value/stderr.golden
@@ -0,0 +1,3 @@
+
+error: invalid value for --opt-strong-delimiter="1234" must be exactly 2 characters of "**" or "__"
+
diff --git a/cli/cmd/testdata/TestExecute/[validation]_invalid_value/stdout.golden b/cli/cmd/testdata/TestExecute/[validation]_invalid_value/stdout.golden
new file mode 100644
index 0000000..e69de29
diff --git a/cli/cmd/testdata/TestExecute/[validation]_no_value/stderr.golden b/cli/cmd/testdata/TestExecute/[validation]_no_value/stderr.golden
new file mode 100644
index 0000000..e69de29
diff --git a/cli/cmd/testdata/TestExecute/[validation]_no_value/stdout.golden b/cli/cmd/testdata/TestExecute/[validation]_no_value/stdout.golden
new file mode 100644
index 0000000..e5180c8
--- /dev/null
+++ b/cli/cmd/testdata/TestExecute/[validation]_no_value/stdout.golden
@@ -0,0 +1 @@
+**text**
diff --git a/cli/cmd/util_pipe.go b/cli/cmd/util_pipe.go
new file mode 100644
index 0000000..769e1a7
--- /dev/null
+++ b/cli/cmd/util_pipe.go
@@ -0,0 +1,25 @@
+package cmd
+
+import (
+ "io"
+ "io/fs"
+ "os"
+)
+
+// ReadWriterWithStat is an io.ReadWriter that also exposes Stat (used by isPipe).
+type ReadWriterWithStat interface {
+ io.ReadWriter
+ Stat() (fs.FileInfo, error)
+}
+
+// isPipe reports whether f is not a character device, i.e. whether the
+// ModeCharDevice bit of its file mode is cleared. A Stat error is returned as is.
+func isPipe(f ReadWriterWithStat) (bool, error) {
+	info, err := f.Stat()
+	if err != nil {
+		return false, err
+	}
+
+	notCharDevice := info.Mode()&os.ModeCharDevice == 0
+	return notCharDevice, nil
+}
diff --git a/cli/main.go b/cli/main.go
new file mode 100644
index 0000000..cc9dc8e
--- /dev/null
+++ b/cli/main.go
@@ -0,0 +1,30 @@
+package main
+
+import (
+ "os"
+
+ "github.com/JohannesKaufmann/html-to-markdown/v2/cli/cmd"
+)
+
+var (
+ // Build metadata handed to cmd.Release in main. These are set by goreleaser:
+ version = "dev"
+ commit = "none"
+ date = "unknown"
+)
+
+// main hands the standard streams, the command-line arguments and the
+// build metadata (version, commit, date) to cmd.Run.
+func main() {
+	cmd.Run(
+		os.Stdin,
+		os.Stdout,
+		os.Stderr,
+		os.Args,
+		cmd.Release{
+			Version: version,
+			Commit:  commit,
+			Date:    date,
+		},
+	)
+}
diff --git a/collapse/collapse.go b/collapse/collapse.go
new file mode 100644
index 0000000..58a81fb
--- /dev/null
+++ b/collapse/collapse.go
@@ -0,0 +1,187 @@
+/*
+
+The function to collapse whitespace was adapted from the "turndown" library by Dom Christie,
+which was adapted from the "collapse-whitespace" library by Luc Thevenard.
+
+It was ported from Javascript to Golang by Johannes Kaufmann for the use in the "html-to-markdown" library.
+To increase performance the use of regex was replaced by custom code.
+
+https://github.com/wooorm/collapse-white-space
+https://github.com/mixmark-io/turndown
+https://github.com/JohannesKaufmann/html-to-markdown
+
+-----------
+
+MIT License
+
+Copyright (c) 2017 Dom Christie
+Copyright (c) 2014 Luc Thevenard
+Copyright (c) 2018 Johannes Kaufmann
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+*/
+
+// Package collapse collapses whitespace in HTML elements.
+//
+// It is a port of the JavaScript library "turndown" to Go.
+package collapse
+
+import (
+ "strings"
+
+ "github.com/JohannesKaufmann/dom"
+ "golang.org/x/net/html"
+)
+
+// isPreOrCode reports whether the node name is "pre" or "code".
+// Note: The original javascript version only checked for "pre".
+func isPreOrCode(node *html.Node) bool {
+	nodeName := dom.NodeName(node)
+	isPre, isCode := nodeName == "pre", nodeName == "code"
+	return isPre || isCode
+}
+
+// next returns the node that follows current in the depth-first walk.
+// The children of current are skipped when the walk just came back up from
+// one of them (prev is a child of current) or when current is "pre"/"code";
+// otherwise the first child is preferred. Without a node to descend into,
+// the next sibling is returned and, failing that, the parent.
+func next(prev *html.Node, current *html.Node) *html.Node {
+	cameFromChild := prev != nil && prev.Parent == current
+	skipChildren := cameFromChild || isPreOrCode(current)
+
+	switch {
+	case !skipChildren && current.FirstChild != nil:
+		return current.FirstChild
+	case current.NextSibling != nil:
+		return current.NextSibling
+	default:
+		return current.Parent
+	}
+}
+
+// blockElements holds the node names for which isBlock returns true.
+var blockElements = []string{
+ "address", "article", "aside", "audio", "blockquote", "body", "canvas", "center", "dd", "dir", "div", "dl", "dt", "fieldset", "figcaption", "figure", "footer", "form", "frameset", "h1", "h2", "h3", "h4", "h5", "h6", "header", "hgroup", "hr", "html", "isindex", "li", "main", "menu", "nav", "noframes", "noscript", "ol", "output", "p", "pre", "section", "table", "tbody", "td", "tfoot", "th", "thead", "tr", "ul",
+}
+// voidElements holds the node names for which isVoid returns true. Note: Compared to the javascript implementation, "source" was removed.
+var voidElements = []string{
+ "area", "base", "br", "col", "command", "embed", "hr", "img", "input", "keygen", "link", "meta", "param" /* "source, "*/, "track", "wbr",
+}
+
+// isBlock reports whether the node name is listed in blockElements.
+func isBlock(node *html.Node) bool {
+	nodeName := dom.NodeName(node)
+	for i := range blockElements {
+		if blockElements[i] == nodeName {
+			return true
+		}
+	}
+	return false
+}
+// isVoid reports whether the node name is listed in voidElements.
+func isVoid(node *html.Node) bool {
+	nodeName := dom.NodeName(node)
+	for i := range voidElements {
+		if voidElements[i] == nodeName {
+			return true
+		}
+	}
+	return false
+}
+
+// remove detaches node from its parent (node is expected to have one) and
+// returns the node to continue with: the former next sibling or, if node
+// was the last child, the parent.
+func remove(node *html.Node) *html.Node {
+	parent, following := node.Parent, node.NextSibling
+	parent.RemoveChild(node)
+	if following != nil {
+		return following
+	}
+	return parent
+}
+
+// Collapse normalizes the whitespace of the text nodes below element in place
+// and removes text nodes that end up empty as well as nodes that are neither
+// text, element nor comment (e.g. the doctype). "pre" and "code" are skipped.
+func Collapse(element *html.Node) {
+	if element.FirstChild == nil || isPreOrCode(element) {
+		return
+	}
+
+	var prevText, prev *html.Node
+	var keepLeadingWs = false
+	var node = next(prev, element)
+
+	for node != element {
+		if node.Type == html.TextNode /* node.nodeType == 4 */ { // Node.TEXT_NODE or Node.CDATA_SECTION_NODE
+			var text = replaceAnyWhitespaceWithSpace(node.Data)
+
+			// HasPrefix is also safe for an empty text, where indexing text[0] would panic.
+			if (prevText == nil || strings.HasSuffix(prevText.Data, " ")) &&
+				!keepLeadingWs && strings.HasPrefix(text, " ") {
+				text = text[1:]
+			}
+
+			// `text` might be empty at this point.
+			if text == "" {
+				node = remove(node)
+				continue
+			}
+
+			node.Data = text
+			prevText = node
+		} else if node.Type == html.ElementNode { // Node.ELEMENT_NODE
+			if isBlock(node) || dom.NodeName(node) == "br" {
+				if prevText != nil {
+					prevText.Data = strings.TrimSuffix(prevText.Data, " ")
+				}
+
+				prevText = nil
+				keepLeadingWs = false
+			} else if isVoid(node) || isPreOrCode(node) || node.Data == "code" {
+				// Avoid trimming space around non-block, non-BR void elements and inline PRE.
+				prevText = nil
+				keepLeadingWs = true
+			} else if prevText != nil {
+				// Drop protection if set previously.
+				keepLeadingWs = false
+			}
+		} else if node.Type == html.CommentNode {
+			// TODO: Is this enough to keep the comments? Does this cause other problems?
+		} else {
+			// E.g. DoctypeNode
+			node = remove(node)
+			continue
+		}
+
+		var nextNode = next(prev, node)
+		prev = node
+		node = nextNode
+	}
+
+	if prevText != nil {
+		prevText.Data = strings.TrimSuffix(prevText.Data, " ")
+		if prevText.Data == "" {
+			remove(prevText)
+		}
+	}
+}
diff --git a/collapse/collapse_test.go b/collapse/collapse_test.go
new file mode 100644
index 0000000..4998e85
--- /dev/null
+++ b/collapse/collapse_test.go
@@ -0,0 +1,289 @@
+package collapse
+
+import (
+ "bytes"
+ "strings"
+ "testing"
+
+ "golang.org/x/net/html"
+)
+
+// getBody searches doc depth-first for a "body" element and returns it, or
+// nil if there is none. The children of a found body are not searched.
+// Should several bodies exist, the one visited last is returned.
+func getBody(doc *html.Node) *html.Node {
+	if doc.Type == html.ElementNode && doc.Data == "body" {
+		return doc
+	}
+
+	var body *html.Node
+	for child := doc.FirstChild; child != nil; child = child.NextSibling {
+		if found := getBody(child); found != nil {
+			body = found
+		}
+	}
+
+	return body
+}
+
+// TestCollapse_DocType checks that the DOCTYPE gets removed by Collapse.
+func TestCollapse_DocType(t *testing.T) {
+	input := ``
+
+	doc, err := html.Parse(strings.NewReader(input))
+	if err != nil {
+		// Stop here: without a parsed document Collapse would dereference nil.
+		t.Fatal(err)
+	}
+
+	Collapse(doc)
+
+	var buf bytes.Buffer
+	if err := html.Render(&buf, doc); err != nil {
+		t.Fatal(err)
+	}
+
+	expected := ``
+	if got := buf.String(); got != expected {
+		t.Errorf("expected %q but got %q", expected, got)
+	}
+}
+
+// TestCollapse_NoFirstChild runs Collapse on an element without children
+// and compares the rendered result.
+func TestCollapse_NoFirstChild(t *testing.T) {
+	strong := &html.Node{Type: html.ElementNode, Data: "strong"}
+
+	// There is no first child, so Collapse returns right away.
+	Collapse(strong)
+
+	var rendered bytes.Buffer
+	if err := html.Render(&rendered, strong); err != nil {
+		t.Error(err)
+	}
+
+	const expected = ``
+	got := rendered.String()
+	if got != expected {
+		t.Errorf("expected %q but got %q", expected, got)
+	}
+}
+
+// TestCollapse_StartWithCode runs Collapse directly on a "code" element and
+// expects the whitespace of its text to be kept.
+func TestCollapse_StartWithCode(t *testing.T) {
+	code := &html.Node{
+		Type: html.ElementNode,
+		Data: "code",
+	}
+	code.AppendChild(&html.Node{
+		Type: html.TextNode,
+		Data: " text ",
+	})
+
+	Collapse(code)
+
+	var rendered bytes.Buffer
+	if err := html.Render(&rendered, code); err != nil {
+		t.Error(err)
+	}
+
+	const expected = ` text `
+	if got := rendered.String(); got != expected {
+		t.Errorf("expected %q but got %q", expected, got)
+	}
+}
+
+// TestCollapse_TwoTextNodes collapses a "span" that directly contains two
+// text nodes, both surrounded by whitespace, and compares the rendered
+// result.
+func TestCollapse_TwoTextNodes(t *testing.T) {
+	span := &html.Node{
+		Type: html.ElementNode,
+		Data: "span",
+	}
+
+	// Append the text nodes in document order.
+	for _, data := range []string{" a ", " b "} {
+		span.AppendChild(&html.Node{
+			Type: html.TextNode,
+			Data: data,
+		})
+	}
+
+	Collapse(span)
+
+	var rendered bytes.Buffer
+	if err := html.Render(&rendered, span); err != nil {
+		t.Error(err)
+	}
+
+	const expected = `a b`
+	got := rendered.String()
+	if got != expected {
+		t.Errorf("expected %q but got %q", expected, got)
+	}
+}
+
+// TestCollapse_LastTextIsEmpty collapses a "span" whose last text node
+// consists of whitespace only and expects that node to leave no trace in
+// the rendered result.
+func TestCollapse_LastTextIsEmpty(t *testing.T) {
+	span := &html.Node{
+		Type: html.ElementNode,
+		Data: "span",
+	}
+
+	// Append the text nodes in document order.
+	for _, data := range []string{"text", " "} {
+		span.AppendChild(&html.Node{
+			Type: html.TextNode,
+			Data: data,
+		})
+	}
+
+	Collapse(span)
+
+	var rendered bytes.Buffer
+	if err := html.Render(&rendered, span); err != nil {
+		t.Error(err)
+	}
+
+	const expected = `text`
+	got := rendered.String()
+	if got != expected {
+		t.Errorf("expected %q but got %q", expected, got)
+	}
+}
+
+func TestCollapse_Table(t *testing.T) {
+ runs := []struct {
+ desc string
+ input string
+ expected string
+ }{
+ {
+ desc: "basic example",
+ input: "
diff --git a/plugin/commonmark/testdata/GoldenFiles/list.out.md b/plugin/commonmark/testdata/GoldenFiles/list.out.md
new file mode 100644
index 0000000..ef9d811
--- /dev/null
+++ b/plugin/commonmark/testdata/GoldenFiles/list.out.md
@@ -0,0 +1,153 @@
+A paragraph
+
+- 1
+- 2
+- - 3.1
+ - 3.2
+- 4 Before
+
+ - 4.1
+ - 4.2
+- - 5.1
+
+ 5 After
+- 6 Before
+
+ 6 also Before
+
+ - 6A.1
+
+ 6 Between
+
+ - 6B.1
+
+ 6 After
+
+ 6 also After
+- 7
+
+* * *
+
+And also other lists...
+
+- First
+- Someone once said:
+
+ > My famous quote
+
+ \- someone
+
+
+
+09. Nine
+10. Ten
+11. 111. Eleven.A
+ 112. Eleven.B
+12. Someone once said:
+
+ > My famous quote
+
+ \- someone
+13. Thirteen
+
+List Item without Container
+
+* * *
+
+
+
+1. one
+2. two
+
+* * *
+
+
+
+8. a
+9. b
+
+
+
+
+
+09. a
+10. b
+
+* * *
+
+- Before text after
+- Before [text](/page) after
+
+* * *
+
+- A double `**` [can open strong emphasis](/page)
+
+* * *
+
+- List 1
+
+
+
+- List 2
+
+
+
+
+
+- List 3
+
+
+
+- List 4
+
+text between
+
+- List 5
+
+
+
+- List 6
+
+
+
+- List 7
+
+* * *
+
+- - List 1
+
+
+
+ - List 2
+
+
+
+ - List 3
+
+
+
+- Start Line
+
+ End Line
+
+
+
+1\.
+
+\-
+
+\+
+
+\*
+
+* * *
+
+1\. not a list
+
+\- not a list
+
+\+ not a list
+
+\* not a list
\ No newline at end of file
diff --git a/plugin/commonmark/testdata/GoldenFiles/metadata.in.html b/plugin/commonmark/testdata/GoldenFiles/metadata.in.html
new file mode 100644
index 0000000..ac5b64e
--- /dev/null
+++ b/plugin/commonmark/testdata/GoldenFiles/metadata.in.html
@@ -0,0 +1,55 @@
+
+
+
+
+
+ Page Title
+
+
+
Heading A
+
+
+
+
+
Heading B
+
+
+
+
\a \* \\
+
+
+ .<name>
+ .< name >.
+ <name>
+
+
+ 2 > 1
+ 1 < 2
+
+ A & B
+ A & B
+ ö
+
+
+
+ *not emphasized*
+ <br/> not a tag
+ [not a link](/foo)
+ `not code`
+ 1. not a list
+ * not a list
+ # not a heading
+ [foo]: /url "not a reference"
+ ö not a character entity
+
+
+
+
+ Start Line
+
+
+
+ End Line
+
+
+
diff --git a/plugin/commonmark/testdata/GoldenFiles/metadata.out.md b/plugin/commonmark/testdata/GoldenFiles/metadata.out.md
new file mode 100644
index 0000000..8d20176
--- /dev/null
+++ b/plugin/commonmark/testdata/GoldenFiles/metadata.out.md
@@ -0,0 +1,29 @@
+#### Heading A
+
+#### Heading B
+
+* * *
+
+\\a \\* \\\\
+
+.<name> .< name >. <name>
+
+2 > 1
+1 < 2
+A & B
+A & B
+ö
+
+\*not emphasized*
+<br/> not a tag
+\[not a link](/foo)
+\`not code\`
+1\. not a list
+\* not a list
+\# not a heading
+\[foo]: /url "not a reference"
+ö not a character entity
+
+Start Line
+
+End Line
\ No newline at end of file
diff --git a/plugin/commonmark/validation.go b/plugin/commonmark/validation.go
new file mode 100644
index 0000000..2b766a9
--- /dev/null
+++ b/plugin/commonmark/validation.go
@@ -0,0 +1,99 @@
+package commonmark
+
+import (
+ "fmt"
+ "strings"
+)
+
+// contains reports whether searchVal is one of values.
+func contains(values []string, searchVal string) bool {
+	idx := 0
+	for idx < len(values) && values[idx] != searchVal {
+		idx++
+	}
+	return idx < len(values)
+}
+
+// ValidateConfigError describes a config value that failed validation.
+type ValidateConfigError struct {
+ Key string
+ Value string
+
+ // Defaults to the form Key:"Value" but can be overridden to e.g. "--key=value"
+ KeyWithValue string
+ // Describes the accepted values; Error appends it after "must be".
+ patternDescription string
+}
+
+func (e *ValidateConfigError) setDefaultKeyWithValue() { // stores the default form Key:"Value"
+ e.KeyWithValue = fmt.Sprintf("%s:%q", e.Key, e.Value)
+}
+// Error formats the message; an empty KeyWithValue is first set to its default.
+func (e *ValidateConfigError) Error() string {
+	if len(e.KeyWithValue) == 0 {
+		e.setDefaultKeyWithValue()
+	}
+	return "invalid value for " + e.KeyWithValue + " must be " + e.patternDescription
+}
+
+// validateConfig returns a *ValidateConfigError for the first invalid option
+// of cfg, or nil. The em and strong delimiters must match exactly.
+func validateConfig(cfg *config) error {
+	if cfg.EmDelimiter != "_" && cfg.EmDelimiter != "*" {
+		return &ValidateConfigError{
+			Key:                "EmDelimiter",
+			Value:              cfg.EmDelimiter,
+			patternDescription: `exactly 1 character of "*" or "_"`,
+		}
+	}
+	if cfg.StrongDelimiter != "__" && cfg.StrongDelimiter != "**" {
+		return &ValidateConfigError{
+			Key:                "StrongDelimiter",
+			Value:              cfg.StrongDelimiter,
+			patternDescription: `exactly 2 characters of "**" or "__"`,
+		}
+	}
+
+	if strings.Count(cfg.HorizontalRule, "*") < 3 && strings.Count(cfg.HorizontalRule, "_") < 3 &&
+		strings.Count(cfg.HorizontalRule, "-") < 3 {
+		return &ValidateConfigError{
+			Key:                "HorizontalRule",
+			Value:              cfg.HorizontalRule,
+			patternDescription: `at least 3 characters of "*", "_" or "-"`,
+		}
+	}
+
+	if !contains([]string{"-", "+", "*"}, cfg.BulletListMarker) {
+		return &ValidateConfigError{
+			Key:                "BulletListMarker",
+			Value:              cfg.BulletListMarker,
+			patternDescription: `one of "-", "+" or "*"`,
+		}
+	}
+
+	if !contains([]string{"```", "~~~"}, cfg.CodeBlockFence) {
+		return &ValidateConfigError{
+			Key:                "CodeBlockFence",
+			Value:              cfg.CodeBlockFence,
+			patternDescription: "one of \"```\" or \"~~~\"",
+		}
+	}
+
+	if !contains([]string{"atx", "setext"}, string(cfg.HeadingStyle)) {
+		return &ValidateConfigError{
+			Key:                "HeadingStyle",
+			Value:              string(cfg.HeadingStyle),
+			patternDescription: `one of "atx" or "setext"`,
+		}
+	}
+
+	possibleLinkStyles := []string{string(LinkInlined), string(LinkReferencedIndex), string(LinkReferencedShort)}
+	if !contains(possibleLinkStyles, string(cfg.LinkStyle)) {
+		return &ValidateConfigError{
+			Key:                "LinkStyle",
+			Value:              string(cfg.LinkStyle),
+			patternDescription: `one of "inlined", "referenced_index" or "referenced_short"`,
+		}
+	}
+	return nil
+}
diff --git a/plugin/commonmark/validation_test.go b/plugin/commonmark/validation_test.go
new file mode 100644
index 0000000..1d1f12b
--- /dev/null
+++ b/plugin/commonmark/validation_test.go
@@ -0,0 +1,88 @@
+package commonmark
+
+import (
+ "fmt"
+ "testing"
+)
+
+// TestValidateConfig_Empty validates a config that only has default values.
+func TestValidateConfig_Empty(t *testing.T) {
+	cfg := fillInDefaultConfig(&config{})
+	if got := cfg.HeadingStyle; got != "atx" {
+		t.Error("the config value was not filled with the default value")
+	}
+
+	if err := validateConfig(&cfg); err != nil {
+		t.Errorf("expected no error but got %+v", err)
+	}
+}
+// TestValidateConfig_Success checks that an explicitly set, valid
+// HeadingStyle is kept and passes validation.
+func TestValidateConfig_Success(t *testing.T) {
+	cfg := fillInDefaultConfig(&config{HeadingStyle: "setext"})
+	// fillInDefaultConfig must not replace the explicit value.
+	if got := cfg.HeadingStyle; got != "setext" {
+		t.Error("the config value was overridden")
+	}
+
+	if err := validateConfig(&cfg); err != nil {
+		t.Errorf("expected no error but got %+v", err)
+	}
+}
+// TestValidateConfig_RandomValue expects an unknown HeadingStyle to be
+// rejected with a *ValidateConfigError that names the key and the value.
+func TestValidateConfig_RandomValue(t *testing.T) {
+	cfg := fillInDefaultConfig(&config{HeadingStyle: "random"})
+
+	err := validateConfig(&cfg)
+	if err == nil {
+		t.Fatal("expected an error")
+	}
+	e, ok := err.(*ValidateConfigError)
+	if !ok {
+		// Fatal instead of Error: e is nil here and is dereferenced below.
+		t.Fatal("expected an error of type ValidateConfigError")
+	}
+	if e.Key != "HeadingStyle" {
+		t.Errorf("expected a different value for 'key' but got %q", e.Key)
+	}
+	if e.Value != "random" {
+		t.Errorf("expected a different value for 'actual' but got %q", e.Value)
+	}
+	formatted := err.Error()
+	if formatted != "invalid value for HeadingStyle:\"random\" must be one of \"atx\" or \"setext\"" {
+		t.Errorf("expected a different formatted message but got %q", formatted)
+	}
+}
+
+// TestValidateConfig_KeyWithValue checks the default error message and the
+// message after KeyWithValue was overridden.
+func TestValidateConfig_KeyWithValue(t *testing.T) {
+	cfg := fillInDefaultConfig(&config{
+		StrongDelimiter: "*",
+	})
+
+	err := validateConfig(&cfg)
+	if err == nil {
+		t.Error("expected an error")
+	}
+	e, ok := err.(*ValidateConfigError)
+	if !ok {
+		t.Fatal("expected an error of type ValidateConfigError")
+	}
+
+	// The default error message for the golang api
+	expected1 := `invalid value for StrongDelimiter:"*" must be exactly 2 characters of "**" or "__"`
+	if formatted1 := err.Error(); formatted1 != expected1 {
+		t.Errorf("expected a different formatted message but got %q", formatted1)
+	}
+
+	// The error message for the cli
+	if e.Key == "StrongDelimiter" {
+		e.KeyWithValue = fmt.Sprintf("--%s=%q", "strong_delimiter", e.Value)
+	}
+	expected2 := `invalid value for --strong_delimiter="*" must be exactly 2 characters of "**" or "__"`
+	if formatted2 := err.Error(); formatted2 != expected2 {
+		t.Errorf("expected a different formatted message but got %q", formatted2)
+	}
+}
diff --git a/plugin/strikethrough/strikethrough.go b/plugin/strikethrough/strikethrough.go
new file mode 100644
index 0000000..bf0a8a1
--- /dev/null
+++ b/plugin/strikethrough/strikethrough.go
@@ -0,0 +1,103 @@
+package strikethrough
+
+import (
+ "bytes"
+ "unicode"
+
+ "github.com/JohannesKaufmann/dom"
+ "github.com/JohannesKaufmann/html-to-markdown/v2/converter"
+ "github.com/JohannesKaufmann/html-to-markdown/v2/internal/domutils"
+ "github.com/JohannesKaufmann/html-to-markdown/v2/internal/escape"
+ "github.com/JohannesKaufmann/html-to-markdown/v2/internal/textutils"
+ "golang.org/x/net/html"
+)
+
+// option configures the strikethroughPlugin.
+type option func(p *strikethroughPlugin)
+
+// WithDelimiter returns an option that sets the delimiter of the plugin.
+func WithDelimiter(delimiter string) option {
+	return func(p *strikethroughPlugin) { p.delimiter = delimiter }
+}
+
+type strikethroughPlugin struct { // state of the strikethrough plugin
+ delimiter string // passed to textutils.DelimiterForEveryLine when rendering; "~~" unless set via WithDelimiter
+}
+
+// NewStrikethroughPlugin returns a plugin that converts `<del>`, `<s>` and
+// `<strike>` elements. The delimiter is "~~" unless an option sets another one.
+func NewStrikethroughPlugin(opts ...option) converter.Plugin {
+	p := new(strikethroughPlugin)
+	for i := range opts {
+		opts[i](p)
+	}
+	if p.delimiter == "" {
+		p.delimiter = "~~"
+	}
+
+	return p
+}
+
+// Init registers on conv the pre-renderer, the escaped character '~' with its un-escaper, and the renderer; it always returns nil.
+func (s *strikethroughPlugin) Init(conv *converter.Converter) error {
+ conv.Register.PreRenderer(s.handlePreRender, converter.PriorityStandard)
+
+ conv.Register.EscapedChar('~')
+ conv.Register.UnEscaper(s.handleUnEscapers, converter.PriorityStandard)
+
+ conv.Register.Renderer(s.handleRender, converter.PriorityStandard)
+ return nil
+}
+
+func (s *strikethroughPlugin) handlePreRender(ctx converter.Context, doc *html.Node) { // pre-render hook registered in Init
+ domutils.RemoveRedundant(doc, nameIsBothStrikethough) // NOTE(review): presumably drops strikethrough elements nested inside each other — confirm in domutils
+ domutils.MergeAdjacent(doc, nameIsStrikethough) // NOTE(review): presumably merges neighbouring strikethrough elements — confirm in domutils
+}
+
+// handleUnEscapers returns 1 when chars[index] is a '~' that is followed by a
+// non-whitespace rune, and -1 otherwise.
+func (s *strikethroughPlugin) handleUnEscapers(chars []byte, index int) int {
+	if chars[index] != '~' {
+		return -1
+	}
+	// "not followed by Unicode whitespace"
+	// A zero rune from GetNextAsRune is treated like whitespace.
+	switch following := escape.GetNextAsRune(chars, index); {
+	case following == 0, unicode.IsSpace(following):
+		return -1
+	default:
+		return 1
+	}
+}
+
+// nameIsStrikethough reports whether the node name is "del", "s" or "strike".
+func nameIsStrikethough(node *html.Node) bool {
+	nodeName := dom.NodeName(node)
+	return nodeName == "del" || nodeName == "s" || nodeName == "strike"
+}
+func nameIsBothStrikethough(a *html.Node, b *html.Node) bool { // reports whether both a and b are strikethrough elements
+ return nameIsStrikethough(a) && nameIsStrikethough(b)
+}
+
+// handleRender renders strikethrough elements; other nodes get RenderTryNext.
+func (s strikethroughPlugin) handleRender(ctx converter.Context, w converter.Writer, n *html.Node) converter.RenderStatus {
+	if !nameIsStrikethough(n) {
+		return converter.RenderTryNext
+	}
+	return s.renderStrikethrough(ctx, w, n)
+}
+// renderStrikethrough renders the children of n into a buffer, passes the
+// bytes together with the delimiter to textutils.DelimiterForEveryLine and
+// writes the result to w. It always returns converter.RenderSuccess.
+func (s strikethroughPlugin) renderStrikethrough(ctx converter.Context, w converter.Writer, n *html.Node) converter.RenderStatus {
+	var children bytes.Buffer
+	ctx.RenderChildNodes(ctx, &children, n)
+
+	// If there is a newline character between the start and end delimiter
+	// the delimiters won't be recognized. Either we remove all newline characters
+	// OR on _every_ line we put start & end delimiters.
+	delimiter := []byte(s.delimiter)
+	w.Write(textutils.DelimiterForEveryLine(children.Bytes(), delimiter))
+
+	return converter.RenderSuccess
+}
diff --git a/plugin/strikethrough/strikethrough_test.go b/plugin/strikethrough/strikethrough_test.go
new file mode 100644
index 0000000..1698383
--- /dev/null
+++ b/plugin/strikethrough/strikethrough_test.go
@@ -0,0 +1,95 @@
+package strikethrough_test
+
+import (
+ "bytes"
+ "testing"
+
+ "github.com/JohannesKaufmann/html-to-markdown/v2/converter"
+ "github.com/JohannesKaufmann/html-to-markdown/v2/internal/tester"
+ "github.com/JohannesKaufmann/html-to-markdown/v2/plugin/commonmark"
+ "github.com/JohannesKaufmann/html-to-markdown/v2/plugin/strikethrough"
+)
+
+func TestNewStrikethroughPlugin(t *testing.T) {
+ runs := []struct {
+ desc string
+ input string
+ expected string
+ }{
+ {
+ desc: "simple",
+ input: `