Skip to content

Commit

Permalink
convert zettelkasten bbcode to markdown
Browse files Browse the repository at this point in the history
closes #14
  • Loading branch information
marph91 committed Sep 23, 2024
1 parent 93fcc67 commit 5c15c97
Show file tree
Hide file tree
Showing 3 changed files with 132 additions and 7 deletions.
12 changes: 9 additions & 3 deletions docs/formats/zettelkasten.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,18 @@ This page describes how to convert notes from Zettelkasten to Markdown.

## Instructions

Zettelkasten supports Markdown export. However, the export doesn't convert everything, and note links and attachments get lost. Most data can be preserved by converting the original `.zkn3` file directly from its location in the filesystem, without exporting first.

1. [Install jimmy](../index.md#installation)
2. Convert to Markdown. Example: `jimmy-cli-linux test_zettelkasten.zkn3 --format zettelkasten`
3. [Import to your app](../import_instructions.md)

## Import Structure

Zettelkasten supports Markdown export. However, the export doesn't convert everything, and note links and attachments get lost. Most data can be preserved by converting the original `.zkn3` file directly from its location in the filesystem, without exporting first.

- Each zettel is converted to a separate note.
- The note body of Zettelkasten is converted from [BBCode](https://en.wikipedia.org/wiki/BBCode) to Markdown.
- Resources and attachments are converted if the original folders are located next to the `.zkn3` file, i.e. `img` for images and `attachments` for attachments.

## Known Limitations

The note body is still formatted in [BBCode](https://en.wikipedia.org/wiki/BBCode) until a good conversion path is available (see [github issue](https://github.com/marph91/jimmy/issues/14)).
Some formatting is not converted because it can't be expressed in pure Markdown (e.g. alignment).
1 change: 1 addition & 0 deletions requirements/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
bbcode==1.1.0
beautifulsoup4==4.12.3
enlighten==1.12.4
markdown==3.7
Expand Down
126 changes: 122 additions & 4 deletions src/formats/zettelkasten.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,110 @@
from pathlib import Path
import xml.etree.ElementTree as ET # noqa: N817

import bbcode

import common
import converter
import intermediate_format as imf


def bbcode_to_markdown(
bbcode_str: str,
) -> tuple[str, list[imf.NoteLink], list[imf.Resource]]:
note_links = []
images = []

parser = bbcode.Parser()
parser.add_simple_formatter("h1", "# %(value)s")
parser.add_simple_formatter("h2", "## %(value)s")
parser.add_simple_formatter("br", "\n", standalone=True)
parser.add_simple_formatter("q", "> %(value)s")
parser.add_simple_formatter("code", "\n```\n%(value)s\n```")

# left, right aligned, centered, justified - not supported
parser.add_simple_formatter("al", "%(value)s")
parser.add_simple_formatter("ar", "%(value)s")
parser.add_simple_formatter("c", "%(value)s")
parser.add_simple_formatter("ab", "%(value)s")

# text formatting
parser.add_simple_formatter("f", "**%(value)s**")
parser.add_simple_formatter("k", "*%(value)s*")
parser.add_simple_formatter("u", "++%(value)s++")
parser.add_simple_formatter("d", "~~%(value)s~~")
parser.add_simple_formatter("qm", '"%(value)s"')
parser.add_simple_formatter("sub", "~%(value)s~")
parser.add_simple_formatter("sup", "^%(value)s^")

# colored -> bold
parser.add_simple_formatter("h", "**%(value)s**")

# forms
def _render_form(name, value, options, parent, context):
return " ".join([f"`{key}={value}`" for key, value in options.items()])

parser.add_formatter("form", _render_form, standalone=True)

# lists
def _render_list_item(name, value, options, parent, context):
match parent.tag_name:
case "l":
return f"* {value}\n"
case "n":
return f"1. {value}\n"
case _:
return value

parser.add_simple_formatter("l", "%(value)s")
parser.add_simple_formatter("n", "%(value)s")
parser.add_formatter("*", _render_list_item)

# images and internal note links
def _render_image(name, value, options, parent, context):
text = f"![]({value})"
images.append(imf.Resource(Path(value), text))
return text

parser.add_formatter("img", _render_image)

def _render_internal_link(name, value, options, parent, context):
id_ = list(options)[0]
text = f"[{value}]({id_})"
note_links.append(imf.NoteLink(text, id_, value))
return text

parser.add_formatter("z", _render_internal_link)

# tables
def _render_table(name, value, options, parent, context):
table_md = []
for line in value.split("\n"):
if "^" in line:
# header
table_md.append("| " + " | ".join(line.split("^")) + " |")
separator = ["---"] * len(line.split("^"))
table_md.append("| " + " | ".join(separator) + " |")
elif "|" in line:
# row
table_md.append("| " + " | ".join(line.split("|")) + " |")
else:
table_md.append(line)
return "\n".join(table_md)

parser.add_formatter("table", _render_table)
parser.add_simple_formatter("tc", "%(value)s\n")

markdown = parser.format(
bbcode_str,
install_defaults=False,
escape_html=False,
newline="\n",
replace_cosmetic=False,
replace_links=False,
)
return markdown, note_links, images


class Converter(converter.BaseConverter):
accepted_extensions = [".zkn3"]

Expand All @@ -19,7 +118,7 @@ def parse_attributes(self, zettel, note_imf: imf.Note):
for key, value in zettel.attrib.items():
match key:
case "zknid":
note_imf.original_id = value
pass # This ID seems to be not used for linking.
case "ts_created":
note_imf.created = dt.datetime.strptime(value, "%y%m%d%H%M")
case "ts_edited":
Expand All @@ -41,18 +140,26 @@ def convert(self, file_or_folder: Path):
"Attachments are not converted."
)

images_folder = file_or_folder.parent / "img"
images_available = images_folder.is_dir()
if not images_available:
self.logger.warning(
f"No images folder found at {images_folder}. "
"Images are not converted."
)

tag_id_name_map = {}
root_node = ET.parse(self.root_path / "keywordFile.xml").getroot()
for keyword in root_node.findall("entry"):
if (tag_id := keyword.attrib.get("f")) is not None:
tag_id_name_map[tag_id] = keyword.text

root_node = ET.parse(self.root_path / "zknFile.xml").getroot()
for zettel in root_node.findall("zettel"):
for id_, zettel in enumerate(root_node.findall("zettel"), start=1):
title = item.text if (item := zettel.find("title")) is not None else ""
assert title is not None
self.logger.debug(f'Converting note "{title}"')
note_imf = imf.Note(title)
note_imf = imf.Note(title, original_id=str(id_))

self.parse_attributes(zettel, note_imf)

Expand All @@ -61,7 +168,18 @@ def convert(self, file_or_folder: Path):
case "title":
pass # handled already
case "content":
note_imf.body = item.text if item.text else ""
body, note_links, images = bbcode_to_markdown(
item.text if item.text else ""
)
note_imf.body = body
note_imf.note_links.extend(note_links)

if images_available:
for image in images:
image.filename = images_folder / image.filename
# Set manually, because with invalid path its set to False.
image.is_image = True
note_imf.resources.append(image)
case "author":
note_imf.author = item.text
case "keywords":
Expand Down

0 comments on commit 5c15c97

Please sign in to comment.