Skip to content

Commit

Permalink
add synology note station
Browse files Browse the repository at this point in the history
  • Loading branch information
marph91 committed Apr 13, 2024
1 parent 8fe9b1f commit d42ae17
Show file tree
Hide file tree
Showing 19 changed files with 433 additions and 19 deletions.
1 change: 1 addition & 0 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ There are many more apps supported implicitly if they export text files to a fol
| [Notion](https://www.notion.so/) | notion | [link](https://www.notion.so/de-de/help/export-your-content) [1] |
| [Obsidian](https://obsidian.md/) | obsidian | |
| [Simplenote](https://simplenote.com/) | simplenote | [link](https://simplenote.com/help/#export) |
| [Synology Note Station](https://www.synology.com/en-global/dsm/feature/note_station) | synology_note_station | [link](https://kb.synology.com/en-global/DSM/help/NoteStation/note_station_managing_notes?version=7#t7) |
| [TiddlyWiki](https://tiddlywiki.com/) | tiddlywiki | [JSON only](https://tiddlywiki.com/static/How%2520to%2520export%2520tiddlers.html) [2] |
| [Todo.txt](http://todotxt.org/) | todo_txt | |
| [Todoist](https://todoist.com/) | todoist | [link](https://todoist.com/de/help/articles/introduction-to-backups-ywaJeQbN) [3] |
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
joppy
platformdirs
puremagic
pypandoc_binary
python-frontmatter
pytodotxt
2 changes: 2 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ warn_unused_ignores = True
no_implicit_reexport = True
strict_equality = True
extra_checks = True
[mypy-puremagic.*]
ignore_missing_imports = True
[mypy-pypandoc.*]
ignore_missing_imports = True
[mypy-pytodotxt.*]
Expand Down
8 changes: 2 additions & 6 deletions src/apps/nimbus_note.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@
from pathlib import Path
import zipfile

import pypandoc

import common
import converter
import intermediate_format as imf

Expand All @@ -22,10 +21,7 @@ def convert(self, file_or_folder: Path):
for html_note in html_notes:
with zip_ref.open(html_note) as zip_note:
note_body_html = zip_note.read().decode("UTF-8")
# Don't use "commonmark_x". There would be too many noise.
note_body_markdown = pypandoc.convert_text(
note_body_html, "markdown_strict-raw_html", format="html"
)
note_body_markdown = common.html_text_to_markdown(note_body_html)
note_joplin = imf.Note(
{
"title": file_.stem,
Expand Down
158 changes: 158 additions & 0 deletions src/apps/synology_note_station.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,158 @@
"""Convert Synology Note Station notes to the intermediate format."""

from dataclasses import dataclass
import hashlib
import json
from pathlib import Path
import re
import zipfile

import common
import converter
import intermediate_format as imf


@dataclass
class Attachment:
    """Represents a Note Station attachment."""

    # Path of the attachment file inside the export folder.
    filename: Path
    # Hex MD5 digest of the file content. It is compared against the "md5"
    # value of a note's attachment entry to map the file to that note.
    md5: str
    # "ref" value of the note's attachment entry. Stays None until an entry
    # with a "ref" is matched, i.e. for attachments not referenced that way.
    ref: str | None = None
    # "name" value of the note's attachment entry (set together with "ref").
    title: str | None = None


class Converter(converter.BaseConverter):
    """Convert a Synology Note Station export to the intermediate format.

    The export is either a ".nsx"/".zip" archive or an already extracted
    folder. It contains a "config.json" listing note and notebook IDs, one
    JSON file per ID and the attachment files ("file_*").

    NOTE(review): "self.logger", "self.app" and "self.root_notebook" are
    presumably provided by "converter.BaseConverter" - confirm there.
    """

    # Matches an opening "<img" tag up to (and including) its "ref=" attribute.
    # The match is confined to the tag itself: quoted attribute values are
    # skipped as a whole, an unquoted ">" ends the tag, and the whitespace in
    # front of "ref=" prevents matching other attributes ending in "ref"
    # (like "href=").
    _IMG_REF_REGEX = re.compile(r"""<img\b(?:[^>"']|"[^"]*"|'[^']*')*?\sref=""")

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Attachment files found in the export folder. Filled by "convert()".
        self.available_resources: list[Attachment] = []

    def _read_json(self, name: str) -> dict:
        """Load the JSON file "name" from the root folder of the export."""
        # Pass bytes: json detects the encoding (UTF-8/16/32) on its own,
        # instead of relying on the locale encoding of the platform.
        return json.loads((self.root_path / name).read_bytes())

    def prepare_input(self, input_: Path) -> Path | None:
        """Return the folder that contains the export.

        ".nsx" and ".zip" files are extracted to a temporary folder. Folders
        are used as they are. Anything else is logged as error and yields None.
        """
        if input_.suffix.lower() in (".nsx", ".zip"):
            temp_folder = common.get_temp_folder()
            with zipfile.ZipFile(input_) as zip_ref:
                zip_ref.extractall(temp_folder)
            return temp_folder
        if input_.is_dir():
            return input_
        self.logger.error(f"Unsupported format for {self.app}")
        return None

    def find_parent_notebook(self, parent_id) -> imf.Notebook:
        """Return the notebook with the original ID "parent_id".

        Only direct children of the root notebook are searched. The root
        notebook is the fallback if there is no match.
        """
        for notebook in self.root_notebook.child_notebooks:
            if notebook.original_id == parent_id:
                return notebook
        self.logger.debug(f"Couldn't find parent notebook with id {parent_id}")
        return self.root_notebook

    def handle_markdown_links(self, body: str) -> tuple[list, list]:
        """Collect the resources that are linked in the Markdown "body".

        Returns a tuple "(resources, note_links)". The second list is always
        empty, since only resources are detected here.
        """
        resources = []
        for file_prefix, description, url in common.get_markdown_links(body):
            if url.startswith(("http", "mailto:")):
                continue  # web link / mail
            original_text = f"{file_prefix}[{description}]({url})"
            # resource
            # Find resource file by "ref".
            matched_resources = [
                res for res in self.available_resources if res.ref == url
            ]
            if len(matched_resources) != 1:
                # The f-prefix is needed to log the actual count.
                self.logger.debug(
                    f"Found too less or too many resource: {len(matched_resources)}"
                )
                continue
            resource = matched_resources[0]
            resources.append(
                imf.Resource(
                    resource.filename, original_text, description or resource.title
                )
            )
        return resources, []

    def convert_notebooks(self, input_json: dict):
        """Append one notebook per ID in 'input_json["notebook"]' to the root."""
        for notebook_id in input_json["notebook"]:
            notebook = self._read_json(notebook_id)

            self.root_notebook.child_notebooks.append(
                imf.Notebook(
                    {
                        "title": notebook["title"],
                        "user_created_time": notebook["ctime"],
                        "user_updated_time": notebook["mtime"],
                    },
                    original_id=notebook_id,
                )
            )

    def map_resources_by_hash(self, note: dict) -> list[imf.Resource]:
        """Map the attachment entries of "note" to the available files by MD5.

        Entries with a "ref" only annotate the matching file with "ref" and
        "title". They are resolved later, when the links of the note body are
        handled. Entries without a "ref" are returned as resources directly.
        """
        resources = []
        # "or {}": Tolerate a missing, null or empty "attachment" value.
        for note_resource in (note.get("attachment") or {}).values():
            for file_resource in self.available_resources:
                if note_resource["md5"] == file_resource.md5:
                    if (ref := note_resource.get("ref")) is not None:
                        file_resource.ref = ref
                        file_resource.title = note_resource["name"]
                    else:
                        # The attachment is not referenced. Add it here.
                        # Referenced attachments are added later.
                        resources.append(
                            imf.Resource(
                                file_resource.filename, title=note_resource["name"]
                            )
                        )
                    break
        return resources

    def convert(self, file_or_folder: Path):
        """Convert the export at "file_or_folder".

        The notebooks are appended to the root notebook. The notes are
        appended to their parent notebooks.
        """
        self.root_path = self.prepare_input(file_or_folder)
        if self.root_path is None:
            return
        input_json = self._read_json("config.json")

        # TODO: What is input_json["shortcut"]?
        # TODO: Are nested notebooks possible?

        self.convert_notebooks(input_json)

        # dirty hack: Only option to map the files from file system
        # to the note content is by MD5 hash.
        for item in self.root_path.iterdir():
            if item.is_file() and item.stem.startswith("file_"):
                self.available_resources.append(
                    Attachment(item, hashlib.md5(item.read_bytes()).hexdigest())
                )

        for note_id in input_json["note"]:
            note = self._read_json(note_id)

            # resources / attachments
            resources = self.map_resources_by_hash(note)

            data = {
                "title": note["title"],
                "user_created_time": note["ctime"],
                "user_updated_time": note["mtime"],
                "source_application": self.app,
            }
            if (content_html := note.get("content")) is not None:
                # dirty hack: In the original data, the attachment_id is stored in the
                # "ref" attribute. Mitigate by storing it in the "src" attribute.
                content_html = self._IMG_REF_REGEX.sub("<img src=", content_html)
                content_markdown = common.html_text_to_markdown(content_html)
                resources_referenced, _ = self.handle_markdown_links(content_markdown)
                resources.extend(resources_referenced)
                data["body"] = content_markdown

            common.try_transfer_dicts(
                note, data, ["latitude", "longitude", "source_url"]
            )

            parent_notebook = self.find_parent_notebook(note["parent_id"])
            parent_notebook.child_notes.append(
                imf.Note(
                    data,
                    # "or []": Tolerate a missing or null "tag" value.
                    tags=[imf.Tag({"title": tag}, tag) for tag in note.get("tag") or []],
                    resources=resources,
                    original_id=note_id,
                )
            )
27 changes: 26 additions & 1 deletion src/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import tempfile
import time

import pypandoc


LOGGER = logging.getLogger("joplin_custom_importer")

Expand All @@ -15,7 +17,23 @@
# operations on note body
###########################################################

MARKDOWN_LINK_REGEX = re.compile(r"(!)?\[([^\]]+)\]\(([^)]+)\)")

def try_transfer_dicts(source: dict, target: dict, keys: list[str | tuple[str, str]]):
"""Try to transfer values from one to another dict if they exist."""
for key in keys:
if isinstance(key, tuple):
source_key, target_key = key
else:
source_key = target_key = key
if (value := source.get(source_key)) is not None:
target[target_key] = value


###########################################################
# operations on note body
###########################################################

MARKDOWN_LINK_REGEX = re.compile(r"(!)?\[([^\]]*)\]\(([^)]+)\)")
WIKILINK_LINK_REGEX = re.compile(r"(!)?\[\[(.+?)(?:\|(.+?))?\]\]")


Expand Down Expand Up @@ -46,6 +64,13 @@ def get_inline_tags(text: str, start_characters: list[str]) -> list[str]:
return list(tags)


def html_text_to_markdown(html_text: str) -> str:
    """Convert a HTML string to Markdown using pypandoc."""
    # "commonmark_x" is avoided on purpose: Its output contains too much noise.
    output_format = "markdown_strict+pipe_tables-raw_html"
    return pypandoc.convert_text(html_text, output_format, format="html")


###########################################################
# folder operations
###########################################################
Expand Down
2 changes: 1 addition & 1 deletion src/importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def import_note(self, note: imf.Note):
)
if resource.original_text is None:
# append
note.data["body"] = f"{note.data['body']}\n{resource_markdown}"
note.data["body"] = f"{note.data.get('body', '')}\n{resource_markdown}"
else:
# replace existing link
note.data["body"] = note.data["body"].replace(
Expand Down
20 changes: 9 additions & 11 deletions src/intermediate_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from dataclasses import dataclass, field
from pathlib import Path
import puremagic


@dataclass
Expand All @@ -28,19 +29,15 @@ class Resource:
original_text: str | None = None
# [title_or_filename](:/resource_id)
title: str | None = None
is_image: bool = field(init=False)

@property
def is_image(self) -> bool:
# Just take the supported image types of Joplin:
def __post_init__(self):
# Supported image types of Joplin:
# https://github.com/laurent22/joplin/blob/a3eec19b32684b86202c751c94c092c7339c6307/packages/lib/models/utils/resourceUtils.ts#L40-L43
return self.filename.suffix.lower() in (
".jpg",
".jpeg",
".png",
".gif",
".svg",
".webp",
".avif",
# We can't simply match by extension, because sometimes the files/images
# are stored as binary blob without extension.
self.is_image = puremagic.from_file(self.filename, mime=True).startswith(
"image/"
)


Expand Down Expand Up @@ -72,3 +69,4 @@ class Notebook:
data: dict
child_notebooks: list[Notebook] = field(default_factory=list)
child_notes: list[Note] = field(default_factory=list)
original_id: str | None = None
1 change: 1 addition & 0 deletions test/example_commands.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,6 @@ python src/joplin_custom_importer.py test_inputs/dynalist --app dynalist
python src/joplin_custom_importer.py test_inputs/Google\ Keep --app google_keep
python src/joplin_custom_importer.py test_inputs/obsidian_vault --app obsidian
python src/joplin_custom_importer.py test_inputs/simplenote --app simplenote
python src/joplin_custom_importer.py test_inputs/synology_note_station --app synology_note_station
python src/joplin_custom_importer.py test_inputs/todo_txt/examples_from_readme.txt --app todo_txt
python src/joplin_custom_importer.py test_inputs/todoist/Privates.csv --app todoist
6 changes: 6 additions & 0 deletions test/test_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,12 @@ def test_simplenote(self):
stats = self.get_stats([TEST_INPUTS / "simplenote"], "simplenote")
self.assert_stats(stats, notes=2, tags=2, note_links=1)

def test_synology_note_station(self):
    """Import the Synology Note Station test data and check the statistics."""
    test_input = TEST_INPUTS / "synology_note_station"
    stats = self.get_stats([test_input], "synology_note_station")
    self.assert_stats(stats, notebooks=3, notes=4, tags=6, resources=2)

def test_tiddlywiki(self):
# TODO
self.skipTest("no public test data yet")
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"category":"notebook","ctime":1581622991,"mtime":1581622991,"stack":"","title":"Test-Book 2"}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"ctime":1581623497,"encrypt":false,"latitude":51.3497041,"longitude":6.4225708,"mtime":1581623507,"parent_id":"1026_4OIJ8PR6215E9637KLADHU3K5S","title":"Empty note"}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"category":"notebook","ctime":1581019360,"mtime":1581622970,"stack":"","title":"Test-Book 1"}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"attachment":{"_X6UIjCvWr1GScrqvI1bLGw":{"md5":"4b41a3475132bd861b30a878e30aa56a","name":"sample.pdf","rotate":true,"size":3028,"type":"binary"}},"ctime":1581622674,"encrypt":false,"latitude":51.3497041,"longitude":6.4225708,"mtime":1581622807,"parent_id":"1026_95U5E7L2IH3B73EF6RA152KJ3S","source_url":"http://www.africau.edu/images/default/sample.pdf","tag":["pdf-only"],"thumb":null,"title":"note with pdf attached"}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"brief":"This is a simple test note without any attachment.\nbullet point 1\nbullet point 2","content":"<div>This is a simple test note without any attachment.</div><ul><li>bullet point 1</li><li>bullet point 2</li></ul>","ctime":1577746800,"encrypt":false,"latitude":51.3497041,"longitude":6.4225708,"mtime":1581623094,"parent_id":"1026_4OIJ8PR6215E9637KLADHU3K5S","tag":["test_tag_1","test_tag_2"],"title":"Note without any attachment"}
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{"attachment":{"_uWhCuJkW2p3GYY1In8x82A":{"ext":"jpg","height":239,"md5":"4774521912a394266afdd8dc9510992e","name":"tractor-3-1386656.jpg","ref":"MTU4MTYyMzM3NTcyOHRyYWN0b3ItMy0xMzg2NjU2LmpwZw==","rotate":true,"size":25350,"type":"image","width":360}},"brief":"Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod ","content":"<div>Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.</div><div></div><div></div><div><span style=\"font-size: 12pt;\">Some jpg</span></div><div></div><div><img class=\"syno-notestation-image-object\" src=\"webman/3rdparty/NoteStation/images/transparent.gif\" border=\"0\" ref=\"MTU4MTYyMzM3NTcyOHRyYWN0b3ItMy0xMzg2NjU2LmpwZw==\" adjust=\"true\" /></div><div></div><div>Screenshot</div><div></div><div></div><div><table style=\"border-collapse: collapse; width: 120pt;\" border=\"0\" width=\"160\" cellspacing=\"0\" cellpadding=\"0\"><tbody><tr style=\"height: 15.0pt;\"><td style=\"height: 15.0pt; width: 60pt;\" width=\"80\" height=\"20\">Test1</td><td style=\"width: 60pt;\" width=\"80\">Test2</td></tr><tr style=\"height: 15.0pt;\"><td style=\"height: 15.0pt;\" align=\"right\" height=\"20\">1</td><td>a</td></tr><tr style=\"height: 15.0pt;\"><td style=\"height: 15.0pt;\" align=\"right\" height=\"20\">2</td><td>b</td></tr><tr style=\"height: 15.0pt;\"><td style=\"height: 15.0pt;\" align=\"right\" height=\"20\">3</td><td>c</td></tr><tr style=\"height: 15.0pt;\"><td style=\"height: 15.0pt;\" 
align=\"right\" height=\"20\">4</td><td>d</td></tr><tr style=\"height: 15.0pt;\"><td style=\"height: 15.0pt;\" align=\"right\" height=\"20\">5</td><td>e</td></tr></tbody></table></div>","ctime":655671600,"latitude":0,"longitude":0,"mtime":1582401173,"parent_id":"1026_95U5E7L2IH3B73EF6RA152KJ3S","source_url":"","tag":["test_tag_1","test_tag_2","test & tag % 3"],"thumb":"_uWhCuJkW2p3GYY1In8x82A","title":"Note with some regular text, some pictures, and some tags"}
22 changes: 22 additions & 0 deletions test_inputs/synology_note_station/config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
{
"note": [
"1026_DFB638943F456724EA95C09995C6D6B20650B28058CA1FCD3D2A9CF2F726B821E71156ED03DA074A700732C505E2190E",
"1026_89F091GQG53B992ODLNRUEAVKS",
"1026_9SLT09NT4P2A3DO2KQHUPT34QG",
"1026_9OKGI8LVSP1AR2F5FQR7V5RT6S"
],
"notebook": [
"1026_95U5E7L2IH3B73EF6RA152KJ3S",
"1026_4OIJ8PR6215E9637KLADHU3K5S"
],
"shortcut": {
"id": [
"1026_B2AB2ACD445B4049127284D71C3BB6C796356DB3198C02E5790A676F66AE32D7DE1D92007909C6DFF8A25C184E272830",
"1026_A7A13E7021E0651D3970131E85B5DA88E37AF4E55E36B360C7011F67BFA1C915F9F83D345A7FBA4BD49E90F61032C3C4",
"1026_AF02E6F04AAF28C39912518354DE88AEC587EC92340C145741410D68B4B7CDB1A556C2FA7D21A8DF2487B0269BC70264",
"1026_LM0IOCP9QH51B0AQTNCS4RVHP4"
],
"stack": [],
"tag": []
}
}
Binary file not shown.
Loading

0 comments on commit d42ae17

Please sign in to comment.