-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
19 changed files
with
433 additions
and
19 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
joppy | ||
platformdirs | ||
puremagic | ||
pypandoc_binary | ||
python-frontmatter | ||
pytodotxt |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,158 @@ | ||
"""Convert Synology Note Station notes to the intermediate format.""" | ||
|
||
from dataclasses import dataclass | ||
import hashlib | ||
import json | ||
from pathlib import Path | ||
import re | ||
import zipfile | ||
|
||
import common | ||
import converter | ||
import intermediate_format as imf | ||
|
||
|
||
@dataclass | ||
class Attachment: | ||
"""Represents a Note Station attachment.""" | ||
|
||
filename: Path | ||
md5: str | ||
ref: str | None = None | ||
title: str | None = None | ||
|
||
|
||
class Converter(converter.BaseConverter):
    """Convert a Synology Note Station export to the intermediate format.

    The export is either a ".nsx"/".zip" archive or an already extracted
    folder. It contains a "config.json" listing the IDs of all notebooks and
    notes, one JSON file per ID and the attachments as "file_*" files.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Attachment files found in the export folder (list of Attachment).
        self.available_resources = []

    @staticmethod
    def _load_json(path: Path):
        """Parse the JSON file at ``path``.

        The raw bytes are handed to ``json.loads``, which detects the
        UTF-8/16/32 encoding itself. This avoids decoding the file with the
        platform dependent locale encoding.
        """
        return json.loads(path.read_bytes())

    def prepare_input(self, input_: Path) -> Path | None:
        """Return the folder containing the export data.

        Args:
            input_: A ".nsx" or ".zip" archive, or an extracted folder.

        Returns:
            A temporary folder with the extracted archive, ``input_`` itself
            if it is a folder, or ``None`` if the input is not supported.
        """
        if input_.suffix.lower() in (".nsx", ".zip"):
            temp_folder = common.get_temp_folder()
            with zipfile.ZipFile(input_) as zip_ref:
                zip_ref.extractall(temp_folder)
            return temp_folder
        if input_.is_dir():
            return input_
        self.logger.error(f"Unsupported format for {self.app}")
        return None

    def find_parent_notebook(self, parent_id) -> imf.Notebook:
        """Return the converted notebook with the original ID ``parent_id``.

        Only the direct children of the root notebook are searched. The root
        notebook is returned as fallback if there is no match.
        """
        for notebook in self.root_notebook.child_notebooks:
            if notebook.original_id == parent_id:
                return notebook
        self.logger.debug(f"Couldn't find parent notebook with id {parent_id}")
        return self.root_notebook

    def handle_markdown_links(self, body: str) -> tuple[list, list]:
        """Collect the resources that are referenced by links in ``body``.

        Args:
            body: Markdown content of a note.

        Returns:
            A tuple of the referenced resources and an always empty list.
        """
        resources = []
        for file_prefix, description, url in common.get_markdown_links(body):
            if url.startswith(("http", "mailto:")):
                continue  # web link / mail
            original_text = f"{file_prefix}[{description}]({url})"
            # resource
            # Find resource file by "ref".
            matched_resources = [
                res for res in self.available_resources if res.ref == url
            ]
            if len(matched_resources) != 1:
                # Ambiguous or unknown reference: skip the link.
                self.logger.debug(
                    f"Found too less or too many resource: {len(matched_resources)}"
                )
                continue
            resource = matched_resources[0]
            resources.append(
                imf.Resource(
                    resource.filename, original_text, description or resource.title
                )
            )
        return resources, []

    def convert_notebooks(self, input_json: dict):
        """Append all notebooks listed in ``input_json`` to the root notebook.

        Args:
            input_json: Parsed "config.json". Each entry of its "notebook"
                list is the ID of a notebook and the name of its JSON file.
        """
        for notebook_id in input_json["notebook"]:
            notebook = self._load_json(self.root_path / notebook_id)

            self.root_notebook.child_notebooks.append(
                imf.Notebook(
                    {
                        "title": notebook["title"],
                        "user_created_time": notebook["ctime"],
                        "user_updated_time": notebook["mtime"],
                    },
                    original_id=notebook_id,
                )
            )

    def map_resources_by_hash(self, note: dict) -> list[imf.Resource]:
        """Match the attachments of ``note`` to the files on disk by MD5 hash.

        Attachments with a "ref" are referenced inside the note content. For
        them, only "ref" and "title" of the matching entry in
        ``self.available_resources`` are updated, so they can be found later
        when the links of the note content are handled.

        Args:
            note: Parsed JSON data of a single note.

        Returns:
            The resources for all matched attachments without a "ref".
        """
        resources = []
        for note_resource in note.get("attachment", {}).values():
            for file_resource in self.available_resources:
                if note_resource["md5"] == file_resource.md5:
                    if (ref := note_resource.get("ref")) is not None:
                        file_resource.ref = ref
                        file_resource.title = note_resource["name"]
                    else:
                        # The attachment is not referenced. Add it here.
                        # Referenced attachments are added later.
                        resources.append(
                            imf.Resource(
                                file_resource.filename, title=note_resource["name"]
                            )
                        )
                    # Only the first file with a matching hash is used.
                    break
        return resources

    def convert(self, file_or_folder: Path):
        """Convert all notebooks and notes of a Note Station export.

        Args:
            file_or_folder: The export archive or the extracted export folder.
        """
        self.root_path = self.prepare_input(file_or_folder)
        if self.root_path is None:
            return
        input_json = self._load_json(self.root_path / "config.json")

        # TODO: What is input_json["shortcut"]?
        # TODO: Are nested notebooks possible?

        self.convert_notebooks(input_json)

        # Start with a fresh list. Attachments of a previously converted
        # input would else lead to ambiguous matches.
        self.available_resources = []
        # dirty hack: Only option to map the files from file system
        # to the note content is by MD5 hash.
        for item in self.root_path.iterdir():
            if item.is_file() and item.stem.startswith("file_"):
                self.available_resources.append(
                    Attachment(item, hashlib.md5(item.read_bytes()).hexdigest())
                )

        for note_id in input_json["note"]:
            note = self._load_json(self.root_path / note_id)

            # resources / attachments
            resources = self.map_resources_by_hash(note)

            data = {
                "title": note["title"],
                "user_created_time": note["ctime"],
                "user_updated_time": note["mtime"],
                "source_application": self.app,
            }
            if (content_html := note.get("content")) is not None:
                # dirty hack: In the original data, the attachment_id is stored in the
                # "ref" attribute. Mitigate by storing it in the "src" attribute.
                # The match is limited to a single "<img ...>" tag and to a
                # standalone "ref" attribute. Else the "ref=" inside of "href="
                # would match and content between tags would get lost.
                content_html = re.sub(
                    r"<img\b[^>]*?\sref=", "<img src=", content_html
                )
                content_markdown = common.html_text_to_markdown(content_html)
                resources_referenced, _ = self.handle_markdown_links(content_markdown)
                resources.extend(resources_referenced)
                data["body"] = content_markdown

            common.try_transfer_dicts(
                note, data, ["latitude", "longitude", "source_url"]
            )

            parent_notebook = self.find_parent_notebook(note["parent_id"])
            parent_notebook.child_notes.append(
                imf.Note(
                    data,
                    tags=[imf.Tag({"title": tag}, tag) for tag in note.get("tag", [])],
                    resources=resources,
                    original_id=note_id,
                )
            )
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
1 change: 1 addition & 0 deletions
1
test_inputs/synology_note_station/1026_4OIJ8PR6215E9637KLADHU3K5S
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"category":"notebook","ctime":1581622991,"mtime":1581622991,"stack":"","title":"Test-Book 2"} |
1 change: 1 addition & 0 deletions
1
test_inputs/synology_note_station/1026_89F091GQG53B992ODLNRUEAVKS
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"ctime":1581623497,"encrypt":false,"latitude":51.3497041,"longitude":6.4225708,"mtime":1581623507,"parent_id":"1026_4OIJ8PR6215E9637KLADHU3K5S","title":"Empty note"} |
1 change: 1 addition & 0 deletions
1
test_inputs/synology_note_station/1026_95U5E7L2IH3B73EF6RA152KJ3S
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"category":"notebook","ctime":1581019360,"mtime":1581622970,"stack":"","title":"Test-Book 1"} |
1 change: 1 addition & 0 deletions
1
test_inputs/synology_note_station/1026_9OKGI8LVSP1AR2F5FQR7V5RT6S
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"attachment":{"_X6UIjCvWr1GScrqvI1bLGw":{"md5":"4b41a3475132bd861b30a878e30aa56a","name":"sample.pdf","rotate":true,"size":3028,"type":"binary"}},"ctime":1581622674,"encrypt":false,"latitude":51.3497041,"longitude":6.4225708,"mtime":1581622807,"parent_id":"1026_95U5E7L2IH3B73EF6RA152KJ3S","source_url":"http://www.africau.edu/images/default/sample.pdf","tag":["pdf-only"],"thumb":null,"title":"note with pdf attached"} |
1 change: 1 addition & 0 deletions
1
test_inputs/synology_note_station/1026_9SLT09NT4P2A3DO2KQHUPT34QG
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"brief":"This is a simple test note without any attachment.\nbullet point 1\nbullet point 2","content":"<div>This is a simple test note without any attachment.</div><ul><li>bullet point 1</li><li>bullet point 2</li></ul>","ctime":1577746800,"encrypt":false,"latitude":51.3497041,"longitude":6.4225708,"mtime":1581623094,"parent_id":"1026_4OIJ8PR6215E9637KLADHU3K5S","tag":["test_tag_1","test_tag_2"],"title":"Note without any attachment"} |
1 change: 1 addition & 0 deletions
1
...43F456724EA95C09995C6D6B20650B28058CA1FCD3D2A9CF2F726B821E71156ED03DA074A700732C505E2190E
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"attachment":{"_uWhCuJkW2p3GYY1In8x82A":{"ext":"jpg","height":239,"md5":"4774521912a394266afdd8dc9510992e","name":"tractor-3-1386656.jpg","ref":"MTU4MTYyMzM3NTcyOHRyYWN0b3ItMy0xMzg2NjU2LmpwZw==","rotate":true,"size":25350,"type":"image","width":360}},"brief":"Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod ","content":"<div>Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet. Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata sanctus est Lorem ipsum dolor sit amet.</div><div></div><div></div><div><span style=\"font-size: 12pt;\">Some jpg</span></div><div></div><div><img class=\"syno-notestation-image-object\" src=\"webman/3rdparty/NoteStation/images/transparent.gif\" border=\"0\" ref=\"MTU4MTYyMzM3NTcyOHRyYWN0b3ItMy0xMzg2NjU2LmpwZw==\" adjust=\"true\" /></div><div></div><div>Screenshot</div><div></div><div></div><div><table style=\"border-collapse: collapse; width: 120pt;\" border=\"0\" width=\"160\" cellspacing=\"0\" cellpadding=\"0\"><tbody><tr style=\"height: 15.0pt;\"><td style=\"height: 15.0pt; width: 60pt;\" width=\"80\" height=\"20\">Test1</td><td style=\"width: 60pt;\" width=\"80\">Test2</td></tr><tr style=\"height: 15.0pt;\"><td style=\"height: 15.0pt;\" align=\"right\" height=\"20\">1</td><td>a</td></tr><tr style=\"height: 15.0pt;\"><td style=\"height: 15.0pt;\" align=\"right\" height=\"20\">2</td><td>b</td></tr><tr style=\"height: 15.0pt;\"><td style=\"height: 15.0pt;\" align=\"right\" height=\"20\">3</td><td>c</td></tr><tr style=\"height: 15.0pt;\"><td style=\"height: 15.0pt;\" 
align=\"right\" height=\"20\">4</td><td>d</td></tr><tr style=\"height: 15.0pt;\"><td style=\"height: 15.0pt;\" align=\"right\" height=\"20\">5</td><td>e</td></tr></tbody></table></div>","ctime":655671600,"latitude":0,"longitude":0,"mtime":1582401173,"parent_id":"1026_95U5E7L2IH3B73EF6RA152KJ3S","source_url":"","tag":["test_tag_1","test_tag_2","test & tag % 3"],"thumb":"_uWhCuJkW2p3GYY1In8x82A","title":"Note with some regular text, some pictures, and some tags"} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
{ | ||
"note": [ | ||
"1026_DFB638943F456724EA95C09995C6D6B20650B28058CA1FCD3D2A9CF2F726B821E71156ED03DA074A700732C505E2190E", | ||
"1026_89F091GQG53B992ODLNRUEAVKS", | ||
"1026_9SLT09NT4P2A3DO2KQHUPT34QG", | ||
"1026_9OKGI8LVSP1AR2F5FQR7V5RT6S" | ||
], | ||
"notebook": [ | ||
"1026_95U5E7L2IH3B73EF6RA152KJ3S", | ||
"1026_4OIJ8PR6215E9637KLADHU3K5S" | ||
], | ||
"shortcut": { | ||
"id": [ | ||
"1026_B2AB2ACD445B4049127284D71C3BB6C796356DB3198C02E5790A676F66AE32D7DE1D92007909C6DFF8A25C184E272830", | ||
"1026_A7A13E7021E0651D3970131E85B5DA88E37AF4E55E36B360C7011F67BFA1C915F9F83D345A7FBA4BD49E90F61032C3C4", | ||
"1026_AF02E6F04AAF28C39912518354DE88AEC587EC92340C145741410D68B4B7CDB1A556C2FA7D21A8DF2487B0269BC70264", | ||
"1026_LM0IOCP9QH51B0AQTNCS4RVHP4" | ||
], | ||
"stack": [], | ||
"tag": [] | ||
} | ||
} |
Binary file added
BIN
+24.8 KB
test_inputs/synology_note_station/file_4774521912a394266afdd8dc9510992e
Binary file not shown.
Oops, something went wrong.