Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions obsidiantools/_constants.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# WIKILINKS AND EMBEDDED FILES: regex that includes any aliases
# group 0 captures embedded link; group 1 is everything inside [[]]
# (indices above count the capture groups from 0: the optional leading '!'
# is the first capture group, the text between '[[' and ']]' is the second)
WIKILINK_REGEX = r'(!)?\[{2}([^\]\]]+)\]{2}'
# URLS: case-insensitive ((?i)) pattern for links that start with
# 'http://' / 'https://', 'www.' (optionally 'www' + up to 3 digits), or a
# bare 'domain.tld/' prefix; balanced parentheses inside the URL are allowed
# and trailing punctuation is excluded from the match.
URL_REGEX = r"(?i)\b((?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:'\".,<>?«»“”‘’]))"

# TAGS
TAG_INCLUDE_NESTED_REGEX = r'(?<!\()(?<!\\)#{1}([A-z]+[0-9_\-]*[A-Z0-9]?[^\s]+(?![^\[\[]*\]\]))\/?'
Expand Down
7 changes: 7 additions & 0 deletions obsidiantools/md_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import markdown
import frontmatter
from ._constants import (WIKILINK_REGEX,
URL_REGEX,
TAG_MAIN_ONLY_REGEX, TAG_INCLUDE_NESTED_REGEX,
WIKILINK_AS_STRING_REGEX,
EMBEDDED_FILE_LINK_AS_STRING_REGEX,
Expand Down Expand Up @@ -241,6 +242,8 @@ def get_tags(filepath: Path, *, show_nested: bool = False) -> list[str]:
str_transform_func=_transform_md_file_string_for_tag_parsing)
# remove wikilinks so that '#' headers are not caught:
src_txt = _remove_wikilinks_from_source_text(src_txt)
# remove URLs so that '#' in URLs are not caught
src_txt = _remove_URLs_from_source_text(src_txt)
tags = _get_tags_from_source_text(src_txt, show_nested=show_nested)
return tags

Expand Down Expand Up @@ -444,6 +447,10 @@ def _remove_wikilinks_from_source_text(src_txt: str) -> str:
return re.sub(WIKILINK_REGEX, '', src_txt)


def _remove_URLs_from_source_text(src_txt: str) -> str:
    """Return ``src_txt`` with every match of ``URL_REGEX`` deleted.

    Each matched URL is replaced by the empty string; all other text,
    including surrounding whitespace and newlines, is left untouched.
    """
    url_pattern = re.compile(URL_REGEX)
    return url_pattern.sub('', src_txt)


def _transform_md_file_string_for_tag_parsing(txt: str) -> str:
return txt.replace('\\#', '')

Expand Down
11 changes: 10 additions & 1 deletion tests/test_md_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
get_front_matter,
get_tags,
_remove_wikilinks_from_source_text,
_remove_URLs_from_source_text,
_replace_wikilinks_with_their_text,
_replace_md_links_with_their_text,
get_readable_text_from_md_file)
Expand Down Expand Up @@ -288,8 +289,16 @@ def test_latex():
assert actual_latex_list == expected_latex_list


def test_remove_wikilinks(txt_wikilink_extraction_stub):
def test_remove_wikilinks(txt_md_link_extraction_stub):
out_str = _remove_wikilinks_from_source_text(
txt_md_link_extraction_stub)

expected_str = '[Obsidian.md homepage]()\n[Github homepage]()\n\n[Github homepage]()\n'
assert out_str == expected_str


def test_remove_URLs(txt_wikilink_extraction_stub):
out_str = _remove_URLs_from_source_text(
txt_wikilink_extraction_stub)

expected_str = "\n" * 6
Expand Down
1 change: 1 addition & 0 deletions tests/vault-stub/Sussudio.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ However these shouldn't be recognised as tags:
- #1985
- [[American Psycho (film)#Patrick Bateman]]
- \#hash_char_not_tag
- docs.google.com/document/d/12345/edit#heading

```python
# #code_comment_not_tag
Expand Down
Loading