Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions obsidiantools/_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@
# Wikilinks: an optional leading '!' (embedded file) followed by [[...]].
# group 0 captures embedded link; group 1 is everything inside [[]]
# NOTE(review): "group 0"/"group 1" read like 0-based positions in the tuples
# returned by re.findall (i.e. regex groups 1 and 2) — confirm against callers.
WIKILINK_REGEX = r'(!)?\[{2}([^\]\]]+)\]{2}'

# PROPERTIES
# Matches one whole line of the form `key:: value`.
#   group 1: the key, optionally wrapped in a quote character on either side;
#            it may contain single colons but no quote characters.
#   group 2: everything after `::` and any following whitespace, including
#            surrounding quote characters if present.
INLINE_PROPERTY_REGEX = r'^[\s]*(?:"|\')?([^:"\']+?(?::[^:"\']+?)*?)(?:"|\')?[\s]*::\s*(["\']?.*?["\']?)$'
# Matches `[...]` with non-empty content that contains no `]`;
# group 1 is the text between the brackets.
INLINE_PROPERTY_VALUE_ARRAY_REGEX = r'\[([^\]]+)\]'

# TAGS
# Both patterns capture the text after a single '#' that is not directly
# preceded by '('; the nested variant additionally rejects a preceding
# backslash and a tag that is followed by a closing `]]`.
# NOTE(review): the class `[A-z]` also covers the ASCII characters between
# 'Z' and 'a' ([ \ ] ^ _ `) — confirm whether `[A-Za-z]` was intended.
TAG_INCLUDE_NESTED_REGEX = r'(?<!\()(?<!\\)#{1}([A-z]+[0-9_\-]*[A-Z0-9]?[^\s]+(?![^\[\[]*\]\]))\/?'
TAG_MAIN_ONLY_REGEX = r'(?<!\()#{1}([A-z]+[0-9_\-]*[A-Z0-9]?)\/?'
Expand Down
133 changes: 107 additions & 26 deletions obsidiantools/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@
from itertools import chain

# init
from .md_utils import (get_md_relpaths_matching_subdirs)
from .md_utils import (get_md_relpaths_matching_subdirs,
get_properties,
get_front_matter)
from .canvas_utils import (get_canvas_relpaths_matching_subdirs,
_get_all_valid_canvas_file_relpaths)
# connect
Expand Down Expand Up @@ -175,6 +177,9 @@ def __init__(self, dirpath: Path, *,
self._nonexistent_canvas_files = []
self._isolated_canvas_files = []

# properties index
self._properties_index = {}

@property
def dirpath(self) -> Path:
"""pathlib Path"""
Expand Down Expand Up @@ -460,6 +465,15 @@ def canvas_graph_detail_index(self, value) -> \
]:
self._canvas_graph_detail_index = value

@property
def properties_index(self) -> dict[str, dict]:
    """dict: k is a note name, v is a dictionary of properties for that note."""
    return self._properties_index

@properties_index.setter
def properties_index(self, value: dict[str, dict]) -> None:
    # Replaces the stored index wholesale; nothing is returned, so the
    # return annotation is None.
    self._properties_index = value

def connect(self, *, show_nested_tags: bool = False,
attachments=False):
"""connect your notes together by representing the vault as a
Expand Down Expand Up @@ -494,14 +508,14 @@ def connect(self, *, show_nested_tags: bool = False,
self._tags_index = {}
self._math_index = {}
self._front_matter_index = {}
# to be used for graph:
self._wikilinks_index = {}
self._unique_wikilinks_index = {}

# loop through md files:
for f, relpath in self._md_file_index.items():
# initialize property index:
self._properties_index = {}

# process each note:
for n, relpath in self._md_file_index.items():
self._connect_update_based_on_new_relpath(
relpath, note=f,
relpath,
note=n,
show_nested_tags=show_nested_tags)

# canvas content:
Expand Down Expand Up @@ -543,24 +557,34 @@ def _connect_update_based_on_new_relpath(self, relpath: Path, *,
"""Individual file read & associated attrs update for the
connect method."""
exclude_canvas = not self._attachments

# MAIN file read:
front_matter, content = _get_md_front_matter_and_content(
self._dirpath / relpath)
html = _get_html_from_md_content(content)
src_txt = get_source_text_from_html(
html, remove_code=True)

# info from core text:
self._md_links_index[note] = (
_get_md_links_from_source_text(src_txt))
self._unique_md_links_index[note] = (
_get_unique_md_links_from_source_text(src_txt))
self._embedded_files_index[note] = (
_get_all_embedded_files_from_source_text(
src_txt, remove_aliases=True)
# (aliases are redundant for connect method)
)
file_path = self._dirpath / relpath

try:
# MAIN file read:
front_matter, content = _get_md_front_matter_and_content(file_path)
html = _get_html_from_md_content(content)
src_txt = get_source_text_from_html(
html, remove_code=True)

# Extract all properties from the file
self._properties_index[note] = get_properties(file_path)

# info from core text:
self._md_links_index[note] = (
_get_md_links_from_source_text(src_txt))
self._unique_md_links_index[note] = (
_get_unique_md_links_from_source_text(src_txt))
self._embedded_files_index[note] = (
_get_all_embedded_files_from_source_text(
src_txt, remove_aliases=True)
# (aliases are redundant for connect method)
)
except (FileNotFoundError, IOError) as e:
warnings.warn(f"Failed to read file {file_path}: {str(e)}")
self._properties_index[note] = {}
self._md_links_index[note] = []
self._unique_md_links_index[note] = []
self._embedded_files_index[note] = []
self._wikilinks_index[note] = (
_get_all_wikilinks_from_source_text(
src_txt, remove_aliases=True,
Expand Down Expand Up @@ -1377,3 +1401,60 @@ def _get_isolated_notes(self, *,
These notes are retrieved from the graph."""
return [fn for fn in nx.isolates(graph)
if fn in self._md_file_index]

def get_properties(self, note_name: str) -> dict:
    """Return every property of one note, read from the note's file.

    Obsidian properties can be written either as frontmatter at the top
    of the file or inline as 'property:: value'.  Both kinds are merged
    into one dictionary; when a property is defined both ways, the
    inline value wins.

    Args:
        note_name (str): Name of the note (without .md extension).
            For notes in folders, include the folder path
            e.g. 'folder/note'.

    Returns:
        dict: Combined dictionary of all properties; empty when the
            note is not in the md file index.
    """
    try:
        note_relpath = self._md_file_index[note_name]
    except KeyError:
        # Unknown note: nothing to read.
        return {}
    # The index stores paths relative to the vault directory.
    return get_properties(self._dirpath / note_relpath)

def get_property(self, note_name: str, property_name: str) -> str | list | None:
"""Get a specific property from a note.

Looks for the property in both frontmatter and inline properties.
If the property exists in both places, the inline value takes precedence.

Args:
note_name (str): Name of the note (without .md extension).
For notes in folders, include the folder path
e.g. 'folder/note'.
property_name (str): Name of the property to retrieve

Returns:
str | list | None: Property value if found, None if not found
"""
properties = self.get_properties(note_name)
return properties.get(property_name) if properties else None

def get_properties_index(self) -> dict:
    """Return the properties of every note that has at least one.

    The result maps note names to their property dictionaries, taken
    from the stored properties index.  Notes whose property dictionary
    is empty are left out.

    Returns:
        dict: Dictionary mapping note names to property dictionaries
    """
    notes_with_props = {}
    for note_name, note_props in self._properties_index.items():
        if note_props:  # skip notes without any properties
            notes_with_props[note_name] = note_props
    return notes_with_props
163 changes: 162 additions & 1 deletion obsidiantools/md_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import re
import yaml
import datetime
from pathlib import Path
from bs4 import BeautifulSoup
import markdown
Expand All @@ -9,7 +10,9 @@
WIKILINK_AS_STRING_REGEX,
EMBEDDED_FILE_LINK_AS_STRING_REGEX,
INLINE_LINK_AFTER_HTML_PROC_REGEX,
INLINE_LINK_VIA_MD_ONLY_REGEX)
INLINE_LINK_VIA_MD_ONLY_REGEX,
INLINE_PROPERTY_REGEX,
INLINE_PROPERTY_VALUE_ARRAY_REGEX)
from ._io import (get_relpaths_from_dir,
get_relpaths_matching_subdirs)
from .html_processing import (_get_plaintext_from_html,
Expand Down Expand Up @@ -509,3 +512,161 @@ def _remove_embedded_file_links_from_text(src_txt: str) -> str:
for i in links_list:
out_str = out_str.replace(i, '')
return out_str


def clean_property_key(key: str) -> str:
    """Clean a property key by removing quotes and trailing colons.

    Surrounding whitespace is removed first, so a trailing colon that is
    followed by spaces (e.g. 'key: ') is still stripped.  Colons inside
    the key are kept.  Non-string keys (YAML allows e.g. integer keys)
    are converted with str() instead of raising.

    Args:
        key (str): The property key to clean

    Returns:
        str: The cleaned key
    """
    # Whitespace must go before the colons, otherwise 'key: ' keeps its colon.
    cleaned = str(key).strip().rstrip(':').strip()
    # Remove one pair of matching outer quotes if present.
    double_quoted = cleaned.startswith('"') and cleaned.endswith('"')
    single_quoted = cleaned.startswith("'") and cleaned.endswith("'")
    if double_quoted or single_quoted:
        cleaned = cleaned[1:-1].strip()
    return cleaned

def _has_iso_date_shape(text) -> bool:
    """Return True if *text* is a 10-character string shaped like 'YYYY-MM-DD'."""
    return (isinstance(text, str) and len(text) == 10
            and text[4] == '-' and text[7] == '-')


def _is_whole_wikilink(text) -> bool:
    """Return True if *text* is a string that starts with '[[' and ends with ']]'."""
    return (isinstance(text, str)
            and text.startswith("[[") and text.endswith("]]"))


def _convert_front_matter_str(key: str, value: str, filepath: Path):
    """Convert a frontmatter string to a date/datetime/wikilink target if it qualifies."""
    import warnings  # local import: only needed on the failure path

    if key in ('date', 'due') and _has_iso_date_shape(value):
        try:
            return datetime.datetime.strptime(value, '%Y-%m-%d').date()
        except ValueError:
            warnings.warn(
                f"Failed to parse date for key '{key}' with value "
                f"'{value}' in frontmatter of {filepath}")
            return value
    if (key == 'time' and len(value) == 19 and value[4] == '-'
            and value[7] == '-' and value[10] == 'T'):
        try:
            return datetime.datetime.strptime(value, '%Y-%m-%dT%H:%M:%S')
        except ValueError:
            warnings.warn(
                f"Failed to parse time for key '{key}' with value "
                f"'{value}' in frontmatter of {filepath}")
            return value
    if _is_whole_wikilink(value):
        links = _get_all_wikilinks_from_source_text(
            value, remove_aliases=True)
        # e.g. '[[]]' yields no links: keep the raw string instead of
        # failing on an empty list.
        return links[0] if links else value
    return value


def _convert_front_matter_list(key: str, values: list) -> list:
    """Expand wikilink items of a frontmatter list and convert date lists."""
    converted = []
    for item in values:
        if _is_whole_wikilink(item):
            links = _get_all_wikilinks_from_source_text(
                item, remove_aliases=True)
            # Keep the raw item when no link could be extracted from it.
            converted.extend(links or [item])
        else:
            converted.append(item)

    # Dates are converted all-or-nothing, item by item.  An empty list is
    # left alone (all() of nothing is True).
    if (key in ('date', 'due') and converted
            and all(_has_iso_date_shape(item) for item in converted)):
        try:
            converted = [
                datetime.datetime.strptime(item, '%Y-%m-%d').date()
                for item in converted]
        except ValueError:
            pass  # at least one item is not a real date: keep the strings
    return converted


def get_properties(filepath: Path) -> dict:
    """Get all properties from a markdown file, combining frontmatter and inline properties.

    Properties can be defined in two ways in Obsidian:
    1. As frontmatter at the start of the file
    2. As inline properties in the format 'property:: value'

    This method combines both types of properties into a single dictionary.
    If the same property exists in both frontmatter and inline,
    the inline value takes precedence.

    Frontmatter values are post-processed:
    - 'date' / 'due' strings shaped like 'YYYY-MM-DD' become date objects,
      and a non-empty list of such strings becomes a list of date objects.
    - 'time' strings shaped like 'YYYY-MM-DDTHH:MM:SS' become datetime
      objects.
    - a string or list item of the form '[[...]]' is replaced by the
      wikilink(s) extracted from it.
    A value that cannot be converted is kept unchanged.

    Args:
        filepath (pathlib Path): Path object representing the file from
            which info will be extracted.

    Returns:
        dict: Combined dictionary of all properties
    """
    props = {}

    # Frontmatter properties first, so inline ones can override them.
    front_matter = get_front_matter(filepath)
    if front_matter:
        for key, value in front_matter.items():
            clean_key = clean_property_key(key)
            if isinstance(value, str):
                value = _convert_front_matter_str(clean_key, value, filepath)
            elif isinstance(value, list):
                value = _convert_front_matter_list(clean_key, value)
            props[clean_key] = value

    # Inline properties override frontmatter; their values are stored as
    # returned by get_inline_properties.
    inline_props = get_inline_properties(filepath)
    if inline_props:
        for key, value in inline_props.items():
            props[clean_property_key(key)] = value
    return props

def get_property(filepath: Path, property_name: str) -> str | list | None:
    """Return one property of a markdown file.

    The property is searched in the combined frontmatter and inline
    properties of the file; an inline value overrides a frontmatter
    value of the same name.

    Args:
        filepath (pathlib Path): Path object representing the file from
            which info will be extracted.
        property_name (str): Name of the property to retrieve

    Returns:
        str | list | None: Property value if found, None if not found
    """
    return get_properties(filepath).get(property_name)

def get_inline_properties(filepath: Path) -> dict:
    """Extract inline properties from a md file.

    Looks for lines in the format 'property:: value' and parses them into a dictionary.
    Handles array values in the format '[value1, value2]'; the array must
    span the whole value, so a wikilink such as '[[Note]]' or a value with
    text after the closing bracket is kept as a plain string.
    Handles special characters and quoted property names.
    For the keys 'date' / 'due' a 'YYYY-MM-DD' value becomes a date object,
    and for the key 'time' a 'YYYY-MM-DDTHH:MM:SS' value becomes a datetime
    object; values that fail to parse stay strings.
    When a key occurs on several lines, the last line wins.

    Args:
        filepath (pathlib Path): Path object representing the file from
            which info will be extracted.
    Returns:
        dict: Dictionary of inline properties
    """
    _, content = _get_md_front_matter_and_content(filepath)
    # Compile once instead of re-resolving the patterns for every line.
    line_pattern = re.compile(INLINE_PROPERTY_REGEX)
    array_pattern = re.compile(INLINE_PROPERTY_VALUE_ARRAY_REGEX)
    properties = {}

    for line in content.splitlines():
        match = line_pattern.match(line.strip())
        if not match:
            continue

        clean_key = clean_property_key(match.group(1).strip())
        value = match.group(2).strip()

        # fullmatch, not match: a prefix match would turn '[[Note]]' into
        # the one-element array ['[Note'] and drop text after ']'.
        array_match = array_pattern.fullmatch(value)
        if array_match:
            # Split array values and clean them up
            properties[clean_key] = [
                item.strip(' "\'')
                for item in array_match.group(1).split(',')]
            continue

        # Single value: remove one pair of matching outer quotes.
        if ((value.startswith('"') and value.endswith('"'))
                or (value.startswith("'") and value.endswith("'"))):
            value = value[1:-1].strip()

        # Date/time conversion happens after the quotes are gone.
        if (clean_key in ('date', 'due') and len(value) == 10
                and value[4] == '-' and value[7] == '-'):
            try:
                value = datetime.datetime.strptime(value, '%Y-%m-%d').date()
            except ValueError:
                pass  # not a real date: keep the string
        elif (clean_key == 'time' and len(value) == 19
                and value[4] == '-' and value[7] == '-'
                and value[10] == 'T'):
            try:
                value = datetime.datetime.strptime(
                    value, '%Y-%m-%dT%H:%M:%S')
            except ValueError:
                pass  # not a real timestamp: keep the string

        properties[clean_key] = value

    return properties
7 changes: 7 additions & 0 deletions tests/general/property-tests/basic_properties.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
prop1: value1
prop2: value2
prop3: [item1, item2, item3]
---

Content after properties.
10 changes: 10 additions & 0 deletions tests/general/property-tests/frontmatter_style.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
---
tags: [tag1, tag2]
aliases: [alias1, alias2]
cssclass: my-class
date: 2025-07-17
status: In Progress
priority: High
---

Content after properties with frontmatter style.
6 changes: 6 additions & 0 deletions tests/general/property-tests/inline_properties.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
prop:: value with inline property
tags:: [tag1, tag2, tag3]
status:: In Progress
due:: 2025-07-17

Content with inline properties.
Loading