diff --git a/obsidiantools/_constants.py b/obsidiantools/_constants.py index a5dcff2..652a906 100644 --- a/obsidiantools/_constants.py +++ b/obsidiantools/_constants.py @@ -2,6 +2,10 @@ # group 0 captures embedded link; group 1 is everything inside [[]] WIKILINK_REGEX = r'(!)?\[{2}([^\]\]]+)\]{2}' +# PROPERTIES +INLINE_PROPERTY_REGEX = r'^[\s]*(?:"|\')?([^:"\']+?(?::[^:"\']+?)*?)(?:"|\')?[\s]*::\s*(["\']?.*?["\']?)$' +INLINE_PROPERTY_VALUE_ARRAY_REGEX = r'\[([^\]]+)\]' + # TAGS TAG_INCLUDE_NESTED_REGEX = r'(? Path: """pathlib Path""" @@ -460,6 +465,15 @@ def canvas_graph_detail_index(self, value) -> \ ]: self._canvas_graph_detail_index = value + @property + def properties_index(self) -> dict[str, dict]: + """dict: k is a note name, v is a dictionary of properties for that note.""" + return self._properties_index + + @properties_index.setter + def properties_index(self, value: dict[str, dict]) -> dict[str, dict]: + self._properties_index = value + def connect(self, *, show_nested_tags: bool = False, attachments=False): """connect your notes together by representing the vault as a @@ -494,14 +508,14 @@ def connect(self, *, show_nested_tags: bool = False, self._tags_index = {} self._math_index = {} self._front_matter_index = {} - # to be used for graph: - self._wikilinks_index = {} - self._unique_wikilinks_index = {} - - # loop through md files: - for f, relpath in self._md_file_index.items(): + # initialize property index: + self._properties_index = {} + + # process each note: + for n, relpath in self._md_file_index.items(): self._connect_update_based_on_new_relpath( - relpath, note=f, + relpath, + note=n, show_nested_tags=show_nested_tags) # canvas content: @@ -543,24 +557,34 @@ def _connect_update_based_on_new_relpath(self, relpath: Path, *, """Individual file read & associated attrs update for the connect method.""" exclude_canvas = not self._attachments - - # MAIN file read: - front_matter, content = _get_md_front_matter_and_content( - self._dirpath / relpath) - 
html = _get_html_from_md_content(content) - src_txt = get_source_text_from_html( - html, remove_code=True) - - # info from core text: - self._md_links_index[note] = ( - _get_md_links_from_source_text(src_txt)) - self._unique_md_links_index[note] = ( - _get_unique_md_links_from_source_text(src_txt)) - self._embedded_files_index[note] = ( - _get_all_embedded_files_from_source_text( - src_txt, remove_aliases=True) - # (aliases are redundant for connect method) - ) + file_path = self._dirpath / relpath + + try: + # MAIN file read: + front_matter, content = _get_md_front_matter_and_content(file_path) + html = _get_html_from_md_content(content) + src_txt = get_source_text_from_html( + html, remove_code=True) + + # Extract all properties from the file + self._properties_index[note] = get_properties(file_path) + + # info from core text: + self._md_links_index[note] = ( + _get_md_links_from_source_text(src_txt)) + self._unique_md_links_index[note] = ( + _get_unique_md_links_from_source_text(src_txt)) + self._embedded_files_index[note] = ( + _get_all_embedded_files_from_source_text( + src_txt, remove_aliases=True) + # (aliases are redundant for connect method) + ) + except (FileNotFoundError, IOError) as e: + warnings.warn(f"Failed to read file {file_path}: {str(e)}") + self._properties_index[note] = {} + self._md_links_index[note] = [] + self._unique_md_links_index[note] = [] + self._embedded_files_index[note] = [] self._wikilinks_index[note] = ( _get_all_wikilinks_from_source_text( src_txt, remove_aliases=True, @@ -1377,3 +1401,60 @@ def _get_isolated_notes(self, *, These notes are retrieved from the graph.""" return [fn for fn in nx.isolates(graph) if fn in self._md_file_index] + + def get_properties(self, note_name: str) -> dict: + """Get all properties from a note, combining frontmatter and inline properties. + + Properties can be defined in two ways in Obsidian: + 1. As frontmatter at the start of the file + 2. 
As inline properties in the format 'property:: value' + + This method combines both types of properties into a single dictionary. + If the same property exists in both frontmatter and inline, + the inline value takes precedence. + + Args: + note_name (str): Name of the note (without .md extension). + For notes in folders, include the folder path + e.g. 'folder/note'. + + Returns: + dict: Combined dictionary of all properties + """ + if note_name not in self._md_file_index: + return {} + # Use dirpath to get the full path + fullpath = self._dirpath / self._md_file_index[note_name] + return get_properties(fullpath) + + def get_property(self, note_name: str, property_name: str) -> str | list | None: + """Get a specific property from a note. + + Looks for the property in both frontmatter and inline properties. + If the property exists in both places, the inline value takes precedence. + + Args: + note_name (str): Name of the note (without .md extension). + For notes in folders, include the folder path + e.g. 'folder/note'. + property_name (str): Name of the property to retrieve + + Returns: + str | list | None: Property value if found, None if not found + """ + properties = self.get_properties(note_name) + return properties.get(property_name) if properties else None + + def get_properties_index(self) -> dict: + """Get properties for all notes in the vault. + + Returns a dictionary mapping note names to their properties. + Properties include both frontmatter and inline properties. + If a property exists in both places, the inline value takes precedence. 
def clean_property_key(key: str) -> str:
    """Normalise a property key from front matter or an inline property.

    Surrounding whitespace and trailing colons are removed, then one pair
    of matching outer quotes (single or double) is stripped.  Colons
    embedded inside the key are preserved.

    Args:
        key (str): The property key to clean.  A non-string key (for
            example a bare ``2024:`` key that the YAML loader returns as
            an int) is converted with ``str()`` rather than raising
            AttributeError.

    Returns:
        str: The cleaned key.
    """
    if not isinstance(key, str):
        key = str(key)
    # Strip whitespace both before and after dropping trailing colons so
    # that 'name: ' and 'name :' both reduce to 'name'.
    key = key.strip().rstrip(':').strip()
    if ((key.startswith('"') and key.endswith('"'))
            or (key.startswith("'") and key.endswith("'"))):
        key = key[1:-1].strip()
    return key
_ISO_DATE_FORMAT = '%Y-%m-%d'
_ISO_DATETIME_FORMAT = '%Y-%m-%dT%H:%M:%S'
# Front matter keys whose string values are converted to datetime.date.
_DATE_PROPERTY_KEYS = ('date', 'due')


def _is_iso_date_string(value) -> bool:
    """Return True if *value* is a str shaped like 'YYYY-MM-DD'."""
    return (isinstance(value, str) and len(value) == 10
            and value[4] == '-' and value[7] == '-')


def _is_iso_datetime_string(value) -> bool:
    """Return True if *value* is a str shaped like 'YYYY-MM-DDTHH:MM:SS'."""
    return (isinstance(value, str) and len(value) == 19
            and value[4] == '-' and value[7] == '-' and value[10] == 'T')


def _is_wikilink_string(value) -> bool:
    """Return True if *value* is a str wrapped in '[[' ... ']]'."""
    return (isinstance(value, str)
            and value.startswith('[[') and value.endswith(']]'))


def _convert_front_matter_string(key: str, value: str):
    """Convert one front matter string to a date, datetime or link target.

    A value that has the expected shape but cannot be parsed is returned
    unchanged (best effort), as is any string matching none of the rules.
    """
    if key in _DATE_PROPERTY_KEYS and _is_iso_date_string(value):
        try:
            return datetime.datetime.strptime(value, _ISO_DATE_FORMAT).date()
        except ValueError:
            return value
    if key == 'time' and _is_iso_datetime_string(value):
        try:
            return datetime.datetime.strptime(value, _ISO_DATETIME_FORMAT)
        except ValueError:
            return value
    if _is_wikilink_string(value):
        links = _get_all_wikilinks_from_source_text(
            value, remove_aliases=True)
        # Something like '[[]]' yields no links: keep the raw text rather
        # than failing on an empty result.
        return links[0] if links else value
    return value


def _convert_front_matter_list(key: str, items: list) -> list:
    """Replace wikilink items by their targets; convert date lists."""
    converted = []
    for item in items:
        if _is_wikilink_string(item):
            converted.extend(_get_all_wikilinks_from_source_text(
                item, remove_aliases=True))
        else:
            converted.append(item)
    if (key in _DATE_PROPERTY_KEYS
            and all(_is_iso_date_string(item) for item in converted)):
        # Parse item by item; strptime does not accept a list.  If any
        # item is not a real calendar date the list is left as strings.
        try:
            return [
                datetime.datetime.strptime(item, _ISO_DATE_FORMAT).date()
                for item in converted
            ]
        except ValueError:
            return converted
    return converted


def get_properties(filepath: Path) -> dict:
    """Get all properties from a markdown file (front matter plus inline).

    Properties can be defined in two ways in Obsidian:
    1. As front matter at the start of the file
    2. As inline properties in the format 'property:: value'

    Both kinds are combined into a single dictionary.  If the same
    property exists in both front matter and inline, the inline value
    takes precedence.

    Front matter values are post-processed: string values of the keys
    'date' / 'due' ('YYYY-MM-DD') and 'time' ('YYYY-MM-DDTHH:MM:SS') are
    converted to date / datetime objects, and '[[wikilink]]' strings
    (alone or inside a list) are replaced by the link target.

    Args:
        filepath (pathlib Path): Path object representing the file from
            which info will be extracted.

    Returns:
        dict: Combined dictionary of all properties
    """
    props = {}

    front_matter = get_front_matter(filepath)
    if front_matter:
        for key, value in front_matter.items():
            clean_key = clean_property_key(key)
            if isinstance(value, str):
                value = _convert_front_matter_string(clean_key, value)
            elif isinstance(value, list):
                value = _convert_front_matter_list(clean_key, value)
            props[clean_key] = value

    # Inline properties are applied last so they override front matter.
    inline_props = get_inline_properties(filepath)
    if inline_props:
        for key, value in inline_props.items():
            props[clean_property_key(key)] = value
    return props


def get_property(filepath: Path, property_name: str) -> str | list | None:
    """Get a specific property from a markdown file.

    Looks for the property in both front matter and inline properties.
    If the property exists in both places, the inline value takes
    precedence.

    Args:
        filepath (pathlib Path): Path object representing the file from
            which info will be extracted.
        property_name (str): Name of the property to retrieve

    Returns:
        str | list | None: Property value if found, None if not found
    """
    return get_properties(filepath).get(property_name)
def _coerce_inline_scalar(key: str, value: str):
    """Unquote a single inline value and convert known date/time keys.

    Values for 'date' / 'due' shaped like 'YYYY-MM-DD' become date
    objects and values for 'time' shaped like 'YYYY-MM-DDTHH:MM:SS'
    become datetime objects; anything unparseable stays a string.
    """
    if ((value.startswith('"') and value.endswith('"'))
            or (value.startswith("'") and value.endswith("'"))):
        value = value[1:-1].strip()

    if (key in ('date', 'due') and len(value) == 10
            and value[4] == '-' and value[7] == '-'):
        try:
            return datetime.datetime.strptime(value, '%Y-%m-%d').date()
        except ValueError:
            return value
    if (key == 'time' and len(value) == 19 and value[4] == '-'
            and value[7] == '-' and value[10] == 'T'):
        try:
            return datetime.datetime.strptime(value, '%Y-%m-%dT%H:%M:%S')
        except ValueError:
            return value
    return value


def get_inline_properties(filepath: Path) -> dict:
    """Extract inline properties from a md file.

    Looks for lines in the format 'property:: value' and parses them into
    a dictionary.  A value that consists entirely of '[value1, value2]'
    is returned as a list of stripped, unquoted strings; any other value
    is returned as a scalar.  Quoted property names and names containing
    special characters are handled.  When a key occurs on several lines,
    the last occurrence wins.

    Args:
        filepath (pathlib Path): Path object representing the file from
            which info will be extracted.

    Returns:
        dict: Dictionary of inline properties
    """
    _, content = _get_md_front_matter_and_content(filepath)
    # Compile once instead of looking the patterns up for every line.
    property_pattern = re.compile(INLINE_PROPERTY_REGEX)
    array_pattern = re.compile(INLINE_PROPERTY_VALUE_ARRAY_REGEX)
    properties = {}

    for line in content.splitlines():
        match = property_pattern.match(line.strip())
        if match is None:
            continue
        key = clean_property_key(match.group(1).strip())
        raw_value = match.group(2).strip()

        # fullmatch: the WHOLE value must be one bracketed list.  A prefix
        # match would turn '[[Link]]' into ['[Link'] and silently discard
        # any text following a leading '[...]'.
        array_match = array_pattern.fullmatch(raw_value)
        if array_match:
            properties[key] = [
                item.strip(' "\'')
                for item in array_match.group(1).split(',')
            ]
        else:
            properties[key] = _coerce_inline_scalar(key, raw_value)

    return properties
diff --git a/tests/general/property-tests/frontmatter_style.md b/tests/general/property-tests/frontmatter_style.md new file mode 100644 index 0000000..510c636 --- /dev/null +++ b/tests/general/property-tests/frontmatter_style.md @@ -0,0 +1,10 @@ +--- +tags: [tag1, tag2] +aliases: [alias1, alias2] +cssclass: my-class +date: 2025-07-17 +status: In Progress +priority: High +--- + +Content after properties with frontmatter style. diff --git a/tests/general/property-tests/inline_properties.md b/tests/general/property-tests/inline_properties.md new file mode 100644 index 0000000..426fec0 --- /dev/null +++ b/tests/general/property-tests/inline_properties.md @@ -0,0 +1,6 @@ +prop:: value with inline property +tags:: [tag1, tag2, tag3] +status:: In Progress +due:: 2025-07-17 + +Content with inline properties. diff --git a/tests/general/property-tests/nested_properties.md b/tests/general/property-tests/nested_properties.md new file mode 100644 index 0000000..012f2af --- /dev/null +++ b/tests/general/property-tests/nested_properties.md @@ -0,0 +1,13 @@ +--- +nested: + prop1: value1 + prop2: + subprop1: val1 + subprop2: val2 + prop3: + - item1 + - item2 + - item3 +--- + +Content after nested properties. diff --git a/tests/general/property-tests/property_datetime.md b/tests/general/property-tests/property_datetime.md new file mode 100644 index 0000000..41c6254 --- /dev/null +++ b/tests/general/property-tests/property_datetime.md @@ -0,0 +1,16 @@ +--- +date: 2020-08-21 +time: 2020-08-21T10:30:00 +datetime: 2025-07-17T15:45:30.123Z +created: 2023-01-01 +modified: 2023-12-31T23:59:59 +invalid_date: not-a-date +--- + +date:: 2021-09-22 +time:: 2021-09-22T11:45:00 +datetime:: 2024-06-15T08:30:00Z +created:: 2023-02-15 +modified:: 2023-11-30T12:00:00 + +This file tests date and time property handling in both frontmatter and inline formats. 
diff --git a/tests/general/property-tests/property_links.md b/tests/general/property-tests/property_links.md new file mode 100644 index 0000000..12c50ac --- /dev/null +++ b/tests/general/property-tests/property_links.md @@ -0,0 +1,9 @@ +--- +link: "[[Link]]" +linklist: + - "[[Link]]" + - "[[Link2]]" +--- +# Test file for properties with links + +This file tests handling of wikilinks in properties. \ No newline at end of file diff --git a/tests/general/property-tests/property_overriding.md b/tests/general/property-tests/property_overriding.md new file mode 100644 index 0000000..3e4ca3a --- /dev/null +++ b/tests/general/property-tests/property_overriding.md @@ -0,0 +1,9 @@ +--- +prop1: value1 +prop2:: value2 +--- + +prop2:: overridden value2 +prop3:: value3 + +This tests property overriding - inline properties should override frontmatter. diff --git a/tests/general/property-tests/property_special_chars.md b/tests/general/property-tests/property_special_chars.md new file mode 100644 index 0000000..ae32484 --- /dev/null +++ b/tests/general/property-tests/property_special_chars.md @@ -0,0 +1,11 @@ +--- +prop with spaces: value1 +"prop:with:colons": value2 +'prop-with-quotes': value3 +--- + +property with spaces:: value4 +"property:with:colons":: value5 +'property-with-quotes':: value6 + +This tests properties with special characters and spaces. diff --git a/tests/general/property-tests/property_types.md b/tests/general/property-tests/property_types.md new file mode 100644 index 0000000..68326b2 --- /dev/null +++ b/tests/general/property-tests/property_types.md @@ -0,0 +1,12 @@ +--- +number: 42 +float: 3.14 +boolean: true +null_value: null +date: 2020-08-21 +time: 2020-08-21T10:30:00 +array: [1, 2, 3] +empty: "" +--- + +Content after properties with different data types. 
diff --git a/tests/general/property-tests/property_whitespace.md b/tests/general/property-tests/property_whitespace.md new file mode 100644 index 0000000..c6169c6 --- /dev/null +++ b/tests/general/property-tests/property_whitespace.md @@ -0,0 +1,7 @@ +prop1:: value with spaces +prop2 ::value without spaces + prop3 :: value with spaces before and after +prop4:: +prop5:: [tag1 , tag2, tag3 ] + +This tests property parsing with different whitespace patterns. diff --git a/tests/test_properties.py b/tests/test_properties.py new file mode 100644 index 0000000..89ab195 --- /dev/null +++ b/tests/test_properties.py @@ -0,0 +1,173 @@ +import pytest +from pathlib import Path +import datetime +from obsidiantools.md_utils import get_properties, get_property +from obsidiantools.api import Vault + +# Test fixtures +@pytest.fixture +def property_test_files(): + return { + 'basic': Path('.') / 'tests/general/property-tests/basic_properties.md', + 'frontmatter': Path('.') / 'tests/general/property-tests/frontmatter_style.md', + 'nested': Path('.') / 'tests/general/property-tests/nested_properties.md', + 'types': Path('.') / 'tests/general/property-tests/property_types.md', + 'inline': Path('.') / 'tests/general/property-tests/inline_properties.md', + 'overriding': Path('.') / 'tests/general/property-tests/property_overriding.md', + 'special': Path('.') / 'tests/general/property-tests/property_special_chars.md', + 'whitespace': Path('.') / 'tests/general/property-tests/property_whitespace.md' + , 'links': Path('.') / 'tests/general/property-tests/property_links.md' + } # added test for links + +@pytest.fixture +def test_vault(tmp_path): + return Vault(Path('.') / 'tests/general/property-tests').connect() + +# Test md_utils property functions +def test_get_basic_properties(property_test_files): + props = get_properties(property_test_files['basic']) + assert props == { + 'prop1': 'value1', + 'prop2': 'value2', + 'prop3': ['item1', 'item2', 'item3'] + } + +def 
test_get_frontmatter_style_properties(property_test_files): + props = get_properties(property_test_files['frontmatter']) + + # Check date property separately since it's a datetime object + assert isinstance(props['date'], datetime.date) + assert props['date'].isoformat() == '2025-07-17' + + # Test the rest of the properties + date_removed = props.copy() + del date_removed['date'] + assert date_removed == { + 'tags': ['tag1', 'tag2'], + 'aliases': ['alias1', 'alias2'], + 'cssclass': 'my-class', + 'status': 'In Progress', + 'priority': 'High' + } + +def test_get_nested_properties(property_test_files): + props = get_properties(property_test_files['nested']) + assert props == { + 'nested': { + 'prop1': 'value1', + 'prop2': { + 'subprop1': 'val1', + 'subprop2': 'val2' + }, + 'prop3': ['item1', 'item2', 'item3'] + } + } + +def test_get_property_types(property_test_files): + props = get_properties(property_test_files['types']) + + # Convert datetime objects to expected values for comparison + assert isinstance(props['date'], datetime.date) + assert isinstance(props['time'], datetime.datetime) + assert props['date'].isoformat() == '2020-08-21' + assert props['time'].isoformat() == '2020-08-21T10:30:00' + + # Test the rest of the properties + date_time_removed = props.copy() + del date_time_removed['date'] + del date_time_removed['time'] + assert date_time_removed == { + 'number': 42, + 'float': 3.14, + 'boolean': True, + 'null_value': None, + 'array': [1, 2, 3], + 'empty': '' + } + +def test_get_inline_properties(property_test_files): + props = get_properties(property_test_files['inline']) + + # Check date property separately since it's a datetime object + assert isinstance(props['due'], datetime.date) + assert props['due'].isoformat() == '2025-07-17' + + # Test the rest of the properties + date_removed = props.copy() + del date_removed['due'] + assert date_removed == { + 'prop': 'value with inline property', + 'tags': ['tag1', 'tag2', 'tag3'], + 'status': 'In Progress' + 
} + +def test_get_specific_property(property_test_files): + prop = get_property(property_test_files['basic'], 'prop1') + assert prop == 'value1' + + prop = get_property(property_test_files['basic'], 'prop3') + assert prop == ['item1', 'item2', 'item3'] + + prop = get_property(property_test_files['basic'], 'nonexistent') + assert prop is None + +# Test Vault class property methods +def test_vault_get_properties(test_vault): + props = test_vault.get_properties('basic_properties') + assert props == { + 'prop1': 'value1', + 'prop2': 'value2', + 'prop3': ['item1', 'item2', 'item3'] + } + +def test_vault_get_property(test_vault): + prop = test_vault.get_property('basic_properties', 'prop1') + assert prop == 'value1' + + prop = test_vault.get_property('nonexistent_note', 'prop1') + assert prop is None + +def test_vault_get_properties_index(test_vault): + index = test_vault.get_properties_index() + assert 'basic_properties' in index + assert 'frontmatter_style' in index + assert len(index) == 10 # All our test files should have properties + assert index['basic_properties']['prop1'] == 'value1' + assert index['frontmatter_style']['status'] == 'In Progress' + assert index['inline_properties']['tags'] == ['tag1', 'tag2', 'tag3'] + +def test_property_overriding(property_test_files): + props = get_properties(property_test_files['overriding']) + assert props == { + 'prop1': 'value1', + 'prop2': 'overridden value2', # inline should override frontmatter + 'prop3': 'value3' + } + +def test_property_special_chars(property_test_files): + props = get_properties(property_test_files['special']) + assert props == { + 'prop with spaces': 'value1', + 'prop:with:colons': 'value2', + 'prop-with-quotes': 'value3', + 'property with spaces': 'value4', + 'property:with:colons': 'value5', + 'property-with-quotes': 'value6' + } + +def test_property_whitespace_handling(property_test_files): + props = get_properties(property_test_files['whitespace']) + assert props == { + 'prop1': 'value with 
spaces', # extra spaces trimmed + 'prop2': 'value without spaces', + 'prop3': 'value with spaces before and after', # extra spaces trimmed + 'prop4': '', # empty value + 'prop5': ['tag1', 'tag2', 'tag3'] # spaces in array should be trimmed + } + +def test_property_link_handling(property_test_files): + props = get_properties(property_test_files['links']) + assert props == { + 'link': 'Link', + 'linklist': ['Link', 'Link2'] + } diff --git a/tests/vault-stub/rich.md b/tests/vault-stub/rich.md new file mode 100644 index 0000000..eb14001 --- /dev/null +++ b/tests/vault-stub/rich.md @@ -0,0 +1,36 @@ +--- +title: Rich Properties Example +date: 2025-07-17 +time: 2025-07-17T15:30:00 +status: In Progress +tags: [obsidian, test, properties] +mixed bag: [1, "two", true, null, '', None] +nested: + key1: value1 + key2: + subkey1: nested value + subkey2: another value +priority: High +links: + - [[another-note]] + - [[yet-another-note|custom link text]] +--- + +prop with spaces:: special value +property:with:colons:: complex:value:here +number:: 42 +empty:: + +This is an example note demonstrating various property types supported by Obsidian: + +1. Frontmatter properties: + - Date and time (datetime objects) + - Arrays (tags) + - Nested objects + - Simple key-value pairs + +2. Inline properties: + - Properties with spaces + - Properties with special characters + - Number values + - Empty values \ No newline at end of file diff --git a/workbook.ipynb b/workbook.ipynb new file mode 100644 index 0000000..0e18c60 --- /dev/null +++ b/workbook.ipynb @@ -0,0 +1,1230 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "6624792b", + "metadata": {}, + "source": [ + "# Exploring ObsidianTools Library\n", + "\n", + "This notebook demonstrates the capabilities of the `obsidiantools` library using the test vault provided in the repository. We'll explore:\n", + "\n", + "1. Setting up and loading an Obsidian vault\n", + "2. Exploring vault structure\n", + "3. 
Working with notes and their content\n", + "4. Analyzing frontmatter\n", + "5. Exploring note links and relationships\n", + "6. Working with tags\n", + "\n", + "Let's begin by importing the necessary libraries and setting up our vault." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "b3726d0c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import os\n", + "from pathlib import Path\n", + "from obsidiantools.api import Vault\n", + "\n", + "# Set up the path to our test vault\n", + "current_dir = os.getcwd()\n", + "test_vault_path = Path(os.path.join(current_dir, 'tests', 'vault-stub'))\n", + "\n", + "# Initialize the vault and connect with note content gathering\n", + "vault = Vault(test_vault_path)\n", + "vault.connect().gather()" + ] + }, + { + "cell_type": "markdown", + "id": "eafbd7d1", + "metadata": {}, + "source": [ + "## Exploring Vault Structure\n", + "\n", + "Let's examine the structure of our vault, including the files and folders it contains." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "728a1232", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Markdown files in the vault:\n", + "- Isolated note\n", + "- Sussudio\n", + "- rich\n", + "- Isolated note\n", + "- Causam mihi\n", + "- Vulnera ubera\n", + "- Ne fuit\n", + "- Brevissimus moenia\n", + "- Alimenta\n", + "\n", + "Canvas files in the vault:\n", + "- Crazy wall.canvas\n", + "- Crazy wall 2.canvas\n", + "\n", + "Vault Statistics:\n", + "Number of markdown files: 9\n", + "Number of canvas files: 2\n" + ] + } + ], + "source": [ + "# List all markdown files in the vault\n", + "print(\"Markdown files in the vault:\")\n", + "for note_path in vault.md_file_index:\n", + " print(f\"- {os.path.basename(note_path)}\")\n", + "\n", + "print(\"\\nCanvas files in the vault:\")\n", + "for canvas_path in vault.canvas_file_index:\n", + " print(f\"- {os.path.basename(canvas_path)}\")\n", + "\n", + "# Print some basic vault statistics\n", + "print(\"\\nVault Statistics:\")\n", + "print(f\"Number of markdown files: {len(vault.md_file_index)}\")\n", + "print(f\"Number of canvas files: {len(vault.canvas_file_index)}\")" + ] + }, + { + "cell_type": "markdown", + "id": "26f9c82c", + "metadata": {}, + "source": [ + "## Working with Notes and Frontmatter\n", + "\n", + "Let's examine the content and frontmatter of some notes in our vault. We'll look at:\n", + "1. Reading note content\n", + "2. Extracting frontmatter\n", + "3. 
Analyzing note metadata" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "7de572d7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Frontmatter for Sussudio:\n", + "title: Sussudio\n", + "artist: Phil Collins\n", + "category: music\n", + "year: 1985\n", + "url: https://www.discogs.com/Phil-Collins-Sussudio/master/106239\n", + "references: [[['American Psycho (film)']], 'Polka Party!']\n", + "chart_peaks: [{'US': 1}, {'UK': 12}]\n", + "\n", + "Source text (first few lines):\n", + "# Sussudio\n", + "\n", + "Another word with absolutely no meaning 😄\n", + "\n", + "This will be a note inside the vault dir. Others will be lipsum in a subdirectory.\n", + "\n", + "Readable text (processed for better readability):\n", + "# Sussudio\n", + "\n", + "Another word with absolutely no meaning 😄\n", + "\n", + "This will be a note inside the vault dir. Others will be lipsum in a subdirectory.\n" + ] + } + ], + "source": [ + "# Get frontmatter using vault's built-in front_matter_index\n", + "print(\"Frontmatter for Sussudio:\")\n", + "sussudio_frontmatter = vault.get_front_matter('Sussudio')\n", + "if sussudio_frontmatter:\n", + " for key, value in sussudio_frontmatter.items():\n", + " print(f\"{key}: {value}\")\n", + "\n", + "# Get note content using vault's source_text and readable_text features\n", + "print(\"\\nSource text (first few lines):\")\n", + "source_text = vault.get_source_text('Sussudio')\n", + "print(\"\\n\".join(source_text.split(\"\\n\")[:5]))\n", + "\n", + "print(\"\\nReadable text (processed for better readability):\")\n", + "readable_text = vault.get_readable_text('Sussudio')\n", + "print(\"\\n\".join(readable_text.split(\"\\n\")[:5]))" + ] + }, + { + "cell_type": "markdown", + "id": "66cd1509", + "metadata": {}, + "source": [ + "## Exploring Note Links and References\n", + "\n", + "Let's examine how notes are connected to each other through links and references. 
We'll analyze both internal links and backlinks." + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "c1aba5ad", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Links in Sussudio.md:\n", + "\n", + "Wikilinks:\n", + "- American Psycho (film)\n", + "\n", + "Backlinks (notes that link to Sussudio):\n", + "\n", + "Embedded files:\n", + "- Sussudio.mp3\n", + "- 1999.flac\n", + "\n", + "Markdown links:\n", + "\n", + "Analyzing Vulnera ubera:\n", + "\n", + "Wikilinks:\n", + "- Caelum\n", + "- Tarpeia\n", + "- Vita\n", + "\n", + "Link statistics for Sussudio:\n", + "Links to American Psycho (film): 1\n" + ] + } + ], + "source": [ + "# Let's analyze different types of links in notes\n", + "\n", + "# Analyze Sussudio.md\n", + "print(\"Links in Sussudio.md:\")\n", + "print(\"\\nWikilinks:\")\n", + "wikilinks = vault.get_wikilinks('Sussudio')\n", + "for link in wikilinks:\n", + " print(f\"- {link}\")\n", + "\n", + "print(\"\\nBacklinks (notes that link to Sussudio):\")\n", + "backlinks = vault.get_backlinks('Sussudio')\n", + "for link in backlinks:\n", + " print(f\"- {link}\")\n", + "\n", + "print(\"\\nEmbedded files:\")\n", + "embedded_files = vault.get_embedded_files('Sussudio')\n", + "for file in embedded_files:\n", + " print(f\"- {file}\")\n", + "\n", + "print(\"\\nMarkdown links:\")\n", + "md_links = vault.get_md_links('Sussudio')\n", + "for link in md_links:\n", + " print(f\"- {link}\")\n", + "\n", + "# Analyze links in another note (Vulnera ubera)\n", + "print(\"\\nAnalyzing Vulnera ubera:\")\n", + "print(\"\\nWikilinks:\")\n", + "vulnera_wikilinks = vault.get_wikilinks('Vulnera ubera')\n", + "for link in vulnera_wikilinks:\n", + " print(f\"- {link}\")\n", + "\n", + "# You can also get link counts\n", + "print(\"\\nLink statistics for Sussudio:\")\n", + "wikilink_counts = vault.get_wikilink_counts('Sussudio')\n", + "for target, count in wikilink_counts.items():\n", + " print(f\"Links to {target}: 
{count}\")\n", + "\n", + "backlink_counts = vault.get_backlink_counts('Sussudio')\n", + "for source, count in backlink_counts.items():\n", + " print(f\"Links from {source}: {count}\")" + ] + }, + { + "cell_type": "markdown", + "id": "45b8c89e", + "metadata": {}, + "source": [ + "## Working with Tags\n", + "\n", + "Let's analyze the tags used across notes in the vault." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "4896ee9e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "All unique tags in the vault from the vault.tags_index:\n", + "- y-1982\n", + "- y1982\n", + "- y2000\n", + "- y_1982\n", + "\n", + "Files with their tags:\n", + "\n", + "Sussudio:\n", + " - y-1982\n", + " - y1982\n", + " - y1982\n", + " - y2000\n", + " - y_1982\n", + "\n", + "Demonstrating get_tags() for a specific note (Sussudio.md):\n", + "\n", + "Tags in Sussudio.md (including nested tags):\n", + "- y-1982\n", + "- y1982\n", + "- y1982/sep\n", + "- y2000/party-over/oops/out-of-time\n", + "- y_1982\n" + ] + } + ], + "source": [ + "from obsidiantools.md_utils import get_tags\n", + "\n", + "# Get tags using the vault's built-in tags_index\n", + "print(\"All unique tags in the vault from the vault.tags_index:\")\n", + "all_tags = set()\n", + "for note_tags in vault.tags_index.values():\n", + " all_tags.update(note_tags)\n", + "\n", + "for tag in sorted(all_tags):\n", + " print(f\"- {tag}\")\n", + "\n", + "print(\"\\nFiles with their tags:\")\n", + "for note_name, tags in vault.tags_index.items():\n", + " if tags: # Only show files that have tags\n", + " print(f\"\\n{note_name}:\")\n", + " for tag in sorted(tags):\n", + " print(f\" - {tag}\")\n", + "\n", + "# You can also get tags for a specific note\n", + "print(\"\\nDemonstrating get_tags() for a specific note (Sussudio.md):\")\n", + "sussudio_tags = get_tags(Path(os.path.join(test_vault_path, 'Sussudio.md')), show_nested=True)\n", + "print(\"\\nTags in Sussudio.md 
(including nested tags):\")\n", + "for tag in sorted(sussudio_tags):\n", + " print(f\"- {tag}\")" + ] + }, + { + "cell_type": "markdown", + "id": "f42c6a50", + "metadata": {}, + "source": [ + "## Working with Properties\n", + "\n", + "Let's explore how to work with Obsidian properties, which can be defined either in the frontmatter or inline in the note. Properties in Obsidian are a powerful way to add structured metadata to your notes." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "07f95eba", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Properties Overview:\n", + "\n", + "No properties found for Isolated note\n", + "\n", + "Properties for Sussudio:\n", + " title: Sussudio\n", + " artist: Phil Collins\n", + " category: music\n", + " year: 1985\n", + " url: https://www.discogs.com/Phil-Collins-Sussudio/master/106239\n", + " references: [[['American Psycho (film)']], 'Polka Party!']\n", + " chart_peaks: [{'US': 1}, {'UK': 12}]\n", + "\n", + "Properties for rich:\n", + " title: Rich Properties Example\n", + " date: 2025-07-17\n", + " time: 2025-07-17 15:30:00\n", + " status: In Progress\n", + " tags: ['obsidian', 'test', 'properties']\n", + " mixed bag: [1, 'two', True, None, '', 'None']\n", + " nested: {'key1': 'value1', 'key2': {'subkey1': 'nested value', 'subkey2': 'another value'}}\n", + " priority: High\n", + " links: [[['another-note']], [['yet-another-note|custom link text']]]\n", + " prop with spaces: special value\n", + " property:with:colons: complex:value:here\n", + " number: 42\n", + " empty: \n", + "\n", + "No properties found for lipsum/Isolated note\n", + "\n", + "Properties for Causam mihi:\n", + " title: Causam mihi\n", + " author: Ovid\n", + " category: literature\n", + " year: 8\n", + " language: la\n", + " description: \\{\\{description\\}\\}\n", + "\n", + "No properties found for Vulnera ubera\n", + "\n", + "No properties found for Ne fuit\n", + "\n", + "No properties 
found for Brevissimus moenia\n", + "\n", + "No properties found for Alimenta\n", + "\n", + "Properties Index:\n", + "{\n", + " \"Sussudio\": {\n", + " \"title\": \"Sussudio\",\n", + " \"artist\": \"Phil Collins\",\n", + " \"category\": \"music\",\n", + " \"year\": 1985,\n", + " \"url\": \"https://www.discogs.com/Phil-Collins-Sussudio/master/106239\",\n", + " \"references\": [\n", + " [\n", + " [\n", + " \"American Psycho (film)\"\n", + " ]\n", + " ],\n", + " \"Polka Party!\"\n", + " ],\n", + " \"chart_peaks\": [\n", + " {\n", + " \"US\": 1\n", + " },\n", + " {\n", + " \"UK\": 12\n", + " }\n", + " ]\n", + " },\n", + " \"rich\": {\n", + " \"title\": \"Rich Properties Example\",\n", + " \"date\": \"2025-07-17\",\n", + " \"time\": \"2025-07-17T15:30:00\",\n", + " \"status\": \"In Progress\",\n", + " \"tags\": [\n", + " \"obsidian\",\n", + " \"test\",\n", + " \"properties\"\n", + " ],\n", + " \"mixed bag\": [\n", + " 1,\n", + " \"two\",\n", + " true,\n", + " null,\n", + " \"\",\n", + " \"None\"\n", + " ],\n", + " \"nested\": {\n", + " \"key1\": \"value1\",\n", + " \"key2\": {\n", + " \"subkey1\": \"nested value\",\n", + " \"subkey2\": \"another value\"\n", + " }\n", + " },\n", + " \"priority\": \"High\",\n", + " \"links\": [\n", + " [\n", + " [\n", + " \"another-note\"\n", + " ]\n", + " ],\n", + " [\n", + " [\n", + " \"yet-another-note|custom link text\"\n", + " ]\n", + " ]\n", + " ],\n", + " \"prop with spaces\": \"special value\",\n", + " \"property:with:colons\": \"complex:value:here\",\n", + " \"number\": \"42\",\n", + " \"empty\": \"\"\n", + " },\n", + " \"Causam mihi\": {\n", + " \"title\": \"Causam mihi\",\n", + " \"author\": \"Ovid\",\n", + " \"category\": \"literature\",\n", + " \"year\": 8,\n", + " \"language\": \"la\",\n", + " \"description\": \"\\\\{\\\\{description\\\\}\\\\}\"\n", + " }\n", + "}\n", + "\n", + "Example: Getting specific properties\n", + "\n", + "Example: Different property types\n" + ] + } + ], + "source": [ + "# Let's look at properties in 
the test vault\n", + "from pathlib import Path\n", + "import json\n", + "from datetime import date, datetime\n", + "\n", + "class DateTimeEncoder(json.JSONEncoder):\n", + " def default(self, obj):\n", + " if isinstance(obj, (date, datetime)):\n", + " return obj.isoformat()\n", + " return super().default(obj)\n", + "\n", + "def display_properties_for_note(vault, note_name):\n", + " \"\"\"Helper function to display properties for a note\"\"\"\n", + " props = vault.get_properties(note_name)\n", + " if props:\n", + " print(f\"\\nProperties for {note_name}:\")\n", + " for key, value in props.items():\n", + " print(f\" {key}: {value}\")\n", + " else:\n", + " print(f\"\\nNo properties found for {note_name}\")\n", + "\n", + "# Get properties for all notes in the vault\n", + "print(\"Properties Overview:\")\n", + "for note_name in vault.md_file_index:\n", + " display_properties_for_note(vault, note_name)\n", + "\n", + "# Get properties index (all properties across all notes)\n", + "print(\"\\nProperties Index:\")\n", + "properties_index = vault.get_properties_index()\n", + "print(json.dumps(properties_index, indent=2, cls=DateTimeEncoder))\n", + "\n", + "# Example: Get a specific property from a note\n", + "print(\"\\nExample: Getting specific properties\")\n", + "status = vault.get_property('Sussudio', 'status')\n", + "if status:\n", + " print(f\"Status of Sussudio: {status}\")\n", + "\n", + "# Example: Working with property types\n", + "print(\"\\nExample: Different property types\")\n", + "test_files = Path(os.path.join(current_dir, 'tests', 'general', 'property-tests'))\n", + "for note_path in test_files.glob('*.md'):\n", + " note_name = note_path.stem\n", + " props = vault.get_properties(note_name)\n", + " if props:\n", + " print(f\"\\n{note_name}:\")\n", + " for key, value in props.items():\n", + " print(f\" {key} ({type(value).__name__}): {value}\")" + ] + }, + { + "cell_type": "markdown", + "id": "01290dc1", + "metadata": {}, + "source": [ + "Key features of the 
properties implementation:\n", + "\n", + "1. **Property Sources**\n", + " - YAML frontmatter at the start of files\n", + " - Inline properties in the format `property:: value`\n", + " - Inline array properties using `[value1, value2]` syntax\n", + "\n", + "2. **Property Types**\n", + " - String values (with special character support)\n", + " - Arrays\n", + " - Numbers\n", + " - Dates and datetimes (returned as `date`/`datetime` objects; call `.isoformat()` to serialize them)\n", + "\n", + "3. **Special Features**\n", + " - Property overriding (inline properties override frontmatter)\n", + " - Special character handling in property keys and values\n", + " - Quote-preserving property values\n", + " - Array type support\n", + "\n", + "Let's now explore some practical examples of working with properties:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "0007d6ca", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Finding notes by property:\n", + "\n", + "Notes with status='In Progress':\n", + "- rich\n", + "\n", + "Property Statistics:\n", + "\n", + "Total notes: 9\n", + "Notes with properties: 3\n", + "\n", + "Property usage counts:\n", + "- title: 3 notes\n", + "- artist: 1 notes\n", + "- category: 2 notes\n", + "- year: 2 notes\n", + "- url: 1 notes\n", + "- references: 1 notes\n", + "- chart_peaks: 1 notes\n", + "- date: 1 notes\n", + "- time: 1 notes\n", + "- status: 1 notes\n", + "- tags: 1 notes\n", + "- mixed bag: 1 notes\n", + "- nested: 1 notes\n", + "- priority: 1 notes\n", + "- links: 1 notes\n", + "- prop with spaces: 1 notes\n", + "- property:with:colons: 1 notes\n", + "- number: 1 notes\n", + "- empty: 1 notes\n", + "- author: 1 notes\n", + "- language: 1 notes\n", + "- description: 1 notes\n", + "\n", + "Property types:\n", + "\n", + "str properties:\n", + "- artist\n", + "- category\n", + "- prop with spaces\n", + "- description\n", + "- priority\n", + "- title\n", + "- status\n", + "- language\n", + "- url\n", + "- author\n", + "- 
property:with:colons\n", + "- empty\n", + "- number\n", + "\n", + "int properties:\n", + "- year\n", + "\n", + "list properties:\n", + "- chart_peaks\n", + "- references\n", + "- links\n", + "- tags\n", + "- mixed bag\n", + "\n", + "date properties:\n", + "- date\n", + "\n", + "datetime properties:\n", + "- time\n", + "\n", + "dict properties:\n", + "- nested\n", + "\n", + "Array properties:\n", + "- chart_peaks\n", + "- references\n", + "- links\n", + "- tags\n", + "- mixed bag\n" + ] + } + ], + "source": [ + "# Advanced Property Examples\n", + "\n", + "def find_notes_by_property(vault, property_name, property_value=None):\n", + " \"\"\"Find notes that have a specific property, optionally matching a value\"\"\"\n", + " matching_notes = []\n", + " for note_name in vault.md_file_index:\n", + " props = vault.get_properties(note_name)\n", + " if not props:\n", + " continue\n", + " \n", + " if property_value is None:\n", + " # Just check if property exists\n", + " if property_name in props:\n", + " matching_notes.append(note_name)\n", + " else:\n", + " # Check if property matches value\n", + " if props.get(property_name) == property_value:\n", + " matching_notes.append(note_name)\n", + " \n", + " return matching_notes\n", + "\n", + "def get_property_statistics(vault):\n", + " \"\"\"Get statistics about property usage in the vault\"\"\"\n", + " stats = {\n", + " 'total_notes': len(vault.md_file_index),\n", + " 'notes_with_properties': 0,\n", + " 'property_counts': {},\n", + " 'property_types': {},\n", + " 'array_properties': set(),\n", + " }\n", + " \n", + " for note_name in vault.md_file_index:\n", + " props = vault.get_properties(note_name)\n", + " if props:\n", + " stats['notes_with_properties'] += 1\n", + " for key, value in props.items():\n", + " stats['property_counts'][key] = stats['property_counts'].get(key, 0) + 1\n", + " value_type = type(value).__name__\n", + " if value_type not in stats['property_types']:\n", + " stats['property_types'][value_type] = 
set()\n", + " stats['property_types'][value_type].add(key)\n", + " if isinstance(value, list):\n", + " stats['array_properties'].add(key)\n", + " \n", + " return stats\n", + "\n", + "# Example usage\n", + "print(\"Finding notes by property:\")\n", + "status_notes = find_notes_by_property(vault, 'status', 'In Progress')\n", + "print(\"\\nNotes with status='In Progress':\")\n", + "for note in status_notes:\n", + " print(f\"- {note}\")\n", + "\n", + "print(\"\\nProperty Statistics:\")\n", + "stats = get_property_statistics(vault)\n", + "print(f\"\\nTotal notes: {stats['total_notes']}\")\n", + "print(f\"Notes with properties: {stats['notes_with_properties']}\")\n", + "print(\"\\nProperty usage counts:\")\n", + "for prop, count in stats['property_counts'].items():\n", + " print(f\"- {prop}: {count} notes\")\n", + "print(\"\\nProperty types:\")\n", + "for type_name, props in stats['property_types'].items():\n", + " print(f\"\\n{type_name} properties:\")\n", + " for prop in props:\n", + " print(f\"- {prop}\")\n", + "print(\"\\nArray properties:\")\n", + "for prop in stats['array_properties']:\n", + " print(f\"- {prop}\")" + ] + }, + { + "cell_type": "markdown", + "id": "49b2189d", + "metadata": {}, + "source": [ + "## Summary\n", + "\n", + "In this notebook, we've explored the main capabilities of the `obsidiantools` library:\n", + "\n", + "1. **Vault Structure**: We learned how to initialize a vault and list its markdown and canvas files\n", + "2. **Note Content**: We demonstrated how to read and parse note content, including frontmatter\n", + "3. **Links and References**: We explored how to extract and analyze internal links between notes\n", + "4. **Tags**: We showed how to extract and analyze tags from both note content and frontmatter\n", + "\n", + "The library provides a solid foundation for programmatically analyzing and working with Obsidian vaults. 
Some key features we discovered:\n", + "\n", + "- Support for both markdown (.md) and canvas (.canvas) files\n", + "- YAML frontmatter parsing\n", + "- Wiki-style link extraction\n", + "- Tag analysis from both content and frontmatter\n", + "\n", + "You can use these tools to build more complex analyses of your Obsidian vault, such as:\n", + "- Creating network graphs of note relationships\n", + "- Analyzing tag usage patterns\n", + "- Extracting structured data from frontmatter\n", + "- Building custom search and navigation tools" + ] + }, + { + "cell_type": "markdown", + "id": "82cd1271", + "metadata": {}, + "source": [ + "## Additional Vault Methods Reference\n", + "\n", + "Let's review the main methods and attributes of the `Vault` class that are useful for automation and analysis." + ] + }, + { + "cell_type": "markdown", + "id": "9fb82705", + "metadata": {}, + "source": [ + "### Core Methods\n", + "\n", + "1. **Setup Methods**\n", + " - `connect(show_nested_tags=False, attachments=False)`: Connects notes in a graph structure\n", + " - `gather(tags=None)`: Gathers text content of notes for analysis\n", + "\n", + "2. **Note Access Methods**\n", + " - `get_source_text(note_name)`: Get raw source text of a note\n", + " - `get_readable_text(note_name)`: Get processed text optimized for reading/analysis\n", + " - `get_front_matter(note_name)`: Get note's frontmatter\n", + " - `get_tags(note_name)`: Get tags from a note\n", + "\n", + "3. **Link Analysis Methods**\n", + " - `get_wikilinks(note_name)`: Get wiki-style links from a note\n", + " - `get_backlinks(note_name)`: Get notes that link to this note\n", + " - `get_wikilink_counts(note_name)`: Get count of outgoing links\n", + " - `get_backlink_counts(note_name)`: Get count of incoming links\n", + " - `get_embedded_files(file_name)`: Get list of embedded files\n", + " - `get_md_links(file_name)`: Get markdown-style links\n", + "\n", + "4. 
**Metadata Methods**\n", + " - `get_note_metadata()`: Get DataFrame with note statistics\n", + " - `get_media_file_metadata()`: Get DataFrame with media file statistics\n", + " - `get_canvas_file_metadata()`: Get DataFrame with canvas file statistics\n", + " - `get_all_file_metadata()`: Get combined DataFrame of all file types\n", + "\n", + "### Important Properties\n", + "\n", + "1. **File Indices**\n", + " - `md_file_index`: Dictionary of markdown files\n", + " - `canvas_file_index`: Dictionary of canvas files\n", + " - `media_file_index`: Dictionary of media files\n", + "\n", + "2. **Content Indices**\n", + " - `front_matter_index`: Dictionary of all notes' frontmatter\n", + " - `tags_index`: Dictionary of all notes' tags\n", + " - `source_text_index`: Dictionary of all notes' source text\n", + " - `readable_text_index`: Dictionary of all notes' readable text\n", + "\n", + "3. **Link Indices**\n", + " - `wikilinks_index`: Dictionary of all wiki-style links\n", + " - `backlinks_index`: Dictionary of all backlinks\n", + " - `embedded_files_index`: Dictionary of all embedded files\n", + " - `md_links_index`: Dictionary of all markdown links\n", + "\n", + "4. **Special Note Lists**\n", + " - `isolated_notes`: Notes without any connections\n", + " - `nonexistent_notes`: Referenced notes that don't exist yet\n", + "\n", + "5. **Graph Access**\n", + " - `graph`: NetworkX graph object representing the vault\n", + "\n", + "Let's demonstrate some of these additional features with examples." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "47225064", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Note Metadata Overview:\n", + " rel_filepath \\\n", + "note \n", + "American Psycho (film) NaN \n", + "Amor NaN \n", + "Sussudio Sussudio.md \n", + "Aetna NaN \n", + "Aras Teucras NaN \n", + "\n", + " abs_filepath \\\n", + "note \n", + "American Psycho (film) NaN \n", + "Amor NaN \n", + "Sussudio /Users/veethahavya/Projects/obsidiantools/test... \n", + "Aetna NaN \n", + "Aras Teucras NaN \n", + "\n", + " note_exists n_backlinks n_wikilinks n_tags \\\n", + "note \n", + "American Psycho (film) False 1 NaN NaN \n", + "Amor False 2 NaN NaN \n", + "Sussudio True 0 1.0 5.0 \n", + "Aetna False 1 NaN NaN \n", + "Aras Teucras False 1 NaN NaN \n", + "\n", + " n_embedded_files modified_time \n", + "note \n", + "American Psycho (film) NaN NaT \n", + "Amor NaN NaT \n", + "Sussudio 2.0 2025-07-17 17:50:55.380110741 \n", + "Aetna NaN NaT \n", + "Aras Teucras NaN NaT \n", + "\n", + "Isolated Notes (no connections):\n", + "- Isolated note\n", + "- rich\n", + "- lipsum/Isolated note\n", + "\n", + "Nonexistent Notes (referenced but missing):\n", + "- American Psycho (film)\n", + "- Amor\n", + "- Caelum\n", + "- Aetna\n", + "- Aras Teucras\n", + "- Bacchus\n", + "- Vita\n", + "- Tydides\n", + "- Virtus\n", + "- Manus\n", + "- Tarpeia\n", + "- Dives\n", + "\n", + "Graph Statistics:\n", + "Number of nodes: 21\n", + "Number of edges: 29\n", + "\n", + "All Unique Wikilinks:\n", + "- Aetna\n", + "- American Psycho (film)\n", + "- Amor\n", + "- Aras Teucras\n", + "- Bacchus\n", + "- Brevissimus moenia\n", + "- Caelum\n", + "- Causam mihi\n", + "- Dives\n", + "- Manus\n", + "- Ne fuit\n", + "- Tarpeia\n", + "- Tydides\n", + "- Virtus\n", + "- Vita\n", + "\n", + "Media File Statistics:\n", + " rel_filepath abs_filepath file_exists n_backlinks n_wikilinks \\\n", + "file \n", + "Sussudio.mp3 NaN NaN False 1 NaN \n", + 
"1999.flac NaN NaN False 1 NaN \n", + "\n", + " n_tags n_embedded_files modified_time \n", + "file \n", + "Sussudio.mp3 NaN NaN NaT \n", + "1999.flac NaN NaN NaT \n", + "\n", + "Canvas File Statistics:\n", + " rel_filepath \\\n", + "file \n", + "Crazy wall.canvas Crazy wall.canvas \n", + "Crazy wall 2.canvas Crazy wall 2.canvas \n", + "\n", + " abs_filepath \\\n", + "file \n", + "Crazy wall.canvas /Users/veethahavya/Projects/obsidiantools/test... \n", + "Crazy wall 2.canvas /Users/veethahavya/Projects/obsidiantools/test... \n", + "\n", + " file_exists n_backlinks n_wikilinks n_tags \\\n", + "file \n", + "Crazy wall.canvas True NaN NaN NaN \n", + "Crazy wall 2.canvas True NaN NaN NaN \n", + "\n", + " n_embedded_files modified_time \n", + "file \n", + "Crazy wall.canvas NaN 2025-07-17 17:50:55.379952431 \n", + "Crazy wall 2.canvas NaN 2025-07-17 17:50:55.379861832 \n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "import networkx as nx\n", + "\n", + "# 1. Get metadata about all notes in a DataFrame\n", + "note_metadata = vault.get_note_metadata()\n", + "print(\"Note Metadata Overview:\")\n", + "print(note_metadata.head())\n", + "\n", + "# 2. Find isolated notes (notes without connections)\n", + "print(\"\\nIsolated Notes (no connections):\")\n", + "for note in vault.isolated_notes:\n", + " print(f\"- {note}\")\n", + "\n", + "# 3. Find nonexistent notes (referenced but don't exist)\n", + "print(\"\\nNonexistent Notes (referenced but missing):\")\n", + "for note in vault.nonexistent_notes:\n", + " print(f\"- {note}\")\n", + "\n", + "# 4. Access the graph for network analysis\n", + "graph = vault.graph\n", + "print(\"\\nGraph Statistics:\")\n", + "print(f\"Number of nodes: {graph.number_of_nodes()}\")\n", + "print(f\"Number of edges: {graph.number_of_edges()}\")\n", + "\n", + "# 5. 
Show all unique wikilinks in the vault\n", + "print(\"\\nAll Unique Wikilinks:\")\n", + "all_wikilinks = set()\n", + "for links in vault.wikilinks_index.values():\n", + " all_wikilinks.update(links)\n", + "for link in sorted(all_wikilinks):\n", + " print(f\"- {link}\")\n", + "\n", + "# 6. Show media file statistics\n", + "media_metadata = vault.get_media_file_metadata()\n", + "print(\"\\nMedia File Statistics:\")\n", + "if not media_metadata.empty:\n", + " print(media_metadata.head())\n", + "else:\n", + " print(\"No media files found\")\n", + "\n", + "# 7. Show canvas file information\n", + "canvas_metadata = vault.get_canvas_file_metadata()\n", + "print(\"\\nCanvas File Statistics:\")\n", + "if not canvas_metadata.empty:\n", + " print(canvas_metadata.head())\n", + "else:\n", + " print(\"No canvas files found\")" + ] + }, + { + "cell_type": "markdown", + "id": "af15d65c", + "metadata": {}, + "source": [ + "### Practical Automation Examples\n", + "\n", + "Here are some practical ways to use these methods for vault automation:" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "823060af", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Notes Needing Attention:\n", + "- Isolated note\n", + "- rich\n", + "- lipsum/Isolated note\n", + "- Vulnera ubera\n", + "- Alimenta\n", + "\n", + "Network for music-related notes:\n", + "\n", + "Note Complexity Analysis:\n", + " word_count link_count backlink_count tag_count \\\n", + "Isolated note 9 0 0 0 \n", + "Sussudio 71 1 0 5 \n", + "rich 57 0 0 0 \n", + "lipsum/Isolated note 30 0 0 0 \n", + "Causam mihi 200 4 1 0 \n", + "\n", + " has_frontmatter \n", + "Isolated note False \n", + "Sussudio True \n", + "rich True \n", + "lipsum/Isolated note False \n", + "Causam mihi True \n" + ] + } + ], + "source": [ + "# Example 1: Find notes that need attention (no tags, no links)\n", + "def find_notes_needing_attention():\n", + " notes_to_review = []\n", + " for note_name 
in vault.md_file_index:\n", + " # Check if note has no tags\n", + " has_no_tags = note_name not in vault.tags_index or not vault.tags_index[note_name]\n", + " # Check if note has no outgoing links\n", + " has_no_links = note_name not in vault.wikilinks_index or not vault.wikilinks_index[note_name]\n", + " # Check if note has no backlinks\n", + " has_no_backlinks = note_name not in vault.backlinks_index or not vault.backlinks_index[note_name]\n", + " \n", + " if has_no_tags and (has_no_links or has_no_backlinks):\n", + " notes_to_review.append(note_name)\n", + " \n", + " return notes_to_review\n", + "\n", + "# Example 2: Create a reference network for a specific topic\n", + "def create_topic_network(topic_tag):\n", + " relevant_notes = []\n", + " for note, tags in vault.tags_index.items():\n", + " if topic_tag in tags:\n", + " relevant_notes.append(note)\n", + " \n", + " topic_network = {}\n", + " for note in relevant_notes:\n", + " connections = {\n", + " 'outgoing': vault.get_wikilinks(note),\n", + " 'incoming': vault.get_backlinks(note)\n", + " }\n", + " topic_network[note] = connections\n", + " \n", + " return topic_network\n", + "\n", + "# Example 3: Analyze note complexity\n", + "def analyze_note_complexity():\n", + " complexity_metrics = {}\n", + " for note_name in vault.md_file_index:\n", + " if note_name in vault.source_text_index:\n", + " metrics = {\n", + " 'word_count': len(vault.get_readable_text(note_name).split()),\n", + " 'link_count': len(vault.get_wikilinks(note_name)),\n", + " 'backlink_count': len(vault.get_backlinks(note_name)),\n", + " 'tag_count': len(vault.tags_index.get(note_name, [])),\n", + " 'has_frontmatter': bool(vault.get_front_matter(note_name))\n", + " }\n", + " complexity_metrics[note_name] = metrics\n", + " \n", + " return pd.DataFrame.from_dict(complexity_metrics, orient='index')\n", + "\n", + "# Let's try these functions\n", + "print(\"Notes Needing Attention:\")\n", + "for note in find_notes_needing_attention():\n", + " 
print(f\"- {note}\")\n", + "\n", + "print(\"\\nNetwork for music-related notes:\")\n", + "music_network = create_topic_network('music')\n", + "for note, connections in music_network.items():\n", + " print(f\"\\n{note}:\")\n", + " print(\" Outgoing links:\", connections['outgoing'])\n", + " print(\" Incoming links:\", connections['incoming'])\n", + "\n", + "print(\"\\nNote Complexity Analysis:\")\n", + "complexity_df = analyze_note_complexity()\n", + "print(complexity_df.head())" + ] + }, + { + "cell_type": "markdown", + "id": "2ca9415b", + "metadata": {}, + "source": [ + "### Advanced Graph Analysis\n", + "\n", + "Since we have access to the full NetworkX graph of the vault, we can perform sophisticated network analysis:" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "a5fe31f3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Note Centrality Analysis (top 5 by degree):\n", + " degree in_degree out_degree\n", + "Alimenta 0.60 0.00 0.60\n", + "Ne fuit 0.40 0.10 0.30\n", + "Causam mihi 0.25 0.05 0.20\n", + "Bacchus 0.25 0.25 0.00\n", + "Brevissimus moenia 0.20 0.05 0.15\n", + "\n", + "Connected Components:\n", + "Component 1: 1 notes\n", + "Notes: Isolated note\n", + "Component 2: 2 notes\n", + "Notes: American Psycho (film), Sussudio\n", + "Component 3: 1 notes\n", + "Notes: rich\n", + "Component 4: 1 notes\n", + "Notes: lipsum/Isolated note\n", + "Component 5: 16 notes\n", + "Notes: Aetna, Alimenta, Amor, Aras Teucras, Bacchus, Brevissimus moenia, Caelum, Causam mihi, Dives, Manus, Ne fuit, Tarpeia, Tydides, Virtus, Vita, Vulnera ubera\n", + "\n", + "Connections from Sussudio (up to 2 links away):\n", + "To American Psycho (film): Sussudio -> American Psycho (film)\n", + "\n", + "Graph Statistics:\n", + "Number of notes (nodes): 21\n", + "Number of links (edges): 29\n", + "Average out-degree: 1.38\n", + "Notes with no outgoing links: 15\n", + "Notes with no incoming links: 6\n" + ] + } + ], + 
"source": [ + "# Get the graph\n", + "G = vault.graph\n", + "\n", + "# 1. Calculate basic centrality measures\n", + "centrality = {\n", + " 'degree': nx.degree_centrality(G),\n", + " 'in_degree': nx.in_degree_centrality(G),\n", + " 'out_degree': nx.out_degree_centrality(G)\n", + "}\n", + "\n", + "# Create a DataFrame of centrality measures\n", + "centrality_df = pd.DataFrame(centrality)\n", + "print(\"Note Centrality Analysis (top 5 by degree):\")\n", + "print(centrality_df.sort_values('degree', ascending=False).head())\n", + "\n", + "# 2. Find weakly connected components (groups of connected notes)\n", + "components = list(nx.weakly_connected_components(G))\n", + "print(\"\\nConnected Components:\")\n", + "for i, component in enumerate(components, 1):\n", + " print(f\"Component {i}: {len(component)} notes\")\n", + " print(f\"Notes: {', '.join(sorted(component))}\")\n", + "\n", + "# 3. Calculate shortest paths between notes\n", + "def find_note_connections(start_note, max_distance=2):\n", + " paths = {}\n", + " try:\n", + " for target in G.nodes():\n", + " if target != start_note:\n", + " try:\n", + " path = nx.shortest_path(G, start_note, target)\n", + " if len(path) - 1 <= max_distance: # -1 because path includes start node\n", + " paths[target] = path\n", + " except nx.NetworkXNoPath:\n", + " continue\n", + " except nx.NetworkXError:\n", + " return {}\n", + " return paths\n", + "\n", + "# Example: Find connections from Sussudio\n", + "print(\"\\nConnections from Sussudio (up to 2 links away):\")\n", + "sussudio_connections = find_note_connections('Sussudio')\n", + "for target, path in sussudio_connections.items():\n", + " print(f\"To {target}: {' -> '.join(path)}\")\n", + "\n", + "# 4. 
Basic graph statistics\n", + "print(\"\\nGraph Statistics:\")\n", + "print(f\"Number of notes (nodes): {G.number_of_nodes()}\")\n", + "print(f\"Number of links (edges): {G.number_of_edges()}\")\n", + "print(f\"Average out-degree: {sum(dict(G.out_degree()).values()) / G.number_of_nodes():.2f}\")\n", + "print(f\"Notes with no outgoing links: {sum(1 for _, d in G.out_degree() if d == 0)}\")\n", + "print(f\"Notes with no incoming links: {sum(1 for _, d in G.in_degree() if d == 0)}\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "obsidian", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.13.5" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}