From c2e7758fb41cda3e89e3224efa02217d9d1a3efe Mon Sep 17 00:00:00 2001 From: Paul Wright Date: Wed, 2 Oct 2024 11:20:59 +0100 Subject: [PATCH] support gdoc --- config.yaml | 2 ++ template/logseq.py | 69 ++++++++++++++++++++++++++++++++++------------ 2 files changed, 54 insertions(+), 17 deletions(-) create mode 100644 config.yaml diff --git a/config.yaml b/config.yaml new file mode 100644 index 0000000..e93c243 --- /dev/null +++ b/config.yaml @@ -0,0 +1,2 @@ +separator: "___" # Custom separator +strip_leading_dash: true # Enable stripping leading '- ' \ No newline at end of file diff --git a/template/logseq.py b/template/logseq.py index 5ed8e79..70e13c0 100755 --- a/template/logseq.py +++ b/template/logseq.py @@ -1,11 +1,13 @@ #!/usr/bin/env python3 -# UID: 2024-09-06-uid789 (replace with a unique identifier each time) +# UID: replace with a unique identifier each time import os import re import shutil import argparse +import yaml +from datetime import datetime # Function to clean the destination directory def clean_destination(destination): @@ -19,19 +21,31 @@ def populate_with_template(template, destination): # Copy the template contents to the destination directory shutil.copytree(template, destination, dirs_exist_ok=True) +# Function to process and fix markdown headers +def process_markdown_headers(content): + r""" + Replace escaped front matter delimiters (\---) with standard delimiters (---). 
+ """ + # Replace \--- with --- + content = re.sub(r'\\---', '---', content) + return content + # Function to clean markdown content, using the logic to remove top-level bullet points -def clean_markdown_content(content, uid=None): +def clean_markdown_content(content, uid=None, strip_leading_dash=True): + # First, process markdown headers + content = process_markdown_headers(content) + lines = content.splitlines() processed_lines = [] for line in lines: # Check if it's a top-level item (starting with '- ' and no preceding tabs or spaces) - if line.startswith('- ') and not line.startswith('\t'): + if strip_leading_dash and line.startswith('- ') and not line.startswith('\t'): # Remove the '- ' from the start of the line and left-strip spaces processed_lines.append(line[2:].lstrip()) elif line.startswith('\t'): # Remove the '\t' from the start of the line and left-strip spaces - processed_lines.append(line[1:]) + processed_lines.append(line[1:].lstrip()) else: # Keep other lines (including subtasks) unchanged processed_lines.append(line) @@ -46,7 +60,7 @@ def clean_markdown_content(content, uid=None): return cleaned_markdown.strip() # Function to process course.md and move to destination -def process_course_file(source, destination, uid): +def process_course_file(source, destination, uid, strip_leading_dash=True): course_md_path = os.path.join(source, 'pages', 'course.md') if not os.path.exists(course_md_path): print(f"File {course_md_path} not found.") @@ -57,7 +71,7 @@ def process_course_file(source, destination, uid): content = f.read() # Clean the markdown content and include the UID in the markdown output - cleaned_content = clean_markdown_content(content, uid=uid) + cleaned_content = clean_markdown_content(content, uid=uid, strip_leading_dash=strip_leading_dash) # Write to the destination directory directly as 'course.md' new_course_md_path = os.path.join(destination, 'course.md') @@ -67,7 +81,7 @@ def process_course_file(source, destination, uid): print(f'Processed 
{course_md_path} -> {new_course_md_path} (cleaned content, UID added)') # Function to process and move unit files with cleaning (topic.md handling) -def process_and_move_unit_file(unit_file, destination_dir, uid): +def process_and_move_unit_file(unit_file, destination_dir, uid, strip_leading_dash=True): new_file_path = os.path.join(destination_dir, 'topic.md') # Read and clean content @@ -75,7 +89,7 @@ def process_and_move_unit_file(unit_file, destination_dir, uid): content = f.read() # Clean markdown content using the top-level bullet removal logic - cleaned_content = clean_markdown_content(content, uid=uid) + cleaned_content = clean_markdown_content(content, uid=uid, strip_leading_dash=strip_leading_dash) # Write to the destination as 'topic.md' with open(new_file_path, 'w') as f: @@ -84,7 +98,7 @@ def process_and_move_unit_file(unit_file, destination_dir, uid): print(f'Processed {unit_file} -> {new_file_path} (cleaned top-level bullets, UID added)') # Function to parse the filename and create a new directory structure -def parse_filename_and_move(source, destination, uid): +def parse_filename_and_move(source, destination, uid, separator='___', strip_leading_dash=True): pages_dir = os.path.join(source, 'pages') assets_dir = os.path.join(source, 'assets') @@ -100,11 +114,11 @@ def parse_filename_and_move(source, destination, uid): # Skip course.md, handled separately continue - if '___' in file: - # Handle files with '___' in the name by creating nested directories + if separator in file: + # Handle files with separator in the name by creating nested directories original_md_path = os.path.join(root, file) file_stem = file.replace('.md', '') - parts = file_stem.split('___') + parts = file_stem.split(separator) # Create the new directory structure in the destination new_md_dir = os.path.join(destination, *parts[:-1]) @@ -117,7 +131,7 @@ def parse_filename_and_move(source, destination, uid): with open(original_md_path, 'r') as f: content = f.read() - cleaned_content = 
clean_markdown_content(content, uid=uid) + cleaned_content = clean_markdown_content(content, uid=uid, strip_leading_dash=strip_leading_dash) # Write the cleaned content with open(new_md_path, 'w') as f: @@ -134,7 +148,7 @@ def parse_filename_and_move(source, destination, uid): unit_name = file.replace('.md', '') new_unit_dir = os.path.join(destination, unit_name) os.makedirs(new_unit_dir, exist_ok=True) - process_and_move_unit_file(file_path, new_unit_dir, uid) + process_and_move_unit_file(file_path, new_unit_dir, uid, strip_leading_dash) # Function to move assets referenced in markdown files def move_assets(md_file, assets_dir, new_md_dir): @@ -166,17 +180,37 @@ def move_assets(md_file, assets_dir, new_md_dir): with open(md_file, 'w') as f: f.write(content) +# Function to generate a unique UID based on the current date and time +def generate_uid(): + return datetime.now().strftime("%Y-%m-%d-%H%M%S-uid") + # Main function for the command-line tool def main(): parser = argparse.ArgumentParser(description='Reorganize markdown files and assets.') parser.add_argument('source', type=str, help='Source directory containing the pages and assets.') parser.add_argument('destination', type=str, help='Destination directory for reorganized files.') parser.add_argument('template', type=str, help='Template directory to populate destination before processing.') + parser.add_argument('-c', '--config', type=str, default='config.yaml', help='Path to the configuration file (default: config.yaml)') args = parser.parse_args() + # Load configuration + if os.path.exists(args.config): + with open(args.config, 'r') as config_file: + try: + config = yaml.safe_load(config_file) or {} # an empty YAML file loads as None + separator = config.get('separator') or '___' # null/empty separator falls back to the default + strip_leading_dash = config.get('strip_leading_dash', True) + except yaml.YAMLError as e: + print(f"Error parsing the config file: {e}") + return + else: + print(f"Config file {args.config} not found. 
Using default settings.") + separator = '___' + strip_leading_dash = True + # Generate a unique identifier for this version of the script - uid = "2024-09-06-uid789" # Replace with actual UID generation logic if desired + uid = "2024-10-01-uid789" # Replace with actual UID generation logic if desired # Clean destination directory clean_destination(args.destination) @@ -185,10 +219,11 @@ def main(): populate_with_template(args.template, args.destination) # Process course.md specifically - process_course_file(args.source, args.destination, uid) + process_course_file(args.source, args.destination, uid, strip_leading_dash) # Run the file parsing and asset moving - parse_filename_and_move(args.source, args.destination, uid) + parse_filename_and_move(args.source, args.destination, uid, separator, strip_leading_dash) if __name__ == '__main__': main() +