From 0759ddc1c19c2efe8d1a1711792fd7f96c60a326 Mon Sep 17 00:00:00 2001
From: Levente Hunyadi
Date: Thu, 19 Sep 2024 12:14:49 +0200
Subject: [PATCH] Add ability to index and synchronize directories recursively

---
 .vscode/settings.json  |  3 +-
 md2conf/application.py | 93 +++++++++++++++++++++++++++++++-----------
 2 files changed, 71 insertions(+), 25 deletions(-)

diff --git a/.vscode/settings.json b/.vscode/settings.json
index fe687fc..96daaa5 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -20,6 +20,7 @@
     "cSpell.words": [
         "blockquote",
         "levelname",
-        "mmdc"
+        "mmdc",
+        "scandir"
     ]
 }
diff --git a/md2conf/application.py b/md2conf/application.py
index 7b72269..290f972 100644
--- a/md2conf/application.py
+++ b/md2conf/application.py
@@ -42,31 +42,74 @@ def synchronize_page(self, page_path: Path) -> None:
 
         self._synchronize_page(page_path, {})
 
-    def synchronize_directory(self, local_dir: Path) -> None:
-        "Synchronizes a directory of Markdown pages with Confluence."
+    def _get_qualified_id(self, absolute_path: Path) -> Optional[str]:
+        "Extracts the ID of the Confluence page linked in a Markdown file, if any."
+
+        with open(absolute_path, "r", encoding="utf-8") as f:
+            document = f.read()
 
-        page_metadata: Dict[Path, ConfluencePageMetadata] = {}
-        LOGGER.info(f"Synchronizing directory: {local_dir}")
+        qualified_id, _ = extract_qualified_id(document)
+        if qualified_id is not None:
+            return qualified_id.page_id
+        else:
+            return None
 
-        # Step 1: build index of all page metadata
-        # NOTE: Pathlib.walk() is implemented only in Python 3.12+
-        # so sticking for old os.walk
-        for root, directories, files in os.walk(local_dir):
-            for file_name in files:
-                # Reconstitute Path object back
-                docfile = (Path(root) / file_name).absolute()
+    def _index_directory(
+        self,
+        local_dir: Path,
+        root_id: Optional[str],
+        page_metadata: Dict[Path, ConfluencePageMetadata],
+    ) -> None:
+        """
+        Indexes Markdown files in a directory recursively.
+
+        :param local_dir: Directory whose Markdown files and sub-directories are indexed.
+        :param root_id: Parent page ID to fall back on when no index page supplies one.
+        :param page_metadata: Mapping of absolute file path to page metadata, updated in place.
+        """
 
-                # Skip non-markdown files
-                if docfile.suffix.lower() != ".md":
-                    continue
-                metadata = self._get_or_create_page(docfile)
+        LOGGER.info(f"Synchronizing directory: {local_dir}")
 
-                LOGGER.debug(f"indexed {docfile} with metadata: {metadata}")
-                page_metadata[docfile] = metadata
+        files: list[Path] = []
+        directories: list[Path] = []
+        for entry in os.scandir(local_dir):
+            if entry.is_file():
+                # skip non-markdown files
+                if entry.name.endswith(".md"):
+                    files.append((Path(local_dir) / entry.name).absolute())
+            elif entry.is_dir():
+                # do not descend into directories whose name starts with a dot
+                if not entry.name.startswith("."):
+                    directories.append((Path(local_dir) / entry.name).absolute())
+
+        # make page act as parent node in Confluence
+        # NOTE: `files` holds absolute `Path` objects, which never equal a bare `str`
+        parent_id: Optional[str] = None
+        if (Path(local_dir) / "index.md").absolute() in files:
+            parent_id = self._get_qualified_id(Path(local_dir) / "index.md")
+        elif (Path(local_dir) / "README.md").absolute() in files:
+            parent_id = self._get_qualified_id(Path(local_dir) / "README.md")
+
+        if parent_id is None:
+            parent_id = root_id
+
+        for doc in files:
+            metadata = self._get_or_create_page(doc, parent_id)
+            LOGGER.debug(f"indexed {doc} with metadata: {metadata}")
+            page_metadata[doc] = metadata
+
+        for directory in directories:
+            self._index_directory(Path(local_dir) / directory, parent_id, page_metadata)
+
+    def synchronize_directory(self, local_dir: Path) -> None:
+        "Synchronizes a directory of Markdown pages with Confluence."
 
-        LOGGER.info(f"indexed {len(page_metadata)} pages")
+        # Step 1: build index of all page metadata
+        page_metadata: Dict[Path, ConfluencePageMetadata] = {}
+        self._index_directory(local_dir, self.options.root_page_id, page_metadata)
+        LOGGER.info(f"indexed {len(page_metadata)} page(s)")
 
-        # Step 2: Convert each page
+        # Step 2: convert each page
         for page_path in page_metadata.keys():
             self._synchronize_page(page_path, page_metadata)
 
@@ -87,7 +130,11 @@ def _synchronize_page(
             self._update_document(document, base_path)
 
     def _get_or_create_page(
-        self, absolute_path: Path, title: Optional[str] = None
+        self,
+        absolute_path: Path,
+        parent_id: Optional[str],
+        *,
+        title: Optional[str] = None,
     ) -> ConfluencePageMetadata:
         """
         Creates a new Confluence page if no page is linked in the Markdown document.
@@ -103,7 +150,7 @@ def _get_or_create_page(
                 qualified_id.page_id, space_key=qualified_id.space_key
             )
         else:
-            if self.options.root_page_id is None:
+            if parent_id is None:
                 raise ValueError(
                     "expected: Confluence page ID to act as parent for Markdown files with no linked Confluence page"
                 )
@@ -112,9 +159,7 @@ def _get_or_create_page(
             if title is None:
                 title = absolute_path.stem
 
-            confluence_page = self.api.get_or_create_page(
-                title, self.options.root_page_id
-            )
+            confluence_page = self.api.get_or_create_page(title, parent_id)
             self._update_markdown(
                 absolute_path,
                 document,