diff --git a/.vscode/settings.json b/.vscode/settings.json
index fe687fc..96daaa5 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -20,6 +20,7 @@
     "cSpell.words": [
         "blockquote",
         "levelname",
-        "mmdc"
+        "mmdc",
+        "scandir"
     ]
 }
diff --git a/md2conf/application.py b/md2conf/application.py
index 7b72269..fa2935f 100644
--- a/md2conf/application.py
+++ b/md2conf/application.py
@@ -42,33 +42,80 @@ def synchronize_page(self, page_path: Path) -> None:
 
         self._synchronize_page(page_path, {})
 
-    def synchronize_directory(self, local_dir: Path) -> None:
-        "Synchronizes a directory of Markdown pages with Confluence."
+    def _get_qualified_id(self, absolute_path: Path) -> Optional[str]:
+        "Extracts the ID of the Confluence page linked in a Markdown document, or `None` if no page is linked."
+
+        with open(absolute_path, "r", encoding="utf-8") as f:
+            document = f.read()
 
-        page_metadata: Dict[Path, ConfluencePageMetadata] = {}
-        LOGGER.info(f"Synchronizing directory: {local_dir}")
+        qualified_id, _ = extract_qualified_id(document)
+        if qualified_id is not None:
+            return qualified_id.page_id
+        else:
+            return None
 
-        # Step 1: build index of all page metadata
-        # NOTE: Pathlib.walk() is implemented only in Python 3.12+
-        # so sticking for old os.walk
-        for root, directories, files in os.walk(local_dir):
-            for file_name in files:
-                # Reconstitute Path object back
-                docfile = (Path(root) / file_name).absolute()
+    def _index_directory(
+        self,
+        local_dir: Path,
+        root_id: Optional[str],
+        page_metadata: Dict[Path, ConfluencePageMetadata],
+    ) -> None:
+        """
+        Indexes Markdown files in a directory recursively.
+
+        :param local_dir: Directory to scan for Markdown files and sub-directories.
+        :param root_id: Page ID to use as parent when `index.md` or `README.md` in this directory yields no page ID.
+        :param page_metadata: Mapping of absolute file path to page metadata; populated in place.
+        """
 
-                # Skip non-markdown files
-                if docfile.suffix.lower() != ".md":
-                    continue
-                metadata = self._get_or_create_page(docfile)
+        LOGGER.info(f"Synchronizing directory: {local_dir}")
 
-                LOGGER.debug(f"indexed {docfile} with metadata: {metadata}")
-                page_metadata[docfile] = metadata
+        files: list[Path] = []
+        directories: list[Path] = []
+        # use the iterator as a context manager so the directory handle is closed promptly
+        with os.scandir(local_dir) as entries:
+            for entry in entries:
+                if entry.is_file():
+                    # skip non-markdown files (suffix match is case-insensitive)
+                    if Path(entry.name).suffix.lower() == ".md":
+                        files.append(Path(entry.path).absolute())
+                elif entry.is_dir():
+                    # skip hidden directories
+                    if not entry.name.startswith("."):
+                        directories.append(Path(entry.path).absolute())
+
+        # make page act as parent node in Confluence
+        # NOTE: `files` holds absolute paths, so compare against a path, not a bare file name
+        parent_id: Optional[str] = None
+        for name in ("index.md", "README.md"):
+            candidate = (Path(local_dir) / name).absolute()
+            if candidate in files:
+                parent_id = self._get_qualified_id(candidate)
+                break
+
+        if parent_id is None:
+            parent_id = root_id
+
+        for doc in files:
+            metadata = self._get_or_create_page(doc, parent_id)
+            LOGGER.debug(f"indexed {doc} with metadata: {metadata}")
+            page_metadata[doc] = metadata
+
+        for directory in directories:
+            self._index_directory(directory, parent_id, page_metadata)
 
         LOGGER.info(f"indexed {len(page_metadata)} pages")
 
-        # Step 2: Convert each page
-        for page_path in page_metadata.keys():
-            self._synchronize_page(page_path, page_metadata)
+    def synchronize_directory(self, local_dir: Path) -> None:
+        "Synchronizes a directory of Markdown pages with Confluence."
+
+        # Step 1: build index of all page metadata
+        metadata: Dict[Path, ConfluencePageMetadata] = {}
+        self._index_directory(local_dir, self.options.root_page_id, metadata)
+
+        # Step 2: convert each page
+        for page_path in metadata.keys():
+            self._synchronize_page(page_path, metadata)
 
     def _synchronize_page(
         self,
@@ -87,7 +134,11 @@ def _synchronize_page(
         self._update_document(document, base_path)
 
     def _get_or_create_page(
-        self, absolute_path: Path, title: Optional[str] = None
+        self,
+        absolute_path: Path,
+        parent_id: Optional[str],
+        *,
+        title: Optional[str] = None,
     ) -> ConfluencePageMetadata:
         """
         Creates a new Confluence page if no page is linked in the Markdown document.
@@ -103,7 +154,7 @@ def _get_or_create_page(
                 qualified_id.page_id, space_key=qualified_id.space_key
             )
         else:
-            if self.options.root_page_id is None:
+            if parent_id is None:
                 raise ValueError(
                     "expected: Confluence page ID to act as parent for Markdown files with no linked Confluence page"
                 )
@@ -112,9 +163,7 @@ def _get_or_create_page(
             if title is None:
                 title = absolute_path.stem
 
-            confluence_page = self.api.get_or_create_page(
-                title, self.options.root_page_id
-            )
+            confluence_page = self.api.get_or_create_page(title, parent_id)
             self._update_markdown(
                 absolute_path,
                 document,