diff --git a/.vscode/settings.json b/.vscode/settings.json
index a9347ec..9c70de5 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -20,6 +20,7 @@
     "cSpell.words": [
         "blockquote",
         "levelname",
+        "mdignore",
         "mmdc",
         "scandir",
         "webui"
diff --git a/README.md b/README.md
index cdcd686..685e7b7 100644
--- a/README.md
+++ b/README.md
@@ -113,6 +113,12 @@ Provide generated-by prompt text in the Markdown file with a tag:
 
 Alternatively, use the `--generated-by GENERATED_BY` option. The tag takes precedence.
 
+### Ignoring files
+
+Skip files in a directory with rules defined in `.mdignore`. Each rule should occupy a single line. Rules follow the syntax of [fnmatch](https://docs.python.org/3/library/fnmatch.html#fnmatch.fnmatch). Specifically, `?` matches any single character, and `*` matches zero or more characters. For example, use `up-*.md` to exclude Markdown files that start with `up-`. Lines that start with `#` are treated as comments.
+
+Files that don't have the extension `*.md` are skipped automatically. Hidden directories (whose name starts with `.`) are not recursed into.
+
 ### Running the tool
 
 You execute the command-line tool `md2conf` to synchronize the Markdown file with Confluence:
diff --git a/md2conf/application.py b/md2conf/application.py
index d47fe81..a0bec64 100644
--- a/md2conf/application.py
+++ b/md2conf/application.py
@@ -13,6 +13,7 @@
     extract_qualified_id,
     read_qualified_id,
 )
+from .matcher import Matcher, MatcherOptions
 
 LOGGER = logging.getLogger(__name__)
 
@@ -89,16 +90,18 @@ def _index_directory(
 
         LOGGER.info(f"Indexing directory: {local_dir}")
 
+        matcher = Matcher(MatcherOptions(source=".mdignore", extension="md"), local_dir)
+
         files: List[Path] = []
         directories: List[Path] = []
         for entry in os.scandir(local_dir):
+            if matcher.is_excluded(entry.name, entry.is_dir()):
+                continue
+
             if entry.is_file():
-                if entry.name.endswith(".md"):
-                    # skip non-markdown files
-                    files.append((Path(local_dir) / entry.name).absolute())
+                files.append((Path(local_dir) / entry.name).absolute())
             elif entry.is_dir():
-                if not entry.name.startswith("."):
-                    directories.append((Path(local_dir) / entry.name).absolute())
+                directories.append((Path(local_dir) / entry.name).absolute())
 
         # make page act as parent node in Confluence
         parent_id: Optional[ConfluenceQualifiedID] = None
diff --git a/md2conf/matcher.py b/md2conf/matcher.py
new file mode 100644
index 0000000..7418de6
--- /dev/null
+++ b/md2conf/matcher.py
@@ -0,0 +1,110 @@
+import os.path
+from dataclasses import dataclass
+from fnmatch import fnmatch
+from pathlib import Path
+from typing import Iterable, List, Optional
+
+
+@dataclass
+class MatcherOptions:
+    """
+    Options for checking against a list of exclude/include patterns.
+
+    :param source: File name to read exclusion rules from.
+    :param extension: Extension to narrow down search to (leading dot is optional).
+    """
+
+    source: str
+    extension: Optional[str] = None
+
+    def __post_init__(self) -> None:
+        # normalize "md" to ".md" so that the suffix check doesn't match a name like "cmd"
+        if self.extension is not None and not self.extension.startswith("."):
+            self.extension = f".{self.extension}"
+
+
+class Matcher:
+    "Compares file and directory names against a list of exclude/include patterns."
+
+    options: MatcherOptions
+    rules: List[str]
+
+    def __init__(self, options: MatcherOptions, directory: Path) -> None:
+        """
+        Reads exclusion rules from the rule file in a directory, if there is one.
+
+        :param options: Name of the rule file, and the extension to narrow down search to.
+        :param directory: Directory to look for the rule file in.
+        """
+
+        self.options = options
+        self.rules = []
+
+        source = Path(directory) / options.source
+        if source.is_file():
+            # rule files are plain text; don't depend on the default encoding of the locale
+            with open(source, "r", encoding="utf-8") as f:
+                lines = [line.strip() for line in f.read().splitlines()]
+
+            # blank lines and comment lines define no rule
+            self.rules = [line for line in lines if line and not line.startswith("#")]
+
+    def extension_matches(self, name: str) -> bool:
+        "True if the file name has the expected extension."
+
+        return self.options.extension is None or name.endswith(self.options.extension)
+
+    def is_excluded(self, name: str, is_directory: bool = False) -> bool:
+        """
+        True if the file or directory name matches any of the exclusion patterns.
+
+        :param name: Name of the file or directory, without any parent path.
+        :param is_directory: True if the name identifies a directory.
+        :returns: True if the entry is to be skipped.
+        """
+
+        # hidden files and directories are always skipped
+        if name.startswith("."):
+            return True
+
+        # directory names don't normally carry the file extension; filtering them by
+        # extension would stop the caller from recursing into any sub-directory
+        if not is_directory and not self.extension_matches(name):
+            return True
+
+        return any(fnmatch(name, rule) for rule in self.rules)
+
+    def is_included(self, name: str, is_directory: bool = False) -> bool:
+        """
+        True if the file or directory name matches none of the exclusion patterns.
+
+        :param name: Name of the file or directory, without any parent path.
+        :param is_directory: True if the name identifies a directory.
+        :returns: True if the entry is to be processed.
+        """
+
+        return not self.is_excluded(name, is_directory)
+
+    def filter(self, items: Iterable[str]) -> List[str]:
+        """
+        Returns only those elements from the input that don't match any of the exclusion rules.
+
+        Names are treated as file names, i.e. they are also checked against the expected extension.
+
+        :param items: A list of names to filter.
+        :returns: A filtered list of names that didn't match any of the exclusion rules.
+        """
+
+        return [item for item in items if self.is_included(item)]
+
+    def scandir(self, path: Path) -> List[str]:
+        """
+        Returns only those entries in a directory whose name doesn't match any of the exclusion rules.
+
+        Entries are treated as file names, i.e. they are also checked against the expected extension.
+
+        :param path: Directory to scan.
+        :returns: A filtered list of entries whose name didn't match any of the exclusion rules.
+        """
+
+        return self.filter(entry.name for entry in os.scandir(path))
diff --git a/md2conf/processor.py b/md2conf/processor.py
index 467734b..506c9be 100644
--- a/md2conf/processor.py
+++ b/md2conf/processor.py
@@ -11,6 +11,7 @@
     ConfluenceQualifiedID,
     extract_qualified_id,
 )
+from .matcher import Matcher, MatcherOptions
 from .properties import ConfluenceProperties
 
 LOGGER = logging.getLogger(__name__)
@@ -69,16 +70,18 @@ def _index_directory(
 
         LOGGER.info(f"Indexing directory: {local_dir}")
 
+        matcher = Matcher(MatcherOptions(source=".mdignore", extension="md"), local_dir)
+
         files: List[Path] = []
         directories: List[Path] = []
         for entry in os.scandir(local_dir):
+            if matcher.is_excluded(entry.name, entry.is_dir()):
+                continue
+
             if entry.is_file():
-                if entry.name.endswith(".md"):
-                    # skip non-markdown files
-                    files.append((Path(local_dir) / entry.name).absolute())
+                files.append((Path(local_dir) / entry.name).absolute())
             elif entry.is_dir():
-                if not entry.name.startswith("."):
-                    directories.append((Path(local_dir) / entry.name).absolute())
+                directories.append((Path(local_dir) / entry.name).absolute())
 
         for doc in files:
             metadata = self._get_page(doc)
diff --git a/tests/source/.mdignore b/tests/source/.mdignore
new file mode 100644
index 0000000..2abe71e
--- /dev/null
+++ b/tests/source/.mdignore
@@ -0,0 +1,3 @@
+# skip comments
+
+[i][g][n][!abcdefghijklmn]?[e].md*
diff --git a/tests/source/ignore.md b/tests/source/ignore.md
new file mode 100644
index 0000000..73cc87f
--- /dev/null
+++ b/tests/source/ignore.md
@@ -0,0 +1,3 @@
+## Ignored files
+
+This Markdown document is skipped as per the rules defined in `.mdignore`.
diff --git a/tests/test_conversion.py b/tests/test_conversion.py
index 4333d86..a5c66a2 100644
--- a/tests/test_conversion.py
+++ b/tests/test_conversion.py
@@ -12,6 +12,7 @@
     elements_from_string,
     elements_to_string,
 )
+from md2conf.matcher import Matcher, MatcherOptions
 from md2conf.mermaid import has_mmdc
 
 logging.basicConfig(
@@ -50,8 +51,12 @@ def tearDown(self) -> None:
         shutil.rmtree(self.out_dir)
 
     def test_markdown(self) -> None:
+        matcher = Matcher(
+            MatcherOptions(source=".mdignore", extension="md"), self.source_dir
+        )
+
         for entry in os.scandir(self.source_dir):
-            if not entry.name.endswith(".md"):
+            if matcher.is_excluded(entry.name):
                 continue
 
             name, _ = os.path.splitext(entry.name)
diff --git a/tests/test_matcher.py b/tests/test_matcher.py
new file mode 100644
index 0000000..d943929
--- /dev/null
+++ b/tests/test_matcher.py
@@ -0,0 +1,54 @@
+import logging
+import os
+import os.path
+import unittest
+from pathlib import Path
+
+from md2conf.matcher import Matcher, MatcherOptions
+
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - %(levelname)s - %(funcName)s [%(lineno)d] - %(message)s",
+)
+
+
+class TestMatcher(unittest.TestCase):
+    def test_extension(self) -> None:
+        directory = Path(os.path.dirname(__file__))
+        expected = [
+            entry.name for entry in os.scandir(directory) if entry.name.endswith(".py")
+        ]
+
+        options = MatcherOptions(".mdignore", ".py")
+        matcher = Matcher(options, directory)
+        actual = matcher.scandir(directory)
+
+        self.assertCountEqual(expected, actual)
+
+    def test_rules(self) -> None:
+        directory = Path(os.path.dirname(__file__)) / "source"
+        expected = [
+            entry.name for entry in os.scandir(directory) if entry.name.endswith(".md")
+        ]
+        expected.remove("ignore.md")
+
+        options = MatcherOptions(".mdignore", ".md")
+        matcher = Matcher(options, directory)
+        actual = matcher.scandir(directory)
+
+        self.assertCountEqual(expected, actual)
+
+    def test_directory(self) -> None:
+        directory = Path(os.path.dirname(__file__))
+        matcher = Matcher(MatcherOptions(".mdignore", ".md"), directory)
+
+        # the extension filter applies to files only, never to directories
+        self.assertFalse(matcher.is_excluded("source", is_directory=True))
+        self.assertTrue(matcher.is_excluded("source", is_directory=False))
+
+        # hidden directories are skipped regardless
+        self.assertTrue(matcher.is_excluded(".git", is_directory=True))
+
+
+if __name__ == "__main__":
+    unittest.main()