Skip to content

Commit

Permalink
Add support for ignoring files
Browse files Browse the repository at this point in the history
  • Loading branch information
hunyadi committed Sep 30, 2024
1 parent e2b6389 commit eb8447d
Show file tree
Hide file tree
Showing 9 changed files with 161 additions and 11 deletions.
1 change: 1 addition & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
"cSpell.words": [
"blockquote",
"levelname",
"mdignore",
"mmdc",
"scandir",
"webui"
Expand Down
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,12 @@ Provide generated-by prompt text in the Markdown file with a tag:

Alternatively, use the `--generated-by GENERATED_BY` option. The tag takes precedence.

### Ignoring files

Skip files in a directory with rules defined in `.mdignore`. Each rule should occupy a single line. Rules follow the syntax of [fnmatch](https://docs.python.org/3/library/fnmatch.html#fnmatch.fnmatch). Specifically, `?` matches any single character, and `*` matches zero or more characters. For example, use `up-*.md` to exclude Markdown files that start with `up-`. Lines that start with `#` are treated as comments.

Files that don't have the extension `*.md` are skipped automatically. Hidden files and directories (whose name starts with `.`) are skipped as well; hidden directories are not recursed into.

### Running the tool

You execute the command-line tool `md2conf` to synchronize the Markdown file with Confluence:
Expand Down
13 changes: 8 additions & 5 deletions md2conf/application.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
extract_qualified_id,
read_qualified_id,
)
from .matcher import Matcher, MatcherOptions

LOGGER = logging.getLogger(__name__)

Expand Down Expand Up @@ -89,16 +90,18 @@ def _index_directory(

LOGGER.info(f"Indexing directory: {local_dir}")

matcher = Matcher(MatcherOptions(source=".mdignore", extension="md"), local_dir)

files: List[Path] = []
directories: List[Path] = []
for entry in os.scandir(local_dir):
if matcher.is_excluded(entry.name):
continue

if entry.is_file():
if entry.name.endswith(".md"):
# skip non-markdown files
files.append((Path(local_dir) / entry.name).absolute())
files.append((Path(local_dir) / entry.name).absolute())
elif entry.is_dir():
if not entry.name.startswith("."):
directories.append((Path(local_dir) / entry.name).absolute())
directories.append((Path(local_dir) / entry.name).absolute())

# make page act as parent node in Confluence
parent_id: Optional[ConfluenceQualifiedID] = None
Expand Down
83 changes: 83 additions & 0 deletions md2conf/matcher.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import os.path
from dataclasses import dataclass
from fnmatch import fnmatch
from pathlib import Path
from typing import Iterable, List, Optional


@dataclass
class MatcherOptions:
    """
    Settings that control how names are checked against exclude/include patterns.

    The extension is normalized on construction so that it always carries a leading dot
    (e.g. `md` is stored as `.md`).

    :param source: Name of the file that the exclusion rules are read from.
    :param extension: Extension to restrict matches to, or `None` to accept any extension.
    """

    source: str
    extension: Optional[str] = None

    def __post_init__(self) -> None:
        ext = self.extension
        # nothing to normalize when no extension is set or the dot is already present
        if ext is None or ext.startswith("."):
            return
        self.extension = "." + ext


class Matcher:
    "Compares file and directory names against a list of exclude/include patterns."

    options: "MatcherOptions"
    rules: List[str]

    def __init__(self, options: "MatcherOptions", directory: Path) -> None:
        """
        Loads exclusion rules from the rule file located in a directory.

        Each rule occupies one line. Surrounding whitespace is stripped; blank lines and lines
        that start with `#` are discarded. If the rule file doesn't exist, the rule list is empty.

        :param options: Rule file name and optional extension filter.
        :param directory: Directory in which to look for the rule file.
        """

        self.options = options
        try:
            # fixed encoding so that rules are parsed identically regardless of the platform locale
            with open(directory / options.source, "r", encoding="utf-8") as f:
                lines = f.read().splitlines()
        except (FileNotFoundError, NotADirectoryError):
            # no rule file: only the built-in checks (hidden names, extension) apply
            lines = []

        stripped = (line.strip() for line in lines)
        self.rules = [rule for rule in stripped if rule and not rule.startswith("#")]

    def extension_matches(self, name: str) -> bool:
        "True if the file name has the expected extension."

        extension = self.options.extension
        return extension is None or name.endswith(extension)

    def is_excluded(self, name: str, *, is_directory: bool = False) -> bool:
        """
        True if the file or directory name matches any of the exclusion patterns.

        A name is excluded when it starts with `.`, when it lacks the expected extension, or when
        it matches one of the rules loaded from the rule file.

        :param name: Base name of the file or directory.
        :param is_directory: Pass `True` for directory names to exempt them from the extension
            check, which would otherwise exclude nearly every directory. Defaults to `False`,
            which applies the extension check to every name.
        :returns: Whether the name should be skipped.
        """

        if name.startswith("."):
            return True

        if not is_directory and not self.extension_matches(name):
            return True

        return any(fnmatch(name, rule) for rule in self.rules)

    def is_included(self, name: str, *, is_directory: bool = False) -> bool:
        """
        True if the file or directory name matches none of the exclusion patterns.

        :param name: Base name of the file or directory.
        :param is_directory: Pass `True` for directory names to exempt them from the extension check.
        :returns: Whether the name should be kept.
        """

        return not self.is_excluded(name, is_directory=is_directory)

    def filter(self, items: Iterable[str]) -> List[str]:
        """
        Returns only those elements from the input that don't match any of the exclusion rules.

        Every item is treated as a file name, i.e. the extension check applies to all of them.

        :param items: A list of names to filter.
        :returns: A filtered list of names that didn't match any of the exclusion rules.
        """

        return [item for item in items if self.is_included(item)]

    def scandir(self, path: Path) -> List[str]:
        """
        Returns only those entries in a directory whose name doesn't match any of the exclusion rules.

        Entries are filtered by name only, so the extension check applies to sub-directories as well.

        :param path: Directory to scan.
        :returns: A filtered list of entries whose name didn't match any of the exclusion rules.
        """

        return self.filter(entry.name for entry in os.scandir(path))
13 changes: 8 additions & 5 deletions md2conf/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
ConfluenceQualifiedID,
extract_qualified_id,
)
from .matcher import Matcher, MatcherOptions
from .properties import ConfluenceProperties

LOGGER = logging.getLogger(__name__)
Expand Down Expand Up @@ -69,16 +70,18 @@ def _index_directory(

LOGGER.info(f"Indexing directory: {local_dir}")

matcher = Matcher(MatcherOptions(source=".mdignore", extension="md"), local_dir)

files: List[Path] = []
directories: List[Path] = []
for entry in os.scandir(local_dir):
if matcher.is_excluded(entry.name):
continue

if entry.is_file():
if entry.name.endswith(".md"):
# skip non-markdown files
files.append((Path(local_dir) / entry.name).absolute())
files.append((Path(local_dir) / entry.name).absolute())
elif entry.is_dir():
if not entry.name.startswith("."):
directories.append((Path(local_dir) / entry.name).absolute())
directories.append((Path(local_dir) / entry.name).absolute())

for doc in files:
metadata = self._get_page(doc)
Expand Down
3 changes: 3 additions & 0 deletions tests/source/.mdignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# skip comments

[i][g][n][!abcdefghijklmn]?[e].md*
3 changes: 3 additions & 0 deletions tests/source/ignore.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
## Ignored files

This Markdown document is skipped as per the rules defined in `.mdignore`.
7 changes: 6 additions & 1 deletion tests/test_conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
elements_from_string,
elements_to_string,
)
from md2conf.matcher import Matcher, MatcherOptions
from md2conf.mermaid import has_mmdc

logging.basicConfig(
Expand Down Expand Up @@ -50,8 +51,12 @@ def tearDown(self) -> None:
shutil.rmtree(self.out_dir)

def test_markdown(self) -> None:
matcher = Matcher(
MatcherOptions(source=".mdignore", extension="md"), self.source_dir
)

for entry in os.scandir(self.source_dir):
if not entry.name.endswith(".md"):
if matcher.is_excluded(entry.name):
continue

name, _ = os.path.splitext(entry.name)
Expand Down
43 changes: 43 additions & 0 deletions tests/test_matcher.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
import logging
import os
import os.path
import unittest
from pathlib import Path

from md2conf.matcher import Matcher, MatcherOptions

logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(levelname)s - %(funcName)s [%(lineno)d] - %(message)s",
)


class TestMatcher(unittest.TestCase):
    "Compares `Matcher.scandir` results with directory listings filtered by hand."

    def test_extension(self) -> None:
        "Scanning the directory of this module for `.py` yields exactly the names ending in `.py`."

        here = Path(os.path.dirname(__file__))
        py_names = [name for name in os.listdir(here) if name.endswith(".py")]

        matcher = Matcher(MatcherOptions(".mdignore", ".py"), here)

        self.assertCountEqual(py_names, matcher.scandir(here))

    def test_rules(self) -> None:
        "Scanning `source` for `.md` yields every Markdown name except `ignore.md`."

        source_dir = Path(os.path.dirname(__file__)) / "source"
        md_names = [name for name in os.listdir(source_dir) if name.endswith(".md")]
        # raises ValueError if the fixture file is absent from the directory
        md_names.remove("ignore.md")

        matcher = Matcher(MatcherOptions(".mdignore", ".md"), source_dir)

        self.assertCountEqual(md_names, matcher.scandir(source_dir))


if __name__ == "__main__":
unittest.main()

0 comments on commit eb8447d

Please sign in to comment.