diff --git a/cycode/cli/files_collector/path_documents.py b/cycode/cli/files_collector/path_documents.py index 27d24cd8..14f88888 100644 --- a/cycode/cli/files_collector/path_documents.py +++ b/cycode/cli/files_collector/path_documents.py @@ -1,7 +1,5 @@ import os -from typing import TYPE_CHECKING, Iterable, List, Optional, Tuple - -import pathspec +from typing import TYPE_CHECKING, List, Tuple from cycode.cli.files_collector.excluder import exclude_irrelevant_files from cycode.cli.files_collector.iac.tf_content_generator import ( @@ -30,7 +28,7 @@ def _get_all_existing_files_in_directory(path: str, *, walk_with_ignore_patterns return files -def _get_relevant_files_in_path(path: str, exclude_patterns: Optional[Iterable[str]] = None) -> List[str]: +def _get_relevant_files_in_path(path: str) -> List[str]: absolute_path = get_absolute_path(path) if not os.path.isfile(absolute_path) and not os.path.isdir(absolute_path): @@ -40,11 +38,6 @@ def _get_relevant_files_in_path(path: str, exclude_patterns: Optional[Iterable[s return [absolute_path] file_paths = _get_all_existing_files_in_directory(absolute_path) - - if exclude_patterns: - path_spec = pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern, exclude_patterns) - file_paths = path_spec.match_files(file_paths, negate=True) - return [file_path for file_path in file_paths if os.path.isfile(file_path)] diff --git a/cycode/cli/files_collector/walk_ignore.py b/cycode/cli/files_collector/walk_ignore.py index 21299bbc..76d04366 100644 --- a/cycode/cli/files_collector/walk_ignore.py +++ b/cycode/cli/files_collector/walk_ignore.py @@ -1,17 +1,15 @@ import os -from collections import defaultdict from typing import Generator, Iterable, List, Tuple -import pathspec -from pathspec.util import StrPath - -from cycode.cli.utils.path_utils import get_file_content +from cycode.cli.utils.ignore_utils import IgnoreFilterManager from cycode.cyclient import logger -_SUPPORTED_IGNORE_PATTERN_FILES = {'.gitignore', 
'.cycodeignore'} +_SUPPORTED_IGNORE_PATTERN_FILES = { # one day we will bring .cycodeignore or something like that + '.gitignore', +} _DEFAULT_GLOBAL_IGNORE_PATTERNS = [ - '**/.git', - '**/.cycode', + '.git', + '.cycode', ] @@ -35,44 +33,10 @@ def _collect_top_level_ignore_files(path: str) -> List[str]: return ignore_files -def _get_global_ignore_patterns(path: str) -> List[str]: - ignore_patterns = _DEFAULT_GLOBAL_IGNORE_PATTERNS.copy() - for ignore_file in _collect_top_level_ignore_files(path): - file_patterns = get_file_content(ignore_file).splitlines() - ignore_patterns.extend(file_patterns) - return ignore_patterns - - -def _should_include_path(ignore_patterns: List[str], path: StrPath) -> bool: - path_spec = pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern, ignore_patterns) - return not path_spec.match_file(path) # works with both files and directories; negative match - - def walk_ignore(path: str) -> Generator[Tuple[str, List[str], List[str]], None, None]: - global_ignore_patterns = _get_global_ignore_patterns(path) - path_to_ignore_patterns = defaultdict(list) - - for dirpath, dirnames, filenames in os.walk(path, topdown=True): - # finds and processes ignore files first to get the patterns - for filename in filenames: - filepath = os.path.join(dirpath, filename) - if filename in _SUPPORTED_IGNORE_PATTERN_FILES: - logger.debug('Apply ignore file: %s', filepath) - - parent_dir = os.path.dirname(dirpath) - if dirpath not in path_to_ignore_patterns and parent_dir in path_to_ignore_patterns: - # inherit ignore patterns from parent directory on first occurrence - logger.debug('Inherit ignore patterns: %s', {'inherit_from': parent_dir, 'inherit_to': dirpath}) - path_to_ignore_patterns[dirpath].extend(path_to_ignore_patterns[parent_dir]) - - # always read ignore patterns for the current directory - path_to_ignore_patterns[dirpath].extend(get_file_content(filepath).splitlines()) - - ignore_patterns = global_ignore_patterns + 
path_to_ignore_patterns.get(dirpath, []) - - # decrease recursion depth of os.walk() because of topdown=True by changing the list in-place - # slicing ([:]) is mandatory to change dict in-place! - dirnames[:] = [d for d in dirnames if _should_include_path(ignore_patterns, os.path.join(dirpath, d))] - filenames[:] = [f for f in filenames if _should_include_path(ignore_patterns, os.path.join(dirpath, f))] - - yield dirpath, dirnames, filenames + ignore_filter_manager = IgnoreFilterManager.build( + path=path, + global_ignore_file_paths=_collect_top_level_ignore_files(path), + global_patterns=_DEFAULT_GLOBAL_IGNORE_PATTERNS, + ) + yield from ignore_filter_manager.walk() diff --git a/cycode/cli/utils/ignore_utils.py b/cycode/cli/utils/ignore_utils.py index a5edccc9..329fa055 100644 --- a/cycode/cli/utils/ignore_utils.py +++ b/cycode/cli/utils/ignore_utils.py @@ -132,12 +132,11 @@ def read_ignore_patterns(f: BinaryIO) -> Iterable[bytes]: f: File-like object to read from Returns: List of patterns """ - for line in f: line = line.rstrip(b'\r\n') # Ignore blank lines, they're used for readability. - if not line: + if not line.strip(): continue if line.startswith(b'#'): @@ -397,7 +396,9 @@ def walk(self, **kwargs) -> Generator[Tuple[str, List[str], List[str]], None, No # decrease recursion depth of os.walk() by ignoring subdirectories because of topdown=True # slicing ([:]) is mandatory to change dict in-place! 
- dirnames[:] = [dirname for dirname in dirnames if not self.is_ignored(os.path.join(rel_dirpath, dirname, ''))] + dirnames[:] = [ + dirname for dirname in dirnames if not self.is_ignored(os.path.join(rel_dirpath, dirname, '')) + ] # remove ignored files filenames = [os.path.basename(f) for f in filenames if not self.is_ignored(os.path.join(rel_dirpath, f))] @@ -430,6 +431,13 @@ def build( if not global_patterns: global_patterns = [] + global_ignore_file_paths.extend( + [ + os.path.join('.git', 'info', 'exclude'), # relative to an input path, so within the repo + os.path.expanduser(os.path.join('~', '.config', 'git', 'ignore')), # absolute + ] + ) + if hasattr(path, '__fspath__'): path = path.__fspath__() diff --git a/poetry.lock b/poetry.lock index b4d0fc65..c97b44a9 100644 --- a/poetry.lock +++ b/poetry.lock @@ -490,17 +490,6 @@ files = [ {file = "patch-ng-1.18.1.tar.gz", hash = "sha256:52fd46ee46f6c8667692682c1fd7134edc65a2d2d084ebec1d295a6087fc0291"}, ] -[[package]] -name = "pathspec" -version = "0.12.1" -description = "Utility library for gitignore style pattern matching of file paths." 
-optional = false -python-versions = ">=3.8" -files = [ - {file = "pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08"}, - {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"}, -] - [[package]] name = "pefile" version = "2024.8.26" @@ -1050,4 +1039,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.0" python-versions = ">=3.8,<3.14" -content-hash = "b91b8db1d8946ee5f22f73cc2f2339e7969c9e3dad41a266c94f9091a1b1e33c" +content-hash = "e91a6f9b7e080cea351f9073ef333afe026df6172b95fba5477af67f15c96000" diff --git a/pyproject.toml b/pyproject.toml index 1754ed5d..42511ec8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,7 +32,6 @@ click = ">=8.1.0,<8.2.0" colorama = ">=0.4.3,<0.5.0" pyyaml = ">=6.0,<7.0" marshmallow = ">=3.15.0,<3.23.0" # 3.23 dropped support for Python 3.8 -pathspec = ">=0.11.1,<0.13.0" gitpython = ">=3.1.30,<3.2.0" arrow = ">=1.0.0,<1.4.0" binaryornot = ">=0.4.4,<0.5.0" diff --git a/tests/cli/files_collector/test_walk_ignore.py b/tests/cli/files_collector/test_walk_ignore.py index 87d6a9de..fd2612d5 100644 --- a/tests/cli/files_collector/test_walk_ignore.py +++ b/tests/cli/files_collector/test_walk_ignore.py @@ -4,7 +4,6 @@ from cycode.cli.files_collector.walk_ignore import ( _collect_top_level_ignore_files, - _get_global_ignore_patterns, _walk_to_top, walk_ignore, ) @@ -52,11 +51,9 @@ def _create_mocked_file_structure(fs: 'FakeFilesystem') -> None: fs.create_dir('/home/user/project/.git') fs.create_file('/home/user/project/.git/HEAD') - fs.create_file('/home/user/project/.gitignore', contents='*.pyc') + fs.create_file('/home/user/project/.gitignore', contents='*.pyc\n*.log') fs.create_file('/home/user/project/ignored.pyc') fs.create_file('/home/user/project/presented.txt') - - fs.create_file('/home/user/project/.cycodeignore', contents='*.log') fs.create_file('/home/user/project/ignored2.log') 
fs.create_file('/home/user/project/ignored2.pyc') fs.create_file('/home/user/project/presented2.txt') @@ -75,45 +72,27 @@ def test_collect_top_level_ignore_files(fs: 'FakeFilesystem') -> None: # Test with path inside the project path = normpath('/home/user/project/subproject') ignore_files = _collect_top_level_ignore_files(path) - - assert len(ignore_files) == 3 + assert len(ignore_files) == 2 assert normpath('/home/user/project/subproject/.gitignore') in ignore_files assert normpath('/home/user/project/.gitignore') in ignore_files - assert normpath('/home/user/project/.cycodeignore') in ignore_files - - # Test with a path that does not have any ignore files - fs.remove('/home/user/project/.gitignore') - path = normpath('/home/user') - ignore_files = _collect_top_level_ignore_files(path) - - assert len(ignore_files) == 0 # Test with path at the top level with no ignore files path = normpath('/home/user/.git') ignore_files = _collect_top_level_ignore_files(path) - assert len(ignore_files) == 0 # Test with path at the top level with a .gitignore path = normpath('/home/user/project') ignore_files = _collect_top_level_ignore_files(path) - assert len(ignore_files) == 1 - assert normpath('/home/user/project/.cycodeignore') in ignore_files - - -def test_get_global_ignore_patterns(fs: 'FakeFilesystem') -> None: - _create_mocked_file_structure(fs) - ignore_patterns = _get_global_ignore_patterns('/home/user/project/subproject') + assert normpath('/home/user/project/.gitignore') in ignore_files - assert len(ignore_patterns) == 5 - # default global: - assert '**/.git' in ignore_patterns - assert '**/.cycode' in ignore_patterns - # additional: - assert '*.txt' in ignore_patterns - assert '*.pyc' in ignore_patterns - assert '*.log' in ignore_patterns + # Test with a path that does not have any ignore files + fs.remove('/home/user/project/.gitignore') + path = normpath('/home/user') + ignore_files = _collect_top_level_ignore_files(path) + assert len(ignore_files) == 0 + 
fs.create_file('/home/user/project/.gitignore', contents='*.pyc\n*.log') def _collect_walk_ignore_files(path: str) -> List[str]: @@ -131,7 +110,7 @@ def test_walk_ignore(fs: 'FakeFilesystem') -> None: path = normpath('/home/user/project') result = _collect_walk_ignore_files(path) - assert len(result) == 6 + assert len(result) == 5 # ignored globally by default: assert normpath('/home/user/project/.git/HEAD') not in result assert normpath('/home/user/project/.cycode/config.yaml') not in result @@ -146,7 +125,6 @@ def test_walk_ignore(fs: 'FakeFilesystem') -> None: assert normpath('/home/user/project/subproject/ignored.log') not in result # presented after both .gitignore and .cycodeignore: assert normpath('/home/user/project/.gitignore') in result - assert normpath('/home/user/project/.cycodeignore') in result assert normpath('/home/user/project/subproject/.gitignore') in result assert normpath('/home/user/project/presented.txt') in result assert normpath('/home/user/project/presented2.txt') in result diff --git a/tests/utils/test_ignore_utils.py b/tests/utils/test_ignore_utils.py new file mode 100644 index 00000000..563c11a9 --- /dev/null +++ b/tests/utils/test_ignore_utils.py @@ -0,0 +1,176 @@ +# Copyright (C) 2017 Jelmer Vernooij +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Modified (rewritten to pytest + pyfakefs) from https://github.com/jelmer/dulwich/blob/master/tests/test_ignore.py + +import os +import re +from io import BytesIO +from typing import TYPE_CHECKING + +import pytest + +from cycode.cli.utils.ignore_utils import ( + IgnoreFilter, + IgnoreFilterManager, + Pattern, + match_pattern, + read_ignore_patterns, + translate, +) + +if TYPE_CHECKING: + from pyfakefs.fake_filesystem import FakeFilesystem + +POSITIVE_MATCH_TESTS = [ + (b'foo.c', b'*.c'), + (b'.c', b'*.c'), + (b'foo/foo.c', b'*.c'), + (b'foo/foo.c', b'foo.c'), + (b'foo.c', b'/*.c'), + (b'foo.c', b'/foo.c'), + (b'foo.c', b'foo.c'), + (b'foo.c', b'foo.[ch]'), + (b'foo/bar/bla.c', b'foo/**'), + (b'foo/bar/bla/blie.c', b'foo/**/blie.c'), + (b'foo/bar/bla.c', b'**/bla.c'), + (b'bla.c', b'**/bla.c'), + (b'foo/bar', b'foo/**/bar'), + (b'foo/bla/bar', b'foo/**/bar'), + (b'foo/bar/', b'bar/'), + (b'foo/bar/', b'bar'), + (b'foo/bar/something', b'foo/bar/*'), +] + +NEGATIVE_MATCH_TESTS = [ + (b'foo.c', b'foo.[dh]'), + (b'foo/foo.c', b'/foo.c'), + (b'foo/foo.c', b'/*.c'), + (b'foo/bar/', b'/bar/'), + (b'foo/bar/', b'foo/bar/*'), + (b'foo/bar', b'foo?bar'), +] + +TRANSLATE_TESTS = [ + (b'*.c', b'(?ms)(.*/)?[^/]*\\.c/?\\Z'), + (b'foo.c', b'(?ms)(.*/)?foo\\.c/?\\Z'), + (b'/*.c', b'(?ms)[^/]*\\.c/?\\Z'), + (b'/foo.c', b'(?ms)foo\\.c/?\\Z'), + (b'foo.c', b'(?ms)(.*/)?foo\\.c/?\\Z'), + (b'foo.[ch]', b'(?ms)(.*/)?foo\\.[ch]/?\\Z'), + (b'bar/', b'(?ms)(.*/)?bar\\/\\Z'), + (b'foo/**', b'(?ms)foo(/.*)?/?\\Z'), + (b'foo/**/blie.c', b'(?ms)foo(/.*)?\\/blie\\.c/?\\Z'), + (b'**/bla.c', b'(?ms)(.*/)?bla\\.c/?\\Z'), + (b'foo/**/bar', b'(?ms)foo(/.*)?\\/bar/?\\Z'), + (b'foo/bar/*', b'(?ms)foo\\/bar\\/[^/]+/?\\Z'), + (b'/foo\\[bar\\]', b'(?ms)foo\\[bar\\]/?\\Z'), + (b'/foo[bar]', b'(?ms)foo[bar]/?\\Z'), + (b'/foo[0-9]', b'(?ms)foo[0-9]/?\\Z'), +] + + +@pytest.mark.usefixtures('fs') +class TestIgnoreFiles: + def test_translate(self) -> None: + for pattern, regex in TRANSLATE_TESTS: + if 
re.escape(b'/') == b'/': + regex = regex.replace(b'\\/', b'/') + assert ( + translate(pattern) == regex + ), f'orig pattern: {pattern!r}, regex: {translate(pattern)!r}, expected: {regex!r}' + + def test_read_file(self) -> None: + f = BytesIO( + b""" +# a comment +\x20\x20 +# and an empty line: + +\\#not a comment +!negative +with trailing whitespace +with escaped trailing whitespace\\ +""" # noqa: W291 (Trailing whitespace) + ) + assert list(read_ignore_patterns(f)) == [ + b'\\#not a comment', + b'!negative', + b'with trailing whitespace', + b'with escaped trailing whitespace ', + ] + + def test_match_patterns_positive(self) -> None: + for path, pattern in POSITIVE_MATCH_TESTS: + assert match_pattern(path, pattern), f'path: {path!r}, pattern: {pattern!r}' + + def test_match_patterns_negative(self) -> None: + for path, pattern in NEGATIVE_MATCH_TESTS: + assert not match_pattern(path, pattern), f'path: {path!r}, pattern: {pattern!r}' + + def test_ignore_filter_inclusion(self) -> None: + ignore_filter = IgnoreFilter([b'a.c', b'b.c']) + assert ignore_filter.is_ignored(b'a.c') + assert ignore_filter.is_ignored(b'c.c') is None + assert list(ignore_filter.find_matching(b'a.c')) == [Pattern(b'a.c')] + assert list(ignore_filter.find_matching(b'c.c')) == [] + + def test_ignore_filter_exclusion(self) -> None: + ignore_filter = IgnoreFilter([b'a.c', b'b.c', b'!c.c']) + assert not ignore_filter.is_ignored(b'c.c') + assert ignore_filter.is_ignored(b'd.c') is None + assert list(ignore_filter.find_matching(b'c.c')) == [Pattern(b'!c.c')] + assert list(ignore_filter.find_matching(b'd.c')) == [] + + def test_ignore_filter_manager(self, fs: 'FakeFilesystem') -> None: + # Prepare sample ignore patterns + fs.create_file('/path/to/repo/.gitignore', contents=b'/foo/bar\n/dir2\n/dir3/\n') + fs.create_file('/path/to/repo/dir/.gitignore', contents=b'/blie\n') + fs.create_file('/path/to/repo/.git/info/exclude', contents=b'/excluded\n') + + m = IgnoreFilterManager.build('/path/to/repo') + + 
assert m.is_ignored('dir/blie') + assert m.is_ignored(os.path.join('dir', 'bloe')) is None + assert m.is_ignored('dir') is None + assert m.is_ignored(os.path.join('foo', 'bar')) + assert m.is_ignored(os.path.join('excluded')) + assert m.is_ignored(os.path.join('dir2', 'fileinignoreddir')) + assert not m.is_ignored('dir3') + assert m.is_ignored('dir3/') + assert m.is_ignored('dir3/bla') + + def test_nested_gitignores(self, fs: 'FakeFilesystem') -> None: + fs.create_file('/path/to/repo/.gitignore', contents=b'/*\n!/foo\n') + fs.create_file('/path/to/repo/foo/.gitignore', contents=b'/bar\n') + fs.create_file('/path/to/repo/foo/bar', contents=b'IGNORED') + + m = IgnoreFilterManager.build('/path/to/repo') + assert m.is_ignored('foo/bar') + + def test_load_ignore_ignore_case(self, fs: 'FakeFilesystem') -> None: + fs.create_file('/path/to/repo/.gitignore', contents=b'/foo/bar\n/dir\n') + + m = IgnoreFilterManager.build('/path/to/repo', ignore_case=True) + assert m.is_ignored(os.path.join('dir', 'blie')) + assert m.is_ignored(os.path.join('DIR', 'blie')) + + def test_ignored_contents(self, fs: 'FakeFilesystem') -> None: + fs.create_file('/path/to/repo/.gitignore', contents=b'a/*\n!a/*.txt\n') + + m = IgnoreFilterManager.build('/path/to/repo') + assert m.is_ignored('a') is None + assert m.is_ignored('a/') is None + assert not m.is_ignored('a/b.txt') + assert m.is_ignored('a/c.dat')