Skip to content

Commit

Permalink
migrate to ignore_utils; add tests; remove unused pathspec; remove .c…
Browse files Browse the repository at this point in the history
…ycodeignore
  • Loading branch information
MarshalX committed Dec 13, 2024
1 parent 85993ff commit 4fbf23b
Show file tree
Hide file tree
Showing 7 changed files with 212 additions and 105 deletions.
11 changes: 2 additions & 9 deletions cycode/cli/files_collector/path_documents.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
import os
from typing import TYPE_CHECKING, Iterable, List, Optional, Tuple

import pathspec
from typing import TYPE_CHECKING, List, Tuple

from cycode.cli.files_collector.excluder import exclude_irrelevant_files
from cycode.cli.files_collector.iac.tf_content_generator import (
Expand Down Expand Up @@ -30,7 +28,7 @@ def _get_all_existing_files_in_directory(path: str, *, walk_with_ignore_patterns
return files


def _get_relevant_files_in_path(path: str, exclude_patterns: Optional[Iterable[str]] = None) -> List[str]:
def _get_relevant_files_in_path(path: str) -> List[str]:
absolute_path = get_absolute_path(path)

if not os.path.isfile(absolute_path) and not os.path.isdir(absolute_path):
Expand All @@ -40,11 +38,6 @@ def _get_relevant_files_in_path(path: str, exclude_patterns: Optional[Iterable[s
return [absolute_path]

file_paths = _get_all_existing_files_in_directory(absolute_path)

if exclude_patterns:
path_spec = pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern, exclude_patterns)
file_paths = path_spec.match_files(file_paths, negate=True)

return [file_path for file_path in file_paths if os.path.isfile(file_path)]


Expand Down
60 changes: 12 additions & 48 deletions cycode/cli/files_collector/walk_ignore.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,15 @@
import os
from collections import defaultdict
from typing import Generator, Iterable, List, Tuple

import pathspec
from pathspec.util import StrPath

from cycode.cli.utils.path_utils import get_file_content
from cycode.cli.utils.ignore_utils import IgnoreFilterManager
from cycode.cyclient import logger

_SUPPORTED_IGNORE_PATTERN_FILES = {'.gitignore', '.cycodeignore'}
_SUPPORTED_IGNORE_PATTERN_FILES = { # one day we may bring back .cycodeignore or something similar
'.gitignore',
}
_DEFAULT_GLOBAL_IGNORE_PATTERNS = [
'**/.git',
'**/.cycode',
'.git',
'.cycode',
]


Expand All @@ -35,44 +33,10 @@ def _collect_top_level_ignore_files(path: str) -> List[str]:
return ignore_files


def _get_global_ignore_patterns(path: str) -> List[str]:
ignore_patterns = _DEFAULT_GLOBAL_IGNORE_PATTERNS.copy()
for ignore_file in _collect_top_level_ignore_files(path):
file_patterns = get_file_content(ignore_file).splitlines()
ignore_patterns.extend(file_patterns)
return ignore_patterns


def _should_include_path(ignore_patterns: List[str], path: StrPath) -> bool:
path_spec = pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern, ignore_patterns)
return not path_spec.match_file(path) # works with both files and directories; negative match


def walk_ignore(path: str) -> Generator[Tuple[str, List[str], List[str]], None, None]:
global_ignore_patterns = _get_global_ignore_patterns(path)
path_to_ignore_patterns = defaultdict(list)

for dirpath, dirnames, filenames in os.walk(path, topdown=True):
# finds and processes ignore files first to get the patterns
for filename in filenames:
filepath = os.path.join(dirpath, filename)
if filename in _SUPPORTED_IGNORE_PATTERN_FILES:
logger.debug('Apply ignore file: %s', filepath)

parent_dir = os.path.dirname(dirpath)
if dirpath not in path_to_ignore_patterns and parent_dir in path_to_ignore_patterns:
# inherit ignore patterns from parent directory on first occurrence
logger.debug('Inherit ignore patterns: %s', {'inherit_from': parent_dir, 'inherit_to': dirpath})
path_to_ignore_patterns[dirpath].extend(path_to_ignore_patterns[parent_dir])

# always read ignore patterns for the current directory
path_to_ignore_patterns[dirpath].extend(get_file_content(filepath).splitlines())

ignore_patterns = global_ignore_patterns + path_to_ignore_patterns.get(dirpath, [])

# decrease recursion depth of os.walk() because of topdown=True by changing the list in-place
# slice assignment ([:]) is mandatory to change the lists in-place!
dirnames[:] = [d for d in dirnames if _should_include_path(ignore_patterns, os.path.join(dirpath, d))]
filenames[:] = [f for f in filenames if _should_include_path(ignore_patterns, os.path.join(dirpath, f))]

yield dirpath, dirnames, filenames
ignore_filter_manager = IgnoreFilterManager.build(
path=path,
global_ignore_file_paths=_collect_top_level_ignore_files(path),
global_patterns=_DEFAULT_GLOBAL_IGNORE_PATTERNS,
)
yield from ignore_filter_manager.walk()
14 changes: 11 additions & 3 deletions cycode/cli/utils/ignore_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,12 +132,11 @@ def read_ignore_patterns(f: BinaryIO) -> Iterable[bytes]:
f: File-like object to read from
Returns: List of patterns
"""

for line in f:
line = line.rstrip(b'\r\n')

# Ignore blank and whitespace-only lines; they're used for readability.
if not line:
if not line.strip():
continue

if line.startswith(b'#'):
Expand Down Expand Up @@ -397,7 +396,9 @@ def walk(self, **kwargs) -> Generator[Tuple[str, List[str], List[str]], None, No

# decrease recursion depth of os.walk() by ignoring subdirectories because of topdown=True
# slice assignment ([:]) is mandatory to change the list in-place!
dirnames[:] = [dirname for dirname in dirnames if not self.is_ignored(os.path.join(rel_dirpath, dirname, ''))]
dirnames[:] = [
dirname for dirname in dirnames if not self.is_ignored(os.path.join(rel_dirpath, dirname, ''))
]

# remove ignored files
filenames = [os.path.basename(f) for f in filenames if not self.is_ignored(os.path.join(rel_dirpath, f))]
Expand Down Expand Up @@ -430,6 +431,13 @@ def build(
if not global_patterns:
global_patterns = []

global_ignore_file_paths.extend(
[
os.path.join('.git', 'info', 'exclude'), # relative to an input path, so within the repo
os.path.expanduser(os.path.join('~', '.config', 'git', 'ignore')), # absolute
]
)

if hasattr(path, '__fspath__'):
path = path.__fspath__()

Expand Down
13 changes: 1 addition & 12 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ click = ">=8.1.0,<8.2.0"
colorama = ">=0.4.3,<0.5.0"
pyyaml = ">=6.0,<7.0"
marshmallow = ">=3.15.0,<3.23.0" # 3.23 dropped support for Python 3.8
pathspec = ">=0.11.1,<0.13.0"
gitpython = ">=3.1.30,<3.2.0"
arrow = ">=1.0.0,<1.4.0"
binaryornot = ">=0.4.4,<0.5.0"
Expand Down
42 changes: 10 additions & 32 deletions tests/cli/files_collector/test_walk_ignore.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

from cycode.cli.files_collector.walk_ignore import (
_collect_top_level_ignore_files,
_get_global_ignore_patterns,
_walk_to_top,
walk_ignore,
)
Expand Down Expand Up @@ -52,11 +51,9 @@ def _create_mocked_file_structure(fs: 'FakeFilesystem') -> None:
fs.create_dir('/home/user/project/.git')
fs.create_file('/home/user/project/.git/HEAD')

fs.create_file('/home/user/project/.gitignore', contents='*.pyc')
fs.create_file('/home/user/project/.gitignore', contents='*.pyc\n*.log')
fs.create_file('/home/user/project/ignored.pyc')
fs.create_file('/home/user/project/presented.txt')

fs.create_file('/home/user/project/.cycodeignore', contents='*.log')
fs.create_file('/home/user/project/ignored2.log')
fs.create_file('/home/user/project/ignored2.pyc')
fs.create_file('/home/user/project/presented2.txt')
Expand All @@ -75,45 +72,27 @@ def test_collect_top_level_ignore_files(fs: 'FakeFilesystem') -> None:
# Test with path inside the project
path = normpath('/home/user/project/subproject')
ignore_files = _collect_top_level_ignore_files(path)

assert len(ignore_files) == 3
assert len(ignore_files) == 2
assert normpath('/home/user/project/subproject/.gitignore') in ignore_files
assert normpath('/home/user/project/.gitignore') in ignore_files
assert normpath('/home/user/project/.cycodeignore') in ignore_files

# Test with a path that does not have any ignore files
fs.remove('/home/user/project/.gitignore')
path = normpath('/home/user')
ignore_files = _collect_top_level_ignore_files(path)

assert len(ignore_files) == 0

# Test with path at the top level with no ignore files
path = normpath('/home/user/.git')
ignore_files = _collect_top_level_ignore_files(path)

assert len(ignore_files) == 0

# Test with path at the top level with a .gitignore
path = normpath('/home/user/project')
ignore_files = _collect_top_level_ignore_files(path)

assert len(ignore_files) == 1
assert normpath('/home/user/project/.cycodeignore') in ignore_files


def test_get_global_ignore_patterns(fs: 'FakeFilesystem') -> None:
_create_mocked_file_structure(fs)
ignore_patterns = _get_global_ignore_patterns('/home/user/project/subproject')
assert normpath('/home/user/project/.gitignore') in ignore_files

assert len(ignore_patterns) == 5
# default global:
assert '**/.git' in ignore_patterns
assert '**/.cycode' in ignore_patterns
# additional:
assert '*.txt' in ignore_patterns
assert '*.pyc' in ignore_patterns
assert '*.log' in ignore_patterns
# Test with a path that does not have any ignore files
fs.remove('/home/user/project/.gitignore')
path = normpath('/home/user')
ignore_files = _collect_top_level_ignore_files(path)
assert len(ignore_files) == 0
fs.create_file('/home/user/project/.gitignore', contents='*.pyc\n*.log')


def _collect_walk_ignore_files(path: str) -> List[str]:
Expand All @@ -131,7 +110,7 @@ def test_walk_ignore(fs: 'FakeFilesystem') -> None:
path = normpath('/home/user/project')
result = _collect_walk_ignore_files(path)

assert len(result) == 6
assert len(result) == 5
# ignored globally by default:
assert normpath('/home/user/project/.git/HEAD') not in result
assert normpath('/home/user/project/.cycode/config.yaml') not in result
Expand All @@ -146,7 +125,6 @@ def test_walk_ignore(fs: 'FakeFilesystem') -> None:
assert normpath('/home/user/project/subproject/ignored.log') not in result
# still present after applying the .gitignore rules:
assert normpath('/home/user/project/.gitignore') in result
assert normpath('/home/user/project/.cycodeignore') in result
assert normpath('/home/user/project/subproject/.gitignore') in result
assert normpath('/home/user/project/presented.txt') in result
assert normpath('/home/user/project/presented2.txt') in result
Expand Down
Loading

0 comments on commit 4fbf23b

Please sign in to comment.