Skip to content

Commit

Permalink
CM-42771 - Add support of .gitignore files for a file excluding fro…
Browse files Browse the repository at this point in the history
…m scans (#272)
  • Loading branch information
MarshalX authored Dec 13, 2024
1 parent 2f2759b commit aa534b3
Show file tree
Hide file tree
Showing 10 changed files with 887 additions and 46 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,4 +50,4 @@ jobs:
run: poetry install

- name: Run Tests
run: poetry run pytest
run: poetry run python -m pytest
4 changes: 2 additions & 2 deletions .github/workflows/tests_full.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ jobs:
uses: actions/cache@v3
with:
path: ~/.local
key: poetry-${{ matrix.os }}-${{ matrix.python-version }}-1 # increment to reset cache
key: poetry-${{ matrix.os }}-${{ matrix.python-version }}-2 # increment to reset cache

- name: Setup Poetry
if: steps.cached-poetry.outputs.cache-hit != 'true'
Expand All @@ -71,4 +71,4 @@ jobs:
./dist/cycode-cli version
- name: Run pytest
run: poetry run pytest
run: poetry run python -m pytest
26 changes: 9 additions & 17 deletions cycode/cli/files_collector/path_documents.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
import os
from typing import TYPE_CHECKING, Iterable, List, Tuple

import pathspec
from typing import TYPE_CHECKING, List, Tuple

from cycode.cli.files_collector.excluder import exclude_irrelevant_files
from cycode.cli.files_collector.iac.tf_content_generator import (
Expand All @@ -10,6 +8,7 @@
is_iac,
is_tfplan_file,
)
from cycode.cli.files_collector.walk_ignore import walk_ignore
from cycode.cli.models import Document
from cycode.cli.utils.path_utils import get_absolute_path, get_file_content
from cycode.cyclient import logger
Expand All @@ -18,17 +17,18 @@
from cycode.cli.utils.progress_bar import BaseProgressBar, ProgressBarSection


def _get_all_existing_files_in_directory(path: str) -> List[str]:
def _get_all_existing_files_in_directory(path: str, *, walk_with_ignore_patterns: bool = True) -> List[str]:
files: List[str] = []

for root, _, filenames in os.walk(path):
walk_func = walk_ignore if walk_with_ignore_patterns else os.walk
for root, _, filenames in walk_func(path):
for filename in filenames:
files.append(os.path.join(root, filename))

return files


def _get_relevant_files_in_path(path: str, exclude_patterns: Iterable[str]) -> List[str]:
def _get_relevant_files_in_path(path: str) -> List[str]:
absolute_path = get_absolute_path(path)

if not os.path.isfile(absolute_path) and not os.path.isdir(absolute_path):
Expand All @@ -37,24 +37,16 @@ def _get_relevant_files_in_path(path: str, exclude_patterns: Iterable[str]) -> L
if os.path.isfile(absolute_path):
return [absolute_path]

all_file_paths = set(_get_all_existing_files_in_directory(absolute_path))

path_spec = pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern, exclude_patterns)
excluded_file_paths = set(path_spec.match_files(all_file_paths))

relevant_file_paths = all_file_paths - excluded_file_paths

return [file_path for file_path in relevant_file_paths if os.path.isfile(file_path)]
file_paths = _get_all_existing_files_in_directory(absolute_path)
return [file_path for file_path in file_paths if os.path.isfile(file_path)]


def _get_relevant_files(
progress_bar: 'BaseProgressBar', progress_bar_section: 'ProgressBarSection', scan_type: str, paths: Tuple[str]
) -> List[str]:
all_files_to_scan = []
for path in paths:
all_files_to_scan.extend(
_get_relevant_files_in_path(path=path, exclude_patterns=['**/.git/**', '**/.cycode/**'])
)
all_files_to_scan.extend(_get_relevant_files_in_path(path))

# we double the progress bar section length because we are going to process the files twice:
# the first time to get the file list with respect to the excluded patterns (excluding takes seconds to execute)
Expand Down
42 changes: 42 additions & 0 deletions cycode/cli/files_collector/walk_ignore.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import os
from typing import Generator, Iterable, List, Tuple

from cycode.cli.utils.ignore_utils import IgnoreFilterManager
from cycode.cyclient import logger

# Names of the ignore files looked up in every directory visited by
# _collect_top_level_ignore_files().
_SUPPORTED_IGNORE_PATTERN_FILES = {  # one day we will bring .cycodeignore or something like that
    '.gitignore',
}
# Patterns handed to IgnoreFilterManager.build() as ``global_patterns`` on
# every walk, independent of any ignore file found on disk.
_DEFAULT_GLOBAL_IGNORE_PATTERNS = [
    '.git',
    '.cycode',
]


def _walk_to_top(path: str) -> Iterable[str]:
while os.path.dirname(path) != path:
yield path
path = os.path.dirname(path)

if path:
yield path # Include the top-level directory


def _collect_top_level_ignore_files(path: str) -> List[str]:
    """Return the supported ignore files found in ``path`` and its parents.

    Every directory produced by ``_walk_to_top(path)`` is probed for each
    name in ``_SUPPORTED_IGNORE_PATTERN_FILES``. Results are ordered from
    ``path`` itself upwards, i.e. the deepest directory comes first.

    NOTE(review): whether IgnoreFilterManager expects deepest-first or
    top-first ordering for precedence is not visible here -- confirm.

    Args:
        path: Directory from which the upward search starts.

    Returns:
        Paths of the existing ignore files; empty when none are found.
    """
    ignore_files: List[str] = []
    for dir_path in _walk_to_top(path):
        for ignore_file in _SUPPORTED_IGNORE_PATTERN_FILES:
            ignore_file_path = os.path.join(dir_path, ignore_file)
            # Only regular files (or symlinks to them) qualify: a directory
            # that happens to be named like an ignore file must not be
            # passed on as a file path.
            if os.path.isfile(ignore_file_path):
                logger.debug('Apply top level ignore file: %s', ignore_file_path)
                ignore_files.append(ignore_file_path)
    return ignore_files


def walk_ignore(path: str) -> Generator[Tuple[str, List[str], List[str]], None, None]:
    """Walk ``path`` through an ``IgnoreFilterManager`` and yield its entries.

    The manager is built from the ignore files collected from ``path`` and
    its parent directories, plus the default global ignore patterns.
    The annotated item type is the same triple shape as ``os.walk``.
    """
    top_level_ignore_files = _collect_top_level_ignore_files(path)
    manager = IgnoreFilterManager.build(
        path=path,
        global_ignore_file_paths=top_level_ignore_files,
        global_patterns=_DEFAULT_GLOBAL_IGNORE_PATTERNS,
    )
    yield from manager.walk()
Loading

0 comments on commit aa534b3

Please sign in to comment.