diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 7c4a2b47..0b5ddb58 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -50,4 +50,4 @@ jobs: run: poetry install - name: Run Tests - run: poetry run pytest + run: poetry run python -m pytest diff --git a/.github/workflows/tests_full.yml b/.github/workflows/tests_full.yml index 8181b586..a760d617 100644 --- a/.github/workflows/tests_full.yml +++ b/.github/workflows/tests_full.yml @@ -50,7 +50,7 @@ jobs: uses: actions/cache@v3 with: path: ~/.local - key: poetry-${{ matrix.os }}-${{ matrix.python-version }}-1 # increment to reset cache + key: poetry-${{ matrix.os }}-${{ matrix.python-version }}-2 # increment to reset cache - name: Setup Poetry if: steps.cached-poetry.outputs.cache-hit != 'true' @@ -71,4 +71,4 @@ jobs: ./dist/cycode-cli version - name: Run pytest - run: poetry run pytest + run: poetry run python -m pytest diff --git a/cycode/cli/files_collector/path_documents.py b/cycode/cli/files_collector/path_documents.py index 98a021e4..14f88888 100644 --- a/cycode/cli/files_collector/path_documents.py +++ b/cycode/cli/files_collector/path_documents.py @@ -1,7 +1,5 @@ import os -from typing import TYPE_CHECKING, Iterable, List, Tuple - -import pathspec +from typing import TYPE_CHECKING, List, Tuple from cycode.cli.files_collector.excluder import exclude_irrelevant_files from cycode.cli.files_collector.iac.tf_content_generator import ( @@ -10,6 +8,7 @@ is_iac, is_tfplan_file, ) +from cycode.cli.files_collector.walk_ignore import walk_ignore from cycode.cli.models import Document from cycode.cli.utils.path_utils import get_absolute_path, get_file_content from cycode.cyclient import logger @@ -18,17 +17,18 @@ from cycode.cli.utils.progress_bar import BaseProgressBar, ProgressBarSection -def _get_all_existing_files_in_directory(path: str) -> List[str]: +def _get_all_existing_files_in_directory(path: str, *, walk_with_ignore_patterns: bool = True) -> List[str]: files: 
List[str] = [] - for root, _, filenames in os.walk(path): + walk_func = walk_ignore if walk_with_ignore_patterns else os.walk + for root, _, filenames in walk_func(path): for filename in filenames: files.append(os.path.join(root, filename)) return files -def _get_relevant_files_in_path(path: str, exclude_patterns: Iterable[str]) -> List[str]: +def _get_relevant_files_in_path(path: str) -> List[str]: absolute_path = get_absolute_path(path) if not os.path.isfile(absolute_path) and not os.path.isdir(absolute_path): @@ -37,14 +37,8 @@ def _get_relevant_files_in_path(path: str, exclude_patterns: Iterable[str]) -> L if os.path.isfile(absolute_path): return [absolute_path] - all_file_paths = set(_get_all_existing_files_in_directory(absolute_path)) - - path_spec = pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern, exclude_patterns) - excluded_file_paths = set(path_spec.match_files(all_file_paths)) - - relevant_file_paths = all_file_paths - excluded_file_paths - - return [file_path for file_path in relevant_file_paths if os.path.isfile(file_path)] + file_paths = _get_all_existing_files_in_directory(absolute_path) + return [file_path for file_path in file_paths if os.path.isfile(file_path)] def _get_relevant_files( @@ -52,9 +46,7 @@ def _get_relevant_files( ) -> List[str]: all_files_to_scan = [] for path in paths: - all_files_to_scan.extend( - _get_relevant_files_in_path(path=path, exclude_patterns=['**/.git/**', '**/.cycode/**']) - ) + all_files_to_scan.extend(_get_relevant_files_in_path(path)) # we are double the progress bar section length because we are going to process the files twice # first time to get the file list with respect of excluded patterns (excluding takes seconds to execute) diff --git a/cycode/cli/files_collector/walk_ignore.py b/cycode/cli/files_collector/walk_ignore.py new file mode 100644 index 00000000..76d04366 --- /dev/null +++ b/cycode/cli/files_collector/walk_ignore.py @@ -0,0 +1,42 @@ +import os +from typing import Generator, 
Iterable, List, Tuple + +from cycode.cli.utils.ignore_utils import IgnoreFilterManager +from cycode.cyclient import logger + +_SUPPORTED_IGNORE_PATTERN_FILES = { # oneday we will bring .cycodeignore or something like that + '.gitignore', +} +_DEFAULT_GLOBAL_IGNORE_PATTERNS = [ + '.git', + '.cycode', +] + + +def _walk_to_top(path: str) -> Iterable[str]: + while os.path.dirname(path) != path: + yield path + path = os.path.dirname(path) + + if path: + yield path # Include the top-level directory + + +def _collect_top_level_ignore_files(path: str) -> List[str]: + ignore_files = [] + for dir_path in _walk_to_top(path): + for ignore_file in _SUPPORTED_IGNORE_PATTERN_FILES: + ignore_file_path = os.path.join(dir_path, ignore_file) + if os.path.exists(ignore_file_path): + logger.debug('Apply top level ignore file: %s', ignore_file_path) + ignore_files.append(ignore_file_path) + return ignore_files + + +def walk_ignore(path: str) -> Generator[Tuple[str, List[str], List[str]], None, None]: + ignore_filter_manager = IgnoreFilterManager.build( + path=path, + global_ignore_file_paths=_collect_top_level_ignore_files(path), + global_patterns=_DEFAULT_GLOBAL_IGNORE_PATTERNS, + ) + yield from ignore_filter_manager.walk() diff --git a/cycode/cli/utils/ignore_utils.py b/cycode/cli/utils/ignore_utils.py new file mode 100644 index 00000000..329fa055 --- /dev/null +++ b/cycode/cli/utils/ignore_utils.py @@ -0,0 +1,459 @@ +# Copyright (C) 2017 Jelmer Vernooij +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and
# limitations under the License.

# Modified from https://github.com/jelmer/dulwich/blob/master/dulwich/ignore.py

# Copyright 2020 Ben Kehoe
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Modified from https://github.com/benkehoe/ignorelib/blob/main/ignorelib.py

"""Parsing of ignore files according to gitignore rules.

For details for the matching rules, see https://git-scm.com/docs/gitignore
"""

import contextlib
import os.path
import re
from os import PathLike
from typing import (
    Any,
    BinaryIO,
    Dict,
    Generator,
    Iterable,
    List,
    Optional,
    Tuple,
    Union,
)


def _translate_segment(segment: bytes) -> bytes:  # noqa: C901
    """Translate one ``/``-free pattern segment into a regular expression fragment.

    Args:
        segment: A single path component of a gitignore pattern.

    Returns:
        The regular expression source (bytes) matching that component.
    """
    if segment == b'*':
        # a lone star has to consume at least one character of the path component
        return b'[^/]+'
    res = b''
    i, n = 0, len(segment)
    while i < n:
        c = segment[i : i + 1]
        i += 1
        if c == b'*':
            res += b'[^/]*'
        elif c == b'?':
            res += b'[^/]'
        elif c == b'\\':
            # a backslash quotes the following byte, which is then matched literally
            res += re.escape(segment[i : i + 1])
            i += 1
        elif c == b'[':
            # locate the closing bracket; a leading '!' or ']' belongs to the class itself
            j = i
            if j < n and segment[j : j + 1] == b'!':
                j += 1
            if j < n and segment[j : j + 1] == b']':
                j += 1
            while j < n and segment[j : j + 1] != b']':
                j += 1
            if j >= n:
                # unterminated class: treat the '[' as a literal character
                res += b'\\['
            else:
                stuff = segment[i:j].replace(b'\\', b'\\\\')
                i = j + 1
                if stuff.startswith(b'!'):
                    stuff = b'^' + stuff[1:]
                elif stuff.startswith(b'^'):
                    stuff = b'\\' + stuff
                res += b'[' + stuff + b']'
        else:
            res += re.escape(c)
    return res


def translate(pat: bytes) -> bytes:
    """Translate a gitignore PATTERN to a regular expression.

    Within a segment, a backslash quotes the following character.

    Originally copied from fnmatch in Python 2.7, but modified for Dulwich
    to cope with features in Git ignore patterns.

    Args:
        pat: Pattern without the leading ``!`` negation marker.

    Returns:
        Source of a regular expression (bytes) anchored at the end with ``\\Z``.
    """

    res = b'(?ms)'

    if b'/' not in pat[:-1]:
        # If there's no slash, this is a filename-based match
        res += b'(.*/)?'

    if pat.startswith(b'**/'):
        # Leading **/
        pat = pat[2:]
        res += b'(.*/)?'

    if pat.startswith(b'/'):
        pat = pat[1:]

    for i, segment in enumerate(pat.split(b'/')):
        if segment == b'**':
            res += b'(/.*)?'
            continue
        res += (re.escape(b'/') if i > 0 else b'') + _translate_segment(segment)

    if not pat.endswith(b'/'):
        res += b'/?'

    return res + b'\\Z'


def read_ignore_patterns(f: BinaryIO) -> Iterable[bytes]:
    """Read a git ignore file.

    Blank lines and lines starting with ``#`` are skipped.

    Args:
        f: File-like object (binary mode) to read from
    Returns: Iterator over the patterns
    """
    for line in f:
        line = line.rstrip(b'\r\n')

        # Ignore blank lines, they're used for readability.
        if not line.strip():
            continue

        if line.startswith(b'#'):
            # Comment
            continue

        # Trailing spaces are ignored unless they are quoted with a backslash.
        while line.endswith(b' ') and not line.endswith(b'\\ '):
            line = line[:-1]
        line = line.replace(b'\\ ', b' ')

        yield line


def match_pattern(path: bytes, pattern: bytes, ignore_case: bool = False) -> bool:
    """Match a gitignore-style pattern against a path.

    Args:
        path: Path to match
        pattern: Pattern to match
        ignore_case: Whether to do case-insensitive matching
    Returns:
        bool indicating whether the pattern matched
    """
    return Pattern(pattern, ignore_case).match(path)


class Pattern:
    """A single ignore pattern."""

    def __init__(self, pattern: bytes, ignore_case: bool = False) -> None:
        """Compile ``pattern``.

        A leading ``!`` marks a negated pattern (``is_exclude`` is False); a leading
        backslash is dropped so that a literal ``!`` or ``#`` can start a pattern.
        """
        self.pattern = pattern
        self.ignore_case = ignore_case
        if pattern[0:1] == b'!':
            self.is_exclude = False
            pattern = pattern[1:]
        else:
            if pattern[0:1] == b'\\':
                pattern = pattern[1:]
            self.is_exclude = True
        flags = re.IGNORECASE if self.ignore_case else 0
        self._re = re.compile(translate(pattern), flags)

    def __bytes__(self) -> bytes:
        return self.pattern

    def __str__(self) -> str:
        return os.fsdecode(self.pattern)

    def __eq__(self, other: object) -> bool:
        return isinstance(other, type(self)) and self.pattern == other.pattern and self.ignore_case == other.ignore_case

    def __hash__(self) -> int:
        # defined together with __eq__ so equal patterns stay usable in sets and as dict keys
        return hash((self.pattern, self.ignore_case))

    def __repr__(self) -> str:
        return f'{type(self).__name__}({self.pattern!r}, {self.ignore_case!r})'

    def match(self, path: bytes) -> bool:
        """Try to match a path against this ignore pattern.

        Args:
            path: Path to match (relative to ignore location)
        Returns: boolean
        """
        return bool(self._re.match(path))


class IgnoreFilter:
    """An ordered collection of ignore patterns, e.g. the content of one ignore file."""

    def __init__(
        self,
        patterns: Iterable[Union[str, bytes]],
        ignore_case: bool = False,
        path: Optional[Union[PathLike, str]] = None,
    ) -> None:
        if hasattr(path, '__fspath__'):
            path = path.__fspath__()
        self._patterns: List[Pattern] = []
        self._ignore_case = ignore_case
        self._path = path
        for pattern in patterns:
            self.append_pattern(pattern)

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict description of the filter (patterns, ignore_case, optional path)."""
        d = {
            'patterns': [str(p) for p in self._patterns],
            'ignore_case': self._ignore_case,
        }
        path = getattr(self, '_path', None)
        if path:
            d['path'] = path
        return d

    def append_pattern(self, pattern: Union[str, bytes]) -> None:
        """Add a pattern to the set."""
        if isinstance(pattern, str):
            pattern = bytes(pattern, 'utf-8')
        self._patterns.append(Pattern(pattern, self._ignore_case))

    def find_matching(self, path: Union[bytes, str]) -> Iterable[Pattern]:
        """Yield all matching patterns for path.

        Args:
            path: Path to match
        Returns:
            Iterator over the matching Pattern instances, in insertion order
        """
        if not isinstance(path, bytes):
            path = os.fsencode(path)
        for pattern in self._patterns:
            if pattern.match(path):
                yield pattern

    def is_ignored(self, path: Union[bytes, str]) -> Optional[bool]:
        """Check whether a path is ignored.

        For directories, include a trailing slash.

        Returns: None if the path is not mentioned, True if it is ignored,
          False if it is explicitly re-included by a negated (``!``) pattern.
          The last matching pattern decides.
        """
        if hasattr(path, '__fspath__'):
            path = path.__fspath__()
        status = None
        for pattern in self.find_matching(path):
            status = pattern.is_exclude
        return status

    @classmethod
    def from_path(cls, path: Union[PathLike, str], ignore_case: bool = False) -> 'IgnoreFilter':
        """Build a filter from the ignore file at ``path``; open() errors propagate to the caller."""
        if hasattr(path, '__fspath__'):
            path = path.__fspath__()
        with open(path, 'rb') as f:
            return cls(read_ignore_patterns(f), ignore_case, path=path)

    def __repr__(self) -> str:
        path = getattr(self, '_path', None)
        if path is not None:
            return f'{type(self).__name__}.from_path({path!r})'
        return f'<{type(self).__name__}>'


class IgnoreFilterManager:
    """Ignore file manager."""

    def __init__(
        self,
        path: str,
        global_filters: List[IgnoreFilter],
        ignore_file_name: Optional[str] = None,
        ignore_case: bool = False,
    ) -> None:
        if hasattr(path, '__fspath__'):
            path = path.__fspath__()
        # cache of per-directory filters keyed by directory path relative to the top path;
        # None records that the directory has no readable ignore file
        self._path_filters: Dict[str, Optional[IgnoreFilter]] = {}
        self._top_path = path
        self._global_filters = global_filters

        self._ignore_file_name = ignore_file_name
        if self._ignore_file_name is None:
            self._ignore_file_name = '.gitignore'

        self._ignore_case = ignore_case

    def __repr__(self) -> str:
        return f'{type(self).__name__}({self._top_path}, {self._global_filters!r}, {self._ignore_case!r})'

    def to_dict(self, include_path_filters: bool = True) -> Dict[str, Any]:
        """Return a plain-dict description of the manager and, optionally, the loaded per-directory filters."""
        d = {
            'path': self._top_path,
            'global_filters': [f.to_dict() for f in self._global_filters],
            'ignore_case': self._ignore_case,
        }
        if include_path_filters:
            d['path_filters'] = {path: f.to_dict() for path, f in self._path_filters.items() if f is not None}
        return d

    @property
    def path(self) -> str:
        return self._top_path

    @property
    def ignore_file_name(self) -> Optional[str]:
        return self._ignore_file_name

    @property
    def ignore_case(self) -> bool:
        return self._ignore_case

    def _load_path(self, path: str) -> Optional[IgnoreFilter]:
        """Return the (cached) filter for the ignore file in directory ``path``, or None if there is none."""
        try:
            return self._path_filters[path]
        except KeyError:
            pass

        if not self._ignore_file_name:
            self._path_filters[path] = None
        else:
            p = os.path.join(self._top_path, path, self._ignore_file_name)
            try:
                self._path_filters[path] = IgnoreFilter.from_path(p, self._ignore_case)
            except IOError:
                # missing or unreadable ignore file: remember that there is nothing to apply
                self._path_filters[path] = None
        return self._path_filters[path]

    def _find_matching(self, path: str) -> Iterable[Pattern]:
        """Find matching patterns for path.

        Args:
            path: Path to check
        Returns:
            Iterator over Pattern instances
        Raises:
            ValueError: If ``path`` is absolute.
        """
        if os.path.isabs(path):
            raise ValueError(f'{path} is an absolute path')
        # each entry is (index of the first path part the filter is relative to, filter)
        filters = [(0, f) for f in self._global_filters]
        if os.path.sep != '/':
            path = path.replace(os.path.sep, '/')
        parts = path.split('/')
        matches = []
        for i in range(len(parts) + 1):
            dirname = '/'.join(parts[:i])
            for s, f in filters:
                relpath = '/'.join(parts[s:i])
                if i < len(parts):
                    # Paths leading up to the final part are all directories,
                    # so need a trailing slash.
                    relpath += '/'
                matches += list(f.find_matching(relpath))
            ignore_filter = self._load_path(dirname)
            if ignore_filter is not None:
                filters.insert(0, (i, ignore_filter))
        return iter(matches)

    def is_ignored(self, path: str) -> Optional[bool]:
        """Check whether a path is ignored.

        Args:
            path: Path to check, relative to the IgnoreFilterManager path
        Returns:
            True if the path matches an ignore pattern,
            False if the path is explicitly not ignored,
            or None if the file does not match any patterns.
        """
        if hasattr(path, '__fspath__'):
            path = path.__fspath__()
        matches = list(self._find_matching(path))
        if matches:
            return matches[-1].is_exclude
        return None

    def walk(self, **kwargs) -> Generator[Tuple[str, List[str], List[str]], None, None]:
        """A wrapper for os.walk() without ignored files and subdirectories.

        kwargs are passed to os.walk(); ``topdown`` is always True and must not be supplied.
        """

        for dirpath, dirnames, filenames in os.walk(self.path, topdown=True, **kwargs):
            rel_dirpath = '' if dirpath == self.path else os.path.relpath(dirpath, self.path)

            # with topdown=True os.walk() only descends into what is left in `dirnames`, so the
            # list has to be changed in-place (slice assignment) to skip ignored subdirectories;
            # the trailing '' makes os.path.join() append a separator, marking the path as a directory
            dirnames[:] = [
                dirname for dirname in dirnames if not self.is_ignored(os.path.join(rel_dirpath, dirname, ''))
            ]

            # remove ignored files
            filenames = [f for f in filenames if not self.is_ignored(os.path.join(rel_dirpath, f))]

            yield dirpath, dirnames, filenames

    @classmethod
    def build(
        cls,
        path: str,
        global_ignore_file_paths: Optional[Iterable[str]] = None,
        global_patterns: Optional[Iterable[Union[str, bytes]]] = None,
        ignore_file_name: Optional[str] = None,
        ignore_case: bool = False,
    ) -> 'IgnoreFilterManager':
        """Create a IgnoreFilterManager from patterns and paths.

        Args:
            path: The root path for ignore checks.
            global_ignore_file_paths: File paths to load patterns from. The iterable is
                copied, never modified. Relative paths are relative to the
                IgnoreFilterManager path, not the current directory.
            global_patterns: Global patterns to ignore.
            ignore_file_name: The per-directory ignore file name.
            ignore_case: Whether to ignore case in matching (applies to global filters too).
        Returns:
            A `IgnoreFilterManager` object
        """
        if hasattr(path, '__fspath__'):
            path = path.__fspath__()

        # work on a copy: the caller's list must not grow on every call, and tuples/generators are accepted
        ignore_file_paths = list(global_ignore_file_paths or [])
        ignore_file_paths.extend(
            [
                os.path.join('.git', 'info', 'exclude'),  # relative to an input path, so within the repo
                os.path.expanduser(os.path.join('~', '.config', 'git', 'ignore')),  # absolute
            ]
        )

        global_filters = []
        for p in ignore_file_paths:
            if hasattr(p, '__fspath__'):
                p = p.__fspath__()

            p = os.path.expanduser(p)
            if not os.path.isabs(p):
                p = os.path.join(path, p)

            # ignore files are optional: skip the ones that are missing or unreadable
            with contextlib.suppress(IOError):
                global_filters.append(IgnoreFilter.from_path(p, ignore_case))

        global_patterns = list(global_patterns or [])
        if global_patterns:
            global_filters.append(IgnoreFilter(global_patterns, ignore_case))

        return cls(path, global_filters=global_filters, ignore_file_name=ignore_file_name, ignore_case=ignore_case)
+optional = false +python-versions = ">=3.7" +files = [ + {file = "pyfakefs-5.7.2-py3-none-any.whl", hash = "sha256:e1527b0e8e4b33be52f0b024ca1deb269c73eecd68457c6b0bf608d6dab12ebd"}, + {file = "pyfakefs-5.7.2.tar.gz", hash = "sha256:40da84175c5af8d9c4f3b31800b8edc4af1e74a212671dd658b21cc881c60000"}, +] + [[package]] name = "pygments" version = "2.18.0" @@ -822,13 +822,13 @@ files = [ [[package]] name = "sentry-sdk" -version = "2.19.0" +version = "2.19.2" description = "Python client for Sentry (https://sentry.io)" optional = false python-versions = ">=3.6" files = [ - {file = "sentry_sdk-2.19.0-py2.py3-none-any.whl", hash = "sha256:7b0b3b709dee051337244a09a30dbf6e95afe0d34a1f8b430d45e0982a7c125b"}, - {file = "sentry_sdk-2.19.0.tar.gz", hash = "sha256:ee4a4d2ae8bfe3cac012dcf3e4607975904c137e1738116549fc3dbbb6ff0e36"}, + {file = "sentry_sdk-2.19.2-py2.py3-none-any.whl", hash = "sha256:ebdc08228b4d131128e568d696c210d846e5b9d70aa0327dec6b1272d9d40b84"}, + {file = "sentry_sdk-2.19.2.tar.gz", hash = "sha256:467df6e126ba242d39952375dd816fbee0f217d119bf454a8ce74cf1e7909e8d"}, ] [package.dependencies] @@ -896,13 +896,13 @@ type = ["importlib-metadata (>=7.0.2)", "jaraco.develop (>=7.21)", "mypy (==1.12 [[package]] name = "six" -version = "1.16.0" +version = "1.17.0" description = "Python 2 and 3 compatibility utilities" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" files = [ - {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, - {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, + {file = "six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274"}, + {file = "six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"}, ] [[package]] @@ -929,24 +929,54 @@ files = [ 
[[package]] name = "tomli" -version = "2.1.0" +version = "2.2.1" description = "A lil' TOML parser" optional = false python-versions = ">=3.8" files = [ - {file = "tomli-2.1.0-py3-none-any.whl", hash = "sha256:a5c57c3d1c56f5ccdf89f6523458f60ef716e210fc47c4cfb188c5ba473e0391"}, - {file = "tomli-2.1.0.tar.gz", hash = "sha256:3f646cae2aec94e17d04973e4249548320197cfabdf130015d023de4b74d8ab8"}, + {file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"}, + {file = "tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6"}, + {file = "tomli-2.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a"}, + {file = "tomli-2.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6972ca9c9cc9f0acaa56a8ca1ff51e7af152a9f87fb64623e31d5c83700080ee"}, + {file = "tomli-2.2.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c954d2250168d28797dd4e3ac5cf812a406cd5a92674ee4c8f123c889786aa8e"}, + {file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8dd28b3e155b80f4d54beb40a441d366adcfe740969820caf156c019fb5c7ec4"}, + {file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:e59e304978767a54663af13c07b3d1af22ddee3bb2fb0618ca1593e4f593a106"}, + {file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:33580bccab0338d00994d7f16f4c4ec25b776af3ffaac1ed74e0b3fc95e885a8"}, + {file = "tomli-2.2.1-cp311-cp311-win32.whl", hash = "sha256:465af0e0875402f1d226519c9904f37254b3045fc5084697cefb9bdde1ff99ff"}, + {file = "tomli-2.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:2d0f2fdd22b02c6d81637a3c95f8cd77f995846af7414c5c4b8d0545afa1bc4b"}, + {file = "tomli-2.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = 
"sha256:4a8f6e44de52d5e6c657c9fe83b562f5f4256d8ebbfe4ff922c495620a7f6cea"}, + {file = "tomli-2.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8d57ca8095a641b8237d5b079147646153d22552f1c637fd3ba7f4b0b29167a8"}, + {file = "tomli-2.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e340144ad7ae1533cb897d406382b4b6fede8890a03738ff1683af800d54192"}, + {file = "tomli-2.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db2b95f9de79181805df90bedc5a5ab4c165e6ec3fe99f970d0e302f384ad222"}, + {file = "tomli-2.2.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:40741994320b232529c802f8bc86da4e1aa9f413db394617b9a256ae0f9a7f77"}, + {file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:400e720fe168c0f8521520190686ef8ef033fb19fc493da09779e592861b78c6"}, + {file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd"}, + {file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b82ebccc8c8a36f2094e969560a1b836758481f3dc360ce9a3277c65f374285e"}, + {file = "tomli-2.2.1-cp312-cp312-win32.whl", hash = "sha256:889f80ef92701b9dbb224e49ec87c645ce5df3fa2cc548664eb8a25e03127a98"}, + {file = "tomli-2.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:7fc04e92e1d624a4a63c76474610238576942d6b8950a2d7f908a340494e67e4"}, + {file = "tomli-2.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7"}, + {file = "tomli-2.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:286f0ca2ffeeb5b9bd4fcc8d6c330534323ec51b2f52da063b11c502da16f30c"}, + {file = "tomli-2.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a92ef1a44547e894e2a17d24e7557a5e85a9e1d0048b0b5e7541f76c5032cb13"}, + {file = "tomli-2.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:9316dc65bed1684c9a98ee68759ceaed29d229e985297003e494aa825ebb0281"}, + {file = "tomli-2.2.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e85e99945e688e32d5a35c1ff38ed0b3f41f43fad8df0bdf79f72b2ba7bc5272"}, + {file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ac065718db92ca818f8d6141b5f66369833d4a80a9d74435a268c52bdfa73140"}, + {file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:d920f33822747519673ee656a4b6ac33e382eca9d331c87770faa3eef562aeb2"}, + {file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a198f10c4d1b1375d7687bc25294306e551bf1abfa4eace6650070a5c1ae2744"}, + {file = "tomli-2.2.1-cp313-cp313-win32.whl", hash = "sha256:d3f5614314d758649ab2ab3a62d4f2004c825922f9e370b29416484086b264ec"}, + {file = "tomli-2.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:a38aa0308e754b0e3c67e344754dff64999ff9b513e691d0e786265c93583c69"}, + {file = "tomli-2.2.1-py3-none-any.whl", hash = "sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc"}, + {file = "tomli-2.2.1.tar.gz", hash = "sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff"}, ] [[package]] name = "types-python-dateutil" -version = "2.9.0.20241003" +version = "2.9.0.20241206" description = "Typing stubs for python-dateutil" optional = false python-versions = ">=3.8" files = [ - {file = "types-python-dateutil-2.9.0.20241003.tar.gz", hash = "sha256:58cb85449b2a56d6684e41aeefb4c4280631246a0da1a719bdbe6f3fb0317446"}, - {file = "types_python_dateutil-2.9.0.20241003-py3-none-any.whl", hash = "sha256:250e1d8e80e7bbc3a6c99b907762711d1a1cdd00e978ad39cb5940f6f0a87f3d"}, + {file = "types_python_dateutil-2.9.0.20241206-py3-none-any.whl", hash = "sha256:e248a4bc70a486d3e3ec84d0dc30eec3a5f979d6e7ee4123ae043eedbb987f53"}, + {file = "types_python_dateutil-2.9.0.20241206.tar.gz", hash = "sha256:18f493414c26ffba692a72369fea7a154c502646301ebfe3d56a04b3767284cb"}, ] 
import os
from os.path import normpath
from typing import TYPE_CHECKING, List

from cycode.cli.files_collector.walk_ignore import (
    _collect_top_level_ignore_files,
    _walk_to_top,
    walk_ignore,
)

if TYPE_CHECKING:
    from pyfakefs.fake_filesystem import FakeFilesystem


# we are using normpath() in every test to provide multi-platform support


def test_walk_to_top() -> None:
    """_walk_to_top yields the path and every ancestor; a relative name yields only itself."""
    path = normpath('/a/b/c/d/e/f/g')
    result = list(_walk_to_top(path))
    assert result == [
        normpath('/a/b/c/d/e/f/g'),
        normpath('/a/b/c/d/e/f'),
        normpath('/a/b/c/d/e'),
        normpath('/a/b/c/d'),
        normpath('/a/b/c'),
        normpath('/a/b'),
        normpath('/a'),
        normpath('/'),
    ]

    path = normpath('/a')
    result = list(_walk_to_top(path))
    assert result == [normpath('/a'), normpath('/')]

    path = normpath('/')
    result = list(_walk_to_top(path))
    assert result == [normpath('/')]

    path = normpath('a')
    result = list(_walk_to_top(path))
    assert result == [normpath('a')]


def _create_mocked_file_structure(fs: 'FakeFilesystem') -> None:
    """Populate the fake filesystem with a project, a nested subproject and their .gitignore files."""
    fs.create_dir('/home/user/project')
    fs.create_dir('/home/user/.git')

    fs.create_dir('/home/user/project/.cycode')
    fs.create_file('/home/user/project/.cycode/config.yaml')
    fs.create_dir('/home/user/project/.git')
    fs.create_file('/home/user/project/.git/HEAD')

    # project level: ignores *.pyc and *.log
    fs.create_file('/home/user/project/.gitignore', contents='*.pyc\n*.log')
    fs.create_file('/home/user/project/ignored.pyc')
    fs.create_file('/home/user/project/presented.txt')
    fs.create_file('/home/user/project/ignored2.log')
    fs.create_file('/home/user/project/ignored2.pyc')
    fs.create_file('/home/user/project/presented2.txt')

    # subproject level: additionally ignores *.txt
    fs.create_dir('/home/user/project/subproject')
    fs.create_file('/home/user/project/subproject/.gitignore', contents='*.txt')
    fs.create_file('/home/user/project/subproject/ignored.txt')
    fs.create_file('/home/user/project/subproject/ignored.log')
    fs.create_file('/home/user/project/subproject/ignored.pyc')
    fs.create_file('/home/user/project/subproject/presented.py')


def test_collect_top_level_ignore_files(fs: 'FakeFilesystem') -> None:
    """Ignore files are collected from the given path and from all of its ancestors."""
    _create_mocked_file_structure(fs)

    # Test with path inside the project
    path = normpath('/home/user/project/subproject')
    ignore_files = _collect_top_level_ignore_files(path)
    assert len(ignore_files) == 2
    assert normpath('/home/user/project/subproject/.gitignore') in ignore_files
    assert normpath('/home/user/project/.gitignore') in ignore_files

    # Test with path at the top level with no ignore files
    path = normpath('/home/user/.git')
    ignore_files = _collect_top_level_ignore_files(path)
    assert len(ignore_files) == 0

    # Test with path at the top level with a .gitignore
    path = normpath('/home/user/project')
    ignore_files = _collect_top_level_ignore_files(path)
    assert len(ignore_files) == 1
    assert normpath('/home/user/project/.gitignore') in ignore_files

    # Test with a path that does not have any ignore files
    fs.remove('/home/user/project/.gitignore')
    path = normpath('/home/user')
    ignore_files = _collect_top_level_ignore_files(path)
    assert len(ignore_files) == 0
    # put the removed ignore file back
    fs.create_file('/home/user/project/.gitignore', contents='*.pyc\n*.log')


def _collect_walk_ignore_files(path: str) -> List[str]:
    """Return the full paths of all files yielded by walk_ignore(path)."""
    files = []
    for root, _, filenames in walk_ignore(path):
        for filename in filenames:
            files.append(os.path.join(root, filename))

    return files


def test_walk_ignore(fs: 'FakeFilesystem') -> None:
    """walk_ignore skips .git/.cycode and everything matched by the project and subproject .gitignore files."""
    _create_mocked_file_structure(fs)

    path = normpath('/home/user/project')
    result = _collect_walk_ignore_files(path)

    assert len(result) == 5
    # ignored globally by default:
    assert normpath('/home/user/project/.git/HEAD') not in result
    assert normpath('/home/user/project/.cycode/config.yaml') not in result
    # ignored by .gitignore in project directory:
    assert normpath('/home/user/project/ignored.pyc') not in result
    assert normpath('/home/user/project/subproject/ignored.pyc') not in result
    # ignored by .gitignore in subproject directory:
    assert normpath('/home/user/project/subproject/ignored.txt') not in result
    # also ignored by the '*.log' / '*.pyc' patterns of the .gitignore in project directory:
    assert normpath('/home/user/project/ignored2.log') not in result
    assert normpath('/home/user/project/ignored2.pyc') not in result
    assert normpath('/home/user/project/subproject/ignored.log') not in result
    # presented, because no .gitignore pattern matches them:
    assert normpath('/home/user/project/.gitignore') in result
    assert normpath('/home/user/project/subproject/.gitignore') in result
    assert normpath('/home/user/project/presented.txt') in result
    assert normpath('/home/user/project/presented2.txt') in result
    assert normpath('/home/user/project/subproject/presented.py') in result

    # walking only the subproject still applies the .gitignore of the parent project directory
    path = normpath('/home/user/project/subproject')
    result = _collect_walk_ignore_files(path)

    assert len(result) == 2
    # ignored:
    assert normpath('/home/user/project/subproject/ignored.txt') not in result
    assert normpath('/home/user/project/subproject/ignored.log') not in result
    assert normpath('/home/user/project/subproject/ignored.pyc') not in result
    # presented:
    assert normpath('/home/user/project/subproject/presented.py') in result
+ +# Modified (rewritten to pytest + pyfakefs) from https://github.com/jelmer/dulwich/blob/master/tests/test_ignore.py + +import os +import re +from io import BytesIO +from typing import TYPE_CHECKING + +import pytest + +from cycode.cli.utils.ignore_utils import ( + IgnoreFilter, + IgnoreFilterManager, + Pattern, + match_pattern, + read_ignore_patterns, + translate, +) + +if TYPE_CHECKING: + from pyfakefs.fake_filesystem import FakeFilesystem + +POSITIVE_MATCH_TESTS = [ + (b'foo.c', b'*.c'), + (b'.c', b'*.c'), + (b'foo/foo.c', b'*.c'), + (b'foo/foo.c', b'foo.c'), + (b'foo.c', b'/*.c'), + (b'foo.c', b'/foo.c'), + (b'foo.c', b'foo.c'), + (b'foo.c', b'foo.[ch]'), + (b'foo/bar/bla.c', b'foo/**'), + (b'foo/bar/bla/blie.c', b'foo/**/blie.c'), + (b'foo/bar/bla.c', b'**/bla.c'), + (b'bla.c', b'**/bla.c'), + (b'foo/bar', b'foo/**/bar'), + (b'foo/bla/bar', b'foo/**/bar'), + (b'foo/bar/', b'bar/'), + (b'foo/bar/', b'bar'), + (b'foo/bar/something', b'foo/bar/*'), +] + +NEGATIVE_MATCH_TESTS = [ + (b'foo.c', b'foo.[dh]'), + (b'foo/foo.c', b'/foo.c'), + (b'foo/foo.c', b'/*.c'), + (b'foo/bar/', b'/bar/'), + (b'foo/bar/', b'foo/bar/*'), + (b'foo/bar', b'foo?bar'), +] + +TRANSLATE_TESTS = [ + (b'*.c', b'(?ms)(.*/)?[^/]*\\.c/?\\Z'), + (b'foo.c', b'(?ms)(.*/)?foo\\.c/?\\Z'), + (b'/*.c', b'(?ms)[^/]*\\.c/?\\Z'), + (b'/foo.c', b'(?ms)foo\\.c/?\\Z'), + (b'foo.c', b'(?ms)(.*/)?foo\\.c/?\\Z'), + (b'foo.[ch]', b'(?ms)(.*/)?foo\\.[ch]/?\\Z'), + (b'bar/', b'(?ms)(.*/)?bar\\/\\Z'), + (b'foo/**', b'(?ms)foo(/.*)?/?\\Z'), + (b'foo/**/blie.c', b'(?ms)foo(/.*)?\\/blie\\.c/?\\Z'), + (b'**/bla.c', b'(?ms)(.*/)?bla\\.c/?\\Z'), + (b'foo/**/bar', b'(?ms)foo(/.*)?\\/bar/?\\Z'), + (b'foo/bar/*', b'(?ms)foo\\/bar\\/[^/]+/?\\Z'), + (b'/foo\\[bar\\]', b'(?ms)foo\\[bar\\]/?\\Z'), + (b'/foo[bar]', b'(?ms)foo[bar]/?\\Z'), + (b'/foo[0-9]', b'(?ms)foo[0-9]/?\\Z'), +] + + +@pytest.mark.usefixtures('fs') +class TestIgnoreFiles: + def test_translate(self) -> None: + for pattern, regex in TRANSLATE_TESTS: + if 
re.escape(b'/') == b'/': + regex = regex.replace(b'\\/', b'/') + assert ( + translate(pattern) == regex + ), f'orig pattern: {pattern!r}, regex: {translate(pattern)!r}, expected: {regex!r}' + + def test_read_file(self) -> None: + f = BytesIO( + b""" +# a comment +\x20\x20 +# and an empty line: + +\\#not a comment +!negative +with trailing whitespace +with escaped trailing whitespace\\ +""" # noqa: W291 (Trailing whitespace) + ) + assert list(read_ignore_patterns(f)) == [ + b'\\#not a comment', + b'!negative', + b'with trailing whitespace', + b'with escaped trailing whitespace ', + ] + + def test_match_patterns_positive(self) -> None: + for path, pattern in POSITIVE_MATCH_TESTS: + assert match_pattern(path, pattern), f'path: {path!r}, pattern: {pattern!r}' + + def test_match_patterns_negative(self) -> None: + for path, pattern in NEGATIVE_MATCH_TESTS: + assert not match_pattern(path, pattern), f'path: {path!r}, pattern: {pattern!r}' + + def test_ignore_filter_inclusion(self) -> None: + ignore_filter = IgnoreFilter([b'a.c', b'b.c']) + assert ignore_filter.is_ignored(b'a.c') + assert ignore_filter.is_ignored(b'c.c') is None + assert list(ignore_filter.find_matching(b'a.c')) == [Pattern(b'a.c')] + assert list(ignore_filter.find_matching(b'c.c')) == [] + + def test_ignore_filter_exclusion(self) -> None: + ignore_filter = IgnoreFilter([b'a.c', b'b.c', b'!c.c']) + assert not ignore_filter.is_ignored(b'c.c') + assert ignore_filter.is_ignored(b'd.c') is None + assert list(ignore_filter.find_matching(b'c.c')) == [Pattern(b'!c.c')] + assert list(ignore_filter.find_matching(b'd.c')) == [] + + def test_ignore_filter_manager(self, fs: 'FakeFilesystem') -> None: + # Prepare sample ignore patterns + fs.create_file('/path/to/repo/.gitignore', contents=b'/foo/bar\n/dir2\n/dir3/\n') + fs.create_file('/path/to/repo/dir/.gitignore', contents=b'/blie\n') + fs.create_file('/path/to/repo/.git/info/exclude', contents=b'/excluded\n') + + m = IgnoreFilterManager.build('/path/to/repo') + + 
assert m.is_ignored('dir/blie') + assert m.is_ignored(os.path.join('dir', 'bloe')) is None + assert m.is_ignored('dir') is None + assert m.is_ignored(os.path.join('foo', 'bar')) + assert m.is_ignored(os.path.join('excluded')) + assert m.is_ignored(os.path.join('dir2', 'fileinignoreddir')) + assert not m.is_ignored('dir3') + assert m.is_ignored('dir3/') + assert m.is_ignored('dir3/bla') + + def test_nested_gitignores(self, fs: 'FakeFilesystem') -> None: + fs.create_file('/path/to/repo/.gitignore', contents=b'/*\n!/foo\n') + fs.create_file('/path/to/repo/foo/.gitignore', contents=b'/bar\n') + fs.create_file('/path/to/repo/foo/bar', contents=b'IGNORED') + + m = IgnoreFilterManager.build('/path/to/repo') + assert m.is_ignored('foo/bar') + + def test_load_ignore_ignore_case(self, fs: 'FakeFilesystem') -> None: + fs.create_file('/path/to/repo/.gitignore', contents=b'/foo/bar\n/dir\n') + + m = IgnoreFilterManager.build('/path/to/repo', ignore_case=True) + assert m.is_ignored(os.path.join('dir', 'blie')) + assert m.is_ignored(os.path.join('DIR', 'blie')) + + def test_ignored_contents(self, fs: 'FakeFilesystem') -> None: + fs.create_file('/path/to/repo/.gitignore', contents=b'a/*\n!a/*.txt\n') + + m = IgnoreFilterManager.build('/path/to/repo') + assert m.is_ignored('a') is None + assert m.is_ignored('a/') is None + assert not m.is_ignored('a/b.txt') + assert m.is_ignored('a/c.dat')