From 3cdb4598418a83c0215fc48485e84905fedbce26 Mon Sep 17 00:00:00 2001 From: Ilya Siamionau Date: Tue, 10 Dec 2024 19:24:32 +0100 Subject: [PATCH 01/16] CM-42771 - Support `.gitignore` and `.cycodeignore` files for a file excluding from scans --- cycode/cli/files_collector/path_documents.py | 71 +++++++++++- poetry.lock | 101 +++++++++++++----- pyproject.toml | 1 + tests/cli/files_collector/__init__.py | 0 .../files_collector/test_path_documents.py | 80 ++++++++++++++ 5 files changed, 219 insertions(+), 34 deletions(-) create mode 100644 tests/cli/files_collector/__init__.py create mode 100644 tests/cli/files_collector/test_path_documents.py diff --git a/cycode/cli/files_collector/path_documents.py b/cycode/cli/files_collector/path_documents.py index 98a021e4..978169d4 100644 --- a/cycode/cli/files_collector/path_documents.py +++ b/cycode/cli/files_collector/path_documents.py @@ -1,5 +1,6 @@ import os -from typing import TYPE_CHECKING, Iterable, List, Tuple +from collections import defaultdict +from typing import Set, TYPE_CHECKING, Iterable, List, Tuple import pathspec @@ -18,12 +19,72 @@ from cycode.cli.utils.progress_bar import BaseProgressBar, ProgressBarSection -def _get_all_existing_files_in_directory(path: str) -> List[str]: - files: List[str] = [] +def _walk_to_top(path: str) -> Iterable[str]: + while os.path.dirname(path) != path: + yield path + path = os.path.dirname(path) + + if path: + yield path # Include the top-level directory + + +_SUPPORTED_IGNORE_PATTERN_FILES = {'.gitignore', '.cycodeignore'} + + +def _collect_top_level_ignore_files(path: str) -> List[str]: + ignore_files = [] + for dir_path in _walk_to_top(path): + for ignore_file in _SUPPORTED_IGNORE_PATTERN_FILES: + ignore_file_path = os.path.join(dir_path, ignore_file) + if os.path.exists(ignore_file_path): + logger.debug('Found top level ignore file: %s', ignore_file_path) + ignore_files.append(ignore_file_path) + return ignore_files + + +def _get_global_ignore_patterns(path: str) -> List[str]: + ignore_patterns = [] + for ignore_file in _collect_top_level_ignore_files(path): + file_patterns = get_file_content(ignore_file).splitlines() + ignore_patterns.extend(file_patterns) + return ignore_patterns + + +def _apply_ignore_patterns(ignore_patterns: List[str], files: Set[str]) -> Set[str]: + if not ignore_patterns: + return files + + path_spec = pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern, ignore_patterns) + excluded_file_paths = set(path_spec.match_files(files)) + + return files - excluded_file_paths + + +def _get_all_existing_files_in_directory(path: str, *, apply_ignore_patterns: bool = True) -> Set[str]: + files: Set[str] = set() + + global_ignore_patterns = _get_global_ignore_patterns(path) + path_to_ignore_patterns = defaultdict(list) for root, _, filenames in os.walk(path): for filename in filenames: - files.append(os.path.join(root, filename)) + filepath = os.path.join(root, filename) + + if filepath in _SUPPORTED_IGNORE_PATTERN_FILES: + logger.debug('Found ignore file: %s', filepath) + # TODO(MarshalX): accumulate ignore pattern from previous levels + path_to_ignore_patterns[root].extend(get_file_content(filepath).splitlines()) + + if apply_ignore_patterns and root in path_to_ignore_patterns: + filtered_paths = _apply_ignore_patterns(path_to_ignore_patterns[root], {filepath,}) + if filtered_paths: + files.update(filtered_paths) + else: + files.add(os.path.join(root, filename)) + + if apply_ignore_patterns: + logger.debug('Applying global ignore patterns %s', {'global_ignore_patterns': global_ignore_patterns}) + return _apply_ignore_patterns(global_ignore_patterns, files) return files @@ -37,7 +98,7 @@ def _get_relevant_files_in_path(path: str, exclude_patterns: Iterable[str]) -> L if os.path.isfile(absolute_path): return [absolute_path] - all_file_paths = set(_get_all_existing_files_in_directory(absolute_path)) + all_file_paths = _get_all_existing_files_in_directory(absolute_path) path_spec = pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern, exclude_patterns) excluded_file_paths = set(path_spec.match_files(all_file_paths)) diff --git a/poetry.lock b/poetry.lock index a1d8c39f..12a635c7 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand. [[package]] name = "altgraph" @@ -436,13 +436,13 @@ test = ["pytest (<5.4)", "pytest-cov"] [[package]] name = "packaging" -version = "24.1" +version = "24.2" description = "Core utilities for Python packages" optional = false python-versions = ">=3.8" files = [ - {file = "packaging-24.1-py3-none-any.whl", hash = "sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124"}, - {file = "packaging-24.1.tar.gz", hash = "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002"}, + {file = "packaging-24.2-py3-none-any.whl", hash = "sha256:09abb1bccd265c01f4a3aa3f7a7db064b36514d2cba19a2f694fe6150451a759"}, + {file = "packaging-24.2.tar.gz", hash = "sha256:c228a6dc5e932d346bc5739379109d49e8853dd8223571c7c5b55260edc0b97f"}, ] [[package]] @@ -482,6 +482,17 @@ files = [ dev = ["pre-commit", "tox"] testing = ["pytest", "pytest-benchmark"] +[[package]] +name = "pyfakefs" +version = "5.7.2" +description = "pyfakefs implements a fake file system that mocks the Python file system modules." +optional = false +python-versions = ">=3.7" +files = [ + {file = "pyfakefs-5.7.2-py3-none-any.whl", hash = "sha256:e1527b0e8e4b33be52f0b024ca1deb269c73eecd68457c6b0bf608d6dab12ebd"}, + {file = "pyfakefs-5.7.2.tar.gz", hash = "sha256:40da84175c5af8d9c4f3b31800b8edc4af1e74a212671dd658b21cc881c60000"}, +] + [[package]] name = "pyinstaller" version = "5.13.2" @@ -517,13 +528,13 @@ hook-testing = ["execnet (>=1.5.0)", "psutil", "pytest (>=2.7.3)"] [[package]] name = "pyinstaller-hooks-contrib" -version = "2024.8" +version = "2024.10" description = "Community maintained hooks for PyInstaller" optional = false python-versions = ">=3.8" files = [ - {file = "pyinstaller_hooks_contrib-2024.8-py3-none-any.whl", hash = "sha256:0057fe9a5c398d3f580e73e58793a1d4a8315ca91c3df01efea1c14ed557825a"}, - {file = "pyinstaller_hooks_contrib-2024.8.tar.gz", hash = "sha256:29b68d878ab739e967055b56a93eb9b58e529d5b054fbab7a2f2bacf80cef3e2"}, + {file = "pyinstaller_hooks_contrib-2024.10-py3-none-any.whl", hash = "sha256:ad47db0e153683b4151e10d231cb91f2d93c85079e78d76d9e0f57ac6c8a5e10"}, + {file = "pyinstaller_hooks_contrib-2024.10.tar.gz", hash = "sha256:8a46655e5c5b0186b5e527399118a9b342f10513eb1425c483fa4f6d02e8800c"}, ] [package.dependencies] @@ -744,13 +755,13 @@ files = [ [[package]] name = "sentry-sdk" -version = "2.16.0" +version = "2.19.2" description = "Python client for Sentry (https://sentry.io)" optional = false python-versions = ">=3.6" files = [ - {file = "sentry_sdk-2.16.0-py2.py3-none-any.whl", hash = "sha256:49139c31ebcd398f4f6396b18910610a0c1602f6e67083240c33019d1f6aa30c"}, - {file = "sentry_sdk-2.16.0.tar.gz", hash = "sha256:90f733b32e15dfc1999e6b7aca67a38688a567329de4d6e184154a73f96c6892"}, + {file = "sentry_sdk-2.19.2-py2.py3-none-any.whl", hash = "sha256:ebdc08228b4d131128e568d696c210d846e5b9d70aa0327dec6b1272d9d40b84"}, + {file = "sentry_sdk-2.19.2.tar.gz", hash = "sha256:467df6e126ba242d39952375dd816fbee0f217d119bf454a8ce74cf1e7909e8d"}, ] [package.dependencies] @@ -776,14 +787,16 @@ grpcio = ["grpcio (>=1.21.1)", "protobuf (>=3.8.0)"] http2 = ["httpcore[http2] (==1.*)"] httpx = ["httpx (>=0.16.0)"] huey = ["huey (>=2)"] -huggingface-hub = ["huggingface-hub (>=0.22)"] +huggingface-hub = ["huggingface_hub (>=0.22)"] langchain = ["langchain (>=0.0.210)"] +launchdarkly = ["launchdarkly-server-sdk (>=9.8.0)"] litestar = ["litestar (>=2.0.0)"] loguru = ["loguru (>=0.5)"] openai = ["openai (>=1.0.0)", "tiktoken (>=0.3.0)"] +openfeature = ["openfeature-sdk (>=0.7.1)"] opentelemetry = ["opentelemetry-distro (>=0.35b0)"] opentelemetry-experimental = ["opentelemetry-distro"] -pure-eval = ["asttokens", "executing", "pure-eval"] +pure-eval = ["asttokens", "executing", "pure_eval"] pymongo = ["pymongo (>=3.1)"] pyspark = ["pyspark (>=2.4.4)"] quart = ["blinker (>=1.1)", "quart (>=0.16.1)"] @@ -796,33 +809,33 @@ tornado = ["tornado (>=6)"] [[package]] name = "setuptools" -version = "75.1.0" +version = "75.3.0" description = "Easily download, build, install, upgrade, and uninstall Python packages" optional = false python-versions = ">=3.8" files = [ - {file = "setuptools-75.1.0-py3-none-any.whl", hash = "sha256:35ab7fd3bcd95e6b7fd704e4a1539513edad446c097797f2985e0e4b960772f2"}, - {file = "setuptools-75.1.0.tar.gz", hash = "sha256:d59a21b17a275fb872a9c3dae73963160ae079f1049ed956880cd7c09b120538"}, + {file = "setuptools-75.3.0-py3-none-any.whl", hash = "sha256:f2504966861356aa38616760c0f66568e535562374995367b4e69c7143cf6bcd"}, + {file = "setuptools-75.3.0.tar.gz", hash = "sha256:fba5dd4d766e97be1b1681d98712680ae8f2f26d7881245f2ce9e40714f1a686"}, ] [package.extras] check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)", "ruff (>=0.5.2)"] -core = ["importlib-metadata (>=6)", "importlib-resources (>=5.10.2)", "jaraco.collections", "jaraco.functools", "jaraco.text (>=3.7)", "more-itertools", "more-itertools (>=8.8)", "packaging", "packaging (>=24)", "platformdirs (>=2.6.2)", "tomli (>=2.0.1)", "wheel (>=0.43.0)"] +core = ["importlib-metadata (>=6)", "importlib-resources (>=5.10.2)", "jaraco.collections", "jaraco.functools", "jaraco.text (>=3.7)", "more-itertools", "more-itertools (>=8.8)", "packaging", "packaging (>=24)", "platformdirs (>=4.2.2)", "tomli (>=2.0.1)", "wheel (>=0.43.0)"] cover = ["pytest-cov"] doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier", "towncrier (<24.7)"] enabler = ["pytest-enabler (>=2.2)"] -test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"] -type = ["importlib-metadata (>=7.0.2)", "jaraco.develop (>=7.21)", "mypy (==1.11.*)", "pytest-mypy"] +test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test (>=5.5)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"] +type = ["importlib-metadata (>=7.0.2)", "jaraco.develop (>=7.21)", "mypy (==1.12.*)", "pytest-mypy"] [[package]] name = "six" -version = "1.16.0" +version = "1.17.0" description = "Python 2 and 3 compatibility utilities" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" files = [ - {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, - {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, + {file = "six-1.17.0-py2.py3-none-any.whl", hash = "sha256:4721f391ed90541fddacab5acf947aa0d3dc7d27b2e1e8eda2be8970586c3274"}, + {file = "six-1.17.0.tar.gz", hash = "sha256:ff70335d468e7eb6ec65b95b99d3a2836546063f63acc5171de367e834932a81"}, ] [[package]] @@ -849,24 +862,54 @@ files = [ [[package]] name = "tomli" -version = "2.0.2" +version = "2.2.1" description = "A lil' TOML parser" optional = false python-versions = ">=3.8" files = [ - {file = "tomli-2.0.2-py3-none-any.whl", hash = "sha256:2ebe24485c53d303f690b0ec092806a085f07af5a5aa1464f3931eec36caaa38"}, - {file = "tomli-2.0.2.tar.gz", hash = "sha256:d46d457a85337051c36524bc5349dd91b1877838e2979ac5ced3e710ed8a60ed"}, + {file = "tomli-2.2.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678e4fa69e4575eb77d103de3df8a895e1591b48e740211bd1067378c69e8249"}, + {file = "tomli-2.2.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:023aa114dd824ade0100497eb2318602af309e5a55595f76b626d6d9f3b7b0a6"}, + {file = "tomli-2.2.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ece47d672db52ac607a3d9599a9d48dcb2f2f735c6c2d1f34130085bb12b112a"}, + {file = "tomli-2.2.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6972ca9c9cc9f0acaa56a8ca1ff51e7af152a9f87fb64623e31d5c83700080ee"}, + {file = "tomli-2.2.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c954d2250168d28797dd4e3ac5cf812a406cd5a92674ee4c8f123c889786aa8e"}, + {file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8dd28b3e155b80f4d54beb40a441d366adcfe740969820caf156c019fb5c7ec4"}, + {file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:e59e304978767a54663af13c07b3d1af22ddee3bb2fb0618ca1593e4f593a106"}, + {file = "tomli-2.2.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:33580bccab0338d00994d7f16f4c4ec25b776af3ffaac1ed74e0b3fc95e885a8"}, + {file = "tomli-2.2.1-cp311-cp311-win32.whl", hash = "sha256:465af0e0875402f1d226519c9904f37254b3045fc5084697cefb9bdde1ff99ff"}, + {file = "tomli-2.2.1-cp311-cp311-win_amd64.whl", hash = "sha256:2d0f2fdd22b02c6d81637a3c95f8cd77f995846af7414c5c4b8d0545afa1bc4b"}, + {file = "tomli-2.2.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:4a8f6e44de52d5e6c657c9fe83b562f5f4256d8ebbfe4ff922c495620a7f6cea"}, + {file = "tomli-2.2.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:8d57ca8095a641b8237d5b079147646153d22552f1c637fd3ba7f4b0b29167a8"}, + {file = "tomli-2.2.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4e340144ad7ae1533cb897d406382b4b6fede8890a03738ff1683af800d54192"}, + {file = "tomli-2.2.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db2b95f9de79181805df90bedc5a5ab4c165e6ec3fe99f970d0e302f384ad222"}, + {file = "tomli-2.2.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:40741994320b232529c802f8bc86da4e1aa9f413db394617b9a256ae0f9a7f77"}, + {file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:400e720fe168c0f8521520190686ef8ef033fb19fc493da09779e592861b78c6"}, + {file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:02abe224de6ae62c19f090f68da4e27b10af2b93213d36cf44e6e1c5abd19fdd"}, + {file = "tomli-2.2.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b82ebccc8c8a36f2094e969560a1b836758481f3dc360ce9a3277c65f374285e"}, + {file = "tomli-2.2.1-cp312-cp312-win32.whl", hash = "sha256:889f80ef92701b9dbb224e49ec87c645ce5df3fa2cc548664eb8a25e03127a98"}, + {file = "tomli-2.2.1-cp312-cp312-win_amd64.whl", hash = "sha256:7fc04e92e1d624a4a63c76474610238576942d6b8950a2d7f908a340494e67e4"}, + {file = "tomli-2.2.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f4039b9cbc3048b2416cc57ab3bda989a6fcf9b36cf8937f01a6e731b64f80d7"}, + {file = "tomli-2.2.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:286f0ca2ffeeb5b9bd4fcc8d6c330534323ec51b2f52da063b11c502da16f30c"}, + {file = "tomli-2.2.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a92ef1a44547e894e2a17d24e7557a5e85a9e1d0048b0b5e7541f76c5032cb13"}, + {file = "tomli-2.2.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9316dc65bed1684c9a98ee68759ceaed29d229e985297003e494aa825ebb0281"}, + {file = "tomli-2.2.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e85e99945e688e32d5a35c1ff38ed0b3f41f43fad8df0bdf79f72b2ba7bc5272"}, + {file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:ac065718db92ca818f8d6141b5f66369833d4a80a9d74435a268c52bdfa73140"}, + {file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:d920f33822747519673ee656a4b6ac33e382eca9d331c87770faa3eef562aeb2"}, + {file = "tomli-2.2.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a198f10c4d1b1375d7687bc25294306e551bf1abfa4eace6650070a5c1ae2744"}, + {file = "tomli-2.2.1-cp313-cp313-win32.whl", hash = "sha256:d3f5614314d758649ab2ab3a62d4f2004c825922f9e370b29416484086b264ec"}, + {file = "tomli-2.2.1-cp313-cp313-win_amd64.whl", hash = "sha256:a38aa0308e754b0e3c67e344754dff64999ff9b513e691d0e786265c93583c69"}, + {file = "tomli-2.2.1-py3-none-any.whl", hash = "sha256:cb55c73c5f4408779d0cf3eef9f762b9c9f147a77de7b258bef0a5628adc85cc"}, + {file = "tomli-2.2.1.tar.gz", hash = "sha256:cd45e1dc79c835ce60f7404ec8119f2eb06d38b1deba146f07ced3bbc44505ff"}, ] [[package]] name = "types-python-dateutil" -version = "2.9.0.20241003" +version = "2.9.0.20241206" description = "Typing stubs for python-dateutil" optional = false python-versions = ">=3.8" files = [ - {file = "types-python-dateutil-2.9.0.20241003.tar.gz", hash = "sha256:58cb85449b2a56d6684e41aeefb4c4280631246a0da1a719bdbe6f3fb0317446"}, - {file = "types_python_dateutil-2.9.0.20241003-py3-none-any.whl", hash = "sha256:250e1d8e80e7bbc3a6c99b907762711d1a1cdd00e978ad39cb5940f6f0a87f3d"}, + {file = "types_python_dateutil-2.9.0.20241206-py3-none-any.whl", hash = "sha256:e248a4bc70a486d3e3ec84d0dc30eec3a5f979d6e7ee4123ae043eedbb987f53"}, + {file = "types_python_dateutil-2.9.0.20241206.tar.gz", hash = "sha256:18f493414c26ffba692a72369fea7a154c502646301ebfe3d56a04b3767284cb"}, ] [[package]] @@ -918,4 +961,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.0" python-versions = ">=3.8,<3.14" -content-hash = "6e23c9650b529e0c928f90a17d549d73b8418e11a86c2a1c9213f7582faa7e17" +content-hash = "e79b70897118c6b6ae75a62d6d85bdc28a5f345871600dedc5e191c3cf3dbdfd" diff --git a/pyproject.toml b/pyproject.toml index 9c1e1f9a..32007de5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -48,6 +48,7 @@ pytest = ">=7.3.1,<7.4.0" pytest-mock = ">=3.10.0,<3.11.0" coverage = ">=7.2.3,<7.3.0" responses = ">=0.23.1,<0.24.0" +pyfakefs = ">=5.7.2,<5.8.0" [tool.poetry.group.executable.dependencies] pyinstaller = {version=">=5.13.2,<5.14.0", python=">=3.8,<3.13"} diff --git a/tests/cli/files_collector/__init__.py b/tests/cli/files_collector/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/cli/files_collector/test_path_documents.py b/tests/cli/files_collector/test_path_documents.py new file mode 100644 index 00000000..f830a0d4 --- /dev/null +++ b/tests/cli/files_collector/test_path_documents.py @@ -0,0 +1,80 @@ +from typing import TYPE_CHECKING + +from cycode.cli.files_collector.path_documents import ( + _collect_top_level_ignore_files, + _get_global_ignore_patterns, + _walk_to_top, +) + +if TYPE_CHECKING: + from pyfakefs.fake_filesystem import FakeFilesystem + + +def test_walk_to_top() -> None: + path = '/a/b/c/d/e/f/g' + result = list(_walk_to_top(path)) + assert result == ['/a/b/c/d/e/f/g', '/a/b/c/d/e/f', '/a/b/c/d/e', '/a/b/c/d', '/a/b/c', '/a/b', '/a', '/'] + + path = '/a/b/c' + result = list(_walk_to_top(path)) + assert result == ['/a/b/c', '/a/b', '/a', '/'] + + path = '/a' + result = list(_walk_to_top(path)) + assert result == ['/a', '/'] + + path = '/' + result = list(_walk_to_top(path)) + assert result == ['/'] + + path = 'a' + result = list(_walk_to_top(path)) + assert result == ['a'] + + +def _create_mocked_file_structure(fs: 'FakeFilesystem') -> None: + fs.create_dir('/home/user/project') + fs.create_dir('/home/user/.git') + fs.create_file('/home/user/project/.gitignore', contents='*.pyc') + fs.create_file('/home/user/project/.cycodeignore', contents='*.log') + fs.create_dir('/home/user/project/subdir') + fs.create_file('/home/user/project/subdir/.gitignore', contents='*.txt') + + +def test_collect_top_level_ignore_files(fs: 'FakeFilesystem') -> None: + _create_mocked_file_structure(fs) + + # Test with path inside the project + path = '/home/user/project/subdir' + ignore_files = _collect_top_level_ignore_files(path) + + assert len(ignore_files) == 3 + assert '/home/user/project/subdir/.gitignore' in ignore_files + assert '/home/user/project/.gitignore' in ignore_files + assert '/home/user/project/.cycodeignore' in ignore_files + + # Test with a path that does not have any ignore files + fs.remove('/home/user/project/.gitignore') + path = '/home/user' + ignore_files = _collect_top_level_ignore_files(path) + + assert len(ignore_files) == 0 + + # Test with path at the top level with no ignore files + path = '/home/user/.git' + ignore_files = _collect_top_level_ignore_files(path) + + assert len(ignore_files) == 0 + + # Test with path at the top level with a .gitignore + path = '/home/user/project' + ignore_files = _collect_top_level_ignore_files(path) + + assert len(ignore_files) == 1 + assert '/home/user/project/.cycodeignore' in ignore_files + + +def test_get_global_ignore_patterns(fs: 'FakeFilesystem') -> None: + _create_mocked_file_structure(fs) + ignore_patterns = _get_global_ignore_patterns('/home/user/project/subdir') + assert ignore_patterns == ['*.txt', '*.pyc', '*.log'] From 747950a43331ab1edfd38b511b03c7fc8772a20f Mon Sep 17 00:00:00 2001 From: Ilya Siamionau Date: Tue, 10 Dec 2024 19:26:58 +0100 Subject: [PATCH 02/16] fix ruff --- cycode/cli/files_collector/path_documents.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/cycode/cli/files_collector/path_documents.py b/cycode/cli/files_collector/path_documents.py index 978169d4..2466d6bf 100644 --- a/cycode/cli/files_collector/path_documents.py +++ b/cycode/cli/files_collector/path_documents.py @@ -1,6 +1,6 @@ import os from collections import defaultdict -from typing import Set, TYPE_CHECKING, Iterable, List, Tuple +from typing import TYPE_CHECKING, Iterable, List, Set, Tuple import pathspec @@ -76,7 +76,12 @@ def _get_all_existing_files_in_directory(path: str, *, apply_ignore_patterns: bo path_to_ignore_patterns[root].extend(get_file_content(filepath).splitlines()) if apply_ignore_patterns and root in path_to_ignore_patterns: - filtered_paths = _apply_ignore_patterns(path_to_ignore_patterns[root], {filepath,}) + filtered_paths = _apply_ignore_patterns( + path_to_ignore_patterns[root], + { + filepath, + }, + ) if filtered_paths: files.update(filtered_paths) else: From 51522ba2d33ec811c33c39f0ce342086fc6610c7 Mon Sep 17 00:00:00 2001 From: Ilya Siamionau Date: Tue, 10 Dec 2024 19:29:12 +0100 Subject: [PATCH 03/16] make test deterministic --- tests/cli/files_collector/test_path_documents.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/cli/files_collector/test_path_documents.py b/tests/cli/files_collector/test_path_documents.py index f830a0d4..8a314703 100644 --- a/tests/cli/files_collector/test_path_documents.py +++ b/tests/cli/files_collector/test_path_documents.py @@ -77,4 +77,8 @@ def test_collect_top_level_ignore_files(fs: 'FakeFilesystem') -> None: def test_get_global_ignore_patterns(fs: 'FakeFilesystem') -> None: _create_mocked_file_structure(fs) ignore_patterns = _get_global_ignore_patterns('/home/user/project/subdir') - assert ignore_patterns == ['*.txt', '*.pyc', '*.log'] + + assert len(ignore_patterns) == 3 + assert '*.txt' in ignore_patterns + assert '*.pyc' in ignore_patterns + assert '*.log' in ignore_patterns From 8438f2f4ff5a4119c43508f346e3f33e60cb1e18 Mon Sep 17 00:00:00 2001 From: Ilya Siamionau Date: Wed, 11 Dec 2024 15:59:15 +0100 Subject: [PATCH 04/16] fix tests on Windows --- .../files_collector/test_path_documents.py | 51 +++++++++++-------- 1 file changed, 30 insertions(+), 21 deletions(-) diff --git a/tests/cli/files_collector/test_path_documents.py b/tests/cli/files_collector/test_path_documents.py index 8a314703..304ff813 100644 --- a/tests/cli/files_collector/test_path_documents.py +++ b/tests/cli/files_collector/test_path_documents.py @@ -1,3 +1,4 @@ +from os.path import normpath from typing import TYPE_CHECKING from cycode.cli.files_collector.path_documents import ( @@ -10,26 +11,34 @@ from pyfakefs.fake_filesystem import FakeFilesystem -def test_walk_to_top() -> None: - path = '/a/b/c/d/e/f/g' - result = list(_walk_to_top(path)) - assert result == ['/a/b/c/d/e/f/g', '/a/b/c/d/e/f', '/a/b/c/d/e', '/a/b/c/d', '/a/b/c', '/a/b', '/a', '/'] +# we are using normpath() in every test to provide multi-platform support - path = '/a/b/c' - result = list(_walk_to_top(path)) - assert result == ['/a/b/c', '/a/b', '/a', '/'] - path = '/a' +def test_walk_to_top() -> None: + path = normpath('/a/b/c/d/e/f/g') + result = list(_walk_to_top(path)) + assert result == [ + normpath('/a/b/c/d/e/f/g'), + normpath('/a/b/c/d/e/f'), + normpath('/a/b/c/d/e'), + normpath('/a/b/c/d'), + normpath('/a/b/c'), + normpath('/a/b'), + normpath('/a'), + normpath('/'), + ] + + path = normpath('/a') result = list(_walk_to_top(path)) - assert result == ['/a', '/'] + assert result == [normpath('/a'), normpath('/')] - path = '/' + path = normpath('/') result = list(_walk_to_top(path)) - assert result == ['/'] + assert result == [normpath('/')] - path = 'a' + path = normpath('a') result = list(_walk_to_top(path)) - assert result == ['a'] + assert result == [normpath('a')] def _create_mocked_file_structure(fs: 'FakeFilesystem') -> None: @@ -45,33 +54,33 @@ def test_collect_top_level_ignore_files(fs: 'FakeFilesystem') -> None: _create_mocked_file_structure(fs) # Test with path inside the project - path = '/home/user/project/subdir' + path = normpath('/home/user/project/subdir') ignore_files = _collect_top_level_ignore_files(path) assert len(ignore_files) == 3 - assert '/home/user/project/subdir/.gitignore' in ignore_files - assert '/home/user/project/.gitignore' in ignore_files - assert '/home/user/project/.cycodeignore' in ignore_files + assert normpath('/home/user/project/subdir/.gitignore') in ignore_files + assert normpath('/home/user/project/.gitignore') in ignore_files + assert normpath('/home/user/project/.cycodeignore') in ignore_files # Test with a path that does not have any ignore files fs.remove('/home/user/project/.gitignore') - path = '/home/user' + path = normpath('/home/user') ignore_files = _collect_top_level_ignore_files(path) assert len(ignore_files) == 0 # Test with path at the top level with no ignore files - path = '/home/user/.git' + path = normpath('/home/user/.git') ignore_files = _collect_top_level_ignore_files(path) assert len(ignore_files) == 0 # Test with path at the top level with a .gitignore - path = '/home/user/project' + path = normpath('/home/user/project') ignore_files = _collect_top_level_ignore_files(path) assert len(ignore_files) == 1 - assert '/home/user/project/.cycodeignore' in ignore_files + assert normpath('/home/user/project/.cycodeignore') in ignore_files def test_get_global_ignore_patterns(fs: 'FakeFilesystem') -> None: From 1e9de19ea257b74ac65580ac60dbb3857e95987f Mon Sep 17 00:00:00 2001 From: Ilya Siamionau Date: Wed, 11 Dec 2024 18:36:49 +0100 Subject: [PATCH 05/16] perf optimization; code refactor --- cycode/cli/files_collector/path_documents.py | 95 +++---------------- cycode/cli/files_collector/walk_ignore.py | 73 ++++++++++++++ ..._path_documents.py => test_walk_ignore.py} | 10 +- 3 files changed, 95 insertions(+), 83 deletions(-) create mode 100644 cycode/cli/files_collector/walk_ignore.py rename tests/cli/files_collector/{test_path_documents.py => test_walk_ignore.py} (90%) diff --git a/cycode/cli/files_collector/path_documents.py b/cycode/cli/files_collector/path_documents.py index 2466d6bf..27d24cd8 100644 --- a/cycode/cli/files_collector/path_documents.py +++ b/cycode/cli/files_collector/path_documents.py @@ -1,6 +1,5 @@ import os -from collections import defaultdict -from typing import TYPE_CHECKING, Iterable, List, Set, Tuple +from typing import TYPE_CHECKING, Iterable, List, Optional, Tuple import pathspec @@ -11,6 +10,7 @@ is_iac, is_tfplan_file, ) +from cycode.cli.files_collector.walk_ignore import walk_ignore from cycode.cli.models import Document from cycode.cli.utils.path_utils import get_absolute_path, get_file_content from cycode.cyclient import logger @@ -19,82 +19,18 @@ from cycode.cli.utils.progress_bar import BaseProgressBar, ProgressBarSection -def _walk_to_top(path: str) -> Iterable[str]: - while os.path.dirname(path) != path: - yield path - path = os.path.dirname(path) +def _get_all_existing_files_in_directory(path: str, *, walk_with_ignore_patterns: bool = True) -> List[str]: + files: List[str] = [] - if path: - yield path # Include the top-level directory - - -_SUPPORTED_IGNORE_PATTERN_FILES = {'.gitignore', '.cycodeignore'} - - -def _collect_top_level_ignore_files(path: str) -> List[str]: - ignore_files = [] - for dir_path in _walk_to_top(path): - for ignore_file in _SUPPORTED_IGNORE_PATTERN_FILES: - ignore_file_path = os.path.join(dir_path, ignore_file) - if os.path.exists(ignore_file_path): - logger.debug('Found top level ignore file: %s', ignore_file_path) - ignore_files.append(ignore_file_path) - return ignore_files - - -def _get_global_ignore_patterns(path: str) -> List[str]: - ignore_patterns = [] - for ignore_file in _collect_top_level_ignore_files(path): - file_patterns = get_file_content(ignore_file).splitlines() - ignore_patterns.extend(file_patterns) - return ignore_patterns - - -def _apply_ignore_patterns(ignore_patterns: List[str], files: Set[str]) -> Set[str]: - if not ignore_patterns: - return files - - path_spec = pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern, ignore_patterns) - excluded_file_paths = set(path_spec.match_files(files)) - - return files - excluded_file_paths - - -def _get_all_existing_files_in_directory(path: str, *, apply_ignore_patterns: bool = True) -> Set[str]: - files: Set[str] = set() - - global_ignore_patterns = _get_global_ignore_patterns(path) - path_to_ignore_patterns = defaultdict(list) - - for root, _, filenames in os.walk(path): + walk_func = walk_ignore if walk_with_ignore_patterns else os.walk + for root, _, filenames in walk_func(path): for filename in filenames: - filepath = os.path.join(root, filename) - - if filepath in _SUPPORTED_IGNORE_PATTERN_FILES: - logger.debug('Found ignore file: %s', filepath) - # TODO(MarshalX): accumulate ignore pattern from previous levels - path_to_ignore_patterns[root].extend(get_file_content(filepath).splitlines()) - - if apply_ignore_patterns and root in path_to_ignore_patterns: - filtered_paths = _apply_ignore_patterns( - path_to_ignore_patterns[root], - { - filepath, - }, - ) - if filtered_paths: - files.update(filtered_paths) - else: - files.add(os.path.join(root, filename)) - - if apply_ignore_patterns: - logger.debug('Applying global ignore patterns %s', {'global_ignore_patterns': global_ignore_patterns}) - return _apply_ignore_patterns(global_ignore_patterns, files) + files.append(os.path.join(root, filename)) return files -def _get_relevant_files_in_path(path: str, exclude_patterns: Iterable[str]) -> List[str]: +def _get_relevant_files_in_path(path: str, exclude_patterns: Optional[Iterable[str]] = None) -> List[str]: absolute_path = get_absolute_path(path) if not os.path.isfile(absolute_path) and not os.path.isdir(absolute_path): @@ -103,14 +39,13 @@ def _get_relevant_files_in_path(path: str, exclude_patterns: Iterable[str]) -> L if os.path.isfile(absolute_path): return [absolute_path] - all_file_paths = _get_all_existing_files_in_directory(absolute_path) - - path_spec = pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern, exclude_patterns) - excluded_file_paths = set(path_spec.match_files(all_file_paths)) + file_paths = _get_all_existing_files_in_directory(absolute_path) - relevant_file_paths = all_file_paths - excluded_file_paths + if exclude_patterns: + path_spec = pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern, exclude_patterns) + file_paths = path_spec.match_files(file_paths, negate=True) - return [file_path for file_path in relevant_file_paths if os.path.isfile(file_path)] + return [file_path for file_path in file_paths if os.path.isfile(file_path)] def _get_relevant_files( @@ -118,9 +53,7 @@ def _get_relevant_files( ) -> List[str]: all_files_to_scan = [] for path in paths: - all_files_to_scan.extend( - _get_relevant_files_in_path(path=path, exclude_patterns=['**/.git/**', '**/.cycode/**']) - ) + all_files_to_scan.extend(_get_relevant_files_in_path(path)) # we are double the progress bar section length because we are going to process the files twice # first time to get the file list with respect of excluded patterns (excluding takes seconds to execute) diff --git a/cycode/cli/files_collector/walk_ignore.py b/cycode/cli/files_collector/walk_ignore.py new file mode 100644 index 00000000..ac31e006 --- /dev/null +++ b/cycode/cli/files_collector/walk_ignore.py @@ -0,0 +1,73 @@ +import os +from collections import defaultdict +from typing import Iterable, List + +import pathspec +from pathspec.util import StrPath + +from cycode.cli.utils.path_utils import get_file_content +from cycode.cyclient import logger + +_SUPPORTED_IGNORE_PATTERN_FILES = {'.gitignore', '.cycodeignore'} +_DEFAULT_GLOBAL_IGNORE_PATTERNS = [ + '.git', + '.cycode', + '**/.git/**', + '**/.cycode/**', +] + + +def _walk_to_top(path: str) -> Iterable[str]: + while os.path.dirname(path) != path: + yield path + path = os.path.dirname(path) + + if path: + yield path # Include the top-level directory + + +def _collect_top_level_ignore_files(path: str) -> List[str]: + ignore_files = [] + for dir_path in _walk_to_top(path): + for ignore_file in _SUPPORTED_IGNORE_PATTERN_FILES: + ignore_file_path = os.path.join(dir_path, ignore_file) + if os.path.exists(ignore_file_path): + logger.debug('Apply top level ignore file: %s', ignore_file_path) + ignore_files.append(ignore_file_path) + return ignore_files + + +def _get_global_ignore_patterns(path: str) -> List[str]: + ignore_patterns = _DEFAULT_GLOBAL_IGNORE_PATTERNS.copy() + for ignore_file in _collect_top_level_ignore_files(path): + file_patterns = get_file_content(ignore_file).splitlines() + ignore_patterns.extend(file_patterns) + return ignore_patterns + + +def _should_include_path(ignore_patterns: List[str], path: StrPath) -> bool: + path_spec = pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern, ignore_patterns) + return not path_spec.match_file(path) # works with both files and directories; negative match + + +def walk_ignore(path: str) -> List[str]: + global_ignore_patterns = _get_global_ignore_patterns(path) + path_to_ignore_patterns = defaultdict(list) + + for dirpath, dirnames, filenames in os.walk(path, topdown=True): + # finds and processes ignore files first to get the patterns + for filename in filenames: + filepath = os.path.join(dirpath, filename) + if filename in _SUPPORTED_IGNORE_PATTERN_FILES: + logger.debug('Apply ignore file: %s', filepath) + # TODO(MarshalX): accumulate ignore pattern from previous levels + path_to_ignore_patterns[dirpath].extend(get_file_content(filepath).splitlines()) + + ignore_patterns = global_ignore_patterns + path_to_ignore_patterns.get(dirpath, []) + + # decrease recursion depth of os.walk() because of topdown=True by changing the list in-place + # slicing ([:]) is mandatory to change dict in-place! + dirnames[:] = [d for d in dirnames if _should_include_path(ignore_patterns, d)] + filenames[:] = [f for f in filenames if _should_include_path(ignore_patterns, f)] + + yield dirpath, dirnames, filenames diff --git a/tests/cli/files_collector/test_path_documents.py b/tests/cli/files_collector/test_walk_ignore.py similarity index 90% rename from tests/cli/files_collector/test_path_documents.py rename to tests/cli/files_collector/test_walk_ignore.py index 304ff813..dd854eda 100644 --- a/tests/cli/files_collector/test_path_documents.py +++ b/tests/cli/files_collector/test_walk_ignore.py @@ -1,7 +1,7 @@ from os.path import normpath from typing import TYPE_CHECKING -from cycode.cli.files_collector.path_documents import ( +from cycode.cli.files_collector.walk_ignore import ( _collect_top_level_ignore_files, _get_global_ignore_patterns, _walk_to_top, @@ -87,7 +87,13 @@ def test_get_global_ignore_patterns(fs: 'FakeFilesystem') -> None: _create_mocked_file_structure(fs) ignore_patterns = _get_global_ignore_patterns('/home/user/project/subdir') - assert len(ignore_patterns) == 3 + assert len(ignore_patterns) == 7 + # default global: + assert '.git' in ignore_patterns + assert '.cycode' in ignore_patterns + assert '**/.git/**' in ignore_patterns + assert '**/.cycode/**' in ignore_patterns + # additional: assert '*.txt' in ignore_patterns assert '*.pyc' in ignore_patterns assert '*.log' in ignore_patterns From 7cb0f1556052754eaf03c044568fc1f264dc9d8a Mon Sep 17 00:00:00 2001 From: Ilya Siamionau Date: Wed, 11 Dec 2024 18:41:55 +0100 Subject: [PATCH 06/16] fix ignoring; simplify global ignoring patterns --- cycode/cli/files_collector/walk_ignore.py | 10 ++++------ tests/cli/files_collector/test_walk_ignore.py | 8 +++----- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/cycode/cli/files_collector/walk_ignore.py b/cycode/cli/files_collector/walk_ignore.py index ac31e006..c23a8ef7 100644 --- a/cycode/cli/files_collector/walk_ignore.py +++ b/cycode/cli/files_collector/walk_ignore.py @@ -10,10 +10,8 @@ _SUPPORTED_IGNORE_PATTERN_FILES = {'.gitignore', '.cycodeignore'} _DEFAULT_GLOBAL_IGNORE_PATTERNS = [ - '.git', - '.cycode', - '**/.git/**', - '**/.cycode/**', + '**/.git', + '**/.cycode', ] @@ -67,7 +65,7 @@ def walk_ignore(path: str) -> List[str]: # decrease recursion depth of os.walk() because of topdown=True by changing the list in-place # slicing ([:]) is mandatory to change dict in-place! - dirnames[:] = [d for d in dirnames if _should_include_path(ignore_patterns, d)] - filenames[:] = [f for f in filenames if _should_include_path(ignore_patterns, f)] + dirnames[:] = [d for d in dirnames if _should_include_path(ignore_patterns, os.path.join(dirpath, d))] + filenames[:] = [f for f in filenames if _should_include_path(ignore_patterns, os.path.join(dirpath, f))] yield dirpath, dirnames, filenames diff --git a/tests/cli/files_collector/test_walk_ignore.py b/tests/cli/files_collector/test_walk_ignore.py index dd854eda..23901d36 100644 --- a/tests/cli/files_collector/test_walk_ignore.py +++ b/tests/cli/files_collector/test_walk_ignore.py @@ -87,12 +87,10 @@ def test_get_global_ignore_patterns(fs: 'FakeFilesystem') -> None: _create_mocked_file_structure(fs) ignore_patterns = _get_global_ignore_patterns('/home/user/project/subdir') - assert len(ignore_patterns) == 7 + assert len(ignore_patterns) == 5 # default global: - assert '.git' in ignore_patterns - assert '.cycode' in ignore_patterns - assert '**/.git/**' in ignore_patterns - assert '**/.cycode/**' in ignore_patterns + assert '**/.git' in ignore_patterns + assert '**/.cycode' in ignore_patterns # additional: assert '*.txt' in ignore_patterns assert '*.pyc' in ignore_patterns From 5813c916a0ca736a93c44a8e1f91144c7f472f9f Mon Sep 17 00:00:00 2001 From: Ilya Siamionau Date: Wed, 11 Dec 2024 19:04:45 +0100 Subject: [PATCH 07/16] add inheriting of ignore patterns --- cycode/cli/files_collector/walk_ignore.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/cycode/cli/files_collector/walk_ignore.py b/cycode/cli/files_collector/walk_ignore.py index c23a8ef7..21299bbc 100644 --- a/cycode/cli/files_collector/walk_ignore.py +++ b/cycode/cli/files_collector/walk_ignore.py @@ -1,6 +1,6 @@ import os from collections import defaultdict -from typing import Iterable, List +from typing import Generator, Iterable, List, Tuple import pathspec from pathspec.util import StrPath @@ -48,7 +48,7 @@ def _should_include_path(ignore_patterns: List[str], path: StrPath) -> bool: return not path_spec.match_file(path) # works with both files and directories; negative match -def walk_ignore(path: str) -> List[str]: +def walk_ignore(path: str) -> Generator[Tuple[str, List[str], List[str]], None, None]: global_ignore_patterns = _get_global_ignore_patterns(path) path_to_ignore_patterns = defaultdict(list) @@ -58,7 +58,14 @@ def walk_ignore(path: str) -> List[str]: filepath = os.path.join(dirpath, filename) if filename in _SUPPORTED_IGNORE_PATTERN_FILES: logger.debug('Apply ignore file: %s', filepath) - # TODO(MarshalX): accumulate ignore pattern from previous levels + + parent_dir = os.path.dirname(dirpath) + if dirpath not in path_to_ignore_patterns and parent_dir in path_to_ignore_patterns: + # inherit ignore patterns from parent directory on first occurrence + logger.debug('Inherit ignore patterns: %s', {'inherit_from': parent_dir, 'inherit_to': dirpath}) + path_to_ignore_patterns[dirpath].extend(path_to_ignore_patterns[parent_dir]) + + # always read ignore patterns for the current directory path_to_ignore_patterns[dirpath].extend(get_file_content(filepath).splitlines()) ignore_patterns = global_ignore_patterns + path_to_ignore_patterns.get(dirpath, []) From 0724e5e542fd0859a0bd65e83f1270a64f91bc80 Mon Sep 17 00:00:00 2001 From: Ilya Siamionau Date: Wed, 11 Dec 2024 19:38:55 +0100 Subject: [PATCH 08/16] cover walk_ignore generator with tests --- tests/cli/files_collector/test_walk_ignore.py | 79 +++++++++++++++++-- 1 file changed, 73 insertions(+), 6 deletions(-) diff --git a/tests/cli/files_collector/test_walk_ignore.py b/tests/cli/files_collector/test_walk_ignore.py index 23901d36..87d6a9de 100644 --- a/tests/cli/files_collector/test_walk_ignore.py +++ b/tests/cli/files_collector/test_walk_ignore.py @@ -1,10 +1,12 @@ +import os from os.path import normpath -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, List from cycode.cli.files_collector.walk_ignore import ( _collect_top_level_ignore_files, _get_global_ignore_patterns, _walk_to_top, + walk_ignore, ) if TYPE_CHECKING: @@ -44,21 +46,38 @@ def test_walk_to_top() -> None: def _create_mocked_file_structure(fs: 'FakeFilesystem') -> None: fs.create_dir('/home/user/project') fs.create_dir('/home/user/.git') + + fs.create_dir('/home/user/project/.cycode') + fs.create_file('/home/user/project/.cycode/config.yaml') + fs.create_dir('/home/user/project/.git') + fs.create_file('/home/user/project/.git/HEAD') + fs.create_file('/home/user/project/.gitignore', contents='*.pyc') + fs.create_file('/home/user/project/ignored.pyc') + fs.create_file('/home/user/project/presented.txt') + fs.create_file('/home/user/project/.cycodeignore', contents='*.log') - fs.create_dir('/home/user/project/subdir') - fs.create_file('/home/user/project/subdir/.gitignore', contents='*.txt') + fs.create_file('/home/user/project/ignored2.log') + fs.create_file('/home/user/project/ignored2.pyc') + fs.create_file('/home/user/project/presented2.txt') + + fs.create_dir('/home/user/project/subproject') + fs.create_file('/home/user/project/subproject/.gitignore', contents='*.txt') + fs.create_file('/home/user/project/subproject/ignored.txt') + fs.create_file('/home/user/project/subproject/ignored.log') + fs.create_file('/home/user/project/subproject/ignored.pyc') + fs.create_file('/home/user/project/subproject/presented.py') def test_collect_top_level_ignore_files(fs: 'FakeFilesystem') -> None: _create_mocked_file_structure(fs) # Test with path inside the project - path = normpath('/home/user/project/subdir') + path = normpath('/home/user/project/subproject') ignore_files = _collect_top_level_ignore_files(path) assert len(ignore_files) == 3 - assert normpath('/home/user/project/subdir/.gitignore') in ignore_files + assert normpath('/home/user/project/subproject/.gitignore') in ignore_files assert normpath('/home/user/project/.gitignore') in ignore_files assert normpath('/home/user/project/.cycodeignore') in ignore_files @@ -85,7 +104,7 @@ def test_collect_top_level_ignore_files(fs: 'FakeFilesystem') -> None: def test_get_global_ignore_patterns(fs: 'FakeFilesystem') -> None: _create_mocked_file_structure(fs) - ignore_patterns = _get_global_ignore_patterns('/home/user/project/subdir') + ignore_patterns = _get_global_ignore_patterns('/home/user/project/subproject') assert len(ignore_patterns) == 5 # default global: @@ -95,3 +114,51 @@ def test_get_global_ignore_patterns(fs: 'FakeFilesystem') -> None: assert '*.txt' in ignore_patterns assert '*.pyc' in ignore_patterns assert '*.log' in ignore_patterns + + +def _collect_walk_ignore_files(path: str) -> List[str]: + files = [] + for root, _, filenames in walk_ignore(path): + for filename in filenames: + files.append(os.path.join(root, filename)) + + return files + + +def test_walk_ignore(fs: 'FakeFilesystem') -> None: + _create_mocked_file_structure(fs) + + path = normpath('/home/user/project') + result = _collect_walk_ignore_files(path) + + assert len(result) == 6 + # ignored globally by default: + assert normpath('/home/user/project/.git/HEAD') not in result + assert normpath('/home/user/project/.cycode/config.yaml') not in result + # ignored by .gitignore in project directory: + assert normpath('/home/user/project/ignored.pyc') not in result + assert normpath('/home/user/project/subproject/ignored.pyc') not in result + # ignored by .gitignore in subproject directory: + assert normpath('/home/user/project/subproject/ignored.txt') not in result + # ignored by .cycodeignore in project directory: + assert normpath('/home/user/project/ignored2.log') not in result + assert normpath('/home/user/project/ignored2.pyc') not in result + assert normpath('/home/user/project/subproject/ignored.log') not in result + # presented after both .gitignore and .cycodeignore: + assert normpath('/home/user/project/.gitignore') in result + assert normpath('/home/user/project/.cycodeignore') in result + assert normpath('/home/user/project/subproject/.gitignore') in result + assert normpath('/home/user/project/presented.txt') in result + assert normpath('/home/user/project/presented2.txt') in result + assert normpath('/home/user/project/subproject/presented.py') in result + + path = normpath('/home/user/project/subproject') + result = _collect_walk_ignore_files(path) + + assert len(result) == 2 + # ignored: + assert normpath('/home/user/project/subproject/ignored.txt') not in result + assert normpath('/home/user/project/subproject/ignored.log') not in result + assert normpath('/home/user/project/subproject/ignored.pyc') not in result + # presented: + assert normpath('/home/user/project/subproject/presented.py') in result From 0c6ff0138fd48dffb17dd27f91f1b38086a11861 Mon Sep 17 00:00:00 2001 From: Ilya Siamionau Date: Wed, 11 Dec 2024 19:42:30 +0100 Subject: [PATCH 09/16] fix lock after merge with main --- poetry.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/poetry.lock b/poetry.lock index 944bec77..b4d0fc65 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1050,4 +1050,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.0" python-versions = ">=3.8,<3.14" -content-hash = "e79b70897118c6b6ae75a62d6d85bdc28a5f345871600dedc5e191c3cf3dbdfd" +content-hash = "b91b8db1d8946ee5f22f73cc2f2339e7969c9e3dad41a266c94f9091a1b1e33c" From 6612402966ff754d083bd38ba8280739fd38e366 Mon Sep 17 00:00:00 2001 From: Ilya Siamionau Date: Wed, 11 Dec 2024 19:59:28 +0100 Subject: [PATCH 10/16] drop poetry cache --- .github/workflows/tests_full.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests_full.yml b/.github/workflows/tests_full.yml index 8181b586..76d7a0e3 100644 --- a/.github/workflows/tests_full.yml +++ b/.github/workflows/tests_full.yml @@ -50,7 +50,7 @@ jobs: uses: actions/cache@v3 with: path: ~/.local - key: poetry-${{ matrix.os }}-${{ matrix.python-version }}-1 # increment to reset cache + key: poetry-${{ matrix.os }}-${{ matrix.python-version }}-2 # increment to reset cache - name: Setup Poetry if: steps.cached-poetry.outputs.cache-hit != 'true' From 1041135b9df91549ed90294ef31521fced37b64a Mon Sep 17 00:00:00 2001 From: Ilya Siamionau Date: Wed, 11 Dec 2024 20:41:27 +0100 Subject: [PATCH 11/16] fix CI crashes on Windows (Python 3.12 & 3.13) https://github.com/pytest-dev/pyfakefs/issues/1096 --- .github/workflows/tests.yml | 2 +- .github/workflows/tests_full.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 7c4a2b47..0b5ddb58 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -50,4 +50,4 @@ jobs: run: poetry install - name: Run Tests - run: poetry run pytest + run: poetry run python -m pytest diff --git a/.github/workflows/tests_full.yml b/.github/workflows/tests_full.yml index 76d7a0e3..a760d617 100644 --- a/.github/workflows/tests_full.yml +++ b/.github/workflows/tests_full.yml @@ -71,4 +71,4 @@ jobs: ./dist/cycode-cli version - name: Run pytest - run: poetry run pytest + run: poetry run python -m pytest From 9b9ab86daef190f6e1ef47d067099d6300b79535 Mon Sep 17 00:00:00 2001 From: Ilya Siamionau Date: Fri, 13 Dec 2024 11:36:35 +0100 Subject: [PATCH 12/16] add ignorelib as is with copyright notice --- cycode/cli/utils/ignore_utils.py | 488 +++++++++++++++++++++++++++++++ 1 file changed, 488 insertions(+) create mode 100644 cycode/cli/utils/ignore_utils.py diff --git a/cycode/cli/utils/ignore_utils.py b/cycode/cli/utils/ignore_utils.py new file mode 100644 index 00000000..45579f1d --- /dev/null +++ b/cycode/cli/utils/ignore_utils.py @@ -0,0 +1,488 @@ +# Copyright (C) 2017 Jelmer Vernooij +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Modified from https://github.com/dulwich/dulwich/blob/master/dulwich/ignore.py + +# Copyright 2020 Ben Kehoe +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Modified from https://github.com/benkehoe/ignorelib/blob/main/ignorelib.py + +"""Parsing of ignore files according to gitignore rules. + +For details for the matching rules, see https://git-scm.com/docs/gitignore +""" + +import os.path +import re +from typing import ( + BinaryIO, + Iterable, + List, + Optional, + TYPE_CHECKING, + Dict, + Union, +) + +def _translate_segment(segment: bytes) -> bytes: + if segment == b"*": + return b'[^/]+' + res = b"" + i, n = 0, len(segment) + while i < n: + c = segment[i:i+1] + i = i+1 + if c == b'*': + res += b'[^/]*' + elif c == b'?': + res += b'[^/]' + elif c == b'[': + j = i + if j < n and segment[j:j+1] == b'!': + j = j+1 + if j < n and segment[j:j+1] == b']': + j = j+1 + while j < n and segment[j:j+1] != b']': + j = j+1 + if j >= n: + res += b'\\[' + else: + stuff = segment[i:j].replace(b'\\', b'\\\\') + i = j+1 + if stuff.startswith(b'!'): + stuff = b'^' + stuff[1:] + elif stuff.startswith(b'^'): + stuff = b'\\' + stuff + res += b'[' + stuff + b']' + else: + res += re.escape(c) + return res + + +def translate(pat: bytes) -> bytes: + """Translate a shell PATTERN to a regular expression. + + There is no way to quote meta-characters. + + Originally copied from fnmatch in Python 2.7, but modified for Dulwich + to cope with features in Git ignore patterns. + """ + + res = b'(?ms)' + + if b'/' not in pat[:-1]: + # If there's no slash, this is a filename-based match + res += b'(.*/)?' + + if pat.startswith(b'**/'): + # Leading **/ + pat = pat[2:] + res += b'(.*/)?' + + if pat.startswith(b'/'): + pat = pat[1:] + + for i, segment in enumerate(pat.split(b'/')): + if segment == b'**': + res += b'(/.*)?' + continue + else: + res += ((re.escape(b'/') if i > 0 else b'') + + _translate_segment(segment)) + + if not pat.endswith(b'/'): + res += b'/?' + + return res + b'\\Z' + + +def read_ignore_patterns(f: BinaryIO) -> Iterable[bytes]: + """Read a git ignore file. + + Args: + f: File-like object to read from + Returns: List of patterns + """ + + for line in f: + line = line.rstrip(b"\r\n") + + # Ignore blank lines, they're used for readability. + if not line: + continue + + if line.startswith(b'#'): + # Comment + continue + + # Trailing spaces are ignored unless they are quoted with a backslash. + while line.endswith(b' ') and not line.endswith(b'\\ '): + line = line[:-1] + line = line.replace(b'\\ ', b' ') + + yield line + + +def match_pattern( + path: bytes, pattern: bytes, ignore_case: bool = False) -> bool: + """Match a gitignore-style pattern against a path. + + Args: + path: Path to match + pattern: Pattern to match + ignore_case: Whether to do case-sensitive matching + Returns: + bool indicating whether the pattern matched + """ + return Pattern(pattern, ignore_case).match(path) + + +class Pattern(object): + """A single ignore pattern.""" + + def __init__(self, pattern: bytes, ignore_case: bool = False): + self.pattern = pattern + self.ignore_case = ignore_case + if pattern[0:1] == b'!': + self.is_exclude = False + pattern = pattern[1:] + else: + if pattern[0:1] == b'\\': + pattern = pattern[1:] + self.is_exclude = True + flags = 0 + if self.ignore_case: + flags = re.IGNORECASE + self._re = re.compile(translate(pattern), flags) + + def __bytes__(self) -> bytes: + return self.pattern + + def __str__(self) -> str: + return os.fsdecode(self.pattern) + + def __eq__(self, other: object) -> bool: + return (isinstance(other, type(self)) and + self.pattern == other.pattern and + self.ignore_case == other.ignore_case) + + def __repr__(self) -> str: + return "%s(%r, %r)" % ( + type(self).__name__, self.pattern, self.ignore_case) + + def match(self, path: bytes) -> bool: + """Try to match a path against this ignore pattern. + + Args: + path: Path to match (relative to ignore location) + Returns: boolean + """ + return bool(self._re.match(path)) + + +class IgnoreFilter(object): + + def __init__(self, patterns: Iterable[Union[str, bytes]], ignore_case: bool = False, + path=None): + if hasattr(path, '__fspath__'): + path = path.__fspath__() + self._patterns = [] # type: List[Pattern] + self._ignore_case = ignore_case + self._path = path + for pattern in patterns: + self.append_pattern(pattern) + + def to_dict(self): + d = { + "patterns": [str(p) for p in self._patterns], + "ignore_case": self._ignore_case, + } + path = getattr(self, '_path', None) + if path: + d["path"] = path + return d + + def append_pattern(self, pattern: Union[str, bytes]) -> None: + """Add a pattern to the set.""" + if isinstance(pattern, str): + pattern = bytes(pattern, 'utf-8') + self._patterns.append(Pattern(pattern, self._ignore_case)) + + def find_matching(self, path: Union[bytes, str]) -> Iterable[Pattern]: + """Yield all matching patterns for path. + + Args: + path: Path to match + Returns: + Iterator over iterators + """ + if not isinstance(path, bytes): + path = os.fsencode(path) + for pattern in self._patterns: + if pattern.match(path): + yield pattern + + def is_ignored(self, path: Union[bytes, str]) -> Optional[bool]: + """Check whether a path is ignored. + + For directories, include a trailing slash. + + Returns: status is None if file is not mentioned, True if it is + included, False if it is explicitly excluded. + """ + if hasattr(path, '__fspath__'): + path = path.__fspath__() + status = None + for pattern in self.find_matching(path): + status = pattern.is_exclude + return status + + @classmethod + def from_path(cls, path, ignore_case: bool = False) -> 'IgnoreFilter': + if hasattr(path, '__fspath__'): + path = path.__fspath__() + with open(path, 'rb') as f: + return cls(read_ignore_patterns(f), ignore_case, path=path) + + def __repr__(self) -> str: + path = getattr(self, '_path', None) + if path is not None: + return "%s.from_path(%r)" % ( + type(self).__name__, path) + else: + return "<%s>" % (type(self).__name__) + + +class IgnoreFilterStack(object): + """Check for ignore status in multiple filters.""" + + def __init__(self, filters): + self._filters = filters + + def to_dict(self): + return { + "filters": [f.to_dict() for f in self._filters] + } + + def is_ignored(self, path: str) -> Optional[bool]: + """Check whether a path is ignored. + + Args: + path: Path to check + Returns: + True if the path matches an ignore pattern, + False if the path is explicitly not ignored, + or None if the file does not match any patterns. + """ + if hasattr(path, '__fspath__'): + path = path.__fspath__() + status = None + for filter in self._filters: + status = filter.is_ignored(path) + if status is not None: + return status + return status + + +class IgnoreFilterManager(object): + """Ignore file manager.""" + + def __init__( + self, path: str, + global_filters: List[IgnoreFilter], + ignore_file_name = None, + ignore_case: bool = False): + if hasattr(path, '__fspath__'): + path = path.__fspath__() + self._path_filters = {} # type: Dict[str, Optional[IgnoreFilter]] + self._top_path = path + self._global_filters = global_filters + self._ignore_file_name = ignore_file_name + self._ignore_case = ignore_case + + def __repr__(self) -> str: + return "%s(%s, %r, %r)" % ( + type(self).__name__, self._top_path, + self._global_filters, + self._ignore_case) + + def to_dict(self, include_path_filters=True): + d = { + "path": self._top_path, + "global_filters": [f.to_dict() for f in self._global_filters], + "ignore_case": self._ignore_case, + } + if include_path_filters: + d["path_filters"] = {path: f.to_dict() for path, f in self._path_filters.items() if f is not None} + return d + + @property + def path(self): + return self._top_path + + @property + def ignore_file_name(self): + return self._ignore_file_name + + @property + def ignore_case(self): + return self._ignore_case + + def _load_path(self, path: str) -> Optional[IgnoreFilter]: + try: + return self._path_filters[path] + except KeyError: + pass + + if not self._ignore_file_name: + self._path_filters[path] = None + else: + p = os.path.join(self._top_path, path, self._ignore_file_name) + try: + self._path_filters[path] = IgnoreFilter.from_path( + p, self._ignore_case) + except IOError: + self._path_filters[path] = None + return self._path_filters[path] + + def _find_matching(self, path: str) -> Iterable[Pattern]: + """Find matching patterns for path. + + Stops after the first ignore file with matches. + + Args: + path: Path to check, must be relative. + Returns: + Iterator over Pattern instances + """ + if hasattr(path, '__fspath__'): + path = path.__fspath__() + if os.path.isabs(path): + raise ValueError('%s is an absolute path' % path) + filters = [(0, f) for f in self._global_filters] + if os.path.sep != '/': + path = path.replace(os.path.sep, '/') + parts = path.split('/') + for i in range(len(parts)+1): + dirname = '/'.join(parts[:i]) + for s, f in filters: + relpath = '/'.join(parts[s:i]) + if i < len(parts): + # Paths leading up to the final part are all directories, + # so need a trailing slash. + relpath += '/' + matches = list(f.find_matching(relpath)) + if matches: + return iter(matches) + ignore_filter = self._load_path(dirname) + if ignore_filter is not None: + filters.insert(0, (i, ignore_filter)) + return iter([]) + + def is_ignored(self, path: str) -> Optional[bool]: + """Check whether a path is ignored. + + Args: + path: Path to check, relative to the IgnoreFilterManager path + Returns: + True if the path matches an ignore pattern, + False if the path is explicitly not ignored, + or None if the file does not match any patterns. + """ + if hasattr(path, '__fspath__'): + path = path.__fspath__() + matches = list(self._find_matching(path)) + if matches: + return matches[-1].is_exclude + return None + + def walk(self, **kwargs): + """A wrapper for os.walk() without ignored files and subdirectories. + kwargs are passed to walk().""" + + for dirpath, dirnames, filenames in os.walk(self.path, **kwargs): + if dirpath == self.path: + rel_dirpath = '' + else: + rel_dirpath = os.path.relpath(dirpath, self.path) + + # remove ignored subdirectories + indices_to_remove = [] + for i, dirname in enumerate(dirnames): + if self.is_ignored(os.path.join(rel_dirpath, dirname, '')): + indices_to_remove.append(i) + for i in sorted(indices_to_remove, reverse=True): + del dirnames[i] + + # remove ignored files + filenames = [ + os.path.basename(f) for f in filenames if not self.is_ignored(os.path.join(rel_dirpath, f)) + ] + + # removed this as os.walk visits empty directories + # if nothing is left, don't visit + # if not dirnames and not filenames: + # continue + + yield dirpath, dirnames, filenames + + @classmethod + def build(cls, path: str, + global_ignore_file_paths: Iterable[str] = [], + global_patterns: Iterable[Union[str, bytes]] = [], + ignore_file_name: str = None, + ignore_case: bool = False) -> 'IgnoreFilterManager': + """Create a IgnoreFilterManager from patterns and paths. + Args: + path: The root path for ignore checks. + global_ignore_file_paths: A list of file paths to load patterns from. + Relative paths are relative to the IgnoreFilterManager path, not + the current directory. + global_patterns: Global patterns to ignore. + ignore_file_name: The per-directory ignore file name. + ignore_case: Whether to ignore case in matching. + Returns: + A `IgnoreFilterManager` object + """ + if hasattr(path, '__fspath__'): + path = path.__fspath__() + global_filters = [] + for p in global_ignore_file_paths: + if hasattr(p, '__fspath__'): + p = p.__fspath__() + p = os.path.expanduser(p) + if not os.path.isabs(p): + p = os.path.join(path, p) + try: + global_filters.append(IgnoreFilter.from_path(p)) + except IOError: + pass + if global_patterns: + global_filters.append(IgnoreFilter(global_patterns)) + return cls(path, + global_filters=global_filters, + ignore_file_name=ignore_file_name, + ignore_case=ignore_case) From cd0852afd2867de831fe06731f8d267e2e5e070d Mon Sep 17 00:00:00 2001 From: Ilya Siamionau Date: Fri, 13 Dec 2024 11:47:38 +0100 Subject: [PATCH 13/16] fix and format ignorelib --- cycode/cli/utils/ignore_utils.py | 192 ++++++++++++------------------- 1 file changed, 76 insertions(+), 116 deletions(-) diff --git a/cycode/cli/utils/ignore_utils.py b/cycode/cli/utils/ignore_utils.py index 45579f1d..5da342b0 100644 --- a/cycode/cli/utils/ignore_utils.py +++ b/cycode/cli/utils/ignore_utils.py @@ -35,43 +35,48 @@ For details for the matching rules, see https://git-scm.com/docs/gitignore """ +import contextlib import os.path import re +from os import PathLike from typing import ( + Any, BinaryIO, + Dict, + Generator, Iterable, List, Optional, - TYPE_CHECKING, - Dict, + Tuple, Union, ) -def _translate_segment(segment: bytes) -> bytes: - if segment == b"*": + +def _translate_segment(segment: bytes) -> bytes: # noqa: C901 + if segment == b'*': return b'[^/]+' - res = b"" + res = b'' i, n = 0, len(segment) while i < n: - c = segment[i:i+1] - i = i+1 + c = segment[i : i + 1] + i = i + 1 if c == b'*': res += b'[^/]*' elif c == b'?': res += b'[^/]' elif c == b'[': j = i - if j < n and segment[j:j+1] == b'!': - j = j+1 - if j < n and segment[j:j+1] == b']': - j = j+1 - while j < n and segment[j:j+1] != b']': - j = j+1 + if j < n and segment[j : j + 1] == b'!': + j = j + 1 + if j < n and segment[j : j + 1] == b']': + j = j + 1 + while j < n and segment[j : j + 1] != b']': + j = j + 1 if j >= n: res += b'\\[' else: stuff = segment[i:j].replace(b'\\', b'\\\\') - i = j+1 + i = j + 1 if stuff.startswith(b'!'): stuff = b'^' + stuff[1:] elif stuff.startswith(b'^'): @@ -109,9 +114,7 @@ def translate(pat: bytes) -> bytes: if segment == b'**': res += b'(/.*)?' continue - else: - res += ((re.escape(b'/') if i > 0 else b'') + - _translate_segment(segment)) + res += (re.escape(b'/') if i > 0 else b'') + _translate_segment(segment) if not pat.endswith(b'/'): res += b'/?' @@ -128,7 +131,7 @@ def read_ignore_patterns(f: BinaryIO) -> Iterable[bytes]: """ for line in f: - line = line.rstrip(b"\r\n") + line = line.rstrip(b'\r\n') # Ignore blank lines, they're used for readability. if not line: @@ -146,8 +149,7 @@ def read_ignore_patterns(f: BinaryIO) -> Iterable[bytes]: yield line -def match_pattern( - path: bytes, pattern: bytes, ignore_case: bool = False) -> bool: +def match_pattern(path: bytes, pattern: bytes, ignore_case: bool = False) -> bool: """Match a gitignore-style pattern against a path. Args: @@ -160,10 +162,10 @@ def match_pattern( return Pattern(pattern, ignore_case).match(path) -class Pattern(object): +class Pattern: """A single ignore pattern.""" - def __init__(self, pattern: bytes, ignore_case: bool = False): + def __init__(self, pattern: bytes, ignore_case: bool = False) -> None: self.pattern = pattern self.ignore_case = ignore_case if pattern[0:1] == b'!': @@ -185,13 +187,10 @@ def __str__(self) -> str: return os.fsdecode(self.pattern) def __eq__(self, other: object) -> bool: - return (isinstance(other, type(self)) and - self.pattern == other.pattern and - self.ignore_case == other.ignore_case) + return isinstance(other, type(self)) and self.pattern == other.pattern and self.ignore_case == other.ignore_case def __repr__(self) -> str: - return "%s(%r, %r)" % ( - type(self).__name__, self.pattern, self.ignore_case) + return '%s(%r, %r)' % (type(self).__name__, self.pattern, self.ignore_case) def match(self, path: bytes) -> bool: """Try to match a path against this ignore pattern. @@ -203,10 +202,13 @@ def match(self, path: bytes) -> bool: return bool(self._re.match(path)) -class IgnoreFilter(object): - - def __init__(self, patterns: Iterable[Union[str, bytes]], ignore_case: bool = False, - path=None): +class IgnoreFilter: + def __init__( + self, + patterns: Iterable[Union[str, bytes]], + ignore_case: bool = False, + path: Optional[Union[PathLike, str]] = None, + ) -> None: if hasattr(path, '__fspath__'): path = path.__fspath__() self._patterns = [] # type: List[Pattern] @@ -215,14 +217,14 @@ def __init__(self, patterns: Iterable[Union[str, bytes]], ignore_case: bool = Fa for pattern in patterns: self.append_pattern(pattern) - def to_dict(self): + def to_dict(self) -> Dict[str, Any]: d = { - "patterns": [str(p) for p in self._patterns], - "ignore_case": self._ignore_case, + 'patterns': [str(p) for p in self._patterns], + 'ignore_case': self._ignore_case, } path = getattr(self, '_path', None) if path: - d["path"] = path + d['path'] = path return d def append_pattern(self, pattern: Union[str, bytes]) -> None: @@ -261,7 +263,7 @@ def is_ignored(self, path: Union[bytes, str]) -> Optional[bool]: return status @classmethod - def from_path(cls, path, ignore_case: bool = False) -> 'IgnoreFilter': + def from_path(cls, path: Union[PathLike, str], ignore_case: bool = False) -> 'IgnoreFilter': if hasattr(path, '__fspath__'): path = path.__fspath__() with open(path, 'rb') as f: @@ -270,51 +272,20 @@ def from_path(cls, path, ignore_case: bool = False) -> 'IgnoreFilter': def __repr__(self) -> str: path = getattr(self, '_path', None) if path is not None: - return "%s.from_path(%r)" % ( - type(self).__name__, path) - else: - return "<%s>" % (type(self).__name__) - + return '%s.from_path(%r)' % (type(self).__name__, path) + return '<%s>' % type(self).__name__ -class IgnoreFilterStack(object): - """Check for ignore status in multiple filters.""" - def __init__(self, filters): - self._filters = filters - - def to_dict(self): - return { - "filters": [f.to_dict() for f in self._filters] - } - - def is_ignored(self, path: str) -> Optional[bool]: - """Check whether a path is ignored. - - Args: - path: Path to check - Returns: - True if the path matches an ignore pattern, - False if the path is explicitly not ignored, - or None if the file does not match any patterns. - """ - if hasattr(path, '__fspath__'): - path = path.__fspath__() - status = None - for filter in self._filters: - status = filter.is_ignored(path) - if status is not None: - return status - return status - - -class IgnoreFilterManager(object): +class IgnoreFilterManager: """Ignore file manager.""" def __init__( - self, path: str, - global_filters: List[IgnoreFilter], - ignore_file_name = None, - ignore_case: bool = False): + self, + path: str, + global_filters: List[IgnoreFilter], + ignore_file_name: Optional[str] = None, + ignore_case: bool = False, + ) -> None: if hasattr(path, '__fspath__'): path = path.__fspath__() self._path_filters = {} # type: Dict[str, Optional[IgnoreFilter]] @@ -324,31 +295,28 @@ def __init__( self._ignore_case = ignore_case def __repr__(self) -> str: - return "%s(%s, %r, %r)" % ( - type(self).__name__, self._top_path, - self._global_filters, - self._ignore_case) + return '%s(%s, %r, %r)' % (type(self).__name__, self._top_path, self._global_filters, self._ignore_case) - def to_dict(self, include_path_filters=True): + def to_dict(self, include_path_filters: bool = True) -> Dict[str, Any]: d = { - "path": self._top_path, - "global_filters": [f.to_dict() for f in self._global_filters], - "ignore_case": self._ignore_case, + 'path': self._top_path, + 'global_filters': [f.to_dict() for f in self._global_filters], + 'ignore_case': self._ignore_case, } if include_path_filters: - d["path_filters"] = {path: f.to_dict() for path, f in self._path_filters.items() if f is not None} + d['path_filters'] = {path: f.to_dict() for path, f in self._path_filters.items() if f is not None} return d @property - def path(self): + def path(self) -> str: return self._top_path @property - def ignore_file_name(self): + def ignore_file_name(self) -> Optional[str]: return self._ignore_file_name @property - def ignore_case(self): + def ignore_case(self) -> bool: return self._ignore_case def _load_path(self, path: str) -> Optional[IgnoreFilter]: @@ -362,8 +330,7 @@ def _load_path(self, path: str) -> Optional[IgnoreFilter]: else: p = os.path.join(self._top_path, path, self._ignore_file_name) try: - self._path_filters[path] = IgnoreFilter.from_path( - p, self._ignore_case) + self._path_filters[path] = IgnoreFilter.from_path(p, self._ignore_case) except IOError: self._path_filters[path] = None return self._path_filters[path] @@ -386,7 +353,7 @@ def _find_matching(self, path: str) -> Iterable[Pattern]: if os.path.sep != '/': path = path.replace(os.path.sep, '/') parts = path.split('/') - for i in range(len(parts)+1): + for i in range(len(parts) + 1): dirname = '/'.join(parts[:i]) for s, f in filters: relpath = '/'.join(parts[s:i]) @@ -419,15 +386,12 @@ def is_ignored(self, path: str) -> Optional[bool]: return matches[-1].is_exclude return None - def walk(self, **kwargs): + def walk(self, **kwargs) -> Generator[Tuple[str, List[str], List[str]], None, None]: """A wrapper for os.walk() without ignored files and subdirectories. kwargs are passed to walk().""" for dirpath, dirnames, filenames in os.walk(self.path, **kwargs): - if dirpath == self.path: - rel_dirpath = '' - else: - rel_dirpath = os.path.relpath(dirpath, self.path) + rel_dirpath = '' if dirpath == self.path else os.path.relpath(dirpath, self.path) # remove ignored subdirectories indices_to_remove = [] @@ -438,23 +402,19 @@ def walk(self, **kwargs): del dirnames[i] # remove ignored files - filenames = [ - os.path.basename(f) for f in filenames if not self.is_ignored(os.path.join(rel_dirpath, f)) - ] - - # removed this as os.walk visits empty directories - # if nothing is left, don't visit - # if not dirnames and not filenames: - # continue + filenames = [os.path.basename(f) for f in filenames if not self.is_ignored(os.path.join(rel_dirpath, f))] yield dirpath, dirnames, filenames @classmethod - def build(cls, path: str, - global_ignore_file_paths: Iterable[str] = [], - global_patterns: Iterable[Union[str, bytes]] = [], - ignore_file_name: str = None, - ignore_case: bool = False) -> 'IgnoreFilterManager': + def build( + cls, + path: str, + global_ignore_file_paths: Optional[Iterable[str]] = None, + global_patterns: Optional[Iterable[Union[str, bytes]]] = None, + ignore_file_name: Optional[str] = None, + ignore_case: bool = False, + ) -> 'IgnoreFilterManager': """Create a IgnoreFilterManager from patterns and paths. Args: path: The root path for ignore checks. @@ -467,6 +427,11 @@ def build(cls, path: str, Returns: A `IgnoreFilterManager` object """ + if not global_ignore_file_paths: + global_ignore_file_paths = [] + if not global_patterns: + global_patterns = [] + if hasattr(path, '__fspath__'): path = path.__fspath__() global_filters = [] @@ -476,13 +441,8 @@ def build(cls, path: str, p = os.path.expanduser(p) if not os.path.isabs(p): p = os.path.join(path, p) - try: + with contextlib.suppress(IOError): global_filters.append(IgnoreFilter.from_path(p)) - except IOError: - pass if global_patterns: global_filters.append(IgnoreFilter(global_patterns)) - return cls(path, - global_filters=global_filters, - ignore_file_name=ignore_file_name, - ignore_case=ignore_case) + return cls(path, global_filters=global_filters, ignore_file_name=ignore_file_name, ignore_case=ignore_case) From 266bcfec3d523260179b945bca0fc815a1d9b129 Mon Sep 17 00:00:00 2001 From: Ilya Siamionau Date: Fri, 13 Dec 2024 12:14:19 +0100 Subject: [PATCH 14/16] align codebase of ignorelib with dulwich --- cycode/cli/utils/ignore_utils.py | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/cycode/cli/utils/ignore_utils.py b/cycode/cli/utils/ignore_utils.py index 5da342b0..22ac7eaa 100644 --- a/cycode/cli/utils/ignore_utils.py +++ b/cycode/cli/utils/ignore_utils.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -# Modified from https://github.com/dulwich/dulwich/blob/master/dulwich/ignore.py +# Modified from https://github.com/jelmer/dulwich/blob/master/dulwich/ignore.py # Copyright 2020 Ben Kehoe # @@ -64,6 +64,9 @@ def _translate_segment(segment: bytes) -> bytes: # noqa: C901 res += b'[^/]*' elif c == b'?': res += b'[^/]' + elif c == b'\\': + res += re.escape(segment[i : i + 1]) + i += 1 elif c == b'[': j = i if j < n and segment[j : j + 1] == b'!': @@ -190,7 +193,7 @@ def __eq__(self, other: object) -> bool: return isinstance(other, type(self)) and self.pattern == other.pattern and self.ignore_case == other.ignore_case def __repr__(self) -> str: - return '%s(%r, %r)' % (type(self).__name__, self.pattern, self.ignore_case) + return f'{type(self).__name__}({self.pattern!r}, {self.ignore_case!r})' def match(self, path: bytes) -> bool: """Try to match a path against this ignore pattern. @@ -272,8 +275,8 @@ def from_path(cls, path: Union[PathLike, str], ignore_case: bool = False) -> 'Ig def __repr__(self) -> str: path = getattr(self, '_path', None) if path is not None: - return '%s.from_path(%r)' % (type(self).__name__, path) - return '<%s>' % type(self).__name__ + return f'{type(self).__name__}.from_path({path!r})' + return f'<{type(self).__name__}>' class IgnoreFilterManager: @@ -295,7 +298,7 @@ def __init__( self._ignore_case = ignore_case def __repr__(self) -> str: - return '%s(%s, %r, %r)' % (type(self).__name__, self._top_path, self._global_filters, self._ignore_case) + return f'{type(self).__name__}({self._top_path}, {self._global_filters!r}, {self._ignore_case!r})' def to_dict(self, include_path_filters: bool = True) -> Dict[str, Any]: d = { @@ -338,21 +341,18 @@ def _load_path(self, path: str) -> Optional[IgnoreFilter]: def _find_matching(self, path: str) -> Iterable[Pattern]: """Find matching patterns for path. - Stops after the first ignore file with matches. - Args: - path: Path to check, must be relative. + path: Path to check Returns: Iterator over Pattern instances """ - if hasattr(path, '__fspath__'): - path = path.__fspath__() if os.path.isabs(path): - raise ValueError('%s is an absolute path' % path) + raise ValueError(f'{path} is an absolute path') filters = [(0, f) for f in self._global_filters] if os.path.sep != '/': path = path.replace(os.path.sep, '/') parts = path.split('/') + matches = [] for i in range(len(parts) + 1): dirname = '/'.join(parts[:i]) for s, f in filters: @@ -361,13 +361,11 @@ def _find_matching(self, path: str) -> Iterable[Pattern]: # Paths leading up to the final part are all directories, # so need a trailing slash. relpath += '/' - matches = list(f.find_matching(relpath)) - if matches: - return iter(matches) + matches += list(f.find_matching(relpath)) ignore_filter = self._load_path(dirname) if ignore_filter is not None: filters.insert(0, (i, ignore_filter)) - return iter([]) + return iter(matches) def is_ignored(self, path: str) -> Optional[bool]: """Check whether a path is ignored. From 85993fff06024ceb66d0b572fdc05d68c02c98d8 Mon Sep 17 00:00:00 2001 From: Ilya Siamionau Date: Fri, 13 Dec 2024 13:00:26 +0100 Subject: [PATCH 15/16] fix inefficient subfolders filtering --- cycode/cli/utils/ignore_utils.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/cycode/cli/utils/ignore_utils.py b/cycode/cli/utils/ignore_utils.py index 22ac7eaa..a5edccc9 100644 --- a/cycode/cli/utils/ignore_utils.py +++ b/cycode/cli/utils/ignore_utils.py @@ -294,7 +294,11 @@ def __init__( self._path_filters = {} # type: Dict[str, Optional[IgnoreFilter]] self._top_path = path self._global_filters = global_filters + self._ignore_file_name = ignore_file_name + if self._ignore_file_name is None: + self._ignore_file_name = '.gitignore' + self._ignore_case = ignore_case def __repr__(self) -> str: @@ -388,16 +392,12 @@ def walk(self, **kwargs) -> Generator[Tuple[str, List[str], List[str]], None, No """A wrapper for os.walk() without ignored files and subdirectories. kwargs are passed to walk().""" - for dirpath, dirnames, filenames in os.walk(self.path, **kwargs): + for dirpath, dirnames, filenames in os.walk(self.path, topdown=True, **kwargs): rel_dirpath = '' if dirpath == self.path else os.path.relpath(dirpath, self.path) - # remove ignored subdirectories - indices_to_remove = [] - for i, dirname in enumerate(dirnames): - if self.is_ignored(os.path.join(rel_dirpath, dirname, '')): - indices_to_remove.append(i) - for i in sorted(indices_to_remove, reverse=True): - del dirnames[i] + # decrease recursion depth of os.walk() by ignoring subdirectories because of topdown=True + # slicing ([:]) is mandatory to change dict in-place! + dirnames[:] = [dirname for dirname in dirnames if not self.is_ignored(os.path.join(rel_dirpath, dirname, ''))] # remove ignored files filenames = [os.path.basename(f) for f in filenames if not self.is_ignored(os.path.join(rel_dirpath, f))] @@ -432,15 +432,20 @@ def build( if hasattr(path, '__fspath__'): path = path.__fspath__() + global_filters = [] for p in global_ignore_file_paths: if hasattr(p, '__fspath__'): p = p.__fspath__() + p = os.path.expanduser(p) if not os.path.isabs(p): p = os.path.join(path, p) + with contextlib.suppress(IOError): global_filters.append(IgnoreFilter.from_path(p)) + if global_patterns: global_filters.append(IgnoreFilter(global_patterns)) + return cls(path, global_filters=global_filters, ignore_file_name=ignore_file_name, ignore_case=ignore_case) From 4fbf23b80f18d9e8af040e55486e3e48d09fb78c Mon Sep 17 00:00:00 2001 From: Ilya Siamionau Date: Fri, 13 Dec 2024 14:03:57 +0100 Subject: [PATCH 16/16] migrate to ignore_utils; add tests; remove unused pathspec; remove .cycodeignore --- cycode/cli/files_collector/path_documents.py | 11 +- cycode/cli/files_collector/walk_ignore.py | 60 ++---- cycode/cli/utils/ignore_utils.py | 14 +- poetry.lock | 13 +- pyproject.toml | 1 - tests/cli/files_collector/test_walk_ignore.py | 42 +---- tests/utils/test_ignore_utils.py | 176 ++++++++++++++++++ 7 files changed, 212 insertions(+), 105 deletions(-) create mode 100644 tests/utils/test_ignore_utils.py diff --git a/cycode/cli/files_collector/path_documents.py b/cycode/cli/files_collector/path_documents.py index 27d24cd8..14f88888 100644 --- a/cycode/cli/files_collector/path_documents.py +++ b/cycode/cli/files_collector/path_documents.py @@ -1,7 +1,5 @@ import os -from typing import TYPE_CHECKING, Iterable, List, Optional, Tuple - -import pathspec +from typing import TYPE_CHECKING, List, Tuple from cycode.cli.files_collector.excluder import exclude_irrelevant_files from cycode.cli.files_collector.iac.tf_content_generator import ( @@ -30,7 +28,7 @@ def _get_all_existing_files_in_directory(path: str, *, walk_with_ignore_patterns return files -def _get_relevant_files_in_path(path: str, exclude_patterns: Optional[Iterable[str]] = None) -> List[str]: +def _get_relevant_files_in_path(path: str) -> List[str]: absolute_path = get_absolute_path(path) if not os.path.isfile(absolute_path) and not os.path.isdir(absolute_path): @@ -40,11 +38,6 @@ def _get_relevant_files_in_path(path: str, exclude_patterns: Optional[Iterable[s return [absolute_path] file_paths = _get_all_existing_files_in_directory(absolute_path) - - if exclude_patterns: - path_spec = pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern, exclude_patterns) - file_paths = path_spec.match_files(file_paths, negate=True) - return [file_path for file_path in file_paths if os.path.isfile(file_path)] diff --git a/cycode/cli/files_collector/walk_ignore.py b/cycode/cli/files_collector/walk_ignore.py index 21299bbc..76d04366 100644 --- a/cycode/cli/files_collector/walk_ignore.py +++ b/cycode/cli/files_collector/walk_ignore.py @@ -1,17 +1,15 @@ import os -from collections import defaultdict from typing import Generator, Iterable, List, Tuple -import pathspec -from pathspec.util import StrPath - -from cycode.cli.utils.path_utils import get_file_content +from cycode.cli.utils.ignore_utils import IgnoreFilterManager from cycode.cyclient import logger -_SUPPORTED_IGNORE_PATTERN_FILES = {'.gitignore', '.cycodeignore'} +_SUPPORTED_IGNORE_PATTERN_FILES = { # oneday we will bring .cycodeignore or something like that + '.gitignore', +} _DEFAULT_GLOBAL_IGNORE_PATTERNS = [ - '**/.git', - '**/.cycode', + '.git', + '.cycode', ] @@ -35,44 +33,10 @@ def _collect_top_level_ignore_files(path: str) -> List[str]: return ignore_files -def _get_global_ignore_patterns(path: str) -> List[str]: - ignore_patterns = _DEFAULT_GLOBAL_IGNORE_PATTERNS.copy() - for ignore_file in _collect_top_level_ignore_files(path): - file_patterns = get_file_content(ignore_file).splitlines() - ignore_patterns.extend(file_patterns) - return ignore_patterns - - -def _should_include_path(ignore_patterns: List[str], path: StrPath) -> bool: - path_spec = pathspec.PathSpec.from_lines(pathspec.patterns.GitWildMatchPattern, ignore_patterns) - return not path_spec.match_file(path) # works with both files and directories; negative match - - def walk_ignore(path: str) -> Generator[Tuple[str, List[str], List[str]], None, None]: - global_ignore_patterns = _get_global_ignore_patterns(path) - path_to_ignore_patterns = defaultdict(list) - - for dirpath, dirnames, filenames in os.walk(path, topdown=True): - # finds and processes ignore files first to get the patterns - for filename in filenames: - filepath = os.path.join(dirpath, filename) - if filename in _SUPPORTED_IGNORE_PATTERN_FILES: - logger.debug('Apply ignore file: %s', filepath) - - parent_dir = os.path.dirname(dirpath) - if dirpath not in path_to_ignore_patterns and parent_dir in path_to_ignore_patterns: - # inherit ignore patterns from parent directory on first occurrence - logger.debug('Inherit ignore patterns: %s', {'inherit_from': parent_dir, 'inherit_to': dirpath}) - path_to_ignore_patterns[dirpath].extend(path_to_ignore_patterns[parent_dir]) - - # always read ignore patterns for the current directory - path_to_ignore_patterns[dirpath].extend(get_file_content(filepath).splitlines()) - - ignore_patterns = global_ignore_patterns + path_to_ignore_patterns.get(dirpath, []) - - # decrease recursion depth of os.walk() because of topdown=True by changing the list in-place - # slicing ([:]) is mandatory to change dict in-place! - dirnames[:] = [d for d in dirnames if _should_include_path(ignore_patterns, os.path.join(dirpath, d))] - filenames[:] = [f for f in filenames if _should_include_path(ignore_patterns, os.path.join(dirpath, f))] - - yield dirpath, dirnames, filenames + ignore_filter_manager = IgnoreFilterManager.build( + path=path, + global_ignore_file_paths=_collect_top_level_ignore_files(path), + global_patterns=_DEFAULT_GLOBAL_IGNORE_PATTERNS, + ) + yield from ignore_filter_manager.walk() diff --git a/cycode/cli/utils/ignore_utils.py b/cycode/cli/utils/ignore_utils.py index a5edccc9..329fa055 100644 --- a/cycode/cli/utils/ignore_utils.py +++ b/cycode/cli/utils/ignore_utils.py @@ -132,12 +132,11 @@ def read_ignore_patterns(f: BinaryIO) -> Iterable[bytes]: f: File-like object to read from Returns: List of patterns """ - for line in f: line = line.rstrip(b'\r\n') # Ignore blank lines, they're used for readability. - if not line: + if not line.strip(): continue if line.startswith(b'#'): @@ -397,7 +396,9 @@ def walk(self, **kwargs) -> Generator[Tuple[str, List[str], List[str]], None, No # decrease recursion depth of os.walk() by ignoring subdirectories because of topdown=True # slicing ([:]) is mandatory to change dict in-place! - dirnames[:] = [dirname for dirname in dirnames if not self.is_ignored(os.path.join(rel_dirpath, dirname, ''))] + dirnames[:] = [ + dirname for dirname in dirnames if not self.is_ignored(os.path.join(rel_dirpath, dirname, '')) + ] # remove ignored files filenames = [os.path.basename(f) for f in filenames if not self.is_ignored(os.path.join(rel_dirpath, f))] @@ -430,6 +431,13 @@ def build( if not global_patterns: global_patterns = [] + global_ignore_file_paths.extend( + [ + os.path.join('.git', 'info', 'exclude'), # relative to an input path, so within the repo + os.path.expanduser(os.path.join('~', '.config', 'git', 'ignore')), # absolute + ] + ) + if hasattr(path, '__fspath__'): path = path.__fspath__() diff --git a/poetry.lock b/poetry.lock index b4d0fc65..c97b44a9 100644 --- a/poetry.lock +++ b/poetry.lock @@ -490,17 +490,6 @@ files = [ {file = "patch-ng-1.18.1.tar.gz", hash = "sha256:52fd46ee46f6c8667692682c1fd7134edc65a2d2d084ebec1d295a6087fc0291"}, ] -[[package]] -name = "pathspec" -version = "0.12.1" -description = "Utility library for gitignore style pattern matching of file paths." -optional = false -python-versions = ">=3.8" -files = [ - {file = "pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08"}, - {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"}, -] - [[package]] name = "pefile" version = "2024.8.26" @@ -1050,4 +1039,4 @@ type = ["pytest-mypy"] [metadata] lock-version = "2.0" python-versions = ">=3.8,<3.14" -content-hash = "b91b8db1d8946ee5f22f73cc2f2339e7969c9e3dad41a266c94f9091a1b1e33c" +content-hash = "e91a6f9b7e080cea351f9073ef333afe026df6172b95fba5477af67f15c96000" diff --git a/pyproject.toml b/pyproject.toml index 1754ed5d..42511ec8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,7 +32,6 @@ click = ">=8.1.0,<8.2.0" colorama = ">=0.4.3,<0.5.0" pyyaml = ">=6.0,<7.0" marshmallow = ">=3.15.0,<3.23.0" # 3.23 dropped support for Python 3.8 -pathspec = ">=0.11.1,<0.13.0" gitpython = ">=3.1.30,<3.2.0" arrow = ">=1.0.0,<1.4.0" binaryornot = ">=0.4.4,<0.5.0" diff --git a/tests/cli/files_collector/test_walk_ignore.py b/tests/cli/files_collector/test_walk_ignore.py index 87d6a9de..fd2612d5 100644 --- a/tests/cli/files_collector/test_walk_ignore.py +++ b/tests/cli/files_collector/test_walk_ignore.py @@ -4,7 +4,6 @@ from cycode.cli.files_collector.walk_ignore import ( _collect_top_level_ignore_files, - _get_global_ignore_patterns, _walk_to_top, walk_ignore, ) @@ -52,11 +51,9 @@ def _create_mocked_file_structure(fs: 'FakeFilesystem') -> None: fs.create_dir('/home/user/project/.git') fs.create_file('/home/user/project/.git/HEAD') - fs.create_file('/home/user/project/.gitignore', contents='*.pyc') + fs.create_file('/home/user/project/.gitignore', contents='*.pyc\n*.log') fs.create_file('/home/user/project/ignored.pyc') fs.create_file('/home/user/project/presented.txt') - - fs.create_file('/home/user/project/.cycodeignore', contents='*.log') fs.create_file('/home/user/project/ignored2.log') fs.create_file('/home/user/project/ignored2.pyc') fs.create_file('/home/user/project/presented2.txt') @@ -75,45 +72,27 @@ def test_collect_top_level_ignore_files(fs: 'FakeFilesystem') -> None: # Test with path inside the project path = normpath('/home/user/project/subproject') ignore_files = _collect_top_level_ignore_files(path) - - assert len(ignore_files) == 3 + assert len(ignore_files) == 2 assert normpath('/home/user/project/subproject/.gitignore') in ignore_files assert normpath('/home/user/project/.gitignore') in ignore_files - assert normpath('/home/user/project/.cycodeignore') in ignore_files - - # Test with a path that does not have any ignore files - fs.remove('/home/user/project/.gitignore') - path = normpath('/home/user') - ignore_files = _collect_top_level_ignore_files(path) - - assert len(ignore_files) == 0 # Test with path at the top level with no ignore files path = normpath('/home/user/.git') ignore_files = _collect_top_level_ignore_files(path) - assert len(ignore_files) == 0 # Test with path at the top level with a .gitignore path = normpath('/home/user/project') ignore_files = _collect_top_level_ignore_files(path) - assert len(ignore_files) == 1 - assert normpath('/home/user/project/.cycodeignore') in ignore_files - - -def test_get_global_ignore_patterns(fs: 'FakeFilesystem') -> None: - _create_mocked_file_structure(fs) - ignore_patterns = _get_global_ignore_patterns('/home/user/project/subproject') + assert normpath('/home/user/project/.gitignore') in ignore_files - assert len(ignore_patterns) == 5 - # default global: - assert '**/.git' in ignore_patterns - assert '**/.cycode' in ignore_patterns - # additional: - assert '*.txt' in ignore_patterns - assert '*.pyc' in ignore_patterns - assert '*.log' in ignore_patterns + # Test with a path that does not have any ignore files + fs.remove('/home/user/project/.gitignore') + path = normpath('/home/user') + ignore_files = _collect_top_level_ignore_files(path) + assert len(ignore_files) == 0 + fs.create_file('/home/user/project/.gitignore', contents='*.pyc\n*.log') def _collect_walk_ignore_files(path: str) -> List[str]: @@ -131,7 +110,7 @@ def test_walk_ignore(fs: 'FakeFilesystem') -> None: path = normpath('/home/user/project') result = _collect_walk_ignore_files(path) - assert len(result) == 6 + assert len(result) == 5 # ignored globally by default: assert normpath('/home/user/project/.git/HEAD') not in result assert normpath('/home/user/project/.cycode/config.yaml') not in result @@ -146,7 +125,6 @@ def test_walk_ignore(fs: 'FakeFilesystem') -> None: assert normpath('/home/user/project/subproject/ignored.log') not in result # presented after both .gitignore and .cycodeignore: assert normpath('/home/user/project/.gitignore') in result - assert normpath('/home/user/project/.cycodeignore') in result assert normpath('/home/user/project/subproject/.gitignore') in result assert normpath('/home/user/project/presented.txt') in result assert normpath('/home/user/project/presented2.txt') in result diff --git a/tests/utils/test_ignore_utils.py b/tests/utils/test_ignore_utils.py new file mode 100644 index 00000000..563c11a9 --- /dev/null +++ b/tests/utils/test_ignore_utils.py @@ -0,0 +1,176 @@ +# Copyright (C) 2017 Jelmer Vernooij +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Modified (rewritten to pytest + pyfakefs) from https://github.com/jelmer/dulwich/blob/master/tests/test_ignore.py + +import os +import re +from io import BytesIO +from typing import TYPE_CHECKING + +import pytest + +from cycode.cli.utils.ignore_utils import ( + IgnoreFilter, + IgnoreFilterManager, + Pattern, + match_pattern, + read_ignore_patterns, + translate, +) + +if TYPE_CHECKING: + from pyfakefs.fake_filesystem import FakeFilesystem + +POSITIVE_MATCH_TESTS = [ + (b'foo.c', b'*.c'), + (b'.c', b'*.c'), + (b'foo/foo.c', b'*.c'), + (b'foo/foo.c', b'foo.c'), + (b'foo.c', b'/*.c'), + (b'foo.c', b'/foo.c'), + (b'foo.c', b'foo.c'), + (b'foo.c', b'foo.[ch]'), + (b'foo/bar/bla.c', b'foo/**'), + (b'foo/bar/bla/blie.c', b'foo/**/blie.c'), + (b'foo/bar/bla.c', b'**/bla.c'), + (b'bla.c', b'**/bla.c'), + (b'foo/bar', b'foo/**/bar'), + (b'foo/bla/bar', b'foo/**/bar'), + (b'foo/bar/', b'bar/'), + (b'foo/bar/', b'bar'), + (b'foo/bar/something', b'foo/bar/*'), +] + +NEGATIVE_MATCH_TESTS = [ + (b'foo.c', b'foo.[dh]'), + (b'foo/foo.c', b'/foo.c'), + (b'foo/foo.c', b'/*.c'), + (b'foo/bar/', b'/bar/'), + (b'foo/bar/', b'foo/bar/*'), + (b'foo/bar', b'foo?bar'), +] + +TRANSLATE_TESTS = [ + (b'*.c', b'(?ms)(.*/)?[^/]*\\.c/?\\Z'), + (b'foo.c', b'(?ms)(.*/)?foo\\.c/?\\Z'), + (b'/*.c', b'(?ms)[^/]*\\.c/?\\Z'), + (b'/foo.c', b'(?ms)foo\\.c/?\\Z'), + (b'foo.c', b'(?ms)(.*/)?foo\\.c/?\\Z'), + (b'foo.[ch]', b'(?ms)(.*/)?foo\\.[ch]/?\\Z'), + (b'bar/', b'(?ms)(.*/)?bar\\/\\Z'), + (b'foo/**', b'(?ms)foo(/.*)?/?\\Z'), + (b'foo/**/blie.c', b'(?ms)foo(/.*)?\\/blie\\.c/?\\Z'), + (b'**/bla.c', b'(?ms)(.*/)?bla\\.c/?\\Z'), + (b'foo/**/bar', b'(?ms)foo(/.*)?\\/bar/?\\Z'), + (b'foo/bar/*', b'(?ms)foo\\/bar\\/[^/]+/?\\Z'), + (b'/foo\\[bar\\]', b'(?ms)foo\\[bar\\]/?\\Z'), + (b'/foo[bar]', b'(?ms)foo[bar]/?\\Z'), + (b'/foo[0-9]', b'(?ms)foo[0-9]/?\\Z'), +] + + +@pytest.mark.usefixtures('fs') +class TestIgnoreFiles: + def test_translate(self) -> None: + for pattern, regex in TRANSLATE_TESTS: + if re.escape(b'/') == b'/': + regex = regex.replace(b'\\/', b'/') + assert ( + translate(pattern) == regex + ), f'orig pattern: {pattern!r}, regex: {translate(pattern)!r}, expected: {regex!r}' + + def test_read_file(self) -> None: + f = BytesIO( + b""" +# a comment +\x20\x20 +# and an empty line: + +\\#not a comment +!negative +with trailing whitespace +with escaped trailing whitespace\\ +""" # noqa: W291 (Trailing whitespace) + ) + assert list(read_ignore_patterns(f)) == [ + b'\\#not a comment', + b'!negative', + b'with trailing whitespace', + b'with escaped trailing whitespace ', + ] + + def test_match_patterns_positive(self) -> None: + for path, pattern in POSITIVE_MATCH_TESTS: + assert match_pattern(path, pattern), f'path: {path!r}, pattern: {pattern!r}' + + def test_match_patterns_negative(self) -> None: + for path, pattern in NEGATIVE_MATCH_TESTS: + assert not match_pattern(path, pattern), f'path: {path!r}, pattern: {pattern!r}' + + def test_ignore_filter_inclusion(self) -> None: + ignore_filter = IgnoreFilter([b'a.c', b'b.c']) + assert ignore_filter.is_ignored(b'a.c') + assert ignore_filter.is_ignored(b'c.c') is None + assert list(ignore_filter.find_matching(b'a.c')) == [Pattern(b'a.c')] + assert list(ignore_filter.find_matching(b'c.c')) == [] + + def test_ignore_filter_exclusion(self) -> None: + ignore_filter = IgnoreFilter([b'a.c', b'b.c', b'!c.c']) + assert not ignore_filter.is_ignored(b'c.c') + assert ignore_filter.is_ignored(b'd.c') is None + assert list(ignore_filter.find_matching(b'c.c')) == [Pattern(b'!c.c')] + assert list(ignore_filter.find_matching(b'd.c')) == [] + + def test_ignore_filter_manager(self, fs: 'FakeFilesystem') -> None: + # Prepare sample ignore patterns + fs.create_file('/path/to/repo/.gitignore', contents=b'/foo/bar\n/dir2\n/dir3/\n') + fs.create_file('/path/to/repo/dir/.gitignore', contents=b'/blie\n') + fs.create_file('/path/to/repo/.git/info/exclude', contents=b'/excluded\n') + + m = IgnoreFilterManager.build('/path/to/repo') + + assert m.is_ignored('dir/blie') + assert m.is_ignored(os.path.join('dir', 'bloe')) is None + assert m.is_ignored('dir') is None + assert m.is_ignored(os.path.join('foo', 'bar')) + assert m.is_ignored(os.path.join('excluded')) + assert m.is_ignored(os.path.join('dir2', 'fileinignoreddir')) + assert not m.is_ignored('dir3') + assert m.is_ignored('dir3/') + assert m.is_ignored('dir3/bla') + + def test_nested_gitignores(self, fs: 'FakeFilesystem') -> None: + fs.create_file('/path/to/repo/.gitignore', contents=b'/*\n!/foo\n') + fs.create_file('/path/to/repo/foo/.gitignore', contents=b'/bar\n') + fs.create_file('/path/to/repo/foo/bar', contents=b'IGNORED') + + m = IgnoreFilterManager.build('/path/to/repo') + assert m.is_ignored('foo/bar') + + def test_load_ignore_ignore_case(self, fs: 'FakeFilesystem') -> None: + fs.create_file('/path/to/repo/.gitignore', contents=b'/foo/bar\n/dir\n') + + m = IgnoreFilterManager.build('/path/to/repo', ignore_case=True) + assert m.is_ignored(os.path.join('dir', 'blie')) + assert m.is_ignored(os.path.join('DIR', 'blie')) + + def test_ignored_contents(self, fs: 'FakeFilesystem') -> None: + fs.create_file('/path/to/repo/.gitignore', contents=b'a/*\n!a/*.txt\n') + + m = IgnoreFilterManager.build('/path/to/repo') + assert m.is_ignored('a') is None + assert m.is_ignored('a/') is None + assert not m.is_ignored('a/b.txt') + assert m.is_ignored('a/c.dat')