From 43b3a833f235358e648295053557ad955237367e Mon Sep 17 00:00:00 2001 From: Brahadeesh V <144269250+CodeforGood1@users.noreply.github.com> Date: Tue, 12 May 2026 00:31:11 +0530 Subject: [PATCH 1/2] Refine skip rules and add analyzer tests --- src/contractguard/analyzers/file_filters.py | 9 +++++++- src/contractguard/analyzers/pii_analyzer.py | 23 +++++++++++++++---- .../analyzers/secrets_analyzer.py | 16 +++++++++---- tests/test_pii_analyzer.py | 16 +++++++++++-- tests/test_secrets_analyzer.py | 10 ++++++-- 5 files changed, 61 insertions(+), 13 deletions(-) diff --git a/src/contractguard/analyzers/file_filters.py b/src/contractguard/analyzers/file_filters.py index f82de40..a60caeb 100644 --- a/src/contractguard/analyzers/file_filters.py +++ b/src/contractguard/analyzers/file_filters.py @@ -20,6 +20,13 @@ ".ruff_cache", } +_NORMALIZED_SKIP_DIRS = {part.casefold() for part in _SKIP_DIRS} + def should_skip_path(path: Path) -> bool: - return any(part in _SKIP_DIRS for part in path.parts) + parts = path.parts + if path.exists() and path.is_file(): + parts = path.parent.parts + elif path.suffix: + parts = path.parent.parts + return any(part.casefold() in _NORMALIZED_SKIP_DIRS for part in parts) diff --git a/src/contractguard/analyzers/pii_analyzer.py b/src/contractguard/analyzers/pii_analyzer.py index 7623ef9..ec22b87 100644 --- a/src/contractguard/analyzers/pii_analyzer.py +++ b/src/contractguard/analyzers/pii_analyzer.py @@ -11,6 +11,7 @@ import ipaddress import json import re +import os from pathlib import Path from typing import Any @@ -109,10 +110,17 @@ def load_files(path: str | Path) -> list[tuple[str, str]]: _skip = {".pyc", ".exe", ".dll", ".png", ".jpg", ".gif", ".zip", ".tar", ".gz"} if path.is_dir(): - for f in sorted(path.rglob("*")): - if f.is_file() and f.suffix.lower() not in _skip and not should_skip_path(f): + for root, dirnames, filenames in os.walk(path): + root_path = Path(root) + dirnames[:] = [ + name for name in dirnames if not should_skip_path(root_path / name) + ] + for name in sorted(filenames): + file_path = root_path / name + if file_path.suffix.lower() in _skip or should_skip_path(file_path): + continue try: - files.append((str(f), f.read_text(encoding="utf-8", errors="replace"))) + files.append((str(file_path), file_path.read_text(encoding="utf-8", errors="replace"))) except Exception: continue elif path.is_file(): @@ -130,7 +138,14 @@ def _is_non_personal_ip(value: str) -> bool: ip_value = ipaddress.ip_address(value) except ValueError: return False - return ip_value.is_loopback or ip_value.is_unspecified or ip_value.is_reserved + return ( + ip_value.is_loopback + or ip_value.is_unspecified + or ip_value.is_reserved + or ip_value.is_private + or ip_value.is_link_local + or ip_value.is_multicast + ) def analyze(path: str | Path, rules_dir: str | Path) -> list[Finding]: diff --git a/src/contractguard/analyzers/secrets_analyzer.py b/src/contractguard/analyzers/secrets_analyzer.py index 60eb4b4..bb54d47 100644 --- a/src/contractguard/analyzers/secrets_analyzer.py +++ b/src/contractguard/analyzers/secrets_analyzer.py @@ -7,6 +7,7 @@ from __future__ import annotations import re +import os from pathlib import Path from typing import Any @@ -115,11 +116,18 @@ def load_files(path: str | Path) -> list[tuple[str, str]]: files: list[tuple[str, str]] = [] if path.is_dir(): - for f in sorted(path.rglob("*")): - if f.is_file() and f.suffix.lower() not in _SKIP_EXTENSIONS and not should_skip_path(f): + for root, dirnames, filenames in os.walk(path): + root_path = Path(root) + dirnames[:] = [ + name for name in dirnames if not should_skip_path(root_path / name) + ] + for name in sorted(filenames): + file_path = root_path / name + if file_path.suffix.lower() in _SKIP_EXTENSIONS or should_skip_path(file_path): + continue try: - content = f.read_text(encoding="utf-8", errors="replace") - files.append((str(f), content)) + content = file_path.read_text(encoding="utf-8", errors="replace") + files.append((str(file_path), content)) except Exception: continue elif path.is_file(): diff --git a/tests/test_pii_analyzer.py b/tests/test_pii_analyzer.py index 9b00d18..f279d69 100644 --- a/tests/test_pii_analyzer.py +++ b/tests/test_pii_analyzer.py @@ -3,8 +3,6 @@ from pathlib import Path import tempfile -import pytest - from contractguard.analyzers.pii_analyzer import analyze, extract_facts from contractguard.engine import Severity @@ -50,6 +48,12 @@ def test_clean_content_no_pii(self): assert facts["has_ssn"] is False assert facts["has_credit_card"] is False + def test_suppresses_non_personal_ips(self): + content = "bind 127.0.0.1\nlisten 0.0.0.0\nprivate 10.0.0.1\n" + facts = extract_facts(content) + assert facts["has_ip_address"] is False + assert facts["pii_count"] == 0 + def test_redacted_preview(self): content = '{"ssn": "123-45-6789"}' facts = extract_facts(content) @@ -90,3 +94,11 @@ def test_findings_have_compliance_info(self): assert any(f.attack_vector and ("GDPR" in f.attack_vector or "identity" in f.attack_vector.lower()) for f in findings) finally: path.unlink(missing_ok=True) + + def test_skips_vendor_directories(self, tmp_path): + skipped_dir = tmp_path / "node_modules" + skipped_dir.mkdir() + (skipped_dir / "pii.txt").write_text("ssn: 123-45-6789\n") + (tmp_path / "safe.txt").write_text("No personal info here.\n") + findings = analyze(tmp_path, RULES_DIR) + assert all("node_modules" not in f.location for f in findings) diff --git a/tests/test_secrets_analyzer.py b/tests/test_secrets_analyzer.py index 172c9a2..e1a0fd3 100644 --- a/tests/test_secrets_analyzer.py +++ b/tests/test_secrets_analyzer.py @@ -3,8 +3,6 @@ from pathlib import Path import tempfile -import pytest - from contractguard.analyzers.secrets_analyzer import analyze, extract_facts from contractguard.engine import Severity @@ -118,3 +116,11 @@ def test_findings_have_cwe(self): assert any(f.cwe for f in findings) finally: path.unlink(missing_ok=True) + + def test_skips_vendor_directories(self, tmp_path): + skipped_dir = tmp_path / "node_modules" + skipped_dir.mkdir() + (skipped_dir / "secret.env").write_text("DB_PASSWORD=admin123\n") + (tmp_path / "safe.txt").write_text("Nothing here\n") + findings = analyze(tmp_path, RULES_DIR) + assert all("node_modules" not in f.location for f in findings) From 6187cdc725df414389eb915b6928ea5345a02550 Mon Sep 17 00:00:00 2001 From: Brahadeesh V <144269250+CodeforGood1@users.noreply.github.com> Date: Tue, 12 May 2026 00:33:08 +0530 Subject: [PATCH 2/2] Bump version to 1.3.1 --- package.json | 4 ++-- pyproject.toml | 2 +- src/contractguard/__init__.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/package.json b/package.json index 66b32e5..291d226 100644 --- a/package.json +++ b/package.json @@ -2,7 +2,7 @@ "name": "contract-guard", "displayName": "contract-guard", "description": "A VS Code extension that finds security issues in code, configs, queries, Dockerfiles, and secrets.", - "version": "1.3.0", + "version": "1.3.1", "publisher": "BlackplaneSystems", "license": "Apache-2.0", "icon": "media/icon.png", @@ -164,7 +164,7 @@ }, "scripts": { "build": "tsc -p ./tsconfig.json", - "package": "node -e \"require('fs').mkdirSync('dist-vsix',{recursive:true})\" && vsce package --out dist-vsix/contractguard-1.3.0.vsix", + "package": "node -e \"require('fs').mkdirSync('dist-vsix',{recursive:true})\" && vsce package --out dist-vsix/contractguard-1.3.1.vsix", "prepackage": "npm run build" }, "devDependencies": { diff --git a/pyproject.toml b/pyproject.toml index b1d5504..228404a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "contractguard" -version = "1.3.0" +version = "1.3.1" description = "ContractGuard security analysis core for VS Code and CI workflows." readme = "README.md" license = {text = "Apache-2.0"} diff --git a/src/contractguard/__init__.py b/src/contractguard/__init__.py index 842e375..57b9bbc 100644 --- a/src/contractguard/__init__.py +++ b/src/contractguard/__init__.py @@ -1,3 +1,3 @@ """ContractGuard core package.""" -__version__ = "1.3.0" +__version__ = "1.3.1"