From 792d351c04375b3af219924bda52bbc56c976b24 Mon Sep 17 00:00:00 2001 From: John Morrissey Date: Fri, 5 Jun 2026 21:53:24 +1000 Subject: [PATCH] fix(scanner): reject spoofed builtin trust decorators Close a HIGH-severity false-green. The decorator taint provider trusted any FQN whose prefix was a builtin marker module and whose final segment was a known marker name, without verifying it resolved to Wardline's real package. A scanned project shipping its own `wardline/decorators` (or `loom_markers`) no-op could spoof `@trusted`/`@trust_boundary`, launder untrusted data up to a trusted tier, and suppress real taint->sink findings. - Builtin markers now match ONLY exact exports (`P.{canonical_name}`, `P.trust.{canonical_name}`); nested-path spoofs are rejected. Custom grammar markers keep prefix+name. - Fail closed for builtin markers under any marker root the scanned project shadows; shadow roots are derived dynamically from BUILTIN_BOUNDARY_TYPES (`wardline` AND `loom_markers`), not hardcoded. - Thread a per-root shadow-aware provider fingerprint through the parse stage into both dirty-detection and the resolver summary cache, so a TRUSTED summary computed under a non-shadowed root cannot be reused under a shadowed one (cross-root cache poisoning). - Document the Clarion fact-key residual: the content-hash freshness gate is a cross-tool contract Clarion independently recomputes, so a Wardline-private shadow bit cannot be folded in without a Clarion read-path change. Opt-in (`--clarion-url`) path, not the scan gate. Reimplements and generalizes the fix proposed in #26 (which closed only the `wardline` root and now conflicts with main). Full suite green (2390 passed), ruff + mypy clean, self-scan unchanged (0 new defects). 
Co-Authored-By: Claude Opus 4.8 (1M context) --- CHANGELOG.md | 31 ++++++ src/wardline/clarion/facts.py | 13 +++ src/wardline/scanner/analyzer.py | 9 +- src/wardline/scanner/pipeline.py | 39 +++++++- .../scanner/taint/decorator_provider.py | 90 +++++++++++++++-- src/wardline/scanner/taint/provider.py | 10 +- .../scanner/taint/test_decorator_provider.py | 97 ++++++++++++++++++- tests/unit/scanner/test_pipeline.py | 79 +++++++++++++++ 8 files changed, 349 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 110daae7..e3e21383 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -55,6 +55,37 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 CLI verb shares the same filter core. (WS-B1, WS-B2) ### Security +- **Builtin trust-marker decorators are now trusted only when they resolve to the + real exports — closes a spoofable false-green.** The default decorator seeding + trusted ANY FQN whose prefix was a builtin marker module and whose final segment + was a known marker name, without verifying the decorator resolved to Wardline's + real package. A scanned project could ship its own `wardline/decorators/__init__.py` + (or `loom_markers/__init__.py`) defining a no-op `trusted`/`trust_boundary`, apply + it to a leaky function, and have the analyzer anchor it as TRUSTED — suppressing + real taint→sink flows (a false GREEN that hides defects). Nested spoof paths + (`wardline.decorators.evil.trusted`, `loom_markers.evil.trusted`) were also accepted. + Builtin markers now match ONLY their exact public re-export (`P.{canonical_name}`) or + implementation-module export (`P.trust.{canonical_name}`), and the provider FAILS CLOSED for a + builtin marker root the scanned project shadows (defines its own top-level `wardline` + / `loom_markers` package). The shadowed-root set is derived dynamically from the + grammar (`{bt.module_prefix.split('.')[0] for bt in BUILTIN_BOUNDARY_TYPES if + bt.builtin}`), so every builtin marker root is covered, not just `wardline`. 
Custom + (non-builtin) grammar markers keep the documented prefix + canonical-name behavior — + a project defining its own custom marker package is the intended extension use. + **Cache-key hardening:** the per-root shadow state is folded into a shadow-aware + provider fingerprint threaded through BOTH the pipeline dirty-detection key and the + resolver's summary cache, so a TRUSTED summary computed under one shadow state can + never be reused under another (cross-root cache poisoning). The fingerprint stays + byte-identical to today's value when nothing is shadowed. **Clarion residual + (documented, not threaded):** the opt-in `--clarion-url` taint-fact + `content_hash_at_compute` is whole-file raw-byte blake3 only — it cannot observe + shadow state, so identical file bytes scanned once unshadowed then under a shadow + could serve a stale TRUSTED fact via the MCP `explain_taint` / Clarion read path. The + shadow bit is deliberately NOT mixed into this hash because it is a cross-tool + contract value Clarion's read path independently recomputes and compares; mixing in a + Wardline-private bit would break fact reconciliation entirely. Closing it fully needs + a Clarion read-path contract change; the keying site carries an explicit comment. This + path is opt-in and not the scan gate, so impact is lower. 
- **Dangerous-sink rules now see lambda bodies (closes a false-green).** `_own_calls` treated `ast.Lambda` as a separate scope and only inspected lambda *default* expressions, so a sink reached inside a lambda *body* — `cb = lambda: eval(src)`, diff --git a/src/wardline/clarion/facts.py b/src/wardline/clarion/facts.py index 49a24a26..d115f1f5 100644 --- a/src/wardline/clarion/facts.py +++ b/src/wardline/clarion/facts.py @@ -78,6 +78,19 @@ def build_taint_facts(result: ScanResult, root: Path) -> list[dict[str, Any]]: rel_path = entity.location.path if rel_path not in hash_cache: hash_cache[rel_path] = blake3.blake3(_read_bytes(root / rel_path)).hexdigest() + # RESIDUAL (builtin-marker shadow false-green): ``content_hash_at_compute`` + # is whole-file raw-byte blake3 ONLY — it cannot observe the shadow state of + # a builtin marker root. So identical file bytes scanned once UNSHADOWED then + # under a project that shadows ``wardline``/``loom_markers`` could serve a + # stale TRUSTED fact via the MCP explain_taint / Clarion read path. We do NOT + # fold the shadow bit / provider fingerprint into this hash: it is a + # CROSS-TOOL contract value — Clarion's read path INDEPENDENTLY recomputes + # the whole-file blake3 (clarion_storage::current_file_hash) and compares it + # against the in-blob copy. Mixing in a Wardline-private bit would make every + # comparison mismatch and break fact reconciliation entirely; there is no + # separate Wardline-owned compute-key the freshness gate consults. Closing + # this fully needs a Clarion read-path contract change. Lower impact: this + # path is opt-in (--clarion-url) and not the scan gate. See CHANGELOG. 
content_hash = hash_cache[rel_path] declared = context.project_return_taints.get(qualname) diff --git a/src/wardline/scanner/analyzer.py b/src/wardline/scanner/analyzer.py index 021106b1..47c2db31 100644 --- a/src/wardline/scanner/analyzer.py +++ b/src/wardline/scanner/analyzer.py @@ -128,10 +128,15 @@ def _analyze_inner(self, files: Sequence[Path], config: WardlineConfig, *, root: parse_findings = list(parse_stage.parse_findings) dirty_modules = set(parse_stage.dirty_modules) + # Use the SHADOW-AWARE provider fingerprint computed during the parse stage + # for BOTH the dirty-detection key (above, inside the parse stage) AND the + # resolver's summary cache here. They MUST agree, or a summary computed under + # a non-shadowed root could be served when re-scanning a shadowed one + # (cross-root cache poisoning → a spoofed-trust false GREEN survives). if self._cache is not None: result = resolve_project_taints( modules=modules, - provider_fingerprint=self._provider.fingerprint(), + provider_fingerprint=parse_stage.provider_fingerprint, summary_cache=self._cache, dirty_modules=frozenset(dirty_modules), config=config, @@ -139,7 +144,7 @@ def _analyze_inner(self, files: Sequence[Path], config: WardlineConfig, *, root: else: result = resolve_project_taints( modules=modules, - provider_fingerprint=self._provider.fingerprint(), + provider_fingerprint=parse_stage.provider_fingerprint, config=config, ) diff --git a/src/wardline/scanner/pipeline.py b/src/wardline/scanner/pipeline.py index 5d2fb46b..0e165144 100644 --- a/src/wardline/scanner/pipeline.py +++ b/src/wardline/scanner/pipeline.py @@ -7,7 +7,7 @@ from collections.abc import Sequence from dataclasses import dataclass from pathlib import Path -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any, cast from wardline.core.finding import Finding, Kind, Location, Severity from wardline.core.qualname import module_dotted_name @@ -59,6 +59,23 @@ class ParseProjectOutput: files: list[ParsedFile] 
parse_findings: list[Finding] dirty_modules: frozenset[str] + provider_fingerprint: str + + +def _provider_fingerprint_for_project(provider: TaintSourceProvider, project_modules: frozenset[str]) -> str: + """Project-aware provider fingerprint, falling back to the bare one. + + A provider may expose ``fingerprint_for_project(project_modules)`` to fold + project-shadow state (which builtin marker roots the scan shadows) into the + summary-cache key — preventing a warm cache from serving a TRUSTED summary + computed under a non-shadowed root when re-scanning a shadowed one. Providers + that do not (the trivial default) fall back to the plain ``fingerprint()``. + """ + project_fingerprint = getattr(provider, "fingerprint_for_project", None) + if callable(project_fingerprint): + typed_project_fingerprint = cast(Any, project_fingerprint) + return str(typed_project_fingerprint(project_modules)) + return provider.fingerprint() def run_parse_project_stage(stage_input: ParseProjectInput) -> ParseProjectOutput: @@ -69,6 +86,22 @@ def run_parse_project_stage(stage_input: ParseProjectInput) -> ParseProjectOutpu dirty_modules: set[str] = set() root = stage_input.root.resolve() + # The set of dotted module names in the scan. Used to fail closed for builtin + # markers when the project shadows a builtin marker root, AND to compute the + # shadow-aware provider fingerprint threaded into BOTH the dirty-detection key + # below and the resolver's summary cache (see analyzer.py). 
+ project_modules = frozenset( + module + for path in stage_input.files + if ( + module := module_dotted_name( + path.relative_to(root).as_posix() if path.is_relative_to(root) else path.as_posix() + ) + ) + is not None + ) + provider_fingerprint = _provider_fingerprint_for_project(stage_input.provider, project_modules) + for path in stage_input.files: relpath = path.relative_to(root).as_posix() if path.is_relative_to(root) else path.as_posix() module = module_dotted_name(relpath) @@ -90,7 +123,6 @@ def run_parse_project_stage(stage_input: ParseProjectInput) -> ParseProjectOutpu source = path.read_text(encoding="utf-8") source_bytes = source.encode("utf-8") - provider_fingerprint = stage_input.provider.fingerprint() from wardline.scanner.taint.project_resolver import _RESOLVER_VERSION from wardline.scanner.taint.summary import SUMMARY_SCHEMA_VERSION, compute_cache_key @@ -116,7 +148,7 @@ def run_parse_project_stage(stage_input: ParseProjectInput) -> ParseProjectOutpu ) seeds = seed_function_taints( entities, - ctx=SeedContext(module=module, alias_map=alias_map), + ctx=SeedContext(module=module, alias_map=alias_map, project_modules=project_modules), provider=stage_input.provider, ) for ent in entities: @@ -205,6 +237,7 @@ def run_parse_project_stage(stage_input: ParseProjectInput) -> ParseProjectOutpu files=parsed_files, parse_findings=parse_findings, dirty_modules=frozenset(dirty_modules), + provider_fingerprint=provider_fingerprint, ) diff --git a/src/wardline/scanner/taint/decorator_provider.py b/src/wardline/scanner/taint/decorator_provider.py index 43568985..d9d08288 100644 --- a/src/wardline/scanner/taint/decorator_provider.py +++ b/src/wardline/scanner/taint/decorator_provider.py @@ -31,6 +31,15 @@ _LOOM_MARKERS_PREFIX = "loom_markers" _TAINTSTATE_FQN = "wardline.core.taints.TaintState" +# The top-level import roots of every BUILTIN marker module — derived dynamically +# from the grammar so adding a builtin marker root (e.g. 
a future ``loom_markers`` +# sibling) automatically participates in shadow fail-closed + exact-export matching. +# A ``loom_markers`` boundary type has module_prefix ``loom_markers`` (root +# ``loom_markers``); a ``wardline.decorators`` one has root ``wardline``. +_BUILTIN_MARKER_ROOTS: frozenset[str] = frozenset( + bt.module_prefix.split(".")[0] for bt in BUILTIN_BOUNDARY_TYPES if getattr(bt, "builtin", False) +) + def vocabulary_star_exports() -> dict[str, dict[str, str]]: """Statically-known star-export map for builtin trust-marker modules. @@ -84,6 +93,38 @@ def _resolve_decorator_fqn(deco: ast.expr, alias_map: Mapping[str, str]) -> str return _resolve_dotted_fqn(func, alias_map) +def _shadowed_builtin_roots(project_modules: frozenset[str]) -> frozenset[str]: + """Return the builtin marker roots the scanned project SHADOWS. + + Builtin marker declarations must refer to the installed marker package, not a + module supplied by the scanned project. A root is shadowed when the project + itself defines a TOP-LEVEL module/package equal to that root (e.g. its own + ``wardline`` or ``loom_markers`` package): Python import resolution can then + bind ``wardline.decorators`` / ``loom_markers`` to attacker-controlled code, so + builtin matching fails closed for markers under that root. + + Only the FIRST dotted component is compared, so an unrelated nested module such + as ``app.wardline_helper`` or ``myloom.wardline`` does NOT trip a shadow. + """ + project_roots = {module.split(".", 1)[0] for module in project_modules} + return frozenset(project_roots & _BUILTIN_MARKER_ROOTS) + + +def _is_builtin_decorator_fqn(fqn: str, canonical_name: str, module_prefix: str) -> bool: + """Return whether *fqn* is one of the exact builtin decorator exports. 
+ + For a builtin boundary type with prefix ``P``, only the public re-export + ``P.`` and the implementation-module export ``P.trust.`` are + accepted (mirroring ``wardline/decorators/__init__.py`` and + ``wardline/decorators/trust.py``). Prefix + arbitrary-nested + final-segment + paths (e.g. ``wardline.decorators.evil.trusted``) are rejected for builtins. + """ + return fqn in { + f"{module_prefix}.{canonical_name}", + f"{module_prefix}.trust.{canonical_name}", + } + + def _level_token(value: ast.expr, alias_map: Mapping[str, str]) -> str | None: """Extract a TaintState name token from a keyword-argument value node. @@ -184,8 +225,9 @@ def __init__(self, *, boundary_types: tuple[BoundaryType, ...] | None = None) -> def taint_for(self, entity: Entity, ctx: SeedContext) -> SeedResult: candidates: list[FunctionTaint] = [] unprovable: list[str] = [] + shadowed_roots = _shadowed_builtin_roots(ctx.project_modules) for deco in entity.node.decorator_list: - ft, unprov = self._match(deco, ctx.alias_map) + ft, unprov = self._match(deco, ctx.alias_map, shadowed_roots) if ft is not None: candidates.append(ft) elif unprov is not None: @@ -219,7 +261,30 @@ def fingerprint(self) -> str: return f"decorator-vocab:{REGISTRY_VERSION}" return f"decorator-vocab:{REGISTRY_VERSION}+grammar:{_grammar_digest(self._boundary_types)}" - def _match(self, deco: ast.expr, alias_map: Mapping[str, str]) -> tuple[FunctionTaint | None, str | None]: + def fingerprint_for_project(self, project_modules: frozenset[str]) -> str: + """Fingerprint declaration inputs that are external to a single module. + + Builtin seeding depends on WHICH builtin marker roots the scanned project + shadows; bind the EXACT shadowed-root SET into the summary-cache key so a + warm cache cannot reuse a TRUSTED summary across scans with different + shadow states (cross-root cache poisoning). 
Crucially this is per-root: a + scan that shadows only ``wardline`` and one that shadows only + ``loom_markers`` must NOT collide on the cache key. When nothing is + shadowed (the common case), returns the bare :meth:`fingerprint` string, + preserving today's exact cache/baseline-stable value. + """ + shadowed = _shadowed_builtin_roots(project_modules) + base = self.fingerprint() + if not shadowed: + return base + return f"{base}:shadowed-roots={','.join(sorted(shadowed))}" + + def _match( + self, + deco: ast.expr, + alias_map: Mapping[str, str], + shadowed_roots: frozenset[str], + ) -> tuple[FunctionTaint | None, str | None]: """Match one decorator against the loaded boundary types. Returns: ``(seed, None)`` — a boundary type matched and its levels proved; @@ -231,15 +296,22 @@ def _match(self, deco: ast.expr, alias_map: Mapping[str, str]) -> tuple[Function fqn = _resolve_decorator_fqn(deco, alias_map) if fqn is None: return None, None - # A decorator matches a boundary type when its FQN is UNDER the type's module - # prefix and its final segment is the canonical name. This accepts BOTH the - # package re-export (``wardline.decorators.trusted``) and the submodule path - # (``wardline.decorators.trust.trusted``) — preserving the pre-Track-2 matcher - # exactly (it used the same prefix + last-segment rule), and generalizing it - # consistently for custom types. + # Builtin markers are security-sensitive defaults: a scanned project could + # ship its own ``wardline/decorators`` (or ``loom_markers``) no-op shadowing + # the real package, spoof @trusted, and suppress real taint→sink flows (a + # false GREEN). So a builtin matches ONLY an EXACT known export + # (``P.`` or ``P.trust.``), and is rejected entirely when its + # marker ROOT is shadowed by a project-local top-level module. 
Custom + # (non-builtin) grammar markers keep the documented prefix + canonical-name + # rule — a project defining its OWN custom marker package is the intended + # extension use, and its root is not a builtin we ship. last = fqn.rsplit(".", 1)[-1] for bt in self._boundary_types: - if last != bt.canonical_name or not fqn.startswith(bt.module_prefix + "."): + if bt.builtin: + root = bt.module_prefix.split(".")[0] + if root in shadowed_roots or not _is_builtin_decorator_fqn(fqn, bt.canonical_name, bt.module_prefix): + continue + elif last != bt.canonical_name or not fqn.startswith(bt.module_prefix + "."): continue levels: dict[str, TaintState] = {} unreadable = False diff --git a/src/wardline/scanner/taint/provider.py b/src/wardline/scanner/taint/provider.py index 0ee43a63..ab16a58b 100644 --- a/src/wardline/scanner/taint/provider.py +++ b/src/wardline/scanner/taint/provider.py @@ -30,13 +30,17 @@ class SeedContext: ``alias_map`` is the file's ``{local_name: fully_qualified_name}`` import map (from ``build_import_alias_map``); a provider uses it to resolve aliased - decorator names against the trust vocabulary. Defaults to empty so callers - that do not seed from decorators (e.g. the trivial default provider's tests) - need not supply it. + decorator names against the trust vocabulary. ``project_modules`` is the set of + dotted module names discovered in the scanned project; a provider uses it to + fail closed for BUILTIN markers when the project shadows a builtin marker root + (e.g. ships its own ``wardline``/``loom_markers`` package). Both default to + empty so callers that do not seed from decorators (e.g. the trivial default + provider's tests) need not supply them. 
""" module: str alias_map: Mapping[str, str] = field(default_factory=dict) + project_modules: frozenset[str] = field(default_factory=frozenset) @dataclass(frozen=True, slots=True) diff --git a/tests/unit/scanner/taint/test_decorator_provider.py b/tests/unit/scanner/taint/test_decorator_provider.py index da990a7b..3acb26b5 100644 --- a/tests/unit/scanner/taint/test_decorator_provider.py +++ b/tests/unit/scanner/taint/test_decorator_provider.py @@ -11,12 +11,17 @@ from wardline.scanner.taint.provider import FunctionTaint, SeedContext -def _seed(src: str, *, module: str = "m") -> dict[str, FunctionTaint | None]: +def _seed( + src: str, + *, + module: str = "m", + project_modules: frozenset[str] = frozenset(), +) -> dict[str, FunctionTaint | None]: """Run the provider over every function entity in *src*; map qualname -> result.""" tree = ast.parse(src) alias_map = build_import_alias_map(tree, module_path=module) entities = discover_file_entities(tree, module=module, path="m.py") - ctx = SeedContext(module=module, alias_map=alias_map) + ctx = SeedContext(module=module, alias_map=alias_map, project_modules=project_modules) provider = DecoratorTaintSourceProvider() # .taint: assertions here compare the declared FunctionTaint; the unprovable- # boundary signal (Track 2 T2.4) is exercised separately in tests/grammar/. @@ -272,3 +277,91 @@ def test_wardline_prefixed_but_unknown_decorator_is_no_opinion() -> None: # (``wardline.decorators.bogus``) — canonical not in REGISTRY -> no opinion. out = _seed("import wardline.decorators\n@wardline.decorators.bogus\ndef f():\n return 1\n") assert out["m.f"] is None + + +# ── Security: builtin marker decorators must resolve to the REAL exports only. +# A scanned project shipping a no-op ``trusted``/``trust_boundary`` under a builtin +# marker root (``wardline``/``loom_markers``) must NOT anchor trust — that would +# suppress real taint→sink flows (false GREEN). 
── + + +def test_builtin_decorator_requires_exact_known_export() -> None: + # Nested-path spoof: prefix + final-segment matching would accept this. Builtins + # must be EXACT public/implementation exports, so a wardline.decorators.evil.trusted + # path is rejected → no opinion (the @trusted spoof does not anchor trust). + out = _seed("from wardline.decorators import evil\n@evil.trusted\ndef f():\n return 1\n") + assert out["m.f"] is None + + +def test_loom_markers_nested_path_spoof_rejected() -> None: + # Same nested-path spoof under the loom_markers root. + out = _seed("from loom_markers import evil\n@evil.trusted\ndef f():\n return 1\n") + assert out["m.f"] is None + + +def test_builtin_decorator_fails_closed_when_project_shadows_wardline() -> None: + # A scanned project that defines its own ``wardline.decorators`` controls what + # this import means at runtime, so the default provider must NOT anchor it as the + # real marker package — fail closed (no opinion) for the spoofed @trusted. + out = _seed( + "from wardline.decorators import trusted\n@trusted\ndef f():\n return 1\n", + project_modules=frozenset({"app", "wardline", "wardline.decorators"}), + ) + assert out["m.f"] is None + + +def test_builtin_decorator_fails_closed_when_project_shadows_loom_markers() -> None: + # GENERALIZATION (the gap the codex PR left open): a project shadowing + # ``loom_markers`` must also fail closed for loom_markers builtin markers. + out = _seed( + "from loom_markers import trusted\n@trusted\ndef f():\n return 1\n", + project_modules=frozenset({"app", "loom_markers"}), + ) + assert out["m.f"] is None + + +def test_shadowing_one_root_does_not_disable_the_other() -> None: + # Per-root, not a global bool: shadowing ``wardline`` must NOT stop a legitimate + # ``loom_markers`` builtin marker from anchoring (and vice versa). 
+ out = _seed( + "from loom_markers import trusted\n@trusted\ndef f():\n return 1\n", + project_modules=frozenset({"app", "wardline"}), + ) + assert out["m.f"] == FunctionTaint(T.INTEGRAL, T.INTEGRAL) + + +def test_builtin_decorator_accepts_implementation_module_export() -> None: + # Legit impl-module form: ``from wardline.decorators.trust import trusted`` still + # anchors (one of the two exact accepted exports). + out = _seed("from wardline.decorators.trust import trusted\n@trusted\ndef f():\n return 1\n") + assert out["m.f"] == FunctionTaint(T.INTEGRAL, T.INTEGRAL) + + +def test_builtin_decorator_accepts_loom_markers_implementation_module_export() -> None: + out = _seed("from loom_markers.trust import trusted\n@trusted\ndef f():\n return 1\n") + assert out["m.f"] == FunctionTaint(T.INTEGRAL, T.INTEGRAL) + + +def test_unrelated_nested_module_does_not_trip_shadow() -> None: + # Scoping: only TOP-LEVEL module names trigger a shadow. ``app.wardline_helper`` + # and ``myloom.wardline`` are NOT the builtin roots, so a legit @trusted anchors. + out = _seed( + "from wardline.decorators import trusted\n@trusted\ndef f():\n return 1\n", + project_modules=frozenset({"app.wardline_helper", "myloom.wardline"}), + ) + assert out["m.f"] == FunctionTaint(T.INTEGRAL, T.INTEGRAL) + + +def test_fingerprint_for_project_differs_between_shadowed_and_unshadowed() -> None: + # Cache-key hardening: the project-aware fingerprint must differ across shadow + # states (so a TRUSTED summary is never reused across them), and per-root so the + # two roots do not collide. Unshadowed returns the bare (stability-preserving) value. 
+ provider = DecoratorTaintSourceProvider() + bare = provider.fingerprint() + unshadowed = provider.fingerprint_for_project(frozenset({"app"})) + shadow_wardline = provider.fingerprint_for_project(frozenset({"app", "wardline"})) + shadow_loom = provider.fingerprint_for_project(frozenset({"app", "loom_markers"})) + assert unshadowed == bare + assert shadow_wardline != bare + assert shadow_loom != bare + assert shadow_wardline != shadow_loom # per-root, not a single bool diff --git a/tests/unit/scanner/test_pipeline.py b/tests/unit/scanner/test_pipeline.py index 09d6268d..52d892ea 100644 --- a/tests/unit/scanner/test_pipeline.py +++ b/tests/unit/scanner/test_pipeline.py @@ -59,3 +59,82 @@ def test_parse_project_stage_returns_typed_modules_and_dirty_scope(tmp_path) -> assert result.files[0].relpath == "m.py" assert result.files[0].module == "m" assert result.files[0].entities[0].qualname == "m.read_raw" + + +def _shadow_project(tmp_path, root: str): # noqa: ANN001, ANN202 + """Write an app that spoofs ``@trusted`` from a project-local shadow of *root*.""" + app = tmp_path / "app.py" + app.write_text( + f"from {root} import trusted\n@trusted\ndef unsafe(p):\n return p\n", + encoding="utf-8", + ) + shadow_pkg = tmp_path / root / "decorators" if root == "wardline" else tmp_path / root + shadow_pkg.mkdir(parents=True) + if root == "wardline": + (tmp_path / "wardline" / "__init__.py").write_text("", encoding="utf-8") + files = (app, tmp_path / "wardline" / "__init__.py", shadow_pkg / "__init__.py") + else: + files = (app, shadow_pkg / "__init__.py") + (shadow_pkg / "__init__.py").write_text("def trusted(fn):\n return fn\n", encoding="utf-8") + return app, files + + +def test_parse_project_stage_fails_closed_for_shadowed_wardline_decorators(tmp_path) -> None: + _app, files = _shadow_project(tmp_path, "wardline") + result = run_parse_project_stage( + ParseProjectInput( + files=files, + root=tmp_path, + provider=DecoratorTaintSourceProvider(), + config=WardlineConfig(), + 
star_exports=vocabulary_star_exports(), + ) + ) + app_module = next(m for m in result.modules if m.module_path == "app") + seed = app_module.seeds["app.unsafe"] + assert seed.source == "default" + assert seed.body_taint == T.UNKNOWN_RAW + assert "shadowed-roots=" in result.provider_fingerprint + assert "wardline" in result.provider_fingerprint + + +def test_parse_project_stage_fails_closed_for_shadowed_loom_markers(tmp_path) -> None: + # The generalization the codex PR left open: shadowing ``loom_markers`` must also + # fail closed and the shadow bit must reach the provider fingerprint. + _app, files = _shadow_project(tmp_path, "loom_markers") + result = run_parse_project_stage( + ParseProjectInput( + files=files, + root=tmp_path, + provider=DecoratorTaintSourceProvider(), + config=WardlineConfig(), + star_exports=vocabulary_star_exports(), + ) + ) + app_module = next(m for m in result.modules if m.module_path == "app") + seed = app_module.seeds["app.unsafe"] + assert seed.source == "default" + assert seed.body_taint == T.UNKNOWN_RAW + assert "loom_markers" in result.provider_fingerprint + + +def test_parse_project_stage_unshadowed_fingerprint_is_bare(tmp_path) -> None: + # No shadow → today's exact (cache/baseline-stable) fingerprint, no suffix. + path = tmp_path / "m.py" + path.write_text( + "from wardline.decorators import trusted\n@trusted\ndef f(p):\n return p\n", + encoding="utf-8", + ) + result = run_parse_project_stage( + ParseProjectInput( + files=(path,), + root=tmp_path, + provider=DecoratorTaintSourceProvider(), + config=WardlineConfig(), + star_exports=vocabulary_star_exports(), + ) + ) + assert "shadowed-roots=" not in result.provider_fingerprint + assert result.provider_fingerprint == DecoratorTaintSourceProvider().fingerprint() + seed = result.modules[0].seeds["m.f"] + assert seed.body_taint == T.INTEGRAL