-
Notifications
You must be signed in to change notification settings - Fork 0
fix(scanner): reject spoofed builtin trust decorators (supersedes #26) #27
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -31,6 +31,15 @@ | |
| _LOOM_MARKERS_PREFIX = "loom_markers" | ||
| _TAINTSTATE_FQN = "wardline.core.taints.TaintState" | ||
|
|
||
| # The top-level import roots of every BUILTIN marker module — derived dynamically | ||
| # from the grammar so adding a builtin marker root (e.g. a future ``loom_markers`` | ||
| # sibling) automatically participates in shadow fail-closed + exact-export matching. | ||
| # A ``loom_markers`` boundary type has module_prefix ``loom_markers`` (root | ||
| # ``loom_markers``); a ``wardline.decorators`` one has root ``wardline``. | ||
| _BUILTIN_MARKER_ROOTS: frozenset[str] = frozenset( | ||
| bt.module_prefix.split(".")[0] for bt in BUILTIN_BOUNDARY_TYPES if getattr(bt, "builtin", False) | ||
| ) | ||
|
|
||
|
|
||
| def vocabulary_star_exports() -> dict[str, dict[str, str]]: | ||
| """Statically-known star-export map for builtin trust-marker modules. | ||
|
|
@@ -84,6 +93,38 @@ def _resolve_decorator_fqn(deco: ast.expr, alias_map: Mapping[str, str]) -> str | |
| return _resolve_dotted_fqn(func, alias_map) | ||
|
|
||
|
|
||
| def _shadowed_builtin_roots(project_modules: frozenset[str]) -> frozenset[str]: | ||
| """Return the builtin marker roots the scanned project SHADOWS. | ||
|
|
||
| Builtin marker declarations must refer to the installed marker package, not a | ||
| module supplied by the scanned project. A root is shadowed when the project | ||
| itself defines a TOP-LEVEL module/package equal to that root (e.g. its own | ||
| ``wardline`` or ``loom_markers`` package): Python import resolution can then | ||
| bind ``wardline.decorators`` / ``loom_markers`` to attacker-controlled code, so | ||
| builtin matching fails closed for markers under that root. | ||
|
|
||
| Only the FIRST dotted component is compared, so an unrelated nested module such | ||
| as ``app.wardline_helper`` or ``myloom.wardline`` does NOT trip a shadow. | ||
| """ | ||
| project_roots = {module.split(".", 1)[0] for module in project_modules} | ||
| return frozenset(project_roots & _BUILTIN_MARKER_ROOTS) | ||
|
|
||
|
|
||
| def _is_builtin_decorator_fqn(fqn: str, canonical_name: str, module_prefix: str) -> bool: | ||
| """Return whether *fqn* is one of the exact builtin decorator exports. | ||
|
|
||
| For a builtin boundary type with prefix ``P``, only the public re-export | ||
| ``P.<name>`` and the implementation-module export ``P.trust.<name>`` are | ||
| accepted (mirroring ``wardline/decorators/__init__.py`` and | ||
| ``wardline/decorators/trust.py``). Prefix + arbitrary-nested + final-segment | ||
| paths (e.g. ``wardline.decorators.evil.trusted``) are rejected for builtins. | ||
| """ | ||
| return fqn in { | ||
| f"{module_prefix}.{canonical_name}", | ||
| f"{module_prefix}.trust.{canonical_name}", | ||
| } | ||
|
Comment on lines
+122
to
+125
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
For the Useful? React with 👍 / 👎. |
||
|
|
||
|
|
||
| def _level_token(value: ast.expr, alias_map: Mapping[str, str]) -> str | None: | ||
| """Extract a TaintState name token from a keyword-argument value node. | ||
|
|
||
|
|
@@ -184,8 +225,9 @@ def __init__(self, *, boundary_types: tuple[BoundaryType, ...] | None = None) -> | |
| def taint_for(self, entity: Entity, ctx: SeedContext) -> SeedResult: | ||
| candidates: list[FunctionTaint] = [] | ||
| unprovable: list[str] = [] | ||
| shadowed_roots = _shadowed_builtin_roots(ctx.project_modules) | ||
| for deco in entity.node.decorator_list: | ||
| ft, unprov = self._match(deco, ctx.alias_map) | ||
| ft, unprov = self._match(deco, ctx.alias_map, shadowed_roots) | ||
| if ft is not None: | ||
| candidates.append(ft) | ||
| elif unprov is not None: | ||
|
|
@@ -219,7 +261,30 @@ def fingerprint(self) -> str: | |
| return f"decorator-vocab:{REGISTRY_VERSION}" | ||
| return f"decorator-vocab:{REGISTRY_VERSION}+grammar:{_grammar_digest(self._boundary_types)}" | ||
|
|
||
| def _match(self, deco: ast.expr, alias_map: Mapping[str, str]) -> tuple[FunctionTaint | None, str | None]: | ||
| def fingerprint_for_project(self, project_modules: frozenset[str]) -> str: | ||
| """Fingerprint declaration inputs that are external to a single module. | ||
|
|
||
| Builtin seeding depends on WHICH builtin marker roots the scanned project | ||
| shadows; bind the EXACT shadowed-root SET into the summary-cache key so a | ||
| warm cache cannot reuse a TRUSTED summary across scans with different | ||
| shadow states (cross-root cache poisoning). Crucially this is per-root: a | ||
| scan that shadows only ``wardline`` and one that shadows only | ||
| ``loom_markers`` must NOT collide on the cache key. When nothing is | ||
| shadowed (the common case), returns the bare :meth:`fingerprint` string, | ||
| preserving today's exact cache/baseline-stable value. | ||
| """ | ||
| shadowed = _shadowed_builtin_roots(project_modules) | ||
| base = self.fingerprint() | ||
| if not shadowed: | ||
| return base | ||
| return f"{base}:shadowed-roots={','.join(sorted(shadowed))}" | ||
|
|
||
| def _match( | ||
| self, | ||
| deco: ast.expr, | ||
| alias_map: Mapping[str, str], | ||
| shadowed_roots: frozenset[str], | ||
| ) -> tuple[FunctionTaint | None, str | None]: | ||
| """Match one decorator against the loaded boundary types. Returns: | ||
|
|
||
| ``(seed, None)`` — a boundary type matched and its levels proved; | ||
|
|
@@ -231,15 +296,22 @@ def _match(self, deco: ast.expr, alias_map: Mapping[str, str]) -> tuple[Function | |
| fqn = _resolve_decorator_fqn(deco, alias_map) | ||
| if fqn is None: | ||
| return None, None | ||
| # A decorator matches a boundary type when its FQN is UNDER the type's module | ||
| # prefix and its final segment is the canonical name. This accepts BOTH the | ||
| # package re-export (``wardline.decorators.trusted``) and the submodule path | ||
| # (``wardline.decorators.trust.trusted``) — preserving the pre-Track-2 matcher | ||
| # exactly (it used the same prefix + last-segment rule), and generalizing it | ||
| # consistently for custom types. | ||
| # Builtin markers are security-sensitive defaults: a scanned project could | ||
| # ship its own ``wardline/decorators`` (or ``loom_markers``) no-op shadowing | ||
| # the real package, spoof @trusted, and suppress real taint→sink flows (a | ||
| # false GREEN). So a builtin matches ONLY an EXACT known export | ||
| # (``P.<name>`` or ``P.trust.<name>``), and is rejected entirely when its | ||
| # marker ROOT is shadowed by a project-local top-level module. Custom | ||
| # (non-builtin) grammar markers keep the documented prefix + canonical-name | ||
| # rule — a project defining its OWN custom marker package is the intended | ||
| # extension use, and its root is not a builtin we ship. | ||
| last = fqn.rsplit(".", 1)[-1] | ||
| for bt in self._boundary_types: | ||
| if last != bt.canonical_name or not fqn.startswith(bt.module_prefix + "."): | ||
| if bt.builtin: | ||
| root = bt.module_prefix.split(".")[0] | ||
| if root in shadowed_roots or not _is_builtin_decorator_fqn(fqn, bt.canonical_name, bt.module_prefix): | ||
| continue | ||
| elif last != bt.canonical_name or not fqn.startswith(bt.module_prefix + "."): | ||
| continue | ||
| levels: dict[str, TaintState] = {} | ||
| unreadable = False | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When a repository-local
wardline/orloom_markers/package is present but excluded from discovery (for exampleexclude: ["wardline/**"], or otherwise omitted fromfiles),project_modulesis built only from the remaining scanned files, soshadowed_rootsstays empty andfrom wardline.decorators import trustedin an analyzed file is still anchored as the real builtin marker. That reopens the spoofed-decorator false-green this change is trying to close for any untrusted repo that can hide the shadow package via config; the shadow check needs to inspect importable roots under the project, not just the files that survived discovery.Useful? React with 👍 / 👎.