diff --git a/.jules/bolt.md b/.jules/bolt.md new file mode 100644 index 00000000..c6e84f7b --- /dev/null +++ b/.jules/bolt.md @@ -0,0 +1,3 @@ +## 2026-06-07 - Python loop overhead in graph traversal +**Learning:** In highly connected graphs within a static analysis engine, looping over sets in Python (`for mod in callers: if mod not in closure: ...`) can introduce significant bytecode execution overhead. +**Action:** Replace Python-level membership-checking loops with C-level set operations (e.g., `callers - closure`, which assumes the adjacency values are `set`/`frozenset` objects) and keep the graph frontier in a list used as a stack instead of building a new frontier set for every traversal level. The per-element work is still linear, but it runs inside the C set implementation rather than as Python bytecode in the hot path. diff --git a/src/wardline/scanner/taint/reverse_edge_index.py b/src/wardline/scanner/taint/reverse_edge_index.py index d7ec7d51..70935e47 100644 --- a/src/wardline/scanner/taint/reverse_edge_index.py +++ b/src/wardline/scanner/taint/reverse_edge_index.py @@ -66,15 +66,15 @@ def callers_of(self, callee_module: str) -> frozenset[str]: def transitive_callers(self, seeds: frozenset[str]) -> frozenset[str]: """``seeds`` plus every transitively-reverse-reachable module.""" closure: set[str] = set(seeds) - frontier: set[str] = set(seeds) + frontier: list[str] = list(seeds) + get_callers = self._reverse.get + while frontier: - next_frontier: set[str] = set() - for mod in frontier: - if mod not in self._reverse: - continue - for caller_mod in self._reverse[mod]: - if caller_mod not in closure: - closure.add(caller_mod) - next_frontier.add(caller_mod) - frontier = next_frontier + mod = frontier.pop() + callers = get_callers(mod) + if callers: + new_callers = callers - closure + if new_callers: + closure.update(new_callers) + frontier.extend(new_callers) return frozenset(closure)