From 7cc8e425b91c20414246ff27543903fe47573d97 Mon Sep 17 00:00:00 2001
From: Rainier Potgieter <rain.singlesource@gmail.com>
Date: Wed, 27 May 2026 10:33:37 +0200
Subject: [PATCH] perf(core): cache parsed ASTs in the expression engine

evaluate() ran extractExpression -> tokenize -> parse on every call, even
though the engine is pure (no eval/Function, and evaluateNode never mutates the
AST) so a parsed AST is valid forever. In a long-running spec where the same
quality gate, invariant, or verification check fires repeatedly, the
tokenize/parse work compounds. This is Candidate 2 of issue #46.

Add a module-level Map<string, ASTNode> keyed on the raw template string, with
a 1000-entry soft cap and LRU eviction (Map insertion order: a hit re-inserts
the key, eviction drops the oldest). Only successful parses are cached; a
malformed template re-throws on every call exactly as before. evaluateNode
still runs per call against the live context, so results stay per-context.

clearExpressionCache(), expressionCacheSize(), and expressionCacheHas() are
exported for test isolation and diagnostics (pure reads; cacheHas does not
affect recency).

Behaviour-preserving: cached and uncached evaluations are identical, distinct
templates never collide, and eviction at the cap re-parses without corrupting
results. Tests cover cache-hit equivalence across contexts, no cross-template
collision, cap-bounded eviction, correct re-evaluation of an evicted template,
and an LRU-vs-FIFO discriminator (verified to fail under FIFO) proving a hot
template survives eviction while the least-recently-used victim is dropped.

Proof (npm run bench, evaluate same expression 10,000x, this machine): best
13.2ms before, 1.46ms after (~9x), widening the margin against the 1000ms
assertion from ~75x to ~685x. Thresholds left unchanged.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 packages/core/expression.test.ts | 91 ++++++++++++++++++++++++++++++++
 packages/core/expression.ts      | 65 ++++++++++++++++++++++-
 2 files changed, 154 insertions(+), 2 deletions(-)

diff --git a/packages/core/expression.test.ts b/packages/core/expression.test.ts
index beb6966..e0cd9ad 100644
--- a/packages/core/expression.test.ts
+++ b/packages/core/expression.test.ts
@@ -2,8 +2,11 @@ import { readFileSync } from "node:fs";
 import { describe, expect, it } from "vitest";
 import {
 	type ASTNode,
+	clearExpressionCache,
 	ExpressionError,
 	evaluate,
+	expressionCacheHas,
+	expressionCacheSize,
 	parse,
 	TokenType,
 	tokenize,
@@ -689,3 +692,91 @@ describe("security -- no eval or Function constructor", () => {
 		expect(stripped).not.toMatch(/new\s+Function\s*\(/);
 	});
 });
+
+// =============================================================================
+// AST cache (issue #46, Candidate 2)
+//
+// evaluate() caches the tokenize/parse result per raw template string so the
+// hot path skips re-parsing. The cache must be transparent: a cached evaluation
+// is identical to an uncached one, distinct templates never collide, and
+// eviction at the soft cap never corrupts a result. evaluateNode still runs per
+// call against the live context.
+// =============================================================================
+
+describe("expression AST cache", () => {
+	const EXPRESSION_CACHE_CAP = 1000;
+
+	it("caches the parsed AST: re-evaluating a template does not grow the cache", () => {
+		clearExpressionCache();
+		const tmpl = "{{ output.score > 0.5 }}";
+		evaluate(tmpl, { output: { score: 0.9 } });
+		evaluate(tmpl, { output: { score: 0.1 } });
+		evaluate(tmpl, { output: { score: 0.7 } });
+		expect(expressionCacheSize()).toBe(1);
+	});
+
+	it("returns correct per-context results for a cached template", () => {
+		clearExpressionCache();
+		const tmpl = "{{ output.score > 0.5 }}";
+		// First call populates the cache; subsequent calls hit it.
+		expect(evaluate(tmpl, { output: { score: 0.9 } })).toBe(true);
+		expect(evaluate(tmpl, { output: { score: 0.1 } })).toBe(false);
+		expect(evaluate(tmpl, { output: { score: 0.5 } })).toBe(false);
+		expect(evaluate(tmpl, { output: { score: 0.51 } })).toBe(true);
+	});
+
+	it("does not collide across distinct templates", () => {
+		clearExpressionCache();
+		const ctx = { output: { score: 0.4, count: 10 } };
+		// Interleave two distinct templates; each must keep its own AST.
+		expect(evaluate("{{ output.score > 0.5 }}", ctx)).toBe(false);
+		expect(evaluate("{{ output.count > 5 }}", ctx)).toBe(true);
+		expect(evaluate("{{ output.score > 0.5 }}", ctx)).toBe(false);
+		expect(evaluate("{{ output.count > 5 }}", ctx)).toBe(true);
+		expect(expressionCacheSize()).toBe(2);
+	});
+
+	it("evicts at the soft cap without exceeding it", () => {
+		clearExpressionCache();
+		for (let i = 0; i < EXPRESSION_CACHE_CAP + 50; i++) {
+			evaluate(`{{ output.v == ${i} }}`, { output: { v: i } });
+		}
+		expect(expressionCacheSize()).toBe(EXPRESSION_CACHE_CAP);
+	});
+
+	it("re-evaluates an evicted template correctly", () => {
+		clearExpressionCache();
+		const firstTmpl = "{{ output.v == 0 }}";
+		// Prime, then overflow the cap so the first template is evicted (LRU).
+		evaluate(firstTmpl, { output: { v: 0 } });
+		for (let i = 1; i <= EXPRESSION_CACHE_CAP + 10; i++) {
+			evaluate(`{{ output.v == ${i} }}`, { output: { v: i } });
+		}
+		// Evicted: must re-parse and still produce correct per-context results.
+		expect(evaluate(firstTmpl, { output: { v: 0 } })).toBe(true);
+		expect(evaluate(firstTmpl, { output: { v: 9 } })).toBe(false);
+	});
+
+	it("evicts the least-recently-used entry, not the oldest-inserted (LRU not FIFO)", () => {
+		clearExpressionCache();
+		const hot = "{{ output.v == 0 }}"; // inserted first
+		const firstFiller = "{{ output.v == 1 }}"; // inserted second
+		evaluate(hot, { output: { v: 0 } });
+		// Fill to exactly the cap with distinct templates, touching `hot` after
+		// each so it stays most-recently-used while `firstFiller` ages out.
+		for (let i = 1; i < EXPRESSION_CACHE_CAP; i++) {
+			evaluate(`{{ output.v == ${i} }}`, { output: { v: i } });
+			evaluate(hot, { output: { v: 0 } });
+		}
+		expect(expressionCacheSize()).toBe(EXPRESSION_CACHE_CAP);
+
+		// One more distinct template forces a single eviction. Under LRU the
+		// victim is `firstFiller` (least recently used) and `hot` survives; under
+		// FIFO the victim would be `hot` (oldest inserted). Membership, not value,
+		// is what distinguishes the two policies (a re-parse yields the same value).
+		evaluate(`{{ output.v == ${EXPRESSION_CACHE_CAP} }}`, { output: { v: EXPRESSION_CACHE_CAP } });
+		expect(expressionCacheSize()).toBe(EXPRESSION_CACHE_CAP);
+		expect(expressionCacheHas(hot)).toBe(true);
+		expect(expressionCacheHas(firstFiller)).toBe(false);
+	});
+});
diff --git a/packages/core/expression.ts b/packages/core/expression.ts
index 71ae16d..8620513 100644
--- a/packages/core/expression.ts
+++ b/packages/core/expression.ts
@@ -600,9 +600,70 @@ function evaluateNode(node: ASTNode, context: ExpressionContext): unknown {
 // Public API
 // -----------------------------------------------------------------------------
 
-export function evaluate(template: string, context: ExpressionContext): unknown {
+/**
+ * Soft cap on the number of cached ASTs. When exceeded, the least-recently-used
+ * entry is evicted. Templates are reused verbatim rarely enough that a workflow
+ * never approaches this; the cap only bounds memory for pathological callers.
+ */
+const AST_CACHE_CAP = 1000;
+
+/**
+ * Module-level cache of parsed ASTs keyed on the raw template string.
+ *
+ * The expression engine is pure (no eval/Function, and evaluateNode never
+ * mutates the AST), so a parsed AST is valid forever and safe to share across
+ * calls and contexts. Map insertion order gives us LRU for free: a cache hit
+ * re-inserts the key (now most-recently-used), and eviction removes the first
+ * (oldest) key.
+ */
+const astCache = new Map<string, ASTNode>();
+
+/**
+ * Resolve the AST for a template, parsing and caching on a miss. Only
+ * successful parses are cached; a malformed template re-throws on every call,
+ * identical to the uncached behaviour.
+ */
+function getCachedAst(template: string): ASTNode {
+	const cached = astCache.get(template);
+	if (cached !== undefined) {
+		// Mark as most-recently-used.
+		astCache.delete(template);
+		astCache.set(template, cached);
+		return cached;
+	}
+
 	const expr = extractExpression(template);
 	const tokens = tokenize(expr);
 	const ast = parse(tokens);
-	return evaluateNode(ast, context);
+
+	if (astCache.size >= AST_CACHE_CAP) {
+		const oldest = astCache.keys().next().value;
+		if (oldest !== undefined) {
+			astCache.delete(oldest);
+		}
+	}
+	astCache.set(template, ast);
+	return ast;
+}
+
+/** Clear the AST cache. Primarily for tests and memory-sensitive callers. */
+export function clearExpressionCache(): void {
+	astCache.clear();
+}
+
+/** Current number of cached ASTs. Primarily for tests and diagnostics. */
+export function expressionCacheSize(): number {
+	return astCache.size;
+}
+
+/**
+ * Whether a template's AST is currently cached. Does not affect LRU recency
+ * (a pure read). Primarily for tests and diagnostics.
+ */
+export function expressionCacheHas(template: string): boolean {
+	return astCache.has(template);
+}
+
+export function evaluate(template: string, context: ExpressionContext): unknown {
+	return evaluateNode(getCachedAst(template), context);
 }