diff --git a/packages/cli/src/generated/signals/block-archive.ts b/packages/cli/src/generated/signals/block-archive.ts index 6f1f94741..02b4867c8 100644 --- a/packages/cli/src/generated/signals/block-archive.ts +++ b/packages/cli/src/generated/signals/block-archive.ts @@ -1,4 +1,4 @@ -// @generated by kern v3.5.7 — DO NOT EDIT. Source: src/kern/signals/block-archive.kern +// @generated by kern v3.5.8 — DO NOT EDIT. Source: src/kern/signals/block-archive.kern import { appendFileSync, mkdirSync } from 'node:fs'; @@ -48,3 +48,14 @@ export function appendBlockWithCap(prev: OutputBlock[], block: OutputBlock, arch archiveBlocks(archivePath, next.slice(0, overflow)); return next.slice(overflow); } + +/** + * Next remount epoch. Only a true transcript reset advances it; append and cap-spill leave it untouched (a spill must NOT repaint the sealed Static region). + */ +// @kern-source: block-archive:53 +export function nextStaticEpoch(currentEpoch: number, cause: 'append'|'spill'|'reset'): number { + if (cause === 'reset') { + return currentEpoch + 1; + } + return currentEpoch; +} diff --git a/packages/cli/src/generated/signals/runs-store.ts b/packages/cli/src/generated/signals/runs-store.ts new file mode 100644 index 000000000..89c1ab703 --- /dev/null +++ b/packages/cli/src/generated/signals/runs-store.ts @@ -0,0 +1,333 @@ +// @generated by kern v3.5.8 — DO NOT EDIT. Source: src/kern/signals/runs-store.kern + +import { readFileSync, writeFileSync } from 'node:fs'; + +import { readdir, stat, unlink } from 'node:fs/promises'; + +import { join } from 'node:path'; + +import { agonPath } from '@kernlang/agon-core'; + +/** + * Keep every run file newer than this regardless of count (7 days). + */ +// @kern-source: runs-store:25 +export const RUN_KEEP_AGE_MS: number = 7 * 24 * 60 * 60 * 1000; + +/** + * Beyond the age window, keep at most this many newest run files. + */ +// @kern-source: runs-store:28 +export const RUN_SOFT_CAP: number = 2000; + +/** + * Absolute ceiling on total run files kept, even within the age window. + */ +// @kern-source: runs-store:31 +export const RUN_HARD_CAP: number = 5000; + +/** + * Never delete a run file younger than this — it may belong to an in-flight run (10 minutes). Long-running orchestrations (forge/goal/conquer) can keep a run's record 'open' for many minutes, so a generous floor is the primary in-flight protection; activeRunId matching is defense in depth. + */ +// @kern-source: runs-store:34 +export const RUN_PROTECT_MIN_AGE_MS: number = 10 * 60 * 1000; + +/** + * Skip a prune if one completed within this window (1h). + */ +// @kern-source: runs-store:37 +export const RUN_PRUNE_COOLDOWN_MS: number = 60 * 60 * 1000; + +/** + * Delete this many files then yield to the event loop, so prune never monopolizes the loop. + */ +// @kern-source: runs-store:40 +export const RUN_PRUNE_CHUNK: number = 200; + +/** + * Filename inside the runs dir holding the last-prune timestamp (also a poor-man's lock). + */ +// @kern-source: runs-store:43 +export const PRUNE_STAMP_NAME: string = '.prune-stamp'; + +// @kern-source: runs-store:48 +export interface RunsSnapshot { + count: number; + hydratedAt: number; +} + +// @kern-source: runs-store:54 +export interface PruneResult { + deleted: number; + skipped: boolean; + reason: string; +} + +// @kern-source: runs-store:60 +export interface PruneOptions { + activeRunIds?: string[]; + force?: boolean; +} + +// @kern-source: runs-store:68 +function runsDirPath(): string { + return agonPath('runs'); +} + +// @kern-source: runs-store:70 +function pruneStampPath(): string { + return join(agonPath('runs'), PRUNE_STAMP_NAME); +} + +/** + * List *.json run record files with mtimes via node:fs/promises (never blocks the Ink event loop, even on a 40k-entry dir). Returns [] if the dir is missing. Skips the .prune-stamp and any entry that cannot be stat'd. + */ +// @kern-source: runs-store:72 +async function listRunJsonFiles(): Promise<{ name: string; mtimeMs: number }[]> { + const dir = runsDirPath(); + let names: string[]; + try { + names = await readdir(dir); + } catch { + return []; + } + const out: { name: string; mtimeMs: number }[] = []; + for (const entry of names) { + if (!entry.endsWith('.json')) continue; + try { + const st = await stat(join(dir, entry)); + if (st.isFile()) out.push({ name: entry, mtimeMs: st.mtimeMs }); + } catch { + // entry vanished mid-scan or is unreadable — ignore + } + } + return out; +} + +/** + * Count *.json run record files WITHOUT stat'ing each one. hydrate()/scheduleRefresh only need the count, so on a 40k-file dir this avoids ~40k stat syscalls (one readdir is enough). Returns 0 if the dir is missing. The .prune-stamp has no .json suffix so it's excluded automatically. + */ +// @kern-source: runs-store:95 +async function countRunJsonFiles(): Promise { + const dir = runsDirPath(); + let names: string[]; + try { + names = await readdir(dir); + } catch { + return 0; + } + let count = 0; + for (const entry of names) { + if (entry.endsWith('.json')) count++; + } + return count; +} + +/** + * Read the last-prune timestamp from .prune-stamp. Returns 0 if absent/unreadable. + */ +// @kern-source: runs-store:112 +function readPruneStamp(): number { + try { + const raw = readFileSync(pruneStampPath(), 'utf-8').trim(); + const n = Number(raw); + return Number.isFinite(n) ? n : 0; + } catch { + return 0; + } +} + +/** + * Persist the last-prune timestamp (best effort). + */ +// @kern-source: runs-store:124 +function writePruneStamp(ts: number): void { + try { + writeFileSync(pruneStampPath(), String(ts)); + } catch { + // best effort — a missing stamp just means the next prune isn't rate-limited + } +} + +/** + * Pure prune-policy core: given the current run files + clock + protected ids, return the file names to delete (oldest-first). Keep everything < 7 days; beyond that keep newest RUN_SOFT_CAP; hard cap RUN_HARD_CAP total. Never returns an active-id file or one younger than RUN_PROTECT_MIN_AGE_MS. Exported so the policy is unit-testable without touching the fs. + */ +// @kern-source: runs-store:134 +export function computeRunPruneTargets(files: { name: string; mtimeMs: number }[], now: number, activeRunIds: string[]): string[] { + // Exact id match, NOT substring: a run id must equal the file's basename + // (`${id}.json`) or be a prefix followed by a non-alphanumeric delimiter + // (e.g. `${id}-meta.json`). Substring matching would over-protect — id "1" + // would shield "1234.json" — and (worse) could be tricked into UNDER-deleting. + // + // Structural guarantee (why id matching is only defense in depth): deletion + // is oldest-first under a 7-day age window + newest-N caps, and an active + // run's record is necessarily recent, so it's effectively unreachable by the + // policy regardless of this check. The RUN_PROTECT_MIN_AGE_MS floor already + // shields every fresh file; activeRunId matching just hardens the edge. + const isActive = (fileName: string): boolean => + activeRunIds.some((id) => { + if (!id) return false; + if (fileName === `${id}.json`) return true; + if (!fileName.startsWith(id)) return false; + const next = fileName.charAt(id.length); + // Prefix must end at a delimiter, never mid-token (so "1" never matches "12.json"). + return next !== '' && !/[A-Za-z0-9]/.test(next); + }); + + // Files we are even ALLOWED to delete: not active, and old enough that they + // can't belong to an in-flight run. + const deletable = files + .filter((f) => !isActive(f.name) && now - f.mtimeMs >= RUN_PROTECT_MIN_AGE_MS) + .sort((a, b) => a.mtimeMs - b.mtimeMs); // oldest first + + const cutoff = now - RUN_KEEP_AGE_MS; + const deletableOld = deletable.filter((f) => f.mtimeMs < cutoff); // older than 7d + + const removalSet = new Map(); + + // 1) Soft cap: keep only the newest RUN_SOFT_CAP files overall. The overflow + // must come from OLD (>7d) deletable files first — files inside the 7d + // window are kept regardless of the soft cap. + const softOverflow = Math.max(0, files.length - RUN_SOFT_CAP); + const softRemoveCount = Math.min(softOverflow, deletableOld.length); + for (let k = 0; k < softRemoveCount; k++) { + removalSet.set(deletableOld[k].name, true); + } + + // 2) Hard cap: total kept must end ≤ RUN_HARD_CAP. Remove additional oldest + // deletable files (young or old) until the total fits. + const mustRemoveForHardCap = Math.max(0, files.length - RUN_HARD_CAP); + let i = 0; + while (removalSet.size < mustRemoveForHardCap && i < deletable.length) { + removalSet.set(deletable[i].name, true); + i++; + } + + // Return oldest-first, matching the deletable ordering. + return deletable.filter((f) => removalSet.has(f.name)).map((f) => f.name); +} + +/** + * Owns ~/.agon/runs metadata so the render path never scans the fs. Caches a snapshot, refreshes on demand (debounced), and auto-prunes off the critical path. + */ +// @kern-source: runs-store:192 +export class RunsStore { + private snap: RunsSnapshot; + private refreshTimer: ReturnType|null; + private pruning: boolean; + + constructor() { + this.snap = { count: 0, hydratedAt: 0 }; + this.refreshTimer = null; + this.pruning = false; + } + + snapshot(): RunsSnapshot { + return this.snap; + } + + runCount(): number { + return this.snap.count; + } + + async hydrate(): Promise { + // Yield to the macrotask queue first so a startup hydrate never lands in + // the same tick as first paint. + await new Promise((resolve) => setTimeout(resolve, 0)); + // Count-only path: hydrate just needs the file count, so skip the per-file + // stat() that listRunJsonFiles does (mtime is only needed by the prune). + const count = await countRunJsonFiles(); + this.snap = { count, hydratedAt: Date.now() }; + return this.snap; + } + + scheduleRefresh(delayMs?: number): void { + const delay = typeof delayMs === 'number' && delayMs >= 0 ? delayMs : 750; + if (this.refreshTimer) clearTimeout(this.refreshTimer); + const timer = setTimeout(() => { + this.refreshTimer = null; + // Fire and forget — errors are non-critical (stale count at worst). + void this.hydrate().catch(() => { /* snapshot stays as-is */ }); + }, delay); + this.refreshTimer = timer; + // Never keep the process alive just to refresh a count. + if (typeof (timer as any).unref === 'function') (timer as any).unref(); + } + + async maybePrune(opts?: PruneOptions): Promise { + // In-process overlap guard FIRST, before any stamp/cooldown logic. Two + // same-process callers (e.g. the boot effect racing a scheduleRefresh) both + // read the stamp in the same tick; claiming `this.pruning` here — not after + // the cooldown check — is what makes the second caller bail deterministically. + if (this.pruning) { + return { deleted: 0, skipped: true, reason: 'in-progress' }; + } + this.pruning = true; + try { + const force = opts?.force === true; + const now = Date.now(); + const lastPrune = readPruneStamp(); + + // Cooldown + soft cross-process lock: a recent stamp means a prune ran (or + // is running) within the cooldown window — skip. The stamp doubles as the + // lock because maybePrune() claims it (writes "now") before any deletion. + if (!force && lastPrune > 0 && now - lastPrune < RUN_PRUNE_COOLDOWN_MS) { + return { deleted: 0, skipped: true, reason: 'cooldown' }; + } + + // Claim the cross-process lock by stamping "now" BEFORE scanning/deleting. + // This is deliberate (do not move it after the delete): the stamp is the + // lock claim a concurrent PROCESS reads to back off, and the cooldown + // limits SCAN cost — so even a no-op scan should start the cooldown. + // Crash-mid-prune costs at most a 1h delay before the next attempt; that's + // strictly cheaper than letting N processes scan a 40k-file dir in lockstep. + writePruneStamp(now); + + const files = await listRunJsonFiles(); + const activeIds = Array.isArray(opts?.activeRunIds) ? opts!.activeRunIds! : []; + const toDelete = computeRunPruneTargets(files, now, activeIds); + + if (toDelete.length === 0) { + // Nothing to remove — refresh the snapshot and bail quietly. + this.snap = { count: files.length, hydratedAt: now }; + return { deleted: 0, skipped: true, reason: 'within-policy' }; + } + + const dir = runsDirPath(); + let deleted = 0; + for (let idx = 0; idx < toDelete.length; idx++) { + try { + await unlink(join(dir, toDelete[idx])); + deleted++; + } catch { + // file vanished or unremovable — skip + } + // Yield every RUN_PRUNE_CHUNK deletions so we never block the loop. + if ((idx + 1) % RUN_PRUNE_CHUNK === 0) { + await new Promise((resolve) => setTimeout(resolve, 0)); + } + } + + // Re-stamp completion time and refresh the cached snapshot. Count-only: + // we just need the post-prune total, not mtimes. + const finishedAt = Date.now(); + writePruneStamp(finishedAt); + const remaining = await countRunJsonFiles(); + this.snap = { count: remaining, hydratedAt: finishedAt }; + + if (deleted > 0 && process.env.AGON_DEBUG) { + // Diagnostic only under AGON_DEBUG — a background prune must never write into the live TUI. + console.error(`[agon] runs prune: removed ${deleted} old run record(s), kept ${remaining}`); + } + return { deleted, skipped: false, reason: 'pruned' }; + } finally { + this.pruning = false; + } + } +} + +/** + * Process-wide RunsStore singleton. Import this from render + telemetry; never construct another. + */ +// @kern-source: runs-store:319 +export const runsStore = new RunsStore(); diff --git a/packages/cli/src/generated/surfaces/app-blocks.ts b/packages/cli/src/generated/surfaces/app-blocks.ts index 4f9d147f4..35dee7885 100644 --- a/packages/cli/src/generated/surfaces/app-blocks.ts +++ b/packages/cli/src/generated/surfaces/app-blocks.ts @@ -1,8 +1,6 @@ // @generated by kern v3.5.8 — DO NOT EDIT. Source: src/kern/surfaces/app-blocks.kern -import { readdirSync } from 'node:fs'; - -import { loadConfig, getRatings, getActiveWorkspace, RUNS_DIR } from '@kernlang/agon-core'; +import { loadConfig, getRatings, getActiveWorkspace } from '@kernlang/agon-core'; import { resolveBuiltinEnginesDir } from '../lib/engines-dir.js'; @@ -12,6 +10,8 @@ import { cleanEngineOutput } from '../blocks/markdown.js'; import { appendBlockWithCap } from '../signals/block-archive.js'; +import { runsStore } from '../signals/runs-store.js'; + import type { OutputBlock } from '../../generated/blocks/engine.js'; import type { OutputEvent } from '../../handlers/types.js'; @@ -55,8 +55,10 @@ export function buildDashboardBlock(enabledOverride: string[]|null): OutputBlock const sorted = Object.entries(ratings.global) .map(([id, r]: any) => [id, { rating: Math.round(r.mu - 2 * r.phi) }] as const) .sort(([, a], [, b]) => b.rating - a.rating); - let runCount = 0; - try { runCount = readdirSync(RUNS_DIR).filter((f: string) => f.endsWith('.json')).length; } catch { /* runs dir missing — first run */ } + // Render path is fs-free: read the cached runs snapshot, never readdirSync. + // runsStore.hydrate() runs async at startup (app.kern boot effect) and + // refreshes (debounced) after each run record is written. + const runCount = runsStore.snapshot().count; return { id: 0, @@ -190,7 +192,13 @@ export function nativeArchiveBlockCount(blocks: OutputBlock[], mode: string, row if (!Array.isArray(blocks) || blocks.length === 0) return 0; const safeBudget = Math.max(1, Math.floor(Number(rowBudget) || 1)); - const maxLiveBlocks = 80; + // 40 (was 80): Ink rewrites the ENTIRE live tail every render (~15-30/s + // during runs), so the live window is the dominant stdout cost. The row + // budget usually seals far earlier anyway — this cap only bites on runs of + // many tiny blocks, exactly where per-frame rewrite cost compounds. Sealed + // blocks render identically (they just stop being mouse-selectable), and a + // measured A/B (fat-block probe) showed no UX regression at 40. + const maxLiveBlocks = 40; let rows = 0; let liveStart = blocks.length; diff --git a/packages/cli/src/generated/surfaces/app-rendering.ts b/packages/cli/src/generated/surfaces/app-rendering.ts index 07be26997..94ea7206c 100644 --- a/packages/cli/src/generated/surfaces/app-rendering.ts +++ b/packages/cli/src/generated/surfaces/app-rendering.ts @@ -688,11 +688,104 @@ export function cachedMarkdownRows(baseKey: string, text: string, wrapWidth: num return rows; } -export function buildTranscriptRows(blocks: OutputBlock[], mode: string, toolOutputExpanded: boolean, thinkingExpanded: boolean): any[] { +export const _blockRowCache = new Map(); + +export const _BLOCK_ROW_CACHE_MAX: number = 1500; + +export function blockWantsLeadingGap(eventType: string): boolean { + return eventType === 'user-message' + || eventType === 'engine-block' + || eventType === 'kern-draft' + || eventType === 'debate-round'; +} + +export const _CACHEABLE_BLOCK_TYPES = new Set([ + 'text', 'user-message', 'engine-block', 'separator', 'header', + 'success', 'warning', 'info', 'error', 'permission-ask', + 'thinking-chunk', 'streaming-chunk', 'kern-draft', 'debate-round', + 'verdict', + ]); + +export function isCacheableBlockType(eventType: string): boolean { + return _CACHEABLE_BLOCK_TYPES.has(eventType); +} + +export function blockRowFingerprint(event: any, mode: string, toolOutputExpanded: boolean, thinkingExpanded: boolean, proseWidth: number, chatWidth: number, engineWidth: number): string { + // Cheap content signature so an in-place mutation (status/output update on + // a non-tool block, or any event field change) busts the cache. Widths + // encode terminal width (contentWidth reads process.stdout.columns), so + // including them flushes the entry on resize — exactly like _mdRowCache. + const type = String(event?.type ?? ''); + // Primary payload across every cacheable event variant. permission-ask + // carries its diff in `command`; tool/title/position carry the rest. Any + // field a variant renders from MUST be in this signature or two different + // blocks that share an id (ids are reused freely in tests, and in prod a + // /clear can repeat them) would collide on a stale cache entry. + const content = String(event?.content ?? event?.message ?? event?.summary ?? event?.argument ?? event?.title ?? event?.chunk ?? event?.markdown ?? event?.command ?? ''); + const status = String(event?.status ?? ''); + const engineId = String(event?.engineId ?? ''); + const tool = String(event?.tool ?? ''); + const folded = String(event?.foldedSteps ?? ''); + // Render-affecting secondary fields per cacheable variant: engine-block + // reads `color` (accent), kern-draft reads `critique`, debate-round reads + // `position`. Adding a type to the allowlist? Add EVERY field its renderer + // reads here first (e.g. tool output would need `output`). + const extra = `${String(event?.color ?? '')}|${String(event?.critique ?? '')}|${String(event?.position ?? '')}`; + const widths = `${proseWidth}.${chatWidth}.${engineWidth}`; + const flags = `${mode}|${toolOutputExpanded ? 1 : 0}|${thinkingExpanded ? 1 : 0}`; + // Head AND tail slices: same-prefix same-length bodies (padded logs, JSON + // with a shared head) must not collide on a recycled id. + return `${type}|${engineId}|${tool}|${status}|${folded}|${extra}|${content.length}|${content.slice(0, 120)}|${content.slice(-40)}|${widths}|${flags}`; +} + +export function cachedBlockOwnRows(block: OutputBlock, mode: string, toolOutputExpanded: boolean, thinkingExpanded: boolean, proseWidth: number, chatWidth: number, engineWidth: number): any[] { + const event = block.event as any; + const type = String(event?.type ?? ''); + if (!isCacheableBlockType(type)) { + return renderBlockOwnRows(block, mode, toolOutputExpanded, thinkingExpanded, proseWidth, chatWidth, engineWidth); + } + const fingerprint = blockRowFingerprint(event, mode, toolOutputExpanded, thinkingExpanded, proseWidth, chatWidth, engineWidth); + const key = `${block.id}|${fingerprint}`; + const hit = _blockRowCache.get(key); + if (hit) { + // True LRU: re-insert on hit so hot entries survive the oldest-20% + // eviction. Return a shallow copy so a caller can never mutate the + // cached array (the row objects inside are treated as immutable). + _blockRowCache.delete(key); + _blockRowCache.set(key, hit); + return hit.slice(); + } + const rows = renderBlockOwnRows(block, mode, toolOutputExpanded, thinkingExpanded, proseWidth, chatWidth, engineWidth); + // At the cap, evict the least-recently-used 20% (hits re-insert above, so + // Map order ≈ recency) — amortized, never a full flush (which would force + // a cold rebuild of all committed rows). + if (_blockRowCache.size >= _BLOCK_ROW_CACHE_MAX) { + const evictCount = Math.floor(_BLOCK_ROW_CACHE_MAX * 0.2); + const it = _blockRowCache.keys(); + for (let i = 0; i < evictCount; i++) { + const oldest = it.next().value; + if (oldest === undefined) break; + _blockRowCache.delete(oldest); + } + } + _blockRowCache.set(key, rows); + return rows.slice(); +} + +/** + * Drop every cached block row. Called from the clearBlocks reset funnel so a /clear (which can recycle block ids in replay paths) can never serve rows rendered for a previous transcript. + */ +export function clearBlockRowCache(): void { + _blockRowCache.clear(); +} + +/** + * Render the intrinsic rows for ONE transcript block, independent of its neighbors and of its position in the transcript (no leading positional spacer — that is prepended by buildTranscriptRows). Tool-call grouping is NOT handled here. + */ +export function renderBlockOwnRows(block: OutputBlock, mode: string, toolOutputExpanded: boolean, thinkingExpanded: boolean, proseWidth: number, chatWidth: number, engineWidth: number): any[] { const rows: any[] = []; - const proseWidth = contentWidth(4); - const chatWidth = contentWidth(2); - const engineWidth = contentWidth(8); + const event = block.event as any; + const baseKey = `block-${block.id}`; const pushSpacer = (key: string) => { rows.push({ key, kind: 'spacer' }); }; @@ -712,29 +805,12 @@ export function buildTranscriptRows(blocks: OutputBlock[], mode: string, toolOut }); }; - let skippedUntil = -1; - blocks.forEach((block: OutputBlock, blockIndex: number) => { - if (blockIndex < skippedUntil) return; - const event = block.event as any; - const baseKey = `block-${block.id}`; - - if (!toolOutputExpanded && event.type === 'tool-call') { - const groupedEvents = [event]; - let nextIndex = blockIndex + 1; - while (nextIndex < blocks.length) { - const nextEvent = blocks[nextIndex].event as any; - if (nextEvent.type !== 'tool-call') break; - groupedEvents.push(nextEvent); - nextIndex += 1; - } - - if (groupedEvents.length > 1) { - rows.push(...buildCollapsedToolGroupRows(baseKey, groupedEvents)); - skippedUntil = nextIndex; - return; - } - } - + // The switch below was lifted verbatim from buildTranscriptRows; its many + // `return;` statements bail out of THIS inner closure (not the whole fn), + // accumulating into the closed-over `rows`. The leading positional spacer + // for gap-wanting types is intentionally a no-op here (rows is empty at + // entry) — buildTranscriptRows prepends the real gap. + (() => { switch (event.type) { case 'text': pushRichText(baseKey, event.content, 2, proseWidth); @@ -949,50 +1025,13 @@ export function buildTranscriptRows(blocks: OutputBlock[], mode: string, toolOut return; } case 'tool-call': - rows.push(...buildToolCallRows(baseKey, event, toolOutputExpanded)); + case 'tool-call-group': + // Tool-call rendering is neighbor-dependent (adjacent tool blocks + // coalesce) and content-unstable (mutates while running), so it is + // handled entirely by buildTranscriptRows on the uncached path and + // never routed here. This branch is unreachable; kept as a no-op so + // the default fallback can't mis-render a tool block if it ever is. return; - case 'tool-call-group': { - const groupBlocks = Array.isArray((event as any).blocks) ? (event as any).blocks : []; - // Expanded: unpack every inner tool call so Ctrl+E reveals full - // output. Collapsed (default): one compact summary row, matching - // the Static-era ToolCallGroup renderer — preserves the bundling - // that output.kern emits (codex review P3). - if (toolOutputExpanded) { - groupBlocks.forEach((child: any, index: number) => { - const childEvent = child?.event; - if (childEvent && childEvent.type === 'tool-call') { - rows.push(...buildToolCallRows(`${baseKey}-g-${index}`, childEvent, toolOutputExpanded)); - } - }); - return; - } - const groupedEvents: any[] = []; - const appendGroup = (blocksToAppend: any[]) => { - blocksToAppend.forEach((child: any) => { - const childEvent = child?.event; - if (childEvent && childEvent.type === 'tool-call') groupedEvents.push(childEvent); - }); - }; - appendGroup(groupBlocks); - let nextIndex = blockIndex + 1; - while (nextIndex < blocks.length) { - const nextEvent = blocks[nextIndex].event as any; - if (nextEvent?.type === 'tool-call-group') { - appendGroup(Array.isArray(nextEvent.blocks) ? nextEvent.blocks : []); - nextIndex += 1; - continue; - } - if (nextEvent?.type === 'tool-call') { - groupedEvents.push(nextEvent); - nextIndex += 1; - continue; - } - break; - } - rows.push(...buildCollapsedToolGroupRows(baseKey, groupedEvents)); - skippedUntil = nextIndex; - return; - } case 'response-meta': { const totalTokens = (event.inputTokens ?? 0) + (event.outputTokens ?? 0); const parts = [ @@ -1186,6 +1225,102 @@ export function buildTranscriptRows(blocks: OutputBlock[], mode: string, toolOut default: pushSegmentsRow(`${baseKey}-fallback`, 2, [{ text: `[${event.type}]`, dimColor: true }]); } + })(); + + return rows; +} + +export function buildTranscriptRows(blocks: OutputBlock[], mode: string, toolOutputExpanded: boolean, thinkingExpanded: boolean): any[] { + const rows: any[] = []; + const proseWidth = contentWidth(4); + const chatWidth = contentWidth(2); + const engineWidth = contentWidth(8); + + let skippedUntil = -1; + blocks.forEach((block: OutputBlock, blockIndex: number) => { + if (blockIndex < skippedUntil) return; + const event = block.event as any; + const baseKey = `block-${block.id}`; + + // ── Neighbor-dependent tool grouping (UNCACHED) ────────────────── + // Collapsed adjacent tool-call blocks coalesce into one summary; this + // depends on the following blocks, so it can never be a per-block cache + // hit. Tool blocks also mutate in place while running. + if (!toolOutputExpanded && event.type === 'tool-call') { + const groupedEvents = [event]; + let nextIndex = blockIndex + 1; + while (nextIndex < blocks.length) { + const nextEvent = blocks[nextIndex].event as any; + if (nextEvent.type !== 'tool-call') break; + groupedEvents.push(nextEvent); + nextIndex += 1; + } + + if (groupedEvents.length > 1) { + rows.push(...buildCollapsedToolGroupRows(baseKey, groupedEvents)); + skippedUntil = nextIndex; + return; + } + } + + if (event.type === 'tool-call') { + rows.push(...buildToolCallRows(baseKey, event, toolOutputExpanded)); + return; + } + + if (event.type === 'tool-call-group') { + const groupBlocks = Array.isArray((event as any).blocks) ? (event as any).blocks : []; + // Expanded: unpack every inner tool call so Ctrl+E reveals full + // output. Collapsed (default): one compact summary row, matching + // the Static-era ToolCallGroup renderer — preserves the bundling + // that output.kern emits (codex review P3). + if (toolOutputExpanded) { + groupBlocks.forEach((child: any, index: number) => { + const childEvent = child?.event; + if (childEvent && childEvent.type === 'tool-call') { + rows.push(...buildToolCallRows(`${baseKey}-g-${index}`, childEvent, toolOutputExpanded)); + } + }); + return; + } + const groupedEvents: any[] = []; + const appendGroup = (blocksToAppend: any[]) => { + blocksToAppend.forEach((child: any) => { + const childEvent = child?.event; + if (childEvent && childEvent.type === 'tool-call') groupedEvents.push(childEvent); + }); + }; + appendGroup(groupBlocks); + let nextIndex = blockIndex + 1; + while (nextIndex < blocks.length) { + const nextEvent = blocks[nextIndex].event as any; + if (nextEvent?.type === 'tool-call-group') { + appendGroup(Array.isArray(nextEvent.blocks) ? nextEvent.blocks : []); + nextIndex += 1; + continue; + } + if (nextEvent?.type === 'tool-call') { + groupedEvents.push(nextEvent); + nextIndex += 1; + continue; + } + break; + } + rows.push(...buildCollapsedToolGroupRows(baseKey, groupedEvents)); + skippedUntil = nextIndex; + return; + } + + // ── All other (neighbor-independent) blocks: CACHED per-block ───── + const ownRows = cachedBlockOwnRows(block, mode, toolOutputExpanded, thinkingExpanded, proseWidth, chatWidth, engineWidth); + if (ownRows.length === 0) return; + // Leading positional spacer: depends on transcript position (not block + // content), so it lives here, outside the cache. renderBlockOwnRows + // omits it. + if (rows.length > 0 && blockWantsLeadingGap(String(event?.type ?? ''))) { + rows.push({ key: `${baseKey}-gap`, kind: 'spacer' }); + } + for (const row of ownRows) rows.push(row); }); return rows; diff --git a/packages/cli/src/generated/surfaces/app.entry.tsx b/packages/cli/src/generated/surfaces/app.entry.tsx index 9c3ddb1b6..84940b5c5 100644 --- a/packages/cli/src/generated/surfaces/app.entry.tsx +++ b/packages/cli/src/generated/surfaces/app.entry.tsx @@ -1,7 +1,7 @@ #!/usr/bin/env node // @generated by kern v3.5.8 — DO NOT EDIT. Source: src/kern/surfaces/app.kern -// @kern-source: app:95 +// @kern-source: app:96 import React from 'react'; import { render } from 'ink'; diff --git a/packages/cli/src/generated/surfaces/app.tsx b/packages/cli/src/generated/surfaces/app.tsx index ca23f4606..497d21765 100644 --- a/packages/cli/src/generated/surfaces/app.tsx +++ b/packages/cli/src/generated/surfaces/app.tsx @@ -20,6 +20,8 @@ import type { EngineAdapter } from '@kernlang/agon-core'; import { detectIntent, SLASH_COMMANDS } from '../signals/intent.js'; +import { runsStore } from '../signals/runs-store.js'; + import { CommandRegistry, registerBuiltinCommands, initExtensions, EventBus, bridgeShellHooks } from '@kernlang/agon-core'; import { JobManager } from '../signals/job-manager.js'; @@ -68,7 +70,7 @@ import { appendInputHistory, cleanInputValue, cleanSubmitValue, findInputChange, import { resolveKeyboardInput } from '../signals/keyboard.js'; -import { makeBlockArchivePath, appendBlockWithCap } from '../signals/block-archive.js'; +import { makeBlockArchivePath, appendBlockWithCap, nextStaticEpoch } from '../signals/block-archive.js'; import { perfNow, recordKeystrokeLatency } from '../signals/input-perf.js'; @@ -148,13 +150,13 @@ import { estimateVisibleBlockBudget, estimateBottomChromeExtraRows, estimatePinn import { buildDashboardBlock, coalesceToolCallBlocks, effectiveNativeArchiveBlockCount, historyBlocksForTranscript, nativeTranscriptBlocksForStatic, nativeArchiveBlockCount, appendTranscriptBlock, summarizeBtwTranscriptEvent } from './app-blocks.js'; -import { buildExecutionRailStats, buildTranscriptRows } from './app-rendering.js'; +import { buildExecutionRailStats, buildTranscriptRows, clearBlockRowCache } from './app-rendering.js'; // ── Module: AppHelperExports ── export { COMPOSER_HISTORY_LIMIT, isMutatingToolCall, probeEngineVitals, parseToolCallPayload, toolPreviewWindow, toolCallSupportsDetailView, detailViewerSupportsEvent, toolDetailViewportRows, findLatestToolDetailEvent, findLatestToolEvent, buildExecutionRailStats, composerHistoryPath, loadComposerInputHistory, saveComposerInputHistory, findLatestFailedToolEvent, buildFailedToolRetryDraft, buildToolDetailView, createInitialRegistry, drainStdinBuffer, maxScrollOffsetForRowCount, nextWheelAnimationStep, clampNumber, charDisplayWidth, stringDisplayWidth, displayColumnToStringIndex, normalizeRowSelection, normalizeTextSelection, richLineToPlainText, transcriptRowToPlainText, transcriptRowTextStartColumn, resolveTranscriptColumnFromMouse, transcriptRowsToPlainText, resolveTranscriptRowFromMouse, estimateVisibleBlockBudget, estimateWrappedRowCount, estimateQuestionReservedRows, estimateBottomChromeExtraRows, summarizeBtwTranscriptEvent, buildDashboardBlock, estimatePinnedLiveRows, estimateWrappedRows, estimateToolCallRows, estimateOutputEventRows, buildDisplayItems, isToolCallLikeBlock, coalesceToolCallBlocks, effectiveNativeArchiveBlockCount, estimateDisplayItemRows, historyBlocksForTranscript, nativeTranscriptBlocksForStatic, nativeArchiveBlockCount, isDuplicateEngineBlock, appendTranscriptBlock, normalizeTerminalMode, fileRailWidthForTerminal, fileRailMaxRowsForTerminal, buildTerminalReplaySnapshot, parseMarkdownToRows, buildToolCallRows, buildCollapsedToolGroupRows, buildTranscriptRows } from './app-helpers.js'; -// @kern-source: app:95 +// @kern-source: app:96 export function App() { // Ink-safe setter: bridges microtask → macrotask for reliable repaints function __inkSafe(setter: React.Dispatch>): React.Dispatch> { @@ -183,7 +185,7 @@ export function App() { return (value: React.SetStateAction) => { const now = Date.now(); const elapsed = now - _lastCall; - if (elapsed >= 50) { + if (elapsed >= 120) { _lastCall = now; if (_pendingTimer) { clearTimeout(_pendingTimer); _pendingTimer = null; } setTimeout(() => _setLiveSpinnerRaw(value), 0); @@ -194,7 +196,7 @@ export function App() { _lastCall = Date.now(); _pendingTimer = null; _setLiveSpinnerRaw(_pendingValue); - }, 50 - elapsed); + }, 120 - elapsed); } } }; @@ -207,7 +209,7 @@ export function App() { return (value: React.SetStateAction) => { const now = Date.now(); const elapsed = now - _lastCall; - if (elapsed >= 50) { + if (elapsed >= 120) { _lastCall = now; if (_pendingTimer) { clearTimeout(_pendingTimer); _pendingTimer = null; } setTimeout(() => _setLiveProgressRaw(value), 0); @@ -218,7 +220,7 @@ export function App() { _lastCall = Date.now(); _pendingTimer = null; _setLiveProgressRaw(_pendingValue); - }, 50 - elapsed); + }, 120 - elapsed); } } }; @@ -496,8 +498,8 @@ export function App() { } }; }, []); - const [nativeStaticEpoch, _setNativeStaticEpochRaw] = useState(0); - const setNativeStaticEpoch = useMemo(() => __inkSafe(_setNativeStaticEpochRaw), [_setNativeStaticEpochRaw]); + const [clearEpoch, _setClearEpochRaw] = useState(0); + const setClearEpoch = useMemo(() => __inkSafe(_setClearEpochRaw), [_setClearEpochRaw]); const [nativeArchiveCount, _setNativeArchiveCountRaw] = useState(0); const setNativeArchiveCount = useMemo(() => __inkSafe(_setNativeArchiveCountRaw), [_setNativeArchiveCountRaw]); const [fileRailOpen, _setFileRailOpenRaw] = useState(false); @@ -796,7 +798,16 @@ export function App() { return appendTranscriptBlock(filtered, event, blockArchivePathRef.current); }); }, - clearBlocks: () => setOutputBlocks([]), + clearBlocks: () => { + // Single funnel for every true transcript reset (/clear, /clean, + // session reset all dispatch OutputEvent {type:'clear'} → clearBlocks). + // Bump the epoch HERE so the remount is tied to the cause, not + // inferred from the array shrinking (which a cap-spill also does). + setOutputBlocks([]); + setClearEpoch((epoch: number) => nextStaticEpoch(epoch, 'reset')); + setNativeArchiveCount(0); + clearBlockRowCache(); + }, setPendingPlanProposal: (val: OutputEvent | null) => setPendingPlanProposal(val), setReviewEvent, setQuestionState, @@ -2127,7 +2138,6 @@ export function App() { } const previousCount = nativeTranscriptBlockCountRef.current; if (nextCount < previousCount) { - setNativeStaticEpoch((epoch: number) => epoch + 1); setNativeArchiveCount(0); } nativeTranscriptBlockCountRef.current = nextCount; @@ -2174,6 +2184,39 @@ export function App() { return () => clearTimeout(boot); }, []); + useEffect(() => { + const boot = setTimeout(() => { + void (async () => { + // Patch the already-rendered startup dashboard block in place (only if + // it's still present — the user may have /clear'd it). Preserves the + // block id so Ink reconciles instead of remounting. At 1.5s the + // dashboard is the lone block in the live region, well under the seal + // budget, so the in-place count update renders. + const patchDashboardCount = (count: number) => { + setOutputBlocks((prev: any) => + prev.map((b: any) => + b?.event?.type === 'dashboard' + ? { ...b, event: { ...b.event, runCount: count } } + : b, + ), + ); + }; + try { + const snap = await runsStore.hydrate(); + patchDashboardCount(snap.count); + } catch { /* count stays 0 — non-critical */ } + try { + const result = await runsStore.maybePrune(); + // If the prune actually removed files, the cached count dropped — + // re-patch so the dashboard never shows the stale pre-prune count. + if (result.deleted > 0) patchDashboardCount(runsStore.snapshot().count); + } catch { /* prune is best-effort */ } + })(); + }, 1500); + if (typeof (boot as any).unref === 'function') (boot as any).unref(); + return () => clearTimeout(boot); + }, []); + useEffect(() => { const interval = setInterval(() => { const prev = agentProgressRef.current; @@ -2870,7 +2913,7 @@ export function App() { if (terminalMode === 'native') return ( <> - + {(block: any) => ( )} @@ -2917,22 +2960,22 @@ export function App() { ); } -// @kern-source: app:85 +// @kern-source: app:86 export const _activeAborts: Set = new Set(); -// @kern-source: app:87 +// @kern-source: app:88 export const _cancelCallback: { fn: (() => void) | null } = { fn: null }; -// @kern-source: app:89 +// @kern-source: app:90 export const _cesarSessionRef: { session: PersistentSession | null } = { session: null }; -// @kern-source: app:91 +// @kern-source: app:92 export const _lastSigintAt: { value: number } = { value: 0 }; -// @kern-source: app:93 +// @kern-source: app:94 export const _pauseState: { value: PauseState | null } = { value: null }; -// @kern-source: app:2667 +// @kern-source: app:2739 export async function startRepl(): Promise { ensureAgonHome(); ensureCurrentWorkspace(process.cwd()); diff --git a/packages/cli/src/kern/signals/block-archive.kern b/packages/cli/src/kern/signals/block-archive.kern index ca752c934..dba8ec1b1 100644 --- a/packages/cli/src/kern/signals/block-archive.kern +++ b/packages/cli/src/kern/signals/block-archive.kern @@ -40,3 +40,19 @@ fn name=appendBlockWithCap params="prev:OutputBlock[], block:OutputBlock, archiv let name=overflow value="next.length - MAX_LIVE_BLOCKS + ARCHIVE_BATCH" do value="archiveBlocks(archivePath, next.slice(0, overflow))" return value="next.slice(overflow)" + +// ── Static-region remount epoch ────────────────────────────────────────────── +// The native region is keyed by an epoch; changing the key REMOUNTS it, +// which forces Ink to repaint the ENTIRE archive (~400 blocks) synchronously to +// stdout. That repaint is wanted ONLY on a true transcript reset (/clear, /clean, +// session reset). It must NEVER fire on an append or a cap-spill: Ink's +// is append-only (it renders only items beyond its internal index), so a +// front-slice spill renders nothing new and never needs a remount — bumping the +// epoch on spill is the long-session repaint storm. Carry the cause EXPLICITLY +// instead of inferring it from the array length shrinking. +fn name=nextStaticEpoch params="currentEpoch:number, cause:'append'|'spill'|'reset'" returns=number export=true + doc "Next remount epoch. Only a true transcript reset advances it; append and cap-spill leave it untouched (a spill must NOT repaint the sealed Static region)." + handler lang="kern" + if cond="cause === 'reset'" + return value="currentEpoch + 1" + return value="currentEpoch" diff --git a/packages/cli/src/kern/signals/runs-store.kern b/packages/cli/src/kern/signals/runs-store.kern new file mode 100644 index 000000000..de7738aea --- /dev/null +++ b/packages/cli/src/kern/signals/runs-store.kern @@ -0,0 +1,320 @@ +// ── Runs store — async metadata snapshot + auto-prune for ~/.agon/runs ── +// +// ~/.agon/runs accumulates one ${forgeId}.json record (plus per-run dirs) for +// every orchestration run, forever. It once regrew to 40k files. The dashboard +// used to count those files with a SYNCHRONOUS readdirSync inside the Ink +// render path (surfaces/app-blocks.kern:buildDashboardBlock) — a 40k-entry +// directory scan on every dashboard repaint. +// +// This module owns runs metadata so the render path NEVER touches the fs: +// • hydrate() — async scan at startup, caches a snapshot in memory +// • snapshot() — sync, cheap, zero-fs accessor for render +// • scheduleRefresh() — debounced re-hydrate after a run record is written +// • maybePrune() — async auto-prune with a 1h cooldown stamp + soft lock +// +// All paths resolve via agonPath('runs') at CALL time so AGON_HOME overrides +// (test isolation) apply — never freeze RUNS_DIR at module load. + +import from="node:fs" names="readFileSync,writeFileSync" +import from="node:fs/promises" names="readdir,stat,unlink" +import from="node:path" names="join" +import from="@kernlang/agon-core" names="agonPath" + +// ── Policy constants ─────────────────────────────────────────────────── + +const name=RUN_KEEP_AGE_MS type=number value={{ 7 * 24 * 60 * 60 * 1000 }} + doc "Keep every run file newer than this regardless of count (7 days)." + +const name=RUN_SOFT_CAP type=number value=2000 + doc "Beyond the age window, keep at most this many newest run files." + +const name=RUN_HARD_CAP type=number value=5000 + doc "Absolute ceiling on total run files kept, even within the age window." + +const name=RUN_PROTECT_MIN_AGE_MS type=number value={{ 10 * 60 * 1000 }} + doc "Never delete a run file younger than this — it may belong to an in-flight run (10 minutes). Long-running orchestrations (forge/goal/conquer) can keep a run's record 'open' for many minutes, so a generous floor is the primary in-flight protection; activeRunId matching is defense in depth." + +const name=RUN_PRUNE_COOLDOWN_MS type=number value={{ 60 * 60 * 1000 }} + doc "Skip a prune if one completed within this window (1h)." + +const name=RUN_PRUNE_CHUNK type=number value=200 + doc "Delete this many files then yield to the event loop, so prune never monopolizes the loop." + +const name=PRUNE_STAMP_NAME type=string value={{ '.prune-stamp' }} + doc "Filename inside the runs dir holding the last-prune timestamp (also a poor-man's lock)." + +// ── Types ────────────────────────────────────────────────────────────── + +interface name=RunsSnapshot + field name=count type=number + doc "Number of *.json run record files (excludes the .prune-stamp)." + field name=hydratedAt type=number + doc "Date.now() of the scan that produced this snapshot; 0 before first hydrate." + +interface name=PruneResult + field name=deleted type=number + field name=skipped type=boolean + doc "True when the prune was a no-op due to cooldown / lock / nothing-to-do." + field name=reason type=string + +interface name=PruneOptions + field name=activeRunIds type="string[]" optional=true + doc "Run ids whose files must never be deleted (the active session's runs)." + field name=force type=boolean optional=true + doc "Bypass the cooldown stamp (used by tests / explicit triggers)." + +// ── Internal fs helpers (resolve AGON_HOME at call time) ─────────────── + +fn name=runsDirPath returns=string export=false expr={{ return agonPath('runs'); }} + +fn name=pruneStampPath returns=string export=false expr={{ return join(agonPath('runs'), PRUNE_STAMP_NAME); }} + +fn name=listRunJsonFiles async=true returns="Promise<{ name: string; mtimeMs: number }[]>" export=false + doc "List *.json run record files with mtimes via node:fs/promises (never blocks the Ink event loop, even on a 40k-entry dir). Returns [] if the dir is missing. Skips the .prune-stamp and any entry that cannot be stat'd." + handler <<< + const dir = runsDirPath(); + let names: string[]; + try { + names = await readdir(dir); + } catch { + return []; + } + const out: { name: string; mtimeMs: number }[] = []; + for (const entry of names) { + if (!entry.endsWith('.json')) continue; + try { + const st = await stat(join(dir, entry)); + if (st.isFile()) out.push({ name: entry, mtimeMs: st.mtimeMs }); + } catch { + // entry vanished mid-scan or is unreadable — ignore + } + } + return out; + >>> + +fn name=countRunJsonFiles async=true returns="Promise" export=false + doc "Count *.json run record files WITHOUT stat'ing each one. hydrate()/scheduleRefresh only need the count, so on a 40k-file dir this avoids ~40k stat syscalls (one readdir is enough). Returns 0 if the dir is missing. The .prune-stamp has no .json suffix so it's excluded automatically." + handler <<< + const dir = runsDirPath(); + let names: string[]; + try { + names = await readdir(dir); + } catch { + return 0; + } + let count = 0; + for (const entry of names) { + if (entry.endsWith('.json')) count++; + } + return count; + >>> + +fn name=readPruneStamp returns=number export=false + doc "Read the last-prune timestamp from .prune-stamp. Returns 0 if absent/unreadable." + handler <<< + try { + const raw = readFileSync(pruneStampPath(), 'utf-8').trim(); + const n = Number(raw); + return Number.isFinite(n) ? n : 0; + } catch { + return 0; + } + >>> + +fn name=writePruneStamp params="ts:number" returns=void export=false + doc "Persist the last-prune timestamp (best effort)." + handler <<< + try { + writeFileSync(pruneStampPath(), String(ts)); + } catch { + // best effort — a missing stamp just means the next prune isn't rate-limited + } + >>> + +fn name=computeRunPruneTargets params="files:{ name: string; mtimeMs: number }[], now:number, activeRunIds:string[]" returns="string[]" export=true + doc "Pure prune-policy core: given the current run files + clock + protected ids, return the file names to delete (oldest-first). Keep everything < 7 days; beyond that keep newest RUN_SOFT_CAP; hard cap RUN_HARD_CAP total. Never returns an active-id file or one younger than RUN_PROTECT_MIN_AGE_MS. Exported so the policy is unit-testable without touching the fs." + handler <<< + // Exact id match, NOT substring: a run id must equal the file's basename + // (`${id}.json`) or be a prefix followed by a non-alphanumeric delimiter + // (e.g. `${id}-meta.json`). Substring matching would over-protect — id "1" + // would shield "1234.json" — and (worse) could be tricked into UNDER-deleting. + // + // Structural guarantee (why id matching is only defense in depth): deletion + // is oldest-first under a 7-day age window + newest-N caps, and an active + // run's record is necessarily recent, so it's effectively unreachable by the + // policy regardless of this check. The RUN_PROTECT_MIN_AGE_MS floor already + // shields every fresh file; activeRunId matching just hardens the edge. + const isActive = (fileName: string): boolean => + activeRunIds.some((id) => { + if (!id) return false; + if (fileName === `${id}.json`) return true; + if (!fileName.startsWith(id)) return false; + const next = fileName.charAt(id.length); + // Prefix must end at a delimiter, never mid-token (so "1" never matches "12.json"). + return next !== '' && !/[A-Za-z0-9]/.test(next); + }); + + // Files we are even ALLOWED to delete: not active, and old enough that they + // can't belong to an in-flight run. + const deletable = files + .filter((f) => !isActive(f.name) && now - f.mtimeMs >= RUN_PROTECT_MIN_AGE_MS) + .sort((a, b) => a.mtimeMs - b.mtimeMs); // oldest first + + const cutoff = now - RUN_KEEP_AGE_MS; + const deletableOld = deletable.filter((f) => f.mtimeMs < cutoff); // older than 7d + + const removalSet = new Map(); + + // 1) Soft cap: keep only the newest RUN_SOFT_CAP files overall. The overflow + // must come from OLD (>7d) deletable files first — files inside the 7d + // window are kept regardless of the soft cap. + const softOverflow = Math.max(0, files.length - RUN_SOFT_CAP); + const softRemoveCount = Math.min(softOverflow, deletableOld.length); + for (let k = 0; k < softRemoveCount; k++) { + removalSet.set(deletableOld[k].name, true); + } + + // 2) Hard cap: total kept must end ≤ RUN_HARD_CAP. Remove additional oldest + // deletable files (young or old) until the total fits. + const mustRemoveForHardCap = Math.max(0, files.length - RUN_HARD_CAP); + let i = 0; + while (removalSet.size < mustRemoveForHardCap && i < deletable.length) { + removalSet.set(deletable[i].name, true); + i++; + } + + // Return oldest-first, matching the deletable ordering. + return deletable.filter((f) => removalSet.has(f.name)).map((f) => f.name); + >>> + +// ── RunsStore — process singleton ────────────────────────────────────── + +service name=RunsStore singleton=true + doc "Owns ~/.agon/runs metadata so the render path never scans the fs. Caches a snapshot, refreshes on demand (debounced), and auto-prunes off the critical path." + + field name=snap type=RunsSnapshot private=true + doc "In-memory cached snapshot. The ONLY thing render reads." + field name=refreshTimer type="ReturnType|null" private=true + doc "Debounce handle for scheduleRefresh." + field name=pruning type=boolean private=true + doc "In-process guard so two maybePrune() calls don't overlap." + + constructor + handler lang="kern" + assign target="this.snap" value="{ count: 0, hydratedAt: 0 }" + assign target="this.refreshTimer" value="null" + assign target="this.pruning" value="false" + + method name=snapshot returns=RunsSnapshot + doc "Cheap, synchronous, zero-fs accessor for the render path. Returns the cached snapshot." + handler lang="kern" + return value="this.snap" + + method name=runCount returns=number + doc "Convenience: cached run-file count for the dashboard. Zero fs." + handler lang="kern" + return value="this.snap.count" + + method name=hydrate async=true returns="Promise" + doc "Scan the runs dir once and cache the snapshot. Async so the directory read never blocks render. Safe to call repeatedly." + handler <<< + // Yield to the macrotask queue first so a startup hydrate never lands in + // the same tick as first paint. + await new Promise((resolve) => setTimeout(resolve, 0)); + // Count-only path: hydrate just needs the file count, so skip the per-file + // stat() that listRunJsonFiles does (mtime is only needed by the prune). + const count = await countRunJsonFiles(); + this.snap = { count, hydratedAt: Date.now() }; + return this.snap; + >>> + + method name=scheduleRefresh params="delayMs?:number" returns=void + doc "Debounced re-hydrate. Called after a run record is written so the dashboard count stays fresh without a render-path fs scan. Coalesces bursts (forge writes the manifest incrementally)." + handler <<< + const delay = typeof delayMs === 'number' && delayMs >= 0 ? delayMs : 750; + if (this.refreshTimer) clearTimeout(this.refreshTimer); + const timer = setTimeout(() => { + this.refreshTimer = null; + // Fire and forget — errors are non-critical (stale count at worst). + void this.hydrate().catch(() => { /* snapshot stays as-is */ }); + }, delay); + this.refreshTimer = timer; + // Never keep the process alive just to refresh a count. + if (typeof (timer as any).unref === 'function') (timer as any).unref(); + >>> + + method name=maybePrune params="opts?:PruneOptions" async=true returns="Promise" + doc "Auto-prune the runs dir if the cooldown has elapsed and no prune is in progress. Non-blocking: deletes oldest-first by mtime in async chunks. Logs ONE quiet diagnostic line on a real prune." + handler <<< + // In-process overlap guard FIRST, before any stamp/cooldown logic. Two + // same-process callers (e.g. the boot effect racing a scheduleRefresh) both + // read the stamp in the same tick; claiming `this.pruning` here — not after + // the cooldown check — is what makes the second caller bail deterministically. + if (this.pruning) { + return { deleted: 0, skipped: true, reason: 'in-progress' }; + } + this.pruning = true; + try { + const force = opts?.force === true; + const now = Date.now(); + const lastPrune = readPruneStamp(); + + // Cooldown + soft cross-process lock: a recent stamp means a prune ran (or + // is running) within the cooldown window — skip. The stamp doubles as the + // lock because maybePrune() claims it (writes "now") before any deletion. + if (!force && lastPrune > 0 && now - lastPrune < RUN_PRUNE_COOLDOWN_MS) { + return { deleted: 0, skipped: true, reason: 'cooldown' }; + } + + // Claim the cross-process lock by stamping "now" BEFORE scanning/deleting. + // This is deliberate (do not move it after the delete): the stamp is the + // lock claim a concurrent PROCESS reads to back off, and the cooldown + // limits SCAN cost — so even a no-op scan should start the cooldown. + // Crash-mid-prune costs at most a 1h delay before the next attempt; that's + // strictly cheaper than letting N processes scan a 40k-file dir in lockstep. + writePruneStamp(now); + + const files = await listRunJsonFiles(); + const activeIds = Array.isArray(opts?.activeRunIds) ? opts!.activeRunIds! : []; + const toDelete = computeRunPruneTargets(files, now, activeIds); + + if (toDelete.length === 0) { + // Nothing to remove — refresh the snapshot and bail quietly. + this.snap = { count: files.length, hydratedAt: now }; + return { deleted: 0, skipped: true, reason: 'within-policy' }; + } + + const dir = runsDirPath(); + let deleted = 0; + for (let idx = 0; idx < toDelete.length; idx++) { + try { + await unlink(join(dir, toDelete[idx])); + deleted++; + } catch { + // file vanished or unremovable — skip + } + // Yield every RUN_PRUNE_CHUNK deletions so we never block the loop. + if ((idx + 1) % RUN_PRUNE_CHUNK === 0) { + await new Promise((resolve) => setTimeout(resolve, 0)); + } + } + + // Re-stamp completion time and refresh the cached snapshot. Count-only: + // we just need the post-prune total, not mtimes. + const finishedAt = Date.now(); + writePruneStamp(finishedAt); + const remaining = await countRunJsonFiles(); + this.snap = { count: remaining, hydratedAt: finishedAt }; + + if (deleted > 0 && process.env.AGON_DEBUG) { + // Diagnostic only under AGON_DEBUG — a background prune must never write into the live TUI. + console.error(`[agon] runs prune: removed ${deleted} old run record(s), kept ${remaining}`); + } + return { deleted, skipped: false, reason: 'pruned' }; + } finally { + this.pruning = false; + } + >>> + +const name=runsStore value={{ new RunsStore() }} + doc "Process-wide RunsStore singleton. Import this from render + telemetry; never construct another." diff --git a/packages/cli/src/kern/surfaces/app-blocks.kern b/packages/cli/src/kern/surfaces/app-blocks.kern index 4b19f87c2..e7625fed3 100644 --- a/packages/cli/src/kern/surfaces/app-blocks.kern +++ b/packages/cli/src/kern/surfaces/app-blocks.kern @@ -1,10 +1,10 @@ // ── Block management — archive, dedupe, coalescing ────────────────── -import from="node:fs" names="readdirSync" -import from="@kernlang/agon-core" names="loadConfig,getRatings,getActiveWorkspace,RUNS_DIR" +import from="@kernlang/agon-core" names="loadConfig,getRatings,getActiveWorkspace" import from="../lib/engines-dir.js" names="resolveBuiltinEnginesDir" import from="@kernlang/agon-core" names="EngineRegistry" import from="../blocks/markdown.js" names="cleanEngineOutput" import from="../signals/block-archive.js" names="appendBlockWithCap" +import from="../signals/runs-store.js" names="runsStore" import from="../../generated/blocks/engine.js" names="OutputBlock" types=true import from="../../handlers/types.js" names="OutputEvent" types=true import from="./app-layout.js" names="estimateOutputEventRows" @@ -46,8 +46,10 @@ module name=AppBlocks const sorted = Object.entries(ratings.global) .map(([id, r]: any) => [id, { rating: Math.round(r.mu - 2 * r.phi) }] as const) .sort(([, a], [, b]) => b.rating - a.rating); - let runCount = 0; - try { runCount = readdirSync(RUNS_DIR).filter((f: string) => f.endsWith('.json')).length; } catch { /* runs dir missing — first run */ } + // Render path is fs-free: read the cached runs snapshot, never readdirSync. + // runsStore.hydrate() runs async at startup (app.kern boot effect) and + // refreshes (debounced) after each run record is written. + const runCount = runsStore.snapshot().count; return { id: 0, @@ -174,7 +176,13 @@ module name=AppBlocks if (!Array.isArray(blocks) || blocks.length === 0) return 0; const safeBudget = Math.max(1, Math.floor(Number(rowBudget) || 1)); - const maxLiveBlocks = 80; + // 40 (was 80): Ink rewrites the ENTIRE live tail every render (~15-30/s + // during runs), so the live window is the dominant stdout cost. The row + // budget usually seals far earlier anyway — this cap only bites on runs of + // many tiny blocks, exactly where per-frame rewrite cost compounds. Sealed + // blocks render identically (they just stop being mouse-selectable), and a + // measured A/B (fat-block probe) showed no UX regression at 40. + const maxLiveBlocks = 40; let rows = 0; let liveStart = blocks.length; diff --git a/packages/cli/src/kern/surfaces/app-rendering.kern b/packages/cli/src/kern/surfaces/app-rendering.kern index 190e74002..31a275baf 100644 --- a/packages/cli/src/kern/surfaces/app-rendering.kern +++ b/packages/cli/src/kern/surfaces/app-rendering.kern @@ -680,12 +680,139 @@ module name=AppRendering return rows; >>> - fn name=buildTranscriptRows params="blocks:OutputBlock[], mode:string, toolOutputExpanded:boolean, thinkingExpanded:boolean" returns="any[]" + // ── Per-block transcript row cache (THE SPIKE KILLER) ─────────────── + // buildTranscriptRows re-runs over ALL ≤500 blocks on every commit. The vast + // majority of those blocks are committed, immutable, and render purely from + // their own event (no neighbor dependency) — so their rows are memoizable by + // (block id, content fingerprint, mode, expand flags, resolved widths). A new + // append then computes rows for the NEW block only; the other ~500 come from + // this cache. Mirrors the _mdRowCache LRU-evict pattern below. + // + // CORRECTNESS — what is and isn't cached here: + // - tool-call / tool-call-group blocks are NEVER cached at this layer. Their + // rendering coalesces with adjacent tool blocks (the `skippedUntil` group + // merge in buildTranscriptRows) and they mutate in place while running, so + // they are neither neighbor-independent nor content-stable. They stay on + // the uncached path and are handled entirely inside buildTranscriptRows. + // - file-changes reads process.cwd() and is left uncached (cwd-dependent, + // rare, low volume). + // - The leading positional spacer (`if (rows.length > 0) pushSpacer`) for + // user-message / engine-block / kern-draft / debate-round depends on global + // transcript position, NOT block content. renderBlockOwnRows OMITS it; the + // loop in buildTranscriptRows prepends it. So the cached value is purely a + // function of the cache key — provably position-independent. + // - Mutation fingerprint: committed prose/engine/info blocks are immutable + // (streaming goes through streamingText, not blocks). The fingerprint still + // includes a cheap content signature (length + head slice + key scalar + // fields) so an in-place mutation can never serve stale rows. + const name=_blockRowCache value={{ new Map() }} + const name=_BLOCK_ROW_CACHE_MAX type=number value={{ 1500 }} + + fn name=blockWantsLeadingGap params="eventType:string" returns=boolean + handler <<< + return eventType === 'user-message' + || eventType === 'engine-block' + || eventType === 'kern-draft' + || eventType === 'debate-round'; + >>> + + // EXPLICIT ALLOWLIST of high-volume block types that render purely from a + // small set of scalar event fields — each of which is captured by + // blockRowFingerprint. Anything NOT on this list (tool-call/tool-call-group, + // which are neighbor-dependent + mutating; file-changes, which reads cwd; and + // low-volume nested-data events like cesar-recap/scoreboard/plan/table/ + // dashboard whose deep structure a cheap fingerprint can't fully capture) is + // rendered uncached. This is a denylist-by-default safety posture: a new event + // type is uncached until explicitly proven fingerprintable, so it can never + // silently serve stale rows from a colliding fingerprint. + const name=_CACHEABLE_BLOCK_TYPES value={{ new Set([ + 'text', 'user-message', 'engine-block', 'separator', 'header', + 'success', 'warning', 'info', 'error', 'permission-ask', + 'thinking-chunk', 'streaming-chunk', 'kern-draft', 'debate-round', + 'verdict', + ]) }} + + fn name=isCacheableBlockType params="eventType:string" returns=boolean + handler <<< + return _CACHEABLE_BLOCK_TYPES.has(eventType); + >>> + + fn name=blockRowFingerprint params="event:any, mode:string, toolOutputExpanded:boolean, thinkingExpanded:boolean, proseWidth:number, chatWidth:number, engineWidth:number" returns="string" + handler <<< + // Cheap content signature so an in-place mutation (status/output update on + // a non-tool block, or any event field change) busts the cache. Widths + // encode terminal width (contentWidth reads process.stdout.columns), so + // including them flushes the entry on resize — exactly like _mdRowCache. + const type = String(event?.type ?? ''); + // Primary payload across every cacheable event variant. permission-ask + // carries its diff in `command`; tool/title/position carry the rest. Any + // field a variant renders from MUST be in this signature or two different + // blocks that share an id (ids are reused freely in tests, and in prod a + // /clear can repeat them) would collide on a stale cache entry. + const content = String(event?.content ?? event?.message ?? event?.summary ?? event?.argument ?? event?.title ?? event?.chunk ?? event?.markdown ?? event?.command ?? ''); + const status = String(event?.status ?? ''); + const engineId = String(event?.engineId ?? ''); + const tool = String(event?.tool ?? ''); + const folded = String(event?.foldedSteps ?? ''); + // Render-affecting secondary fields per cacheable variant: engine-block + // reads `color` (accent), kern-draft reads `critique`, debate-round reads + // `position`. Adding a type to the allowlist? Add EVERY field its renderer + // reads here first (e.g. tool output would need `output`). + const extra = `${String(event?.color ?? '')}|${String(event?.critique ?? '')}|${String(event?.position ?? '')}`; + const widths = `${proseWidth}.${chatWidth}.${engineWidth}`; + const flags = `${mode}|${toolOutputExpanded ? 1 : 0}|${thinkingExpanded ? 1 : 0}`; + // Head AND tail slices: same-prefix same-length bodies (padded logs, JSON + // with a shared head) must not collide on a recycled id. + return `${type}|${engineId}|${tool}|${status}|${folded}|${extra}|${content.length}|${content.slice(0, 120)}|${content.slice(-40)}|${widths}|${flags}`; + >>> + + fn name=cachedBlockOwnRows params="block:OutputBlock, mode:string, toolOutputExpanded:boolean, thinkingExpanded:boolean, proseWidth:number, chatWidth:number, engineWidth:number" returns="any[]" + handler <<< + const event = block.event as any; + const type = String(event?.type ?? ''); + if (!isCacheableBlockType(type)) { + return renderBlockOwnRows(block, mode, toolOutputExpanded, thinkingExpanded, proseWidth, chatWidth, engineWidth); + } + const fingerprint = blockRowFingerprint(event, mode, toolOutputExpanded, thinkingExpanded, proseWidth, chatWidth, engineWidth); + const key = `${block.id}|${fingerprint}`; + const hit = _blockRowCache.get(key); + if (hit) { + // True LRU: re-insert on hit so hot entries survive the oldest-20% + // eviction. Return a shallow copy so a caller can never mutate the + // cached array (the row objects inside are treated as immutable). + _blockRowCache.delete(key); + _blockRowCache.set(key, hit); + return hit.slice(); + } + const rows = renderBlockOwnRows(block, mode, toolOutputExpanded, thinkingExpanded, proseWidth, chatWidth, engineWidth); + // At the cap, evict the least-recently-used 20% (hits re-insert above, so + // Map order ≈ recency) — amortized, never a full flush (which would force + // a cold rebuild of all committed rows). + if (_blockRowCache.size >= _BLOCK_ROW_CACHE_MAX) { + const evictCount = Math.floor(_BLOCK_ROW_CACHE_MAX * 0.2); + const it = _blockRowCache.keys(); + for (let i = 0; i < evictCount; i++) { + const oldest = it.next().value; + if (oldest === undefined) break; + _blockRowCache.delete(oldest); + } + } + _blockRowCache.set(key, rows); + return rows.slice(); + >>> + + fn name=clearBlockRowCache returns=void export=true + doc "Drop every cached block row. Called from the clearBlocks reset funnel so a /clear (which can recycle block ids in replay paths) can never serve rows rendered for a previous transcript." + handler <<< + _blockRowCache.clear(); + >>> + + fn name=renderBlockOwnRows params="block:OutputBlock, mode:string, toolOutputExpanded:boolean, thinkingExpanded:boolean, proseWidth:number, chatWidth:number, engineWidth:number" returns="any[]" + doc "Render the intrinsic rows for ONE transcript block, independent of its neighbors and of its position in the transcript (no leading positional spacer — that is prepended by buildTranscriptRows). Tool-call grouping is NOT handled here." handler <<< const rows: any[] = []; - const proseWidth = contentWidth(4); - const chatWidth = contentWidth(2); - const engineWidth = contentWidth(8); + const event = block.event as any; + const baseKey = `block-${block.id}`; const pushSpacer = (key: string) => { rows.push({ key, kind: 'spacer' }); }; @@ -705,29 +832,12 @@ module name=AppRendering }); }; - let skippedUntil = -1; - blocks.forEach((block: OutputBlock, blockIndex: number) => { - if (blockIndex < skippedUntil) return; - const event = block.event as any; - const baseKey = `block-${block.id}`; - - if (!toolOutputExpanded && event.type === 'tool-call') { - const groupedEvents = [event]; - let nextIndex = blockIndex + 1; - while (nextIndex < blocks.length) { - const nextEvent = blocks[nextIndex].event as any; - if (nextEvent.type !== 'tool-call') break; - groupedEvents.push(nextEvent); - nextIndex += 1; - } - - if (groupedEvents.length > 1) { - rows.push(...buildCollapsedToolGroupRows(baseKey, groupedEvents)); - skippedUntil = nextIndex; - return; - } - } - + // The switch below was lifted verbatim from buildTranscriptRows; its many + // `return;` statements bail out of THIS inner closure (not the whole fn), + // accumulating into the closed-over `rows`. The leading positional spacer + // for gap-wanting types is intentionally a no-op here (rows is empty at + // entry) — buildTranscriptRows prepends the real gap. + (() => { switch (event.type) { case 'text': pushRichText(baseKey, event.content, 2, proseWidth); @@ -942,50 +1052,13 @@ module name=AppRendering return; } case 'tool-call': - rows.push(...buildToolCallRows(baseKey, event, toolOutputExpanded)); - return; - case 'tool-call-group': { - const groupBlocks = Array.isArray((event as any).blocks) ? (event as any).blocks : []; - // Expanded: unpack every inner tool call so Ctrl+E reveals full - // output. Collapsed (default): one compact summary row, matching - // the Static-era ToolCallGroup renderer — preserves the bundling - // that output.kern emits (codex review P3). - if (toolOutputExpanded) { - groupBlocks.forEach((child: any, index: number) => { - const childEvent = child?.event; - if (childEvent && childEvent.type === 'tool-call') { - rows.push(...buildToolCallRows(`${baseKey}-g-${index}`, childEvent, toolOutputExpanded)); - } - }); - return; - } - const groupedEvents: any[] = []; - const appendGroup = (blocksToAppend: any[]) => { - blocksToAppend.forEach((child: any) => { - const childEvent = child?.event; - if (childEvent && childEvent.type === 'tool-call') groupedEvents.push(childEvent); - }); - }; - appendGroup(groupBlocks); - let nextIndex = blockIndex + 1; - while (nextIndex < blocks.length) { - const nextEvent = blocks[nextIndex].event as any; - if (nextEvent?.type === 'tool-call-group') { - appendGroup(Array.isArray(nextEvent.blocks) ? nextEvent.blocks : []); - nextIndex += 1; - continue; - } - if (nextEvent?.type === 'tool-call') { - groupedEvents.push(nextEvent); - nextIndex += 1; - continue; - } - break; - } - rows.push(...buildCollapsedToolGroupRows(baseKey, groupedEvents)); - skippedUntil = nextIndex; + case 'tool-call-group': + // Tool-call rendering is neighbor-dependent (adjacent tool blocks + // coalesce) and content-unstable (mutates while running), so it is + // handled entirely by buildTranscriptRows on the uncached path and + // never routed here. This branch is unreachable; kept as a no-op so + // the default fallback can't mis-render a tool block if it ever is. return; - } case 'response-meta': { const totalTokens = (event.inputTokens ?? 0) + (event.outputTokens ?? 0); const parts = [ @@ -1179,6 +1252,103 @@ module name=AppRendering default: pushSegmentsRow(`${baseKey}-fallback`, 2, [{ text: `[${event.type}]`, dimColor: true }]); } + })(); + + return rows; + >>> + + fn name=buildTranscriptRows params="blocks:OutputBlock[], mode:string, toolOutputExpanded:boolean, thinkingExpanded:boolean" returns="any[]" + handler <<< + const rows: any[] = []; + const proseWidth = contentWidth(4); + const chatWidth = contentWidth(2); + const engineWidth = contentWidth(8); + + let skippedUntil = -1; + blocks.forEach((block: OutputBlock, blockIndex: number) => { + if (blockIndex < skippedUntil) return; + const event = block.event as any; + const baseKey = `block-${block.id}`; + + // ── Neighbor-dependent tool grouping (UNCACHED) ────────────────── + // Collapsed adjacent tool-call blocks coalesce into one summary; this + // depends on the following blocks, so it can never be a per-block cache + // hit. Tool blocks also mutate in place while running. + if (!toolOutputExpanded && event.type === 'tool-call') { + const groupedEvents = [event]; + let nextIndex = blockIndex + 1; + while (nextIndex < blocks.length) { + const nextEvent = blocks[nextIndex].event as any; + if (nextEvent.type !== 'tool-call') break; + groupedEvents.push(nextEvent); + nextIndex += 1; + } + + if (groupedEvents.length > 1) { + rows.push(...buildCollapsedToolGroupRows(baseKey, groupedEvents)); + skippedUntil = nextIndex; + return; + } + } + + if (event.type === 'tool-call') { + rows.push(...buildToolCallRows(baseKey, event, toolOutputExpanded)); + return; + } + + if (event.type === 'tool-call-group') { + const groupBlocks = Array.isArray((event as any).blocks) ? (event as any).blocks : []; + // Expanded: unpack every inner tool call so Ctrl+E reveals full + // output. Collapsed (default): one compact summary row, matching + // the Static-era ToolCallGroup renderer — preserves the bundling + // that output.kern emits (codex review P3). + if (toolOutputExpanded) { + groupBlocks.forEach((child: any, index: number) => { + const childEvent = child?.event; + if (childEvent && childEvent.type === 'tool-call') { + rows.push(...buildToolCallRows(`${baseKey}-g-${index}`, childEvent, toolOutputExpanded)); + } + }); + return; + } + const groupedEvents: any[] = []; + const appendGroup = (blocksToAppend: any[]) => { + blocksToAppend.forEach((child: any) => { + const childEvent = child?.event; + if (childEvent && childEvent.type === 'tool-call') groupedEvents.push(childEvent); + }); + }; + appendGroup(groupBlocks); + let nextIndex = blockIndex + 1; + while (nextIndex < blocks.length) { + const nextEvent = blocks[nextIndex].event as any; + if (nextEvent?.type === 'tool-call-group') { + appendGroup(Array.isArray(nextEvent.blocks) ? nextEvent.blocks : []); + nextIndex += 1; + continue; + } + if (nextEvent?.type === 'tool-call') { + groupedEvents.push(nextEvent); + nextIndex += 1; + continue; + } + break; + } + rows.push(...buildCollapsedToolGroupRows(baseKey, groupedEvents)); + skippedUntil = nextIndex; + return; + } + + // ── All other (neighbor-independent) blocks: CACHED per-block ───── + const ownRows = cachedBlockOwnRows(block, mode, toolOutputExpanded, thinkingExpanded, proseWidth, chatWidth, engineWidth); + if (ownRows.length === 0) return; + // Leading positional spacer: depends on transcript position (not block + // content), so it lives here, outside the cache. renderBlockOwnRows + // omits it. + if (rows.length > 0 && blockWantsLeadingGap(String(event?.type ?? ''))) { + rows.push({ key: `${baseKey}-gap`, kind: 'spacer' }); + } + for (const row of ownRows) rows.push(row); }); return rows; diff --git a/packages/cli/src/kern/surfaces/app.kern b/packages/cli/src/kern/surfaces/app.kern index 29ee8abcb..0baa3680c 100644 --- a/packages/cli/src/kern/surfaces/app.kern +++ b/packages/cli/src/kern/surfaces/app.kern @@ -11,6 +11,7 @@ import from="../../handlers/types.js" names="EngineProgress" types=true import from="@kernlang/agon-adapter-cli" names="createCliAdapter" import from="@kernlang/agon-core" names="EngineAdapter" types=true import from="../signals/intent.js" names="detectIntent,SLASH_COMMANDS" +import from="../signals/runs-store.js" names="runsStore" import from="@kernlang/agon-core" names="CommandRegistry,registerBuiltinCommands,initExtensions,EventBus,bridgeShellHooks" import from="../signals/job-manager.js" names="JobManager" import from="../signals/job-manager.js" names="Job" types=true @@ -35,7 +36,7 @@ import from="../signals/output.js" names="handleOutputEvent,clearPermissionQueue import from="../signals/output.js" names="OutputActions,OutputState,AgentProgressSnapshot,StreamingEntry" types=true import from="../signals/app-input.js" names="appendInputHistory,cleanInputValue,cleanSubmitValue,findInputChange,hasBtwSideChannelTarget,navigateHistory,parseAutoModeCommand,resolveEscapeAction,shouldQueuePlanModeOnTab" import from="../signals/keyboard.js" names="resolveKeyboardInput" -import from="../signals/block-archive.js" names="makeBlockArchivePath,appendBlockWithCap" +import from="../signals/block-archive.js" names="makeBlockArchivePath,appendBlockWithCap,nextStaticEpoch" import from="../signals/input-perf.js" names="perfNow,recordKeystrokeLatency" import from="../blocks/review.js" names="handleReviewAction" import from="../blocks/engine.js" names="DashboardView,OutputBlockView" @@ -76,7 +77,7 @@ import from="./app-terminal.js" names="createInitialRegistry,drainStdinBuffer,no import from="./app-selection.js" names="normalizeTextSelection" import from="./app-layout.js" names="estimateVisibleBlockBudget,estimateBottomChromeExtraRows,estimatePinnedLiveRows" import from="./app-blocks.js" names="buildDashboardBlock,coalesceToolCallBlocks,effectiveNativeArchiveBlockCount,historyBlocksForTranscript,nativeTranscriptBlocksForStatic,nativeArchiveBlockCount,appendTranscriptBlock,summarizeBtwTranscriptEvent" -import from="./app-rendering.js" names="buildExecutionRailStats,buildTranscriptRows" +import from="./app-rendering.js" names="buildExecutionRailStats,buildTranscriptRows,clearBlockRowCache" module name=AppHelperExports export from="./app-helpers.js" names="COMPOSER_HISTORY_LIMIT,isMutatingToolCall,probeEngineVitals,parseToolCallPayload,toolPreviewWindow,toolCallSupportsDetailView,detailViewerSupportsEvent,toolDetailViewportRows,findLatestToolDetailEvent,findLatestToolEvent,buildExecutionRailStats,composerHistoryPath,loadComposerInputHistory,saveComposerInputHistory,findLatestFailedToolEvent,buildFailedToolRetryDraft,buildToolDetailView,createInitialRegistry,drainStdinBuffer,maxScrollOffsetForRowCount,nextWheelAnimationStep,clampNumber,charDisplayWidth,stringDisplayWidth,displayColumnToStringIndex,normalizeRowSelection,normalizeTextSelection,richLineToPlainText,transcriptRowToPlainText,transcriptRowTextStartColumn,resolveTranscriptColumnFromMouse,transcriptRowsToPlainText,resolveTranscriptRowFromMouse,estimateVisibleBlockBudget,estimateWrappedRowCount,estimateQuestionReservedRows,estimateBottomChromeExtraRows,summarizeBtwTranscriptEvent,buildDashboardBlock,estimatePinnedLiveRows,estimateWrappedRows,estimateToolCallRows,estimateOutputEventRows,buildDisplayItems,isToolCallLikeBlock,coalesceToolCallBlocks,effectiveNativeArchiveBlockCount,estimateDisplayItemRows,historyBlocksForTranscript,nativeTranscriptBlocksForStatic,nativeArchiveBlockCount,isDuplicateEngineBlock,appendTranscriptBlock,normalizeTerminalMode,fileRailWidthForTerminal,fileRailMaxRowsForTerminal,buildTerminalReplaySnapshot,parseMarkdownToRows,buildToolCallRows,buildCollapsedToolGroupRows,buildTranscriptRows" @@ -102,8 +103,8 @@ screen name=App target=ink state name=historyIndex type=number initial=-1 safe=false state name=mode type="'chat'|'campfire'|'brainstorm'|'tribunal'" initial="'chat'" state name=sessionStartTime type=number initial="Date.now()" - state name=liveSpinner type=any initial="null" throttle=50 - state name=liveProgress type="EngineProgress[]|null" initial="null" throttle=50 + state name=liveSpinner type=any initial="null" throttle=120 + state name=liveProgress type="EngineProgress[]|null" initial="null" throttle=120 state name=slashPickerOpen type=boolean initial=false safe=false state name=questionState type=any initial="null" state name=questionAnswer type=string initial="''" safe=false @@ -229,7 +230,10 @@ screen name=App target=ink state name=workspacePath type=string initial="resolveWorkingDir()" state name=termWidth type=number initial="process.stdout.columns || 100" throttle=100 state name=termHeight type=number initial="process.stdout.rows || 24" throttle=100 - state name=nativeStaticEpoch type=number initial=0 + // Drives the native remount key. Incremented ONLY by clearBlocks + // (a true transcript reset: /clear, /clean, session reset). Append and cap-spill + // must NEVER advance it — remounting repaints the full sealed region. + state name=clearEpoch type=number initial=0 state name=nativeArchiveCount type=number initial=0 state name=fileRailOpen type=boolean initial="false" state name=executionRailOpen type=boolean initial="false" @@ -574,6 +578,14 @@ screen name=App target=ink }; >>> + // Tracks the last-seen transcript length so effectiveNativeArchiveCount can + // drop its stale high-water-mark when the array shrinks (a cap-spill front-slice + // makes the absolute archive count too large for the now-shorter array). This + // intentionally does NOT touch the remount epoch — that is driven + // solely by clearEpoch (bumped in clearBlocks on a true reset). A cap-spill + // shrinks the array but must NOT remount : Ink's is append-only + // and renders nothing for a front-slice, so a remount would needlessly repaint + // the entire sealed region (the long-session repaint storm). effect deps="terminalMode,nativeTranscriptBlocks" handler lang="kern" let name=nextCount value="nativeTranscriptBlocks.length" @@ -582,7 +594,6 @@ screen name=App target=ink return let name=previousCount value="nativeTranscriptBlockCountRef.current" if cond="nextCount < previousCount" - do value="setNativeStaticEpoch((epoch: number) => epoch + 1)" do value="setNativeArchiveCount(0)" assign target="nativeTranscriptBlockCountRef.current" value="nextCount" @@ -632,6 +643,58 @@ screen name=App target=ink return () => clearTimeout(boot); >>> + // ── Startup: hydrate the runs snapshot + auto-prune ~/.agon/runs ── + // The dashboard's runCount reads ONLY runsStore.snapshot() (zero fs in the + // render path). The startup dashboard block is built in the outputBlocks + // initializer (app.kern:99) BEFORE hydrate resolves, so it renders with + // count 0; we await hydrate() here and then PATCH that block in place so the + // count fills in once known — preserving its id (0) so Ink reconciles instead + // of remounting. Then we kick a non-blocking prune. Strictly SEQUENTIAL + // (hydrate → patch → prune) so we never run two fs scans concurrently off + // boot. The prune self-rate-limits via a 1h .prune-stamp cooldown, so calling + // it on every launch is cheap. All of it is fire-and-forget relative to render + // (an async IIFE inside the timer) and errors are swallowed best-effort. + // + // Active in-flight runs are protected inside maybePrune by the 10-minute + // RUN_PROTECT_MIN_AGE_MS floor (active records are necessarily recent + the + // policy deletes oldest-first under a 7-day window). No session/run id is + // cheaply in scope at this call site, so activeRunIds is intentionally left + // unthreaded — the age floor is the in-flight protection; id matching is + // defense in depth wired only where an id source already exists. + effect deps="" + handler <<< + const boot = setTimeout(() => { + void (async () => { + // Patch the already-rendered startup dashboard block in place (only if + // it's still present — the user may have /clear'd it). Preserves the + // block id so Ink reconciles instead of remounting. At 1.5s the + // dashboard is the lone block in the live region, well under the seal + // budget, so the in-place count update renders. + const patchDashboardCount = (count: number) => { + setOutputBlocks((prev: any) => + prev.map((b: any) => + b?.event?.type === 'dashboard' + ? { ...b, event: { ...b.event, runCount: count } } + : b, + ), + ); + }; + try { + const snap = await runsStore.hydrate(); + patchDashboardCount(snap.count); + } catch { /* count stays 0 — non-critical */ } + try { + const result = await runsStore.maybePrune(); + // If the prune actually removed files, the cached count dropped — + // re-patch so the dashboard never shows the stale pre-prune count. + if (result.deleted > 0) patchDashboardCount(runsStore.snapshot().count); + } catch { /* prune is best-effort */ } + })(); + }, 1500); + if (typeof (boot as any).unref === 'function') (boot as any).unref(); + return () => clearTimeout(boot); + >>> + // ── TTL pruner for completed agentProgress snapshots ── // After an agent-step-end event sets `completedAt`, leave the panel visible // for 5 seconds so the user sees the final state, then prune. Runs every 1s. @@ -849,7 +912,16 @@ screen name=App target=ink return appendTranscriptBlock(filtered, event, blockArchivePathRef.current); }); }, - clearBlocks: () => setOutputBlocks([]), + clearBlocks: () => { + // Single funnel for every true transcript reset (/clear, /clean, + // session reset all dispatch OutputEvent {type:'clear'} → clearBlocks). + // Bump the epoch HERE so the remount is tied to the cause, not + // inferred from the array shrinking (which a cap-spill also does). + setOutputBlocks([]); + setClearEpoch((epoch: number) => nextStaticEpoch(epoch, 'reset')); + setNativeArchiveCount(0); + clearBlockRowCache(); + }, setPendingPlanProposal: (val: OutputEvent | null) => setPendingPlanProposal(val), setReviewEvent, setQuestionState, @@ -2616,7 +2688,7 @@ ${streamCtx ? 'Recent live output from the running task:\n' + streamCtx + '\n\n' if (terminalMode === 'native') return ( <> - + {(block: any) => ( )} diff --git a/packages/cli/src/telemetry/index.ts b/packages/cli/src/telemetry/index.ts index 9ea0419b7..6d5c9aa5c 100644 --- a/packages/cli/src/telemetry/index.ts +++ b/packages/cli/src/telemetry/index.ts @@ -7,6 +7,7 @@ import { readFileSync, writeFileSync, mkdirSync } from 'node:fs'; import { join, resolve } from 'node:path'; import { homedir } from 'node:os'; import { tracker } from '@kernlang/agon-core'; +import { runsStore } from '../generated/signals/runs-store.js'; // ── Types ────────────────────────────────────────────────────────────── @@ -204,6 +205,12 @@ export function recordRun(result: OrchestrationResult): RunRecord { costEstimateUsd: stats?.totalCostUsd ?? undefined, }; defaultLedger.append(record); + // A run record was written — forge also writes a ${forgeId}.json into + // ~/.agon/runs around this point. Refresh the dashboard's cached runs + // snapshot (debounced; coalesces forge's incremental writes) so the count + // stays fresh without a render-path readdirSync. Best-effort: never let a + // refresh failure affect recording. + try { runsStore.scheduleRefresh(); } catch { /* non-critical */ } return record; } diff --git a/tests/unit/block-archive.test.ts b/tests/unit/block-archive.test.ts index 705683a64..817a2cad1 100644 --- a/tests/unit/block-archive.test.ts +++ b/tests/unit/block-archive.test.ts @@ -4,7 +4,7 @@ import { join } from 'node:path'; import { afterEach, beforeEach, describe, expect, it } from 'vitest'; -import { appendBlockWithCap, archiveBlocks } from '../../packages/cli/src/generated/signals/block-archive.js'; +import { appendBlockWithCap, archiveBlocks, nextStaticEpoch } from '../../packages/cli/src/generated/signals/block-archive.js'; const makeBlock = (id: number) => ({ id, event: { type: 'info', message: `msg-${id}` } as any }); @@ -59,3 +59,51 @@ describe('block-archive', () => { expect(archived[100].id).toBe(100); }); }); + +describe('nextStaticEpoch — remount epoch', () => { + it('a spill-shaped shrink does NOT change the epoch (no Static remount)', () => { + // A cap-spill front-slices outputBlocks (e.g. 500 → 400). Ink's is + // append-only, so the shrink renders nothing new and must NOT remount. + expect(nextStaticEpoch(7, 'spill')).toBe(7); + + // Drive a real spill through appendBlockWithCap and confirm the epoch stays put + // even though the array length shrank from 500 to 400. + const tmpDir = mkdtempSync(join(tmpdir(), 'agon-epoch-spill-')); + const spillPath = join(tmpDir, 'transcript.ndjson'); + try { + let state = Array.from({ length: 500 }, (_, i) => makeBlock(i)); + const before = state.length; + let epoch = 3; + state = appendBlockWithCap(state, makeBlock(500), spillPath); + expect(state.length).toBeLessThan(before); // 400 — the array shrank + epoch = nextStaticEpoch(epoch, 'spill'); + expect(epoch).toBe(3); // ...but the remount epoch is untouched + } finally { + rmSync(tmpDir, { recursive: true, force: true }); + } + }); + + it('an append never changes the epoch', () => { + expect(nextStaticEpoch(2, 'append')).toBe(2); + }); + + it('a /clear-shaped reset bumps the epoch exactly once', () => { + expect(nextStaticEpoch(0, 'reset')).toBe(1); + // Two distinct /clear resets advance it by exactly one each. + let epoch = 0; + epoch = nextStaticEpoch(epoch, 'reset'); + epoch = nextStaticEpoch(epoch, 'reset'); + expect(epoch).toBe(2); + }); + + it('a session reset (same clearBlocks funnel) bumps the epoch', () => { + // /clear, /clean, and session reset all dispatch OutputEvent {type:'clear'} → + // clearBlocks → cause 'reset'. A reset after appends/spills still bumps once. + let epoch = 5; + epoch = nextStaticEpoch(epoch, 'append'); + epoch = nextStaticEpoch(epoch, 'spill'); + expect(epoch).toBe(5); // appends + spills left it alone + epoch = nextStaticEpoch(epoch, 'reset'); + expect(epoch).toBe(6); // the session reset bumps it + }); +}); diff --git a/tests/unit/runs-store.test.ts b/tests/unit/runs-store.test.ts new file mode 100644 index 000000000..9a31e7922 --- /dev/null +++ b/tests/unit/runs-store.test.ts @@ -0,0 +1,268 @@ +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import { mkdirSync, writeFileSync, utimesSync, existsSync, rmSync } from 'node:fs'; +import { join } from 'node:path'; + +import { cleanupTestAgonHome, setupTestAgonHome } from '../helpers/agon-home.js'; +import { + computeRunPruneTargets, + runsStore, + RUN_SOFT_CAP, + RUN_HARD_CAP, + RUN_KEEP_AGE_MS, + RUN_PROTECT_MIN_AGE_MS, + PRUNE_STAMP_NAME, +} from '../../packages/cli/src/generated/signals/runs-store.js'; + +const DAY = 24 * 60 * 60 * 1000; + +/** Create a runs dir + N run-record .json files with explicit ages (ms-old). */ +function seedRuns( + agonHome: string, + specs: Array<{ name: string; ageMs: number }>, +): string { + const runsDir = join(agonHome, 'runs'); + mkdirSync(runsDir, { recursive: true }); + const now = Date.now(); + for (const { name, ageMs } of specs) { + const fp = join(runsDir, name); + writeFileSync(fp, '{}'); + const ts = new Date(now - ageMs); + utimesSync(fp, ts, ts); + } + return runsDir; +} + +describe('runs-store — prune policy (pure)', () => { + const now = 1_000_000_000_000; // fixed clock + + function file(name: string, ageMs: number) { + return { name, mtimeMs: now - ageMs }; + } + + it('keeps everything inside the 7-day age window even past the soft cap', () => { + // SOFT_CAP+50 files, ALL younger than 7 days → nothing deletable for the + // soft cap (young files are kept regardless), and total < HARD_CAP. + const files = Array.from({ length: RUN_SOFT_CAP + 50 }, (_, i) => + file(`r-${i}.json`, (i + 1) * 1000), // 1s..(N)s old, all < 7d + ); + const targets = computeRunPruneTargets(files, now, []); + expect(targets).toEqual([]); + }); + + it('honors the soft cap by deleting oldest files older than 7 days', () => { + // 100 fresh (< 7d) + (SOFT_CAP) old (> 7d). Total = SOFT_CAP + 100, so + // soft overflow = 100, all removable from the OLD set, oldest first. + const fresh = Array.from({ length: 100 }, (_, i) => + file(`fresh-${i}.json`, (i + 1) * 1000), + ); + const old = Array.from({ length: RUN_SOFT_CAP }, (_, i) => + file(`old-${i}.json`, RUN_KEEP_AGE_MS + (i + 1) * DAY), // strictly > 7d, increasing age + ); + const files = [...fresh, ...old]; + const targets = computeRunPruneTargets(files, now, []); + + expect(targets).toHaveLength(100); + // All deleted are from the OLD set (never the fresh ones)... + expect(targets.every((n) => n.startsWith('old-'))).toBe(true); + // ...and oldest-first: old-1999 (oldest) comes before old-1900. + expect(targets[0]).toBe(`old-${RUN_SOFT_CAP - 1}.json`); + }); + + it('enforces the hard cap even when all files are inside the age window', () => { + // HARD_CAP + 25 files, all young (< 7d). Soft cap can't touch young files, + // but the HARD cap must still trim 25 oldest. + const files = Array.from({ length: RUN_HARD_CAP + 25 }, (_, i) => + file(`h-${i}.json`, (i + 1) * 1000), // h-0 newest ... h-N oldest + ); + const targets = computeRunPruneTargets(files, now, []); + expect(targets).toHaveLength(25); + // The oldest 25 (largest age) are h-(HARD_CAP+24) .. h-HARD_CAP. + expect(targets).toContain(`h-${RUN_HARD_CAP + 24}.json`); + expect(targets).not.toContain('h-0.json'); + }); + + it('never deletes files younger than the protect window (mtime < 60s)', () => { + // Way over hard cap, but EVERY file is younger than 60s → nothing deletable. + const files = Array.from({ length: RUN_HARD_CAP + 100 }, (_, i) => + file(`young-${i}.json`, Math.floor(RUN_PROTECT_MIN_AGE_MS / 2)), // ~30s old + ); + const targets = computeRunPruneTargets(files, now, []); + expect(targets).toEqual([]); + }); + + it('never deletes the active session run files', () => { + // Over the hard cap, all old — but the active id is protected. + const files = Array.from({ length: RUN_HARD_CAP + 10 }, (_, i) => + file(`run-${i}.json`, RUN_KEEP_AGE_MS + (i + 1) * DAY), + ); + // Mark the OLDEST 5 as active (they'd otherwise be deleted first). + const activeIds = [ + `run-${RUN_HARD_CAP + 9}`, + `run-${RUN_HARD_CAP + 8}`, + `run-${RUN_HARD_CAP + 7}`, + ]; + const targets = computeRunPruneTargets(files, now, activeIds); + for (const id of activeIds) { + // `${id}.json` is the active file and must survive. + expect(targets).not.toContain(`${id}.json`); + } + }); + + it('protects active runs by EXACT id match, not substring', () => { + // The active id is "run-1". With the old substring rule, "run-1" would also + // shield "run-10.json", "run-123.json", "xrun-1.json" — over-protection. + // Exact match must protect ONLY "run-1.json" (and a delimiter-suffixed + // "run-1-*.json"); the substring-only files stay eligible for deletion. + // + // Make the five interesting files the OLDEST in the set (so they are the + // first the oldest-first soft cap removes UNLESS protected), then pad with + // NEWER old files to push the total past the soft cap so a prune actually + // happens. + const interesting = [ + file('run-1.json', RUN_KEEP_AGE_MS + 1000 * DAY), // exact → protected (oldest) + file('run-1-meta.json', RUN_KEEP_AGE_MS + 999 * DAY), // delimiter suffix → protected + file('run-10.json', RUN_KEEP_AGE_MS + 998 * DAY), // substring-only → deletable + file('run-123.json', RUN_KEEP_AGE_MS + 997 * DAY), // substring-only → deletable + file('xrun-1.json', RUN_KEEP_AGE_MS + 996 * DAY), // id mid-name → deletable + ]; + // NEWER old padding (all < 500 days past the window, i.e. younger than the + // 996–1000-day interesting files) so total > SOFT_CAP and the oldest-first + // soft cap reaches the interesting files first. + const padding = Array.from({ length: RUN_SOFT_CAP }, (_, i) => + file(`pad-${i}.json`, RUN_KEEP_AGE_MS + ((i % 400) + 1) * DAY), + ); + const targets = computeRunPruneTargets([...interesting, ...padding], now, ['run-1']); + + // Exact + delimiter-suffix matches are protected (never deleted). + expect(targets).not.toContain('run-1.json'); + expect(targets).not.toContain('run-1-meta.json'); + // Pure substring matches are NOT protected — being the oldest deletable, they + // are among the soft-cap removals. + expect(targets).toContain('run-10.json'); + expect(targets).toContain('run-123.json'); + expect(targets).toContain('xrun-1.json'); + }); +}); + +describe('runs-store — maybePrune (fs)', () => { + let testHome = ''; + + beforeEach(() => { + testHome = setupTestAgonHome('runs-store-prune'); + }); + + afterEach(() => { + cleanupTestAgonHome(testHome); + rmSync(testHome, { recursive: true, force: true }); + }); + + it('deletes old overflow files and writes a stamp', async () => { + // SOFT_CAP + 5 OLD (> 7d) files → the soft cap keeps the newest SOFT_CAP, + // deleting the 5 oldest. + const specs = Array.from({ length: RUN_SOFT_CAP + 5 }, (_, i) => ({ + name: `run-${String(i).padStart(5, '0')}.json`, + ageMs: RUN_KEEP_AGE_MS + (i + 1) * DAY, // run-00000 newest of the old, increasing age + })); + const runsDir = seedRuns(testHome, specs); + + const result = await runsStore.maybePrune({ force: true }); + + expect(result.skipped).toBe(false); + expect(result.deleted).toBe(5); + // The 5 oldest (highest index) are gone; the newest survive. + expect(existsSync(join(runsDir, `run-${String(RUN_SOFT_CAP + 4).padStart(5, '0')}.json`))).toBe(false); + expect(existsSync(join(runsDir, 'run-00000.json'))).toBe(true); + // A stamp was written. + expect(existsSync(join(runsDir, PRUNE_STAMP_NAME))).toBe(true); + // The cached snapshot reflects the post-prune count. + expect(runsStore.snapshot().count).toBe(RUN_SOFT_CAP); + }); + + it('skips a second prune within the 1h cooldown (stamp gate)', async () => { + const specs = Array.from({ length: RUN_SOFT_CAP + 5 }, (_, i) => ({ + name: `c-${String(i).padStart(5, '0')}.json`, + ageMs: RUN_KEEP_AGE_MS + (i + 1) * DAY, + })); + seedRuns(testHome, specs); + + const first = await runsStore.maybePrune({ force: true }); + expect(first.deleted).toBe(5); + + // A non-forced prune immediately after must be skipped by the cooldown. + const second = await runsStore.maybePrune(); + expect(second.skipped).toBe(true); + expect(second.reason).toBe('cooldown'); + expect(second.deleted).toBe(0); + }); + + it('is a no-op (within-policy) when nothing exceeds the caps', async () => { + seedRuns(testHome, [ + { name: 'a.json', ageMs: 2 * DAY }, + { name: 'b.json', ageMs: 9 * DAY }, // old but well under the soft cap + ]); + const result = await runsStore.maybePrune({ force: true }); + expect(result.deleted).toBe(0); + expect(result.skipped).toBe(true); + expect(result.reason).toBe('within-policy'); + }); + + it('serializes overlapping in-process prunes — only one runs', async () => { + // SOFT_CAP + 5 OLD files so a real prune (deleted=5) is on the table. + const specs = Array.from({ length: RUN_SOFT_CAP + 5 }, (_, i) => ({ + name: `g-${String(i).padStart(5, '0')}.json`, + ageMs: RUN_KEEP_AGE_MS + (i + 1) * DAY, + })); + seedRuns(testHome, specs); + + // Fire two forced prunes WITHOUT awaiting the first — the in-process guard + // (set at method entry, before the await on the async fs scan) must make + // exactly one do the work and the other bail with 'in-progress'. + const [a, b] = await Promise.all([ + runsStore.maybePrune({ force: true }), + runsStore.maybePrune({ force: true }), + ]); + + const reasons = [a.reason, b.reason].sort(); + expect(reasons).toEqual(['in-progress', 'pruned']); + const worker = a.reason === 'pruned' ? a : b; + const blocked = a.reason === 'pruned' ? b : a; + expect(worker.deleted).toBe(5); + expect(worker.skipped).toBe(false); + expect(blocked.deleted).toBe(0); + expect(blocked.skipped).toBe(true); + }); +}); + +describe('runs-store — snapshot accessor is fs-free', () => { + let testHome = ''; + + beforeEach(() => { + testHome = setupTestAgonHome('runs-store-snapshot'); + }); + + afterEach(() => { + cleanupTestAgonHome(testHome); + rmSync(testHome, { recursive: true, force: true }); + }); + + it('returns the cached count without touching the fs after hydrate', async () => { + const runsDir = seedRuns(testHome, [ + { name: 'one.json', ageMs: DAY }, + { name: 'two.json', ageMs: DAY }, + { name: 'three.json', ageMs: DAY }, + ]); + + const snap = await runsStore.hydrate(); + expect(snap.count).toBe(3); + expect(snap.hydratedAt).toBeGreaterThan(0); + + // Remove the entire runs dir — snapshot() must still return the cached + // value without throwing or re-scanning. + rmSync(runsDir, { recursive: true, force: true }); + expect(existsSync(runsDir)).toBe(false); + + const cached = runsStore.snapshot(); + expect(cached.count).toBe(3); + expect(runsStore.runCount()).toBe(3); + }); +}); diff --git a/tests/unit/transcript-row-cache.test.ts b/tests/unit/transcript-row-cache.test.ts new file mode 100644 index 000000000..6e282acb5 --- /dev/null +++ b/tests/unit/transcript-row-cache.test.ts @@ -0,0 +1,330 @@ +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; + +import { + buildTranscriptRows, + nativeArchiveBlockCount, +} from '../../packages/cli/src/generated/surfaces/app.js'; +import { + _blockRowCache, + blockRowFingerprint, + cachedBlockOwnRows, + clearBlockRowCache, + isCacheableBlockType, + renderBlockOwnRows, +} from '../../packages/cli/src/generated/surfaces/app-rendering.js'; +import { withContentWidthOverride } from '../../packages/cli/src/generated/blocks/rendering.js'; + +// Resolved content widths used by buildTranscriptRows (contentWidth(4/2/8)) at +// a fixed 100-col terminal. Passing them explicitly keeps the cache key stable +// regardless of the test runner's real TTY width. +const W100 = { prose: 96, chat: 98, engine: 92 } as const; + +const call = ( + block: any, + mode = 'chat', + toolExpanded = false, + thinkingExpanded = true, + widths = W100, +) => + cachedBlockOwnRows( + block, + mode, + toolExpanded, + thinkingExpanded, + widths.prose, + widths.chat, + widths.engine, + ); + +const engineBlock = (id: number, content: string) => ({ + id, + event: { type: 'engine-block', engineId: 'cesar', color: 124, content }, +}); + +beforeEach(() => { + _blockRowCache.clear(); +}); + +afterEach(() => { + _blockRowCache.clear(); +}); + +describe('per-block transcript row cache', () => { + it('serves a cache hit on a repeated call: equal rows, same inner row objects, defensive outer copy', () => { + const block = engineBlock(1, 'hello **world**'); + const first = call(block); + const second = call(block); + // The outer array is a defensive shallow copy (callers can never mutate the + // cached array), but the row objects inside are the cached ones — identity + // of the inner rows proves the hit skipped renderBlockOwnRows. + expect(second).not.toBe(first); + expect(second).toEqual(first); + for (let i = 0; i < first.length; i++) { + expect(second[i]).toBe(first[i]); + } + expect(_blockRowCache.size).toBe(1); + }); + + it('appending one block recomputes ONLY the new block; existing blocks stay cached', () => { + const blocks = Array.from({ length: 20 }, (_, i) => engineBlock(i + 1, `block ${i + 1} body`)); + // Warm the cache for all 20. + const warm = blocks.map((b) => call(b)); + expect(_blockRowCache.size).toBe(20); + + // Append one new block; re-render the whole list as buildTranscriptRows would. + const newBlock = engineBlock(21, 'freshly appended'); + const all = [...blocks, newBlock]; + const second = all.map((b) => call(b)); + + // The 20 original blocks must all be cache hits: inner row objects are the + // cached ones (outer arrays are defensive copies). + for (let i = 0; i < 20; i++) { + expect(second[i]).toEqual(warm[i]); + for (let j = 0; j < warm[i].length; j++) { + expect(second[i][j]).toBe(warm[i][j]); + } + } + // Only one new entry was added. + expect(_blockRowCache.size).toBe(21); + }); + + it('a cache hit cannot be corrupted by mutating the returned array', () => { + const block = engineBlock(1, 'immutable cache'); + const first = call(block); + const pristine = JSON.parse(JSON.stringify(first)); + first.push({ corrupted: true }); + first.splice(0, 1); + expect(call(block)).toEqual(pristine); + }); + + it('clearBlockRowCache empties the cache (wired to the /clear reset funnel)', () => { + call(engineBlock(1, 'a')); + call(engineBlock(2, 'b')); + expect(_blockRowCache.size).toBe(2); + clearBlockRowCache(); + expect(_blockRowCache.size).toBe(0); + }); + + it('a hit refreshes recency: hot entries survive the LRU eviction sweep', () => { + // Fill to one below the cap, with block 1 inserted FIRST. + const hot = engineBlock(1, 'hot block'); + call(hot); + for (let i = 2; i <= 1499; i++) call(engineBlock(i, `filler ${i}`)); + // Touch the oldest entry — the hit re-inserts it at the back. + call(hot); + // Overflow the cap: the sweep evicts the least-recently-used 20%. + for (let i = 1500; i <= 1600; i++) call(engineBlock(i, `overflow ${i}`)); + // The hot block survived; re-calling it is still a hit (size unchanged). + const sizeBefore = _blockRowCache.size; + call(hot); + expect(_blockRowCache.size).toBe(sizeBefore); + }); + + it('fingerprint includes color/critique/position — changing them busts the cache', () => { + const a = call({ id: 1, event: { type: 'engine-block', engineId: 'cesar', color: 124, content: 'same' } }); + const b = call({ id: 1, event: { type: 'engine-block', engineId: 'cesar', color: 33, content: 'same' } }); + expect(_blockRowCache.size).toBe(2); + expect(b).not.toEqual(a); + _blockRowCache.clear(); + call({ id: 2, event: { type: 'kern-draft', engineId: 'x', content: 'draft', critique: 'weak' } }); + call({ id: 2, event: { type: 'kern-draft', engineId: 'x', content: 'draft', critique: 'strong' } }); + expect(_blockRowCache.size).toBe(2); + _blockRowCache.clear(); + call({ id: 3, event: { type: 'debate-round', engineId: 'x', argument: 'arg', position: 'for' } }); + call({ id: 3, event: { type: 'debate-round', engineId: 'x', argument: 'arg', position: 'against' } }); + expect(_blockRowCache.size).toBe(2); + }); + + it('same head and length but different tail does not collide', () => { + const head = 'x'.repeat(200); + const a = call({ id: 1, event: { type: 'text', content: `${head}AAAA` } }); + const b = call({ id: 1, event: { type: 'text', content: `${head}BBBB` } }); + expect(_blockRowCache.size).toBe(2); + expect(b).not.toEqual(a); + }); + + it('misses the cache when mode changes', () => { + const block = engineBlock(1, 'mode-sensitive'); + const chat = call(block, 'chat'); + const tribunal = call(block, 'tribunal'); + expect(tribunal).not.toBe(chat); + expect(_blockRowCache.size).toBe(2); + }); + + it('misses the cache when toolOutputExpanded / thinkingExpanded change', () => { + const block = { id: 7, event: { type: 'thinking-chunk', chunk: 'reason line a\nreason line b' } }; + const a = call(block, 'chat', false, true); + const b = call(block, 'chat', false, false); + expect(b).not.toBe(a); + }); + + it('misses the cache when terminal width changes (widths are in the key)', () => { + const block = engineBlock(1, 'wrap me at different widths'); + const wide = call(block, 'chat', false, true, { prose: 96, chat: 98, engine: 92 }); + const narrow = call(block, 'chat', false, true, { prose: 36, chat: 38, engine: 32 }); + expect(narrow).not.toBe(wide); + }); + + it('mutation fingerprint busts the cache when a block event field changes in place', () => { + const block: any = engineBlock(1, 'original content'); + const before = call(block); + // Simulate an in-place mutation of the same block object/id. + block.event.content = 'totally different content now'; + const after = call(block); + expect(after).not.toBe(before); + // And the command field busts it too (permission-ask renders its diff from + // event.command, which is folded into the fingerprint). + const perm = { id: 2, event: { type: 'permission-ask', tool: 'Edit', command: JSON.stringify({ file_path: 'a.ts', old_string: 'x', new_string: 'y' }) } } as any; + const r1 = call(perm); + perm.event.command = JSON.stringify({ file_path: 'a.ts', old_string: 'x', new_string: 'COMPLETELY DIFFERENT' }); + const r2 = call(perm); + expect(r2).not.toBe(r1); + }); + + it('excludes neighbor-dependent and impure block types from caching', () => { + expect(isCacheableBlockType('tool-call')).toBe(false); + expect(isCacheableBlockType('tool-call-group')).toBe(false); + expect(isCacheableBlockType('file-changes')).toBe(false); + expect(isCacheableBlockType('engine-block')).toBe(true); + expect(isCacheableBlockType('user-message')).toBe(true); + expect(isCacheableBlockType('info')).toBe(true); + }); + + it('does not cache tool-call blocks even when routed through cachedBlockOwnRows', () => { + const block = { + id: 9, + event: { type: 'tool-call', engineId: 'cesar', tool: 'Read', input: '{"file_path":"a.ts"}', status: 'done', output: 'l1' }, + } as any; + call(block); + expect(_blockRowCache.size).toBe(0); // tool-call never stored + }); + + it('keeps the fingerprint distinct across content, status, engine, mode, and width', () => { + const base = { type: 'engine-block', engineId: 'a', content: 'x', status: 'done' } as any; + const fp = (e: any, mode = 'chat', te = false, the = true, p = 96, c = 98, en = 92) => + blockRowFingerprint(e, mode, te, the, p, c, en); + const f0 = fp(base); + expect(fp({ ...base, content: 'y' })).not.toBe(f0); + expect(fp({ ...base, status: 'running' })).not.toBe(f0); + expect(fp({ ...base, engineId: 'b' })).not.toBe(f0); + expect(fp(base, 'tribunal')).not.toBe(f0); + expect(fp(base, 'chat', false, true, 40)).not.toBe(f0); + expect(fp(base)).toBe(f0); // deterministic + }); + + it('bounds the cache via LRU-evict-oldest-20% on overflow', () => { + // Fill well past the 1500 cap with unique blocks. + for (let i = 0; i < 2000; i++) { + call(engineBlock(100000 + i, `unique body ${i}`)); + } + expect(_blockRowCache.size).toBeLessThanOrEqual(1500); + expect(_blockRowCache.size).toBeGreaterThan(0); + }); +}); + +describe('cache correctness vs cold rebuild (property-style)', () => { + // The cached buildTranscriptRows output must be byte-for-byte identical to a + // cold (cache-cleared) rebuild — including tool groups, the strongest proof. + const buildAt100 = (blocks: any[], mode: string, toolExpanded: boolean) => + withContentWidthOverride(100, () => buildTranscriptRows(blocks, mode, toolExpanded, true)); + + const transcripts: { name: string; blocks: any[]; mode: string; toolExpanded: boolean }[] = [ + { + name: 'mixed prose + user + info', + mode: 'chat', + toolExpanded: false, + blocks: [ + { id: 1, event: { type: 'user-message', content: 'explain the flow' } }, + engineBlock(2, '# Heading\n\nSome **prose** with `code`.\n\n```ts\nconst x = 1;\n```'), + { id: 3, event: { type: 'info', message: 'just so you know' } }, + engineBlock(4, 'second answer body'), + ], + }, + { + name: 'collapsed adjacent tool-call group', + mode: 'chat', + toolExpanded: false, + blocks: [ + { id: 1, event: { type: 'user-message', content: 'do work' } }, + { id: 2, event: { type: 'tool-call', engineId: 'cesar', tool: 'Read', input: '{"file_path":"a.ts"}', status: 'done', output: 'l1\nl2' } }, + { id: 3, event: { type: 'tool-call', engineId: 'cesar', tool: 'Grep', input: '{"pattern":"x"}', status: 'done', output: 'a.ts:x' } }, + { id: 4, event: { type: 'tool-call', engineId: 'cesar', tool: 'Bash', input: '{"command":"ls"}', status: 'done', output: 'a\nb' } }, + engineBlock(5, 'wrap-up answer'), + ], + }, + { + name: 'tool-call-group block coalescing with following tool-call', + mode: 'chat', + toolExpanded: false, + blocks: [ + { + id: 1, + event: { + type: 'tool-call-group', + blocks: [ + { id: 11, event: { type: 'tool-call', engineId: 'cesar', tool: 'Read', input: '{"file_path":"a.ts"}', status: 'done', output: 'x' } }, + { id: 12, event: { type: 'tool-call', engineId: 'cesar', tool: 'Edit', input: '{"file_path":"a.ts","old_string":"a","new_string":"b"}', status: 'done' } }, + ], + }, + }, + { id: 2, event: { type: 'tool-call', engineId: 'cesar', tool: 'Bash', input: '{"command":"npm test"}', status: 'done', output: 'ok' } }, + engineBlock(3, 'all green'), + ], + }, + { + name: 'expanded tool group', + mode: 'chat', + toolExpanded: true, + blocks: [ + { id: 1, event: { type: 'tool-call', engineId: 'cesar', tool: 'Read', input: '{"file_path":"a.ts"}', status: 'done', output: 'line 1\nline 2\nline 3' } }, + { id: 2, event: { type: 'tool-call', engineId: 'cesar', tool: 'Search', input: '{"pattern":"foo"}', status: 'done', output: 'a.ts:foo' } }, + engineBlock(3, 'done'), + ], + }, + ]; + + for (const t of transcripts) { + it(`cached output deep-equals a cold rebuild — ${t.name}`, () => { + _blockRowCache.clear(); + const cold = buildAt100(t.blocks, t.mode, t.toolExpanded); + // Second pass runs against a warm cache. + const warm = buildAt100(t.blocks, t.mode, t.toolExpanded); + expect(warm).toEqual(cold); + // And a fully cold rebuild after clearing must also match. + _blockRowCache.clear(); + const coldAgain = buildAt100(t.blocks, t.mode, t.toolExpanded); + expect(coldAgain).toEqual(cold); + }); + } + + it('append-one transcript produces the same rows as a from-scratch build', () => { + _blockRowCache.clear(); + const base = [ + { id: 1, event: { type: 'user-message', content: 'q1' } }, + engineBlock(2, 'a1 body'), + ]; + buildAt100(base, 'chat', false); // warm + + const extended = [...base, { id: 3, event: { type: 'user-message', content: 'q2' } }, engineBlock(4, 'a2 body')]; + const incremental = buildAt100(extended, 'chat', false); + + _blockRowCache.clear(); + const fromScratch = buildAt100(extended, 'chat', false); + expect(incremental).toEqual(fromScratch); + }); +}); + +describe('maxLiveBlocks tightened to 40', () => { + it('seals into Static once more than 40 cheap blocks accumulate (live tail capped at 40)', () => { + // 60 one-row separators, generous row budget so the cap (not rows) is the + // binding constraint. With maxLiveBlocks=40, the live tail is exactly 40, + // so the archive count is 60 - 40 = 20. + const blocks = Array.from({ length: 60 }, (_, i) => ({ id: i + 1, event: { type: 'separator' } })) as any; + expect(nativeArchiveBlockCount(blocks, 'chat', 10_000, false, true)).toBe(20); + }); + + it('does not archive when there are 40 or fewer blocks and the budget is generous', () => { + const blocks = Array.from({ length: 40 }, (_, i) => ({ id: i + 1, event: { type: 'separator' } })) as any; + expect(nativeArchiveBlockCount(blocks, 'chat', 10_000, false, true)).toBe(0); + }); +});