From ca2e872c5840d9c01a7673f163361f7309a69fc4 Mon Sep 17 00:00:00 2001
From: nicolascukas <cukasn@gmail.com>
Date: Mon, 8 Jun 2026 10:42:14 +0200
Subject: [PATCH 1/4] fix(cli): ungag agy as acting-Cesar + fix code-block
 render wrapping
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two unrelated Cesar/TUI bugs surfaced from a real session:

1. agy could not use tools as acting-Cesar. When the configured Cesar
   returns empty, agon falls back to acting-Cesar dispatched in 'exec'
   mode. The adapter injects an agy-specific OUTPUT RULES block
   (engine.id === 'agy' && mode !== 'agent') that forbids file edits /
   tool use and forces a single-pass text answer — so agy refused every
   tool and confabulated a "system harness" excuse. Acting-Cesar (and the
   brain recovery path) are agentic leading roles, so dispatch them in
   'agent' mode when the engine supports it (agy's agent and exec modes
   are the same agentic CLI), falling back to 'exec' otherwise. Matches
   the injection's own intent ("agent mode is left agentic on purpose").

2. CodeBlockView rendered every line wrapped, leaving stray border pipes
   on blank rows. The width math was inconsistent: the box was sized to
   the longest line while code rows were padded to the terminal width, so
   each row overflowed the box and Ink wrapped it. Rewrote all rows
   (border, header, code, overflow) around one coherent inner `body`
   width so each row is exactly rowWidth; over-long lines are truncated
   instead of wrapping.

Compile + build clean; 221/221 cesar/adapter/render tests pass.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../src/generated/blocks/rendering.entry.tsx  |  4 +-
 .../cli/src/generated/blocks/rendering.tsx    | 38 ++++++++++++-------
 packages/cli/src/generated/cesar/brain.ts     |  8 +++-
 .../signals/dispatch/cesar-router.ts          | 10 ++++-
 packages/cli/src/kern/blocks/rendering.kern   | 26 +++++++++----
 packages/cli/src/kern/cesar/brain.kern        |  8 +++-
 .../kern/signals/dispatch/cesar-router.kern   |  8 +++-
 7 files changed, 73 insertions(+), 29 deletions(-)
diff --git a/packages/cli/src/generated/blocks/rendering.entry.tsx b/packages/cli/src/generated/blocks/rendering.entry.tsx
index 9f5d5889..087f99e4 100644
--- a/packages/cli/src/generated/blocks/rendering.entry.tsx
+++ b/packages/cli/src/generated/blocks/rendering.entry.tsx
@@ -1,7 +1,7 @@
 #!/usr/bin/env node
-// @generated by kern v3.5.7 — DO NOT EDIT. Source: src/kern/blocks/rendering.kern
+// @generated by kern v3.5.8 — DO NOT EDIT. Source: src/kern/blocks/rendering.kern
 
-// @kern-source: rendering:428
+// @kern-source: rendering:438
 
 import React from 'react';
 import { render } from 'ink';
diff --git a/packages/cli/src/generated/blocks/rendering.tsx b/packages/cli/src/generated/blocks/rendering.tsx
index 1c176d6f..cf026dd9 100644
--- a/packages/cli/src/generated/blocks/rendering.tsx
+++ b/packages/cli/src/generated/blocks/rendering.tsx
@@ -71,11 +71,20 @@ export function CodeBlockView({ segment, borderColor }: { segment:ContentSegment
   const bc = borderColor || '#585858';
   const maxLineLen = capped.reduce((m: number, l: string) => Math.max(m, l.length), 0);
   const headerLen = (segment.language || 'code').length + (segment.index !== undefined ? ` [${segment.index}]`.length : 0);
-  const innerWidth = Math.max(maxLineLen, headerLen);
-  const boxWidth = innerWidth + 4;
-  const rule = '\u2500'.repeat(boxWidth);
+  // Single coherent inner-content width for every row. Clamp to the terminal
+  // budget so a long line truncates instead of forcing the box wider than the
+  // screen. Every row is built as: `\u2502  \u258c <content padded to body>  \u2502`
+  //   left frame  `\u2502  \u258c ` = 5 cols, right frame `  \u2502` = 3 cols \u2192 rowWidth = body + 8.
+  //   border row  `\u2502 ` + rule + ` \u2502` = rule.length + 4, so rule = body + 4.
+  // Previously the box was sized to body+2 while rows rendered at body+4..body+11,
+  // so every row overflowed and Ink wrapped it \u2014 dropping the trailing `\u2502` onto a
+  // blank row (the stray pipes + huge gaps between lines).
+  const body = Math.min(Math.max(maxLineLen, headerLen), codeWidth);
+  const rowWidth = body + 8;
+  const rule = '\u2500'.repeat(body + 4);
+  const overflowLabel = `\u2026 ${overflow} more lines`;
   return (
-    <Box flexDirection="column" width={boxWidth + 2} flexShrink={0}>
+    <Box flexDirection="column" width={rowWidth} flexShrink={0}>
       <Text color={bc}>{'\u2502 '}{rule}{' \u2502'}</Text>
       <Text>
         <Text color={bc}>{'\u2502  '}</Text>
@@ -83,7 +92,7 @@ export function CodeBlockView({ segment, borderColor }: { segment:ContentSegment
         <Text> </Text>
         <Text dimColor>{segment.language || 'code'}</Text>
         {segment.index !== undefined && <Text color="#585858">{` [${segment.index}]`}</Text>}
-        <Text>{' '.repeat(Math.max(0, boxWidth - headerLen - 1))}</Text>
+        <Text>{' '.repeat(Math.max(0, body - headerLen))}</Text>
         <Text color={bc}>{'  \u2502'}</Text>
       </Text>
       {capped.map((line: string, i: number) => (
@@ -91,8 +100,8 @@ export function CodeBlockView({ segment, borderColor }: { segment:ContentSegment
           <Text color={bc}>{'\u2502  '}</Text>
           <Text color={CODE_RAIL_COLOR}>{CODE_RAIL}</Text>
           <Text> </Text>
-          {isDiff ? <DiffLine line={line} maxWidth={codeWidth} /> : <SyntaxLine line={line} maxWidth={codeWidth} />}
-          <Text>{' '.repeat(Math.max(0, codeWidth - line.length - 4))}</Text>
+          {isDiff ? <DiffLine line={line} maxWidth={body} /> : <SyntaxLine line={line} maxWidth={body} />}
+          <Text>{' '.repeat(Math.max(0, body - line.length))}</Text>
           <Text color={bc}>{'  \u2502'}</Text>
         </Text>
       ))}
@@ -101,7 +110,8 @@ export function CodeBlockView({ segment, borderColor }: { segment:ContentSegment
           <Text color={bc}>{'\u2502  '}</Text>
           <Text color={CODE_RAIL_COLOR}>{CODE_RAIL}</Text>
           <Text> </Text>
-          <Text dimColor>{'\u2026 '}{overflow}{' more lines'}</Text>
+          <Text dimColor>{overflowLabel}</Text>
+          <Text>{' '.repeat(Math.max(0, body - overflowLabel.length))}</Text>
           <Text color={bc}>{'  \u2502'}</Text>
         </Text>
       )}
@@ -110,7 +120,7 @@ export function CodeBlockView({ segment, borderColor }: { segment:ContentSegment
   );
 }
 
-// @kern-source: rendering:254
+// @kern-source: rendering:264
 export function RichSpanView({ span }: { span:InlineSpan }) {
   if (span.style.code) {
     return <Text color="#a78bfa" backgroundColor="#1e1033">{span.text}</Text>;
@@ -127,7 +137,7 @@ export function RichSpanView({ span }: { span:InlineSpan }) {
   return el;
 }
 
-// @kern-source: rendering:275
+// @kern-source: rendering:285
 export function RichLineView({ line, borderColor }: { line:RichLine; borderColor?:string }) {
   const border = borderColor ? <Text color={borderColor}>{'\u2502 '}</Text> : null;
   const indent = line.indent > 0 ? '  '.repeat(line.indent) : '';
@@ -149,7 +159,7 @@ export function RichLineView({ line, borderColor }: { line:RichLine; borderColor
   return <Text>{border}{indent}{listIndent}{marker}{line.spans.map((s: InlineSpan, i: number) => <RichSpanView key={i} span={s} />)}</Text>;
 }
 
-// @kern-source: rendering:302
+// @kern-source: rendering:312
 export function MarkdownTableView({ headers, rows, alignments, borderColor }: { headers:string[]; rows:string[][]; alignments:('left' | 'center' | 'right')[]; borderColor:string }) {
   const colWidths = headers.map((h: string, i: number) => {
     let max = h.length;
@@ -185,7 +195,7 @@ export function MarkdownTableView({ headers, rows, alignments, borderColor }: {
   );
 }
 
-// @kern-source: rendering:345
+// @kern-source: rendering:355
 export function RenderedSegments({ segments, borderColor, wrapWidth }: { segments:ContentSegment[]; borderColor:string; wrapWidth:number }) {
   return (
     <>
@@ -246,7 +256,7 @@ export function RenderedSegments({ segments, borderColor, wrapWidth }: { segment
   );
 }
 
-// @kern-source: rendering:412
+// @kern-source: rendering:422
 export function GradientLine({ text, colors }: { text:string; colors:readonly string[] }) {
   const step = Math.max(1, Math.ceil(text.length / colors.length));
   return (
@@ -259,7 +269,7 @@ export function GradientLine({ text, colors }: { text:string; colors:readonly st
   );
 }
 
-// @kern-source: rendering:428
+// @kern-source: rendering:438
 export function AnsiLine({ text, maxWidth, fallbackDim }: { text:string; maxWidth:number; fallbackDim?:boolean }) {
   if (!hasAnsiCodes(text)) {
     const display = text.length > maxWidth ? text.slice(0, maxWidth - 4) + '\u2026' : text;
diff --git a/packages/cli/src/generated/cesar/brain.ts b/packages/cli/src/generated/cesar/brain.ts
index 85de85f6..d105a08e 100644
--- a/packages/cli/src/generated/cesar/brain.ts
+++ b/packages/cli/src/generated/cesar/brain.ts
@@ -319,8 +319,14 @@ export async function handleCesarBrain(input: string, dispatch: Dispatch, ctx: H
             const outputDir = join(RUNS_DIR, `cesar-fallback-${Date.now()}`);
             mkdirSync(outputDir, { recursive: true });
             const primedPrompt = buildHistoryPrimedPrompt(ctx.chatSession, input);
+            // Cesar is an agentic leading role, so dispatch in 'agent' mode when the
+            // engine supports it. 'exec' triggers agy's OUTPUT-RULES gag (adapter-helpers:
+            // engine.id === 'agy' && mode !== 'agent') which forbids file edits / tool use
+            // and forces a single-pass text answer — exactly why agy could not run tools
+            // as Cesar. Fall back to 'exec' for engines without an agent mode (no regression).
+            const fallbackMode = ((engine as any)?.agent ? 'agent' : 'exec') as any;
             const freshResult = await ctx.adapter.dispatch({
-              engine, prompt: primedPrompt, cwd: resolveWorkingDir(), mode: 'exec' as any,
+              engine, prompt: primedPrompt, cwd: resolveWorkingDir(), mode: fallbackMode,
               timeout: config.timeout ?? 120, outputDir, signal: abort.signal, systemPrompt: buildCesarSystemPrompt(ctx),
             });
             dispatch({ type: 'spinner-stop' });
diff --git a/packages/cli/src/generated/signals/dispatch/cesar-router.ts b/packages/cli/src/generated/signals/dispatch/cesar-router.ts
index 16e6b299..a13ff8ca 100644
--- a/packages/cli/src/generated/signals/dispatch/cesar-router.ts
+++ b/packages/cli/src/generated/signals/dispatch/cesar-router.ts
@@ -948,11 +948,17 @@ export async function runCesarBrainFallback(input: string, cb: DispatchCallbacks
     const outDir = join(RUNS_DIR, `acting-cesar-${Date.now()}`);
     mkdirSync(outDir, { recursive: true });
     if (!_silentMode) cb.dispatch({ type: 'info', message: formatCesarRecoveryStatus('acting', actingCesar, `log: ${outDir}`) });
+    // Acting-Cesar leads and may need tools, so dispatch in 'agent' mode when the
+    // substitute supports it. 'exec' triggers agy's OUTPUT-RULES gag (adapter-helpers:
+    // engine.id === 'agy' && mode !== 'agent') which forbids file edits / tool use and
+    // forces single-pass text — the reason agy refused to run tools as acting Cesar.
+    // Fall back to 'exec' for engines without an agent mode (no regression).
+    const actingMode = ((actingEngine as any)?.agent ? 'agent' : 'exec') as any;
     const actingResult = await cb.ctx.adapter.dispatch({
       engine: actingEngine,
       prompt: actingPrompt,
       cwd: resolveWorkingDir(),
-      mode: 'exec' as any,
+      mode: actingMode,
       timeout: (cesarConfig as any).timeout ?? 120,
       outputDir: outDir,
       systemPrompt: buildCesarSystemPrompt(cb.ctx),
@@ -988,7 +994,7 @@ export async function runCesarBrainFallback(input: string, cb: DispatchCallbacks
 /**
  * Unified Cesar brain routing. Returns true if a background job was dispatched.
  */
-// @kern-source: cesar-router:929
+// @kern-source: cesar-router:935
 export async function routeWithCesar(input: string, images: ImageAttachment[], cb: DispatchCallbacks): Promise<boolean> {
   cb.setPendingImages(() => []);
   // Hoisted out of the try so the fallback ladder below can see whether the
diff --git a/packages/cli/src/kern/blocks/rendering.kern b/packages/cli/src/kern/blocks/rendering.kern
index c5cac083..fdc1d21d 100644
--- a/packages/cli/src/kern/blocks/rendering.kern
+++ b/packages/cli/src/kern/blocks/rendering.kern
@@ -210,11 +210,20 @@ screen name=CodeBlockView target=ink
       const bc = borderColor || '#585858';
       const maxLineLen = capped.reduce((m: number, l: string) => Math.max(m, l.length), 0);
       const headerLen = (segment.language || 'code').length + (segment.index !== undefined ? ` [${segment.index}]`.length : 0);
-      const innerWidth = Math.max(maxLineLen, headerLen);
-      const boxWidth = innerWidth + 4;
-      const rule = '\u2500'.repeat(boxWidth);
+      // Single coherent inner-content width for every row. Clamp to the terminal
+      // budget so a long line truncates instead of forcing the box wider than the
+      // screen. Every row is built as: `\u2502  \u258c <content padded to body>  \u2502`
+      //   left frame  `\u2502  \u258c ` = 5 cols, right frame `  \u2502` = 3 cols \u2192 rowWidth = body + 8.
+      //   border row  `\u2502 ` + rule + ` \u2502` = rule.length + 4, so rule = body + 4.
+      // Previously the box was sized to body+2 while rows rendered at body+4..body+11,
+      // so every row overflowed and Ink wrapped it \u2014 dropping the trailing `\u2502` onto a
+      // blank row (the stray pipes + huge gaps between lines).
+      const body = Math.min(Math.max(maxLineLen, headerLen), codeWidth);
+      const rowWidth = body + 8;
+      const rule = '\u2500'.repeat(body + 4);
+      const overflowLabel = `\u2026 ${overflow} more lines`;
       return (
-        <Box flexDirection="column" width={boxWidth + 2} flexShrink={0}>
+        <Box flexDirection="column" width={rowWidth} flexShrink={0}>
           <Text color={bc}>{'\u2502 '}{rule}{' \u2502'}</Text>
           <Text>
             <Text color={bc}>{'\u2502  '}</Text>
@@ -222,7 +231,7 @@ screen name=CodeBlockView target=ink
             <Text> </Text>
             <Text dimColor>{segment.language || 'code'}</Text>
             {segment.index !== undefined && <Text color="#585858">{` [${segment.index}]`}</Text>}
-            <Text>{' '.repeat(Math.max(0, boxWidth - headerLen - 1))}</Text>
+            <Text>{' '.repeat(Math.max(0, body - headerLen))}</Text>
             <Text color={bc}>{'  \u2502'}</Text>
           </Text>
           {capped.map((line: string, i: number) => (
@@ -230,8 +239,8 @@ screen name=CodeBlockView target=ink
               <Text color={bc}>{'\u2502  '}</Text>
               <Text color={CODE_RAIL_COLOR}>{CODE_RAIL}</Text>
               <Text> </Text>
-              {isDiff ? <DiffLine line={line} maxWidth={codeWidth} /> : <SyntaxLine line={line} maxWidth={codeWidth} />}
-              <Text>{' '.repeat(Math.max(0, codeWidth - line.length - 4))}</Text>
+              {isDiff ? <DiffLine line={line} maxWidth={body} /> : <SyntaxLine line={line} maxWidth={body} />}
+              <Text>{' '.repeat(Math.max(0, body - line.length))}</Text>
               <Text color={bc}>{'  \u2502'}</Text>
             </Text>
           ))}
@@ -240,7 +249,8 @@ screen name=CodeBlockView target=ink
               <Text color={bc}>{'\u2502  '}</Text>
               <Text color={CODE_RAIL_COLOR}>{CODE_RAIL}</Text>
               <Text> </Text>
-              <Text dimColor>{'\u2026 '}{overflow}{' more lines'}</Text>
+              <Text dimColor>{overflowLabel}</Text>
+              <Text>{' '.repeat(Math.max(0, body - overflowLabel.length))}</Text>
               <Text color={bc}>{'  \u2502'}</Text>
             </Text>
           )}
diff --git a/packages/cli/src/kern/cesar/brain.kern b/packages/cli/src/kern/cesar/brain.kern
index 145dcc45..339128bf 100644
--- a/packages/cli/src/kern/cesar/brain.kern
+++ b/packages/cli/src/kern/cesar/brain.kern
@@ -289,8 +289,14 @@ fn name=handleCesarBrain async=true params="input:string, dispatch:Dispatch, ctx
           const outputDir = join(RUNS_DIR, `cesar-fallback-${Date.now()}`);
           mkdirSync(outputDir, { recursive: true });
           const primedPrompt = buildHistoryPrimedPrompt(ctx.chatSession, input);
+          // Cesar is an agentic leading role, so dispatch in 'agent' mode when the
+          // engine supports it. 'exec' triggers agy's OUTPUT-RULES gag (adapter-helpers:
+          // engine.id === 'agy' && mode !== 'agent') which forbids file edits / tool use
+          // and forces a single-pass text answer — exactly why agy could not run tools
+          // as Cesar. Fall back to 'exec' for engines without an agent mode (no regression).
+          const fallbackMode = ((engine as any)?.agent ? 'agent' : 'exec') as any;
           const freshResult = await ctx.adapter.dispatch({
-            engine, prompt: primedPrompt, cwd: resolveWorkingDir(), mode: 'exec' as any,
+            engine, prompt: primedPrompt, cwd: resolveWorkingDir(), mode: fallbackMode,
             timeout: config.timeout ?? 120, outputDir, signal: abort.signal, systemPrompt: buildCesarSystemPrompt(ctx),
           });
           dispatch({ type: 'spinner-stop' });
diff --git a/packages/cli/src/kern/signals/dispatch/cesar-router.kern b/packages/cli/src/kern/signals/dispatch/cesar-router.kern
index ba1e1fe7..714d4d99 100644
--- a/packages/cli/src/kern/signals/dispatch/cesar-router.kern
+++ b/packages/cli/src/kern/signals/dispatch/cesar-router.kern
@@ -889,11 +889,17 @@ fn name=runCesarBrainFallback async=true params="input:string, cb:DispatchCallba
       const outDir = join(RUNS_DIR, `acting-cesar-${Date.now()}`);
       mkdirSync(outDir, { recursive: true });
       if (!_silentMode) cb.dispatch({ type: 'info', message: formatCesarRecoveryStatus('acting', actingCesar, `log: ${outDir}`) });
+      // Acting-Cesar leads and may need tools, so dispatch in 'agent' mode when the
+      // substitute supports it. 'exec' triggers agy's OUTPUT-RULES gag (adapter-helpers:
+      // engine.id === 'agy' && mode !== 'agent') which forbids file edits / tool use and
+      // forces single-pass text — the reason agy refused to run tools as acting Cesar.
+      // Fall back to 'exec' for engines without an agent mode (no regression).
+      const actingMode = ((actingEngine as any)?.agent ? 'agent' : 'exec') as any;
       const actingResult = await cb.ctx.adapter.dispatch({
         engine: actingEngine,
         prompt: actingPrompt,
         cwd: resolveWorkingDir(),
-        mode: 'exec' as any,
+        mode: actingMode,
         timeout: (cesarConfig as any).timeout ?? 120,
         outputDir: outDir,
         systemPrompt: buildCesarSystemPrompt(cb.ctx),

From 6412119556bc364632c0b41bde4f391482b2134f Mon Sep 17 00:00:00 2001
From: nicolascukas <cukasn@gmail.com>
Date: Mon, 8 Jun 2026 11:09:39 +0200
Subject: [PATCH 2/4] fix(review): fail loudly when branch:X targets the
 current branch
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`agon review branch:NAME` runs `git diff NAME...HEAD`, which reviews HEAD's
changes relative to NAME as the base — correct when NAME is the base branch
(e.g. branch:main). But when NAME resolves to the same commit as HEAD (the
common footgun of targeting the branch you're currently on), the result is
an empty self-diff that used to surface as a silent "No changes to review."
A caller — or Cesar — could mistake that for a clean review and confabulate
success.

Now resolveReviewTarget compares the rev-parsed SHAs of NAME and HEAD and,
when they are identical, throws an actionable error pointing at the right
targets (branch:main for the branch's commits, uncommitted for working-tree
changes) before any engine is dispatched.

review tests: 58/58 pass. Verified: `agon review branch:<current>` now errors
loudly pre-dispatch instead of returning empty.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 packages/cli/src/generated/handlers/review.ts | 49 +++++++++++++------
 packages/cli/src/kern/handlers/review.kern    | 17 +++++++
 2 files changed, 50 insertions(+), 16 deletions(-)

diff --git a/packages/cli/src/generated/handlers/review.ts b/packages/cli/src/generated/handlers/review.ts
index 77e17dee..209c4021 100644
--- a/packages/cli/src/generated/handlers/review.ts
+++ b/packages/cli/src/generated/handlers/review.ts
@@ -82,6 +82,23 @@ export function resolveReviewTarget(target: string|undefined, cwd: string): {dif
   } else if (t.startsWith('branch:')) {
     const branch = t.slice(7);
     label = `branch ${branch}`;
+    // `git diff BRANCH...HEAD` reviews HEAD's changes relative to BRANCH as the
+    // base — correct when BRANCH is the base (e.g. branch:main). The footgun:
+    // when BRANCH resolves to the same commit as HEAD (targeting the branch you
+    // are currently on), it's an empty self-diff that surfaces as a silent
+    // "No changes to review" — which a caller (or Cesar) can mistake for a clean
+    // review. Detect that and fail LOUDLY with the right targets instead.
+    let branchSha = '';
+    let headSha = '';
+    try {
+      branchSha = execFileSync('git', ['rev-parse', branch], { cwd, encoding: 'utf-8' }).trim();
+      headSha = execFileSync('git', ['rev-parse', 'HEAD'], { cwd, encoding: 'utf-8' }).trim();
+    } catch (err) {
+      throw new Error(`Failed to resolve branch "${branch}": ${err instanceof Error ? err.message : String(err)}`);
+    }
+    if (branchSha && branchSha === headSha) {
+      throw new Error(`branch:${branch} points at the commit you are currently on, so diffing it against HEAD yields nothing to review. Use "branch:main" (or your base branch) to review this branch's commits, or "uncommitted" to review working-tree changes.`);
+    }
     try {
       diff = execFileSync('git', ['diff', `${branch}...HEAD`], { cwd, encoding: 'utf-8', maxBuffer: 10 * 1024 * 1024 }).trim();
     } catch (err) {
@@ -107,7 +124,7 @@ export function resolveReviewTarget(target: string|undefined, cwd: string): {dif
   return { diff, label };
 }
 
-// @kern-source: review:100
+// @kern-source: review:117
 export function selectReviewEngine(requestedEngine: string|undefined, ctx: HandlerContext): string {
   const allActive = ctx.activeEngines();
   const active = requestedEngine ? allActive : filterDefaultOrchestrationEngines(allActive);
@@ -155,7 +172,7 @@ export function selectReviewEngine(requestedEngine: string|undefined, ctx: Handl
   throw new Error('No engines available for review. Try /engines to check availability.');
 }
 
-// @kern-source: review:148
+// @kern-source: review:165
 export interface ReviewCoreResult {
   response: string;
   blocking: boolean;
@@ -165,10 +182,10 @@ export interface ReviewCoreResult {
   usage?: {promptTokens:number,completionTokens:number,totalTokens:number,source:'sdk'|'cli-reported'|'estimated'};
 }
 
-// @kern-source: review:159
+// @kern-source: review:176
 export const REVIEW_SENTINEL: string = '<!--AGON_REVIEW_FINDINGS_v1-->';
 
-// @kern-source: review:161
+// @kern-source: review:178
 export interface ReviewSeverityCounts {
   blocking: number;
   important: number;
@@ -179,7 +196,7 @@ export interface ReviewSeverityCounts {
 /**
  * Sentinel-anchored, fail-closed extraction of the findings array — the single chokepoint shared by parseReviewBlocking (the blocking gate) and summarizeReviewFindings (severity counts). Returns the parsed array (possibly empty []) or null when no parseable block follows the LAST sentinel. Anti-injection: only text after the LAST sentinel is considered, so attacker brackets quoted earlier in the diff are ignored. Tolerant of almost-JSON (trailing commas, line and block JS-style comments) and fenced json code blocks.
  */
-// @kern-source: review:167
+// @kern-source: review:184
 export function extractReviewFindings(response: string): Array<{severity?:string, blocking?:boolean}> | null {
   if (!response || response.trim().length === 0) return null;
 
@@ -279,7 +296,7 @@ export function extractReviewFindings(response: string): Array<{severity?:string
 /**
  * Sentinel-anchored, fail-closed parser. The engine MUST end its response with a unique sentinel followed by a JSON array of findings. Without a parseable block the response is treated as blocking + parseFailed, so the user must explicitly approve. This blocks the prompt-injection attack where an attacker echoes `[{"blocking":false}]` inside diff content — only the engine's real structured output after the LAST sentinel is considered. Thin wrapper over extractReviewFindings.
  */
-// @kern-source: review:265
+// @kern-source: review:282
 export function parseReviewBlocking(response: string): {blocking:boolean, parseFailed:boolean} {
   const findings = extractReviewFindings(response);
   if (findings === null) return { blocking: true, parseFailed: true };
@@ -290,7 +307,7 @@ export function parseReviewBlocking(response: string): {blocking:boolean, parseF
 /**
  * Count findings by severity from the structured block, for human summaries like 'claude: ok, 1 important, 3 nits'. Returns all-zero when there is no parseable findings block (the caller renders that as unstructured/empty). A finding counts as blocking if blocking===true or severity==='blocking'; otherwise by its severity, with anything not 'important' falling to nit.
  */
-// @kern-source: review:274
+// @kern-source: review:291
 export function summarizeReviewFindings(response: string): ReviewSeverityCounts {
   const findings = extractReviewFindings(response);
   if (!findings) return { blocking: 0, important: 0, nit: 0, total: 0 };
@@ -309,7 +326,7 @@ export function summarizeReviewFindings(response: string): ReviewSeverityCounts
 /**
  * Repair pass (B): re-ask the engine for ONLY a bare JSON array of the findings it already wrote in prose. Asking for a bare array (no sentinel, no prose, no fence) is the format LLMs comply with most reliably — far better than 'an HTML-comment marker followed by JSON', which engines routinely truncate to just the marker. The caller (runReviewCore) prepends the sentinel itself before parsing, so the anti-injection anchor is preserved. Best-effort: if this still doesn't parse, the fail-closed/unstructured result stands.
  */
-// @kern-source: review:291
+// @kern-source: review:308
 export async function runReviewRepair(priorReview: string, engineId: string, ctx: HandlerContext, signal?: AbortSignal): Promise<string> {
   const config = ctx.config;
   const cwd = resolveWorkingDir();
@@ -359,7 +376,7 @@ export async function runReviewRepair(priorReview: string, engineId: string, ctx
 /**
  * Repo grounding: read the CURRENT full content of each source file the diff touches and format it as a context block. A diff shows only the changed hunks, so reviewers raise false alarms that reading the whole file would kill instantly ('X is unhandled' when the wrapper handles it three lines down; 'unimported' when it's imported at the top). Bounded hard (per-file + total caps) to protect prompt size / TTFT, and skips generated/dist/min files (derived noise that would blow the budget). Best-effort: deleted/binary/unreadable files are skipped — the diff still covers them.
  */
-// @kern-source: review:329
+// @kern-source: review:346
 export function gatherReviewFileContext(diff: string, cwd: string): string {
   const PER_FILE_MAX = 20_000;
   const TOTAL_MAX = 60_000;
@@ -406,7 +423,7 @@ export function gatherReviewFileContext(diff: string, cwd: string): string {
 /**
  * Core review flow with no ctx side effects. Used by both handleReview (with streaming dispatch) and the plan executor's review step (silent). Does NOT touch ctx.setActiveAbort, ctx.lastReviewResult, ctx.chatSession, or tracker. signal is optional: callers that don't have an abort controller can pass undefined. cwdOverride pins the working directory the review engine runs in AND the repo file-context is gathered from — goal passes the per-task worktree so review engines never operate in (and write to) the parent repo; defaults to resolveWorkingDir() for the interactive/CLI review paths.
  */
-// @kern-source: review:374
+// @kern-source: review:391
 export async function runReviewCore(diff: string, label: string, engineId: string, ctx: HandlerContext, signal?: AbortSignal, onProgress?: (chunk:string)=>void, cwdOverride?: string): Promise<ReviewCoreResult> {
   const cwd = cwdOverride ?? resolveWorkingDir();
   const config = ctx.config;
@@ -502,7 +519,7 @@ export async function runReviewCore(diff: string, label: string, engineId: strin
 /**
  * Strip the trailing machine-readable findings block (sentinel + JSON) from a review so the Ctrl+R results pager shows clean prose — the consensus summary already encodes those findings. Cesar's copy (ctx.lastReviewResult.reviewOutput) keeps the full response, so 'fix it' still has the structured file/line/minimalFix data. No-op when there's no sentinel.
  */
-// @kern-source: review:450
+// @kern-source: review:467
 export function stripMachineBlock(response: string): string {
   const idx = response.lastIndexOf(REVIEW_SENTINEL);
   if (idx < 0) return response;
@@ -512,7 +529,7 @@ export function stripMachineBlock(response: string): string {
 /**
  * Build a consensus EngineOutcome from one engine's review. status!=='ok' yields an empty-findings failure lane (never a phantom blocker), carrying any diagnostic note (error message / timeout detail) through to ConsensusReport.engineFailures; 'ok' parses the engine's structured findings into RawFindings. Shared by the single- and multi-engine paths so the mapping lives in one place.
  */
-// @kern-source: review:458
+// @kern-source: review:475
 export function reviewOutcome(engineId: string, response: string, status: string, note?: string): any {
   if (status !== 'ok') return { engine: engineId, status, findings: [], note };
   // Guard against a model emitting a non-object element (e.g. `[null]` or a
@@ -531,7 +548,7 @@ export function reviewOutcome(engineId: string, response: string, status: string
 /**
  * Render a consensus report into the compact, human-facing summary lines (tiered: verified / needs-check / speculative / nits / failed). The single source of the summary text shown inline AND stored as ReviewResultData.consensusSummary, so the transcript and the Ctrl+R pager always agree.
  */
-// @kern-source: review:475
+// @kern-source: review:492
 export function buildReviewConsensusLines(consensus: any): string[] {
   const fmt = (f: any): string => `  • [${f.severity} ${f.maxConfidence.toFixed(2)} ×${f.engines.length}${f.pairVotes >= 2 ? ' pair' : ''}] ${f.problem}${f.file ? ` (${f.file}${f.lines ? ':' + f.lines : ''})` : ''}`;
   const lines: string[] = [`Consensus — ${consensus.summary}`];
@@ -546,7 +563,7 @@ export function buildReviewConsensusLines(consensus: any): string[] {
 /**
  * One-line severity tail for a single engine's review: '2 important, 3 nits' (zero categories omitted; 'no findings' when empty).
  */
-// @kern-source: review:488
+// @kern-source: review:505
 export function formatReviewCounts(c: ReviewSeverityCounts|undefined): string {
   if (!c || c.total === 0) return 'no findings';
   const parts: string[] = [];
@@ -556,7 +573,7 @@ export function formatReviewCounts(c: ReviewSeverityCounts|undefined): string {
   return parts.join(', ');
 }
 
-// @kern-source: review:499
+// @kern-source: review:516
 export async function handleReview(dispatch: Dispatch, ctx: HandlerContext, target?: string, requestedEngine?: string): Promise<void> {
   const abort = new AbortController();
   try {
@@ -682,7 +699,7 @@ export async function handleReview(dispatch: Dispatch, ctx: HandlerContext, targ
 /**
  * Run review for one or more explicitly requested engines. With 2+ engines they run in PARALLEL — each gets its own hard timeout, so a slow-but-excellent reviewer (codex) never blocks the others and a hung engine can't wedge the whole review. Each engine's block is dispatched as it finishes; findings are combined into ctx.lastReviewResult for Cesar follow-up/fix planning. A single engine delegates to the streaming handleReview path.
  */
-// @kern-source: review:621
+// @kern-source: review:638
 export async function handleReviewMany(dispatch: Dispatch, ctx: HandlerContext, target?: string, requestedEngines?: string[]): Promise<void> {
   const abort = new AbortController();
   try {
diff --git a/packages/cli/src/kern/handlers/review.kern b/packages/cli/src/kern/handlers/review.kern
index 1ea415b2..c137b9e5 100644
--- a/packages/cli/src/kern/handlers/review.kern
+++ b/packages/cli/src/kern/handlers/review.kern
@@ -72,6 +72,23 @@ fn name=resolveReviewTarget params="target:string|undefined, cwd:string" returns
     } else if (t.startsWith('branch:')) {
       const branch = t.slice(7);
       label = `branch ${branch}`;
+      // `git diff BRANCH...HEAD` reviews HEAD's changes relative to BRANCH as the
+      // base — correct when BRANCH is the base (e.g. branch:main). The footgun:
+      // when BRANCH resolves to the same commit as HEAD (targeting the branch you
+      // are currently on), it's an empty self-diff that surfaces as a silent
+      // "No changes to review" — which a caller (or Cesar) can mistake for a clean
+      // review. Detect that and fail LOUDLY with the right targets instead.
+      let branchSha = '';
+      let headSha = '';
+      try {
+        branchSha = execFileSync('git', ['rev-parse', branch], { cwd, encoding: 'utf-8' }).trim();
+        headSha = execFileSync('git', ['rev-parse', 'HEAD'], { cwd, encoding: 'utf-8' }).trim();
+      } catch (err) {
+        throw new Error(`Failed to resolve branch "${branch}": ${err instanceof Error ? err.message : String(err)}`);
+      }
+      if (branchSha && branchSha === headSha) {
+        throw new Error(`branch:${branch} points at the commit you are currently on, so diffing it against HEAD yields nothing to review. Use "branch:main" (or your base branch) to review this branch's commits, or "uncommitted" to review working-tree changes.`);
+      }
       try {
         diff = execFileSync('git', ['diff', `${branch}...HEAD`], { cwd, encoding: 'utf-8', maxBuffer: 10 * 1024 * 1024 }).trim();
       } catch (err) {

From 2d03106e62f44b8b9124177d6bbdaac5b599e26d Mon Sep 17 00:00:00 2001
From: nicolascukas <cukasn@gmail.com>
Date: Mon, 8 Jun 2026 11:17:39 +0200
Subject: [PATCH 3/4] =?UTF-8?q?fix(cesar):=20ground=20confabulated=20deleg?=
 =?UTF-8?q?ations=20=E2=80=94=20stop=20"review=20is=20running"=20lies?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Weak coding-plan engines as Cesar narrate that they dispatched or are running
an async review/forge/agent job ("three reviewers are reading the diff in
parallel", "I kicked off the review", "I'll report when they land") without
ever calling the tool. Cesar's turn has no jobManager access, but it has a
grounded turn-local signal: ctx.cesar.pendingDelegation is set only when a
handoff tool (Review/Forge/Agent/…) is actually emitted this turn. So a
dispatch/running claim + a null pendingDelegation = a fabricated delegation.

Adds detectFabricatedDelegation(text) (requires BOTH a delegable target AND a
dispatch/running claim, so a plain answer mentioning "review" doesn't trip it)
and a guard in brain.kern after the plan-mode nudge: when it fires and nothing
is pending or running, re-prompt once with a [SYSTEM] grounding message — call
the real tool now, or tell the user plainly nothing is running. Mirrors the
existing plan-mode / final-answer nudges; soft (re-prompt, never blocks).

Pairs with the branch:X review fix: that turns a silent "No changes" into a
loud error, so Cesar can't mistake a no-op review for a successful one.

Tests: detectFabricatedDelegation covered with the real transcript phrases +
negatives; 198/198 cesar/brain/pty/adapter tests pass.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../cli/src/generated/cesar/brain-helpers.ts  | 25 +++++++++---
 packages/cli/src/generated/cesar/brain.ts     | 39 ++++++++++++++++++-
 .../cli/src/kern/cesar/brain-helpers.kern     | 13 +++++++
 packages/cli/src/kern/cesar/brain.kern        | 39 ++++++++++++++++++-
 tests/unit/cesar-brain.test.ts                | 25 +++++++++++-
 5 files changed, 133 insertions(+), 8 deletions(-)

diff --git a/packages/cli/src/generated/cesar/brain-helpers.ts b/packages/cli/src/generated/cesar/brain-helpers.ts
index 4c9a44ea..79fd486d 100644
--- a/packages/cli/src/generated/cesar/brain-helpers.ts
+++ b/packages/cli/src/generated/cesar/brain-helpers.ts
@@ -121,9 +121,24 @@ export function detectMutationIntentStall(text: string): boolean {
 }
 
 /**
- * Return unique tool names from failed eager tool results. Used to restrict one-shot repair retries to the tool that just failed.
+ * Detect a response that CLAIMS an async review/forge/tribunal/brainstorm/agent or background job was dispatched or is now running — e.g. 'review delegated to codex, claude, agy', 'three reviewers are reading the diff in parallel', 'I kicked off the review', "I'll get back when they report". The caller pairs this with 'no delegation was actually emitted this turn' (ctx.cesar.pendingDelegation is null) to catch the confabulation where a weak engine narrates a dispatch it never made. Requires BOTH a delegable target AND a dispatch/running claim, so a plain answer that merely mentions the word 'review' does not trip it.
  */
 // @kern-source: brain-helpers:113
+export function detectFabricatedDelegation(text: string): boolean {
+  const body = String(text ?? '').trim(); // coerce null/undefined to an empty string before trimming
+  if (!body) return false; // empty or whitespace-only text makes no claim
+  // A delegable target, matched case-insensitively as a whole word anywhere in the text: review / forge / tribunal / brainstorm / campfire / agents / engines / a background job.
+  const TARGET_RE = /\b(?:review(?:er)?s?|forg(?:e|ing)|tribunal|brainstorm|campfire|agents?|engines?|jobs?)\b/i;
+  if (!TARGET_RE.test(body)) return false;
+  // A claim that the target was dispatched or is now running / will report back. NOTE(review): both patterns are tested against the whole text independently, so the target and the claim may sit in different sentences, and a negated claim (no review is running) still matches.
+  const DISPATCH_RE = /\b(?:kick(?:ed|ing)?\s*(?:it|them|that|the\s+\w+)?\s*off|fired?\s*(?:it|them|off)|dispatch(?:ed|ing)|delegat(?:ed|ing)|(?:is|are|now)\s+running|running\s+(?:in|now)|in\s+parallel|reading\s+the\s+(?:diff|changes|code)|working\s+(?:on\s+it|in\s+parallel)|in\s+progress|under\s*way|i'?ll\s+(?:get\s+back|report|let\s+you\s+know|surface|update)|report(?:s|ing)?\s+back|when\s+they\s+(?:report|land|return|finish|come\s+back)|still\s+(?:running|going|working|in\s+progress)|spun?\s+up|started\s+(?:the|a)\s+(?:review|forge|job|tribunal|brainstorm))\b/i;
+  return DISPATCH_RE.test(body);
+}
+
+/**
+ * Return unique tool names from failed eager tool results. Used to restrict one-shot repair retries to the tool that just failed.
+ */
+// @kern-source: brain-helpers:126
 export function eagerFailedToolNames(results: ToolCallResult[]): string[] {
   const names: string[] = [];
   for (const result of results ?? []) {
@@ -141,7 +156,7 @@ export function eagerFailedToolNames(results: ToolCallResult[]): string[] {
 /**
  * Gate eager tool repair retries. A corrected tool call may run once only if the same tool failed in the immediately previous eager batch.
  */
-// @kern-source: brain-helpers:125
+// @kern-source: brain-helpers:138
 export function shouldRunEagerRepairTool(toolName: string, meta: any, failedToolNames: string[], usedToolNames: string[]): boolean {
   const name = String(toolName ?? '').trim();
   if (!name) return false;
@@ -156,7 +171,7 @@ export function shouldRunEagerRepairTool(toolName: string, meta: any, failedTool
 /**
  * Return true for XML tools that hand control back to the Agon dispatcher. These tools do not produce inline results; continuing the XML tool loop after them can make Cesar claim a delegation happened while the actual forge/brainstorm/etc. job has not started yet.
  */
-// @kern-source: brain-helpers:138
+// @kern-source: brain-helpers:151
 export function shouldStopAfterXmlToolCall(toolName: string): boolean {
   const HANDOFF_TOOLS = new Set(['Forge', 'Brainstorm', 'Tribunal', 'Campfire', 'Pipeline', 'Review', 'Agent', 'Goal', 'ProposePlan', 'ExitPlanMode']);
   return HANDOFF_TOOLS.has(String(toolName ?? ''));
@@ -165,7 +180,7 @@ export function shouldStopAfterXmlToolCall(toolName: string): boolean {
 /**
  * Expand a bare 'fix it' follow-up into an explicit prompt grounded in the most recent stored review result. This avoids making Cesar guess which reviewer findings the user means, especially because /review runs outside Cesar's live session history.
  */
-// @kern-source: brain-helpers:144
+// @kern-source: brain-helpers:157
 export function buildReviewFollowupPrompt(input: string, ctx: HandlerContext): { matched: boolean; prompt: string } {
   const trimmed = input.trim();
   const match = trimmed.match(/^fix it(?:\s+with\s+([a-z0-9._-]+))?[\s?!.,;:]*$/i);
@@ -186,7 +201,7 @@ export function buildReviewFollowupPrompt(input: string, ctx: HandlerContext): {
   return { matched: true, prompt: prompt };
 }
 
-// @kern-source: brain-helpers:163
+// @kern-source: brain-helpers:176
 export function extractDelegation(toolName: string, args: Record<string,unknown>): PendingDelegation {
   const argsRecord = args as Record<string, unknown>;
   const taskKindRaw = argsRecord.taskKind;
diff --git a/packages/cli/src/generated/cesar/brain.ts b/packages/cli/src/generated/cesar/brain.ts
index d105a08e..8b5f047d 100644
--- a/packages/cli/src/generated/cesar/brain.ts
+++ b/packages/cli/src/generated/cesar/brain.ts
@@ -32,7 +32,7 @@ import { applyCesarSelfTurnApproval } from './self-turn-approval.js';
 
 import { createCesarTurnId, recordCesarApprovalDecision, recordCesarToolTimeline, recordCesarConfidence } from './tool-observability.js';
 
-import { yieldToInk, splitBeforeToolMarkup, XML_TOOL_MARKUP_HOLD_CHARS, findTrailingUserQuestion, detectAwaitingUserInput, detectNarratedToolStall, detectMutationIntentStall, eagerFailedToolNames, shouldRunEagerRepairTool, shouldStopAfterXmlToolCall, buildReviewFollowupPrompt, extractDelegation } from './brain-helpers.js';
+import { yieldToInk, splitBeforeToolMarkup, XML_TOOL_MARKUP_HOLD_CHARS, findTrailingUserQuestion, detectAwaitingUserInput, detectNarratedToolStall, detectMutationIntentStall, detectFabricatedDelegation, eagerFailedToolNames, shouldRunEagerRepairTool, shouldStopAfterXmlToolCall, buildReviewFollowupPrompt, extractDelegation } from './brain-helpers.js';
 
 // @kern-source: brain:19
 export async function commitTurnAndDelegate(pendingDel: PendingDelegation, input: string, response: string, cesarEngineId: string, streaming: boolean, dispatch: Dispatch, ctx: HandlerContext, telemetry?: Record<string,unknown>): Promise<CesarTurnOutcome> {
@@ -1631,6 +1631,43 @@ export async function handleCesarBrain(input: string, dispatch: Dispatch, ctx: H
           }
         }
 
+        // ── Fabricated-delegation guard: ground a confabulated dispatch ──
+        // Catches the failure where a weak engine narrates that it dispatched or is
+        // running an async review/forge/agent job ("three reviewers are reading the
+        // diff in parallel", "I kicked off the review", "I'll report when they land")
+        // WITHOUT having emitted any delegation this turn — pendingDelegation is null,
+        // so nothing is actually queued or running. Re-prompt once to ground it: call
+        // the real tool now, or tell the user plainly that nothing is running. If the
+        // re-prompt dispatches for real, pendingDelegation gets set and the existing
+        // downstream delegation path takes over.
+        if (
+          !ctx.cesar!.pendingDelegation
+          && session.alive
+          && !abort.signal.aborted
+          && detectFabricatedDelegation(response.trim())
+        ) {
+          dispatch({ type: 'warning', message: 'Cesar claimed a job was running but never dispatched one — grounding...' });
+          dispatch({ type: 'spinner-start', message: 'Cesar grounding…', color });
+          try {
+            let groundResponse = '';
+            const groundGen = session.send({
+              message: '[SYSTEM] GROUNDING CHECK: You did NOT dispatch any review/forge/tribunal/brainstorm/agent/job this turn, and none is pending or running. Do NOT claim background work is "running", "in parallel", "kicked off", or that anyone "will report back" — that is false and misleads the user. If the user wants that work done, call the actual tool now (Review/Forge/Tribunal/Brainstorm/Agent). Otherwise tell the user plainly that nothing is currently running and ask whether to start it.',
+              signal: abort.signal,
+            });
+            for await (const chunk of groundGen) {
+              if (chunk.type === 'text') groundResponse += chunk.content;
+              if (chunk.type === 'done' || chunk.type === 'error') break;
+            }
+            dispatch({ type: 'spinner-stop' });
+            if (groundResponse.trim()) {
+              dispatch({ type: 'engine-block', engineId: cesarEngineId, color, content: groundResponse.trim() });
+              response = groundResponse.trim();
+            }
+          } catch {
+            dispatch({ type: 'spinner-stop' });
+          }
+        }
+
         // ── Protocol enforcement: DISABLED ──
         // Cesar decides all delegations. The system never forces brainstorm/tribunal on the user.
         // If Cesar wants to delegate, he calls the tool. If he doesn't, that's his call.
diff --git a/packages/cli/src/kern/cesar/brain-helpers.kern b/packages/cli/src/kern/cesar/brain-helpers.kern
index e056161f..96ae62c8 100644
--- a/packages/cli/src/kern/cesar/brain-helpers.kern
+++ b/packages/cli/src/kern/cesar/brain-helpers.kern
@@ -110,6 +110,19 @@ fn name=detectMutationIntentStall params="text:string" returns=boolean export=tr
     return MUTATION_INTENT_RE.test(body) && HANDBACK_RE.test(body);
   >>>
 
+fn name=detectFabricatedDelegation params="text:string" returns=boolean export=true
+  doc "Detect a response that CLAIMS an async review/forge/tribunal/brainstorm/agent or background job was dispatched or is now running — e.g. 'review delegated to codex, claude, agy', 'three reviewers are reading the diff in parallel', 'I kicked off the review', \"I'll get back when they report\". The caller pairs this with 'no delegation was actually emitted this turn' (ctx.cesar.pendingDelegation is null) to catch the confabulation where a weak engine narrates a dispatch it never made. Requires BOTH a delegable target AND a dispatch/running claim, so a plain answer that merely mentions the word 'review' does not trip it."
+  handler <<<
+    const body = String(text ?? '').trim(); // coerce null/undefined to an empty string before trimming
+    if (!body) return false; // empty or whitespace-only text makes no claim
+    // A delegable target, matched case-insensitively as a whole word anywhere in the text: review / forge / tribunal / brainstorm / campfire / agents / engines / a background job.
+    const TARGET_RE = /\b(?:review(?:er)?s?|forg(?:e|ing)|tribunal|brainstorm|campfire|agents?|engines?|jobs?)\b/i;
+    if (!TARGET_RE.test(body)) return false;
+    // A claim that the target was dispatched or is now running / will report back. NOTE(review): both patterns are tested against the whole text independently, so the target and the claim may sit in different sentences, and a negated claim (no review is running) still matches.
+    const DISPATCH_RE = /\b(?:kick(?:ed|ing)?\s*(?:it|them|that|the\s+\w+)?\s*off|fired?\s*(?:it|them|off)|dispatch(?:ed|ing)|delegat(?:ed|ing)|(?:is|are|now)\s+running|running\s+(?:in|now)|in\s+parallel|reading\s+the\s+(?:diff|changes|code)|working\s+(?:on\s+it|in\s+parallel)|in\s+progress|under\s*way|i'?ll\s+(?:get\s+back|report|let\s+you\s+know|surface|update)|report(?:s|ing)?\s+back|when\s+they\s+(?:report|land|return|finish|come\s+back)|still\s+(?:running|going|working|in\s+progress)|spun?\s+up|started\s+(?:the|a)\s+(?:review|forge|job|tribunal|brainstorm))\b/i;
+    return DISPATCH_RE.test(body);
+  >>>
+
 fn name=eagerFailedToolNames params="results:ToolCallResult[]" returns="string[]" export=true
   doc "Return unique tool names from failed eager tool results. Used to restrict one-shot repair retries to the tool that just failed."
   handler lang="kern"
diff --git a/packages/cli/src/kern/cesar/brain.kern b/packages/cli/src/kern/cesar/brain.kern
index 339128bf..97115387 100644
--- a/packages/cli/src/kern/cesar/brain.kern
+++ b/packages/cli/src/kern/cesar/brain.kern
@@ -14,7 +14,7 @@ import from="./routing.js" names="buildRoutingContext,deriveRoutingHints,shouldS
 import from="./reliability.js" names="readCesarToolReliability,formatCesarReliabilityLine,shouldDowngradeCesarToolWork,buildWhatHappenedSummary"
 import from="./self-turn-approval.js" names="applyCesarSelfTurnApproval"
 import from="./tool-observability.js" names="createCesarTurnId,recordCesarApprovalDecision,recordCesarToolTimeline,recordCesarConfidence"
-import from="./brain-helpers.js" names="yieldToInk,splitBeforeToolMarkup,XML_TOOL_MARKUP_HOLD_CHARS,findTrailingUserQuestion,detectAwaitingUserInput,detectNarratedToolStall,detectMutationIntentStall,eagerFailedToolNames,shouldRunEagerRepairTool,shouldStopAfterXmlToolCall,buildReviewFollowupPrompt,extractDelegation"
+import from="./brain-helpers.js" names="yieldToInk,splitBeforeToolMarkup,XML_TOOL_MARKUP_HOLD_CHARS,findTrailingUserQuestion,detectAwaitingUserInput,detectNarratedToolStall,detectMutationIntentStall,detectFabricatedDelegation,eagerFailedToolNames,shouldRunEagerRepairTool,shouldStopAfterXmlToolCall,buildReviewFollowupPrompt,extractDelegation"
 
 fn name=commitTurnAndDelegate async=true params="pendingDel:PendingDelegation, input:string, response:string, cesarEngineId:string, streaming:boolean, dispatch:Dispatch, ctx:HandlerContext, telemetry?:Record<string,unknown>" returns="Promise<CesarTurnOutcome>"
   handler lang="kern"
@@ -1601,6 +1601,43 @@ ${reviewFollowup.prompt}`;
         }
       }
 
+      // ── Fabricated-delegation guard: ground a confabulated dispatch ──
+      // Catches the failure where a weak engine narrates that it dispatched or is
+      // running an async review/forge/agent job ("three reviewers are reading the
+      // diff in parallel", "I kicked off the review", "I'll report when they land")
+      // WITHOUT having emitted any delegation this turn — pendingDelegation is null,
+      // so nothing is actually queued or running. Re-prompt once to ground it: call
+      // the real tool now, or tell the user plainly that nothing is running. If the
+      // re-prompt dispatches for real, pendingDelegation gets set and the existing
+      // downstream delegation path takes over.
+      if (
+        !ctx.cesar!.pendingDelegation
+        && session.alive
+        && !abort.signal.aborted
+        && detectFabricatedDelegation(response.trim())
+      ) {
+        dispatch({ type: 'warning', message: 'Cesar claimed a job was running but never dispatched one — grounding...' });
+        dispatch({ type: 'spinner-start', message: 'Cesar grounding…', color });
+        try {
+          let groundResponse = '';
+          const groundGen = session.send({
+            message: '[SYSTEM] GROUNDING CHECK: You did NOT dispatch any review/forge/tribunal/brainstorm/agent/job this turn, and none is pending or running. Do NOT claim background work is "running", "in parallel", "kicked off", or that anyone "will report back" — that is false and misleads the user. If the user wants that work done, call the actual tool now (Review/Forge/Tribunal/Brainstorm/Agent). Otherwise tell the user plainly that nothing is currently running and ask whether to start it.',
+            signal: abort.signal,
+          });
+          for await (const chunk of groundGen) {
+            if (chunk.type === 'text') groundResponse += chunk.content;
+            if (chunk.type === 'done' || chunk.type === 'error') break;
+          }
+          dispatch({ type: 'spinner-stop' });
+          if (groundResponse.trim()) {
+            dispatch({ type: 'engine-block', engineId: cesarEngineId, color, content: groundResponse.trim() });
+            response = groundResponse.trim();
+          }
+        } catch {
+          dispatch({ type: 'spinner-stop' });
+        }
+      }
+
       // ── Protocol enforcement: DISABLED ──
       // Cesar decides all delegations. The system never forces brainstorm/tribunal on the user.
       // If Cesar wants to delegate, he calls the tool. If he doesn't, that's his call.
diff --git a/tests/unit/cesar-brain.test.ts b/tests/unit/cesar-brain.test.ts
index dd2e21dd..9082ea98 100644
--- a/tests/unit/cesar-brain.test.ts
+++ b/tests/unit/cesar-brain.test.ts
@@ -2,7 +2,7 @@ import { describe, it, expect } from 'vitest';
 import { parseSuggestion, parseConfidence, confidenceBadge, CONFIDENCE_TIERS, CESAR_SYSTEM_PROMPT, buildReviewFollowupPrompt, detectNarratedToolStall } from '../../packages/cli/src/handlers/cesar-brain.js';
 // Source of truth for these helpers is packages/cli/src/kern/cesar/brain-helpers.kern;
 // the generated/*.js below is regenerated from it (npm run kern:compile) — do not edit by hand.
-import { eagerFailedToolNames, shouldRunEagerRepairTool, shouldStopAfterXmlToolCall, splitBeforeToolMarkup, isUserDirectedQuestion, findTrailingUserQuestion, detectAwaitingUserInput, detectMutationIntentStall } from '../../packages/cli/src/generated/cesar/brain-helpers.js';
+import { eagerFailedToolNames, shouldRunEagerRepairTool, shouldStopAfterXmlToolCall, splitBeforeToolMarkup, isUserDirectedQuestion, findTrailingUserQuestion, detectAwaitingUserInput, detectMutationIntentStall, detectFabricatedDelegation } from '../../packages/cli/src/generated/cesar/brain-helpers.js';
 import { createReportConfidenceTool, createForgeTool, createBrainstormTool, createTribunalTool, createCampfireTool, createPipelineTool } from '../../packages/core/src/tools.js';
 
 describe('Cesar Brain', () => {
@@ -36,6 +36,29 @@ describe('Cesar Brain', () => {
     });
   });
 
+  describe('detectFabricatedDelegation (confabulated dispatch)', () => {
+    it('flags a claim that reviewers/jobs are running or were dispatched', () => {
+      // Real phrases from the confabulation transcript; each one names a delegable target AND makes a dispatch/running claim.
+      expect(detectFabricatedDelegation('Going — three reviewers (codex, claude, agy) are reading the 90-file diff in parallel.')).toBe(true);
+      expect(detectFabricatedDelegation('The review is still running — codex, claude, and agy are each reading the diff in parallel.')).toBe(true);
+      expect(detectFabricatedDelegation("Review delegated to codex, claude, and agy. I'll get back when they report.")).toBe(true);
+      expect(detectFabricatedDelegation('I kicked off the review — the agents are working in parallel now.')).toBe(true);
+    });
+
+    it('does not flag a plain answer that merely mentions a review', () => {
+      expect(detectFabricatedDelegation('You should run a review before merging this branch.')).toBe(false); // target word, no dispatch claim
+      expect(detectFabricatedDelegation('The review tool diffs the branch against its base.')).toBe(false); // target word, no dispatch claim
+      expect(detectFabricatedDelegation('Here is the fix; nothing is running right now.')).toBe(false); // running claim, but no delegable target word
+    });
+
+    it('requires both a delegable target AND a dispatch/running claim', () => {
+      // "running" but no delegable target → not a fabricated delegation.
+      expect(detectFabricatedDelegation('The build is running in parallel across packages.')).toBe(false);
+      // target but no dispatch claim → not flagged.
+      expect(detectFabricatedDelegation('A tribunal would surface the tradeoffs here.')).toBe(false);
+    });
+  });
+
   describe('detectMutationIntentStall (false read-only hand-back)', () => {
     it('flags "I am read-only" narration with intent to apply a change', () => {
       expect(detectMutationIntentStall('This session is read-only, so I cannot apply the edit — paste it into your terminal.')).toBe(true);

From 6f13bea589b592ace2fc69c037c79819fffb6d19 Mon Sep 17 00:00:00 2001
From: nicolascukas <cukasn@gmail.com>
Date: Mon, 8 Jun 2026 11:25:29 +0200
Subject: [PATCH 4/4] feat(cli): terminal bell + window-title alerts on done /
 awaiting input
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Rings the terminal bell and flips the window title when a turn finishes or
when Cesar needs the user (a question or a plan awaiting approval), so a
backgrounded terminal surfaces a bell + title dot. Originally built by Cesar
(minimax-coding-plan-m3); this completes and corrects it.

- New packages/cli/src/kern/lib/terminal-notify.kern: bell() (BEL once) and
  setWindowTitle(label) (OSC 0 ; label BEL). Both no-op when stdout isn't a TTY
  (piped/CI) and honour AGON_NO_BELL / AGON_NO_TITLE opt-outs.
- Wired into app.kern: bell on await (question / plan-approval, deduped per
  plan id) and on done, with a single-shot pendingBellRef guard; title shows
  "● agon — running" / "● agon — input needed" / "agon".

Fixes in the wiring (the part Cesar left broken — its plan's wire step failed
verify):
- The done-bell only fired on two edge paths (status dashboard / empty
  mode-switch) because the MAIN turn-completion path ended via a direct
  setReplState, bypassing transition()'s bell hook. Route it through
  transition(finishReplState) so every completed turn rings (no-op for job
  turns that already went idle at handoff).
- Background jobs (forge/review/etc.) now ring + reset the title when they
  complete or fail — the "alert me when the long job is done" case.

Adds tests/unit/terminal-notify.test.ts (the unit step Cesar's plan never
reached): BEL-once / opt-out / non-TTY for bell, OSC sequence / opt-out /
non-TTY for setWindowTitle. Full suite green: 1994/1994.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../cli/src/generated/lib/terminal-notify.ts  | 31 ++++++
 .../cli/src/generated/surfaces/app.entry.tsx  |  4 +-
 packages/cli/src/generated/surfaces/app.tsx   | 98 ++++++++++++++++---
 .../cli/src/kern/lib/terminal-notify.kern     | 19 ++++
 packages/cli/src/kern/surfaces/app.kern       | 88 +++++++++++++++--
 tests/unit/terminal-notify.test.ts            | 76 ++++++++++++++
 6 files changed, 293 insertions(+), 23 deletions(-)
 create mode 100644 packages/cli/src/generated/lib/terminal-notify.ts
 create mode 100644 packages/cli/src/kern/lib/terminal-notify.kern
 create mode 100644 tests/unit/terminal-notify.test.ts

diff --git a/packages/cli/src/generated/lib/terminal-notify.ts b/packages/cli/src/generated/lib/terminal-notify.ts
new file mode 100644
index 00000000..7feda78f
--- /dev/null
+++ b/packages/cli/src/generated/lib/terminal-notify.ts
@@ -0,0 +1,31 @@
+// @generated by kern v3.5.8 — DO NOT EDIT. Source: src/kern/lib/terminal-notify.kern
+
+import { stdout } from 'node:process';
+
+/**
+ * Ring the terminal bell by writing a single BEL character (\x07) to stdout. No-op when stdout is not a TTY (piped runs, CI) or when the AGON_NO_BELL environment variable is set to any non-empty value, so we never break automation or annoy users who opted out.
+ */
+// @kern-source: terminal-notify:3
+export function bell(): void {
+  if (!stdout.isTTY) {
+    return;
+  }
+  if (process.env.AGON_NO_BELL) {
+    return;
+  }
+  stdout.write('\x07');
+}
+
+/**
+ * Set the terminal window/tab title by writing the OSC 0 sequence (ESC ] 0 ; <label> BEL) to stdout. No-op when stdout is not a TTY or when the AGON_NO_TITLE environment variable is set to any non-empty value, so piped runs and CI stay clean and users can opt out. The label is written verbatim (no escaping), so callers should pass plain text without control characters.
+ */
+// @kern-source: terminal-notify:12
+export function setWindowTitle(label: string): void {
+  if (!stdout.isTTY) {
+    return;
+  }
+  if (process.env.AGON_NO_TITLE) {
+    return;
+  }
+  stdout.write('\x1b]0;' + label + '\x07');
+}
diff --git a/packages/cli/src/generated/surfaces/app.entry.tsx b/packages/cli/src/generated/surfaces/app.entry.tsx
index d9c167ce..9c3ddb1b 100644
--- a/packages/cli/src/generated/surfaces/app.entry.tsx
+++ b/packages/cli/src/generated/surfaces/app.entry.tsx
@@ -1,7 +1,7 @@
 #!/usr/bin/env node
-// @generated by kern v3.5.7 — DO NOT EDIT. Source: src/kern/surfaces/app.kern
+// @generated by kern v3.5.8 — DO NOT EDIT. Source: src/kern/surfaces/app.kern
 
-// @kern-source: app:94
+// @kern-source: app:95
 
 import React from 'react';
 import { render } from 'ink';
diff --git a/packages/cli/src/generated/surfaces/app.tsx b/packages/cli/src/generated/surfaces/app.tsx
index 7304da57..d8364328 100644
--- a/packages/cli/src/generated/surfaces/app.tsx
+++ b/packages/cli/src/generated/surfaces/app.tsx
@@ -132,6 +132,8 @@ import { parseProseToRichLines } from '../blocks/rich-text.js';
 
 import { checkForUpdate, loadDismissedVersion, saveDismissedVersion, isLinkedDevInstall } from '../services/update-check.js';
 
+import { bell, setWindowTitle } from '../lib/terminal-notify.js';
+
 import { COMPOSER_HISTORY_LIMIT, loadComposerInputHistory, saveComposerInputHistory } from './app-composer.js';
 
 import { toolDetailViewportRows, findLatestToolDetailEvent, findLatestToolEvent, findLatestFailedToolEvent, buildFailedToolRetryDraft, buildToolDetailView } from './app-tool-detail.js';
@@ -152,7 +154,7 @@ import { buildExecutionRailStats, buildTranscriptRows } from './app-rendering.js
 
 export { COMPOSER_HISTORY_LIMIT, isMutatingToolCall, probeEngineVitals, parseToolCallPayload, toolPreviewWindow, toolCallSupportsDetailView, detailViewerSupportsEvent, toolDetailViewportRows, findLatestToolDetailEvent, findLatestToolEvent, buildExecutionRailStats, composerHistoryPath, loadComposerInputHistory, saveComposerInputHistory, findLatestFailedToolEvent, buildFailedToolRetryDraft, buildToolDetailView, createInitialRegistry, drainStdinBuffer, maxScrollOffsetForRowCount, nextWheelAnimationStep, clampNumber, charDisplayWidth, stringDisplayWidth, displayColumnToStringIndex, normalizeRowSelection, normalizeTextSelection, richLineToPlainText, transcriptRowToPlainText, transcriptRowTextStartColumn, resolveTranscriptColumnFromMouse, transcriptRowsToPlainText, resolveTranscriptRowFromMouse, estimateVisibleBlockBudget, estimateWrappedRowCount, estimateQuestionReservedRows, estimateBottomChromeExtraRows, summarizeBtwTranscriptEvent, buildDashboardBlock, estimatePinnedLiveRows, estimateWrappedRows, estimateToolCallRows, estimateOutputEventRows, buildDisplayItems, isToolCallLikeBlock, coalesceToolCallBlocks, effectiveNativeArchiveBlockCount, estimateDisplayItemRows, historyBlocksForTranscript, nativeTranscriptBlocksForStatic, nativeArchiveBlockCount, isDuplicateEngineBlock, appendTranscriptBlock, normalizeTerminalMode, fileRailWidthForTerminal, fileRailMaxRowsForTerminal, buildTerminalReplaySnapshot, parseMarkdownToRows, buildToolCallRows, buildCollapsedToolGroupRows, buildTranscriptRows } from './app-helpers.js';
 
-// @kern-source: app:94
+// @kern-source: app:95
 export function App() {
   // Ink-safe setter: bridges microtask → macrotask for reliable repaints
   function __inkSafe<T>(setter: React.Dispatch<React.SetStateAction<T>>): React.Dispatch<React.SetStateAction<T>> {
@@ -540,6 +542,8 @@ export function App() {
   const activeAbortRef = useRef<AbortController|null>(null);
   const activeTurnRef = useRef<{ input:string; engineId:string; retried:boolean }|null>(null);
   const lastActivityTimeRef = useRef<number>(Date.now());
+  const pendingBellRef = useRef<boolean>(false);
+  const awaitingPlanAnnouncedRef = useRef<string>('');
   const blockArchivePathRef = useRef<string>(makeBlockArchivePath(Date.now()));
   const nestedCtrlShortcutRef = useRef<{ key: string; at: number }>({ key: '', at: 0 });
   const displayRowCountRef = useRef<number>(0);
@@ -840,10 +844,24 @@ export function App() {
 
   const transition = useCallback((fn:any) => {
     setReplState((prev: any) => {
-      try { return fn({ state: prev }).state; }
+      try {
+        const next = fn({ state: prev }).state;
+        // Ring once and reset the window title when we return to idle
+        // (finishReplState / cancelReplState both end a run). Guard with
+        // pendingBellRef so a question and a finishReplState in the same
+        // render don't double-bell; user keystroke or next dispatch clears.
+        if (next === 'idle' && prev !== 'idle') {
+          if (!pendingBellRef.current) {
+            bell();
+            pendingBellRef.current = true;
+          }
+          setWindowTitle('agon');
+        }
+        return next;
+      }
       catch { return prev; }
     });
-  }, []);
+  }, [bell,setWindowTitle,pendingBellRef]);
 
   const trackAbort = useCallback((abort:AbortController|null) => {
     if (activeAbortRef.current) {
@@ -986,6 +1004,11 @@ export function App() {
         dispatch({ type: 'warning', message: message } as any);
       }
       setReplState((prev: any) => (prev === 'idle') ? prev : cancelReplState({ state: prev }).state);
+      if (!pendingBellRef.current) {
+        bell();
+        pendingBellRef.current = true;
+      }
+      setWindowTitle('agon');
     }
     if (clearChat) {
       dispatch({ type: 'clear' } as any);
@@ -1032,6 +1055,10 @@ export function App() {
   const handleInputChange = useCallback((value:string) => {
     // Diagnostic: stamp keystroke arrival; read post-commit by the AGON_PERF effect (0 when off).
     keyT0Ref.current = perfNow();
+    // User is back at the keyboard — clear the pending bell so the NEXT await
+    // can ring again, and forget the announced plan id so an approval can re-announce.
+    pendingBellRef.current = false;
+    awaitingPlanAnnouncedRef.current = '';
     // Swallow input while a choice question is active — the keyboard handler
     // resolves the choice on a single keypress.
     if (questionState && questionState.choices) {
@@ -1088,7 +1115,7 @@ export function App() {
     const updatedValue = nextValue.slice(0, change.start) + replacement + nextValue.slice(change.start + change.inserted.length);
     inputValueRef.current = updatedValue;
     setInputValue(updatedValue);
-  }, [slashPickerOpen,enginePickerOpen,modelPickerOpen,questionState,planModeQueued,autoModeQueued,livePaneVisible]);
+  }, [slashPickerOpen,enginePickerOpen,modelPickerOpen,questionState,planModeQueued,autoModeQueued,livePaneVisible,pendingBellRef]);
 
   const sendBtwMessage = useCallback((question:string) => {
     const q = (question ?? '').trim();
@@ -1184,6 +1211,9 @@ export function App() {
   const handleSubmit = useCallback(async (value:string) => {
     inputEpochRef.current += 1;
     let input = cleanSubmitValue(value);
+    // User is submitting a turn — allow the next await to ring again.
+    pendingBellRef.current = false;
+    awaitingPlanAnnouncedRef.current = '';
     if (!input) return;
     // Bare "/" → open slash picker flyout, don't dump text list
     if (input === '/') {
@@ -1298,6 +1328,8 @@ export function App() {
     const autoModeForTurn = autoModeQueued && input.trim() && !input.startsWith('/');
     if (planModeQueued) setPlanModeQueued(false);
     transition(startCommandReplState);
+    pendingBellRef.current = false;
+    setWindowTitle('\u25cf agon \u2014 running');
     dispatch({ type: 'separator' } as any);
     dispatch({ type: 'user-message', content: input } as any);
     const { text: cleanInput, images: detectedImages } = extractImagesFromInput(input, resolveWorkingDir());
@@ -1325,8 +1357,8 @@ export function App() {
         // Transition to idle so user can submit new commands while job runs
         // Strip stays active via jobList.some(j => j.state === 'running') check
         setReplState((prev: any) => prev === 'idle' ? prev : finishReplState({ state: prev }).state);
-        fn().then(() => { jobManager.complete(job.id); setJobList([...jobManager.list()]); })
-          .catch((err: any) => { jobManager.fail(job.id, err instanceof Error ? err.message : String(err)); setJobList([...jobManager.list()]); dispatch({ type: 'error', message: err instanceof Error ? err.message : String(err) } as any); });
+        fn().then(() => { jobManager.complete(job.id); setJobList([...jobManager.list()]); bell(); setWindowTitle('agon'); })
+          .catch((err: any) => { jobManager.fail(job.id, err instanceof Error ? err.message : String(err)); setJobList([...jobManager.list()]); dispatch({ type: 'error', message: err instanceof Error ? err.message : String(err) } as any); bell(); setWindowTitle('agon'); });
       },
       setMode, setPendingImages, setSessionEngines, setEnginePickerOpen, setModelPickerOpen, setModelPickerEntries, setModelPickerLoading, setCesarPickerOpen, setChatSession, setLastUndoToken, askQuestion, exit: () => process.exit(0),
       setModelPickerTargetEngine, setModelPickerInitialFilter, setModelPickerTitle, setModelPickerCliGroups,
@@ -1351,9 +1383,15 @@ export function App() {
     } catch (err: any) { dispatch({ type: 'error', message: err instanceof Error ? err.message : String(err) } as any); }
     finally {
       if (activeTurnRef.current?.input === input) activeTurnRef.current = null;
-      setReplState((prev: any) => prev === 'idle' ? prev : finishReplState({ state: prev }).state);
+      // Route through transition() so a normally-completed turn rings the
+      // done-bell. A direct setReplState here bypassed transition's bell hook,
+      // so the MAIN completion path (every non-job turn) was silent — only the
+      // status-dashboard / empty-mode-switch edge cases rang. transition's own
+      // `next==='idle' && prev!=='idle'` guard makes this a no-op (no bell) for
+      // job turns that already went idle at the runAsJob handoff above.
+      transition(finishReplState);
     }
-  }, [replState,dispatch,buildContext,mode,pendingImages,jobManager,loadedExtensions,extensionSkills,commandRegistry,eventBus,planModeQueued,autoModeQueued,setPersistentAutoMode,setActivePlanWrapped,outputBlocks,btwPanel,sendBtwMessage]);
+  }, [replState,dispatch,buildContext,mode,pendingImages,jobManager,loadedExtensions,extensionSkills,commandRegistry,eventBus,planModeQueued,autoModeQueued,setPersistentAutoMode,setActivePlanWrapped,outputBlocks,btwPanel,sendBtwMessage,pendingBellRef,awaitingPlanAnnouncedRef]);
 
   const handleReviewActionCb = useCallback((action:'apply'|'edit'|'reject'|'copy') => {
     if (!reviewEvent) {
@@ -1976,6 +2014,38 @@ export function App() {
     modeRef.current = mode;
   }, [mode]);
 
+  useEffect(() => {
+    // Awaiting-user-input: a question is open OR a plan needs approval.
+    // Ring the bell once and set the window title to the alert variant
+    // so the user notices the terminal even when it's backgrounded.
+    const planState = String(activePlan?.state ?? '');
+    const planAwaiting = planState === 'awaiting_approval';
+    const planId = String(activePlan?.id ?? '');
+    if (questionState || planAwaiting) {
+      // Gate per-plan-id: a fresh plan approval (different id) re-rings,
+      // but a re-render of the SAME awaiting_approval state is silent.
+      // The question path re-uses the same pendingBellRef so an open
+      // question and a plan-approval that land in the same render still
+      // produce a single bell.
+      const freshPlan = planAwaiting && planId && planId !== awaitingPlanAnnouncedRef.current;
+      if (!pendingBellRef.current || freshPlan) {
+        bell();
+        pendingBellRef.current = true;
+      }
+      if (freshPlan) awaitingPlanAnnouncedRef.current = planId;
+      setWindowTitle('\u25cf agon \u2014 input needed');
+    } else if (replState === 'idle') {
+      setWindowTitle('agon');
+    } else {
+      setWindowTitle('\u25cf agon \u2014 running');
+    }
+    // No plan is awaiting approval any more: forget the announced plan id so a
+    // later approval rings again. (handleInputChange/handleSubmit clear it too.)
+    if (!planAwaiting) {
+      awaitingPlanAnnouncedRef.current = '';
+    }
+  }, [questionState,activePlan,replState,bell,setWindowTitle,pendingBellRef,awaitingPlanAnnouncedRef]);
+
   useEffect(() => {
     mouseSelectionRef.current = mouseSelection;
   }, [mouseSelection]);
@@ -2836,22 +2906,22 @@ export function App() {
   );
 }
 
-// @kern-source: app:84
+// @kern-source: app:85
 export const _activeAborts: Set<AbortController> = new Set<AbortController>();
 
-// @kern-source: app:86
+// @kern-source: app:87
 export const _cancelCallback: { fn: (() => void) | null } = { fn: null };
 
-// @kern-source: app:88
+// @kern-source: app:89
 export const _cesarSessionRef: { session: PersistentSession | null } = { session: null };
 
-// @kern-source: app:90
+// @kern-source: app:91
 export const _lastSigintAt: { value: number } = { value: 0 };
 
-// @kern-source: app:92
+// @kern-source: app:93
 export const _pauseState: { value: PauseState | null } = { value: null };
 
-// @kern-source: app:2582
+// @kern-source: app:2656
 export async function startRepl(): Promise<void> {
   ensureAgonHome();
   ensureCurrentWorkspace(process.cwd());
diff --git a/packages/cli/src/kern/lib/terminal-notify.kern b/packages/cli/src/kern/lib/terminal-notify.kern
new file mode 100644
index 00000000..9e9bade8
--- /dev/null
+++ b/packages/cli/src/kern/lib/terminal-notify.kern
@@ -0,0 +1,19 @@
+import from="node:process" names="stdout"
+
+fn name=bell returns=void export=true
+  doc "Ring the terminal bell once on stdout. No-op when stdio is not a TTY (piped runs, CI) or when AGON_NO_BELL is set in the environment, so we never break automation or annoy users who opted out."
+  handler lang="kern"
+    if cond="!stdout.isTTY"
+      return
+    if cond="process.env.AGON_NO_BELL"
+      return
+    do value="stdout.write('\u0007')"
+
+fn name=setWindowTitle params="label:string" returns=void export=true
+  doc "Set the terminal window/tab title via the OSC 0 ; <label> BEL sequence. No-op when stdio is not a TTY or when AGON_NO_TITLE is set, so piped runs and CI stay clean and users can opt out."
+  handler lang="kern"
+    if cond="!stdout.isTTY"
+      return
+    if cond="process.env.AGON_NO_TITLE"
+      return
+    do value="stdout.write('\u001b]0;' + label + '\u0007')"
diff --git a/packages/cli/src/kern/surfaces/app.kern b/packages/cli/src/kern/surfaces/app.kern
index 89f0aefb..9e29bc11 100644
--- a/packages/cli/src/kern/surfaces/app.kern
+++ b/packages/cli/src/kern/surfaces/app.kern
@@ -67,6 +67,7 @@ import from="../../input-utils.js" names="isTerminalFocusReport"
 import from="../../stable-input.js" names="useStableInput"
 import from="../blocks/rich-text.js" names="parseProseToRichLines"
 import from="../services/update-check.js" names="checkForUpdate,loadDismissedVersion,saveDismissedVersion,isLinkedDevInstall"
+import from="../lib/terminal-notify.js" names="bell,setWindowTitle"
 // ── Domain-specific helper imports (refactored from monolithic app-helpers) ──
 import from="./app-composer.js" names="COMPOSER_HISTORY_LIMIT,loadComposerInputHistory,saveComposerInputHistory"
 import from="./app-tool-detail.js" names="toolDetailViewportRows,findLatestToolDetailEvent,findLatestToolEvent,findLatestFailedToolEvent,buildFailedToolRetryDraft,buildToolDetailView"
@@ -196,6 +197,13 @@ screen name=App target=ink
   ref name=activeAbortRef type="AbortController|null" initial="null"
   ref name=activeTurnRef type="{ input:string; engineId:string; retried:boolean }|null" initial="null"
   ref name=lastActivityTimeRef type=number initial="Date.now()"
+  // Single-shot bell guard: set true when we ring on done/await, cleared on
+  // the next user keystroke or next dispatch. Prevents double-bell when both
+  // a question and a plan-approval land in the same render.
+  ref name=pendingBellRef type=boolean initial=false
+  // Holds the id of the plan whose approval we've already announced ('' = none)
+  // so a re-render of the same awaiting_approval state doesn't re-ring the bell.
+  ref name=awaitingPlanAnnouncedRef type=string initial="''"
   ref name=blockArchivePathRef type=string initial="makeBlockArchivePath(Date.now())"
   ref name=nestedCtrlShortcutRef type="{ key: string; at: number }" initial="{ key: '', at: 0 }"
   ref name=displayRowCountRef type=number initial=0
@@ -470,6 +478,39 @@ screen name=App target=ink
     handler lang="kern"
       assign target="modeRef.current" value="mode"
 
+  effect deps="questionState,activePlan,replState,bell,setWindowTitle,pendingBellRef,awaitingPlanAnnouncedRef"
+    handler <<<
+      // Awaiting-user-input: a question is open OR a plan needs approval.
+      // Ring the bell once and set the window title to the alert variant
+      // so the user notices the terminal even when it's backgrounded.
+      const planState = String(activePlan?.state ?? '');
+      const planAwaiting = planState === 'awaiting_approval';
+      const planId = String(activePlan?.id ?? '');
+      if (questionState || planAwaiting) {
+        // Gate per-plan-id: a fresh plan approval (different id) re-rings,
+        // but a re-render of the SAME awaiting_approval state is silent.
+        // The question path re-uses the same pendingBellRef so an open
+        // question and a plan-approval that land in the same render still
+        // produce a single bell.
+        const freshPlan = planAwaiting && planId && planId !== awaitingPlanAnnouncedRef.current;
+        if (!pendingBellRef.current || freshPlan) {
+          bell();
+          pendingBellRef.current = true;
+        }
+        if (freshPlan) awaitingPlanAnnouncedRef.current = planId;
+        setWindowTitle('\u25cf agon \u2014 input needed');
+      } else if (replState === 'idle') {
+        setWindowTitle('agon');
+      } else {
+        setWindowTitle('\u25cf agon \u2014 running');
+      }
+      // No plan is awaiting approval any more: forget the announced plan id so a
+      // later approval rings again. (handleInputChange/handleSubmit clear it too.)
+      if (!planAwaiting) {
+        awaitingPlanAnnouncedRef.current = '';
+      }
+    >>>
+
   effect deps="mouseSelection"
     handler lang="kern"
       assign target="mouseSelectionRef.current" value="mouseSelection"
@@ -665,10 +706,24 @@ screen name=App target=ink
       };
     >>>
 
-  callback name=transition params="fn:any"
+  callback name=transition params="fn:any" deps="bell,setWindowTitle,pendingBellRef"
     handler <<<
       setReplState((prev: any) => {
-        try { return fn({ state: prev }).state; }
+        try {
+          const next = fn({ state: prev }).state;
+          // Ring once and reset the window title when we return to idle
+          // (finishReplState / cancelReplState both end a run). Guard with
+          // pendingBellRef so a question and a finishReplState in the same
+          // render don't double-bell; user keystroke or next dispatch clears.
+          if (next === 'idle' && prev !== 'idle') {
+            if (!pendingBellRef.current) {
+              bell();
+              pendingBellRef.current = true;
+            }
+            setWindowTitle('agon');
+          }
+          return next;
+        }
         catch { return prev; }
       });
     >>>
@@ -902,6 +957,10 @@ screen name=App target=ink
         if cond="message"
           do value="dispatch({ type: 'warning', message } as any)"
         do value="setReplState((prev: any) => prev === 'idle' ? prev : cancelReplState({ state: prev }).state)"
+        if cond="!pendingBellRef.current"
+          do value="bell()"
+          assign target="pendingBellRef.current" value="true"
+        do value="setWindowTitle('agon')"
       if cond="clearChat"
         do value="dispatch({ type: 'clear' } as any)"
 
@@ -1141,10 +1200,14 @@ screen name=App target=ink
       assign target="pendingPasteTransformRef.current" value="replacement.length > 0"
       return value="replacement"
 
-  callback name=handleInputChange params="value:string" deps="slashPickerOpen,enginePickerOpen,modelPickerOpen,questionState,planModeQueued,autoModeQueued,livePaneVisible"
+  callback name=handleInputChange params="value:string" deps="slashPickerOpen,enginePickerOpen,modelPickerOpen,questionState,planModeQueued,autoModeQueued,livePaneVisible,pendingBellRef"
     handler lang="kern"
       comment raw="// Diagnostic: stamp keystroke arrival; read post-commit by the AGON_PERF effect (0 when off)."
       assign target="keyT0Ref.current" value="perfNow()"
+      comment raw="// User is back at the keyboard — clear the pending bell so the NEXT await"
+      comment raw="// can ring again, and forget the announced plan id so an approval can re-announce."
+      assign target="pendingBellRef.current" value="false"
+      assign target="awaitingPlanAnnouncedRef.current" value="''"
       comment raw="// Swallow input while a choice question is active — the keyboard handler"
       comment raw="// resolves the choice on a single keypress."
       if cond="questionState && questionState.choices"
@@ -1287,10 +1350,13 @@ ${streamCtx ? 'Recent live output from the running task:\n' + streamCtx + '\n\n'
       });
     >>>
 
-  callback name=handleSubmit async=true params="value:string" deps="replState,dispatch,buildContext,mode,pendingImages,jobManager,loadedExtensions,extensionSkills,commandRegistry,eventBus,planModeQueued,autoModeQueued,setPersistentAutoMode,setActivePlanWrapped,outputBlocks,btwPanel,sendBtwMessage"
+  callback name=handleSubmit async=true params="value:string" deps="replState,dispatch,buildContext,mode,pendingImages,jobManager,loadedExtensions,extensionSkills,commandRegistry,eventBus,planModeQueued,autoModeQueued,setPersistentAutoMode,setActivePlanWrapped,outputBlocks,btwPanel,sendBtwMessage,pendingBellRef,awaitingPlanAnnouncedRef"
     handler <<<
       inputEpochRef.current += 1;
       let input = cleanSubmitValue(value);
+      // User is submitting a turn — allow the next await to ring again.
+      pendingBellRef.current = false;
+      awaitingPlanAnnouncedRef.current = '';
       if (!input) return;
       // Bare "/" → open slash picker flyout, don't dump text list
       if (input === '/') {
@@ -1405,6 +1471,8 @@ ${streamCtx ? 'Recent live output from the running task:\n' + streamCtx + '\n\n'
       const autoModeForTurn = autoModeQueued && input.trim() && !input.startsWith('/');
       if (planModeQueued) setPlanModeQueued(false);
       transition(startCommandReplState);
+      pendingBellRef.current = false;
+      setWindowTitle('\u25cf agon \u2014 running');
       dispatch({ type: 'separator' } as any);
       dispatch({ type: 'user-message', content: input } as any);
       const { text: cleanInput, images: detectedImages } = extractImagesFromInput(input, resolveWorkingDir());
@@ -1432,8 +1500,8 @@ ${streamCtx ? 'Recent live output from the running task:\n' + streamCtx + '\n\n'
           // Transition to idle so user can submit new commands while job runs
           // Strip stays active via jobList.some(j => j.state === 'running') check
           setReplState((prev: any) => prev === 'idle' ? prev : finishReplState({ state: prev }).state);
-          fn().then(() => { jobManager.complete(job.id); setJobList([...jobManager.list()]); })
-            .catch((err: any) => { jobManager.fail(job.id, err instanceof Error ? err.message : String(err)); setJobList([...jobManager.list()]); dispatch({ type: 'error', message: err instanceof Error ? err.message : String(err) } as any); });
+          fn().then(() => { jobManager.complete(job.id); setJobList([...jobManager.list()]); bell(); setWindowTitle('agon'); })
+            .catch((err: any) => { jobManager.fail(job.id, err instanceof Error ? err.message : String(err)); setJobList([...jobManager.list()]); dispatch({ type: 'error', message: err instanceof Error ? err.message : String(err) } as any); bell(); setWindowTitle('agon'); });
         },
         setMode, setPendingImages, setSessionEngines, setEnginePickerOpen, setModelPickerOpen, setModelPickerEntries, setModelPickerLoading, setCesarPickerOpen, setChatSession, setLastUndoToken, askQuestion, exit: () => process.exit(0),
         setModelPickerTargetEngine, setModelPickerInitialFilter, setModelPickerTitle, setModelPickerCliGroups,
@@ -1458,7 +1526,13 @@ ${streamCtx ? 'Recent live output from the running task:\n' + streamCtx + '\n\n'
       } catch (err: any) { dispatch({ type: 'error', message: err instanceof Error ? err.message : String(err) } as any); }
       finally {
         if (activeTurnRef.current?.input === input) activeTurnRef.current = null;
-        setReplState((prev: any) => prev === 'idle' ? prev : finishReplState({ state: prev }).state);
+        // Route through transition() so a normally-completed turn rings the
+        // done-bell. A direct setReplState here bypassed transition's bell hook,
+        // so the MAIN completion path (every non-job turn) was silent — only the
+        // status-dashboard / empty-mode-switch edge cases rang. transition's own
+        // `next==='idle' && prev!=='idle'` guard makes this a no-op (no bell) for
+        // job turns that already went idle at the runAsJob handoff above.
+        transition(finishReplState);
       }
     >>>
 
diff --git a/tests/unit/terminal-notify.test.ts b/tests/unit/terminal-notify.test.ts
new file mode 100644
index 00000000..2a9143e7
--- /dev/null
+++ b/tests/unit/terminal-notify.test.ts
@@ -0,0 +1,76 @@
+import { describe, it, expect, vi, afterEach } from 'vitest';
+// Source of truth is packages/cli/src/kern/lib/terminal-notify.kern;
+// the generated/*.js below is regenerated from it (npm run kern:compile).
+import { bell, setWindowTitle } from '../../packages/cli/src/generated/lib/terminal-notify.js';
+
+const BEL = String.fromCharCode(7); // 0x07 BEL
+const OSC = String.fromCharCode(27) + ']0;'; // ESC ] 0 ; window-title opener
+
+describe('terminal-notify', () => { // Covers the TTY / opt-out-env gating of bell() and setWindowTitle().
+  const origIsTTY = process.stdout.isTTY; // snapshot: the tests below overwrite isTTY
+  const origNoBell = process.env.AGON_NO_BELL; // snapshot: undefined means "was unset"
+  const origNoTitle = process.env.AGON_NO_TITLE; // snapshot: undefined means "was unset"
+
+  afterEach(() => {
+    (process.stdout as any).isTTY = origIsTTY; // restore the real TTY flag
+    if (origNoBell === undefined) delete process.env.AGON_NO_BELL; // unset again rather than storing the string 'undefined'
+    else process.env.AGON_NO_BELL = origNoBell;
+    if (origNoTitle === undefined) delete process.env.AGON_NO_TITLE; // same unset-vs-restore handling as above
+    else process.env.AGON_NO_TITLE = origNoTitle;
+    vi.restoreAllMocks(); // drop the stdout.write spies installed by each test
+  });
+
+  describe('bell', () => {
+    it('writes BEL (0x07) exactly once when TTY and AGON_NO_BELL unset', () => {
+      (process.stdout as any).isTTY = true;
+      delete process.env.AGON_NO_BELL;
+      const spy = vi.spyOn(process.stdout, 'write').mockReturnValue(true); // stubbed so no control bytes reach the real terminal
+      bell();
+      expect(spy).toHaveBeenCalledTimes(1);
+      expect(spy).toHaveBeenCalledWith(BEL);
+    });
+
+    it('writes nothing when AGON_NO_BELL is set', () => {
+      (process.stdout as any).isTTY = true; // TTY on, so only the env var can suppress the write
+      process.env.AGON_NO_BELL = '1';
+      const spy = vi.spyOn(process.stdout, 'write').mockReturnValue(true);
+      bell();
+      expect(spy).not.toHaveBeenCalled();
+    });
+
+    it('writes nothing when stdout is not a TTY (piped/CI)', () => {
+      (process.stdout as any).isTTY = false;
+      delete process.env.AGON_NO_BELL; // env opt-out off, so only the TTY check can suppress the write
+      const spy = vi.spyOn(process.stdout, 'write').mockReturnValue(true);
+      bell();
+      expect(spy).not.toHaveBeenCalled();
+    });
+  });
+
+  describe('setWindowTitle', () => {
+    it('writes the OSC 0 ; <label> BEL sequence when TTY and AGON_NO_TITLE unset', () => {
+      (process.stdout as any).isTTY = true;
+      delete process.env.AGON_NO_TITLE;
+      const spy = vi.spyOn(process.stdout, 'write').mockReturnValue(true);
+      setWindowTitle('agon - running');
+      expect(spy).toHaveBeenCalledTimes(1); // the whole sequence must go out in a single write
+      expect(spy).toHaveBeenCalledWith(`${OSC}agon - running${BEL}`);
+    });
+
+    it('no-ops when AGON_NO_TITLE is set', () => {
+      (process.stdout as any).isTTY = true; // TTY on, so only the env var can suppress the write
+      process.env.AGON_NO_TITLE = '1';
+      const spy = vi.spyOn(process.stdout, 'write').mockReturnValue(true);
+      setWindowTitle('agon');
+      expect(spy).not.toHaveBeenCalled();
+    });
+
+    it('no-ops when stdout is not a TTY (piped/CI)', () => {
+      (process.stdout as any).isTTY = false;
+      delete process.env.AGON_NO_TITLE; // env opt-out off, so only the TTY check can suppress the write
+      const spy = vi.spyOn(process.stdout, 'write').mockReturnValue(true);
+      setWindowTitle('agon');
+      expect(spy).not.toHaveBeenCalled();
+    });
+  });
+});