From c80edb98377c1f91bc479a4840b51dcbae191c1c Mon Sep 17 00:00:00 2001 From: Chris Izatt Date: Thu, 4 Jun 2026 01:51:00 +0100 Subject: [PATCH 01/33] refine(bug-report): home-abbreviate the captured workspace path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The auto-captured Workspace field now shows the home-relative form (`~/Documents/AGBench`) instead of the full absolute path — in the read-only preview, the saved bug-reports.md, and the pre-filled GitHub issue. Cleaner to read, and keeps the local home/user path prefix out of a shared issue. Extracts `tildifyHomePath` from ActivityPathDisplay (reusing the existing /Users/<user>/ -> ~/ logic, behaviour-preserving for activity rows) + 6 unit tests. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../src/components/BugReportSheet.test.tsx | 5 ++- .../src/components/BugReportSheet.tsx | 7 +++- .../src/lib/ActivityPathDisplay.test.ts | 32 ++++++++++++++++++- src/renderer/src/lib/ActivityPathDisplay.ts | 23 +++++++++---- 4 files changed, 58 insertions(+), 9 deletions(-) diff --git a/src/renderer/src/components/BugReportSheet.test.tsx b/src/renderer/src/components/BugReportSheet.test.tsx index c4aa751e..b6e9a11f 100644 --- a/src/renderer/src/components/BugReportSheet.test.tsx +++ b/src/renderer/src/components/BugReportSheet.test.tsx @@ -153,7 +153,10 @@ describe('BugReportSheet', () => { // Values render verbatim — these are the auto-captured strings. expect(html).toContain('1.0.1') expect(html).toContain('codex') - expect(html).toContain('/Users/dev/projects/agbench') + // Workspace path is home-abbreviated (~/) so a reporter's OS username + // never appears in the preview or the pre-filled (public) GitHub issue. 
+ expect(html).toContain('~/projects/agbench') + expect(html).not.toContain('/Users/dev/projects/agbench') expect(html).toContain('default') expect(html).toContain('4 participants') }) diff --git a/src/renderer/src/components/BugReportSheet.tsx b/src/renderer/src/components/BugReportSheet.tsx index c8e3b8c4..9a16d59e 100644 --- a/src/renderer/src/components/BugReportSheet.tsx +++ b/src/renderer/src/components/BugReportSheet.tsx @@ -1,5 +1,6 @@ import React, { useCallback, useEffect, useMemo, useRef, useState } from 'react' import { buildGitHubIssueUrl } from '../lib/githubIssueUrl' +import { tildifyHomePath } from '../lib/ActivityPathDisplay' /** * BugReportSheet — inline bug-report capture for AGBench testers. @@ -240,8 +241,12 @@ export function BugReportSheet({ return () => window.cancelAnimationFrame(frame) }, [open, initialSurface]) + // Home-abbreviate the workspace path (`/Users/<user>/…` → `~/…`) so a + // reporter's OS username never lands in the read-only preview, the local + // bug-reports.md, or the pre-filled PUBLIC GitHub issue. The project folder + // stays visible for triage; only the home/user prefix is stripped. const workspaceLabel = useMemo( - () => currentWorkspacePath || '(global chat)', + () => (currentWorkspacePath ? 
tildifyHomePath(currentWorkspacePath) : '(global chat)'), [currentWorkspacePath] ) diff --git a/src/renderer/src/lib/ActivityPathDisplay.test.ts b/src/renderer/src/lib/ActivityPathDisplay.test.ts index 0bcf38ca..9b400e9f 100644 --- a/src/renderer/src/lib/ActivityPathDisplay.test.ts +++ b/src/renderer/src/lib/ActivityPathDisplay.test.ts @@ -1,5 +1,5 @@ import { describe, it, expect } from 'vitest' -import { displayPathRelativeToWorkspace } from './ActivityPathDisplay' +import { displayPathRelativeToWorkspace, tildifyHomePath } from './ActivityPathDisplay' describe('displayPathRelativeToWorkspace', () => { describe('workspace-relative truncation', () => { @@ -160,3 +160,33 @@ describe('displayPathRelativeToWorkspace', () => { }) }) }) + +describe('tildifyHomePath', () => { + it('collapses a macOS home prefix to ~/ (strips the OS username)', () => { + expect(tildifyHomePath('/Users/bob/Documents/AGBench')).toBe('~/Documents/AGBench') + }) + + it('keeps the project folder + intermediate segments', () => { + expect(tildifyHomePath('/Users/alice/code/proj/src')).toBe('~/code/proj/src') + }) + + it('returns a non-home path unchanged (nothing to strip)', () => { + expect(tildifyHomePath('/Volumes/External/proj')).toBe('/Volumes/External/proj') + }) + + it('does not collapse bare `/Users` (no user segment)', () => { + expect(tildifyHomePath('/Users')).toBe('/Users') + }) + + it('trims surrounding whitespace before collapsing', () => { + expect(tildifyHomePath(' /Users/bob/x ')).toBe('~/x') + }) + + it('returns "" for empty / nullish / non-string inputs', () => { + expect(tildifyHomePath('')).toBe('') + expect(tildifyHomePath(' ')).toBe('') + expect(tildifyHomePath(null)).toBe('') + expect(tildifyHomePath(undefined)).toBe('') + expect(tildifyHomePath(42 as unknown as string)).toBe('') + }) +}) diff --git a/src/renderer/src/lib/ActivityPathDisplay.ts b/src/renderer/src/lib/ActivityPathDisplay.ts index 70b9efab..b799c52f 100644 --- a/src/renderer/src/lib/ActivityPathDisplay.ts 
+++ b/src/renderer/src/lib/ActivityPathDisplay.ts @@ -40,6 +40,22 @@ function startsWithSegment(haystack: string, prefix: string): boolean { return nextChar === '/' || nextChar === '\\' } +/** + * Collapse a macOS home-directory prefix (`/Users/<user>/…`) to `~/…`, + * stripping the OS username. Used for compact display AND to keep a user's + * home path — which embeds their account name — out of anything shared + * publicly, e.g. the pre-filled GitHub issue produced by the bug reporter. + * Non-home paths (and bare `/Users` with no user segment) are returned + * trimmed but otherwise unchanged. Empty / nullish / non-string → ''. + */ +export function tildifyHomePath(filePath: string | undefined | null): string { + if (!filePath || typeof filePath !== 'string') return '' + const trimmed = filePath.trim() + if (!trimmed) return '' + const homeMatch = trimmed.match(HOME_PREFIX_RE) + return homeMatch ? `~/${trimmed.slice(homeMatch[0].length)}` : trimmed +} + /** * Returns a display-friendly path: * - workspace-relative when the file lives under `workspacePath` @@ -69,10 +85,5 @@ export function displayPathRelativeToWorkspace( } } - const homeMatch = trimmedPath.match(HOME_PREFIX_RE) - if (homeMatch) { - return `~/${trimmedPath.slice(homeMatch[0].length)}` - } - - return trimmedPath + return tildifyHomePath(trimmedPath) } From 5fa16b5db65007d1ff6e556bb75ce86bff272b53 Mon Sep 17 00:00:00 2001 From: Chris Izatt Date: Thu, 4 Jun 2026 02:06:11 +0100 Subject: [PATCH 02/33] polish(bug-report): title counter, animated saved-check, crisper severity chips - Live x/140 character counter on the title (warns past 120 chars). - The 'Report saved' confirmation gains a popped checkmark + fade-in. - Selected severity chip lifts with a soft shadow + bolder label. All motion honours [data-reduce-motion='true']. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../assets/css/08-theme-picker-overrides.css | 75 ++++++++++++++++++- .../src/components/BugReportSheet.test.tsx | 3 + .../src/components/BugReportSheet.tsx | 19 ++++- 3 files changed, 93 insertions(+), 4 deletions(-) diff --git a/src/renderer/src/assets/css/08-theme-picker-overrides.css b/src/renderer/src/assets/css/08-theme-picker-overrides.css index c1d1255b..5372507b 100644 --- a/src/renderer/src/assets/css/08-theme-picker-overrides.css +++ b/src/renderer/src/assets/css/08-theme-picker-overrides.css @@ -2427,6 +2427,21 @@ .bug-report-sheet-required { color: #f0b050; } +.bug-report-sheet-label-row { + display: flex; + align-items: baseline; + justify-content: space-between; + gap: 8px; +} +.bug-report-sheet-char-counter { + font-size: 0.72em; + font-variant-numeric: tabular-nums; + color: var(--text-tertiary); + transition: color 140ms ease; +} +.bug-report-sheet-char-counter-warn { + color: #f0b050; +} .bug-report-sheet-input, .bug-report-sheet-textarea { width: 100%; @@ -2471,7 +2486,8 @@ border: 1px solid rgba(255, 255, 255, 0.1); background: rgba(255, 255, 255, 0.03); cursor: pointer; - transition: border-color 140ms ease, background 140ms ease; + transition: border-color 140ms ease, background 140ms ease, transform 140ms ease, + box-shadow 140ms ease; } .bug-report-sheet-severity-chip:hover { border-color: rgba(255, 255, 255, 0.2); @@ -2511,6 +2527,15 @@ border-color: color-mix(in srgb, #f85149 55%, transparent); background: color-mix(in srgb, #f85149 10%, rgba(255, 255, 255, 0.02)); } +/* Selected severity reads crisper: a subtle lift + soft shadow + bolder + * label, layered on top of the per-severity tint above. 
*/ +.bug-report-sheet-severity-chip-checked { + transform: translateY(-1px); + box-shadow: 0 4px 12px rgba(0, 0, 0, 0.16); +} +.bug-report-sheet-severity-chip-checked .bug-report-sheet-severity-chip-label { + font-weight: 700; +} .bug-report-sheet-context { display: flex; flex-direction: column; @@ -2580,14 +2605,62 @@ border-radius: 4px; } .bug-report-sheet-confirmation { + display: inline-flex; + align-items: center; + gap: 6px; font-size: 0.86em; font-weight: 600; color: #7fdc9a; + animation: bug-report-confirm-in 240ms ease; +} +.bug-report-sheet-confirmation-check { + display: inline-flex; + align-items: center; + justify-content: center; + width: 16px; + height: 16px; + border-radius: 50%; + background: color-mix(in srgb, #3fb950 24%, transparent); + color: #8ef0a8; + font-size: 0.72em; + line-height: 1; + animation: bug-report-check-pop 320ms cubic-bezier(0.2, 0.8, 0.2, 1.4); +} +@keyframes bug-report-confirm-in { + from { + opacity: 0; + transform: translateY(2px); + } + to { + opacity: 1; + transform: none; + } +} +@keyframes bug-report-check-pop { + 0% { + transform: scale(0); + } + 60% { + transform: scale(1.15); + } + 100% { + transform: scale(1); + } } .bug-report-sheet-footer-actions { display: flex; gap: 8px; } +/* Honour the in-app reduce-motion setting — no pop/slide for the saved + * confirmation, its check, or the selected-chip lift. */ +[data-reduce-motion='true'] .bug-report-sheet-confirmation, +[data-reduce-motion='true'] .bug-report-sheet-confirmation-check, +[data-reduce-motion='true'] .bug-report-sheet-severity-chip, +[data-reduce-motion='true'] .bug-report-sheet-severity-chip-checked { + animation: none; + transition: none; + transform: none; +} /* Light-mode tweaks. 
Mirrors the same pattern as FirstLaunchSheet — * keep the surface-1 opaque white and tone down the rgba-baked edges diff --git a/src/renderer/src/components/BugReportSheet.test.tsx b/src/renderer/src/components/BugReportSheet.test.tsx index b6e9a11f..00e3bb9f 100644 --- a/src/renderer/src/components/BugReportSheet.test.tsx +++ b/src/renderer/src/components/BugReportSheet.test.tsx @@ -61,6 +61,9 @@ describe('BugReportSheet', () => { // is empty — that's the strongest signal that the field is // required (the form refuses to submit until it's populated). expect(html).toMatch(/]*type="submit"[^>]*disabled/) + // Live character counter for the 140-char title cap (starts at 0/140). + expect(html).toContain('bug-report-sheet-char-counter') + expect(html).toContain('0/140') }) it('renders all four severity options with "minor" pre-selected', () => { diff --git a/src/renderer/src/components/BugReportSheet.tsx b/src/renderer/src/components/BugReportSheet.tsx index 9a16d59e..1c6ae128 100644 --- a/src/renderer/src/components/BugReportSheet.tsx +++ b/src/renderer/src/components/BugReportSheet.tsx @@ -413,9 +413,19 @@ export function BugReportSheet({
- +
+ + 120 ? ' bug-report-sheet-char-counter-warn' : '' + }`} + aria-hidden + > + {title.length}/140 + +
{confirmation ? ( + + ✓ + {confirmation} ) : ( From 63575968c9fc5932efe7cb764e1905bc4cf6f0db Mon Sep 17 00:00:00 2001 From: Chris Izatt Date: Thu, 4 Jun 2026 02:15:58 +0100 Subject: [PATCH 03/33] feat(empty-states): warmer no-chats / no-runs / no-workspaces with the ghost mascot Adds a reusable MascotGhost glyph (AppChromeSymbols, a leaf module) and uses it to warm three empty states: sidebar 'No chats yet' (centred ghost + a one-line nudge), sidebar 'No active runs' (small, quiet inline ghost respecting its deliberately-quiet design), and Settings 'No workspaces yet' (centred ghost above the existing hint). Co-Authored-By: Claude Opus 4.8 (1M context) --- src/renderer/src/assets/css/01-sidebar.css | 7 +++++ .../src/assets/css/04-settings-controls.css | 8 +++++- .../src/assets/css/05-polish-fx-layouts.css | 10 +++++++ .../src/components/ActiveRunsSection.tsx | 6 +++- .../src/components/AppChromeSymbols.tsx | 28 +++++++++++++++++++ src/renderer/src/components/SettingsPanel.tsx | 2 ++ src/renderer/src/components/Sidebar.tsx | 7 ++++- 7 files changed, 65 insertions(+), 3 deletions(-) diff --git a/src/renderer/src/assets/css/01-sidebar.css b/src/renderer/src/assets/css/01-sidebar.css index 225b64b1..d89b7a1f 100644 --- a/src/renderer/src/assets/css/01-sidebar.css +++ b/src/renderer/src/assets/css/01-sidebar.css @@ -106,10 +106,17 @@ /* Quiet empty-state when the persistent Active runs section has nothing * running — keeps the section anchored under Search without shouting. */ .sidebar-active-runs-empty { + display: flex; + align-items: center; + gap: 6px; padding: 3px var(--space-sm, 8px); font-size: var(--font-size-xs); color: color-mix(in srgb, var(--sidebar-text-secondary, var(--text-secondary)) 62%, transparent); } +.sidebar-active-runs-empty .mascot-ghost { + flex: none; + opacity: 0.65; +} /* * 1.0.5-SB5 — Drag-and-drop pin affordances. 
diff --git a/src/renderer/src/assets/css/04-settings-controls.css b/src/renderer/src/assets/css/04-settings-controls.css index 535524ab..820918ef 100644 --- a/src/renderer/src/assets/css/04-settings-controls.css +++ b/src/renderer/src/assets/css/04-settings-controls.css @@ -755,14 +755,20 @@ .settings-workspaces-empty { display: flex; flex-direction: column; + align-items: center; + text-align: center; gap: 6px; - padding: 16px 18px; + padding: 18px; border: 1px dashed color-mix(in srgb, var(--text-primary) 14%, transparent); border-radius: 10px; background: color-mix(in srgb, var(--text-primary) 3%, transparent); color: var(--text-secondary); font-size: var(--font-size-sm, 13px); } +.settings-workspaces-empty .mascot-ghost { + color: color-mix(in srgb, var(--text-primary) 34%, transparent); + margin-bottom: 2px; +} .settings-workspaces-empty strong { color: var(--text-primary); diff --git a/src/renderer/src/assets/css/05-polish-fx-layouts.css b/src/renderer/src/assets/css/05-polish-fx-layouts.css index 79e81f46..fe5c102c 100644 --- a/src/renderer/src/assets/css/05-polish-fx-layouts.css +++ b/src/renderer/src/assets/css/05-polish-fx-layouts.css @@ -1982,6 +1982,16 @@ font-size: var(--font-size-xs); } +/* Friendlier empty states — a centred brand ghost above the copy. 
*/ +.app-sidebar .sidebar-empty-state--ghost { + justify-items: center; + text-align: center; + gap: 6px; +} +.app-sidebar .sidebar-empty-state--ghost .mascot-ghost { + color: color-mix(in srgb, var(--text-primary) 34%, transparent); +} + .app-sidebar .run-summary { margin: 0 var(--space-md) var(--space-sm); border-radius: 14px; diff --git a/src/renderer/src/components/ActiveRunsSection.tsx b/src/renderer/src/components/ActiveRunsSection.tsx index 024c1b3c..c6b6e492 100644 --- a/src/renderer/src/components/ActiveRunsSection.tsx +++ b/src/renderer/src/components/ActiveRunsSection.tsx @@ -1,4 +1,5 @@ import { useCallback, useEffect, useMemo, useState, type JSX } from 'react' +import { MascotGhost } from './AppChromeSymbols' import type { ChatRecord, ProviderId, @@ -117,7 +118,10 @@ export function ActiveRunsSection({ {!collapsed && (
{visibleJobs.length === 0 && ( -
No active runs
+
+ + No active runs +
)} {visibleJobs.map((job) => { const chat = job.chatId ? chatById.get(job.chatId) || null : null diff --git a/src/renderer/src/components/AppChromeSymbols.tsx b/src/renderer/src/components/AppChromeSymbols.tsx index 433542ca..6e947a0f 100644 --- a/src/renderer/src/components/AppChromeSymbols.tsx +++ b/src/renderer/src/components/AppChromeSymbols.tsx @@ -131,6 +131,34 @@ export function GhostCompanionIcon() { ) } +/** + * Standalone brand-ghost glyph for empty states — same silhouette as + * GhostCompanionIcon but without the corner-symbol wrapper, so it can be + * sized via the `size` prop and tinted by the parent's `color` + * (currentColor stroke). Used by the friendlier sidebar / settings + * empty states. + */ +export function MascotGhost({ size = 32 }: { size?: number }) { + return ( + + + + + + ) +} + export function SkyWeatherIcon() { return ( diff --git a/src/renderer/src/components/SettingsPanel.tsx b/src/renderer/src/components/SettingsPanel.tsx index b44a6b5f..e5fa241a 100644 --- a/src/renderer/src/components/SettingsPanel.tsx +++ b/src/renderer/src/components/SettingsPanel.tsx @@ -1,4 +1,5 @@ import React, { useEffect, useState } from 'react' +import { MascotGhost } from './AppChromeSymbols' import type { AgenticNetworkPolicy, AgenticServiceId, @@ -4718,6 +4719,7 @@ export function SettingsPanel({
{workspaces.length === 0 ? (
+ No workspaces yet. Use Add workspace above to point AGBench at your first project folder. diff --git a/src/renderer/src/components/Sidebar.tsx b/src/renderer/src/components/Sidebar.tsx index ced8b5e6..7945271d 100644 --- a/src/renderer/src/components/Sidebar.tsx +++ b/src/renderer/src/components/Sidebar.tsx @@ -7,6 +7,7 @@ import { type MouseEvent, type ReactNode } from 'react' +import { MascotGhost } from './AppChromeSymbols' import type { WorkspaceRecord, ChatRecord, @@ -2801,7 +2802,11 @@ export function Sidebar({ ) })} {visibleGlobalChats.length === 0 && !isSidebarSearchActive && ( -
No chats yet.
+
+ + No chats yet + Hit + above to start one. +
)}
)} From aba7aa55e0bed7b953cf8bc980333e41e8b45367 Mon Sep 17 00:00:00 2001 From: Chris Izatt Date: Thu, 4 Jun 2026 02:21:37 +0100 Subject: [PATCH 04/33] polish(welcome): crisper dashboard tabs + mascot on the empty-range state - Dashboard tabs preview their underline faintly on hover (was colour-only) and gain a keyboard :focus-visible accent underline. - The 'No activity in the last 30 days' empty-range card now leads with the ghost mascot, consistent with the sidebar/settings empty states. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/renderer/src/App.tsx | 2 ++ .../assets/css/03-composer-welcome-activity.css | 17 +++++++++++++++++ 2 files changed, 19 insertions(+) diff --git a/src/renderer/src/App.tsx b/src/renderer/src/App.tsx index 50dddd5c..b7dc5916 100644 --- a/src/renderer/src/App.tsx +++ b/src/renderer/src/App.tsx @@ -124,6 +124,7 @@ import { FolderSymbolIcon, GhostCompanionIcon, LinkCircleSymbolIcon, + MascotGhost, ModelSymbolIcon, OverviewSymbolIcon, PermissionSymbolIcon, @@ -878,6 +879,7 @@ function WelcomeUsageDashboard({ replaces the stat grid / chart inside. */} {!data.hasActivity ? (
+ No activity in the last 30 days. Kick off a run on this workspace to start filling the dashboard.
diff --git a/src/renderer/src/assets/css/03-composer-welcome-activity.css b/src/renderer/src/assets/css/03-composer-welcome-activity.css index b2019ca1..7caf9b1a 100644 --- a/src/renderer/src/assets/css/03-composer-welcome-activity.css +++ b/src/renderer/src/assets/css/03-composer-welcome-activity.css @@ -1124,6 +1124,19 @@ .welcome-usage-tab.active::after { background: var(--accent); } +/* Welcome L9 polish — the underline now previews faintly on hover (was + * colour-only) and the tab is keyboard-focusable with a visible accent + * underline, so the tab strip reads as crisp under both pointer + keyboard. */ +.welcome-usage-tab:hover:not(.active)::after { + background: color-mix(in srgb, var(--accent) 34%, transparent); +} +.welcome-usage-tab:focus-visible { + outline: none; + color: var(--text-primary); +} +.welcome-usage-tab:focus-visible::after { + background: color-mix(in srgb, var(--accent) 58%, transparent); +} /* Range toggle (elsewhere in the app) keeps its historical pill style. 
*/ .welcome-usage-range { @@ -2096,6 +2109,10 @@ font-weight: 600; color: color-mix(in srgb, var(--accent) 80%, var(--text-primary) 20%); } +.welcome-usage-empty--range .mascot-ghost { + color: color-mix(in srgb, var(--text-primary) 32%, transparent); + margin-bottom: 4px; +} /* 1.0.6-CRUX43 — the model-meter FILL also reads its provider brand colour via * `currentColor` (see .welcome-usage-model-meter-fill), so it's added to each From 29296821204bb803a3c3213e3947cf038894443a Mon Sep 17 00:00:00 2001 From: Chris Izatt Date: Thu, 4 Jun 2026 02:25:21 +0100 Subject: [PATCH 05/33] tweak: use Avenir Next for AGBench default font --- src/main/store/index.ts | 22 ++++++++++++++++++-- src/renderer/src/lib/typefaceOptions.test.ts | 12 +++++++++++ src/renderer/src/lib/typefaceOptions.ts | 11 ++++++++-- 3 files changed, 41 insertions(+), 4 deletions(-) diff --git a/src/main/store/index.ts b/src/main/store/index.ts index 1c06f7b5..ad78109c 100644 --- a/src/main/store/index.ts +++ b/src/main/store/index.ts @@ -97,6 +97,10 @@ const runEventHashCache = new Map() // so their global chats have a usable runtime out of the box. Unconditional: // unused default profiles for a force-disabled provider are harmless data. 
const providerIds: ProviderId[] = ['gemini', 'codex', 'claude', 'kimi', 'grok', 'cursor'] +const LEGACY_AGBENCH_FONT_STACK = + '"SF Pro", "SF Pro Text", "SF Pro Display", -apple-system, BlinkMacSystemFont, "Segoe UI", "Helvetica Neue", Roboto, Arial, sans-serif' +const AGBENCH_DEFAULT_FONT_STACK = + '"Avenir Next", Avenir, system-ui, -apple-system, BlinkMacSystemFont, "SF Pro Text", "Segoe UI", sans-serif' const defaultSettings: AppSettings = { activeProvider: 'gemini', @@ -120,8 +124,7 @@ const defaultSettings: AppSettings = { userBubbleColor: 'system', promptSurfaceStyle: 'liquid_glass', composerStyle: 'default', - transcriptFontFamily: - '"SF Pro", "SF Pro Text", "SF Pro Display", -apple-system, BlinkMacSystemFont, "Segoe UI", "Helvetica Neue", Roboto, Arial, sans-serif', + transcriptFontFamily: AGBENCH_DEFAULT_FONT_STACK, composerFontFamily: 'match-transcript', // 1.0.5-EW25 — Display currency for cost / token-spend chips. // USD by default; user can switch to GBP / EUR via Settings → @@ -216,6 +219,13 @@ function objectOrUndefined(value: T | null | undefined): T | u return value && typeof value === 'object' && !Array.isArray(value) ? value : undefined } +function normalizeSettingsFontFamily(value: unknown, fallback: string): string { + if (typeof value !== 'string') return fallback + const trimmed = value.trim() + if (!trimmed) return fallback + return trimmed === LEGACY_AGBENCH_FONT_STACK ? AGBENCH_DEFAULT_FONT_STACK : trimmed +} + function writeJson(filePath: string, data: T) { const tempPath = `${filePath}.${process.pid}.${Date.now()}.tmp` let fd: number | null = null @@ -384,6 +394,14 @@ export class AppStore { ? null : defaultSettings.defaultGeminiAuthProfileId, geminiAuthProfiles: Array.isArray(stored.geminiAuthProfiles) ? 
stored.geminiAuthProfiles : [], + transcriptFontFamily: normalizeSettingsFontFamily( + stored.transcriptFontFamily, + defaultSettings.transcriptFontFamily || AGBENCH_DEFAULT_FONT_STACK + ), + composerFontFamily: normalizeSettingsFontFamily( + stored.composerFontFamily, + defaultSettings.composerFontFamily || 'match-transcript' + ), // Phase M1 — coerce any non-enum value (missing, typo'd, legacy) // back to the safe default so the eventual API-vs-CLI dispatch // logic never sees an unexpected mode. diff --git a/src/renderer/src/lib/typefaceOptions.test.ts b/src/renderer/src/lib/typefaceOptions.test.ts index 2027ad64..6605780e 100644 --- a/src/renderer/src/lib/typefaceOptions.test.ts +++ b/src/renderer/src/lib/typefaceOptions.test.ts @@ -4,6 +4,7 @@ import { COMPOSER_FONT_OPTIONS, CUSTOM_FONT_SELECT_VALUE, FONT_STACKS, + LEGACY_AGBENCH_FONT_STACK, TRANSCRIPT_FONT_OPTIONS, getFontSelectValue, normalizeComposerFontFamily, @@ -25,6 +26,7 @@ describe('typeface option lists', () => { expect(TRANSCRIPT_FONT_OPTIONS).not.toContainEqual( expect.objectContaining({ value: COMPOSER_FONT_MATCH_TRANSCRIPT }) ) + expect(FONT_STACKS.agbench).toContain('"Avenir Next", Avenir') }) it('puts match-transcript first for composer options', () => { @@ -44,6 +46,10 @@ describe('normalizeFontFamily', () => { expect(normalizeFontFamily(undefined, FONT_STACKS.system)).toBe(FONT_STACKS.system) expect(normalizeFontFamily(' ', FONT_STACKS.compact)).toBe(FONT_STACKS.compact) }) + + it('normalizes the old SF Pro AGBench default to the current default stack', () => { + expect(normalizeFontFamily(LEGACY_AGBENCH_FONT_STACK)).toBe(FONT_STACKS.agbench) + }) }) describe('composer font resolution', () => { @@ -71,6 +77,12 @@ describe('getFontSelectValue', () => { ) }) + it('maps the old SF Pro AGBench default to the current default option', () => { + expect(getFontSelectValue(TRANSCRIPT_FONT_OPTIONS, LEGACY_AGBENCH_FONT_STACK)).toBe( + FONT_STACKS.agbench + ) + }) + it('maps unknown custom stacks to the 
custom select value', () => { expect(getFontSelectValue(TRANSCRIPT_FONT_OPTIONS, '"Custom", sans-serif')).toBe( CUSTOM_FONT_SELECT_VALUE diff --git a/src/renderer/src/lib/typefaceOptions.ts b/src/renderer/src/lib/typefaceOptions.ts index a7a17262..27d4f72d 100644 --- a/src/renderer/src/lib/typefaceOptions.ts +++ b/src/renderer/src/lib/typefaceOptions.ts @@ -1,6 +1,9 @@ +export const LEGACY_AGBENCH_FONT_STACK = + '"SF Pro", "SF Pro Text", "SF Pro Display", -apple-system, BlinkMacSystemFont, "Segoe UI", "Helvetica Neue", Roboto, Arial, sans-serif' + export const FONT_STACKS = { agbench: - '"SF Pro", "SF Pro Text", "SF Pro Display", -apple-system, BlinkMacSystemFont, "Segoe UI", "Helvetica Neue", Roboto, Arial, sans-serif', + '"Avenir Next", Avenir, system-ui, -apple-system, BlinkMacSystemFont, "SF Pro Text", "Segoe UI", sans-serif', compact: '-apple-system, BlinkMacSystemFont, "SF Pro Text", "Segoe UI", sans-serif', humanist: '"Avenir Next", "SF Pro Text", -apple-system, BlinkMacSystemFont, "Segoe UI", sans-serif', @@ -45,6 +48,7 @@ export function normalizeFontFamily( ): string { if (typeof value !== 'string') return fallback const trimmed = value.trim() + if (trimmed === LEGACY_AGBENCH_FONT_STACK) return FONT_STACKS.agbench return trimmed.length > 0 ? trimmed : fallback } @@ -63,7 +67,10 @@ export function resolveComposerFontFamily( } export function getFontSelectValue(options: TypefaceOption[], value: string): string { - return options.some((option) => option.value === value) ? value : CUSTOM_FONT_SELECT_VALUE + const normalizedValue = normalizeFontFamily(value, value) + return options.some((option) => option.value === normalizedValue) + ? 
normalizedValue + : CUSTOM_FONT_SELECT_VALUE } export function quoteInstalledFontFamily(fontFamily: string): string { From 5aab0d7f34f4a5ab3299ca1fee237cf22913a138 Mon Sep 17 00:00:00 2001 From: Chris Izatt Date: Thu, 4 Jun 2026 02:28:07 +0100 Subject: [PATCH 06/33] fix(provider-accents): ensemble hierarchy tiles use the canonical provider colour The four original providers' tile borders referenced `--provider-X` (undefined -> stale hardcoded fallbacks like #d68a4d) while Grok/Cursor in the same block used the canonical `--provider-X-color`. Unify all six to `--provider-X-color` (theme.css source of truth: gemini #2563EB / codex #6366F1 / claude #D97706 / kimi #84A33B) with corrected fallbacks, so the tile tints match every other provider-tinted surface. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../src/assets/css/03-composer-welcome-activity.css | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/renderer/src/assets/css/03-composer-welcome-activity.css b/src/renderer/src/assets/css/03-composer-welcome-activity.css index 7caf9b1a..99cd5a8d 100644 --- a/src/renderer/src/assets/css/03-composer-welcome-activity.css +++ b/src/renderer/src/assets/css/03-composer-welcome-activity.css @@ -803,16 +803,16 @@ min-width: 78px; } .ensemble-hierarchy-tile.provider-claude { - border-color: color-mix(in srgb, var(--provider-claude, #d68a4d) 48%, var(--panel-border)); + border-color: color-mix(in srgb, var(--provider-claude-color, #d97706) 48%, var(--panel-border)); } .ensemble-hierarchy-tile.provider-codex { - border-color: color-mix(in srgb, var(--provider-codex, #3b82f6) 48%, var(--panel-border)); + border-color: color-mix(in srgb, var(--provider-codex-color, #6366f1) 48%, var(--panel-border)); } .ensemble-hierarchy-tile.provider-gemini { - border-color: color-mix(in srgb, var(--provider-gemini, #4d8edb) 48%, var(--panel-border)); + border-color: color-mix(in srgb, var(--provider-gemini-color, #2563eb) 48%, var(--panel-border)); } 
.ensemble-hierarchy-tile.provider-kimi { - border-color: color-mix(in srgb, var(--provider-kimi, #6fb04a) 48%, var(--panel-border)); + border-color: color-mix(in srgb, var(--provider-kimi-color, #84a33b) 48%, var(--panel-border)); } .ensemble-hierarchy-tile.provider-grok { border-color: color-mix(in srgb, var(--provider-grok-color) 48%, var(--panel-border)); From 1b3207a4a9279c74f064e1d09a372216fe12ae70 Mon Sep 17 00:00:00 2001 From: Chris Izatt Date: Thu, 4 Jun 2026 02:53:39 +0100 Subject: [PATCH 07/33] fix(theme): Diff Studio + File Editor read correctly on light themes The popouts hardcoded a dark-mode palette: CodeMirror syntax + chrome colours, diff-list white striping, and missing light-theme --diff-* vars all rendered light-on-light (unreadable) under light/citrus/mist/sage/alabaster. - FileEditorPanel: CodeMirror highlight + chrome colours now read --cm-* vars. - theme.css: --cm-* defaults are the EXACT original dark palette (dark unchanged); light themes get a readable GitHub-light-ish editor palette + the missing --diff-add/del/hunk overrides. - diff list/lines: rgba(255,255,255,..) striping -> color-mix(var(--text-primary)..) so it adapts per theme. - popout BrowserWindow backgroundColor follows the theme (no dark flash on light). Co-Authored-By: Claude Opus 4.8 (1M context) --- src/main/index.ts | 27 +++- .../css/03-composer-welcome-activity.css | 29 +++-- .../src/components/FileEditorPanel.tsx | 37 +++--- src/renderer/src/styles/theme.css | 118 ++++++++++++++++++ 4 files changed, 180 insertions(+), 31 deletions(-) diff --git a/src/main/index.ts b/src/main/index.ts index ec239c2d..ac832165 100644 --- a/src/main/index.ts +++ b/src/main/index.ts @@ -13147,6 +13147,31 @@ function resolveNativeVibrancy( return useNativeGlass ? NATIVE_GLASS_VIBRANCY : undefined } +/* + * Per-theme opaque backdrop for popout BrowserWindows. Before React + * mounts (and applies `data-theme`), the OS paints `backgroundColor`. 
+ * A hardcoded `#1e1e1e` flashed a dark slab on the light themes + * (light/citrus/mist/sage/alabaster), which is jarring. We mirror + * each light theme's `--app-bg` so the pre-paint matches the rendered + * surface; every dark theme (and `system`/`dark`, which the renderer + * resolves to the dark `:root`) keeps the original `#1e1e1e`. + * Returns undefined when a glass window is used (caller passes the + * transparent backdrop in that case). + */ +const LIGHT_THEME_POPOUT_BACKDROPS: Record = { + light: '#f4f6f8', + citrus: '#f4f6f8', + mist: '#eef4f6', + sage: '#f0f5f0', + alabaster: '#f4f3ef' +} + +function resolvePopoutBackgroundColor(useGlassWindow: boolean): string { + if (useGlassWindow) return '#00000000' + const theme = AppStore.getSettings().themeAppearance + return LIGHT_THEME_POPOUT_BACKDROPS[theme] ?? '#1e1e1e' +} + function resolveWorkspaceChild(workspace: string, filePath: string): string { const workspaceRoot = resolve(workspace) const targetPath = isAbsolute(filePath) ? resolve(filePath) : resolve(workspaceRoot, filePath) @@ -13671,7 +13696,7 @@ async function openWorkspacePopout(input: unknown): Promise<{ ok: true }> { : undefined, visualEffectState: 'active', transparent: false, - backgroundColor: useGlassWindow ? '#00000000' : '#1e1e1e', + backgroundColor: resolvePopoutBackgroundColor(useGlassWindow), ...(process.platform === 'linux' ? 
{ icon } : {}), webPreferences: { preload: join(__dirname, '../preload/index.js'), diff --git a/src/renderer/src/assets/css/03-composer-welcome-activity.css b/src/renderer/src/assets/css/03-composer-welcome-activity.css index 99cd5a8d..d5e24c29 100644 --- a/src/renderer/src/assets/css/03-composer-welcome-activity.css +++ b/src/renderer/src/assets/css/03-composer-welcome-activity.css @@ -5161,15 +5161,18 @@ } .diff-file-row:nth-child(odd) { - background: rgba(255, 255, 255, 0.02); + background: color-mix(in srgb, var(--text-primary) 2%, transparent); } .diff-file-row + .diff-file-row { - border-top: 1px solid rgba(255, 255, 255, 0.06); + border-top: 1px solid color-mix(in srgb, var(--text-primary) 6%, transparent); } -.diff-file-row:hover { background: rgba(255,255,255,0.04); } -.diff-file-row.selected { background: rgba(255,255,255,0.06); border-left-color: var(--accent); } +.diff-file-row:hover { background: color-mix(in srgb, var(--text-primary) 5%, transparent); } +.diff-file-row.selected { + background: color-mix(in srgb, var(--text-primary) 8%, transparent); + border-left-color: var(--accent); +} button.diff-file-row { appearance: none; @@ -5193,7 +5196,7 @@ button.diff-file-row { .diff-file-name { flex: 1; white-space: nowrap; overflow: hidden; text-overflow: ellipsis; } .diff-file-type-icon { - border: 1px solid rgba(255,255,255,0.12); + border: 1px solid color-mix(in srgb, var(--text-primary) 12%, transparent); flex-shrink: 0; } @@ -5225,7 +5228,7 @@ button.diff-file-row { .diff-file-badge.modified { background: rgba(90,140,255,0.12); color: var(--accent); } .diff-file-badge.deleted { background: rgba(229,77,77,0.12); color: var(--danger); } .diff-file-badge.untracked { background: rgba(245,166,35,0.12); color: var(--warning); } -.diff-file-badge.noise { background: rgba(255,255,255,0.06); color: var(--text-muted); } +.diff-file-badge.noise { background: color-mix(in srgb, var(--text-primary) 6%, transparent); color: var(--text-muted); } .diff-detail { 
flex: 1; @@ -5242,7 +5245,7 @@ button.diff-file-row { } .diff-lines-section { - border-top: 1px solid rgba(255, 255, 255, 0.06); + border-top: 1px solid color-mix(in srgb, var(--text-primary) 6%, transparent); background: color-mix(in srgb, var(--app-bg-sunken) 70%, transparent); } @@ -5254,8 +5257,8 @@ button.diff-file-row { padding: 3px var(--space-md); font-size: var(--font-size-xs); color: var(--text-muted); - background: rgba(255, 255, 255, 0.04); - border-bottom: 1px solid rgba(255, 255, 255, 0.06); + background: color-mix(in srgb, var(--text-primary) 4%, transparent); + border-bottom: 1px solid color-mix(in srgb, var(--text-primary) 6%, transparent); } .diff-detail-header { @@ -5286,8 +5289,8 @@ button.diff-file-row { color: var(--text-tertiary); text-align: right; user-select: none; - border-right: 1px solid rgba(255, 255, 255, 0.05); - background: rgba(0, 0, 0, 0.10); + border-right: 1px solid color-mix(in srgb, var(--text-primary) 5%, transparent); + background: color-mix(in srgb, var(--text-primary) 6%, transparent); } .diff-line-code { @@ -5296,7 +5299,7 @@ button.diff-file-row { .diff-line.meta { color: var(--text-muted); - background: rgba(255, 255, 255, 0.025); + background: color-mix(in srgb, var(--text-primary) 2.5%, transparent); } .diff-line.add .diff-line-code, @@ -5312,7 +5315,7 @@ button.diff-file-row { } .diff-line + .diff-line { - border-top: 1px solid rgba(255, 255, 255, 0.05); + border-top: 1px solid color-mix(in srgb, var(--text-primary) 5%, transparent); } .diff-line.add { background: var(--diff-add-bg); color: var(--diff-add-text); } diff --git a/src/renderer/src/components/FileEditorPanel.tsx b/src/renderer/src/components/FileEditorPanel.tsx index 747f7801..39f352bf 100644 --- a/src/renderer/src/components/FileEditorPanel.tsx +++ b/src/renderer/src/components/FileEditorPanel.tsx @@ -58,22 +58,22 @@ const codeEditorTheme = EditorView.theme( whiteSpace: 'pre' }, '.cm-gutters': { - background: 'rgba(0,0,0,0.18)', + background: 
'var(--cm-gutter-bg)', color: 'var(--text-muted)', - borderRight: '1px solid rgba(255,255,255,0.06)' + borderRight: '1px solid var(--cm-gutter-border)' }, '.cm-activeLine': { - background: 'rgba(255,255,255,0.045)' + background: 'var(--cm-active-line)' }, '.cm-activeLineGutter': { - background: 'rgba(255,255,255,0.045)', + background: 'var(--cm-active-line)', color: 'var(--text-secondary)' }, '.cm-selectionBackground, &.cm-focused .cm-selectionBackground': { background: 'color-mix(in srgb, var(--accent) 34%, transparent)' }, '.cm-matchingBracket, .cm-nonmatchingBracket': { - background: 'rgba(255,255,255,0.12)', + background: 'var(--cm-bracket-match)', outline: '1px solid var(--accent)' }, '.cm-line ::selection, .cm-content ::selection': { @@ -84,23 +84,26 @@ const codeEditorTheme = EditorView.theme( ) const codeHighlightStyle = HighlightStyle.define([ - { tag: tags.keyword, color: '#ff8fb3', fontWeight: '600' }, - { tag: [tags.name, tags.deleted, tags.character, tags.macroName], color: '#f6f0ff' }, - { tag: [tags.propertyName, tags.variableName, tags.labelName], color: '#b9d7ff' }, - { tag: [tags.function(tags.variableName), tags.function(tags.propertyName)], color: '#8fd6ff' }, - { tag: [tags.className, tags.definition(tags.typeName), tags.typeName], color: '#ffd27d' }, - { tag: [tags.number, tags.bool, tags.null, tags.atom], color: '#c7a6ff' }, - { tag: [tags.string, tags.special(tags.string)], color: '#9be69f' }, - { tag: [tags.regexp, tags.escape], color: '#86e1d1' }, + { tag: tags.keyword, color: 'var(--cm-keyword)', fontWeight: '600' }, + { tag: [tags.name, tags.deleted, tags.character, tags.macroName], color: 'var(--cm-name)' }, + { tag: [tags.propertyName, tags.variableName, tags.labelName], color: 'var(--cm-property)' }, + { + tag: [tags.function(tags.variableName), tags.function(tags.propertyName)], + color: 'var(--cm-function)' + }, + { tag: [tags.className, tags.definition(tags.typeName), tags.typeName], color: 'var(--cm-type)' }, + { tag: 
[tags.number, tags.bool, tags.null, tags.atom], color: 'var(--cm-number)' }, + { tag: [tags.string, tags.special(tags.string)], color: 'var(--cm-string)' }, + { tag: [tags.regexp, tags.escape], color: 'var(--cm-regexp)' }, { tag: [tags.comment, tags.lineComment, tags.blockComment], - color: 'rgba(255,255,255,0.42)', + color: 'var(--cm-comment)', fontStyle: 'italic' }, - { tag: tags.meta, color: '#9aa7ff' }, - { tag: tags.heading, color: '#f4f7ff', fontWeight: '700' }, + { tag: tags.meta, color: 'var(--cm-meta)' }, + { tag: tags.heading, color: 'var(--cm-heading)', fontWeight: '700' }, { tag: tags.link, color: 'var(--accent)', textDecoration: 'underline' }, - { tag: tags.invalid, color: '#ff8080' } + { tag: tags.invalid, color: 'var(--cm-invalid)' } ]) const shellLanguage = StreamLanguage.define(shell) diff --git a/src/renderer/src/styles/theme.css b/src/renderer/src/styles/theme.css index 6d9e135d..b2177e7c 100644 --- a/src/renderer/src/styles/theme.css +++ b/src/renderer/src/styles/theme.css @@ -139,6 +139,32 @@ --diff-del-text: #f08080; --diff-hunk: #5a8cff; + /* CodeMirror syntax + editor chrome (File Editor / popout). + * Defaults here are the original dark-mode palette so dark themes + * render IDENTICALLY to before var-ization. Light themes + * (light/citrus/mist/sage/alabaster) override these with a + * readable dark-on-light editor palette below. FileEditorPanel.tsx + * consumes these via `color: var(--cm-…)` inside its CodeMirror + * HighlightStyle + EditorView.theme — CodeMirror emits those + * declarations into a stylesheet, so the vars resolve at runtime + * and follow `data-theme` like any other token. 
*/ + --cm-keyword: #ff8fb3; + --cm-name: #f6f0ff; + --cm-property: #b9d7ff; + --cm-function: #8fd6ff; + --cm-type: #ffd27d; + --cm-number: #c7a6ff; + --cm-string: #9be69f; + --cm-regexp: #86e1d1; + --cm-comment: rgba(255, 255, 255, 0.42); + --cm-meta: #9aa7ff; + --cm-heading: #f4f7ff; + --cm-invalid: #ff8080; + --cm-gutter-bg: rgba(0, 0, 0, 0.18); + --cm-gutter-border: rgba(255, 255, 255, 0.06); + --cm-active-line: rgba(255, 255, 255, 0.045); + --cm-bracket-match: rgba(255, 255, 255, 0.12); + /* Tool Activity */ --tool-running: #5a8cff; --tool-success: #4cc38a; @@ -228,6 +254,30 @@ --theme-wash-soft: 8%; --theme-wash-medium: 13%; --theme-wash-strong: 18%; + /* Light-theme diff: dark text on faint tints (was bright-on-white). */ + --diff-add-bg: rgba(12, 132, 82, 0.16); + --diff-add-text: #0a7f50; + --diff-del-bg: rgba(198, 52, 66, 0.16); + --diff-del-text: #bc2e3d; + --diff-hunk: #0550ae; + /* Light-editor syntax palette (muted VS Code / GitHub light). + * Dark-on-near-white with WCAG-ish contrast. */ + --cm-keyword: #cf222e; + --cm-name: #1f2328; + --cm-property: #0550ae; + --cm-function: #8250df; + --cm-type: #953800; + --cm-number: #0a7c42; + --cm-string: #0a3069; + --cm-regexp: #116329; + --cm-comment: #6e7781; + --cm-meta: #8250df; + --cm-heading: #0550ae; + --cm-invalid: #cf222e; + --cm-gutter-bg: rgba(18, 21, 27, 0.05); + --cm-gutter-border: rgba(18, 21, 27, 0.10); + --cm-active-line: rgba(18, 21, 27, 0.045); + --cm-bracket-match: rgba(18, 21, 27, 0.12); } [data-theme="midnight"] { @@ -372,6 +422,28 @@ --theme-wash-soft: 8%; --theme-wash-medium: 13%; --theme-wash-strong: 18%; + /* Light-theme diff + editor palette (mist — teal-leaning). 
*/ + --diff-add-bg: rgba(12, 132, 82, 0.16); + --diff-add-text: #0a7f50; + --diff-del-bg: rgba(198, 52, 66, 0.16); + --diff-del-text: #bc2e3d; + --diff-hunk: #2e7180; + --cm-keyword: #b91c50; + --cm-name: #142127; + --cm-property: #0f6b78; + --cm-function: #7a52c8; + --cm-type: #8a4b14; + --cm-number: #0a7c42; + --cm-string: #0a4f5c; + --cm-regexp: #116329; + --cm-comment: #5a7077; + --cm-meta: #7a52c8; + --cm-heading: #2e7180; + --cm-invalid: #cf222e; + --cm-gutter-bg: rgba(18, 29, 35, 0.05); + --cm-gutter-border: rgba(18, 29, 35, 0.10); + --cm-active-line: rgba(18, 29, 35, 0.045); + --cm-bracket-match: rgba(18, 29, 35, 0.12); } [data-theme="sage"] { @@ -414,6 +486,28 @@ --theme-wash-soft: 8%; --theme-wash-medium: 13%; --theme-wash-strong: 18%; + /* Light-theme diff + editor palette (sage — green-leaning). */ + --diff-add-bg: rgba(12, 132, 82, 0.16); + --diff-add-text: #0a7f50; + --diff-del-bg: rgba(198, 52, 66, 0.16); + --diff-del-text: #bc2e3d; + --diff-hunk: #4b7a4f; + --cm-keyword: #b5263b; + --cm-name: #16201a; + --cm-property: #2c6a8c; + --cm-function: #7a52c8; + --cm-type: #8a4b14; + --cm-number: #2f6f1f; + --cm-string: #0a3069; + --cm-regexp: #116329; + --cm-comment: #5f6f5f; + --cm-meta: #7a52c8; + --cm-heading: #4b7a4f; + --cm-invalid: #cf222e; + --cm-gutter-bg: rgba(22, 32, 22, 0.05); + --cm-gutter-border: rgba(22, 32, 22, 0.10); + --cm-active-line: rgba(22, 32, 22, 0.045); + --cm-bracket-match: rgba(22, 32, 22, 0.12); } [data-theme="graphite"] { @@ -726,6 +820,30 @@ --theme-wash-soft: 8%; --theme-wash-medium: 13%; --theme-wash-strong: 18%; + /* Light-theme diff (matches the alabaster popout override in + * 09-ensemble-work-session.css so the in-app diff viewer reads + * the same) + light-editor syntax palette. 
*/ + --diff-add-bg: rgba(12, 132, 82, 0.16); + --diff-add-text: #0a7f50; + --diff-del-bg: rgba(198, 52, 66, 0.16); + --diff-del-text: #bc2e3d; + --diff-hunk: #596174; + --cm-keyword: #cf222e; + --cm-name: #1f2328; + --cm-property: #0550ae; + --cm-function: #8250df; + --cm-type: #953800; + --cm-number: #0a7c42; + --cm-string: #0a3069; + --cm-regexp: #116329; + --cm-comment: #6e7781; + --cm-meta: #8250df; + --cm-heading: #0550ae; + --cm-invalid: #cf222e; + --cm-gutter-bg: rgba(18, 21, 27, 0.05); + --cm-gutter-border: rgba(18, 21, 27, 0.10); + --cm-active-line: rgba(18, 21, 27, 0.045); + --cm-bracket-match: rgba(18, 21, 27, 0.12); /* Cool halo wash — lavender/blue at very low alpha. The * polar inverse of obsidian's warm peach/copper halo. */ --alabaster-halo-cool: rgba(120, 140, 200, 0.10); From a9d5482bf9743a83d5997266cd1da1ece8b88581 Mon Sep 17 00:00:00 2001 From: Chris Izatt Date: Thu, 4 Jun 2026 11:09:56 +0100 Subject: [PATCH 08/33] tweak: replace status rails with rim highlights --- .../assets/css/02-transcript-messages-fx.css | 51 +++++++++++++------ .../assets/css/06-component-panels-modals.css | 22 ++++---- 2 files changed, 47 insertions(+), 26 deletions(-) diff --git a/src/renderer/src/assets/css/02-transcript-messages-fx.css b/src/renderer/src/assets/css/02-transcript-messages-fx.css index 7afc4304..7388d0f1 100644 --- a/src/renderer/src/assets/css/02-transcript-messages-fx.css +++ b/src/renderer/src/assets/css/02-transcript-messages-fx.css @@ -3185,12 +3185,14 @@ /* Ensemble round-status / handback chrome (round opened/closed, @-mention * routing, "control returned to you"). Orchestration STATE CHANGES, not - * participant prose — a left accent bar + faint tint marks them as system + * participant prose — a rim highlight + faint tint marks them as system * authority instead of the muted system-note styling. Box-shadow + background * only (no padding/border added) so the row's measured height is unchanged. 
*/ .message-bubble.system.system-round-status { background: color-mix(in srgb, var(--accent) 7%, transparent); - box-shadow: inset 3px 0 0 color-mix(in srgb, var(--accent) 70%, transparent); + box-shadow: + inset 0 0 0 1px color-mix(in srgb, var(--accent) 42%, transparent), + 0 0 0 1px color-mix(in srgb, var(--accent) 9%, transparent); border-radius: 8px; } @@ -3204,12 +3206,14 @@ * strip mirrors the chip layouts in `.ensemble-above-row-chips` * so the visual vocabulary stays consistent across the app. */ .participant-health-card { + --participant-health-rim: color-mix(in srgb, var(--panel-border) 60%, transparent); width: min(100%, 900px); margin: 6px 0; padding: 10px 12px; border-radius: 12px; - border: 1px solid color-mix(in srgb, var(--panel-border) 60%, transparent); + border: 1px solid var(--participant-health-rim); background: color-mix(in srgb, var(--surface-elevated) 50%, transparent); + box-shadow: inset 0 0 0 1px color-mix(in srgb, var(--text-primary) 3%, transparent); display: flex; flex-direction: column; gap: 8px; @@ -3217,15 +3221,21 @@ color: var(--text-secondary); } .participant-health-card.all-ok { - /* Subtle green-tinted left rail when every participant is + /* Subtle green-tinted rim when every participant is * reachable. Helps the user skim a long transcript and see * "yep, the panel was healthy here" without parsing chips. */ - border-left: 3px solid color-mix(in srgb, #2ea043 55%, transparent); + --participant-health-rim: color-mix(in srgb, #2ea043 42%, var(--panel-border)); + box-shadow: + inset 0 0 0 1px color-mix(in srgb, #2ea043 9%, transparent), + 0 0 0 1px color-mix(in srgb, #2ea043 8%, transparent); } .participant-health-card.has-failures { - /* Amber rail when any participant came back unreachable. The + /* Amber rim when any participant came back unreachable. The * chip strip below shows WHICH one(s). 
*/ - border-left: 3px solid color-mix(in srgb, #f0a93f 65%, transparent); + --participant-health-rim: color-mix(in srgb, #f0a93f 50%, var(--panel-border)); + box-shadow: + inset 0 0 0 1px color-mix(in srgb, #f0a93f 10%, transparent), + 0 0 0 1px color-mix(in srgb, #f0a93f 8%, transparent); } .participant-health-card-header { display: flex; @@ -3318,14 +3328,13 @@ .subthread-return-card { width: min(100%, 860px); - border: 1px solid rgb(var(--agent-accent-rgb) / 0.2); - border-left: 3px solid rgb(var(--agent-accent-rgb) / 0.62); + border: 1px solid rgb(var(--agent-accent-rgb) / 0.28); border-radius: 8px; background: linear-gradient(135deg, rgb(var(--agent-accent-rgb) / 0.12), transparent 58%), rgb(255 255 255 / 0.035); box-shadow: - inset 0 0 0 1px rgb(255 255 255 / 0.024), + inset 0 0 0 1px rgb(var(--agent-accent-rgb) / 0.08), 0 14px 34px rgb(0 0 0 / 0.16); color: var(--text-primary); padding: 0.78rem 0.9rem 0.9rem; @@ -3800,25 +3809,35 @@ /* QMOD (1.0.3) — `ask_user_question` MCP tool surface. Extends the * existing .plan-choice-card visual; only adds the bits that diverge: * - Context sub-paragraph beneath the question. - * - Provider-tinted left border so the user sees who's asking. + * - Provider-tinted rim so the user sees who's asking. * - Free-text textarea path (Cmd/Ctrl+Enter submits, Esc dismisses). * - Dismiss × in the top-right that cancels the parked tool call. 
*/ .agent-question-card { position: relative; - border-left-width: 3px; + --agent-question-rim: rgb(var(--agent-accent-rgb) / 0.24); + border-color: var(--agent-question-rim); + box-shadow: + inset 0 0 0 1px rgb(var(--agent-accent-rgb) / 0.06), + 0 0 18px rgb(var(--agent-accent-rgb) / 0.07); } .agent-question-card.provider-codex { - border-left-color: var(--provider-codex-color); + --agent-question-rim: color-mix(in srgb, var(--provider-codex-color) 58%, transparent); } .agent-question-card.provider-claude { - border-left-color: var(--provider-claude-color); + --agent-question-rim: color-mix(in srgb, var(--provider-claude-color) 58%, transparent); } .agent-question-card.provider-gemini { - border-left-color: var(--provider-gemini-color); + --agent-question-rim: color-mix(in srgb, var(--provider-gemini-color) 58%, transparent); } .agent-question-card.provider-kimi { - border-left-color: var(--provider-kimi-color); + --agent-question-rim: color-mix(in srgb, var(--provider-kimi-color) 58%, transparent); +} +.agent-question-card.provider-grok { + --agent-question-rim: color-mix(in srgb, var(--provider-grok-color) 58%, transparent); +} +.agent-question-card.provider-cursor { + --agent-question-rim: color-mix(in srgb, var(--provider-cursor-color) 58%, transparent); } .agent-question-card-question { font-weight: 600; diff --git a/src/renderer/src/assets/css/06-component-panels-modals.css b/src/renderer/src/assets/css/06-component-panels-modals.css index 8911f253..d43ca323 100644 --- a/src/renderer/src/assets/css/06-component-panels-modals.css +++ b/src/renderer/src/assets/css/06-component-panels-modals.css @@ -55,8 +55,9 @@ margin: 3px 0 5px; padding: 6px 10px; color: color-mix(in srgb, var(--text-primary) 88%, transparent); - border-left: 1px solid color-mix(in srgb, var(--accent-primary) 35%, transparent); - background: linear-gradient(90deg, color-mix(in srgb, var(--accent-primary) 8%, transparent), transparent 70%); + border: 1px solid color-mix(in srgb, 
var(--accent-primary) 24%, transparent); + background: color-mix(in srgb, var(--accent-primary) 5%, transparent); + box-shadow: inset 0 0 0 1px color-mix(in srgb, var(--accent-primary) 5%, transparent); border-radius: 10px; } @@ -212,29 +213,31 @@ * `{ input, result, timeline }` view — kept distinct from the line so * the transcript stays scannable even with multiple traces expanded. * - * Status comes through the line's left-edge accent (data-status) and + * Status comes through the line's rim highlight (data-status) and * the status pill colour. Provider attribution comes through the * provider-tinted name chip (.compact-tool-trace-provider). */ .compact-tool-trace { + --compact-tool-rim: color-mix(in srgb, var(--border-subtle) 60%, transparent); width: min(760px, 100%); margin: 2px 0; - border-left: 2px solid color-mix(in srgb, var(--border-subtle) 60%, transparent); - border-radius: 0 4px 4px 0; + border: 1px solid var(--compact-tool-rim); + border-radius: 7px; + box-shadow: inset 0 0 0 1px color-mix(in srgb, var(--text-primary) 3%, transparent); } .compact-tool-trace[data-status="success"] { - border-left-color: color-mix(in srgb, var(--tool-success) 60%, transparent); + --compact-tool-rim: color-mix(in srgb, var(--tool-success) 42%, transparent); } .compact-tool-trace[data-status="error"] { - border-left-color: color-mix(in srgb, var(--tool-error) 70%, transparent); + --compact-tool-rim: color-mix(in srgb, var(--tool-error) 48%, transparent); } .compact-tool-trace[data-status="warning"] { - border-left-color: color-mix(in srgb, var(--tool-warning) 65%, transparent); + --compact-tool-rim: color-mix(in srgb, var(--tool-warning) 46%, transparent); } .compact-tool-trace[data-status="running"], .compact-tool-trace[data-status="pending"] { - border-left-color: color-mix(in srgb, var(--accent) 60%, transparent); + --compact-tool-rim: color-mix(in srgb, var(--accent) 42%, transparent); } .compact-tool-trace-line { @@ -760,4 +763,3 @@ 
[data-appearance="native_glass"][data-reduce-transparency="false"] .app-transcript { background: color-mix(in srgb, var(--content-bg) 60%, transparent); } - From f7c9e21f4707b2d5f41909bc9bdde3c018aaef30 Mon Sep 17 00:00:00 2001 From: Chris Izatt Date: Thu, 4 Jun 2026 11:18:28 +0100 Subject: [PATCH 09/33] tweak: update general setting defaults --- src/main/store/index.ts | 19 +++++++++++++------ src/main/store/types.ts | 14 ++++++-------- src/renderer/src/components/SettingsPanel.tsx | 2 +- 3 files changed, 20 insertions(+), 15 deletions(-) diff --git a/src/main/store/index.ts b/src/main/store/index.ts index ad78109c..66db3704 100644 --- a/src/main/store/index.ts +++ b/src/main/store/index.ts @@ -135,16 +135,20 @@ const defaultSettings: AppSettings = { // General lets the user dial 0–25%. Applied in `formatCost.ts` // before FX conversion so the bias is currency-agnostic. currencyOverestimatePercent: 0, + dashboardStatPrefs: { + dashboardSize: 'small' + }, welcomeHeatmapPrefs: { + layout: 'single', workspaceActivityEnabled: true, agbenchActivityEnabled: true, externalActivityEnabled: true }, - // 1.0.5-EW26 — Kimi compatibility filter defaults. Off by - // default; the user opts in from Settings → General when they - // hit a Moonshot content_filter rejection on an incidental - // topic. Custom keywords stay empty until the user adds any. - kimiSanitiserEnabled: false, + // 1.0.5-EW26 — Kimi compatibility filter defaults. On by + // default so Moonshot content_filter retries get the compatibility + // pass automatically. Custom keywords stay empty until the user + // adds any. + kimiSanitiserEnabled: true, kimiSanitiserCustomKeywords: '', // 1.0.7-M10 — second-pass classifier stays opt-in; when unset // or false, the retry envelope remains keyword-only. @@ -415,7 +419,10 @@ export class AppStore { ...defaultSettings.agenticServices, ...(stored.agenticServices || {}) }, - dashboardStatPrefs: storedDashboardStatPrefs ? 
{ ...storedDashboardStatPrefs } : undefined, + dashboardStatPrefs: { + ...(defaultSettings.dashboardStatPrefs || {}), + ...(storedDashboardStatPrefs || {}) + }, welcomeHeatmapPrefs: { ...defaultSettings.welcomeHeatmapPrefs, ...(storedWelcomeHeatmapPrefs || {}) diff --git a/src/main/store/types.ts b/src/main/store/types.ts index b443bb35..d61d276a 100644 --- a/src/main/store/types.ts +++ b/src/main/store/types.ts @@ -1386,9 +1386,8 @@ export interface AppSettings { */ dashboardEnabled?: boolean /** - * Welcome dashboard display size. Large preserves the existing full-size - * layout; small renders the same dashboard scaled to 30% and pinned to the - * top-right of the welcome surface. + * Welcome dashboard display size. Small is the default compact welcome + * treatment; large preserves the full-size layout. */ dashboardSize?: 'large' | 'small' } @@ -1402,9 +1401,9 @@ export interface AppSettings { externalActivityEnabled?: boolean /** * 1.0.72 — Layout for the welcome standalone heatmaps: - * - 'stacked' (default): every enabled heatmap stacked vertically + * - 'stacked': every enabled heatmap stacked vertically * (the long-standing layout). - * - 'single': one heatmap at a time, auto-cycling every 90s through + * - 'single' (default): one heatmap at a time, auto-cycling every 90s through * the enabled heatmaps (mirrors the dashboard tab auto-cycle). */ layout?: 'single' | 'stacked' @@ -1415,9 +1414,8 @@ export interface AppSettings { * containing a configured trigger keyword (default list + * `kimiSanitiserCustomKeywords`) is replaced with a redacted * placeholder before the Kimi process spawns. Other - * participants always see the unfiltered prompt. Default - * `false` — opt-in for users who hit Moonshot content_filter - * rejections on incidental world-news / geopolitics digressions. */ + * participants always see the unfiltered prompt. Default `true` + * so Moonshot compatibility retries are available out of the box. 
*/ kimiSanitiserEnabled: boolean /** 1.0.5-EW26 — Newline-separated extra trigger keywords the * user wants the Kimi compatibility filter to catch on top of diff --git a/src/renderer/src/components/SettingsPanel.tsx b/src/renderer/src/components/SettingsPanel.tsx index e5fa241a..438bd866 100644 --- a/src/renderer/src/components/SettingsPanel.tsx +++ b/src/renderer/src/components/SettingsPanel.tsx @@ -3011,7 +3011,7 @@ export function SettingsPanel({ {/* 1.0.5-EW26 — Kimi (Moonshot) compatibility filter. - Off by default. When enabled, ensemble-mode Kimi + On by default. Ensemble-mode Kimi participants get their prompt context scanned by `src/main/lib/kimiSanitiser.ts` before spawn: sentences containing curated trigger keywords From ad0b27f1a09e64ccab8a8f35ccf637265935fcc9 Mon Sep 17 00:00:00 2001 From: Chris Izatt Date: Thu, 4 Jun 2026 11:19:59 +0100 Subject: [PATCH 10/33] feat(ensemble): per-round cost/latency readout + surface escalation signals MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Orders 1 + 3 from the dogfood triage. On the run-complete card: a Cost row summing real cost_usd (per-token seats) plus a badged '~est. API-equiv' token->USD projection for subscription/credit seats (Codex/Grok/Cursor) via the provider rate table — row omitted when nothing real or estimable (no misleading $0.00); an explicit Latency (round wall-clock) row; and the previously dark-shipped ensemble escalation signals (incl. disagreement-unresolved) now render as an advisory chip with their recommendedAction copy. Logic in pure libs (runCompleteSummary.ts + new providerRateEstimate.ts), 28 new/extended tests. Makes the cost/value tradeoff visible — never shrinks the panel. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/renderer/src/App.tsx | 82 ++++++- .../assets/css/09-ensemble-work-session.css | 80 +++++++ .../src/lib/providerRateEstimate.test.ts | 112 +++++++++ src/renderer/src/lib/providerRateEstimate.ts | 163 +++++++++++++ .../src/lib/runCompleteSummary.test.ts | 226 +++++++++++++++++- src/renderer/src/lib/runCompleteSummary.ts | 197 ++++++++++++++- 6 files changed, 850 insertions(+), 10 deletions(-) create mode 100644 src/renderer/src/lib/providerRateEstimate.test.ts create mode 100644 src/renderer/src/lib/providerRateEstimate.ts diff --git a/src/renderer/src/App.tsx b/src/renderer/src/App.tsx index b7dc5916..23dc0c39 100644 --- a/src/renderer/src/App.tsx +++ b/src/renderer/src/App.tsx @@ -292,9 +292,11 @@ import { } from './lib/usageStats' import { buildEnsembleRoundSummaryRows, + buildEscalationChips, buildRunCompleteSummaryRows, formatWorkDuration } from './lib/runCompleteSummary' +import { fetchProviderRates, type RendererProviderRates } from './lib/providerRateEstimate' import { getMemoryPreviewText, mergeCommandPaletteItems, @@ -2709,6 +2711,20 @@ type TranscriptPanelProps = { * is disengaged. A stable ref, so it never perturbs the memo. */ autoFollowRef?: React.MutableRefObject + /** + * 1.0.7 — display currency + conservative-overestimate bias (Settings → + * General), threaded in so the ensemble run-complete card's Cost row routes + * through `formatCost`. Defaults to USD / 0 when omitted. + */ + currency?: DisplayCurrency + currencyOverestimatePercent?: number + /** + * 1.0.7 — per-provider rate table (USD per 1M tokens) from the + * `providerRates:get` IPC. Used ONLY to project a clearly-badged + * API-equivalent cost for subscription/credit seats that emit no + * `cost_usd` (Codex / Grok / Cursor). Absent → no estimate. 
+ */ + providerRates?: RendererProviderRates } /** @@ -4150,7 +4166,10 @@ export const TranscriptPanel = memo( copiedId, copy, virtualize, - autoFollowRef + autoFollowRef, + currency, + currencyOverestimatePercent, + providerRates }: TranscriptPanelProps) { const visibleMessages = useMemo(() => { const source = isWelcomeChat ? EMPTY_CHAT_MESSAGES : messages @@ -4174,10 +4193,25 @@ export const TranscriptPanel = memo( // last speaker's), round-envelope duration, and summed tokens. // Solo chats: the original single-run summary. if (currentChat?.chatKind === 'ensemble' && currentChat.ensemble?.activeRound) { - return buildEnsembleRoundSummaryRows(currentChat, runCompleteNotice?.exitCode !== 0) + return buildEnsembleRoundSummaryRows(currentChat, runCompleteNotice?.exitCode !== 0, { + currency, + overestimatePercent: currencyOverestimatePercent, + providerRates + }) } return buildRunCompleteSummaryRows(currentRun) - }, [currentChat, currentRun, runCompleteNotice?.exitCode]) + }, [ + currentChat, + currentRun, + runCompleteNotice?.exitCode, + currency, + currencyOverestimatePercent, + providerRates + ]) + // 1.0.7 (M5 surfacing) — advisory chips for the dark-shipped escalation + // signals on the current round. Read-only: the orchestrator persists + // these; we just surface label + recommended action. + const escalationChips = useMemo(() => buildEscalationChips(currentChat), [currentChat]) const runBoundaryByMessageId = useMemo(() => { const runs = currentChat?.runs || [] const runById = new Map() @@ -4723,6 +4757,22 @@ export const TranscriptPanel = memo(
)} + {escalationChips.length > 0 && ( +
+ {escalationChips.map((chip) => ( +
+ {chip.label} + {chip.action && ( + {chip.action} + )} +
+ ))} +
+ )}
File changes @@ -4887,6 +4937,10 @@ function App(): React.JSX.Element { // transient "Copied" state consistent across the transcript. const { copiedId, copy } = useCopyFeedback() const [settings, setSettings] = useState(null) + // 1.0.7 — per-provider rate table (USD per 1M tokens) for the ensemble + // run-complete card's projected cost estimate. Hydrated once at mount from + // the `providerRates:get` IPC; empty until then (no estimate shown). + const [providerRates, setProviderRates] = useState({}) const [chatContextTurns, setChatContextTurns] = useState(DEFAULT_CONTEXT_TURNS) const [workspaces, setWorkspaces] = useState([]) const [workspacesHydrated, setWorkspacesHydrated] = useState(false) @@ -5202,6 +5256,25 @@ function App(): React.JSX.Element { cancelled = true } }, []) + /** + * 1.0.7 — one-shot fetch of the per-provider rate table over the existing + * `providerRates:get` IPC. Powers the ensemble run-complete card's projected + * cost ESTIMATE for subscription/credit seats (Codex / Grok / Cursor) that + * report no `cost_usd`. The rates are USD per 1M tokens and barely change + * during a session, so one read at mount is enough; on failure we leave the + * map empty and simply render no estimate. + */ + useEffect(() => { + let cancelled = false + void (async () => { + const rates = await fetchProviderRates() + if (cancelled) return + setProviderRates(rates) + })() + return () => { + cancelled = true + } + }, []) /** * Inline bug-report sheet. The tester opens this from the * "!" 
button next to the onboarding `?` button, describes whatever @@ -16499,6 +16572,9 @@ function App(): React.JSX.Element { copiedId={copiedId} copy={copy} autoFollowRef={autoFollowRef} + currency={displayCurrency} + currencyOverestimatePercent={overestimatePercent} + providerRates={providerRates} /> )} diff --git a/src/renderer/src/assets/css/09-ensemble-work-session.css b/src/renderer/src/assets/css/09-ensemble-work-session.css index 457adf68..bded722f 100644 --- a/src/renderer/src/assets/css/09-ensemble-work-session.css +++ b/src/renderer/src/assets/css/09-ensemble-work-session.css @@ -2056,3 +2056,83 @@ box-shadow: none; } +/* ============================================================ + * ENSEMBLE ESCALATION ADVISORY CHIPS — 1.0.7 (M5 surfacing) + * -------------------------------------------------------------- + * The orchestrator's complexity-escalation heuristic computes a + * signal at each round's close (stuck / looping / + * disagreement-unresolved / tool-error-cluster) and persists it on + * `chat.ensemble.escalationSignals`. These were dark-shipped — + * computed + broadcast but rendered nowhere. This surfaces them as + * a low-key advisory row tucked inside the run-complete card, + * right under "Run details". + * + * Advisory ONLY: nothing here acts on a signal. The copy makes a + * tradeoff VISIBLE so the user decides; it never frames a + * multi-seat panel as waste. Tones are deliberately gentle — + * `attention` for failure-shaped signals, `info` for advisories — + * so the chip informs without alarming. 
+ * ============================================================ */ + +.ensemble-escalation-advisory { + display: flex; + flex-direction: column; + gap: var(--space-xs); + margin-top: var(--space-sm); +} + +.ensemble-escalation-chip { + display: flex; + flex-wrap: wrap; + align-items: baseline; + gap: 4px 8px; + padding: 8px 12px; + border-radius: var(--radius-md); + border: 1px solid color-mix(in srgb, var(--accent) 22%, transparent); + background: color-mix(in srgb, var(--accent) 8%, transparent); + transition: + border-color 160ms ease, + background-color 160ms ease; +} + +.ensemble-escalation-chip.tone-attention { + border-color: color-mix(in srgb, var(--warning, #d98324) 34%, transparent); + background: color-mix(in srgb, var(--warning, #d98324) 10%, transparent); +} + +.ensemble-escalation-chip-label { + position: relative; + padding-left: 14px; + color: var(--text-primary); + font-size: var(--font-size-xs); + font-weight: 650; +} + +/* Leading dot keeps the label scannable without an icon dependency. */ +.ensemble-escalation-chip-label::before { + content: ""; + position: absolute; + left: 0; + top: 0.45em; + width: 7px; + height: 7px; + border-radius: 50%; + background: color-mix(in srgb, var(--accent) 70%, var(--text-primary)); +} + +.ensemble-escalation-chip.tone-attention .ensemble-escalation-chip-label::before { + background: color-mix(in srgb, var(--warning, #d98324) 78%, var(--text-primary)); +} + +.ensemble-escalation-chip-action { + color: var(--text-secondary); + font-size: var(--font-size-xs); + font-weight: 500; +} + +/* Respect the reduce-motion preference — drop the hover/state + * transition so the chip never animates when the user opted out. 
*/ +[data-reduce-motion="true"] .ensemble-escalation-chip { + transition: none; +} + diff --git a/src/renderer/src/lib/providerRateEstimate.test.ts b/src/renderer/src/lib/providerRateEstimate.test.ts new file mode 100644 index 00000000..5e785e9e --- /dev/null +++ b/src/renderer/src/lib/providerRateEstimate.test.ts @@ -0,0 +1,112 @@ +import { describe, expect, it } from 'vitest' +import { + estimateRunCostUsd, + normalizeProviderRates, + resolveModelRate, + type RendererProviderRates +} from './providerRateEstimate' + +const RATES: RendererProviderRates = { + codex: [ + { modelId: 'gpt-5.5', inputUsdPerMillion: 1.25, outputUsdPerMillion: 10.0 }, + { modelId: 'gpt-5.4-mini', inputUsdPerMillion: 0.25, outputUsdPerMillion: 2.0 } + ], + cursor: [] // empty list — Cursor ships no public rate +} + +describe('normalizeProviderRates', () => { + it('unwraps the ProviderRatesSnapshot baseline envelope', () => { + const snapshot = { + rateTableVersion: '2026-05-29', + baseline: { + codex: { + provider: 'codex', + pricingUrl: 'https://example', + models: [ + { + modelId: 'gpt-5.5', + inputUsdPerMillion: 1.25, + outputUsdPerMillion: 10.0, + cachedInputUsdPerMillion: 0.125, + sourceUrl: 'x', + lastVerified: '2026-05-29' + } + ] + }, + cursor: { provider: 'cursor', pricingUrl: '', models: [] } + } + } + const out = normalizeProviderRates(snapshot) + expect(out.codex).toEqual([ + { modelId: 'gpt-5.5', inputUsdPerMillion: 1.25, outputUsdPerMillion: 10.0 } + ]) + // Empty model lists are dropped entirely. 
+ expect(out.cursor).toBeUndefined() + }) + + it('accepts an already-unwrapped table map', () => { + const out = normalizeProviderRates({ + grok: { models: [{ modelId: 'grok-build', inputUsdPerMillion: 1, outputUsdPerMillion: 2 }] } + }) + expect(out.grok).toEqual([ + { modelId: 'grok-build', inputUsdPerMillion: 1, outputUsdPerMillion: 2 } + ]) + }) + + it('returns {} for malformed / missing input and skips invalid entries', () => { + expect(normalizeProviderRates(null)).toEqual({}) + expect(normalizeProviderRates('nope')).toEqual({}) + expect(normalizeProviderRates(undefined)).toEqual({}) + // entry missing a numeric rate is skipped, leaving the provider absent + expect( + normalizeProviderRates({ + codex: { models: [{ modelId: 'x', inputUsdPerMillion: 'bad', outputUsdPerMillion: 1 }] } + }) + ).toEqual({}) + }) +}) + +describe('resolveModelRate', () => { + it('matches exactly, then by prefix, then falls back to the first model', () => { + expect(resolveModelRate(RATES, 'codex', 'gpt-5.5')?.modelId).toBe('gpt-5.5') + // dated suffix resolves to the base entry via prefix match + expect(resolveModelRate(RATES, 'codex', 'gpt-5.5-2026-06-01')?.modelId).toBe('gpt-5.5') + // unknown model on a known provider falls back to first listed + expect(resolveModelRate(RATES, 'codex', 'totally-unknown')?.modelId).toBe('gpt-5.5') + }) + + it('returns null for unknown provider or empty rate list', () => { + expect(resolveModelRate(RATES, undefined, 'gpt-5.5')).toBeNull() + expect(resolveModelRate(RATES, 'cursor', 'composer-2.5')).toBeNull() + expect(resolveModelRate(RATES, 'gemini', 'gemini-3.1-pro')).toBeNull() + }) +}) + +describe('estimateRunCostUsd', () => { + it('projects input+output tokens at the per-million rate', () => { + // 1,000,000 in * $1.25/M + 500,000 out * $10/M = 1.25 + 5.00 = 6.25 + const usd = estimateRunCostUsd(RATES, 'codex', 'gpt-5.5', 1_000_000, 500_000) + expect(usd).toBeCloseTo(6.25, 6) + }) + + it('uses the resolved (prefix/fallback) model rate', () => { 
+ // unknown model → falls back to gpt-5.5 rate + const usd = estimateRunCostUsd(RATES, 'codex', 'mystery', 100_000, 0) + expect(usd).toBeCloseTo(0.125, 6) + }) + + it('returns 0 when provider/model cannot be resolved', () => { + expect(estimateRunCostUsd(RATES, 'cursor', 'composer-2.5', 100_000, 100_000)).toBe(0) + expect(estimateRunCostUsd(RATES, undefined, 'x', 100_000, 100_000)).toBe(0) + }) + + it('returns 0 when there are no tokens', () => { + expect(estimateRunCostUsd(RATES, 'codex', 'gpt-5.5', 0, 0)).toBe(0) + }) + + it('treats non-finite token counts as zero', () => { + expect(estimateRunCostUsd(RATES, 'codex', 'gpt-5.5', NaN, NaN)).toBe(0) + // one valid count still estimates + expect(estimateRunCostUsd(RATES, 'codex', 'gpt-5.5', 1_000_000, NaN)).toBeCloseTo(1.25, 6) + }) +}) diff --git a/src/renderer/src/lib/providerRateEstimate.ts b/src/renderer/src/lib/providerRateEstimate.ts new file mode 100644 index 00000000..04e5003d --- /dev/null +++ b/src/renderer/src/lib/providerRateEstimate.ts @@ -0,0 +1,163 @@ +/** + * 1.0.7 — Renderer-side token->USD ESTIMATOR for subscription/credit seats. + * + * Several providers bill via a flat subscription or a credit pool rather than + * per-token, so their run stats carry NO `cost_usd`: + * - Codex → ChatGPT subscription quota (Plus / Pro / Business) + * - Grok → SuperGrok subscription credits + * - Cursor → Cursor subscription (token-based but no clean public rate) + * + * For those seats the real spend is blank. This module maps summed input / + * output tokens to a PROJECTED API-equivalent USD figure using the per-model + * rate table that `ProviderRateService` exposes over the `providerRates:get` + * IPC (rates are USD per 1,000,000 tokens). + * + * **HONESTY GUARDRAILS** (the maintainer's explicit constraint): + * `ProviderRateService` self-documents these rates as PROJECTED API-equivalents + * — "what this run WOULD have cost on the API", not what was actually billed. 
+ * Therefore: + * (a) callers MUST only estimate when there is no explicit `cost_usd`, and + * (b) an estimate MUST be badged as such (e.g. "~$0.0x est. API-equiv"), + * never rendered as a bare currency string that implies money spent. + * This module returns the raw USD number only; the badging lives in the + * display layer (see `runCompleteSummary.ts`). + * + * Kept PURE + dependency-free so it's exhaustively unit-testable without an + * IPC harness. The (impure) one-shot fetch helper is the only window-touching + * export and is trivially mockable. + */ + +import type { ProviderId } from '../../../main/store/types' + +/** + * Minimal renderer-side mirror of `ProviderRateService`'s `ModelRateEntry`. + * The preload types `getProviderRates()` as `unknown` (the concrete shape + * lives main-side), so we narrow to just the fields the estimator needs and + * stay defensive about everything else. + */ +export interface RendererModelRate { + modelId: string + inputUsdPerMillion: number + outputUsdPerMillion: number +} + +/** Per-provider rate table, keyed by provider id. Partial because a snapshot + * may omit providers (e.g. Cursor ships an empty model list). */ +export type RendererProviderRates = Partial> + +const isFiniteNonNeg = (value: unknown): value is number => + typeof value === 'number' && Number.isFinite(value) && value >= 0 + +/** + * Narrow the loosely-typed `providerRates:get` IPC payload into a + * {@link RendererProviderRates} map. Tolerant of the full + * `ProviderRatesSnapshot` envelope (`{ baseline: { [provider]: { models } } }`) + * as well as already-unwrapped shapes. Returns `{}` on anything unexpected so + * a malformed snapshot can never break the estimator or rendering. + */ +export function normalizeProviderRates(raw: unknown): RendererProviderRates { + if (!raw || typeof raw !== 'object') return {} + // The IPC returns the full snapshot; the per-provider tables live under + // `.baseline`. 
Fall back to treating `raw` itself as the table map for + // forward/backward compatibility. + const envelope = raw as Record + const tables = + envelope.baseline && typeof envelope.baseline === 'object' + ? (envelope.baseline as Record) + : envelope + const out: RendererProviderRates = {} + for (const [provider, table] of Object.entries(tables)) { + if (!table || typeof table !== 'object') continue + const models = (table as Record).models + if (!Array.isArray(models)) continue + const entries: RendererModelRate[] = [] + for (const model of models) { + if (!model || typeof model !== 'object') continue + const m = model as Record + if ( + typeof m.modelId === 'string' && + isFiniteNonNeg(m.inputUsdPerMillion) && + isFiniteNonNeg(m.outputUsdPerMillion) + ) { + entries.push({ + modelId: m.modelId, + inputUsdPerMillion: m.inputUsdPerMillion, + outputUsdPerMillion: m.outputUsdPerMillion + }) + } + } + if (entries.length > 0) out[provider as ProviderId] = entries + } + return out +} + +/** + * Resolve a rate entry for a (provider, model) pair. Matches the model id + * exactly (case-insensitively) first, then by prefix in either direction (CLIs sometimes report + * `gpt-5.5-2026-xx` where the table keys `gpt-5.5`), then falls back to the + * provider's first-listed model (not necessarily the cheapest) so a known provider still yields a + * ballpark rather than nothing. Returns `null` when the provider is unknown + * or has no rates (e.g. Cursor's empty list). 
+ */ +export function resolveModelRate( + rates: RendererProviderRates, + provider: ProviderId | undefined, + model: string | undefined +): RendererModelRate | null { + if (!provider) return null + const table = rates[provider] + if (!table || table.length === 0) return null + const wanted = (model || '').trim().toLowerCase() + if (wanted) { + const exact = table.find((r) => r.modelId.toLowerCase() === wanted) + if (exact) return exact + const prefix = table.find( + (r) => + wanted.startsWith(r.modelId.toLowerCase()) || r.modelId.toLowerCase().startsWith(wanted) + ) + if (prefix) return prefix + } + return table[0] +} + +/** + * Pure estimate: project the USD API-equivalent cost of `inputTokens` / + * `outputTokens` for one (provider, model) pair using the rate table. + * + * Returns `0` when the provider/model can't be resolved or both token counts + * are zero — callers treat `<= 0` as "no estimate available" and render + * nothing rather than a misleading `$0.00`. + */ +export function estimateRunCostUsd( + rates: RendererProviderRates, + provider: ProviderId | undefined, + model: string | undefined, + inputTokens: number, + outputTokens: number +): number { + const rate = resolveModelRate(rates, provider, model) + if (!rate) return 0 + const inTok = isFiniteNonNeg(inputTokens) ? inputTokens : 0 + const outTok = isFiniteNonNeg(outputTokens) ? outputTokens : 0 + if (inTok === 0 && outTok === 0) return 0 + const usd = + (inTok / 1_000_000) * rate.inputUsdPerMillion + (outTok / 1_000_000) * rate.outputUsdPerMillion + return Number.isFinite(usd) && usd > 0 ? usd : 0 +} + +/** + * One-shot fetch of the provider rate snapshot over the existing + * `providerRates:get` IPC, narrowed to {@link RendererProviderRates}. The only + * impure export. Returns `{}` (never throws) when the API is unavailable or + * the call fails, so the estimator degrades to "no estimate" gracefully. 
+ */ +export async function fetchProviderRates(): Promise { + try { + const api = (globalThis as { api?: { getProviderRates?: () => Promise } }).api + if (typeof api?.getProviderRates !== 'function') return {} + const raw = await api.getProviderRates() + return normalizeProviderRates(raw) + } catch { + return {} + } +} diff --git a/src/renderer/src/lib/runCompleteSummary.test.ts b/src/renderer/src/lib/runCompleteSummary.test.ts index 8694f2ff..aa1b9fd3 100644 --- a/src/renderer/src/lib/runCompleteSummary.test.ts +++ b/src/renderer/src/lib/runCompleteSummary.test.ts @@ -1,6 +1,13 @@ import { describe, expect, it } from 'vitest' -import type { ChatRecord } from '../../../main/store/types' -import { buildEnsembleRoundSummaryRows, buildRoundOutcomeRows } from './runCompleteSummary' +import type { ChatRecord, ChatRun } from '../../../main/store/types' +import { + buildEnsembleRoundCostRow, + buildEnsembleRoundSummaryRows, + buildEscalationChips, + buildRoundOutcomeRows +} from './runCompleteSummary' +import type { ComplexityEscalationSignal } from '../../../main/store/types' +import type { RendererProviderRates } from './providerRateEstimate' // activeRound.participants is all buildRoundOutcomeRows reads; a partial cast // keeps the fixture focused on status outcomes. 
@@ -68,4 +75,219 @@ describe('buildEnsembleRoundSummaryRows', () => { expect(labels).toContain('Skipped') expect(labels.indexOf('Contributed')).toBe(labels.indexOf('Status') + 1) }) + + it('labels the round wall-clock row "Latency" (not "Duration")', () => { + const chat = { + chatKind: 'ensemble', + runs: [], + ensemble: { + activeRound: { + roundId: 'r1', + participants: [], + startedAt: '2026-06-04T10:00:00.000Z', + endedAt: '2026-06-04T10:00:30.000Z' + } + } + } as unknown as ChatRecord + const labels = buildEnsembleRoundSummaryRows(chat, false).map((r) => r.label) + expect(labels).toContain('Latency') + expect(labels).not.toContain('Duration') + const latency = buildEnsembleRoundSummaryRows(chat, false).find((r) => r.label === 'Latency') + expect(latency?.value).toBe('30s') + }) + + it('threads cost options through to a Cost row', () => { + const chat = { + chatKind: 'ensemble', + runs: [run({ provider: 'claude', stats: { cost_usd: 0.5 } })], + ensemble: { activeRound: { roundId: 'r1', participants: [] } } + } as unknown as ChatRecord + const cost = buildEnsembleRoundSummaryRows(chat, false, { currency: 'USD' }).find( + (r) => r.label === 'Cost' + ) + expect(cost?.value).toBe('$0.50') + }) +}) + +// A run belonging to round r1, with overridable provider/model/stats. Only the +// fields the cost row reads matter; cast keeps the fixture focused. 
+function run(partial: Partial): ChatRun { + return { + runId: Math.random().toString(36).slice(2), + ensembleRoundId: 'r1', + ...partial + } as ChatRun +} + +const ESTIMATE_RATES: RendererProviderRates = { + codex: [{ modelId: 'gpt-5.5', inputUsdPerMillion: 1.25, outputUsdPerMillion: 10.0 }], + cursor: [] +} + +describe('buildEnsembleRoundCostRow', () => { + it('sums explicit cost_usd across runs into a plain currency string', () => { + const row = buildEnsembleRoundCostRow( + [run({ provider: 'claude', stats: { cost_usd: 0.5 } }), run({ stats: { cost_usd: 0.25 } })], + { currency: 'USD' } + ) + expect(row).toEqual({ label: 'Cost', value: '$0.75' }) + }) + + it('returns null when there is no real cost AND no estimate', () => { + // Codex run with no cost_usd and no rate table → nothing to show. + expect(buildEnsembleRoundCostRow([run({ provider: 'codex', stats: {} })], {})).toBeNull() + expect(buildEnsembleRoundCostRow([], {})).toBeNull() + }) + + it('projects a clearly-badged API-equivalent estimate for subscription seats', () => { + // Codex emits no cost_usd → estimate from tokens: + // 1,000,000 in * $1.25/M + 100,000 out * $10/M = 1.25 + 1.00 = $2.25 + const row = buildEnsembleRoundCostRow( + [ + run({ + provider: 'codex', + actualModel: 'gpt-5.5', + stats: { input_tokens: 1_000_000, output_tokens: 100_000 } + }) + ], + { currency: 'USD', providerRates: ESTIMATE_RATES } + ) + expect(row?.label).toBe('Cost') + // Badged with leading ~ AND the est. API-equiv qualifier — never a bare $. + expect(row?.value).toBe('~$2.25 est. 
API-equiv') + }) + + it('shows real + estimate together when seats are mixed, keeping the estimate badged', () => { + const row = buildEnsembleRoundCostRow( + [ + run({ provider: 'claude', stats: { cost_usd: 0.5 } }), + run({ + provider: 'codex', + actualModel: 'gpt-5.5', + stats: { input_tokens: 1_000_000, output_tokens: 0 } + }) + ], + { currency: 'USD', providerRates: ESTIMATE_RATES } + ) + // Real $0.50 + projected $1.25 (1M in * $1.25/M), estimate still badged. + expect(row?.value).toBe('$0.50 + ~$1.25 est. API-equiv') + }) + + it('never estimates a seat that already reported cost_usd', () => { + // Even with a rate table present + tokens, an explicit cost wins and no + // "est." badge appears. + const row = buildEnsembleRoundCostRow( + [ + run({ + provider: 'codex', + actualModel: 'gpt-5.5', + stats: { cost_usd: 3, input_tokens: 1_000_000, output_tokens: 1_000_000 } + }) + ], + { currency: 'USD', providerRates: ESTIMATE_RATES } + ) + expect(row?.value).toBe('$3.00') + expect(row?.value).not.toContain('est.') + }) + + it('does not estimate Cursor (empty rate list) and shows nothing for a pure subscription round with no rates', () => { + const row = buildEnsembleRoundCostRow( + [run({ provider: 'cursor', stats: { input_tokens: 500_000, output_tokens: 500_000 } })], + { currency: 'USD', providerRates: ESTIMATE_RATES } + ) + expect(row).toBeNull() + }) + + it('honours the display currency for the real-cost path', () => { + const row = buildEnsembleRoundCostRow([run({ provider: 'claude', stats: { cost_usd: 1 } })], { + currency: 'GBP' + }) + // £ symbol present (GBP); exact figure depends on the FX table. 
+ expect(row?.value).toMatch(/£/) + }) +}) + +function sig(partial: Partial): ComplexityEscalationSignal { + return { + id: partial.id || Math.random().toString(36).slice(2), + chatId: 'c1', + roundId: 'r1', + kind: 'stuck', + evidence: 'because', + recommendedAction: 'pause-for-user', + createdAt: '2026-06-04T10:00:00.000Z', + ...partial + } +} + +function chatWithSignals( + signals: ComplexityEscalationSignal[], + roundId: string | null = 'r1' +): ChatRecord { + return { + chatKind: 'ensemble', + runs: [], + ensemble: { + ...(roundId ? { activeRound: { roundId, participants: [] } } : {}), + escalationSignals: signals + } + } as unknown as ChatRecord +} + +describe('buildEscalationChips', () => { + it('returns [] when there is no active round or no signals', () => { + expect(buildEscalationChips(null)).toEqual([]) + expect(buildEscalationChips(chatWithSignals([], null))).toEqual([]) + expect(buildEscalationChips(chatWithSignals([sig({ kind: 'stuck' })], null))).toEqual([]) + expect(buildEscalationChips(chatWithParticipants([]))).toEqual([]) + }) + + it('maps kind + recommendedAction to label/action/tone for the current round', () => { + const chips = buildEscalationChips( + chatWithSignals([ + sig({ id: 's1', kind: 'disagreement-unresolved', recommendedAction: 'call-synthesizer' }) + ]) + ) + expect(chips).toEqual([ + { + id: 's1', + label: 'Unreconciled answers', + action: 'Add a synthesizer to reconcile the answers.', + tone: 'info' + } + ]) + }) + + it('marks failure-shaped signals as attention tone', () => { + const chips = buildEscalationChips( + chatWithSignals([ + sig({ id: 's1', kind: 'tool-error-cluster', recommendedAction: 'pause-for-user' }) + ]) + ) + expect(chips[0].tone).toBe('attention') + expect(chips[0].label).toBe('Tool errors clustered') + }) + + it('only surfaces signals for the active round and de-dups by kind', () => { + const chips = buildEscalationChips( + chatWithSignals([ + sig({ id: 'old', kind: 'stuck', roundId: 'r0' }), // previous 
round — excluded + sig({ id: 'a', kind: 'stuck' }), + sig({ id: 'b', kind: 'stuck' }) // dup kind — collapsed + ]) + ) + expect(chips).toHaveLength(1) + expect(chips[0].id).toBe('a') + }) + + it('never frames panel size as waste — copy leans into the panel', () => { + const chips = buildEscalationChips( + chatWithSignals([ + sig({ id: '1', kind: 'disagreement-unresolved', recommendedAction: 'call-synthesizer' }), + sig({ id: '2', kind: 'looping', recommendedAction: 'extend-rounds' }) + ]) + ) + const allCopy = chips.map((c) => `${c.label} ${c.action}`).join(' ') + expect(allCopy.toLowerCase()).not.toMatch(/too many|waste|fewer seats|reduce/) + }) }) diff --git a/src/renderer/src/lib/runCompleteSummary.ts b/src/renderer/src/lib/runCompleteSummary.ts index c6cd7db8..523672f1 100644 --- a/src/renderer/src/lib/runCompleteSummary.ts +++ b/src/renderer/src/lib/runCompleteSummary.ts @@ -1,12 +1,115 @@ -import type { ChatRecord, ChatRun, EnsembleRoundParticipantState } from '../../../main/store/types' +import type { + ChatRecord, + ChatRun, + ComplexityEscalationAction, + ComplexityEscalationKind, + ComplexityEscalationSignal, + EnsembleRoundParticipantState +} from '../../../main/store/types' import { formatContextTokens } from './contextWindows' -import { extractUsageCount, extractUsageCountsFromCandidate } from './usageStats' +import { formatCostAlwaysOn, type DisplayCurrency } from './formatCost' +import { estimateRunCostUsd, type RendererProviderRates } from './providerRateEstimate' +import { + extractUsageCount, + extractUsageCostUsd, + extractUsageCountsFromCandidate +} from './usageStats' export type RunCompleteSummaryRow = { label: string value: string } +/** + * 1.0.7 — Cost-display inputs threaded into the (otherwise pure) ensemble + * round summary builder so it can render a currency-aware Cost row + a + * projected token->USD estimate for subscription/credit seats. All optional: + * omitting them reproduces the pre-1.0.7 behaviour (no Cost row). 
+ * + * - `currency` / `overestimatePercent`: the user's Settings → General + * preferences, already plumbed to the transcript. + * - `providerRates`: the per-provider rate table from the `providerRates:get` + * IPC (USD per 1M tokens). Used ONLY to estimate seats that emit no + * `cost_usd`. Absent/empty → no estimate, just real cost (which may be + * blank for subscription seats). + */ +export type EnsembleRoundSummaryCostOptions = { + currency?: DisplayCurrency + overestimatePercent?: number + providerRates?: RendererProviderRates +} + +/** + * 1.0.7 — Build the Cost row for a finished ensemble round, kept PURE so the + * estimator math + honesty badging are exhaustively testable. + * + * Two USD figures are accumulated separately across the round's runs: + * - `realUsd`: the sum of explicit `cost_usd` the provider actually + * reported (per-token API seats: Claude / Gemini / Kimi). + * - `estUsd`: a PROJECTED API-equivalent for runs that reported NO + * `cost_usd` (subscription / credit seats: Codex / Grok / Cursor), + * derived from summed tokens × the provider rate table. + * + * HONESTY GUARDRAILS (the maintainer's explicit constraint — + * ProviderRateService self-documents its rates as projected, not billed): + * (a) a run is only estimated when it has no explicit cost_usd, and + * (b) any estimated component is badged "est. API-equiv" (with a leading + * "~" on the estimated amount, in mixed rows too), NEVER rendered as a bare currency + * string that implies money was spent. + * + * Returns `null` when there's nothing to show (no real cost AND no estimate) + * so the caller omits the row entirely rather than printing a misleading + * `$0.00`. When only real cost exists it's a plain currency string; when only + * an estimate exists it's `~{amount} est. API-equiv`; a mix shows both, joined by ` + `. 
+ */ +export const buildEnsembleRoundCostRow = ( + roundRuns: ChatRun[], + options: EnsembleRoundSummaryCostOptions +): RunCompleteSummaryRow | null => { + const currency: DisplayCurrency = options.currency || 'USD' + const overestimate = options.overestimatePercent ?? 0 + const rates = options.providerRates || {} + + let realUsd = 0 + let estUsd = 0 + for (const run of roundRuns) { + const explicit = extractUsageCostUsd(run.stats) + if (explicit > 0) { + // Per-token seat reported real spend — never override with an estimate. + realUsd += explicit + continue + } + // No explicit cost (subscription / credit seat) — project from tokens. + const counts = extractUsageCountsFromCandidate(run.stats) + const model = run.actualModel || run.requestedModel + estUsd += estimateRunCostUsd( + rates, + run.provider, + model, + counts.inputTokens, + counts.outputTokens + ) + } + + if (realUsd <= 0 && estUsd <= 0) return null + + if (estUsd <= 0) { + // Pure real cost — plain currency string. + return { label: 'Cost', value: formatCostAlwaysOn(realUsd, currency, undefined, overestimate) } + } + + const estText = `~${formatCostAlwaysOn(estUsd, currency, undefined, overestimate)} est. API-equiv` + if (realUsd <= 0) { + // Pure estimate — badge it unmistakably as projected, not billed. + return { label: 'Cost', value: estText } + } + // Mix of real + estimated seats — show both, keep the estimate badged. 
+ return { + label: 'Cost', + value: `${formatCostAlwaysOn(realUsd, currency, undefined, overestimate)} + ${estText}` + } +} + export const formatWorkDuration = (startedAt?: string, completedAt?: string): string | null => { if (!startedAt || !completedAt) { return null @@ -152,7 +255,8 @@ export const buildRoundOutcomeRows = (chat: ChatRecord | null): RunCompleteSumma */ export const buildEnsembleRoundSummaryRows = ( chat: ChatRecord | null, - cancelled: boolean + cancelled: boolean, + costOptions: EnsembleRoundSummaryCostOptions = {} ): RunCompleteSummaryRow[] => { const round = chat?.ensemble?.activeRound if (!round) return [] @@ -195,12 +299,15 @@ export const buildEnsembleRoundSummaryRows = ( // panel's round-close "who passed / skipped / failed" ask. rows.push(...buildRoundOutcomeRows(chat)) - // Round-envelope duration. + // Round-envelope wall-clock — the time the user actually waited for the + // round to close. Labelled "Latency" (its own distinct row) so it reads + // clearly alongside the Cost row below; this is end-to-end round latency, + // not summed per-participant compute time. const startedAtMs = round.startedAt ? new Date(round.startedAt).getTime() : NaN const endedAtMs = round.endedAt ? new Date(round.endedAt).getTime() : Date.now() if (Number.isFinite(startedAtMs) && endedAtMs > startedAtMs) { rows.push({ - label: 'Duration', + label: 'Latency', value: formatCompactDurationMs(endedAtMs - startedAtMs) }) } @@ -223,5 +330,85 @@ export const buildEnsembleRoundSummaryRows = ( rows.push({ label: 'Total', value: `${formatContextTokens(totalTokens)} tokens` }) } + // Cost — real provider-reported spend plus a clearly-badged projected + // API-equivalent for subscription/credit seats that emit no cost_usd. + const costRow = buildEnsembleRoundCostRow(roundRuns, costOptions) + if (costRow) rows.push(costRow) + return rows } + +/** + * 1.0.7 (M5 surfacing) — presentation model for one complexity-escalation + * signal. 
The orchestrator already computes + persists these every round + * (`chat.ensemble.escalationSignals`); they were dark-shipped and rendered + * nowhere. This is a PURE mapping from the stored signal to the copy an + * advisory chip renders — short label + a one-line recommended next step. + * + * FRAMING (the maintainer's explicit constraint): these are advisory only — + * the orchestrator never auto-acts. The copy makes a tradeoff VISIBLE so the + * user decides; it must never frame a multi-seat panel as waste. Note the + * recommended action for `disagreement-unresolved` is to ADD a synthesizer to + * reconcile — i.e. lean INTO the panel, not shrink it. + */ +export type EscalationChipModel = { + id: string + /** Short human label for the signal kind. */ + label: string + /** One-line recommended next step (advisory). */ + action: string + /** Coarse tone for styling — failures read warmer than advisories. */ + tone: 'attention' | 'info' +} + +const ESCALATION_KIND_LABEL: Record = { + stuck: 'Round stalled', + looping: 'Handoffs exhausted', + 'disagreement-unresolved': 'Unreconciled answers', + 'tool-error-cluster': 'Tool errors clustered' +} + +const ESCALATION_KIND_TONE: Record = { + stuck: 'attention', + looping: 'info', + 'disagreement-unresolved': 'info', + 'tool-error-cluster': 'attention' +} + +const ESCALATION_ACTION_COPY: Record = { + // Lean into the panel — never frame more seats as waste. + 'extend-rounds': 'Consider another round to converge.', + 'call-synthesizer': 'Add a synthesizer to reconcile the answers.', + 'pause-for-user': 'Your input would help unblock this.' +} + +/** + * Map the signals persisted on a chat's ensemble state to chip view-models + * for the CURRENT round only (signals carry their originating `roundId`). + * De-duplicates by signal kind (the orchestrator already uses deterministic + * `${roundId}-esc-${kind}` ids, but a defensive de-dup keeps the chip strip + * tidy). 
Returns [] when there's no active round or no signals — the caller + * renders nothing. + * + * Pure + side-effect-free so the kind/action copy + filtering are unit-tested + * without a render harness. + */ +export const buildEscalationChips = (chat: ChatRecord | null): EscalationChipModel[] => { + const round = chat?.ensemble?.activeRound + const signals = chat?.ensemble?.escalationSignals + if (!round || !signals || signals.length === 0) return [] + const seenKinds = new Set() + const chips: EscalationChipModel[] = [] + for (const signal of signals as ComplexityEscalationSignal[]) { + if (signal.roundId !== round.roundId) continue + if (seenKinds.has(signal.kind)) continue + seenKinds.add(signal.kind) + chips.push({ + id: signal.id, + label: ESCALATION_KIND_LABEL[signal.kind] || signal.kind, + action: ESCALATION_ACTION_COPY[signal.recommendedAction] || '', + tone: ESCALATION_KIND_TONE[signal.kind] || 'info' + }) + } + return chips +} From 9f82ff5f651605dba41ffd522da59be4520c81ab Mon Sep 17 00:00:00 2001 From: Chris Izatt Date: Thu, 4 Jun 2026 11:38:34 +0100 Subject: [PATCH 11/33] fix(store): deleteChat now cleans up its run-event ledger + artifacts Order 2 from the dogfood triage. deleteChat only unlinked the chat JSON, orphaning that chat's run-event .jsonl + run-artifacts on disk forever. Now it also removes them, derived strictly from the chat's OWN runIds via the deterministic safeRunEventFileName mapping (never a directory scan), so a sibling chat's prefix-similar run files can't be caught. Best-effort + non-fatal (missing files ignored). 3 tests incl. the sibling-untouched anti-footgun case. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/main/AppStoreDeleteChat.test.ts | 104 ++++++++++++++++++++++++++++ src/main/store/index.ts | 43 ++++++++++++ 2 files changed, 147 insertions(+) create mode 100644 src/main/AppStoreDeleteChat.test.ts diff --git a/src/main/AppStoreDeleteChat.test.ts b/src/main/AppStoreDeleteChat.test.ts new file mode 100644 index 00000000..8fa3b70b --- /dev/null +++ b/src/main/AppStoreDeleteChat.test.ts @@ -0,0 +1,104 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest' +import fs from 'fs' +import { join } from 'path' +import { AppStore } from './store' +import type { ChatRecord, ChatRun } from './store/types' + +const userDataPath = vi.hoisted(() => `/tmp/agentbench-delete-chat-test-${process.pid}`) + +vi.mock('electron', () => ({ + app: { + getPath: () => userDataPath + } +})) + +const runEventPath = (runId: string): string => join(userDataPath, 'run-events', `${runId}.jsonl`) +const artifactDir = (runId: string): string => join(userDataPath, 'run-artifacts', runId) + +function makeRun(runId: string): ChatRun { + return { runId, startedAt: '2026-05-08T00:00:00.000Z' } +} + +function seedRunFiles(runId: string): void { + fs.mkdirSync(join(userDataPath, 'run-events'), { recursive: true }) + fs.writeFileSync(runEventPath(runId), `{"runId":"${runId}"}\n`, 'utf8') + fs.mkdirSync(artifactDir(runId), { recursive: true }) + fs.writeFileSync(join(artifactDir(runId), 'stdout.log'), 'stream\n', 'utf8') +} + +function saveChatWithRuns(appChatId: string, runs: ChatRun[]): ChatRecord { + const chat: ChatRecord = { + appChatId, + scope: 'workspace', + chatKind: 'single', + provider: 'gemini', + title: appChatId, + workspaceId: 'workspace-1', + workspacePath: '/repo', + createdAt: 1, + updatedAt: 1, + archived: false, + messages: [], + runs + } + AppStore.saveChat(chat) + return chat +} + +describe('AppStore.deleteChat run cleanup', () => { + beforeEach(() => { + fs.rmSync(userDataPath, { recursive: true, force: true }) + 
fs.mkdirSync(join(userDataPath, 'chats'), { recursive: true }) + }) + + it('removes the deleted chat run-event files and artifacts', () => { + saveChatWithRuns('chat-a', [makeRun('run-1'), makeRun('run-2')]) + seedRunFiles('run-1') + seedRunFiles('run-2') + + expect(fs.existsSync(runEventPath('run-1'))).toBe(true) + expect(fs.existsSync(artifactDir('run-1'))).toBe(true) + + AppStore.deleteChat('chat-a') + + // Chat JSON gone (behaviour preserved). + expect(fs.existsSync(join(userDataPath, 'chats', 'chat-a.json'))).toBe(false) + // Both runs' forensic files removed. + expect(fs.existsSync(runEventPath('run-1'))).toBe(false) + expect(fs.existsSync(artifactDir('run-1'))).toBe(false) + expect(fs.existsSync(runEventPath('run-2'))).toBe(false) + expect(fs.existsSync(artifactDir('run-2'))).toBe(false) + }) + + it('leaves a sibling chat with a prefix-similar run id untouched', () => { + // chat-a owns `run-1`; sibling chat-b owns `run-1-extra` whose id has + // `run-1` as a string prefix. A prefix/readdir-based delete would wrongly + // catch the sibling's files; an exact-name delete must not. + saveChatWithRuns('chat-a', [makeRun('run-1')]) + saveChatWithRuns('chat-b', [makeRun('run-1-extra')]) + seedRunFiles('run-1') + seedRunFiles('run-1-extra') + + AppStore.deleteChat('chat-a') + + // Deleted chat's run is gone... + expect(fs.existsSync(runEventPath('run-1'))).toBe(false) + expect(fs.existsSync(artifactDir('run-1'))).toBe(false) + // ...but the sibling's prefix-similar run is fully intact. + expect(fs.existsSync(runEventPath('run-1-extra'))).toBe(true) + expect(fs.existsSync(artifactDir('run-1-extra'))).toBe(true) + expect(fs.existsSync(join(userDataPath, 'chats', 'chat-b.json'))).toBe(true) + }) + + it('succeeds when a run-event file is already missing', () => { + // run-1 has files, run-2 was never persisted (missing on disk). 
+ saveChatWithRuns('chat-a', [makeRun('run-1'), makeRun('run-2')]) + seedRunFiles('run-1') + expect(fs.existsSync(runEventPath('run-2'))).toBe(false) + + expect(() => AppStore.deleteChat('chat-a')).not.toThrow() + + expect(fs.existsSync(runEventPath('run-1'))).toBe(false) + expect(fs.existsSync(join(userDataPath, 'chats', 'chat-a.json'))).toBe(false) + }) +}) diff --git a/src/main/store/index.ts b/src/main/store/index.ts index 66db3704..e00fa9cb 100644 --- a/src/main/store/index.ts +++ b/src/main/store/index.ts @@ -299,6 +299,36 @@ function runEventFilePath(runId: string): string { return path.join(runEventsDir, safeRunEventFileName(runId)) } +// Per-run artifact directory. Mirrors the path derivation in +// appendRunStreamArtifact (the `.jsonl`-stripped run file name is used as a +// dedicated directory holding stdout/stderr/stdin .log files for the run), so +// every artifact for a given runId lives under exactly this path. Deriving it +// from `safeRunEventFileName` keeps deletion in lockstep with creation. +function runArtifactDirPath(runId: string): string { + return path.join(runArtifactsDir, safeRunEventFileName(runId).replace(/\.jsonl$/, '')) +} + +// Best-effort, non-fatal cleanup of one run's on-disk forensic data: its +// run-event `.jsonl` ledger and its artifact directory. Each removal is mapped +// from a KNOWN runId via the deterministic safeRunEventFileName transform — we +// never readdir-and-match-by-prefix, so a sibling run whose id merely starts with +// this one (e.g. `run-1` vs `run-1-extra`) can never be caught: the targets are +// exact file/dir names (`run-1.jsonl` ≠ `run-1-extra.jsonl`). Missing files are +// ignored so a partially-written run cannot abort the chat deletion. 
+function deleteRunForensicFiles(runId: string): void { + if (!runId) return + try { + fs.rmSync(runEventFilePath(runId), { force: true }) + } catch (e) { + console.error(`Failed to delete run-event file for run ${runId}`, e) + } + try { + fs.rmSync(runArtifactDirPath(runId), { recursive: true, force: true }) + } catch (e) { + console.error(`Failed to delete run artifacts for run ${runId}`, e) + } +} + function readRunEventFile(filePath: string): RunEventRecord[] { try { if (!fs.existsSync(filePath)) return [] @@ -897,6 +927,19 @@ export class AppStore { } static deleteChat(chatId: string) { + // Read the chat's KNOWN runs before unlinking so we can clean up its + // per-run forensic files (run-event ledger + artifacts) that would + // otherwise be orphaned on disk forever. Derived purely from this chat's + // own runIds (never a directory scan), so a sibling chat's similar/prefixed + // run files are guaranteed untouched. All cleanup is best-effort. + const chat = this.getChat(chatId) + const runs = Array.isArray(chat?.runs) ? chat.runs : [] + for (const run of runs) { + if (run && typeof run.runId === 'string') { + deleteRunForensicFiles(run.runId) + } + } + const chatPath = path.join(chatsDir, `${chatId}.json`) if (fs.existsSync(chatPath)) { fs.unlinkSync(chatPath) From f177987746c6168158107fedd00d430255df194d Mon Sep 17 00:00:00 2001 From: Chris Izatt Date: Thu, 4 Jun 2026 11:49:18 +0100 Subject: [PATCH 12/33] feat(approvals): optional intent note on approval decisions Order 4 from the dogfood triage. The approval card gains an optional one-line 'why?' note; it rides the EXISTING approval-ledger metadata channel (extraMetadata -> record.metadata.intentNote, no schema migration), is re-trimmed + capped at the untrusted IPC boundary, and surfaces read-only in the Approval Ledger panel. Always optional (never gates approve/deny), cleared between queued approvals. Surfacing the note to agents/participants is deliberately NOT done (prompt-injection risk). 
+1 AuditService test. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/main/index.ts | 14 ++++++-- src/main/services/AuditService.test.ts | 11 +++++++ src/preload/index.d.ts | 6 +++- src/preload/index.ts | 7 ++-- src/renderer/src/App.tsx | 32 ++++++++++++++++++- .../css/03-composer-welcome-activity.css | 24 ++++++++++++++ .../src/components/ApprovalLedgerPanel.tsx | 16 ++++++++++ 7 files changed, 104 insertions(+), 6 deletions(-) diff --git a/src/main/index.ts b/src/main/index.ts index ac832165..c5c53f99 100644 --- a/src/main/index.ts +++ b/src/main/index.ts @@ -16717,7 +16717,17 @@ if (isGeminiMcpBridgeProcess) { ipcMain.handle( 'respond-agent-approval', - async (_, requestId: string, action: AgentApprovalAction) => { + async (_, requestId: string, action: AgentApprovalAction, intentNote?: string) => { + // Order-4 — optional one-line "why" note captured in the + // approval card. Trim + cap defensively (the renderer already + // trims, but the IPC boundary is untrusted) and ride it on the + // existing ledger metadata channel as `intentNote`. Empty stays + // off the metadata entirely so we never persist a blank note. + const trimmedIntentNote = + typeof intentNote === 'string' ? intentNote.trim().slice(0, 280) : '' + const resolveOptions = trimmedIntentNote + ? { extraMetadata: { intentNote: trimmedIntentNote } } + : undefined // Slice 5 v2 of the external-path-redesign arc. 
When the user // clicks "Grant read access" / "Grant edit access" in an // external-path approval modal, peek at the pending approval's stashed @@ -16769,7 +16779,7 @@ if (isGeminiMcpBridgeProcess) { } } } - return approvalServiceInstance.resolve(requestId, action) + return approvalServiceInstance.resolve(requestId, action, resolveOptions) } ) diff --git a/src/main/services/AuditService.test.ts b/src/main/services/AuditService.test.ts index 1eafc50b..cf064167 100644 --- a/src/main/services/AuditService.test.ts +++ b/src/main/services/AuditService.test.ts @@ -61,6 +61,17 @@ describe('AuditService', () => { ) }) + it('threads an optional intent note through extraMetadata (Order-4)', () => { + const { deps } = makeDeps() + const service = new AuditService(deps) + service.resolveApprovalLedgerResponse('approval-1', 'accept', 'user', { + intentNote: 'reviewed the diff, safe' + }) + expect(deps.resolveApprovalResponse).toHaveBeenCalledWith('approval-1', 'accept', 'user', { + intentNote: 'reviewed the diff, safe' + }) + }) + it('does not throw when ledger response resolution fails', () => { const error = new Error('ledger unavailable') const { deps, errors } = makeDeps({ diff --git a/src/preload/index.d.ts b/src/preload/index.d.ts index 9b8c54c6..d4145fe0 100644 --- a/src/preload/index.d.ts +++ b/src/preload/index.d.ts @@ -363,7 +363,11 @@ declare global { numTurns?: number ) => Promise startAgentReview: (provider: ProviderId, threadId: string, params?: any) => Promise - respondAgentApproval: (requestId: string, action: AgentApprovalAction) => Promise + respondAgentApproval: ( + requestId: string, + action: AgentApprovalAction, + intentNote?: string + ) => Promise writeGeminiInput: (data: string) => Promise getDiff: (workspace: string) => Promise<{ type: 'not_repo' | 'no_changes' | 'changes' | 'error' diff --git a/src/preload/index.ts b/src/preload/index.ts index eb641f35..9dbc566d 100644 --- a/src/preload/index.ts +++ b/src/preload/index.ts @@ -177,8 +177,11 @@ const api 
= { | 'cancel' | 'grantExternalPathRead' | 'grantExternalPathEdit' - | 'declineExternalPath' - ) => ipcRenderer.invoke('respond-agent-approval', requestId, action), + | 'declineExternalPath', + // Order-4 — optional one-line "why" note. Persisted onto the + // approval-ledger row's metadata; never required. + intentNote?: string + ) => ipcRenderer.invoke('respond-agent-approval', requestId, action, intentNote), writeGeminiInput: (data: string) => ipcRenderer.invoke('write-gemini-input', data), getDiff: (workspace: string) => ipcRenderer.invoke('get-diff', workspace), openWorkspacePopout: (input: { kind: 'file-editor' | 'diff-studio'; workspacePath: string }) => diff --git a/src/renderer/src/App.tsx b/src/renderer/src/App.tsx index 23dc0c39..23400205 100644 --- a/src/renderer/src/App.tsx +++ b/src/renderer/src/App.tsx @@ -5412,6 +5412,13 @@ function App(): React.JSX.Element { const [pendingApprovalQueueByChatId, setPendingApprovalQueueByChatId] = useState< Record >({}) + // Order-4 — optional one-line "why" note the user can attach to an + // approval decision. Rides the existing approval-ledger metadata + // channel as `{ intentNote }` (no schema migration). At most one + // approval card is on screen at a time (the queue head), so a single + // string suffices; it's cleared whenever an approval resolves so the + // next queued request starts blank. + const [intentNote, setIntentNote] = useState('') const [isSendConfirming, setIsSendConfirming] = useState(false) // 1.0.6-EW66-1d — Create-PR state is now keyed by workspace PATH // so the primary workspace and each WRITE-access additional @@ -13474,8 +13481,13 @@ function App(): React.JSX.Element { } const handleAgentApprovalAction = async (requestId: string, action: AgentApprovalAction) => { + // Order-4 — capture the optional intent note (trimmed) at decision + // time and pass it down to the IPC, which stamps it onto the ledger + // row's metadata. Empty stays undefined so we never persist a blank + // note. 
Always optional — never gates the decision. + const noteForDecision = intentNote.trim() || undefined try { - await window.api.respondAgentApproval(requestId, action) + await window.api.respondAgentApproval(requestId, action, noteForDecision) setRawLogs((prev) => [ ...prev, { @@ -13505,6 +13517,9 @@ function App(): React.JSX.Element { // approval so this distinction didn't matter. const composerChatId = getCurrentComposerStateChatId() setPendingAgentApproval((prev) => (prev?.id === requestId ? null : prev)) + // Order-4 — reset the intent note so the next queued approval + // (or the next request entirely) starts with an empty field. + setIntentNote('') if (composerChatId) { advanceApprovalQueueForChat(composerChatId) } @@ -17799,6 +17814,21 @@ function App(): React.JSX.Element {
)} {renderAgentApprovalPreview(pendingAgentApproval.preview)} + {/* Order-4 — optional one-line intent note. Always + optional: it never blocks approve/deny. The text + is captured at click time in + `handleAgentApprovalAction` and persisted onto + the approval-ledger row's metadata as + `intentNote` (no schema migration). */} + setIntentNote(e.target.value)} + placeholder="why? (optional)" + aria-label="Optional note explaining this approval decision" + maxLength={280} + />
{(pendingAgentApproval.actions || ['accept']).includes('accept') && ( + {/* First-launch onboarding sheet re-opener. The sheet auto-shows on a fresh install and stays available @@ -16335,6 +16390,7 @@ function App(): React.JSX.Element { > ! +
@@ -19544,6 +19600,16 @@ function App(): React.JSX.Element { promptBubble={appearance.userBubbleColor || 'system'} ensembleSummary={bugReportEnsembleSummary} /> + {/* 1.0.4-AK2 — Work Session setup sheet. z-index 9130 sits above BugReportSheet (9120) since opening a Work Session is a deliberate intent action and shouldn't be obscured diff --git a/src/renderer/src/assets/css/08-theme-picker-overrides.css b/src/renderer/src/assets/css/08-theme-picker-overrides.css index 5372507b..9c4adac0 100644 --- a/src/renderer/src/assets/css/08-theme-picker-overrides.css +++ b/src/renderer/src/assets/css/08-theme-picker-overrides.css @@ -2798,6 +2798,168 @@ font-size: 0.85em; } +/* ── Changelog + Update Pill ───────────────────────────────────────── */ +.chat-corner-controls .chat-corner-update-pill { + width: auto; + min-width: 44px; + padding: 3px 9px; + color: rgba(245, 250, 255, 0.98); + background: + linear-gradient(135deg, color-mix(in srgb, var(--accent, #5a8cff) 58%, transparent), rgba(255, 255, 255, 0.12)), + rgba(255, 255, 255, 0.1); + border: 1px solid color-mix(in srgb, var(--accent, #5a8cff) 48%, transparent); +} +.chat-corner-update-pill-label { + display: inline-flex; + align-items: center; + justify-content: center; + white-space: nowrap; + font-size: 0.72rem; + font-weight: 700; + line-height: 1; +} +.chat-corner-controls .chat-corner-update-pill-downloading { + min-width: 36px; +} +.chat-corner-controls .chat-corner-update-pill-error { + background: + linear-gradient(135deg, rgba(220, 80, 80, 0.4), rgba(255, 255, 255, 0.1)), + rgba(255, 255, 255, 0.08); + border-color: rgba(255, 150, 150, 0.35); +} +.changelog-sheet-backdrop { + position: fixed; + inset: 0; + background: rgba(0, 0, 0, 0.5); + backdrop-filter: blur(8px); + -webkit-backdrop-filter: blur(8px); + display: flex; + align-items: center; + justify-content: center; + z-index: 9140; + padding: 32px 24px; +} +.changelog-sheet { + width: min(680px, 100%); + max-height: 90vh; + overflow: hidden; + 
background: var(--surface-1); + border: 1px solid var(--surface-border, rgba(255, 255, 255, 0.08)); + border-radius: 16px; + box-shadow: 0 32px 80px rgba(0, 0, 0, 0.42); + display: flex; + flex-direction: column; + gap: 16px; + padding: 24px 26px 20px; +} +.changelog-sheet-header { + display: flex; + justify-content: space-between; + align-items: flex-start; + gap: 12px; +} +.changelog-sheet-header-text { + display: flex; + align-items: center; + gap: 12px; + min-width: 0; +} +.changelog-sheet-glyph { + flex-shrink: 0; + width: 40px; + height: 40px; + border-radius: 50%; + display: inline-flex; + align-items: center; + justify-content: center; + font-size: 1.1em; + font-weight: 800; + font-family: var(--font-mono, ui-monospace, SFMono-Regular, Menlo, monospace); + background: color-mix(in srgb, var(--accent, #5a8cff) 16%, transparent); + color: color-mix(in srgb, var(--accent, #5a8cff) 78%, white); + border: 1px solid color-mix(in srgb, var(--accent, #5a8cff) 36%, transparent); +} +.changelog-sheet-header h2 { + margin: 0 0 4px; + font-size: 1.18em; + font-weight: 650; +} +.changelog-sheet-subtitle { + margin: 0; + color: var(--text-secondary); + font-size: 0.86em; +} +.changelog-sheet-close { + width: 30px; + height: 30px; + border-radius: 8px; + border: 0; + background: transparent; + color: var(--text-secondary); + cursor: pointer; + font-size: 1.25em; + line-height: 1; +} +.changelog-sheet-close:hover { + background: rgba(255, 255, 255, 0.08); + color: var(--text-primary); +} +.changelog-sheet-progress { + position: relative; + height: 18px; + border-radius: 999px; + overflow: hidden; + background: rgba(255, 255, 255, 0.06); + border: 1px solid rgba(255, 255, 255, 0.08); +} +.changelog-sheet-progress-fill { + position: absolute; + inset: 0 auto 0 0; + background: color-mix(in srgb, var(--accent, #5a8cff) 68%, transparent); + transition: width 180ms ease; +} +.changelog-sheet-progress span { + position: relative; + z-index: 1; + display: flex; + align-items: 
center; + justify-content: center; + height: 100%; + font-size: 0.76em; + font-weight: 700; +} +.changelog-sheet-error { + padding: 8px 10px; + border-radius: 8px; + background: rgba(220, 80, 80, 0.12); + color: rgba(255, 200, 200, 0.94); + font-size: 0.86em; +} +.changelog-sheet-notes { + min-height: 180px; + max-height: min(52vh, 520px); + overflow: auto; + border-radius: 10px; + border: 1px solid rgba(255, 255, 255, 0.07); + background: rgba(255, 255, 255, 0.03); + padding: 14px; +} +.changelog-sheet-notes pre { + margin: 0; + white-space: pre-wrap; + word-break: break-word; + color: var(--text-primary); + font: inherit; + line-height: 1.48; +} +.changelog-sheet-actions { + display: flex; + justify-content: flex-end; + align-items: center; + gap: 8px; + flex-wrap: wrap; +} + /* User-message collapse — long pasted briefs are truncated by default with a * "Show more" affordance under the bubble. Styling reuses the existing user * bubble; we only need the inner content wrapper, the gradient fade, and the diff --git a/src/renderer/src/components/ChangelogSheet.test.tsx b/src/renderer/src/components/ChangelogSheet.test.tsx new file mode 100644 index 00000000..0c997321 --- /dev/null +++ b/src/renderer/src/components/ChangelogSheet.test.tsx @@ -0,0 +1,165 @@ +import { renderToStaticMarkup } from 'react-dom/server' +import { describe, expect, it } from 'vitest' +import { ChangelogSheet, formatReleaseNotes, resolveChangelogEntry } from './ChangelogSheet' +import { UpdatePill } from './UpdatePill' +import type { ProductChangelogSnapshot } from '../../../main/store/types' +import type { UpdateStateSnapshot } from '../../../main/UpdateService' + +const changelogSnapshot: ProductChangelogSnapshot = { + currentVersion: '1.0.72', + lastSeenChangelogVersion: '1.0.71' +} + +describe('UpdatePill', () => { + it('stays hidden for quiet update states', () => { + const html = renderToStaticMarkup( + {}} + /> + ) + expect(html).toBe('') + }) + + it('renders an accent pill for 
available updates', () => { + const html = renderToStaticMarkup( + {}} + /> + ) + expect(html).toContain('chat-corner-update-pill-available') + expect(html).toContain('Update 1.0.73') + }) + + it('renders download progress for downloading updates', () => { + const html = renderToStaticMarkup( + {}} + /> + ) + expect(html).toContain('chat-corner-update-pill-downloading') + expect(html).toContain('42%') + }) +}) + +describe('ChangelogSheet', () => { + it('returns null when closed', () => { + const html = renderToStaticMarkup( + {}} + changelogSnapshot={changelogSnapshot} + updateSnapshot={null} + /> + ) + expect(html).toBe('') + }) + + it('shows release notes and download action for available updates', () => { + const updateSnapshot: UpdateStateSnapshot = { + status: 'available', + enabled: true, + channel: 'stable', + latestVersion: '1.0.73', + releaseName: 'AGBench 1.0.73', + releaseDate: '2026-06-04T12:00:00.000Z', + releaseNotes: 'Updater pill and changelog sheet.' + } + const html = renderToStaticMarkup( + {}} + changelogSnapshot={changelogSnapshot} + updateSnapshot={updateSnapshot} + onDownloadUpdate={() => {}} + /> + ) + expect(html).toContain('changelog-sheet-backdrop') + expect(html).toContain('AGBench 1.0.73') + expect(html).toContain('Updater pill and changelog sheet.') + expect(html).toContain('Download update') + }) + + it('shows restart action for downloaded updates', () => { + const html = renderToStaticMarkup( + {}} + changelogSnapshot={changelogSnapshot} + updateSnapshot={{ + status: 'downloaded', + enabled: true, + channel: 'stable', + latestVersion: '1.0.73', + releaseNotes: 'Ready.' 
+ }} + onInstallUpdateNow={() => {}} + /> + ) + expect(html).toContain('Restart to install') + }) + + it('falls back to the bundled changelog when release notes are missing', () => { + const html = renderToStaticMarkup( + {}} + changelogSnapshot={changelogSnapshot} + updateSnapshot={null} + /> + ) + expect(html).toContain('Bundled changelog') + expect(html).toContain('AGBench') + }) + + it('formats full changelog arrays from electron-updater metadata', () => { + expect( + formatReleaseNotes([ + { version: '1.0.73', note: 'New update UI.' }, + { version: '1.0.72', note: null } + ]) + ).toBe('## 1.0.73\nNew update UI.') + }) + + it('prefers live update metadata over pending changelog snapshots', () => { + const entry = resolveChangelogEntry( + { + currentVersion: '1.0.72', + pendingUpdateChangelog: { + version: '1.0.72', + releaseNotes: 'Current app.' + } + }, + { + status: 'available', + enabled: true, + channel: 'stable', + latestVersion: '1.0.73', + releaseNotes: 'Available app.' + } + ) + expect(entry).toMatchObject({ + version: '1.0.73', + releaseNotes: 'Available app.' 
+ }) + }) +}) diff --git a/src/renderer/src/components/ChangelogSheet.tsx b/src/renderer/src/components/ChangelogSheet.tsx new file mode 100644 index 00000000..3e494d6f --- /dev/null +++ b/src/renderer/src/components/ChangelogSheet.tsx @@ -0,0 +1,220 @@ +import React, { useCallback, useEffect, useMemo, useRef } from 'react' +import bundledChangelog from '../../../../CHANGELOG.md?raw' +import type { + ProductChangelogSnapshot, + ProductUpdateChangelog, + ProductUpdateReleaseNotes +} from '../../../main/store/types' +import type { UpdateStateSnapshot } from '../../../main/UpdateService' + +interface ChangelogSheetProps { + open: boolean + onDismiss: () => void + changelogSnapshot: ProductChangelogSnapshot | null + updateSnapshot: UpdateStateSnapshot | null + busy?: boolean + onCheckForUpdates?: () => Promise | unknown + onDownloadUpdate?: () => Promise | unknown + onInstallUpdateNow?: () => Promise | unknown +} + +const SHEET_TITLE_ID = 'changelog-sheet-title' + +export function ChangelogSheet({ + open, + onDismiss, + changelogSnapshot, + updateSnapshot, + busy = false, + onCheckForUpdates, + onDownloadUpdate, + onInstallUpdateNow +}: ChangelogSheetProps): React.JSX.Element | null { + const dismissRef = useRef(onDismiss) + useEffect(() => { + dismissRef.current = onDismiss + }, [onDismiss]) + + useEffect(() => { + if (!open) return + const onKeyDown = (event: KeyboardEvent): void => { + if (event.key === 'Escape') { + event.stopPropagation() + dismissRef.current() + } + } + window.addEventListener('keydown', onKeyDown, true) + return () => window.removeEventListener('keydown', onKeyDown, true) + }, [open]) + + const entry = useMemo( + () => resolveChangelogEntry(changelogSnapshot, updateSnapshot), + [changelogSnapshot, updateSnapshot] + ) + const releaseNotes = formatReleaseNotes(entry.releaseNotes) + const displayNotes = releaseNotes || bundledChangelog.trim() || 'No changelog is available yet.' + const notesSource = releaseNotes ? 
'Release notes' : 'Bundled changelog' + const releasePageUrl = updateSnapshot?.releasePageUrl + const updateStatus = updateSnapshot?.status || 'idle' + const canAct = !busy && updateStatus !== 'checking' && updateStatus !== 'downloading' + + const handleInstall = useCallback(() => { + if (!onInstallUpdateNow) return + if (!confirm('Install update and restart AGBench now?')) return + void onInstallUpdateNow() + }, [onInstallUpdateNow]) + + const handleOpenRelease = useCallback(() => { + if (!releasePageUrl || typeof window.api.openExternalOrPath !== 'function') return + void window.api.openExternalOrPath(releasePageUrl) + }, [releasePageUrl]) + + if (!open) return null + + return ( +
{ + if (event.target === event.currentTarget) onDismiss() + }} + > +
+
+
+ +
+

{entry.releaseName || `AGBench ${entry.version}`}

+

+ v{entry.version} + {entry.releaseDate ? ` - ${formatDate(entry.releaseDate)}` : ''} - {notesSource} +

+
+
+ +
+ + {updateStatus === 'downloading' && updateSnapshot?.downloadProgress && ( +
+
+ {updateSnapshot.downloadProgress.percent.toFixed(1)}% +
+ )} + + {updateStatus === 'error' && updateSnapshot?.errorMessage && ( +
+ {updateSnapshot.errorMessage} +
+ )} + +
+
{displayNotes}
+
+ +
+ {releasePageUrl && ( + + )} + {updateStatus === 'available' && onDownloadUpdate && ( + + )} + {updateStatus === 'downloaded' && onInstallUpdateNow && ( + + )} + {(updateStatus === 'error' || + updateStatus === 'idle' || + updateStatus === 'not-available' || + updateStatus === 'disabled') && + onCheckForUpdates && ( + + )} + +
+
+
+ ) +} + +export function resolveChangelogEntry( + changelogSnapshot: ProductChangelogSnapshot | null, + updateSnapshot: UpdateStateSnapshot | null +): ProductUpdateChangelog { + if (updateSnapshot?.latestVersion) { + return { + version: updateSnapshot.latestVersion, + ...(updateSnapshot.releaseName ? { releaseName: updateSnapshot.releaseName } : {}), + ...(updateSnapshot.releaseDate ? { releaseDate: updateSnapshot.releaseDate } : {}), + ...(updateSnapshot.releaseNotes ? { releaseNotes: updateSnapshot.releaseNotes } : {}) + } + } + if (changelogSnapshot?.pendingUpdateChangelog) return changelogSnapshot.pendingUpdateChangelog + return { + version: changelogSnapshot?.currentVersion || 'unknown' + } +} + +export function formatReleaseNotes(notes: ProductUpdateReleaseNotes | undefined): string { + if (typeof notes === 'string') return notes.trim() + if (!Array.isArray(notes)) return '' + return notes + .map((note) => { + const body = note.note?.trim() + return body ? `## ${note.version}\n${body}` : '' + }) + .filter(Boolean) + .join('\n\n') +} + +function formatDate(value: string): string { + const time = Date.parse(value) + if (!Number.isFinite(time)) return value + return new Date(time).toLocaleDateString(undefined, { + year: 'numeric', + month: 'short', + day: '2-digit' + }) +} diff --git a/src/renderer/src/components/UpdatePill.tsx b/src/renderer/src/components/UpdatePill.tsx new file mode 100644 index 00000000..10ec2a2b --- /dev/null +++ b/src/renderer/src/components/UpdatePill.tsx @@ -0,0 +1,65 @@ +import React from 'react' +import type { UpdateStateSnapshot } from '../../../main/UpdateService' + +interface UpdatePillProps { + snapshot: UpdateStateSnapshot | null + onOpen: () => void +} + +const ACTIONABLE_UPDATE_STATUSES = new Set([ + 'available', + 'downloading', + 'downloaded', + 'error' +]) + +export function UpdatePill({ snapshot, onOpen }: UpdatePillProps): React.JSX.Element | null { + if (!snapshot || !ACTIONABLE_UPDATE_STATUSES.has(snapshot.status)) 
return null + + const label = labelForSnapshot(snapshot) + return ( + + ) +} + +function labelForSnapshot(snapshot: UpdateStateSnapshot): string { + switch (snapshot.status) { + case 'available': + return snapshot.latestVersion ? `Update ${snapshot.latestVersion}` : 'Update' + case 'downloading': + return typeof snapshot.downloadProgress?.percent === 'number' + ? `${Math.round(snapshot.downloadProgress.percent)}%` + : 'Downloading' + case 'downloaded': + return 'Restart' + case 'error': + return 'Update issue' + default: + return 'Update' + } +} + +function titleForSnapshot(snapshot: UpdateStateSnapshot): string { + switch (snapshot.status) { + case 'available': + return snapshot.latestVersion + ? `AGBench ${snapshot.latestVersion} is available` + : 'An AGBench update is available' + case 'downloading': + return 'AGBench update is downloading' + case 'downloaded': + return 'Restart AGBench to install the downloaded update' + case 'error': + return snapshot.errorMessage || 'AGBench update check failed' + default: + return 'AGBench update' + } +} diff --git a/src/renderer/src/components/UpdateStatusPane.tsx b/src/renderer/src/components/UpdateStatusPane.tsx index a8a20d74..61b41195 100644 --- a/src/renderer/src/components/UpdateStatusPane.tsx +++ b/src/renderer/src/components/UpdateStatusPane.tsx @@ -1,4 +1,6 @@ -import React, { useCallback, useEffect, useState } from 'react' +import React, { useCallback } from 'react' +import type { UpdateStateSnapshot } from '../../../main/UpdateService' +import { useUpdateStatus } from '../hooks/useUpdateStatus' /** * UpdateStatusPane — Phase G2 sub-section of Settings → System. @@ -15,79 +17,14 @@ import React, { useCallback, useEffect, useState } from 'react' * reshape the Settings layout later. 
*/ -interface UpdateSnapshot { - status: - | 'disabled' - | 'idle' - | 'checking' - | 'available' - | 'not-available' - | 'downloading' - | 'downloaded' - | 'error' - enabled: boolean - channel: 'debug' | 'stable' | 'nightly' - latestVersion?: string - downloadProgress?: { - bytesPerSecond: number - percent: number - transferred: number - total: number - } - errorMessage?: string - lastCheckedAt?: string -} - export function UpdateStatusPane(): React.JSX.Element { - const [snap, setSnap] = useState(null) - const [busy, setBusy] = useState(false) - - const refresh = useCallback(async () => { - try { - const next = (await window.api.updateSnapshot()) as UpdateSnapshot - setSnap(next) - } catch { - // Ignore — IPC handler may not be registered yet during a hot - // reload; the next event will repopulate. - } - }, []) - - useEffect(() => { - void Promise.resolve().then(() => refresh()) - if (typeof window.api.onUpdateStatusChanged === 'function') { - window.api.onUpdateStatusChanged((next) => setSnap(next as UpdateSnapshot)) - } - }, [refresh]) - - const handleCheck = useCallback(async () => { - setBusy(true) - try { - const next = (await window.api.checkForUpdates()) as UpdateSnapshot - setSnap(next) - } finally { - setBusy(false) - } - }, []) - - const handleDownload = useCallback(async () => { - setBusy(true) - try { - const next = (await window.api.downloadUpdate()) as UpdateSnapshot - setSnap(next) - } finally { - setBusy(false) - } - }, []) + const { snapshot: snap, busy, checkForUpdates, downloadUpdate, installUpdateNow } = + useUpdateStatus() const handleInstall = useCallback(async () => { if (!confirm('Install update and restart AGBench now?')) return - setBusy(true) - try { - await window.api.installUpdateNow() - } finally { - setBusy(false) - } - }, []) + await installUpdateNow() + }, [installUpdateNow]) if (!snap) { return ( @@ -136,7 +73,7 @@ export function UpdateStatusPane(): React.JSX.Element { disabled={ busy || !snap.enabled || snap.status === 
'checking' || snap.status === 'downloading' } - onClick={() => void handleCheck()} + onClick={() => void checkForUpdates()} > {snap.status === 'checking' ? 'Checking…' : 'Check for updates'} @@ -145,7 +82,7 @@ export function UpdateStatusPane(): React.JSX.Element { type="button" className="btn btn-sm btn-primary" disabled={busy} - onClick={() => void handleDownload()} + onClick={() => void downloadUpdate()} > Download @@ -174,7 +111,7 @@ export function UpdateStatusPane(): React.JSX.Element { ) } -function labelForStatus(status: UpdateSnapshot['status']): string { +function labelForStatus(status: UpdateStateSnapshot['status']): string { switch (status) { case 'disabled': return 'Disabled' diff --git a/src/renderer/src/hooks/useUpdateStatus.ts b/src/renderer/src/hooks/useUpdateStatus.ts new file mode 100644 index 00000000..1219bb38 --- /dev/null +++ b/src/renderer/src/hooks/useUpdateStatus.ts @@ -0,0 +1,68 @@ +import { useCallback, useEffect, useState } from 'react' +import type { UpdateStateSnapshot } from '../../../main/UpdateService' + +export function useUpdateStatus(): { + snapshot: UpdateStateSnapshot | null + busy: boolean + refresh: () => Promise + checkForUpdates: () => Promise + downloadUpdate: () => Promise + installUpdateNow: () => Promise +} { + const [snapshot, setSnapshot] = useState(null) + const [busy, setBusy] = useState(false) + + const refresh = useCallback(async (): Promise => { + try { + const next = await window.api.updateSnapshot() + setSnapshot(next) + return next + } catch { + return null + } + }, []) + + useEffect(() => { + void refresh() + if (typeof window.api.onUpdateStatusChanged !== 'function') return + return window.api.onUpdateStatusChanged((next) => setSnapshot(next)) + }, [refresh]) + + const runUpdateAction = useCallback( + async ( + action: () => Promise + ): Promise => { + setBusy(true) + try { + const next = await action() + setSnapshot(next) + return next + } finally { + setBusy(false) + } + }, + [] + ) + + const 
checkForUpdates = useCallback( + () => runUpdateAction(() => window.api.checkForUpdates()), + [runUpdateAction] + ) + const downloadUpdate = useCallback( + () => runUpdateAction(() => window.api.downloadUpdate()), + [runUpdateAction] + ) + const installUpdateNow = useCallback( + () => runUpdateAction(() => window.api.installUpdateNow()), + [runUpdateAction] + ) + + return { + snapshot, + busy, + refresh, + checkForUpdates, + downloadUpdate, + installUpdateNow + } +} From d3add65595d9ec235d8ee14d946cbc3095534f57 Mon Sep 17 00:00:00 2001 From: Chris Izatt Date: Thu, 4 Jun 2026 13:35:00 +0100 Subject: [PATCH 17/33] feat(composer): seat the input in an inner module within a contrast-glass frame (rim slice 2) Track F slice 2. Wraps the native composer's textarea + control rows in a new .composer-inner-module (theme-tone, so the input stays perfectly readable), and turns the outer .composer-surface into a CONTRAST GLASS - light/white-tinted on dark themes, dark/black-tinted on the 5 light themes - visible only as the frame around the module. The slice-1 provider rim carries onto both. Native shell only; the inner wrapper is display:contents on every other shell so codex/claude/grok/kimi/cursor + obsidian/alabaster are untouched. Reduce-motion honoured. typecheck + build green; the lone red test (IpcValidation) is a pre-existing gap in Codex's 6def607, unrelated to this diff. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- src/renderer/src/App.tsx | 3545 +++++++++-------- .../css/03-composer-welcome-activity.css | 220 + .../src/assets/css/07-composer-shells.css | 75 + 3 files changed, 2090 insertions(+), 1750 deletions(-) diff --git a/src/renderer/src/App.tsx b/src/renderer/src/App.tsx index 39366af8..3075a129 100644 --- a/src/renderer/src/App.tsx +++ b/src/renderer/src/App.tsx @@ -5316,8 +5316,8 @@ function App(): React.JSX.Element { const [showChangelogSheet, setShowChangelogSheet] = useState(false) const [changelogSnapshot, setChangelogSnapshot] = useState(null) const autoChangelogOpenedRef = useRef(false) - const refreshChangelogSnapshot = useCallback( - async (): Promise => { + const refreshChangelogSnapshot = + useCallback(async (): Promise => { try { const next = await window.api.changelogSnapshot() setChangelogSnapshot(next) @@ -5325,9 +5325,7 @@ function App(): React.JSX.Element { } catch { return null } - }, - [] - ) + }, []) const handleOpenChangelogSheet = useCallback(() => { setShowChangelogSheet(true) void refreshChangelogSnapshot() @@ -8225,12 +8223,12 @@ function App(): React.JSX.Element { } } - /** - * Keep a ref to the *latest* `refreshUsageSummary` closure so the - * autonomous polling effect (below) doesn't need to depend on `codexStatus` - * and tear the timer down on every status mutation. - */ - + /** + * Keep a ref to the *latest* `refreshUsageSummary` closure so the + * autonomous polling effect (below) doesn't need to depend on `codexStatus` + * and tear the timer down on every status mutation. + */ + refreshUsageSummaryRef.current = refreshUsageSummary const handleSelectWorkspace = async () => { @@ -17483,1621 +17481,1642 @@ function App(): React.JSX.Element {
)} - {(() => { - // Gate the overlay activation: render the highlight - // layer only when the prompt contains at least one - // RESOLVED `@Token`. Without this, the textarea's - // `color: transparent` zeros out the text in shells - // where the overlay's font/padding drifts from the - // textarea (Claude / Codex / Kimi etc. each override - // base padding). the maintainer hit this on the ensemble - // welcome screen — text invisible in Claude shell, - // vertical sync issues in others. - // 1.0.4 — drop the `isCurrentEnsembleChat` precondition. - // `hasResolvedMention` already self-guards on - // `participants.length === 0`, so non-ensemble chats - // are excluded naturally. The extra gate caused a - // regression on the ensemble welcome screen where - // `chatKind === 'ensemble'` evaluated false during - // some welcome-surface render passes — leaving typed - // tags as plain white text instead of bold + - // provider-tinted (the maintainer's "tags not lighting up" - // report). Now: anywhere participants ARE configured - // and a mention resolves, the overlay activates. - const composerHasMention = hasResolvedMention( - prompt, - currentChat?.ensemble?.participants || [] - ) - // 1.0.4 — sync epoch for the overlay's auto-metric - // mirror. Any change in the inputs below can shift - // the textarea's computed font / padding / border, - // so we encode them into a single string the - // overlay watches as a useLayoutEffect dep. The - // ResizeObserver inside the overlay handles every - // size-changing variation that happens between - // these explicit triggers. - const composerOverlaySyncEpoch = `${appearance.composerStyle}|${appearance.themeAppearance}|${isWelcomeChat ? 'welcome' : 'active'}` - return ( -
- {composerHasMention && ( - + {(() => { + // Gate the overlay activation: render the highlight + // layer only when the prompt contains at least one + // RESOLVED `@Token`. Without this, the textarea's + // `color: transparent` zeros out the text in shells + // where the overlay's font/padding drifts from the + // textarea (Claude / Codex / Kimi etc. each override + // base padding). the maintainer hit this on the ensemble + // welcome screen — text invisible in Claude shell, + // vertical sync issues in others. + // 1.0.4 — drop the `isCurrentEnsembleChat` precondition. + // `hasResolvedMention` already self-guards on + // `participants.length === 0`, so non-ensemble chats + // are excluded naturally. The extra gate caused a + // regression on the ensemble welcome screen where + // `chatKind === 'ensemble'` evaluated false during + // some welcome-surface render passes — leaving typed + // tags as plain white text instead of bold + + // provider-tinted (the maintainer's "tags not lighting up" + // report). Now: anywhere participants ARE configured + // and a mention resolves, the overlay activates. + const composerHasMention = hasResolvedMention( + prompt, + currentChat?.ensemble?.participants || [] + ) + // 1.0.4 — sync epoch for the overlay's auto-metric + // mirror. Any change in the inputs below can shift + // the textarea's computed font / padding / border, + // so we encode them into a single string the + // overlay watches as a useLayoutEffect dep. The + // ResizeObserver inside the overlay handles every + // size-changing variation that happens between + // these explicit triggers. + const composerOverlaySyncEpoch = `${appearance.composerStyle}|${appearance.themeAppearance}|${isWelcomeChat ? 'welcome' : 'active'}` + return ( +
+ {composerHasMention && ( + + )} +