diff --git a/CHANGELOG.md b/CHANGELOG.md index ec1880c4..87583a93 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,38 @@ Notable changes to AGBench, the local-first macOS desktop workbench for running and reviewing AI coding agents. Entries are user-facing highlights; execution, history, and workspace state stay on your machine throughout. +## 1.0.73 — 2026-06-04 + +### Added +- **Commit & open PRs from the composer** — the Review changes menu now drives a + real Git flow: see your branch and changed files, write a message and Stage all + & Commit, then Create PR once the branch is pushed and ready (gated on a live + readiness check). +- **Clearer Ensemble cost & escalation** — each round shows real vs. estimated + spend (a latency line + an "API-equivalent" badge on estimates), and the + orchestrator's escalation signals surface inline, so a multi-seat panel's value + is visible rather than guessed. +- **Optional "why?" on approvals** — attach a short intent note when you allow or + deny an agent action; it's recorded in the approval ledger. + +### Changed +- **Refined native composer** — the AGBench shell is now a cohesive console: the + input sits in a framed module (solid black/white outer frame, theme-tone inner + panel + provider rim, full-bleed and squared), the Ensemble / Create-PR / Steer + rows match the same solid frame, and the permission picker sits up front beside + the + button. Onboarding and Settings → Appearance previews reflect the new look. +- **Deleting a chat tidies up after itself** — removing a chat now also clears + that chat's own run-forensic artifacts (and only those). + +### Fixed +- **Kimi tool calls** — repeated calls coalesce into a single inline card that + updates in place (instead of stacking) and now show the target filename, + matching the other providers. +- **Bug Report Refinement** — tidied how the in-app reporter shows your workspace + (now a friendly `~/…` label). 
+- Onboarding, empty states, the welcome dashboard, provider accent colours, and + the Diff Studio / File Editor light themes all got polish + readability fixes. + ## 1.0.72 — 2026-06-04 ### Security diff --git a/package-lock.json b/package-lock.json index a1d0e806..13b26334 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "agbench", - "version": "1.0.7", + "version": "1.0.73", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "agbench", - "version": "1.0.7", + "version": "1.0.73", "hasInstallScript": true, "license": "Apache-2.0", "dependencies": { diff --git a/package.json b/package.json index feb7498a..4fc53511 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "agbench", - "version": "1.0.72", + "version": "1.0.73", "description": "AGBench desktop workspace", "main": "./out/main/index.js", "author": "Chris Izatt", diff --git a/src/main/AppStoreDeleteChat.test.ts b/src/main/AppStoreDeleteChat.test.ts new file mode 100644 index 00000000..8fa3b70b --- /dev/null +++ b/src/main/AppStoreDeleteChat.test.ts @@ -0,0 +1,104 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest' +import fs from 'fs' +import { join } from 'path' +import { AppStore } from './store' +import type { ChatRecord, ChatRun } from './store/types' + +const userDataPath = vi.hoisted(() => `/tmp/agentbench-delete-chat-test-${process.pid}`) + +vi.mock('electron', () => ({ + app: { + getPath: () => userDataPath + } +})) + +const runEventPath = (runId: string): string => join(userDataPath, 'run-events', `${runId}.jsonl`) +const artifactDir = (runId: string): string => join(userDataPath, 'run-artifacts', runId) + +function makeRun(runId: string): ChatRun { + return { runId, startedAt: '2026-05-08T00:00:00.000Z' } +} + +function seedRunFiles(runId: string): void { + fs.mkdirSync(join(userDataPath, 'run-events'), { recursive: true }) + fs.writeFileSync(runEventPath(runId), `{"runId":"${runId}"}\n`, 'utf8') + 
fs.mkdirSync(artifactDir(runId), { recursive: true }) + fs.writeFileSync(join(artifactDir(runId), 'stdout.log'), 'stream\n', 'utf8') +} + +function saveChatWithRuns(appChatId: string, runs: ChatRun[]): ChatRecord { + const chat: ChatRecord = { + appChatId, + scope: 'workspace', + chatKind: 'single', + provider: 'gemini', + title: appChatId, + workspaceId: 'workspace-1', + workspacePath: '/repo', + createdAt: 1, + updatedAt: 1, + archived: false, + messages: [], + runs + } + AppStore.saveChat(chat) + return chat +} + +describe('AppStore.deleteChat run cleanup', () => { + beforeEach(() => { + fs.rmSync(userDataPath, { recursive: true, force: true }) + fs.mkdirSync(join(userDataPath, 'chats'), { recursive: true }) + }) + + it('removes the deleted chat run-event files and artifacts', () => { + saveChatWithRuns('chat-a', [makeRun('run-1'), makeRun('run-2')]) + seedRunFiles('run-1') + seedRunFiles('run-2') + + expect(fs.existsSync(runEventPath('run-1'))).toBe(true) + expect(fs.existsSync(artifactDir('run-1'))).toBe(true) + + AppStore.deleteChat('chat-a') + + // Chat JSON gone (behaviour preserved). + expect(fs.existsSync(join(userDataPath, 'chats', 'chat-a.json'))).toBe(false) + // Both runs' forensic files removed. + expect(fs.existsSync(runEventPath('run-1'))).toBe(false) + expect(fs.existsSync(artifactDir('run-1'))).toBe(false) + expect(fs.existsSync(runEventPath('run-2'))).toBe(false) + expect(fs.existsSync(artifactDir('run-2'))).toBe(false) + }) + + it('leaves a sibling chat with a prefix-similar run id untouched', () => { + // chat-a owns `run-1`; sibling chat-b owns `run-1-extra` whose id has + // `run-1` as a string prefix. A prefix/readdir-based delete would wrongly + // catch the sibling's files; an exact-name delete must not. + saveChatWithRuns('chat-a', [makeRun('run-1')]) + saveChatWithRuns('chat-b', [makeRun('run-1-extra')]) + seedRunFiles('run-1') + seedRunFiles('run-1-extra') + + AppStore.deleteChat('chat-a') + + // Deleted chat's run is gone... 
+ expect(fs.existsSync(runEventPath('run-1'))).toBe(false) + expect(fs.existsSync(artifactDir('run-1'))).toBe(false) + // ...but the sibling's prefix-similar run is fully intact. + expect(fs.existsSync(runEventPath('run-1-extra'))).toBe(true) + expect(fs.existsSync(artifactDir('run-1-extra'))).toBe(true) + expect(fs.existsSync(join(userDataPath, 'chats', 'chat-b.json'))).toBe(true) + }) + + it('succeeds when a run-event file is already missing', () => { + // run-1 has files, run-2 was never persisted (missing on disk). + saveChatWithRuns('chat-a', [makeRun('run-1'), makeRun('run-2')]) + seedRunFiles('run-1') + expect(fs.existsSync(runEventPath('run-2'))).toBe(false) + + expect(() => AppStore.deleteChat('chat-a')).not.toThrow() + + expect(fs.existsSync(runEventPath('run-1'))).toBe(false) + expect(fs.existsSync(join(userDataPath, 'chats', 'chat-a.json'))).toBe(false) + }) +}) diff --git a/src/main/AppStoreSettings.test.ts b/src/main/AppStoreSettings.test.ts new file mode 100644 index 00000000..81a583c1 --- /dev/null +++ b/src/main/AppStoreSettings.test.ts @@ -0,0 +1,44 @@ +import { beforeEach, describe, expect, it, vi } from 'vitest' +import fs from 'fs' +import { AppStore } from './store' + +const userDataPath = vi.hoisted(() => `/tmp/agbench-settings-test-${process.pid}`) + +vi.mock('electron', () => ({ + app: { + getPath: () => userDataPath + } +})) + +describe('AppStore settings defaults', () => { + beforeEach(() => { + fs.rmSync(userDataPath, { recursive: true, force: true }) + fs.mkdirSync(userDataPath, { recursive: true }) + }) + + it('defaults packaged update checks to the stable channel', () => { + expect(AppStore.getSettings().updateChannel).toBe('stable') + }) + + it('normalizes persisted changelog metadata on load', () => { + AppStore.updateSettings({ + lastSeenChangelogVersion: ' 1.0.72 ', + pendingUpdateChangelog: { + version: ' 1.0.73 ', + releaseName: ' AGBench 1.0.73 ', + releaseDate: ' 2026-06-04T12:00:00.000Z ', + releaseNotes: [{ version: ' 1.0.73 
', note: 'Updater UI.' }, { version: '', note: '' }] + } + }) + + expect(AppStore.getSettings()).toMatchObject({ + lastSeenChangelogVersion: '1.0.72', + pendingUpdateChangelog: { + version: '1.0.73', + releaseName: 'AGBench 1.0.73', + releaseDate: '2026-06-04T12:00:00.000Z', + releaseNotes: [{ version: '1.0.73', note: 'Updater UI.' }] + } + }) + }) +}) diff --git a/src/main/EnsemblePrompt.test.ts b/src/main/EnsemblePrompt.test.ts index 23be3111..29907715 100644 --- a/src/main/EnsemblePrompt.test.ts +++ b/src/main/EnsemblePrompt.test.ts @@ -1,6 +1,7 @@ import { describe, expect, it } from 'vitest' import { buildEnsembleParticipantPrompt, + buildParticipantTokenMap, formatRoundModeInstructions, formatSameProviderDisambiguationNote, formatToolTraceSummary, @@ -381,8 +382,10 @@ describe('Ensemble prompt composition', () => { currentPrompt: 'Walk through this codebase.', roundId: 'round-1' }) - // Roster marker present for the first speaker - expect(prompt).toContain('Claude / Reviewer (you — first speaker)') + // Roster marker present for the first speaker. 1.0.7 — the + // rename-stable handle `#p1` now sits between the role and the + // position marker (Claude's id sorts first → `p1`). + expect(prompt).toContain('Claude / Reviewer #p1 (you — first speaker)') // Scoping rule present in the Rules section expect(prompt).toContain('SPEAKING FIRST in a multi-participant round') expect(prompt).toContain('Scope the problem and propose a direction') @@ -401,7 +404,8 @@ describe('Ensemble prompt composition', () => { // 1.0.4-AJ — middle slot now carries an explicit position // count ("you — position 2 of 3") to give the model a turn- // awareness signal. The first-speaker rule itself stays off. - expect(prompt).toContain('Codex / Worker (you — position 2 of 3)') + // 1.0.7 — `#p2` (Codex's id sorts second) sits before the marker. 
+ expect(prompt).toContain('Codex / Worker #p2 (you — position 2 of 3)') expect(prompt).not.toContain('first speaker') expect(prompt).not.toContain('SPEAKING FIRST') }) @@ -421,8 +425,9 @@ describe('Ensemble prompt composition', () => { roundId: 'round-1' }) // Even for the single participant, no "first speaker" framing - // since there's no second/third speaker to defer to. - expect(prompt).toContain('Claude / Reviewer (you)') + // since there's no second/third speaker to defer to. 1.0.7 — a + // solo roster still gets a handle (`#p1`) for tag consistency. + expect(prompt).toContain('Claude / Reviewer #p1 (you)') expect(prompt).not.toContain('first speaker') expect(prompt).not.toContain('SPEAKING FIRST') }) @@ -443,7 +448,7 @@ describe('Ensemble prompt composition', () => { currentPrompt: 'Close out the round.', roundId: 'round-1' }) - expect(prompt).toContain('Gemini / Researcher (you — last speaker, position 3 of 3)') + expect(prompt).toContain('Gemini / Researcher #p3 (you — last speaker, position 3 of 3)') expect(prompt).toContain('SPEAKING LAST in this turn-bound round') expect(prompt).toContain('position 3 of 3') expect(prompt).toContain('`ensemble_yield(target: ...)` cannot route') @@ -462,7 +467,8 @@ describe('Ensemble prompt composition', () => { expect(prompt).not.toContain('SPEAKING LAST') expect(prompt).not.toContain('last speaker') // Middle slot in 3+ round gets the bare position marker - expect(prompt).toContain('Codex / Worker (you — position 2 of 3)') + // (1.0.7 — with the `#p2` handle ahead of it). + expect(prompt).toContain('Codex / Worker #p2 (you — position 2 of 3)') }) it('does NOT emit the last-speaker rule in continuous orchestration mode', () => { @@ -486,8 +492,8 @@ describe('Ensemble prompt composition', () => { // Continuous-mode speaker at the bottom of the roster still // gets a position marker for context (the round can extend // via the hop budget; "position 3 of 3" reflects roster - // position, not a hard-end). 
- expect(prompt).toContain('Gemini / Researcher (you — position 3 of 3)') + // position, not a hard-end). 1.0.7 — `#p3` handle ahead of it. + expect(prompt).toContain('Gemini / Researcher #p3 (you — position 3 of 3)') }) it('emits the hops-near-cap rule when continuous round is near its limit', () => { @@ -653,6 +659,186 @@ describe('Ensemble prompt composition', () => { }) }) +/* + * 1.0.7 — rename-stable participant handle (`#pN`) in the agent- + * visible tag. + * + * The handle is derived from a STABLE ordering of the roster by the + * immutable participant id (NOT `order`, NOT `role`), so a seat keeps + * the same `#pN` across role renames + speaking-order reshuffles. It's + * an identity anchor — `#`-prefixed so it can never resolve as an + * `@`-mention — that lets a reader tie a renamed seat's later messages + * back to its earlier frozen-role messages. + */ +describe('buildParticipantTokenMap (1.0.7 stable handle)', () => { + it('assigns p1..pN by stable id sort, independent of speaking order', () => { + // Speaking `order` is deliberately reversed vs id sort to prove + // the token tracks the id, not the order. + const map = buildParticipantTokenMap([ + { ...ensemble.participants[0], id: 'gemini', order: 1 }, + { ...ensemble.participants[1], id: 'codex', order: 2 }, + { ...ensemble.participants[2], id: 'claude', order: 3 } + ]) + // claude < codex < gemini lexicographically. 
+ expect(map.get('claude')).toBe('p1') + expect(map.get('codex')).toBe('p2') + expect(map.get('gemini')).toBe('p3') + }) + + it('is stable across a role rename (token keyed on id, not role)', () => { + const before = buildParticipantTokenMap([ + { ...ensemble.participants[0], id: 'seat-a', role: 'Planner' }, + { ...ensemble.participants[1], id: 'seat-b', role: 'Worker' } + ]) + const after = buildParticipantTokenMap([ + { ...ensemble.participants[0], id: 'seat-a', role: 'Architect' }, // renamed + { ...ensemble.participants[1], id: 'seat-b', role: 'Worker' } + ]) + expect(after.get('seat-a')).toBe(before.get('seat-a')) + expect(after.get('seat-b')).toBe(before.get('seat-b')) + }) + + it('is stable across a speaking-order reshuffle', () => { + const before = buildParticipantTokenMap([ + { ...ensemble.participants[0], id: 'seat-a', order: 1 }, + { ...ensemble.participants[1], id: 'seat-b', order: 2 } + ]) + const after = buildParticipantTokenMap([ + { ...ensemble.participants[1], id: 'seat-b', order: 1 }, // moved up + { ...ensemble.participants[0], id: 'seat-a', order: 2 } + ]) + expect(after.get('seat-a')).toBe(before.get('seat-a')) + expect(after.get('seat-b')).toBe(before.get('seat-b')) + }) + + it('dedupes repeated ids and tolerates an empty roster', () => { + expect(buildParticipantTokenMap(undefined).size).toBe(0) + expect(buildParticipantTokenMap([]).size).toBe(0) + const dup = buildParticipantTokenMap([ + { ...ensemble.participants[0], id: 'x' }, + { ...ensemble.participants[1], id: 'x' } + ]) + expect(dup.size).toBe(1) + expect(dup.get('x')).toBe('p1') + }) +}) + +describe('Ensemble tag carries the #pN handle (1.0.7)', () => { + function chatWithParticipantMessages(): ChatRecord { + const base = chat() + // Stamp ensembleParticipantId so the tag can resolve a seat token. 
+ return { + ...base, + messages: [ + { + id: 'u1', + role: 'user', + content: 'Initial request', + timestamp: '2026-05-24T00:00:00.000Z' + }, + { + id: 'a1', + role: 'assistant', + content: 'Review response', + timestamp: '2026-05-24T00:00:01.000Z', + metadata: { + ensembleProvider: 'claude', + ensembleRole: 'Reviewer', + ensembleParticipantId: 'claude' + } + } + ] + } + } + + it('appends #pN to the transcript tag for a message with a roster-resolved id', () => { + const prompt = buildEnsembleParticipantPrompt({ + chat: chatWithParticipantMessages(), + config: ensemble, + participant: ensemble.participants[1], + currentPrompt: 'Continue.', + roundId: 'round-2', + chatContextTurns: 4 + }) + // claude sorts first → #p1. + expect(prompt).toContain('[Claude / Reviewer #p1]') + }) + + it('keeps the frozen role but the CURRENT seat handle after a rename (continuity)', () => { + // The historical message froze role "Reviewer"; the roster has + // since renamed that same id to "Critic". The transcript tag must + // keep the FROZEN role (Reviewer) yet still carry the stable seat + // handle (#p1) so a reader can tie the two together. + const renamedConfig: EnsembleConfig = { + ...ensemble, + participants: ensemble.participants.map((p) => + p.id === 'claude' ? { ...p, role: 'Critic' } : p + ) + } + const prompt = buildEnsembleParticipantPrompt({ + chat: chatWithParticipantMessages(), + config: renamedConfig, + participant: renamedConfig.participants[1], + currentPrompt: 'Continue.', + roundId: 'round-2', + chatContextTurns: 4 + }) + // Frozen role retained in the transcript line... + expect(prompt).toContain('[Claude / Reviewer #p1]') + // ...and the CURRENT roster line shows the renamed role with the + // SAME handle, anchoring the identity across the rename. 
+ expect(prompt).toContain('Claude / Critic #p1') + }) + + it('omits the handle for a message whose id is no longer in the roster', () => { + const base = chatWithParticipantMessages() + const orphan: ChatRecord = { + ...base, + messages: [ + ...base.messages, + { + id: 'a2', + role: 'assistant', + content: 'Departed participant response', + timestamp: '2026-05-24T00:00:02.000Z', + metadata: { + ensembleProvider: 'kimi', + ensembleRole: 'Coder', + ensembleParticipantId: 'removed-seat' + } + } + ] + } + const prompt = buildEnsembleParticipantPrompt({ + chat: orphan, + config: ensemble, + participant: ensemble.participants[1], + currentPrompt: 'Continue.', + roundId: 'round-2', + chatContextTurns: 4 + }) + // No roster seat for `removed-seat` → bare provider/role tag. + expect(prompt).toContain('[Kimi / Coder]') + expect(prompt).not.toContain('Kimi / Coder #') + }) + + it('leaves messages without an ensembleParticipantId in the bare tag form', () => { + // The base fixture message (a1) here intentionally has NO + // participant id — older transcript rows predate the id stamp. 
+ const base = chat() // unmodified: a1 lacks ensembleParticipantId + const prompt = buildEnsembleParticipantPrompt({ + chat: base, + config: ensemble, + participant: ensemble.participants[1], + currentPrompt: 'Continue.', + roundId: 'round-2', + chatContextTurns: 4 + }) + expect(prompt).toContain('[Claude / Reviewer]') + expect(prompt).not.toContain('[Claude / Reviewer #') + }) +}) + describe('formatSameProviderDisambiguationNote', () => { function participant( overrides: Partial & Pick diff --git a/src/main/EnsemblePrompt.ts b/src/main/EnsemblePrompt.ts index b674209a..0ec451a7 100644 --- a/src/main/EnsemblePrompt.ts +++ b/src/main/EnsemblePrompt.ts @@ -149,7 +149,18 @@ function applyChairSummaryOrder( export function buildEnsembleParticipantPrompt(input: BuildEnsemblePromptInput): string { const orderedParticipants = getOrderedEnsembleParticipants(input.config, input.currentPrompt) - const participantLabel = `${providerLabel(input.participant.provider)} / ${input.participant.role || 'Participant'}` + // 1.0.7 — rename-stable participant handles (`#p3`) keyed on the + // immutable participant id. Built from the FULL roster (not just the + // enabled/ordered subset) so a message authored by a participant who + // was later disabled still resolves to its seat token, and so the + // self-label, roster lines, and tagged transcript all reference the + // SAME handle. See `buildParticipantTokenMap` for the derivation + + // why the `#`-prefixed token is resolver-safe. + const participantTokens = buildParticipantTokenMap(input.config.participants) + const selfToken = participantTokens.get(input.participant.id) + const participantLabel = `${providerLabel(input.participant.provider)} / ${input.participant.role || 'Participant'}${ + selfToken ? ` #${selfToken}` : '' + }` const orchestrationMode = input.config.orchestrationMode === 'continuous' ? 
'continuous' : 'turn_bound' const maxContinuationHops = input.config.maxContinuationHops || 6 @@ -256,7 +267,15 @@ export function buildEnsembleParticipantPrompt(input: BuildEnsemblePromptInput): modelHintRaw ? `@${titleCase(modelHintRaw)}` : '' ].filter(Boolean) const hint = hintTokens.length ? ` — address with ${hintTokens.join(' or ')}` : '' - return `${participant.order}. ${providerLabel(participant.provider)} / ${participant.role || 'Participant'}${marker}${hint}` + // 1.0.7 — rename-stable handle (`#p3`). Placed right after the + // role so the roster line mirrors the transcript tag form + // (`Provider / Role #pN`). It's an identity anchor, not an + // addressing form — the `@Role` / `@Model` hints above remain + // the routing handles; `#pN` lets a reader (human or agent) + // tie a renamed seat back to its earlier frozen-role messages. + const rosterToken = participantTokens.get(participant.id) + const tokenSuffix = rosterToken ? ` #${rosterToken}` : '' + return `${participant.order}. ${providerLabel(participant.provider)} / ${participant.role || 'Participant'}${tokenSuffix}${marker}${hint}` }) .join('\n') const disambigNote = formatSameProviderDisambiguationNote(orderedParticipants) @@ -267,7 +286,14 @@ export function buildEnsembleParticipantPrompt(input: BuildEnsemblePromptInput): // skipped. Either both ship together or neither does. const hasWorkspaceStanza = workspaceStanza !== null const sessionEventsStanza = formatSessionEventsStanza(input.config) - const transcript = buildTaggedTranscript(input.chat.messages || [], input.chatContextTurns || 8) + // Threaded into the tagged-transcript builder so every + // `[Provider / Role #pN]` header carries the same handle the + // roster + self-label use. 
+ const transcript = buildTaggedTranscript( + input.chat.messages || [], + input.chatContextTurns || 8, + participantTokens + ) return [ 'AGBench Ensemble Mode', @@ -563,14 +589,71 @@ export function formatToolTraceSummary(activities: readonly ToolActivity[] | und return `(tools: ${segments.join(' · ')}${suffix})` } -function buildTaggedTranscript(messages: ChatMessage[], contextTurns: number): string { +/** + * 1.0.7 — stable per-participant handle for the agent-visible + * transcript tag (`[Codex / Planner #p3]`). + * + * Problem it solves: the tag historically carried only `provider / + * role`. When the user renames a participant's role mid-session + * ("Planner" → "Architect"), agents lost the thread — there was no + * rename-stable identifier they could anchor on to address a peer + * across the change. The role is mutable; the model name can be + * shared by multiple participants; only the participant **id** is + * both stable and unique. But the raw id (`ensemble-participant-7`, + * or a base36-timestamp fallback) is long + opaque — unfit for an + * inline tag every model reads each round. + * + * The token derives a short, readable `pN` handle from a STABLE + * ordering of the roster by participant id (NOT by `order`, which + * the user can reshuffle, and NOT by `role`, which they can + * rename). Sorting by the immutable id means a given participant + * keeps the same `pN` across renames AND reorders — the exact + * mutations this feature targets. (Adding/removing a roster member + * can shift the indices, since they're roster-relative; that's an + * accepted trade-off for a readable sequential form, and matches + * the "unique within the roster" contract rather than a globally + * frozen handle.) 
+ * + * The `#`-prefix is deliberate and load-bearing for resolver + * safety: `EnsembleMentionAlias`'s `MENTION_REGEX` requires a + * LETTER immediately after `@`, so `@#p3` can never match as a + * standalone mention — the token cannot be mistaken for, or + * resolve to, a participant. When an agent copies the full role + * form `@Planner #p3`, the resolver consumes `planner` (the role + * alias) and leaves ` #p3` as trailing prose, exactly as it would + * any other trailing word. The token is purely additive: it adds + * NO alias to the resolver and widens NO contract. + */ +export function buildParticipantTokenMap( + participants: readonly EnsembleParticipant[] | undefined +): Map<string, string> { + const map = new Map<string, string>() + if (!participants || participants.length === 0) return map + // Stable ordering keyed on the immutable id so the token survives + // role renames + speaking-order reshuffles. Ids are deduped (a + // malformed roster could repeat one) and compared by UTF-16 code + // unit, not locale collation, so `pN` never shifts with the OS locale. + const ids = Array.from(new Set(participants.map((p) => p.id).filter(Boolean))).sort((a, b) => + a < b ? -1 : a > b ? 1 : 0 + ) + ids.forEach((id, index) => { + map.set(id, `p${index + 1}`) + }) + return map +} + +function buildTaggedTranscript( + messages: ChatMessage[], + contextTurns: number, + participantTokens?: Map<string, string> +): string { const relevant = messages .filter((message) => message.role !== 'tool') .slice(-Math.max(1, contextTurns * 2)) const lines: string[] = [] let used = 0 for (const message of relevant) { - const tag = messageTag(message) + const tag = messageTag(message, participantTokens) // M6 (1.0.7) — thinking-ephemerality. 
Strip any inlined reasoning chain // from a message authored by an ephemeral-reasoning provider before it // enters FUTURE-round context, keyed on the message's own authoring @@ -599,13 +682,28 @@ function buildTaggedTranscript(messages: ChatMessage[], contextTurns: number): s return lines.join('\n\n') } -function messageTag(message: ChatMessage): string { +function messageTag(message: ChatMessage, participantTokens?: Map): string { if (message.role === 'user') return 'User' if (message.role === 'assistant') { const provider = message.metadata?.ensembleProvider as ProviderId | undefined const role = typeof message.metadata?.ensembleRole === 'string' ? message.metadata.ensembleRole : '' - if (provider) return `${providerLabel(provider)}${role ? ` / ${role}` : ''}` + if (provider) { + // 1.0.7 — append the rename-stable participant handle (`#p3`) + // when this message carries an `ensembleParticipantId` that maps + // to a CURRENT roster seat. Messages from a participant since + // removed from the roster (or older messages predating the id + // stamp) carry no token and fall back to the bare provider/role + // form. See `buildParticipantTokenMap` for why the token is + // resolver-safe. + const participantId = + typeof message.metadata?.ensembleParticipantId === 'string' + ? message.metadata.ensembleParticipantId + : '' + const token = participantId ? participantTokens?.get(participantId) : undefined + const tokenSuffix = token ? ` #${token}` : '' + return `${providerLabel(provider)}${role ? 
` / ${role}` : ''}${tokenSuffix}` + } return 'Assistant' } if (message.role === 'error') return 'Error' diff --git a/src/main/IpcValidation.ts b/src/main/IpcValidation.ts index 5df370b2..e3f70abe 100644 --- a/src/main/IpcValidation.ts +++ b/src/main/IpcValidation.ts @@ -114,6 +114,13 @@ export const IPC_ARGUMENT_SCHEMAS: Record = { 'download-update': [], 'install-update-on-quit': [], 'install-update-now': [], + // Changelog sheet (update-pill feature): `changelog-snapshot` is a no-arg + // read returning ProductChangelogSnapshot | null; `mark-changelog-seen` + // persists the last-seen version. The handler coerces a missing/empty + // version defensively (returns the snapshot unchanged), so optionalString + // mirrors the store-*-api-key channels rather than nonEmptyString. + 'changelog-snapshot': [], + 'mark-changelog-seen': ['optionalString'], // Agent-question modal replies (the payload object carries questionId). 'answer-agent-question': ['optionalObject'], 'cancel-agent-question': ['optionalObject'], @@ -130,6 +137,12 @@ export const IPC_ARGUMENT_SCHEMAS: Record = { 'store-kimi-api-key': ['optionalString'], 'clear-kimi-api-key': [], // GitHub PR creation (optional payload with target path / options). 
+ 'git:snapshot': ['optionalObject'], + 'git:stage': ['optionalObject'], + 'git:commit': ['optionalObject'], + 'git:push': ['optionalObject'], + 'github:pr-status': ['optionalObject'], + 'github:pr-readiness': ['optionalObject'], 'create-github-pr': ['optionalObject'], 'agentic-yolo-get': [], 'agentic-yolo-set': ['boolean'], diff --git a/src/main/ProviderCapabilities.test.ts b/src/main/ProviderCapabilities.test.ts index 5fdea7a6..ca380783 100644 --- a/src/main/ProviderCapabilities.test.ts +++ b/src/main/ProviderCapabilities.test.ts @@ -40,6 +40,11 @@ describe('ProviderCapabilities', () => { expect(contract.tools.fileChanges.tools).toEqual([]) expect(contract.mcp.tools).toEqual([]) expect(contract.warnings.map((warning) => warning.id)).toContain('gemini-bridge-disabled') + // elicit/delegate are unavailable until the bridge is up. + expect(contract.tools.elicit.state).toBe('unavailable') + expect(contract.tools.delegate.state).toBe('unavailable') + expect(contract.tools.elicit.tools).toEqual([]) + expect(contract.tools.delegate.tools).toEqual([]) }) it('advertises Gemini bridge tools with AgentBench approval gates when available', () => { @@ -69,6 +74,14 @@ describe('ProviderCapabilities', () => { ]) expect(contract.mcp.tools).toContain('list_directory') expect(contract.approvals.inAppApprovals).toBe(true) + // ask_user_question is auto-allowed once the bridge is up; delegate + // inherits the subThreadDelegation policy ('ask' -> gated). 
+ expect(contract.tools.elicit.state).toBe('available') + expect(contract.tools.elicit.requiresApproval).toBe(false) + expect(contract.tools.elicit.tools).toEqual(['ask_user_question']) + expect(contract.tools.delegate.state).toBe('gated') + expect(contract.tools.delegate.tools).toEqual(['delegate_to_subthread']) + expect(contract.tools.delegate.policy).toBe('ask') }) it('honors blocked settings in the Codex tooling contract', () => { @@ -87,6 +100,12 @@ describe('ProviderCapabilities', () => { expect(contract.tools.networkAccess.state).toBe('blocked') expect(contract.mcp.tools).toEqual(['read', 'search']) expect(contract.warnings.map((warning) => warning.id)).toContain('codex-shellCommands-blocked') + // Codex routes the AGBench elicitation/delegation tools regardless of the + // codex-native MCP server count; delegate tracks subThreadDelegation ('ask'). + expect(contract.tools.elicit.state).toBe('available') + expect(contract.tools.elicit.enforcedByAgentBench).toBe(true) + expect(contract.tools.delegate.state).toBe('gated') + expect(contract.tools.delegate.enforcedByAgentBench).toBe(true) }) it('keeps a provider runnable when optional metadata has an error', () => { @@ -123,5 +142,88 @@ describe('ProviderCapabilities', () => { expect(kimi.tools.fileChanges.state).toBe('delegated') expect(kimi.approvals.inAppApprovals).toBe(true) expect(kimi.warnings.map((warning) => warning.id)).toContain('kimi-provider-managed-tools') + // Without an available AGBench MCP bridge (no mcpStatus), Claude/Kimi + // elicit/delegate are unavailable rather than delegated, mirroring how + // their bridge-backed tooling falls closed. 
+ expect(claude.tools.elicit.state).toBe('unavailable') + expect(claude.tools.delegate.state).toBe('unavailable') + expect(kimi.tools.elicit.state).toBe('unavailable') + expect(kimi.tools.delegate.state).toBe('unavailable') + }) + + it('marks Claude/Kimi elicit/delegate available once the AGBench MCP bridge is up', () => { + const claude = buildProviderCapabilityContract({ + provider: 'claude', + settings: settings(), + status: { provider: 'claude', available: true, version: '1.0.0' }, + mcpStatus: { + enabled: true, + available: true, + serverName: 'AGBench', + tools: ['ask_user_question'] + } + }) + + expect(claude.tools.elicit.state).toBe('available') + expect(claude.tools.elicit.requiresApproval).toBe(false) + expect(claude.tools.delegate.state).toBe('gated') + expect(claude.tools.delegate.policy).toBe('ask') + }) + + it('treats grok/cursor elicit/delegate as provider-delegated', () => { + const grok = buildProviderCapabilityContract({ + provider: 'grok', + settings: settings(), + status: { provider: 'grok', available: true, version: '1.0.0' } + }) + + expect(grok.tools.elicit.state).toBe('delegated') + expect(grok.tools.elicit.enforcedByAgentBench).toBe(false) + expect(grok.tools.delegate.state).toBe('delegated') + expect(grok.tools.delegate.enforcedByAgentBench).toBe(false) + }) + + it('reflects a denied subThreadDelegation policy as a blocked delegate row', () => { + const codex = buildProviderCapabilityContract({ + provider: 'codex', + settings: settings({ ...defaultServices, subThreadDelegation: 'deny' }), + status: { provider: 'codex', available: true, version: '1.0.0', appServer: 'started' } + }) + + expect(codex.tools.delegate.state).toBe('blocked') + expect(codex.tools.delegate.policy).toBe('deny') + // elicit is unaffected by the delegation policy. 
+ expect(codex.tools.elicit.state).toBe('available') + }) + + it('does not double-count the elicit/delegate rows against the enforcement tally', () => { + // Roster where delegation was already enforced (subThreadDelegation 'allow'). + // The five functional controls drive the enforced count; promoting + // elicit/delegate to rows must NOT change that 5-row tally. + const codex = buildProviderCapabilityContract({ + provider: 'codex', + settings: settings({ ...defaultServices, subThreadDelegation: 'allow' }), + status: { provider: 'codex', available: true, version: '1.0.0', appServer: 'started' } + }) + + const controlIds = [ + 'shellCommands', + 'fileChanges', + 'mcpTools', + 'creativeApps', + 'networkAccess' + ] as const + const controlRows = controlIds.map((id) => codex.tools[id]) + const enforcedControls = controlRows.filter((tool) => tool.enforcedByAgentBench).length + + // Codex: shell+file+creative are AGBench-enforced, mcpTools(provider) and + // networkAccess(allow/none) are not -> 3/5, unchanged by the new rows. + expect(controlRows.length).toBe(5) + expect(enforcedControls).toBe(3) + // delegate is allowed/enforced as a DISPLAY row but lives outside the tally. 
+ expect(codex.tools.delegate.state).toBe('available') + expect(codex.tools.delegate.enforcedByAgentBench).toBe(true) + expect(controlIds).not.toContain('delegate') + expect(controlIds).not.toContain('elicit') }) }) diff --git a/src/main/ProviderCapabilities.ts b/src/main/ProviderCapabilities.ts index 168b89c7..9b0bd119 100644 --- a/src/main/ProviderCapabilities.ts +++ b/src/main/ProviderCapabilities.ts @@ -1,6 +1,5 @@ import type { AgenticNetworkPolicy, - AgenticServiceId, AgenticServicePolicy, AgenticServicesSettings, AppSettings, @@ -24,7 +23,38 @@ const TOOLING_LABELS: Record = { fileChanges: 'File changes', mcpTools: 'MCP and tool calls', creativeApps: 'Creative app tools', - networkAccess: 'Network access' + networkAccess: 'Network access', + elicit: 'Ask the user', + delegate: 'Delegate to sub-thread' +} + +/** The original five "functional control" rows whose AGBench-enforcement is + * tallied across the renderer (ToolingContractCard `enforcedCount`, + * SettingsPanel contract hint) and main (ProviderPreflightService delegated + * chip, DelegationAudit policy label). The `elicit` / `delegate` rows are + * DISPLAY-only additions and are intentionally excluded from these tallies so + * promoting `subThreadDelegation` to a first-class row does not double-count + * against its existing settings gate or inflate the enforced/delegated counts. + * Consumers that tally enforcement MUST iterate this list rather than + * `Object.values(contract.tools)`. */ +export const TOOLING_CONTROL_IDS = [ + 'shellCommands', + 'fileChanges', + 'mcpTools', + 'creativeApps', + 'networkAccess' +] as const satisfies readonly ProviderToolingCapabilityId[] + +export type ToolingControlId = (typeof TOOLING_CONTROL_IDS)[number] + +/** The five functional-control rows of a contract, in canonical order. 
Tally + * sites (enforced/delegated counts) MUST use this rather than + * `Object.values(contract.tools)` so the DISPLAY-only `elicit` / `delegate` + * rows never shift the enforced/delegated numerator or denominator. */ +export function toolingControlRows( + tools: Record +): ProviderToolingCapability[] { + return TOOLING_CONTROL_IDS.map((id) => tools[id]) } interface BuildProviderCapabilityContractInput { @@ -59,7 +89,7 @@ function serviceRequiresApproval(policy?: AgenticServicePolicy): boolean { } function serviceCapability( - id: Exclude, + id: ProviderToolingCapabilityId, policy: AgenticServicePolicy | undefined, source: ProviderToolingCapability['source'], tools: string[], @@ -80,7 +110,7 @@ function serviceCapability( } function unavailableCapability( - id: Exclude, + id: ProviderToolingCapabilityId, source: ProviderToolingCapability['source'], details: string ): ProviderToolingCapability { @@ -98,7 +128,7 @@ function unavailableCapability( } function delegatedCapability( - id: Exclude, + id: ProviderToolingCapabilityId, policy: AgenticServicePolicy | undefined, tools: string[], details: string @@ -117,6 +147,82 @@ function delegatedCapability( } } +/** `ask_user_question` (the `ui_elicitation` tool class) lets a participant + * ask the user a clarifying question mid-run. It is a universally + * auto-allowed AGBench MCP tool (see AgentbenchMcpTools `ask_user_question`), + * so it carries no service-policy gate — it is reachable whenever the AGBench + * MCP bridge is advertised to the provider, and provider-managed otherwise. + * DISPLAY-only row: excluded from the enforced/delegated tallies. 
*/ +function elicitCapability( + source: ProviderToolingCapability['source'], + mcpAvailable: boolean, + details: string, + unavailableDetails?: string +): ProviderToolingCapability { + const bridgeBacked = source === 'bridge' || source === 'agentbench' + if (bridgeBacked && !mcpAvailable) { + return unavailableCapability('elicit', source, unavailableDetails || details) + } + return { + id: 'elicit', + label: TOOLING_LABELS.elicit, + state: bridgeBacked ? 'available' : 'delegated', + source, + enforcedByAgentBench: bridgeBacked, + enforcement: bridgeBacked ? source : 'provider', + requiresApproval: false, + tools: ['ask_user_question'], + details + } +} + +/** `delegate_to_subthread` (`subThreadDelegation`) lets a participant spawn a + * cross-provider sub-thread. It IS gated by the existing + * `agenticServices.subThreadDelegation` settings policy (see the + * SettingsPanel tool→service map). Promoting it to a first-class row must NOT + * change that gate or the enforcement tallies, so this is a DISPLAY-only row + * excluded from `TOOLING_CONTROL_IDS`; the gate semantics stay in + * PermissionService / EffectiveRunPermissions exactly as before. Its state is + * derived from the same `subThreadDelegation` policy the gate already reads. */ +function delegateCapability( + source: ProviderToolingCapability['source'], + policy: AgenticServicePolicy | undefined, + mcpAvailable: boolean, + details: string, + unavailableDetails?: string +): ProviderToolingCapability { + const bridgeBacked = source === 'bridge' || source === 'agentbench' + if (bridgeBacked && !mcpAvailable) { + return unavailableCapability('delegate', source, unavailableDetails || details) + } + if (!bridgeBacked) { + return { + id: 'delegate', + label: TOOLING_LABELS.delegate, + state: policy === 'deny' ? 'blocked' : 'delegated', + source, + enforcedByAgentBench: false, + enforcement: policy === 'deny' ? 
'best_effort' : 'provider', + policy, + requiresApproval: policy !== 'allow' && policy !== 'deny', + tools: ['delegate_to_subthread'], + details + } + } + return { + id: 'delegate', + label: TOOLING_LABELS.delegate, + state: serviceState(policy), + source, + enforcedByAgentBench: true, + enforcement: source, + policy, + requiresApproval: serviceRequiresApproval(policy), + tools: ['delegate_to_subthread'], + details + } +} + function networkCapability(policy?: AgenticNetworkPolicy): ProviderToolingCapability { return { id: 'networkAccess', @@ -371,6 +477,8 @@ export function buildProviderCapabilityContract({ let shellCommands: ProviderToolingCapability let fileChanges: ProviderToolingCapability let mcpTools: ProviderToolingCapability + let elicit: ProviderToolingCapability + let delegate: ProviderToolingCapability let mcp: ProviderMcpCapability if (provider === 'gemini') { @@ -397,6 +505,17 @@ export function buildProviderCapabilityContract({ ['read_file', 'list_directory'], 'Gemini uses the AGBench MCP bridge for workspace read/list tools.' ) + elicit = elicitCapability( + 'bridge', + true, + 'Gemini can ask the user a clarifying question through the AGBench MCP bridge (auto-allowed).' + ) + delegate = delegateCapability( + 'bridge', + services.subThreadDelegation, + true, + 'Gemini can spawn cross-provider sub-threads through the AGBench MCP bridge, gated by the sub-thread delegation setting.' + ) } else { shellCommands = unavailableCapability( 'shellCommands', @@ -413,6 +532,17 @@ export function buildProviderCapabilityContract({ 'bridge', 'AGBench MCP tools are not advertised to Gemini until the bridge is enabled, installed, and available.' ) + elicit = elicitCapability( + 'bridge', + false, + 'Gemini cannot ask the user through AGBench until the MCP bridge is enabled, installed, and available.' 
+ ) + delegate = delegateCapability( + 'bridge', + services.subThreadDelegation, + false, + 'Gemini cannot delegate to sub-threads through AGBench until the MCP bridge is enabled, installed, and available.' + ) warnings.push( warning( mcp.enabled ? 'gemini-bridge-unavailable' : 'gemini-bridge-disabled', @@ -449,6 +579,17 @@ export function buildProviderCapabilityContract({ 'Codex file approvals and diffs are routed through AGBench.' ) mcpTools = serviceCapability('mcpTools', services.mcpTools, 'provider', mcp.tools, mcp.message) + elicit = elicitCapability( + 'agentbench', + true, + 'Codex can ask the user a clarifying question through the AGBench MCP tool surface (auto-allowed).' + ) + delegate = delegateCapability( + 'agentbench', + services.subThreadDelegation, + true, + 'Codex can spawn cross-provider sub-threads through AGBench, gated by the sub-thread delegation setting.' + ) if (settings.codexSandboxFallback === 'ask_rerun') { warnings.push( warning( @@ -485,6 +626,33 @@ export function buildProviderCapabilityContract({ mcp.tools, mcp.message || `${label} MCP status is unavailable.` ) + if (provider === 'claude' || provider === 'kimi') { + elicit = elicitCapability( + 'bridge', + mcp.available, + `${label} can ask the user a clarifying question through the AGBench MCP bridge (auto-allowed).`, + `AGBench cannot route ${label} user questions until the AGBench MCP bridge is available.` + ) + delegate = delegateCapability( + 'bridge', + services.subThreadDelegation, + mcp.available, + `${label} can spawn cross-provider sub-threads through the AGBench MCP bridge, gated by the sub-thread delegation setting.`, + `AGBench cannot route ${label} sub-thread delegation until the AGBench MCP bridge is available.` + ) + } else { + elicit = elicitCapability( + 'provider', + false, + `${label} user-question handling is delegated to the provider CLI; AGBench does not advertise its elicitation tool here yet.` + ) + delegate = delegateCapability( + 'provider', + 
services.subThreadDelegation, + false, + `${label} sub-thread delegation is delegated to the provider CLI; AGBench does not advertise its delegation tool here yet.` + ) + } warnings.push( warning( `${provider}-provider-managed-tools`, @@ -532,7 +700,9 @@ export function buildProviderCapabilityContract({ fileChanges, mcpTools, creativeApps, - networkAccess + networkAccess, + elicit, + delegate }, approvals: approvalContract(provider, requestedMode, effectiveMode), mcp, diff --git a/src/main/ProviderPreflightService.test.ts b/src/main/ProviderPreflightService.test.ts index 9503bb6a..50138d2d 100644 --- a/src/main/ProviderPreflightService.test.ts +++ b/src/main/ProviderPreflightService.test.ts @@ -66,6 +66,26 @@ function contract(partial: Partial = {}): ProviderCa enforcement: 'none', requiresApproval: false, tools: [] + }, + elicit: { + id: 'elicit', + label: 'Ask the user', + state: 'available', + source: 'agentbench', + enforcedByAgentBench: true, + enforcement: 'agentbench', + requiresApproval: false, + tools: ['ask_user_question'] + }, + delegate: { + id: 'delegate', + label: 'Delegate to sub-thread', + state: 'gated', + source: 'agentbench', + enforcedByAgentBench: true, + enforcement: 'agentbench', + requiresApproval: true, + tools: ['delegate_to_subthread'] } }, approvals: { diff --git a/src/main/ProviderPreflightService.ts b/src/main/ProviderPreflightService.ts index 3aeff240..323fc3d5 100644 --- a/src/main/ProviderPreflightService.ts +++ b/src/main/ProviderPreflightService.ts @@ -4,6 +4,7 @@ import type { ProviderCapabilityWarning, ProviderId } from './store/types' +import { toolingControlRows } from './ProviderCapabilities' export type ProviderPreflightState = 'ready' | 'repairable' | 'blocked' export type ProviderPreflightRepairAction = @@ -97,16 +98,18 @@ export class ProviderPreflightService { } } - const delegatedTools = Object.values(contract.tools).filter( - (tool) => !tool.enforcedByAgentBench - ) + // Tally over the five functional-control rows 
only. The DISPLAY-only + // elicit/delegate rows are intentionally excluded so promoting + // subThreadDelegation to a row never inflates this count. + const controlRows = toolingControlRows(contract.tools) + const delegatedTools = controlRows.filter((tool) => !tool.enforcedByAgentBench) if (delegatedTools.length > 0) { chips.unshift( warning( `${input.provider}-delegated-enforcement`, 'info', 'Provider-managed controls', - `${delegatedTools.length}/${Object.values(contract.tools).length} tooling controls are delegated or best-effort for ${label}.` + `${delegatedTools.length}/${controlRows.length} tooling controls are delegated or best-effort for ${label}.` ) ) } diff --git a/src/main/UpdateService.test.ts b/src/main/UpdateService.test.ts index f5bf0aca..03494803 100644 --- a/src/main/UpdateService.test.ts +++ b/src/main/UpdateService.test.ts @@ -23,6 +23,13 @@ vi.mock('electron-updater', () => ({ import { UpdateService } from './UpdateService' +function emitUpdaterEvent(name: string, payload?: unknown): void { + const handler = mockAutoUpdater.on.mock.calls.find((call) => call[0] === name)?.[1] + if (typeof handler === 'function') { + handler(payload) + } +} + describe('UpdateService', () => { beforeEach(() => { mockAutoUpdater.checkForUpdates.mockClear() @@ -170,6 +177,74 @@ describe('UpdateService', () => { expect(new Date(snap.lastCheckedAt!).getTime()).toBeGreaterThan(0) }) + it('captures release metadata when an update is available', () => { + const svc = new UpdateService() + svc.configure({ channel: 'stable', enabled: true }) + emitUpdaterEvent('update-available', { + version: '1.0.73', + files: [], + path: 'AGBench-1.0.73.dmg', + sha512: 'abc', + releaseName: 'AGBench 1.0.73', + releaseDate: '2026-06-04T12:00:00.000Z', + releaseNotes: 'New updater UI.' 
+ }) + + expect(svc.snapshot()).toMatchObject({ + status: 'available', + latestVersion: '1.0.73', + releaseName: 'AGBench 1.0.73', + releaseDate: '2026-06-04T12:00:00.000Z', + releaseNotes: 'New updater UI.', + releasePageUrl: 'https://github.com/boggspa/AGBench/releases/tag/v1.0.73' + }) + }) + + it('preserves full changelog arrays when an update is downloaded', () => { + const svc = new UpdateService() + svc.configure({ channel: 'stable', enabled: true }) + emitUpdaterEvent('update-downloaded', { + version: '1.0.74', + files: [], + path: 'AGBench-1.0.74.dmg', + sha512: 'abc', + releaseDate: '2026-06-04T13:00:00.000Z', + releaseNotes: [ + { version: '1.0.74', note: 'Second update.' }, + { version: '1.0.73', note: null } + ] + }) + + expect(svc.snapshot()).toMatchObject({ + status: 'downloaded', + latestVersion: '1.0.74', + releaseNotes: [ + { version: '1.0.74', note: 'Second update.' }, + { version: '1.0.73', note: null } + ] + }) + }) + + it('clears stale release metadata when no update is available', () => { + const svc = new UpdateService() + svc.configure({ channel: 'stable', enabled: true }) + emitUpdaterEvent('update-available', { + version: '1.0.73', + files: [], + path: 'AGBench-1.0.73.dmg', + sha512: 'abc', + releaseDate: '2026-06-04T12:00:00.000Z', + releaseNotes: 'New updater UI.' 
+ }) + emitUpdaterEvent('update-not-available') + + expect(svc.snapshot()).toMatchObject({ + status: 'not-available', + latestVersion: undefined, + releaseNotes: undefined + }) + }) + it('reconfigure to debug after enabled returns to disabled', () => { const svc = new UpdateService() svc.configure({ channel: 'stable', enabled: true }) diff --git a/src/main/UpdateService.ts b/src/main/UpdateService.ts index 11bf82f7..f7d9abd5 100644 --- a/src/main/UpdateService.ts +++ b/src/main/UpdateService.ts @@ -4,7 +4,11 @@ import { type UpdateInfo, type ProgressInfo } from 'electron-updater' -import type { ProductUpdateChannel } from './store/types' +import type { + ProductUpdateChannel, + ProductUpdateReleaseNoteInfo, + ProductUpdateReleaseNotes +} from './store/types' /** * UpdateService — Phase G2 wrapper around `electron-updater`. @@ -61,6 +65,10 @@ export interface UpdateStateSnapshot { enabled: boolean channel: ProductUpdateChannel latestVersion?: string + releaseName?: string + releaseDate?: string + releaseNotes?: ProductUpdateReleaseNotes + releasePageUrl?: string downloadProgress?: ProgressInfo errorMessage?: string /** When the last manual or automatic check was attempted. ISO. 
*/ @@ -73,6 +81,10 @@ export class UpdateService { private channel: ProductUpdateChannel = 'debug' private status: UpdateStatus = 'disabled' private latestVersion: string | undefined + private releaseName: string | undefined + private releaseDate: string | undefined + private releaseNotes: ProductUpdateReleaseNotes | undefined + private releasePageUrl: string | undefined private downloadProgress: ProgressInfo | undefined private errorMessage: string | undefined private lastCheckedAt: string | undefined @@ -98,6 +110,9 @@ export class UpdateService { this.channel = args.channel if (!args.enabled || args.channel === 'debug') { this.status = 'disabled' + this.downloadProgress = undefined + this.errorMessage = undefined + this.clearReleaseMetadata() this.publish() return } @@ -112,6 +127,9 @@ export class UpdateService { autoUpdater.autoDownload = false autoUpdater.autoInstallOnAppQuit = false this.status = 'idle' + this.downloadProgress = undefined + this.errorMessage = undefined + this.clearReleaseMetadata() this.publish() } @@ -123,6 +141,8 @@ export class UpdateService { this.status = 'checking' this.lastCheckedAt = new Date().toISOString() this.errorMessage = undefined + this.downloadProgress = undefined + this.clearReleaseMetadata() this.publish() try { const result = await autoUpdater.checkForUpdates() @@ -171,6 +191,10 @@ export class UpdateService { enabled: this.status !== 'disabled', channel: this.channel, latestVersion: this.latestVersion, + releaseName: this.releaseName, + releaseDate: this.releaseDate, + releaseNotes: this.releaseNotes, + releasePageUrl: this.releasePageUrl, downloadProgress: this.downloadProgress, errorMessage: this.errorMessage, lastCheckedAt: this.lastCheckedAt @@ -195,11 +219,12 @@ export class UpdateService { }) autoUpdater.on('update-available', (info) => { this.status = 'available' - this.latestVersion = info.version + this.applyUpdateInfo(info) this.publish() }) autoUpdater.on('update-not-available', () => { this.status = 
'not-available' + this.clearReleaseMetadata() this.publish() }) autoUpdater.on('error', (err) => { @@ -212,12 +237,30 @@ export class UpdateService { }) autoUpdater.on('update-downloaded', (info) => { this.status = 'downloaded' - this.latestVersion = info.version + this.applyUpdateInfo(info) this.downloadProgress = undefined this.publish() }) } + private applyUpdateInfo(info: UpdateInfo): void { + this.latestVersion = info.version + this.releaseName = info.releaseName || undefined + this.releaseDate = info.releaseDate || undefined + this.releaseNotes = normalizeReleaseNotes(info.releaseNotes) + this.releasePageUrl = info.version + ? `https://github.com/boggspa/AGBench/releases/tag/v${info.version}` + : undefined + } + + private clearReleaseMetadata(): void { + this.latestVersion = undefined + this.releaseName = undefined + this.releaseDate = undefined + this.releaseNotes = undefined + this.releasePageUrl = undefined + } + private handleError(message: string): void { this.status = 'error' this.errorMessage = message @@ -239,3 +282,22 @@ export class UpdateService { } } } + +function normalizeReleaseNotes( + notes: UpdateInfo['releaseNotes'] +): ProductUpdateReleaseNotes | undefined { + if (typeof notes === 'string') return notes + if (!Array.isArray(notes)) return undefined + const normalized = notes + .map((note): ProductUpdateReleaseNoteInfo | null => { + if (!note || typeof note.version !== 'string' || !note.version.trim()) { + return null + } + return { + version: note.version.trim(), + note: typeof note.note === 'string' ? note.note : null + } + }) + .filter((note): note is ProductUpdateReleaseNoteInfo => note !== null) + return normalized.length > 0 ? 
normalized : undefined +} diff --git a/src/main/index.ts b/src/main/index.ts index ec239c2d..c97d9ab2 100644 --- a/src/main/index.ts +++ b/src/main/index.ts @@ -88,6 +88,7 @@ import { RunCoordinator } from './services/RunCoordinator' import { RunQueueService } from './services/RunQueueService' import { SettingsService } from './services/SettingsService' import { WorkspaceService } from './services/WorkspaceService' +import { GitService } from './services/GitService' import { AppShellStatsService } from './services/AppShellStatsService' import { getWorkspaceActivitySnapshot } from './WorkspaceActivityService' import { getCurrentFxRates, refreshFxRates, startFxRateScheduler } from './services/FxRateService' @@ -153,6 +154,8 @@ import { ProductCrashInput, ProductDiagnosticsExportResult, ProductOperationsStatus, + ProductChangelogSnapshot, + ProductUpdateChangelog, RuntimeProfile, HandoffCard, HandoffCardFilter, @@ -4045,6 +4048,30 @@ function claudeProgrammaticUsageWarning(runtime: 'sdk' | 'cli-print', usesApiKey return `${runtimeLabel} is a programmatic Claude path. Anthropic says programmatic Claude usage uses separate Agent SDK credit from 2026-06-15, not the normal interactive Claude Code subscription limit. Use interactive Claude in a terminal when you need native Claude Code subscription-limit behavior.` } +// Recover a single tool-call identifier from a bridge tool payload, regardless +// of which field the provider populates. A `ToolCall` notification keys it `id`; +// the matching `ToolResult` echoes it under `tool_call_id` (and some shapes use +// `call_id` / `tool_id`). Resolving both branches through the SAME ordered +// lookup guarantees the call and its result share one id, so the renderer +// coalesces them into one inline card instead of stacking each event. Mirrors +// the multi-field lookup Grok's mapper already uses (GrokStreamingJson). 
Falls +// back to a unique generated id only when no identifier is present at all (which +// keeps two genuinely id-less calls from merging). +function cliProviderToolId(payload: Record, prefix: string): string { + const candidates = [ + payload.tool_call_id, + payload.toolCallId, + payload.id, + payload.tool_id, + payload.toolId, + payload.call_id + ] + for (const candidate of candidates) { + if (typeof candidate === 'string' && candidate.trim()) return candidate.trim() + } + return `${prefix}-${Date.now()}-${Math.random().toString(36).slice(2, 8)}` +} + function emitCliProviderToolEvent(state: CliProviderStreamState, event: unknown): void { if (!isRecord(event)) return const params = nestedRecord(event, 'params') @@ -4094,23 +4121,34 @@ function emitCliProviderToolEvent(state: CliProviderStreamState, event: unknown) if (event.method === 'event' && params.type === 'ToolCall') { const toolFunction = nestedRecord(payload, 'function') + // Kimi tool calls must COALESCE with their matching ToolResult into one + // inline card (the way Codex/Claude/Grok do). The renderer pairs a result + // back to its call by `tool_id`, so the call and the result MUST resolve to + // the SAME stable id. Kimi keys the call identifier as `id` here, but the + // result echoes it under `tool_call_id` (see the ToolResult branch + the + // wire-protocol approval echo in ApprovalService, `request_id: payload.id`). + // Mirror Grok's multi-field id lookup (GrokStreamingJson) so the same value + // is recovered regardless of which field Kimi populates on each side. + const toolCallId = cliProviderToolId(payload, 'tool') + // Moonshot/OpenAI function-calling sends `function.arguments` as a + // JSON-ENCODED STRING, so the old `isRecord(...)` check dropped them to `{}` + // — that's why the card never showed the target filename. normalizeMcpTool- + // Arguments parses the string (or passes an object through) so ToolParser + // can surface `file_path`/`path` in the card label. 
+ const rawToolArgs = toolFunction.arguments ?? payload.arguments sendAgentCompatLine( state.sender, state.provider, { type: 'tool_use', - tool_id: typeof payload.id === 'string' ? payload.id : `tool-${Date.now()}`, + tool_id: toolCallId, tool_name: typeof toolFunction.name === 'string' ? toolFunction.name : typeof payload.name === 'string' ? payload.name : 'tool', - parameters: isRecord(toolFunction.arguments) - ? toolFunction.arguments - : isRecord(payload.arguments) - ? payload.arguments - : {}, + parameters: normalizeMcpToolArguments(rawToolArgs), provider: state.provider }, state @@ -4119,13 +4157,15 @@ function emitCliProviderToolEvent(state: CliProviderStreamState, event: unknown) if (event.method === 'event' && params.type === 'ToolResult') { const returnValue = nestedRecord(payload, 'return_value') + // Same stable-id resolution as the ToolCall branch above so the result + // pairs back to its call card instead of stacking as a fresh orphan. + const toolResultId = cliProviderToolId(payload, 'tool') sendAgentCompatLine( state.sender, state.provider, { type: 'tool_result', - tool_id: - typeof payload.tool_call_id === 'string' ? payload.tool_call_id : `tool-${Date.now()}`, + tool_id: toolResultId, status: returnValue.is_error ? 'error' : 'success', output: contentPartsToText(returnValue.output || returnValue.message || ''), provider: state.provider @@ -10188,7 +10228,7 @@ async function getProductOperationsStatus(): Promise { const recentCrashes = AppStore.getProductCrashes({ limit: 20 }) return buildProductOperationsStatus({ - updateChannel: settings.updateChannel || 'debug', + updateChannel: settings.updateChannel || 'stable', appName: app.getName() || 'AGBench', appVersion: app.getVersion() || 'unknown', isPackaged: app.isPackaged, @@ -13147,6 +13187,31 @@ function resolveNativeVibrancy( return useNativeGlass ? NATIVE_GLASS_VIBRANCY : undefined } +/* + * Per-theme opaque backdrop for popout BrowserWindows. 
Before React + * mounts (and applies `data-theme`), the OS paints `backgroundColor`. + * A hardcoded `#1e1e1e` flashed a dark slab on the light themes + * (light/citrus/mist/sage/alabaster), which is jarring. We mirror + * each light theme's `--app-bg` so the pre-paint matches the rendered + * surface; every dark theme (and `system`/`dark`, which the renderer + * resolves to the dark `:root`) keeps the original `#1e1e1e`. + * Returns undefined when a glass window is used (caller passes the + * transparent backdrop in that case). + */ +const LIGHT_THEME_POPOUT_BACKDROPS: Record = { + light: '#f4f6f8', + citrus: '#f4f6f8', + mist: '#eef4f6', + sage: '#f0f5f0', + alabaster: '#f4f3ef' +} + +function resolvePopoutBackgroundColor(useGlassWindow: boolean): string { + if (useGlassWindow) return '#00000000' + const theme = AppStore.getSettings().themeAppearance + return LIGHT_THEME_POPOUT_BACKDROPS[theme] ?? '#1e1e1e' +} + function resolveWorkspaceChild(workspace: string, filePath: string): string { const workspaceRoot = resolve(workspace) const targetPath = isAbsolute(filePath) ? resolve(filePath) : resolve(workspaceRoot, filePath) @@ -13671,7 +13736,7 @@ async function openWorkspacePopout(input: unknown): Promise<{ ok: true }> { : undefined, visualEffectState: 'active', transparent: false, - backgroundColor: useGlassWindow ? '#00000000' : '#1e1e1e', + backgroundColor: resolvePopoutBackgroundColor(useGlassWindow), ...(process.platform === 'linux' ? { icon } : {}), webPreferences: { preload: join(__dirname, '../preload/index.js'), @@ -14502,6 +14567,8 @@ if (isGeminiMcpBridgeProcess) { } }) + const gitService = new GitService() + // Phase G2: auto-update wiring. Default-off (env override available). // Only enabled in packaged builds AND when updateChannel != 'debug'. 
// The `AGBENCH_AUTO_UPDATE` env var forces enable/disable for @@ -14529,9 +14596,35 @@ if (isGeminiMcpBridgeProcess) { channel: initialSettings.updateChannel, enabled: autoUpdateEnabled }) + const updateSnapshotToChangelog = ( + snapshot: UpdateStateSnapshot + ): ProductUpdateChangelog | undefined => { + if (!snapshot.latestVersion) return undefined + return { + version: snapshot.latestVersion, + ...(snapshot.releaseName ? { releaseName: snapshot.releaseName } : {}), + ...(snapshot.releaseDate ? { releaseDate: snapshot.releaseDate } : {}), + ...(snapshot.releaseNotes ? { releaseNotes: snapshot.releaseNotes } : {}) + } + } + const changelogSnapshot = (): ProductChangelogSnapshot => { + const settings = AppStore.getSettings() + return { + currentVersion: app.getVersion() || 'unknown', + lastSeenChangelogVersion: settings.lastSeenChangelogVersion, + pendingUpdateChangelog: settings.pendingUpdateChangelog, + latestUpdateChangelog: updateSnapshotToChangelog(updateService.snapshot()) + } + } // Broadcast snapshot changes to the renderer so the Settings panel // can show live status. updateService.subscribe((snapshot: UpdateStateSnapshot) => { + if (snapshot.status === 'downloaded') { + const pendingUpdateChangelog = updateSnapshotToChangelog(snapshot) + if (pendingUpdateChangelog) { + AppStore.updateSettings({ pendingUpdateChangelog }) + } + } try { mainWindow?.webContents.send('update-status-changed', snapshot) } catch { @@ -14587,6 +14680,18 @@ if (isGeminiMcpBridgeProcess) { updateService.quitAndInstall() return updateService.snapshot() }) + ipcMain.handle('changelog-snapshot', () => changelogSnapshot()) + ipcMain.handle('mark-changelog-seen', (_, version: string) => { + const normalizedVersion = typeof version === 'string' ? 
version.trim() : '' + if (!normalizedVersion) return changelogSnapshot() + AppStore.updateSettings({ lastSeenChangelogVersion: normalizedVersion }) + return changelogSnapshot() + }) + if (updateService.snapshot().enabled) { + setTimeout(() => { + void updateService.checkForUpdates() + }, 3000) + } ipcMain.handle( 'bridge-finalize-pairing', @@ -15675,110 +15780,112 @@ if (isGeminiMcpBridgeProcess) { } }) + const gitPayloadPath = (payload?: { workspacePath?: string; repoPath?: string }) => + typeof payload?.repoPath === 'string' && payload.repoPath.trim() + ? payload.repoPath + : payload?.workspacePath || '' + + ipcMain.handle( + 'git:snapshot', + async (_event, payload?: { workspacePath?: string; repoPath?: string }) => + gitService.snapshot(gitPayloadPath(payload)) + ) + + ipcMain.handle( + 'git:stage', + async ( + _event, + payload?: { + workspacePath?: string + repoPath?: string + paths?: string[] + all?: boolean + update?: boolean + patch?: string + } + ) => + gitService.stage({ + repoPath: gitPayloadPath(payload), + paths: payload?.paths, + all: payload?.all, + update: payload?.update, + patch: payload?.patch + }) + ) + + ipcMain.handle( + 'git:commit', + async ( + _event, + payload?: { + workspacePath?: string + repoPath?: string + message?: string + } + ) => + gitService.commit({ + repoPath: gitPayloadPath(payload), + message: payload?.message || '' + }) + ) + + ipcMain.handle( + 'git:push', + async ( + _event, + payload?: { + workspacePath?: string + repoPath?: string + setUpstream?: boolean + remote?: string + } + ) => + gitService.push({ + repoPath: gitPayloadPath(payload), + setUpstream: payload?.setUpstream, + remote: payload?.remote + }) + ) + + ipcMain.handle( + 'github:pr-status', + async (_event, payload?: { workspacePath?: string; repoPath?: string }) => + gitService.pullRequestStatus(gitPayloadPath(payload)) + ) + + ipcMain.handle( + 'github:pr-readiness', + async (_event, payload?: { workspacePath?: string; repoPath?: string }) => + 
gitService.pullRequestReadiness(gitPayloadPath(payload)) + ) + ipcMain.handle( 'create-github-pr', async ( _event, payload?: { workspacePath?: string + repoPath?: string title?: string body?: string draft?: boolean openInBrowser?: boolean } ) => { - const requestedPath = expandHomePath(payload?.workspacePath || '') - if (!requestedPath) { - return { ok: false, error: 'A workspace path is required to open a pull request.' } - } - try { - const stat = await fs.stat(requestedPath) - if (!stat.isDirectory()) { - return { ok: false, error: 'Workspace path is not a directory.' } + const result = await gitService.createPullRequest({ + repoPath: gitPayloadPath(payload), + title: payload?.title, + body: payload?.body, + draft: payload?.draft + }) + if (result.ok) { + const url = result.data.url + if (url && payload?.openInBrowser !== false) { + shell.openExternal(url).catch(() => {}) } - } catch { - return { ok: false, error: 'Workspace path does not exist on disk.' } - } - const args = ['pr', 'create'] - const title = typeof payload?.title === 'string' ? payload.title.trim() : '' - const body = typeof payload?.body === 'string' ? payload.body.trim() : '' - if (title) { - args.push('--title', title) - } - if (body) { - args.push('--body', body) + return { ok: true, ...result.data } } - if (!title && !body) { - args.push('--fill') - } - if (payload?.draft) { - args.push('--draft') - } - return await new Promise<{ ok: boolean; url?: string; error?: string; stderr?: string }>( - (resolve) => { - let stdout = '' - let stderr = '' - let settled = false - let child: ReturnType - try { - child = spawn('gh', args, { - cwd: requestedPath, - env: { ...process.env }, - stdio: ['ignore', 'pipe', 'pipe'] - }) - } catch (error) { - resolve({ - ok: false, - error: `Failed to launch \`gh\`: ${error instanceof Error ? 
error.message : String(error)}` - }) - return - } - const settle = (result: { - ok: boolean - url?: string - error?: string - stderr?: string - }) => { - if (settled) return - settled = true - resolve(result) - } - child.stdout?.on('data', (chunk: Buffer) => { - stdout += chunk.toString('utf8') - }) - child.stderr?.on('data', (chunk: Buffer) => { - stderr += chunk.toString('utf8') - }) - child.on('error', (error) => { - const message = - (error as NodeJS.ErrnoException)?.code === 'ENOENT' - ? 'GitHub CLI (`gh`) is not installed or not on PATH. Install it from https://cli.github.com.' - : `Failed to launch \`gh\`: ${error.message}` - settle({ ok: false, error: message }) - }) - child.on('close', (code) => { - const trimmedOut = stdout.trim() - const trimmedErr = stderr.trim() - if (code === 0) { - const url = trimmedOut.match(/https?:\/\/[^\s]+/)?.[0] - if (url && payload?.openInBrowser !== false) { - shell.openExternal(url).catch(() => {}) - } - settle({ ok: true, url, stderr: trimmedErr || undefined }) - } else { - settle({ - ok: false, - error: trimmedErr || trimmedOut || `\`gh pr create\` exited with code ${code}.`, - stderr: trimmedErr || undefined - }) - } - }) - setTimeout( - () => settle({ ok: false, error: '`gh pr create` timed out after 30s.' }), - 30_000 - ) - } - ) + return result } ) @@ -16692,7 +16799,17 @@ if (isGeminiMcpBridgeProcess) { ipcMain.handle( 'respond-agent-approval', - async (_, requestId: string, action: AgentApprovalAction) => { + async (_, requestId: string, action: AgentApprovalAction, intentNote?: string) => { + // Order-4 — optional one-line "why" note captured in the + // approval card. Trim + cap defensively (the renderer already + // trims, but the IPC boundary is untrusted) and ride it on the + // existing ledger metadata channel as `intentNote`. Empty stays + // off the metadata entirely so we never persist a blank note. + const trimmedIntentNote = + typeof intentNote === 'string' ? 
intentNote.trim().slice(0, 280) : '' + const resolveOptions = trimmedIntentNote + ? { extraMetadata: { intentNote: trimmedIntentNote } } + : undefined // Slice 5 v2 of the external-path-redesign arc. When the user // clicks "Grant read access" / "Grant edit access" in an // external-path approval modal, peek at the pending approval's stashed @@ -16744,7 +16861,7 @@ if (isGeminiMcpBridgeProcess) { } } } - return approvalServiceInstance.resolve(requestId, action) + return approvalServiceInstance.resolve(requestId, action, resolveOptions) } ) diff --git a/src/main/services/AuditService.test.ts b/src/main/services/AuditService.test.ts index 1eafc50b..cf064167 100644 --- a/src/main/services/AuditService.test.ts +++ b/src/main/services/AuditService.test.ts @@ -61,6 +61,17 @@ describe('AuditService', () => { ) }) + it('threads an optional intent note through extraMetadata (Order-4)', () => { + const { deps } = makeDeps() + const service = new AuditService(deps) + service.resolveApprovalLedgerResponse('approval-1', 'accept', 'user', { + intentNote: 'reviewed the diff, safe' + }) + expect(deps.resolveApprovalResponse).toHaveBeenCalledWith('approval-1', 'accept', 'user', { + intentNote: 'reviewed the diff, safe' + }) + }) + it('does not throw when ledger response resolution fails', () => { const error = new Error('ledger unavailable') const { deps, errors } = makeDeps({ diff --git a/src/main/services/GitService.test.ts b/src/main/services/GitService.test.ts new file mode 100644 index 00000000..b9e3125c --- /dev/null +++ b/src/main/services/GitService.test.ts @@ -0,0 +1,316 @@ +import { execFileSync, spawnSync } from 'child_process' +import { mkdtempSync, mkdirSync, realpathSync, rmSync, writeFileSync } from 'fs' +import { tmpdir } from 'os' +import { join } from 'path' +import { afterEach, beforeEach, describe, expect, it } from 'vitest' +import { GitService, parseStatusPorcelainZ, type GitCommandRunner } from './GitService' + +function runGit(cwd: string, args: string[]): 
string { + return execFileSync('git', args, { cwd, encoding: 'utf8', stdio: ['ignore', 'pipe', 'pipe'] }) +} + +function createRepo(): string { + const repo = realpathSync(mkdtempSync(join(tmpdir(), 'agbench-git-service-'))) + runGit(repo, ['init', '-b', 'main']) + runGit(repo, ['config', 'user.name', 'AGBench Test']) + runGit(repo, ['config', 'user.email', 'agbench@example.test']) + writeFileSync(join(repo, 'README.md'), 'initial\n') + runGit(repo, ['add', 'README.md']) + runGit(repo, ['commit', '-m', 'Initial commit']) + return repo +} + +describe('GitService', () => { + let repo: string + let extraTempPaths: string[] + + beforeEach(() => { + repo = createRepo() + extraTempPaths = [] + }) + + afterEach(() => { + rmSync(repo, { recursive: true, force: true }) + for (const tempPath of extraTempPaths) { + rmSync(tempPath, { recursive: true, force: true }) + } + }) + + function addBareRemote(): string { + const remote = realpathSync(mkdtempSync(join(tmpdir(), 'agbench-git-remote-'))) + extraTempPaths.push(remote) + runGit(remote, ['init', '--bare']) + runGit(repo, ['remote', 'add', 'origin', remote]) + runGit(repo, ['push', '-u', 'origin', 'main']) + return remote + } + + it('parses porcelain status records', () => { + expect(parseStatusPorcelainZ(' M README.md\0?? 
new file.txt\0')).toEqual([ + { + path: 'README.md', + index: ' ', + workingTree: 'M', + kind: 'modified', + staged: false, + unstaged: true + }, + { + path: 'new file.txt', + index: '?', + workingTree: '?', + kind: 'untracked', + staged: false, + unstaged: true + } + ]) + }) + + it('resolves the repository root from a nested directory', async () => { + const nested = join(repo, 'src', 'feature') + mkdirSync(nested, { recursive: true }) + + const result = await new GitService().snapshot(nested) + + expect(result.ok).toBe(true) + if (!result.ok) return + expect(result.data.repoRoot).toBe(repo) + expect(result.data.requestedPath).toBe(nested) + expect(result.data.branch).toBe('main') + expect(result.data.clean).toBe(true) + }) + + it('reports changed and untracked files', async () => { + writeFileSync(join(repo, 'README.md'), 'changed\n') + writeFileSync(join(repo, 'new.txt'), 'new\n') + + const result = await new GitService().snapshot(repo) + + expect(result.ok).toBe(true) + if (!result.ok) return + expect(result.data.counts.changed).toBe(2) + expect(result.data.files).toEqual( + expect.arrayContaining([ + expect.objectContaining({ path: 'README.md', kind: 'modified', unstaged: true }), + expect.objectContaining({ path: 'new.txt', kind: 'untracked', unstaged: true }) + ]) + ) + }) + + it('stages selected paths without staging every file', async () => { + writeFileSync(join(repo, 'one.txt'), 'one\n') + writeFileSync(join(repo, 'two.txt'), 'two\n') + + const result = await new GitService().stage({ repoPath: repo, paths: ['one.txt'] }) + + expect(result.ok).toBe(true) + if (!result.ok) return + expect(result.data.files).toEqual( + expect.arrayContaining([ + expect.objectContaining({ path: 'one.txt', staged: true }), + expect.objectContaining({ path: 'two.txt', kind: 'untracked', staged: false }) + ]) + ) + }) + + it('rejects staging paths that escape the repository', async () => { + writeFileSync(join(repo, 'safe.txt'), 'safe\n') + const service = new GitService() + + 
const traversal = await service.stage({ repoPath: repo, paths: ['../outside.txt'] }) + const absolute = await service.stage({ repoPath: repo, paths: [join(repo, 'safe.txt')] }) + + expect(traversal).toEqual({ + ok: false, + error: 'Stage paths must stay inside the repository.' + }) + expect(absolute).toEqual({ + ok: false, + error: 'Stage paths must be relative to the repository.' + }) + expect(runGit(repo, ['diff', '--cached', '--name-only']).trim()).toBe('') + }) + + it('commits staged changes and returns a clean snapshot', async () => { + writeFileSync(join(repo, 'committed.txt'), 'committed\n') + const service = new GitService() + const stageResult = await service.stage({ repoPath: repo, all: true }) + expect(stageResult.ok).toBe(true) + + const commitResult = await service.commit({ repoPath: repo, message: 'Add committed file' }) + + expect(commitResult.ok).toBe(true) + if (!commitResult.ok) return + expect(commitResult.data.clean).toBe(true) + expect(runGit(repo, ['log', '-1', '--pretty=%s']).trim()).toBe('Add committed file') + }) + + it('refuses to commit with no staged changes', async () => { + const result = await new GitService().commit({ repoPath: repo, message: 'Nothing staged' }) + + expect(result).toEqual({ ok: false, error: 'No staged changes to commit.' }) + }) + + it('refuses to push a branch with no remote', async () => { + const result = await new GitService().push({ repoPath: repo }) + + expect(result).toEqual({ + ok: false, + error: 'No git remote is configured. Add a remote before pushing.' + }) + }) + + it('refuses to push from a detached HEAD', async () => { + addBareRemote() + runGit(repo, ['checkout', '--detach', 'HEAD']) + + const result = await new GitService().push({ repoPath: repo }) + + expect(result).toEqual({ + ok: false, + error: 'Cannot push from a detached HEAD. Create or switch to a branch first.' 
+ }) + }) + + it('runs gh pr create from the resolved repository root', async () => { + addBareRemote() + const nested = join(repo, 'nested') + mkdirSync(nested, { recursive: true }) + const calls: Array<{ + command: string + args: string[] + cwd: string + env?: Record + }> = [] + const runner: GitCommandRunner = async (command, args, options) => { + calls.push({ command, args, cwd: options.cwd, env: options.env }) + if (command === 'gh') { + if (args.includes('view')) { + return { + stdout: '', + stderr: 'no pull requests found for branch "main"', + code: 1 + } + } + return { + stdout: 'https://github.com/boggspa/AGBench/pull/42\n', + stderr: '', + code: 0 + } + } + const result = spawnSync(command, args, { + cwd: options.cwd, + encoding: 'utf8', + stdio: ['ignore', 'pipe', 'pipe'] + }) + return { + stdout: result.stdout || '', + stderr: result.stderr || '', + code: result.status ?? 0 + } + } + + const result = await new GitService({ run: runner }).createPullRequest({ + repoPath: nested, + draft: true + }) + + expect(result.ok).toBe(true) + if (!result.ok) return + expect(result.data.url).toBe('https://github.com/boggspa/AGBench/pull/42') + expect(calls.find((call) => call.command === 'gh' && call.args.includes('create'))).toEqual({ + command: 'gh', + args: ['pr', 'create', '--fill', '--draft'], + cwd: repo, + env: { GH_PROMPT_DISABLED: '1' } + }) + }) + + it('reports PR readiness when the branch needs to be pushed first', async () => { + addBareRemote() + writeFileSync(join(repo, 'ahead.txt'), 'ahead\n') + runGit(repo, ['add', 'ahead.txt']) + runGit(repo, ['commit', '-m', 'Ahead commit']) + + const result = await new GitService({ + run: async (command, args, options) => { + if (command === 'gh') { + return { + stdout: '', + stderr: 'no pull requests found for branch "main"', + code: 1 + } + } + const git = spawnSync(command, args, { + cwd: options.cwd, + encoding: 'utf8', + stdio: ['ignore', 'pipe', 'pipe'] + }) + return { + stdout: git.stdout || '', + stderr: 
git.stderr || '', + code: git.status ?? 0 + } + } + }).pullRequestReadiness(repo) + + expect(result.ok).toBe(true) + if (!result.ok) return + expect(result.data.canCreatePullRequest).toBe(false) + expect(result.data.shouldPushFirst).toBe(true) + expect(result.data.reason).toBe('Push the current branch before creating a pull request.') + }) + + it('reports an existing pull request as not creatable', async () => { + addBareRemote() + const result = await new GitService({ + run: async (command, args, options) => { + if (command === 'gh') { + return { + stdout: JSON.stringify({ + number: 42, + url: 'https://github.com/boggspa/AGBench/pull/42', + state: 'OPEN', + isDraft: false, + headRefName: 'main', + baseRefName: 'master', + statusCheckRollup: [] + }), + stderr: '', + code: 0 + } + } + const git = spawnSync(command, args, { + cwd: options.cwd, + encoding: 'utf8', + stdio: ['ignore', 'pipe', 'pipe'] + }) + return { + stdout: git.stdout || '', + stderr: git.stderr || '', + code: git.status ?? 0 + } + } + }).pullRequestReadiness(repo) + + expect(result.ok).toBe(true) + if (!result.ok) return + expect(result.data.canCreatePullRequest).toBe(false) + expect(result.data.existingPullRequest?.url).toBe('https://github.com/boggspa/AGBench/pull/42') + expect(result.data.reason).toBe('This branch already has a pull request.') + }) + + it('refuses PR creation before the current branch is pushed', async () => { + addBareRemote() + writeFileSync(join(repo, 'ahead-pr.txt'), 'ahead\n') + runGit(repo, ['add', 'ahead-pr.txt']) + runGit(repo, ['commit', '-m', 'Ahead PR commit']) + + const result = await new GitService().createPullRequest({ repoPath: repo }) + + expect(result).toEqual({ + ok: false, + error: 'Push the current branch before creating a pull request.' 
+ }) + }) +}) diff --git a/src/main/services/GitService.ts b/src/main/services/GitService.ts new file mode 100644 index 00000000..29c3ee3f --- /dev/null +++ b/src/main/services/GitService.ts @@ -0,0 +1,604 @@ +import { spawn } from 'child_process' +import { promises as fs } from 'fs' +import { homedir } from 'os' +import { dirname, isAbsolute, join, normalize, relative, resolve, sep } from 'path' + +const DEFAULT_TIMEOUT_MS = 30_000 + +export interface GitCommandResult { + stdout: string + stderr: string + code: number +} + +export interface GitCommandOptions { + cwd: string + timeoutMs?: number + env?: Record +} + +export interface GitCommandRunner { + (command: string, args: string[], options: GitCommandOptions): Promise +} + +export interface GitFileStatus { + path: string + originalPath?: string + index: string + workingTree: string + kind: 'created' | 'modified' | 'deleted' | 'renamed' | 'untracked' | 'conflicted' | 'ignored' + staged: boolean + unstaged: boolean +} + +export interface GitRepositorySnapshot { + requestedPath: string + repoRoot: string + branch?: string + commit?: string + detached: boolean + upstream?: string + remoteName?: string + remoteUrl?: string + ahead: number + behind: number + files: GitFileStatus[] + counts: { + changed: number + staged: number + unstaged: number + untracked: number + } + clean: boolean +} + +export interface GitPrSummary { + number?: number + url?: string + state?: string + isDraft?: boolean + headRefName?: string + baseRefName?: string + checks?: Array<{ + name?: string + status?: string + conclusion?: string + url?: string + }> +} + +export interface GitPrReadiness { + snapshot: GitRepositorySnapshot + existingPullRequest?: GitPrSummary + canCreatePullRequest: boolean + shouldPushFirst: boolean + reason?: string + warnings: string[] +} + +export type GitResult = { ok: true; data: T } | { ok: false; error: string; stderr?: string } + +export interface GitStageInput { + repoPath: string + paths?: string[] + all?: 
boolean + update?: boolean + patch?: string +} + +export interface GitCommitInput { + repoPath: string + message: string +} + +export interface GitPushInput { + repoPath: string + setUpstream?: boolean + remote?: string +} + +export interface GitCreatePrInput { + repoPath: string + title?: string + body?: string + draft?: boolean +} + +export class GitService { + private run: GitCommandRunner + private timeoutMs: number + + constructor(options: { run?: GitCommandRunner; timeoutMs?: number } = {}) { + this.run = options.run || runCommand + this.timeoutMs = options.timeoutMs || DEFAULT_TIMEOUT_MS + } + + async snapshot(inputPath: string): Promise> { + try { + return { ok: true, data: await this.buildSnapshot(inputPath) } + } catch (error) { + return failure(error) + } + } + + async stage(input: GitStageInput): Promise> { + try { + const repo = await this.resolveRepository(input.repoPath) + const paths = sanitizeRepoPaths(input.paths, repo.repoRoot) + if (input.patch && input.patch.trim()) { + return { + ok: false, + error: 'Patch staging is not available through the desktop Git service yet.' + } + } + if (input.all) { + await this.mustRun('git', ['add', '-A'], repo.repoRoot) + } else if (input.update) { + await this.mustRun('git', ['add', '-u'], repo.repoRoot) + } else if (paths.length > 0) { + await this.mustRun('git', ['add', '--', ...paths], repo.repoRoot) + } else { + return { ok: false, error: 'Choose files to stage or pass all=true.' } + } + return { ok: true, data: await this.buildSnapshot(repo.repoRoot) } + } catch (error) { + return failure(error) + } + } + + async commit(input: GitCommitInput): Promise> { + try { + const message = input.message.trim() + if (!message) return { ok: false, error: 'Commit message is required.' 
} + const repo = await this.resolveRepository(input.repoPath) + const staged = await this.run('git', ['diff', '--cached', '--quiet'], { + cwd: repo.repoRoot, + timeoutMs: this.timeoutMs + }) + if (staged.code === 0) return { ok: false, error: 'No staged changes to commit.' } + await this.mustRun('git', ['commit', '-m', message], repo.repoRoot) + return { ok: true, data: await this.buildSnapshot(repo.repoRoot) } + } catch (error) { + return failure(error) + } + } + + async push(input: GitPushInput): Promise> { + try { + const snapshot = await this.buildSnapshot(input.repoPath) + if (snapshot.detached || !snapshot.branch) { + return { ok: false, error: 'Cannot push from a detached HEAD. Create or switch to a branch first.' } + } + if (!snapshot.remoteUrl && !input.remote?.trim()) { + return { ok: false, error: 'No git remote is configured. Add a remote before pushing.' } + } + const remote = input.remote?.trim() || snapshot.remoteName || 'origin' + const args = + snapshot.upstream && !input.setUpstream + ? ['push'] + : ['push', '-u', remote, snapshot.branch] + await this.mustRun('git', args, snapshot.repoRoot) + return { ok: true, data: await this.buildSnapshot(snapshot.repoRoot) } + } catch (error) { + return failure(error) + } + } + + async createPullRequest(input: GitCreatePrInput): Promise> { + try { + const snapshot = await this.buildSnapshot(input.repoPath) + if (snapshot.detached || !snapshot.branch) { + return { ok: false, error: 'Cannot create a pull request from a detached HEAD.' } + } + if (!snapshot.remoteUrl) { + return { ok: false, error: 'No git remote is configured. Add and push to a remote before creating a pull request.' } + } + if (!snapshot.upstream || snapshot.ahead > 0) { + return { ok: false, error: 'Push the current branch before creating a pull request.' 
} + } + const existingPr = await this.readPullRequestSummary(snapshot.repoRoot) + if (existingPr.ok && existingPr.summary?.url) { + return { ok: false, error: 'This branch already has a pull request.', stderr: existingPr.summary.url } + } + const args = ['pr', 'create'] + const title = input.title?.trim() || '' + const body = input.body?.trim() || '' + if (title) args.push('--title', title) + if (body) args.push('--body', body) + if (!title && !body) args.push('--fill') + if (input.draft) args.push('--draft') + + const result = await this.runGh(args, snapshot.repoRoot) + if (result.code !== 0) { + return { + ok: false, + error: result.stderr.trim() || result.stdout.trim() || '`gh pr create` failed.', + stderr: result.stderr.trim() || undefined + } + } + return { + ok: true, + data: { + url: result.stdout.trim().match(/https?:\/\/[^\s]+/)?.[0], + headRefName: snapshot.branch + } + } + } catch (error) { + return failure(error) + } + } + + async pullRequestStatus(inputPath: string): Promise> { + try { + const snapshot = await this.buildSnapshot(inputPath) + if (snapshot.detached || !snapshot.branch) { + return { ok: false, error: 'Cannot read pull request status from a detached HEAD.' } + } + const existingPr = await this.readPullRequestSummary(snapshot.repoRoot) + if (!existingPr.ok) { + return { + ok: false, + error: existingPr.error, + stderr: existingPr.stderr + } + } + if (!existingPr.summary) { + return { ok: false, error: 'No pull request found for the current branch.' 
} + } + return { ok: true, data: existingPr.summary } + } catch (error) { + return failure(error) + } + } + + async pullRequestReadiness(inputPath: string): Promise> { + try { + const snapshot = await this.buildSnapshot(inputPath) + const warnings: string[] = [] + let existingPullRequest: GitPrSummary | undefined + if (!snapshot.detached && snapshot.branch && snapshot.remoteUrl) { + const existingPr = await this.readPullRequestSummary(snapshot.repoRoot) + if (existingPr.ok) { + existingPullRequest = existingPr.summary + } else if (!existingPr.notFound) { + warnings.push(existingPr.error) + } + } + let reason: string | undefined + if (snapshot.detached || !snapshot.branch) { + reason = 'Cannot create a pull request from a detached HEAD.' + } else if (!snapshot.remoteUrl) { + reason = 'No git remote is configured.' + } else if (!snapshot.upstream || snapshot.ahead > 0) { + reason = 'Push the current branch before creating a pull request.' + } else if (existingPullRequest?.url) { + reason = 'This branch already has a pull request.' + } + const shouldPushFirst = Boolean( + snapshot.branch && snapshot.remoteUrl && (!snapshot.upstream || snapshot.ahead > 0) + ) + return { + ok: true, + data: { + snapshot, + ...(existingPullRequest ? { existingPullRequest } : {}), + canCreatePullRequest: !reason, + shouldPushFirst, + ...(reason ? 
{ reason } : {}), + warnings + } + } + } catch (error) { + return failure(error) + } + } + + async resolveRepository(inputPath: string): Promise<{ requestedPath: string; repoRoot: string }> { + const rawPath = expandHomePath(inputPath || '').trim() + if (!rawPath) throw new Error('Repository path is required.') + const requestedPath = resolve(rawPath) + let cwd = requestedPath + try { + const stat = await fs.stat(requestedPath) + if (!stat.isDirectory()) cwd = dirname(requestedPath) + } catch { + throw new Error('Path does not exist on disk.') + } + const result = await this.run('git', ['rev-parse', '--show-toplevel'], { + cwd, + timeoutMs: this.timeoutMs + }) + if (result.code !== 0) { + throw new Error(result.stderr.trim() || result.stdout.trim() || 'Path is not inside a git repository.') + } + return { + requestedPath, + repoRoot: result.stdout.trim() + } + } + + private async readPullRequestSummary(repoRoot: string): Promise< + | { ok: true; summary?: GitPrSummary } + | { ok: false; error: string; stderr?: string; notFound?: boolean } + > { + const result = await this.runGh( + [ + 'pr', + 'view', + '--json', + 'number,url,state,isDraft,headRefName,baseRefName,statusCheckRollup' + ], + repoRoot + ) + if (result.code !== 0) { + const stderr = result.stderr.trim() + const stdout = result.stdout.trim() + const message = stderr || stdout || '`gh pr view` failed.' 
+ if (isNoPullRequestMessage(message)) { + return { ok: true } + } + return { + ok: false, + error: message, + stderr: stderr || undefined + } + } + return { ok: true, summary: parsePullRequestSummary(result.stdout) } + } + + private async runGh(args: string[], cwd: string): Promise { + return this.run('gh', args, { + cwd, + timeoutMs: this.timeoutMs, + env: { GH_PROMPT_DISABLED: '1' } + }) + } + + private async buildSnapshot(inputPath: string): Promise { + const repo = await this.resolveRepository(inputPath) + const [branchResult, commitResult, upstreamResult, remoteResult, statusResult] = + await Promise.all([ + this.run('git', ['symbolic-ref', '--quiet', '--short', 'HEAD'], { + cwd: repo.repoRoot, + timeoutMs: this.timeoutMs + }), + this.run('git', ['rev-parse', '--short', 'HEAD'], { + cwd: repo.repoRoot, + timeoutMs: this.timeoutMs + }), + this.run('git', ['rev-parse', '--abbrev-ref', '--symbolic-full-name', '@{u}'], { + cwd: repo.repoRoot, + timeoutMs: this.timeoutMs + }), + this.run('git', ['config', '--get', 'remote.origin.url'], { + cwd: repo.repoRoot, + timeoutMs: this.timeoutMs + }), + this.run('git', ['status', '--porcelain=v1', '-z', '--untracked-files=all'], { + cwd: repo.repoRoot, + timeoutMs: this.timeoutMs + }) + ]) + + const branch = branchResult.code === 0 ? branchResult.stdout.trim() : undefined + const upstream = upstreamResult.code === 0 ? upstreamResult.stdout.trim() : undefined + const files = parseStatusPorcelainZ(statusResult.stdout) + const aheadBehind = upstream + ? await this.readAheadBehind(repo.repoRoot) + : { ahead: 0, behind: 0 } + + return { + requestedPath: repo.requestedPath, + repoRoot: repo.repoRoot, + branch, + commit: commitResult.code === 0 ? commitResult.stdout.trim() : undefined, + detached: !branch, + upstream, + remoteName: upstream?.includes('/') ? upstream.split('/')[0] : remoteResult.code === 0 ? 'origin' : undefined, + remoteUrl: remoteResult.code === 0 ? 
remoteResult.stdout.trim() : undefined, + ahead: aheadBehind.ahead, + behind: aheadBehind.behind, + files, + counts: { + changed: files.length, + staged: files.filter((file) => file.staged).length, + unstaged: files.filter((file) => file.unstaged).length, + untracked: files.filter((file) => file.kind === 'untracked').length + }, + clean: files.length === 0 + } + } + + private async readAheadBehind(repoRoot: string): Promise<{ ahead: number; behind: number }> { + const result = await this.run('git', ['rev-list', '--left-right', '--count', 'HEAD...@{u}'], { + cwd: repoRoot, + timeoutMs: this.timeoutMs + }) + if (result.code !== 0) return { ahead: 0, behind: 0 } + const [aheadRaw, behindRaw] = result.stdout.trim().split(/\s+/) + return { + ahead: Number(aheadRaw) || 0, + behind: Number(behindRaw) || 0 + } + } + + private async mustRun(command: string, args: string[], cwd: string): Promise { + const result = await this.run(command, args, { cwd, timeoutMs: this.timeoutMs }) + if (result.code !== 0) { + throw new Error(result.stderr.trim() || result.stdout.trim() || `${command} ${args.join(' ')} failed.`) + } + return result + } +} + +/** + * Parses NUL-separated `git status --porcelain=v1 -z` output into file records. + * Each record is `XY path`; records shorter than three characters are skipped. + * Rename/copy records are followed by one extra NUL field holding the source + * path, which is returned as `originalPath` instead of being parsed as an entry. + */ +export function parseStatusPorcelainZ(output: string): GitFileStatus[] { + const entries: GitFileStatus[] = [] + const parts = output.split('\0') + let i = 0 + while (i < parts.length) { + const entry = parts[i] + if (!entry || entry.length < 3) { + i++ + continue + } + const index = entry[0] || ' ' + const workingTree = entry[1] || ' ' + const path = entry.slice(3) + let originalPath: string | undefined + // git emits the source path for a rename/copy in EITHER column, so check both; + // otherwise a working-tree rename leaves its source to be misread as a new entry. + const hasOriginalPath = + index === 'R' || index === 'C' || workingTree === 'R' || workingTree === 'C' + if (hasOriginalPath && i + 1 < parts.length) { + originalPath = parts[i + 1] || undefined + i += 2 + } else { + i++ + } + entries.push({ + path, + originalPath, + index, + workingTree, + kind: classifyStatus(index, workingTree), + staged: index !== ' ' && index !== '?' && index !== '!', + unstaged: workingTree !== ' ' || index === '?' || index === '!' 
+ }) + } + return entries +} + +async function runCommand( + command: string, + args: string[], + options: GitCommandOptions +): Promise { + return await new Promise((resolve) => { + let stdout = '' + let stderr = '' + let settled = false + const child = spawn(command, args, { + cwd: options.cwd, + env: { ...process.env, ...(command === 'gh' ? { GH_PROMPT_DISABLED: '1' } : {}), ...options.env }, + stdio: ['ignore', 'pipe', 'pipe'] + }) + const timeout = setTimeout(() => { + if (settled) return + settled = true + child.kill() + resolve({ stdout, stderr: `${command} timed out after ${options.timeoutMs || DEFAULT_TIMEOUT_MS}ms.`, code: -1 }) + }, options.timeoutMs || DEFAULT_TIMEOUT_MS) + child.stdout?.on('data', (chunk: Buffer) => { + stdout += chunk.toString('utf8') + }) + child.stderr?.on('data', (chunk: Buffer) => { + stderr += chunk.toString('utf8') + }) + child.on('error', (error: NodeJS.ErrnoException) => { + if (settled) return + settled = true + clearTimeout(timeout) + const message = + error.code === 'ENOENT' + ? `${command} is not installed or not on PATH.` + : `Failed to launch ${command}: ${error.message}` + resolve({ stdout, stderr: message, code: -1 }) + }) + child.on('close', (code) => { + if (settled) return + settled = true + clearTimeout(timeout) + resolve({ stdout, stderr, code: code ?? 0 }) + }) + }) +} + +function classifyStatus( + index: string, + workingTree: string +): GitFileStatus['kind'] { + if (index === '?' || workingTree === '?') return 'untracked' + if (index === '!' 
|| workingTree === '!') return 'ignored' + if (index === 'U' || workingTree === 'U' || (index === 'A' && workingTree === 'A')) { + return 'conflicted' + } + if (index === 'R' || workingTree === 'R') return 'renamed' + if (index === 'A' || workingTree === 'A') return 'created' + if (index === 'D' || workingTree === 'D') return 'deleted' + return 'modified' +} + +function sanitizeRepoPaths(paths: string[] | undefined, repoRoot: string): string[] { + if (!Array.isArray(paths)) return [] + const sanitized: string[] = [] + for (const candidate of paths) { + const trimmed = String(candidate || '').trim() + if (!trimmed) continue + if (isAbsolute(trimmed)) { + throw new Error('Stage paths must be relative to the repository.') + } + const normalized = normalize(trimmed) + if (normalized === '.' || normalized === '..' || normalized.startsWith(`..${sep}`)) { + throw new Error('Stage paths must stay inside the repository.') + } + const resolvedPath = resolve(repoRoot, normalized) + const relativePath = relative(repoRoot, resolvedPath) + if (!relativePath || relativePath === '..' || relativePath.startsWith(`..${sep}`) || isAbsolute(relativePath)) { + throw new Error('Stage paths must stay inside the repository.') + } + sanitized.push(relativePath) + } + return sanitized +} + +function expandHomePath(value?: string | null): string { + const raw = String(value || '').trim() + if (!raw) return '' + if (raw === '~') return homedir() + if (raw.startsWith('~/')) return join(homedir(), raw.slice(2)) + return raw +} + +function parsePullRequestSummary(output: string): GitPrSummary { + const parsed = JSON.parse(output || '{}') as Record + const checks = Array.isArray(parsed.statusCheckRollup) + ? parsed.statusCheckRollup.map((item) => { + const record = isRecord(item) ? 
item : {} + return { + name: stringField(record.name), + status: stringField(record.status), + conclusion: stringField(record.conclusion), + url: stringField(record.detailsUrl) || stringField(record.url) + } + }) + : undefined + return { + number: typeof parsed.number === 'number' ? parsed.number : undefined, + url: stringField(parsed.url), + state: stringField(parsed.state), + isDraft: typeof parsed.isDraft === 'boolean' ? parsed.isDraft : undefined, + headRefName: stringField(parsed.headRefName), + baseRefName: stringField(parsed.baseRefName), + checks + } +} + +function isNoPullRequestMessage(message: string): boolean { + const normalized = message.toLowerCase() + return ( + normalized.includes('no pull requests found') || + normalized.includes('no open pull requests') || + normalized.includes('could not find any pull requests') + ) +} + +function failure(error: unknown): GitResult { + return { + ok: false, + error: error instanceof Error ? error.message : String(error) + } +} + +function isRecord(value: unknown): value is Record { + return Boolean(value && typeof value === 'object' && !Array.isArray(value)) +} + +function stringField(value: unknown): string | undefined { + return typeof value === 'string' && value.trim() ? 
value.trim() : undefined +} diff --git a/src/main/settings/MainSanitizers.test.ts b/src/main/settings/MainSanitizers.test.ts index 132dcc9e..e628fc9b 100644 --- a/src/main/settings/MainSanitizers.test.ts +++ b/src/main/settings/MainSanitizers.test.ts @@ -143,4 +143,32 @@ describe('MainSanitizers settings patches', () => { mainAuthorityMs: 60_000 }) }) + + it('sanitizes changelog persistence settings', () => { + const settings = makeSettings() + const { sanitizeSettingsPatch } = makeSanitizers(settings) + + const sanitized = sanitizeSettingsPatch({ + lastSeenChangelogVersion: ' 1.0.73 ', + pendingUpdateChangelog: { + version: ' 1.0.74 ', + releaseName: ' AGBench 1.0.74 ', + releaseDate: ' 2026-06-04T13:00:00.000Z ', + releaseNotes: [ + { version: ' 1.0.74 ', note: 'Updater pill.' }, + { version: '', note: 'ignored' } + ] + } + }) + + expect(sanitized).toMatchObject({ + lastSeenChangelogVersion: '1.0.73', + pendingUpdateChangelog: { + version: '1.0.74', + releaseName: 'AGBench 1.0.74', + releaseDate: '2026-06-04T13:00:00.000Z', + releaseNotes: [{ version: '1.0.74', note: 'Updater pill.' }] + } + }) + }) }) diff --git a/src/main/settings/MainSanitizers.ts b/src/main/settings/MainSanitizers.ts index 5c5d5fd9..031803ed 100644 --- a/src/main/settings/MainSanitizers.ts +++ b/src/main/settings/MainSanitizers.ts @@ -7,6 +7,7 @@ import type { HandoffCard, HandoffCardFilter, ProviderId, + ProductUpdateChangelog, RuntimeProfile, ScheduledTask, WorkspaceRecord @@ -65,6 +66,8 @@ const SETTINGS_PATCH_KEYS = new Set([ 'bridgeDaemonEnabled', 'codexSandboxFallback', 'updateChannel', + 'lastSeenChangelogVersion', + 'pendingUpdateChangelog', 'approvalTimeouts' ]) @@ -207,6 +210,40 @@ function sanitizeApprovalTimeoutMs(value: unknown, fallback: number): number { return Math.max(5_000, Math.min(3_600_000, parsed)) } +function sanitizeUpdateChangelog(value: unknown): ProductUpdateChangelog | undefined { + const record = isRecord(value) ? 
value : {} + const version = typeof record.version === 'string' ? record.version.trim() : '' + if (!version) return undefined + + const changelog: ProductUpdateChangelog = { version } + if (typeof record.releaseName === 'string' && record.releaseName.trim()) { + changelog.releaseName = record.releaseName.trim() + } + if (typeof record.releaseDate === 'string' && record.releaseDate.trim()) { + changelog.releaseDate = record.releaseDate.trim() + } + if (typeof record.releaseNotes === 'string') { + changelog.releaseNotes = record.releaseNotes + } else if (Array.isArray(record.releaseNotes)) { + const notes = record.releaseNotes + .map((item) => { + const noteRecord = isRecord(item) ? item : {} + const noteVersion = + typeof noteRecord.version === 'string' ? noteRecord.version.trim() : '' + if (!noteVersion) return null + return { + version: noteVersion, + note: typeof noteRecord.note === 'string' ? noteRecord.note : null + } + }) + .filter((item): item is { version: string; note: string | null } => item !== null) + if (notes.length > 0) { + changelog.releaseNotes = notes + } + } + return changelog +} + export function normalizeEnsembleRunIdentity(value: unknown): EnsembleRunIdentity | undefined { if (!isRecord(value)) return undefined return { @@ -615,6 +652,24 @@ export function createMainSanitizers(deps: MainSanitizerDeps) { mainAuthorityMs: sanitizeApprovalTimeoutMs(prefs.mainAuthorityMs, current.mainAuthorityMs) } } + if ('lastSeenChangelogVersion' in sanitized) { + if ( + typeof sanitized.lastSeenChangelogVersion === 'string' && + sanitized.lastSeenChangelogVersion.trim() + ) { + sanitized.lastSeenChangelogVersion = sanitized.lastSeenChangelogVersion.trim() + } else { + delete sanitized.lastSeenChangelogVersion + } + } + if ('pendingUpdateChangelog' in sanitized) { + const changelog = sanitizeUpdateChangelog(sanitized.pendingUpdateChangelog) + if (changelog) { + sanitized.pendingUpdateChangelog = changelog + } else { + delete sanitized.pendingUpdateChangelog + } 
+ } if ('kimiSanitiserEnabled' in sanitized) { sanitized.kimiSanitiserEnabled = typeof sanitized.kimiSanitiserEnabled === 'boolean' diff --git a/src/main/store/index.ts b/src/main/store/index.ts index 1c06f7b5..89b46efd 100644 --- a/src/main/store/index.ts +++ b/src/main/store/index.ts @@ -31,7 +31,8 @@ import { ProductCrashRecord, RuntimeProfile, HandoffCard, - HandoffCardFilter + HandoffCardFilter, + ProductUpdateChangelog } from './types' import { canonicalizeExternalPathGrantMetadata } from './ExternalPathGrants' import { createDefaultEnsembleConfig } from '../EnsembleDefaults' @@ -97,6 +98,10 @@ const runEventHashCache = new Map() // so their global chats have a usable runtime out of the box. Unconditional: // unused default profiles for a force-disabled provider are harmless data. const providerIds: ProviderId[] = ['gemini', 'codex', 'claude', 'kimi', 'grok', 'cursor'] +const LEGACY_AGBENCH_FONT_STACK = + '"SF Pro", "SF Pro Text", "SF Pro Display", -apple-system, BlinkMacSystemFont, "Segoe UI", "Helvetica Neue", Roboto, Arial, sans-serif' +const AGBENCH_DEFAULT_FONT_STACK = + '"Avenir Next", Avenir, system-ui, -apple-system, BlinkMacSystemFont, "SF Pro Text", "Segoe UI", sans-serif' const defaultSettings: AppSettings = { activeProvider: 'gemini', @@ -120,8 +125,7 @@ const defaultSettings: AppSettings = { userBubbleColor: 'system', promptSurfaceStyle: 'liquid_glass', composerStyle: 'default', - transcriptFontFamily: - '"SF Pro", "SF Pro Text", "SF Pro Display", -apple-system, BlinkMacSystemFont, "Segoe UI", "Helvetica Neue", Roboto, Arial, sans-serif', + transcriptFontFamily: AGBENCH_DEFAULT_FONT_STACK, composerFontFamily: 'match-transcript', // 1.0.5-EW25 — Display currency for cost / token-spend chips. // USD by default; user can switch to GBP / EUR via Settings → @@ -132,16 +136,20 @@ const defaultSettings: AppSettings = { // General lets the user dial 0–25%. Applied in `formatCost.ts` // before FX conversion so the bias is currency-agnostic. 
currencyOverestimatePercent: 0, + dashboardStatPrefs: { + dashboardSize: 'small' + }, welcomeHeatmapPrefs: { + layout: 'single', workspaceActivityEnabled: true, agbenchActivityEnabled: true, externalActivityEnabled: true }, - // 1.0.5-EW26 — Kimi compatibility filter defaults. Off by - // default; the user opts in from Settings → General when they - // hit a Moonshot content_filter rejection on an incidental - // topic. Custom keywords stay empty until the user adds any. - kimiSanitiserEnabled: false, + // 1.0.5-EW26 — Kimi compatibility filter defaults. On by + // default so Moonshot content_filter retries get the compatibility + // pass automatically. Custom keywords stay empty until the user + // adds any. + kimiSanitiserEnabled: true, kimiSanitiserCustomKeywords: '', // 1.0.7-M10 — second-pass classifier stays opt-in; when unset // or false, the retry envelope remains keyword-only. @@ -177,7 +185,7 @@ const defaultSettings: AppSettings = { geminiMcpBridgeLastStatus: undefined, bridgeDaemonEnabled: true, codexSandboxFallback: 'ask_rerun', - updateChannel: 'debug', + updateChannel: 'stable', approvalTimeouts: { enabled: true, // Defaults mirror DEFAULT_APPROVAL_TIMEOUT_POLICY in @@ -216,6 +224,50 @@ function objectOrUndefined(value: T | null | undefined): T | u return value && typeof value === 'object' && !Array.isArray(value) ? 
value : undefined } +function normalizeUpdateChangelog(value: unknown): ProductUpdateChangelog | undefined { + const record = objectOrUndefined(value as Record | null | undefined) + if (!record || typeof record.version !== 'string' || !record.version.trim()) { + return undefined + } + const releaseNotes = record.releaseNotes + const normalized: ProductUpdateChangelog = { + version: record.version.trim() + } + if (typeof record.releaseName === 'string' && record.releaseName.trim()) { + normalized.releaseName = record.releaseName.trim() + } + if (typeof record.releaseDate === 'string' && record.releaseDate.trim()) { + normalized.releaseDate = record.releaseDate.trim() + } + if (typeof releaseNotes === 'string') { + normalized.releaseNotes = releaseNotes + } else if (Array.isArray(releaseNotes)) { + const notes = releaseNotes + .map((item) => { + const noteRecord = objectOrUndefined(item as Record | null | undefined) + if (!noteRecord || typeof noteRecord.version !== 'string' || !noteRecord.version.trim()) { + return null + } + return { + version: noteRecord.version.trim(), + note: typeof noteRecord.note === 'string' ? noteRecord.note : null + } + }) + .filter((item): item is { version: string; note: string | null } => item !== null) + if (notes.length > 0) { + normalized.releaseNotes = notes + } + } + return normalized +} + +function normalizeSettingsFontFamily(value: unknown, fallback: string): string { + if (typeof value !== 'string') return fallback + const trimmed = value.trim() + if (!trimmed) return fallback + return trimmed === LEGACY_AGBENCH_FONT_STACK ? AGBENCH_DEFAULT_FONT_STACK : trimmed +} + function writeJson(filePath: string, data: T) { const tempPath = `${filePath}.${process.pid}.${Date.now()}.tmp` let fd: number | null = null @@ -285,6 +337,36 @@ function runEventFilePath(runId: string): string { return path.join(runEventsDir, safeRunEventFileName(runId)) } +// Per-run artifact directory. 
Mirrors the path derivation in +// appendRunStreamArtifact (the `.jsonl`-stripped run file name is used as a +// dedicated directory holding stdout/stderr/stdin .log files for the run), so +// every artifact for a given runId lives under exactly this path. Deriving it +// from `safeRunEventFileName` keeps deletion in lockstep with creation. +function runArtifactDirPath(runId: string): string { + return path.join(runArtifactsDir, safeRunEventFileName(runId).replace(/\.jsonl$/, '')) +} + +// Best-effort, non-fatal cleanup of one run's on-disk forensic data: its +// run-event `.jsonl` ledger and its artifact directory. Each removal is mapped +// from a KNOWN runId via the deterministic safeRunEventFileName transform — we +// never readdir-and-match-by-prefix, so a sibling run whose id is a prefix of +// this one (e.g. `run-1` vs `run-1-extra`) can never be caught: the targets are +// exact file/dir names (`run-1.jsonl` ≠ `run-1-extra.jsonl`). Missing files are +// ignored so a partially-written run cannot abort the chat deletion. 
+function deleteRunForensicFiles(runId: string): void { + if (!runId) return + try { + fs.rmSync(runEventFilePath(runId), { force: true }) + } catch (e) { + console.error(`Failed to delete run-event file for run ${runId}`, e) + } + try { + fs.rmSync(runArtifactDirPath(runId), { recursive: true, force: true }) + } catch (e) { + console.error(`Failed to delete run artifacts for run ${runId}`, e) + } +} + function readRunEventFile(filePath: string): RunEventRecord[] { try { if (!fs.existsSync(filePath)) return [] @@ -370,6 +452,7 @@ export class AppStore { const storedWelcomeHeatmapPrefs = objectOrUndefined(stored.welcomeHeatmapPrefs) const storedApprovalTimeouts = objectOrUndefined(stored.approvalTimeouts) const storedApprovalTimeoutProviderMs = objectOrUndefined(storedApprovalTimeouts?.perProviderMs) + const pendingUpdateChangelog = normalizeUpdateChangelog(stored.pendingUpdateChangelog) return { ...defaultSettings, ...stored, @@ -384,6 +467,14 @@ export class AppStore { ? null : defaultSettings.defaultGeminiAuthProfileId, geminiAuthProfiles: Array.isArray(stored.geminiAuthProfiles) ? stored.geminiAuthProfiles : [], + transcriptFontFamily: normalizeSettingsFontFamily( + stored.transcriptFontFamily, + defaultSettings.transcriptFontFamily || AGBENCH_DEFAULT_FONT_STACK + ), + composerFontFamily: normalizeSettingsFontFamily( + stored.composerFontFamily, + defaultSettings.composerFontFamily || 'match-transcript' + ), // Phase M1 — coerce any non-enum value (missing, typo'd, legacy) // back to the safe default so the eventual API-vs-CLI dispatch // logic never sees an unexpected mode. @@ -397,7 +488,10 @@ export class AppStore { ...defaultSettings.agenticServices, ...(stored.agenticServices || {}) }, - dashboardStatPrefs: storedDashboardStatPrefs ? 
{ ...storedDashboardStatPrefs } : undefined, + dashboardStatPrefs: { + ...(defaultSettings.dashboardStatPrefs || {}), + ...(storedDashboardStatPrefs || {}) + }, welcomeHeatmapPrefs: { ...defaultSettings.welcomeHeatmapPrefs, ...(storedWelcomeHeatmapPrefs || {}) @@ -409,6 +503,12 @@ export class AppStore { stored.nativeSubAgentRequests === 'provider' || stored.nativeSubAgentRequests === 'agbench' ? stored.nativeSubAgentRequests : 'ask', + lastSeenChangelogVersion: + typeof stored.lastSeenChangelogVersion === 'string' && + stored.lastSeenChangelogVersion.trim() + ? stored.lastSeenChangelogVersion.trim() + : undefined, + pendingUpdateChangelog, // Normalize: a stored non-boolean (e.g. an older settings file // where the field is missing) falls back to the default (true) // so the auto-resume behaviour is on for upgrading users. @@ -872,6 +972,19 @@ export class AppStore { } static deleteChat(chatId: string) { + // Read the chat's KNOWN runs before unlinking so we can clean up its + // per-run forensic files (run-event ledger + artifacts) that would + // otherwise be orphaned on disk forever. Derived purely from this chat's + // own runIds (never a directory scan), so a sibling chat's similar/prefixed + // run files are guaranteed untouched. All cleanup is best-effort. + const chat = this.getChat(chatId) + const runs = Array.isArray(chat?.runs) ? 
chat.runs : [] + for (const run of runs) { + if (run && typeof run.runId === 'string') { + deleteRunForensicFiles(run.runId) + } + } + const chatPath = path.join(chatsDir, `${chatId}.json`) if (fs.existsSync(chatPath)) { fs.unlinkSync(chatPath) diff --git a/src/main/store/types.ts b/src/main/store/types.ts index b443bb35..0cf410e8 100644 --- a/src/main/store/types.ts +++ b/src/main/store/types.ts @@ -168,6 +168,23 @@ export type PermissionPresetId = | 'custom' export type CodexSandboxFallbackMode = 'ask_rerun' | 'off' export type ProductUpdateChannel = 'debug' | 'stable' | 'nightly' +export interface ProductUpdateReleaseNoteInfo { + version: string + note: string | null +} +export type ProductUpdateReleaseNotes = string | ProductUpdateReleaseNoteInfo[] +export interface ProductUpdateChangelog { + version: string + releaseName?: string + releaseDate?: string + releaseNotes?: ProductUpdateReleaseNotes +} +export interface ProductChangelogSnapshot { + currentVersion: string + lastSeenChangelogVersion?: string + pendingUpdateChangelog?: ProductUpdateChangelog + latestUpdateChangelog?: ProductUpdateChangelog +} /** Phase M1 — picks which runtime path AGBench uses for Gemini runs. * * - `'auto'` (default): use the API runtime when an API key / @@ -999,6 +1016,8 @@ export type ProviderToolingCapabilityId = | Exclude | 'creativeApps' | 'networkAccess' + | 'elicit' + | 'delegate' export interface ProviderCapabilityWarning { id: string @@ -1386,9 +1405,8 @@ export interface AppSettings { */ dashboardEnabled?: boolean /** - * Welcome dashboard display size. Large preserves the existing full-size - * layout; small renders the same dashboard scaled to 30% and pinned to the - * top-right of the welcome surface. + * Welcome dashboard display size. Small is the default compact welcome + * treatment; large preserves the full-size layout. 
*/ dashboardSize?: 'large' | 'small' } @@ -1402,9 +1420,9 @@ export interface AppSettings { externalActivityEnabled?: boolean /** * 1.0.72 — Layout for the welcome standalone heatmaps: - * - 'stacked' (default): every enabled heatmap stacked vertically + * - 'stacked': every enabled heatmap stacked vertically * (the long-standing layout). - * - 'single': one heatmap at a time, auto-cycling every 90s through + * - 'single' (default): one heatmap at a time, auto-cycling every 90s through * the enabled heatmaps (mirrors the dashboard tab auto-cycle). */ layout?: 'single' | 'stacked' @@ -1415,9 +1433,8 @@ export interface AppSettings { * containing a configured trigger keyword (default list + * `kimiSanitiserCustomKeywords`) is replaced with a redacted * placeholder before the Kimi process spawns. Other - * participants always see the unfiltered prompt. Default - * `false` — opt-in for users who hit Moonshot content_filter - * rejections on incidental world-news / geopolitics digressions. */ + * participants always see the unfiltered prompt. Default `true` + * so Moonshot compatibility retries are available out of the box. */ kimiSanitiserEnabled: boolean /** 1.0.5-EW26 — Newline-separated extra trigger keywords the * user wants the Kimi compatibility filter to catch on top of @@ -1460,6 +1477,8 @@ export interface AppSettings { bridgeDaemonEnabled?: boolean codexSandboxFallback: CodexSandboxFallbackMode updateChannel: ProductUpdateChannel + lastSeenChangelogVersion?: string + pendingUpdateChangelog?: ProductUpdateChangelog /** Per-provider + main-authority approval timeout policy (Phase E1.1). * When an approval enters the pending registry, a timer fires after * the matching ms value and auto-denies the request. 
`enabled: false` diff --git a/src/preload/index.d.ts b/src/preload/index.d.ts index 9b8c54c6..33e07d75 100644 --- a/src/preload/index.d.ts +++ b/src/preload/index.d.ts @@ -37,6 +37,7 @@ import { ProductCrashRecord, ProductDiagnosticsExportResult, ProductOperationsStatus, + ProductChangelogSnapshot, RuntimeProfile, HandoffCard, HandoffCardFilter, @@ -47,6 +48,12 @@ import type { UpdateStateSnapshot } from '../main/UpdateService' import type { GrokUsageSnapshot } from '../main/grok/GrokUsage' import type { AppShellStatsSnapshot } from '../main/services/AppShellStatsService' import type { SessionCheckpointRecord } from '../main/checkpoints/SessionCheckpoint' +import type { + GitPrReadiness, + GitPrSummary, + GitRepositorySnapshot, + GitResult +} from '../main/services/GitService' type GeminiCapabilityKind = 'mcp' | 'extensions' | 'skills' | 'agents' type GeminiCapabilityFormat = 'json' | 'raw' | 'error' @@ -315,8 +322,40 @@ declare global { getCodexUsageSnapshot: () => Promise getExternalUsage: () => Promise probeGrokUsage: () => Promise + gitSnapshot: (payload: { + workspacePath?: string + repoPath?: string + }) => Promise> + gitStage: (payload: { + workspacePath?: string + repoPath?: string + paths?: string[] + all?: boolean + update?: boolean + patch?: string + }) => Promise> + gitCommit: (payload: { + workspacePath?: string + repoPath?: string + message: string + }) => Promise> + gitPush: (payload: { + workspacePath?: string + repoPath?: string + setUpstream?: boolean + remote?: string + }) => Promise> + githubPrStatus: (payload: { + workspacePath?: string + repoPath?: string + }) => Promise> + githubPrReadiness: (payload: { + workspacePath?: string + repoPath?: string + }) => Promise> createGithubPr: (payload: { workspacePath?: string + repoPath?: string title?: string body?: string draft?: boolean @@ -363,7 +402,11 @@ declare global { numTurns?: number ) => Promise startAgentReview: (provider: ProviderId, threadId: string, params?: any) => Promise - 
respondAgentApproval: (requestId: string, action: AgentApprovalAction) => Promise + respondAgentApproval: ( + requestId: string, + action: AgentApprovalAction, + intentNote?: string + ) => Promise writeGeminiInput: (data: string) => Promise getDiff: (workspace: string) => Promise<{ type: 'not_repo' | 'no_changes' | 'changes' | 'error' @@ -481,7 +524,9 @@ declare global { downloadUpdate: () => Promise installUpdateOnQuit: () => Promise installUpdateNow: () => Promise - onUpdateStatusChanged: (callback: (snapshot: UpdateStateSnapshot) => void) => void + changelogSnapshot: () => Promise + markChangelogSeen: (version: string) => Promise + onUpdateStatusChanged: (callback: (snapshot: UpdateStateSnapshot) => void) => () => void bridgeNetworkingStatus: () => Promise<{ lan: { enabled: boolean diff --git a/src/preload/index.ts b/src/preload/index.ts index eb641f35..f22d170b 100644 --- a/src/preload/index.ts +++ b/src/preload/index.ts @@ -6,6 +6,12 @@ import type { } from '../main/store/types' import type { AppShellStatsSnapshot } from '../main/services/AppShellStatsService' import type { SessionCheckpointRecord } from '../main/checkpoints/SessionCheckpoint' +import type { + GitPrReadiness, + GitPrSummary, + GitRepositorySnapshot, + GitResult +} from '../main/services/GitService' type ComposerImageAttachment = { id?: string @@ -132,8 +138,31 @@ const api = { getCodexUsageSnapshot: () => ipcRenderer.invoke('get-codex-usage-snapshot'), getExternalUsage: () => ipcRenderer.invoke('get-external-usage'), probeGrokUsage: () => ipcRenderer.invoke('grok-usage:probe'), + gitSnapshot: (payload: { workspacePath?: string; repoPath?: string }) => + ipcRenderer.invoke('git:snapshot', payload) as Promise>, + gitStage: (payload: { + workspacePath?: string + repoPath?: string + paths?: string[] + all?: boolean + update?: boolean + patch?: string + }) => ipcRenderer.invoke('git:stage', payload) as Promise>, + gitCommit: (payload: { workspacePath?: string; repoPath?: string; message: string }) 
=> + ipcRenderer.invoke('git:commit', payload) as Promise>, + gitPush: (payload: { + workspacePath?: string + repoPath?: string + setUpstream?: boolean + remote?: string + }) => ipcRenderer.invoke('git:push', payload) as Promise>, + githubPrStatus: (payload: { workspacePath?: string; repoPath?: string }) => + ipcRenderer.invoke('github:pr-status', payload) as Promise>, + githubPrReadiness: (payload: { workspacePath?: string; repoPath?: string }) => + ipcRenderer.invoke('github:pr-readiness', payload) as Promise>, createGithubPr: (payload: { workspacePath?: string + repoPath?: string title?: string body?: string draft?: boolean @@ -177,8 +206,11 @@ const api = { | 'cancel' | 'grantExternalPathRead' | 'grantExternalPathEdit' - | 'declineExternalPath' - ) => ipcRenderer.invoke('respond-agent-approval', requestId, action), + | 'declineExternalPath', + // Order-4 — optional one-line "why" note. Persisted onto the + // approval-ledger row's metadata; never required. + intentNote?: string + ) => ipcRenderer.invoke('respond-agent-approval', requestId, action, intentNote), writeGeminiInput: (data: string) => ipcRenderer.invoke('write-gemini-input', data), getDiff: (workspace: string) => ipcRenderer.invoke('get-diff', workspace), openWorkspacePopout: (input: { kind: 'file-editor' | 'diff-studio'; workspacePath: string }) => @@ -378,8 +410,13 @@ const api = { downloadUpdate: () => ipcRenderer.invoke('download-update'), installUpdateOnQuit: () => ipcRenderer.invoke('install-update-on-quit'), installUpdateNow: () => ipcRenderer.invoke('install-update-now'), + changelogSnapshot: () => ipcRenderer.invoke('changelog-snapshot'), + markChangelogSeen: (version: string) => ipcRenderer.invoke('mark-changelog-seen', version), onUpdateStatusChanged: (callback: (snapshot: unknown) => void) => { - ipcRenderer.on('update-status-changed', (_event, snapshot) => callback(snapshot)) + const listener = (_event: Electron.IpcRendererEvent, snapshot: unknown): void => + callback(snapshot) + 
ipcRenderer.on('update-status-changed', listener) + return () => ipcRenderer.removeListener('update-status-changed', listener) }, bridgeFinalizePairing: (sessionID: string, userConfirmed: boolean) => ipcRenderer.invoke('bridge-finalize-pairing', sessionID, userConfirmed), diff --git a/src/renderer/src/App.tsx b/src/renderer/src/App.tsx index 50dddd5c..9a3225f7 100644 --- a/src/renderer/src/App.tsx +++ b/src/renderer/src/App.tsx @@ -52,6 +52,7 @@ import { RunRecoveryRecord, ProductOperationsStatus, ProductUpdateChannel, + ProductChangelogSnapshot, ChatScope, RuntimeProfile, HandoffCard, @@ -96,7 +97,10 @@ import { WorkspaceActivityHeatmap } from './components/WorkspaceActivityHeatmap' import { WelcomeHeatmaps, type WelcomeHeatmapSlot } from './components/WelcomeHeatmaps' import { useAppearance } from './hooks/useAppearance' import { useExternalPathRepoMetadata } from './hooks/useExternalPathRepoMetadata' +import { useUpdateStatus } from './hooks/useUpdateStatus' import { ExternalPathAboveRow } from './components/ExternalPathAboveRow' +import { GitCommitControls } from './components/GitCommitControls' +import type { GitRepositorySnapshot } from '../../main/services/GitService' import { ProviderBadgeIcon, Sidebar } from './components/Sidebar' import { Inspector } from './components/Inspector' import { SettingsPanel, type SettingsTab } from './components/SettingsPanel' @@ -104,6 +108,8 @@ import { SettingsSidebar } from './components/SettingsSidebar' import { SubThreadCreator } from './components/SubThreadCreator' import { FirstLaunchSheet } from './components/FirstLaunchSheet' import { BugReportSheet, type BugReportSubmission } from './components/BugReportSheet' +import { ChangelogSheet } from './components/ChangelogSheet' +import { UpdatePill } from './components/UpdatePill' import { WorkSessionSetupSheet, type WorkSessionSetupConfirmInput @@ -124,6 +130,7 @@ import { FolderSymbolIcon, GhostCompanionIcon, LinkCircleSymbolIcon, + MascotGhost, ModelSymbolIcon, 
OverviewSymbolIcon, PermissionSymbolIcon, @@ -175,7 +182,10 @@ import { rebindWelcomeEnsembleChatToGlobal, rebindWelcomeEnsembleChatToWorkspace } from './lib/ensembleWelcomeWorkspace' -import { withSessionActivityLedger } from './lib/sessionActivityLedger' +import { + deriveParticipantRenameContinuity, + withSessionActivityLedger +} from './lib/sessionActivityLedger' // EnsembleSetupSheet retired in 1.0.3 — the bottom-pinned modal had a // z-index race with the picker popovers and the form felt foreign. All // per-participant config now lives inline in the composer above-row @@ -291,9 +301,11 @@ import { } from './lib/usageStats' import { buildEnsembleRoundSummaryRows, + buildEscalationChips, buildRunCompleteSummaryRows, formatWorkDuration } from './lib/runCompleteSummary' +import { fetchProviderRates, type RendererProviderRates } from './lib/providerRateEstimate' import { getMemoryPreviewText, mergeCommandPaletteItems, @@ -878,6 +890,7 @@ function WelcomeUsageDashboard({ replaces the stat grid / chart inside. */} {!data.hasActivity ? (
+ No activity in the last 30 days. Kick off a run on this workspace to start filling the dashboard.
@@ -2707,6 +2720,20 @@ type TranscriptPanelProps = { * is disengaged. A stable ref, so it never perturbs the memo. */ autoFollowRef?: React.MutableRefObject + /** + * 1.0.7 — display currency + conservative-overestimate bias (Settings → + * General), threaded in so the ensemble run-complete card's Cost row routes + * through `formatCost`. Defaults to USD / 0 when omitted. + */ + currency?: DisplayCurrency + currencyOverestimatePercent?: number + /** + * 1.0.7 — per-provider rate table (USD per 1M tokens) from the + * `providerRates:get` IPC. Used ONLY to project a clearly-badged + * API-equivalent cost for subscription/credit seats that emit no + * `cost_usd` (Codex / Grok / Cursor). Absent → no estimate. + */ + providerRates?: RendererProviderRates } /** @@ -4148,7 +4175,10 @@ export const TranscriptPanel = memo( copiedId, copy, virtualize, - autoFollowRef + autoFollowRef, + currency, + currencyOverestimatePercent, + providerRates }: TranscriptPanelProps) { const visibleMessages = useMemo(() => { const source = isWelcomeChat ? EMPTY_CHAT_MESSAGES : messages @@ -4172,10 +4202,25 @@ export const TranscriptPanel = memo( // last speaker's), round-envelope duration, and summed tokens. // Solo chats: the original single-run summary. if (currentChat?.chatKind === 'ensemble' && currentChat.ensemble?.activeRound) { - return buildEnsembleRoundSummaryRows(currentChat, runCompleteNotice?.exitCode !== 0) + return buildEnsembleRoundSummaryRows(currentChat, runCompleteNotice?.exitCode !== 0, { + currency, + overestimatePercent: currencyOverestimatePercent, + providerRates + }) } return buildRunCompleteSummaryRows(currentRun) - }, [currentChat, currentRun, runCompleteNotice?.exitCode]) + }, [ + currentChat, + currentRun, + runCompleteNotice?.exitCode, + currency, + currencyOverestimatePercent, + providerRates + ]) + // 1.0.7 (M5 surfacing) — advisory chips for the dark-shipped escalation + // signals on the current round. 
Read-only: the orchestrator persists + // these; we just surface label + recommended action. + const escalationChips = useMemo(() => buildEscalationChips(currentChat), [currentChat]) const runBoundaryByMessageId = useMemo(() => { const runs = currentChat?.runs || [] const runById = new Map() @@ -4404,6 +4449,19 @@ export const TranscriptPanel = memo( currentProviderLabel, currentProvider ) + // 1.0.7 — participant-rename continuity. The + // header keeps the FROZEN role label; this quiet + // badge tells the reader the seat has since been + // renamed (e.g. "Planner" here is the seat now + // called "Architect") so they can follow one + // participant across a mid-session rename. Ledger- + // preferred, with a frozen-vs-current fallback — + // see deriveParticipantRenameContinuity. + const renameContinuity = deriveParticipantRenameContinuity( + msg, + currentChat?.ensemble?.participants, + currentChat?.ensemble?.sessionActivityLedger + ) return (
{label} @@ -4416,6 +4474,15 @@ export const TranscriptPanel = memo( {modelBadge} )} + {renameContinuity && ( + + renamed from {renameContinuity.fromRole} + + )}
) } @@ -4721,6 +4788,22 @@ export const TranscriptPanel = memo( )} + {escalationChips.length > 0 && ( +
+ {escalationChips.map((chip) => ( +
+ {chip.label} + {chip.action && ( + {chip.action} + )} +
+ ))} +
+ )}
File changes @@ -4885,6 +4968,10 @@ function App(): React.JSX.Element { // transient "Copied" state consistent across the transcript. const { copiedId, copy } = useCopyFeedback() const [settings, setSettings] = useState(null) + // 1.0.7 — per-provider rate table (USD per 1M tokens) for the ensemble + // run-complete card's projected cost estimate. Hydrated once at mount from + // the `providerRates:get` IPC; empty until then (no estimate shown). + const [providerRates, setProviderRates] = useState({}) const [chatContextTurns, setChatContextTurns] = useState(DEFAULT_CONTEXT_TURNS) const [workspaces, setWorkspaces] = useState([]) const [workspacesHydrated, setWorkspacesHydrated] = useState(false) @@ -5013,7 +5100,7 @@ function App(): React.JSX.Element { ) const [codexSandboxFallback, setCodexSandboxFallback] = useState('ask_rerun') - const [updateChannel, setUpdateChannel] = useState('debug') + const [updateChannel, setUpdateChannel] = useState('stable') const [approvalTimeouts, setApprovalTimeouts] = useState({ enabled: true, perProviderMs: { gemini: 120_000, codex: 30_000, claude: 120_000, kimi: 60_000 }, @@ -5200,6 +5287,25 @@ function App(): React.JSX.Element { cancelled = true } }, []) + /** + * 1.0.7 — one-shot fetch of the per-provider rate table over the existing + * `providerRates:get` IPC. Powers the ensemble run-complete card's projected + * cost ESTIMATE for subscription/credit seats (Codex / Grok / Cursor) that + * report no `cost_usd`. The rates are USD per 1M tokens and barely change + * during a session, so one read at mount is enough; on failure we leave the + * map empty and simply render no estimate. + */ + useEffect(() => { + let cancelled = false + void (async () => { + const rates = await fetchProviderRates() + if (cancelled) return + setProviderRates(rates) + })() + return () => { + cancelled = true + } + }, []) /** * Inline bug-report sheet. The tester opens this from the * "!" 
button next to the onboarding `?` button, describes whatever @@ -5208,6 +5314,24 @@ function App(): React.JSX.Element { * state — the sheet * resets every open. */ const [showBugReportSheet, setShowBugReportSheet] = useState(false) + const updateStatus = useUpdateStatus() + const [showChangelogSheet, setShowChangelogSheet] = useState(false) + const [changelogSnapshot, setChangelogSnapshot] = useState(null) + const autoChangelogOpenedRef = useRef(false) + const refreshChangelogSnapshot = + useCallback(async (): Promise => { + try { + const next = await window.api.changelogSnapshot() + setChangelogSnapshot(next) + return next + } catch { + return null + } + }, []) + const handleOpenChangelogSheet = useCallback(() => { + setShowChangelogSheet(true) + void refreshChangelogSnapshot() + }, [refreshChangelogSnapshot]) /** 1.0.4-AK2 — Work Session setup sheet open/closed state. * Opened by the composer's "Work Session" button (alongside * Turn/Continuous). On confirm, persists the WorkSessionConfig @@ -5241,6 +5365,27 @@ function App(): React.JSX.Element { cancelled = true } }, []) + useEffect(() => { + void refreshChangelogSnapshot() + }, [refreshChangelogSnapshot]) + useEffect(() => { + if (autoChangelogOpenedRef.current) return + if (!changelogSnapshot || appVersion === 'unknown') return + if (changelogSnapshot.pendingUpdateChangelog?.version !== appVersion) return + if (changelogSnapshot.lastSeenChangelogVersion === appVersion) return + autoChangelogOpenedRef.current = true + setShowChangelogSheet(true) + }, [appVersion, changelogSnapshot]) + const handleDismissChangelogSheet = useCallback(() => { + setShowChangelogSheet(false) + const pendingVersion = changelogSnapshot?.pendingUpdateChangelog?.version + if (!pendingVersion || pendingVersion !== appVersion) return + if (changelogSnapshot?.lastSeenChangelogVersion === appVersion) return + void window.api + .markChangelogSeen(appVersion) + .then((next) => setChangelogSnapshot(next)) + .catch(() => {}) + }, [appVersion, 
changelogSnapshot]) const handleSubmitBugReport = useCallback( async (submission: BugReportSubmission): Promise => { const api = window.api as typeof window.api & { @@ -5337,6 +5482,13 @@ function App(): React.JSX.Element { const [pendingApprovalQueueByChatId, setPendingApprovalQueueByChatId] = useState< Record >({}) + // Order-4 — optional one-line "why" note the user can attach to an + // approval decision. Rides the existing approval-ledger metadata + // channel as `{ intentNote }` (no schema migration). At most one + // approval card is on screen at a time (the queue head), so a single + // string suffices; it's cleared whenever an approval resolves so the + // next queued request starts blank. + const [intentNote, setIntentNote] = useState('') const [isSendConfirming, setIsSendConfirming] = useState(false) // 1.0.6-EW66-1d — Create-PR state is now keyed by workspace PATH // so the primary workspace and each WRITE-access additional @@ -5354,6 +5506,20 @@ function App(): React.JSX.Element { const setCreatePrStateFor = (path: string, next: CreatePrState): void => setCreatePrStateByPath((prev) => ({ ...prev, [path]: next })) const [diffActionMenuOpen, setDiffActionMenuOpen] = useState(false) + // Live git snapshot lifted out of the GitCommitControls menu so the + // above-bar header can surface real repo state (branch, changed-file + // count, staged/unstaged, push/PR readiness) sourced from + // `gitSnapshot` rather than the stale `currentWorkspace.branch` / + // tool-derived diff counts. Null until the menu opens (lazy fetch) or + // the repo read fails. + const [primaryGitSnapshot, setPrimaryGitSnapshot] = useState(null) + // Drop the lifted snapshot whenever the workspace changes so a stale + // repo's branch/counts never bleed into a freshly-switched workspace + // before its menu is reopened. 
+ const currentWorkspacePath = currentWorkspace?.path + useEffect(() => { + setPrimaryGitSnapshot(null) + }, [currentWorkspacePath]) const [isComposerDragOver, setIsComposerDragOver] = useState(false) type AttachedWindowSnapshot = { handleID: string @@ -6750,7 +6916,7 @@ function App(): React.JSX.Element { setGeminiMcpBridgeEnabledState(Boolean(s.geminiMcpBridgeEnabled)) setGeminiMcpBridgeStatus(s.geminiMcpBridgeLastStatus || null) setCodexSandboxFallback(s.codexSandboxFallback || 'ask_rerun') - setUpdateChannel(s.updateChannel || 'debug') + setUpdateChannel(s.updateChannel || 'stable') if (s.approvalTimeouts) { setApprovalTimeouts(s.approvalTimeouts) } @@ -8073,12 +8239,12 @@ function App(): React.JSX.Element { } } - /** - * Keep a ref to the *latest* `refreshUsageSummary` closure so the - * autonomous polling effect (below) doesn't need to depend on `codexStatus` - * and tear the timer down on every status mutation. - */ - + /** + * Keep a ref to the *latest* `refreshUsageSummary` closure so the + * autonomous polling effect (below) doesn't need to depend on `codexStatus` + * and tear the timer down on every status mutation. + */ + refreshUsageSummaryRef.current = refreshUsageSummary const handleSelectWorkspace = async () => { @@ -13399,8 +13565,13 @@ function App(): React.JSX.Element { } const handleAgentApprovalAction = async (requestId: string, action: AgentApprovalAction) => { + // Order-4 — capture the optional intent note (trimmed) at decision + // time and pass it down to the IPC, which stamps it onto the ledger + // row's metadata. Empty stays undefined so we never persist a blank + // note. Always optional — never gates the decision. 
+ const noteForDecision = intentNote.trim() || undefined try { - await window.api.respondAgentApproval(requestId, action) + await window.api.respondAgentApproval(requestId, action, noteForDecision) setRawLogs((prev) => [ ...prev, { @@ -13430,6 +13601,9 @@ function App(): React.JSX.Element { // approval so this distinction didn't matter. const composerChatId = getCurrentComposerStateChatId() setPendingAgentApproval((prev) => (prev?.id === requestId ? null : prev)) + // Order-4 — reset the intent note so the next queued approval + // (or the next request entirely) starts with an empty field. + setIntentNote('') if (composerChatId) { advanceApprovalQueueForChat(composerChatId) } @@ -15654,19 +15828,12 @@ function App(): React.JSX.Element { window.setTimeout(() => setCreatePrStateFor(workspacePath, { status: 'idle' }), 6000) } - const handlePrimeCommitChangesPrompt = () => { - const fileCount = latestRunDiffStats.filesChanged - const diffSummary = - fileCount > 0 - ? `${fileCount} ${fileCount === 1 ? 'file' : 'files'} changed (+${latestRunDiffStats.additions} -${latestRunDiffStats.deletions})` - : 'the current workspace changes' - setPrompt( - `Commit ${diffSummary}. Review the diff first, choose a concise commit message, then run the commit.` - ) - window.requestAnimationFrame(() => { - composerTextareaRef.current?.focus() - }) - } + // Phase Git-U1 — `handlePrimeCommitChangesPrompt` (which injected a + // "Commit N files… review the diff, then run the commit" prompt at the + // agent) has been removed. Committing is now a real user-driven flow + // via GitCommitControls (gitSnapshot → gitStage → gitCommit) in the + // composer above-bar's diff-action menu, so the model is no longer + // asked to drive git on the user's behalf. 
// Composer-unification (Phase J1): Gemini's standalone /stats, /help, // GEMINI.md, /restore, persistent-session and checkpoints buttons are @@ -16183,6 +16350,16 @@ function App(): React.JSX.Element { > + {/* First-launch onboarding sheet re-opener. The sheet auto-shows on a fresh install and stays available @@ -16220,6 +16397,7 @@ function App(): React.JSX.Element { > ! +
@@ -16497,6 +16675,9 @@ function App(): React.JSX.Element { copiedId={copiedId} copy={copy} autoFollowRef={autoFollowRef} + currency={displayCurrency} + currencyOverestimatePercent={overestimatePercent} + providerRates={providerRates} /> )} @@ -16889,8 +17070,17 @@ function App(): React.JSX.Element { {currentWorkspace.displayName} {' · '} + {/* Phase Git-U1 — prefer the LIVE branch from + gitSnapshot (populated once the diff-action menu + has been opened); fall back to the workspace + record's cached branch before the first read. A + detached snapshot reads "detached HEAD". */} - {currentWorkspace?.branch || 'detached'} + {primaryGitSnapshot + ? primaryGitSnapshot.detached + ? 'detached HEAD' + : primaryGitSnapshot.branch || 'detached' + : currentWorkspace?.branch || 'detached'} @@ -16942,6 +17132,64 @@ function App(): React.JSX.Element { )} + {/* Phase Git-U1 — live git-state pill driven by + gitSnapshot (populated once the diff-action menu has + been opened). Surfaces staged/unstaged state and + push/PR readiness right in the header so the user can + see what the Review/Commit/Create-PR menu will act on + without opening it. Rendered only once a snapshot is + in hand so the header stays stable pre-read. */} + {primaryGitSnapshot && + (() => { + const c = primaryGitSnapshot.counts + const stagedTitle = primaryGitSnapshot.clean + ? 'Working tree clean' + : `${c.changed} changed · ${c.staged} staged · ${c.unstaged} unstaged${ + c.untracked > 0 ? ` · ${c.untracked} new` : '' + }` + const needsPush = + !primaryGitSnapshot.detached && + !!primaryGitSnapshot.branch && + (!primaryGitSnapshot.upstream || primaryGitSnapshot.ahead > 0) + const prReady = + !primaryGitSnapshot.detached && + !!primaryGitSnapshot.branch && + !!primaryGitSnapshot.remoteUrl + return ( + + {primaryGitSnapshot.clean ? ( + clean + ) : c.staged > 0 ? ( + {c.staged} staged + ) : ( + unstaged + )} + {needsPush ? ( + + push + + ) : prReady ? 
( + + PR ready + + ) : null} + + ) + })()} {/* Composer-unification (Phase J1): once the chat has activity, External Path + Worktree migrate from the @@ -16966,6 +17214,13 @@ function App(): React.JSX.Element { // 1.0.6-EW66-1d — primary workspace's PR state is now // read from the per-path map keyed by its own path. const primaryPrState = getCreatePrState(currentWorkspace?.path) + // Phase Git-U1 — the trigger button keeps "Review + // changes" as the FIRST/primary action whenever there's + // a diff (the canonical safety entry point); it falls + // back to the PR label otherwise. The menu it opens is + // now the real user-driven GitCommitControls (status + + // review + stage/commit + gated PR) — no more agent + // prompt-injection for committing. const createPrLabel = primaryPrState.status === 'pending' ? 'Creating…' @@ -16987,60 +17242,25 @@ function App(): React.JSX.Element { aria-expanded={diffActionMenuOpen} title={ primaryPrState.message || - 'Choose what to do with the current workspace changes' + 'Review, commit, or open a PR for the current workspace' } > {primaryLabel} {diffActionMenuOpen && (
- - - + prState={primaryPrState} + onSnapshot={setPrimaryGitSnapshot} + />
)} @@ -17239,7 +17459,7 @@ function App(): React.JSX.Element { Schedule + runtime-profile controls remain — those are genuinely actionable. */} - {(scheduleControls || runtimeProfileControl) && ( + {!isCurrentEnsembleChat && (scheduleControls || runtimeProfileControl) && (
{scheduleControls} {runtimeProfileControl} @@ -17309,1606 +17529,1642 @@ function App(): React.JSX.Element {
)} - {(() => { - // Gate the overlay activation: render the highlight - // layer only when the prompt contains at least one - // RESOLVED `@Token`. Without this, the textarea's - // `color: transparent` zeros out the text in shells - // where the overlay's font/padding drifts from the - // textarea (Claude / Codex / Kimi etc. each override - // base padding). the maintainer hit this on the ensemble - // welcome screen — text invisible in Claude shell, - // vertical sync issues in others. - // 1.0.4 — drop the `isCurrentEnsembleChat` precondition. - // `hasResolvedMention` already self-guards on - // `participants.length === 0`, so non-ensemble chats - // are excluded naturally. The extra gate caused a - // regression on the ensemble welcome screen where - // `chatKind === 'ensemble'` evaluated false during - // some welcome-surface render passes — leaving typed - // tags as plain white text instead of bold + - // provider-tinted (the maintainer's "tags not lighting up" - // report). Now: anywhere participants ARE configured - // and a mention resolves, the overlay activates. - const composerHasMention = hasResolvedMention( - prompt, - currentChat?.ensemble?.participants || [] - ) - // 1.0.4 — sync epoch for the overlay's auto-metric - // mirror. Any change in the inputs below can shift - // the textarea's computed font / padding / border, - // so we encode them into a single string the - // overlay watches as a useLayoutEffect dep. The - // ResizeObserver inside the overlay handles every - // size-changing variation that happens between - // these explicit triggers. - const composerOverlaySyncEpoch = `${appearance.composerStyle}|${appearance.themeAppearance}|${isWelcomeChat ? 'welcome' : 'active'}` - return ( -
- {composerHasMention && ( - + {(() => { + // Gate the overlay activation: render the highlight + // layer only when the prompt contains at least one + // RESOLVED `@Token`. Without this, the textarea's + // `color: transparent` zeros out the text in shells + // where the overlay's font/padding drifts from the + // textarea (Claude / Codex / Kimi etc. each override + // base padding). the maintainer hit this on the ensemble + // welcome screen — text invisible in Claude shell, + // vertical sync issues in others. + // 1.0.4 — drop the `isCurrentEnsembleChat` precondition. + // `hasResolvedMention` already self-guards on + // `participants.length === 0`, so non-ensemble chats + // are excluded naturally. The extra gate caused a + // regression on the ensemble welcome screen where + // `chatKind === 'ensemble'` evaluated false during + // some welcome-surface render passes — leaving typed + // tags as plain white text instead of bold + + // provider-tinted (the maintainer's "tags not lighting up" + // report). Now: anywhere participants ARE configured + // and a mention resolves, the overlay activates. + const composerHasMention = hasResolvedMention( + prompt, + currentChat?.ensemble?.participants || [] + ) + // 1.0.4 — sync epoch for the overlay's auto-metric + // mirror. Any change in the inputs below can shift + // the textarea's computed font / padding / border, + // so we encode them into a single string the + // overlay watches as a useLayoutEffect dep. The + // ResizeObserver inside the overlay handles every + // size-changing variation that happens between + // these explicit triggers. + const composerOverlaySyncEpoch = `${appearance.composerStyle}|${appearance.themeAppearance}|${isWelcomeChat ? 'welcome' : 'active'}` + return ( +
+ {composerHasMention && ( + + )} +