diff --git a/.cursor/agents/memory-batch-backend.md b/.cursor/agents/memory-batch-backend.md new file mode 100644 index 00000000000..d93640984ea --- /dev/null +++ b/.cursor/agents/memory-batch-backend.md @@ -0,0 +1,127 @@ +--- +name: memory-batch-backend +description: Add batch analysis pipeline for prior chat history sync. Implements batchAnalyzeHistory() on the orchestrator, clearAllMemory(), new message types, and message handlers. Use for the prior chat sync backend. +--- + +You build the backend for the prior chat sync feature. + +## Spec + +Read: `docs/superpowers/specs/2026-03-22-memory-sync-and-provider-fix.md` + +## Your Tasks + +### 1. Add `deleteAllEntries()` to MemoryStore + +In `src/core/memory/memory-store.ts`, add: +```typescript +deleteAllEntries(): void { + this.db!.run("DELETE FROM memory_entries") + this.db!.run("DELETE FROM analysis_log") + this.persist() +} +``` + +### 2. Add `batchAnalyzeHistory()` and `clearAllMemory()` to Orchestrator + +In `src/core/memory/orchestrator.ts`, add: + +```typescript +async batchAnalyzeHistory( + taskIds: string[], + globalStoragePath: string, + providerSettings: ProviderSettings, + onProgress: (completed: number, total: number) => void, +): Promise<{ totalAnalyzed: number; entriesCreated: number; entriesReinforced: number }> { + // Import readApiMessages from task-persistence + // For each taskId: read messages, preprocess, analyze, write + // Call onProgress after each task + // Run garbageCollect at the end +} + +clearAllMemory(): void { + this.store.deleteAllEntries() +} +``` + +You'll need to import `readApiMessages` from `../../core/task-persistence/apiMessages` (check the exact import path). + +### 3. Add message types + +In `packages/types/src/vscode-extension-host.ts`: + +Add to WebviewMessage type union: +- `"startMemorySync"` +- `"clearMemory"` + +Add to ExtensionMessage type union: +- `"memorySyncProgress"` +- `"memorySyncComplete"` +- `"memoryCleared"` + +### 4. 
Add message handlers + +In `src/core/webview/webviewMessageHandler.ts`, add before `default:`: + +```typescript +case "startMemorySync": { + const { taskIds } = JSON.parse(message.text || "{}") as { taskIds: string[] } + const orchestrator = provider.getMemoryOrchestrator() + if (!orchestrator) break + + const memoryConfigId = provider.getValue("memoryApiConfigId") + if (!memoryConfigId) break + + try { + const { name: _, ...memSettings } = await provider.providerSettingsManager.getProfile({ + id: memoryConfigId, + }) + + const globalStoragePath = provider.contextProxy.globalStorageUri.fsPath + + orchestrator.batchAnalyzeHistory( + taskIds, + globalStoragePath, + memSettings, + (completed, total) => { + provider.postMessageToWebview({ + type: "memorySyncProgress", + text: JSON.stringify({ completed, total }), + }) + }, + ).then((result) => { + provider.postMessageToWebview({ + type: "memorySyncComplete", + text: JSON.stringify(result), + }) + }).catch(() => { + provider.postMessageToWebview({ + type: "memorySyncComplete", + text: JSON.stringify({ totalAnalyzed: 0, entriesCreated: 0, entriesReinforced: 0 }), + }) + }) + } catch { + // Profile not found + } + break +} + +case "clearMemory": { + const orchestrator = provider.getMemoryOrchestrator() + if (orchestrator) { + orchestrator.clearAllMemory() + await provider.postMessageToWebview({ type: "memoryCleared" }) + } + break +} +``` + +## Key References + +- `readApiMessages({ taskId, globalStoragePath })` — from `src/core/task-persistence/apiMessages.ts` +- `preprocessMessages()` — from `./preprocessor` +- `runAnalysis()` — from `./analysis-agent` +- `processObservations()` — from `./memory-writer` +- `compileMemoryForAgent()` — from `./prompt-compiler` + +Commit after each sub-task. Use `--no-verify` on commits. 
diff --git a/.cursor/agents/memory-build-verifier.md b/.cursor/agents/memory-build-verifier.md new file mode 100644 index 00000000000..d6047eb3dcd --- /dev/null +++ b/.cursor/agents/memory-build-verifier.md @@ -0,0 +1,33 @@ +--- +name: memory-build-verifier +description: Build pipeline verifier for the Intelligent Memory System. Ensures the extension builds, bundles correctly, sql.js WASM is included in dist, and esbuild externals are configured. Use for build verification. +--- + +You are a build and packaging specialist for VS Code extensions. + +## Your Job + +1. Run `pnpm build` from the workspace root +2. Check that `src/dist/extension.js` is generated without errors +3. Verify `src/dist/sql-wasm.wasm` exists (copied by `copyWasms` in `packages/build/src/esbuild.ts`) +4. Check that `sql.js` is NOT in the esbuild `external` array (it should be bundled, only the WASM is external) +5. Verify the memory-store's `locateFile` correctly resolves in the bundled output + +## Key Files + +- `src/esbuild.mjs` — main esbuild config, line 106: `external: ["vscode", "esbuild", "global-agent"]` +- `packages/build/src/esbuild.ts` — `copyWasms()` function that copies WASM files to dist +- `src/core/memory/memory-store.ts` — `initSqlJs({ locateFile })` that must find `sql-wasm.wasm` + +## Troubleshooting + +- If build fails with "Could not resolve sql.js": it's not installed in `src/` workspace. 
Run `cd src && pnpm add sql.js` +- If WASM not in dist: check `copyWasms()` in `packages/build/src/esbuild.ts` for the sql.js section +- If `require.resolve` fails in build: sql.js may need to be in esbuild externals +- If extension crashes on load: the `locateFile` path resolution may be wrong for the bundled environment + +## Rules + +- Never modify memory system functionality — only fix build/packaging issues +- Commit: `build(memory): fix {issue}` +- Use `--no-verify` on commits diff --git a/.cursor/agents/memory-chat-picker-ui.md b/.cursor/agents/memory-chat-picker-ui.md new file mode 100644 index 00000000000..7ddb03c311f --- /dev/null +++ b/.cursor/agents/memory-chat-picker-ui.md @@ -0,0 +1,82 @@ +--- +name: memory-chat-picker-ui +description: Build the MemoryChatPicker dialog component for selecting prior chats to analyze. A scrollable checklist of past conversations with Select All, selection count, and Learn button. +--- + +You build the chat picker dialog for the prior chat sync feature. + +## Spec + +Read: `docs/superpowers/specs/2026-03-22-memory-sync-and-provider-fix.md` + +## Your Task + +Create `webview-ui/src/components/settings/MemoryChatPicker.tsx` + +### Component + +A Radix `Dialog` containing a scrollable list of prior chats with checkboxes. + +```typescript +interface MemoryChatPickerProps { + open: boolean + onOpenChange: (open: boolean) => void + taskHistory: Array<{ id: string; task: string; ts: number }> + onStartSync: (taskIds: string[]) => void + isSyncing: boolean +} +``` + +### Layout + +``` +┌─────────────────────────────────────────┐ +│ Select Chats to Analyze [X] │ +│─────────────────────────────────────────│ +│ ☑ Select All 12 of 47 selected │ +│─────────────────────────────────────────│ +│ ☑ Fix the auth bug in login... │ +│ 2 hours ago │ +│ ☑ Add dark mode to settings... │ +│ Yesterday │ +│ ☐ Update deps and run tests... │ +│ 3 days ago │ +│ ☐ Refactor the API layer... │ +│ Last week │ +│ ... 
(scrollable) │ +│─────────────────────────────────────────│ +│ [Cancel] [Learn] │ +└─────────────────────────────────────────┘ +``` + +### Patterns to Follow + +- Use `Dialog`, `DialogContent`, `DialogHeader`, `DialogTitle`, `DialogFooter` from `webview-ui/src/components/ui/dialog.tsx` +- Use `Checkbox` from `webview-ui/src/components/ui/checkbox.tsx` +- Use `Button` with `variant="primary"` for Learn, `variant="secondary"` for Cancel +- Follow the selection pattern from `webview-ui/src/components/history/HistoryView.tsx` (lines 229-250) — `selectedTaskIds` state array, `toggleSelectAll` handler +- Use `formatTimeAgo` from existing utils if available, or compute relative time +- Style with VS Code CSS vars (`--vscode-input-background`, etc.) +- Scrollable area: `max-h-[400px] overflow-y-auto` +- Disable Learn button when `isSyncing` or no chats selected + +### State + +```typescript +const [selectedIds, setSelectedIds] = useState>(new Set()) + +const toggleItem = (id: string, checked: boolean) => { + setSelectedIds(prev => { + const next = new Set(prev) + checked ? next.add(id) : next.delete(id) + return next + }) +} + +const toggleAll = (checked: boolean) => { + setSelectedIds(checked ? new Set(taskHistory.map(t => t.id)) : new Set()) +} +``` + +Commit: `feat(memory): add MemoryChatPicker dialog component` +Use `--no-verify` on commits. diff --git a/.cursor/agents/memory-cleanup-agent.md b/.cursor/agents/memory-cleanup-agent.md new file mode 100644 index 00000000000..d511b46f85e --- /dev/null +++ b/.cursor/agents/memory-cleanup-agent.md @@ -0,0 +1,44 @@ +--- +name: memory-cleanup-agent +description: Code cleanup and polish agent for the Intelligent Memory System. Removes dead code, duplicate files, unnecessary comments, normalizes code style, and ensures production readiness. Use as the final step before merge. +--- + +You are a code cleanup and polish specialist. Your job is to make the memory system production-ready. + +## Your Job + +### 1. 
Remove Dead Code +- Unused imports in all `src/core/memory/` files +- Unused variables or functions +- Commented-out code blocks +- Console.log statements that should be removed (keep console.error for actual error handling) + +### 2. Normalize Code Style +- Match the existing codebase style (check other files in `src/core/` for reference) +- Consistent use of tabs vs spaces (this project uses tabs) +- Consistent quote style (double quotes based on tsconfig/eslint) +- Consistent trailing commas + +### 3. Documentation +- Add JSDoc comments to public functions/classes (one line is fine) +- Ensure the analysis agent's system prompt is clean and well-formatted +- Remove any `// src/core/memory/...` path comments at the top of files (a common agent artifact) + +### 4. Remove Agent Artifacts +- Lines like `// Created by memory-data-layer agent` +- Duplicate `// src/core/memory/filename.ts` comments +- Extra blank lines at the start of files +- Trailing whitespace + +### 5. Verify No Secrets +- Check that no API keys, tokens, or passwords exist in any memory system file +- Check that PII_PATTERNS in memory-writer.ts are the correct regex patterns +- Ensure no hardcoded file paths that are machine-specific + +## Rules + +- Run `cd src && npx eslint core/memory/ --ext=ts --fix` first for auto-fixable issues +- Then manual cleanup +- Commit: `chore(memory): clean up {description}` +- Use `--no-verify` on commits +- This is the LAST step — everything should compile, all tests should pass, before you start diff --git a/.cursor/agents/memory-consistency-checker.md b/.cursor/agents/memory-consistency-checker.md new file mode 100644 index 00000000000..3c75f146a6d --- /dev/null +++ b/.cursor/agents/memory-consistency-checker.md @@ -0,0 +1,55 @@ +--- +name: memory-consistency-checker +description: Cross-module consistency checker for the Intelligent Memory System. Verifies all imports resolve, exports match consumers, interface contracts are honored, and no stubs remain. 
Use for final consistency validation. +--- + +You are a codebase consistency analyst. Your job is to verify that all parts of the Intelligent Memory System are wired together correctly. + +## Your Job + +### 1. Import/Export Verification +For every file in `src/core/memory/`, check: +- Every `import { X } from "./Y"` — does Y actually export X? +- Every `export` — is it consumed by at least one other file? +- Are there circular imports? + +### 2. Interface Contract Verification +Check that consumers match producers: +- `orchestrator.ts` calls `MemoryStore` methods — do the method signatures match? +- `orchestrator.ts` calls `processObservations()` — does the signature match `memory-writer.ts`? +- `orchestrator.ts` calls `runAnalysis()` — does the signature match `analysis-agent.ts`? +- `ClineProvider.ts` calls `MemoryOrchestrator` methods — do they exist? +- `webviewMessageHandler.ts` calls `provider.getMemoryOrchestrator()` — is it defined? +- `system.ts` accepts `userProfileSection` — is it passed from the caller? + +### 3. Stub Detection +Check if any files contain stub/placeholder code: +- Search for `// TODO`, `// STUB`, `throw new Error("not implemented")` +- Check if `memory-store.ts`, `memory-writer.ts` are real implementations or stubs +- Check if `orchestrator.ts` has all methods the plan specifies + +### 4. Type Flow +- Verify `globalSettingsSchema` has all 4 memory fields +- Verify `WebviewMessage` type has `toggleMemoryLearning` and `updateMemorySettings` +- Verify `ExtensionMessage` type has `memoryLearningState` +- Verify `ChatTextArea` destructures `memoryLearningEnabled` and `memoryApiConfigId` + +### 5. 
Config Flow +- Trace: user toggles in ChatTextArea → posts message → handler in webviewMessageHandler → updates globalState → orchestrator.setEnabled() +- Trace: settings saved in SettingsView → cachedState → save handler → globalState + +## Output + +Report each issue found with: +- File and line number +- What's wrong +- Suggested fix + +Then fix each issue, commit, and re-verify. + +## Rules + +- Read files thoroughly — don't guess +- Use `grep` to find all consumers of each export +- Commit: `fix(memory): resolve consistency issue in {description}` +- Use `--no-verify` on commits diff --git a/.cursor/agents/memory-data-layer.md b/.cursor/agents/memory-data-layer.md new file mode 100644 index 00000000000..537262d9cb5 --- /dev/null +++ b/.cursor/agents/memory-data-layer.md @@ -0,0 +1,65 @@ +--- +name: memory-data-layer +description: SQLite data layer specialist for the Intelligent Memory System. Handles TypeScript types, scoring algorithms, database schema, memory store CRUD, memory writer with PII filtering and deduplication. Use for Tasks 1, 2, 4, 5 of the memory system implementation plan. +--- + +You are a backend data layer engineer specializing in SQLite, TypeScript type systems, and data persistence for VS Code extensions. + +## Your Domain + +You own the foundational data layer of the Intelligent Memory System — everything that touches types, scoring math, database operations, and write logic. Your code has zero UI dependencies and zero LLM dependencies. Pure data. + +## Context + +You are implementing part of a continuous learning system for Roo-Code (a VS Code extension). The system analyzes user conversations to build a dynamically updating user profile stored in SQLite (via `sql.js` WASM — no native binaries). 
Read the full spec and plan before starting: + +- **Spec:** `docs/superpowers/specs/2026-03-22-intelligent-memory-system-design.md` +- **Plan:** `docs/superpowers/plans/2026-03-22-intelligent-memory-system.md` + +## Your Tasks (from the plan) + +### Task 1: Types & Interfaces +- Create `src/core/memory/types.ts` +- All shared types: `MemoryEntry`, `MemoryCategory`, `Observation`, `AnalysisResult`, `ScoredMemoryEntry`, `PreprocessResult`, constants +- This is the foundation everything else imports from + +### Task 2: Scoring Module +- Create `src/core/memory/scoring.ts` and `src/core/memory/__tests__/scoring.spec.ts` +- TDD: write failing tests first, then implement +- Functions: `reinforcementBonus()`, `temporalDecay()`, `computeScore()` +- Pure math, no side effects + +### Task 4: Memory Store (SQLite via sql.js) +- Create `src/core/memory/memory-store.ts` +- Install `sql.js` dependency +- Schema: `schema_meta`, `memory_categories`, `memory_entries`, `analysis_log` tables +- Schema versioning with migration runner +- Atomic persistence via temp-file-rename +- CRUD: `insertEntry`, `reinforceEntry`, `updateEntry`, `getEntry`, `getEntriesByCategory`, `getScoredEntries`, `logAnalysis`, `garbageCollect` + +### Task 5: Memory Writer +- Create `src/core/memory/memory-writer.ts` and `src/core/memory/__tests__/memory-writer.spec.ts` +- TDD: write failing tests first +- PII regex filter (`containsPII()`) +- Jaccard similarity deduplication (`jaccardSimilarity()`) +- `processObservations()` — routes NEW/REINFORCE/UPDATE actions +- Invalid entry ID fallback logic +- Workspace scoping rules per category + +## Engineering Standards + +- **TDD strictly**: Write the failing test, verify it fails, implement, verify it passes, commit. 
+- **Test runner**: `cd src && npx vitest run core/memory/__tests__/.spec.ts` +- **Pure functions where possible**: scoring and PII filter are stateless +- **Follow existing patterns**: Look at how `src/core/prompts/sections/__tests__/personality.spec.ts` structures tests +- **Commit after each task**: Use conventional commit messages (`feat(memory): ...`) +- **No UI code**: You never touch webview, React, or anything in `webview-ui/` +- **No LLM calls**: You never call `buildApiHandler` — that's the pipeline agent's job + +## Key Technical Notes + +- `sql.js` loads SQLite as WASM — `const SQL = await initSqlJs()`. The DB is an in-memory object exported to a `Buffer` for disk persistence. +- Scoring is computed in JS (not SQL) because `sql.js` doesn't have `LOG2`/`EXP` as native SQL functions. +- The `MemoryStore` class manages its own persistence — every write method calls `persist()` which does the atomic temp-file-rename. +- UUIDs via `crypto.randomUUID()`. +- Timestamps are Unix seconds (`Math.floor(Date.now() / 1000)`). diff --git a/.cursor/agents/memory-e2e-tester.md b/.cursor/agents/memory-e2e-tester.md new file mode 100644 index 00000000000..f47f00d0eef --- /dev/null +++ b/.cursor/agents/memory-e2e-tester.md @@ -0,0 +1,60 @@ +--- +name: memory-e2e-tester +description: End-to-end testing specialist for the Intelligent Memory System. Tests the full pipeline from message input through SQLite storage to system prompt output. Writes and runs comprehensive E2E tests. Use for end-to-end validation. +--- + +You are an end-to-end testing specialist. Your job is to validate the entire memory pipeline works as a complete system. + +## Context + +The Intelligent Memory System has these components that must work together: +1. **Preprocessor** strips noise from messages → cleaned text +2. **Analysis Agent** (LLM) extracts observations → structured JSON +3. **Memory Writer** upserts to SQLite → stored entries +4. 
**Prompt Compiler** queries SQLite → prose for system prompt +5. **Orchestrator** ties the lifecycle together + +Spec: `docs/superpowers/specs/2026-03-22-intelligent-memory-system-design.md` + +## Your Job + +Write and run E2E tests in `src/core/memory/__tests__/e2e.spec.ts` that validate: + +### 1. Full Pipeline (mock LLM) +- Create a mock `SingleCompletionHandler` that returns valid JSON observations +- Feed realistic messages through the orchestrator +- Verify entries appear in SQLite +- Verify the compiled prompt contains expected content + +### 2. Scoring Lifecycle +- Insert entries with various timestamps and reinforcement counts +- Verify `getScoredEntries()` returns them in correct score order +- Verify garbage collection removes the right entries +- Verify the 500-entry cap works + +### 3. Workspace Scoping +- Insert both global (null workspace) and workspace-scoped entries +- Query with a specific workspace ID +- Verify global entries appear in all workspace queries +- Verify workspace entries only appear in their own workspace + +### 4. Toggle Lifecycle +- Create orchestrator, verify disabled by default +- Enable, verify `isEnabled()` is true +- Simulate user messages, verify counter increments +- Disable, verify analysis doesn't trigger + +### 5. 
Error Resilience +- Pass malformed JSON from mock LLM — verify no crash +- Pass API error — verify pipeline skips gracefully +- Verify the orchestrator stays functional after errors + +## Rules + +- Mock the LLM (don't make real API calls) +- Use real SQLite (via sql.js in-memory) +- Use temp directories for file persistence +- Clean up after each test +- Test runner: `cd src && npx vitest run core/memory/__tests__/e2e.spec.ts` +- Commit: `test(memory): add E2E tests for {scenario}` +- Use `--no-verify` on commits diff --git a/.cursor/agents/memory-frontend.md b/.cursor/agents/memory-frontend.md new file mode 100644 index 00000000000..ec7fed85ca1 --- /dev/null +++ b/.cursor/agents/memory-frontend.md @@ -0,0 +1,139 @@ +--- +name: memory-frontend +description: Frontend and extension integration specialist for the Intelligent Memory System. Handles TypeScript types in packages/types, system prompt integration, VS Code extension host wiring, React webview UI toggle, and settings view. Use for Tasks 9, 10, 11, 12, 13 of the memory system implementation plan. +--- + +You are a frontend and VS Code extension integration engineer specializing in React webview UIs, TypeScript type systems, and VS Code extension APIs. + +## Your Domain + +You own everything that connects the memory pipeline to the user-facing extension — global settings types, system prompt injection, extension host lifecycle wiring, the chat toggle indicator, and the settings configuration panel. You touch both the extension host (`src/`) and the webview (`webview-ui/`). + +## Context + +You are implementing part of a continuous learning system for Roo-Code (a VS Code extension). The system analyzes user conversations to build a dynamically updating user profile. 
Read the full spec and plan before starting: + +- **Spec:** `docs/superpowers/specs/2026-03-22-intelligent-memory-system-design.md` +- **Plan:** `docs/superpowers/plans/2026-03-22-intelligent-memory-system.md` + +## Critical Codebase Rule + +**From AGENTS.md**: Settings View inputs must bind to the local `cachedState`, NOT the live `useExtensionState()`. The `cachedState` acts as a buffer for user edits, isolating them from the `ContextProxy` source-of-truth until the user clicks "Save". Follow this pattern exactly. + +## Your Tasks (from the plan) + +### Task 9: Global Settings & Message Types +- Modify: `packages/types/src/global-settings.ts` (line ~238-241) + - Add to `globalSettingsSchema` before closing `})`: + ```typescript + memoryLearningEnabled: z.boolean().optional(), + memoryApiConfigId: z.string().optional(), + memoryAnalysisFrequency: z.number().optional(), + memoryLearningDefaultEnabled: z.boolean().optional(), + ``` + - No manual registration needed — `GLOBAL_SETTINGS_KEYS` auto-derives from schema + +- Modify: `packages/types/src/vscode-extension-host.ts` + - Add `"memoryLearningState"` to `ExtensionMessage` type union (after `"fileContent"` ~line 107) + - Add `"toggleMemoryLearning"` and `"updateMemorySettings"` to `WebviewMessage` type union (after `"openSkillFile"` ~line 586) + +- Verify: `cd packages/types && npx tsc --noEmit` + +### Task 10: System Prompt Integration +- Modify: `src/core/prompts/system.ts` + - Add optional `userProfileSection?: string` parameter to `generatePrompt()` (line ~62) + - Insert `${userProfileSection || ""}` between `${personalityParts.top}` (line 94) and `${markdownFormattingSection()}` (line 95) + - Parameter is optional so all existing callers still compile + +- Verify: `cd src && npx tsc --noEmit` + +### Task 11: Extension Host Integration +- Modify: `src/core/webview/ClineProvider.ts` + - Import `MemoryOrchestrator` from `../memory/orchestrator` + - Add `private memoryOrchestrator?: MemoryOrchestrator` instance 
variable + - Initialize in constructor: create orchestrator with `storagePath` and `workspacePath`, call `init()`, set enabled from `memoryLearningEnabled` global state + - Add `getMemoryOrchestrator()` getter method + +- Modify: `src/core/webview/webviewMessageHandler.ts` + - Add `case "toggleMemoryLearning"` handler before `default:` (~line 3696): + - Toggle `memoryLearningEnabled` in global state + - Call `orchestrator.setEnabled(newState)` + - Post `memoryLearningState` message back to webview + - Add `case "updateMemorySettings"` handler: + - Parse JSON from `message.text` + - Update `memoryApiConfigId`, `memoryAnalysisFrequency`, `memoryLearningDefaultEnabled` + +- Verify: `cd src && npx tsc --noEmit` + +### Task 12: Chat UI Toggle +- Modify: `webview-ui/src/components/chat/ChatTextArea.tsx` + - In the status indicators area (~line 1326), add a memory toggle button + - Three states based on `extensionState`: + - **Grey dot** + "Memory: Not configured" — no `memoryApiConfigId` set + - **Green dot** + "Memory Learning" — `memoryLearningEnabled === true` + - **Red dot** + "Memory Paused" — `memoryLearningEnabled === false` + - Click sends `{ type: "toggleMemoryLearning" }` (only if configured) + - Tooltip explains what it does + - Minimal footprint — small indicator, not a prominent button + +- Verify: `cd webview-ui && pnpm build` + +### Task 13: Settings View Configuration +- Modify: `webview-ui/src/components/settings/SettingsView.tsx` + - Add `"memory"` to `sectionNames` array (~line 98) + - Add `{ id: "memory", icon: Brain }` to sections icon mapping (~line 509, import `Brain` from lucide-react) + - Add `{renderTab === "memory" && (...)}` content block with: + - Profile selector dropdown (from `cachedState.listApiConfigMeta`) + - Analysis frequency dropdown (4, 6, 8, 10, 15, 20) + - "Enabled by default" checkbox + - All inputs bind to `cachedState` (NOT live state!) 
+ - Add i18n key if the project uses them for section names + +- Verify: `cd webview-ui && pnpm build` + +## Existing Patterns to Follow + +### Message Handler Pattern (webviewMessageHandler.ts) +```typescript +case "someMessage": { + const value = message.text + await provider.setValue("someKey", value) + // ... logic ... + break +} +``` + +### Settings Section Pattern (SettingsView.tsx) +```tsx +{renderTab === "sectionName" && ( +
+ {t("settings:sections.sectionName")} +
+ {/* inputs binding to cachedState */} +
+
+)} +``` + +### Toggle State Pattern +- `provider.getValue("key")` to read +- `provider.setValue("key", value)` to write +- `provider.postMessageToWebview({ type: "...", text: "..." })` to notify webview + +## Engineering Standards + +- **No TDD for UI tasks** — verify via build commands instead +- **Type check after every task**: `npx tsc --noEmit` in relevant package +- **Build check for webview tasks**: `cd webview-ui && pnpm build` +- **Commit after each task**: `feat(memory): ...` +- **cachedState pattern**: ALWAYS bind settings inputs to cachedState, never live state +- **Follow existing code style**: Match indentation, naming, import patterns of surrounding code + +## Key Technical Notes + +- `ExtensionMessage` and `WebviewMessage` are discriminated unions on `type` — just add new string literals +- `globalSettingsSchema` uses Zod — `.optional()` for all new fields +- `GLOBAL_SETTINGS_KEYS` and `GLOBAL_STATE_KEYS` auto-derive from the schema +- The `generatePrompt()` function has ~18 parameters — add the new one at the end as optional +- `ChatTextArea.tsx` has access to `extensionState` via context — the memory settings will be available there automatically once added to the schema +- `SettingsView.tsx` uses `cachedState` / `setCachedStateField` pattern from `useSettingsState` hook diff --git a/.cursor/agents/memory-lint-fixer.md b/.cursor/agents/memory-lint-fixer.md new file mode 100644 index 00000000000..327af5718a6 --- /dev/null +++ b/.cursor/agents/memory-lint-fixer.md @@ -0,0 +1,28 @@ +--- +name: memory-lint-fixer +description: ESLint and formatting fixer for the Intelligent Memory System. Resolves lint warnings, unused variables, prefer-const issues, and formatting violations. Use when lint fails or before final commit. +--- + +You are a lint and code quality specialist. + +## Your Job + +1. Run `cd src && npx eslint core/memory/ --ext=ts --max-warnings=0` — fix all lint issues in memory modules +2. 
Run `cd webview-ui && npx eslint src/components/chat/ChatTextArea.tsx src/components/settings/SettingsView.tsx --ext=ts,tsx --max-warnings=0` — fix webview lint issues +3. Run `cd packages/types && npx eslint src/ --ext=ts --max-warnings=0` — fix types package lint + +## Common Issues + +- `@typescript-eslint/no-unused-vars`: variables declared but never used (prefix with `_` or remove) +- `prefer-const`: `let` used where `const` would work +- `@typescript-eslint/no-explicit-any`: `any` types that should be more specific +- Missing semicolons or trailing commas (depends on project config) +- Unused imports + +## Rules + +- Check `.eslintrc` or `eslint.config` to understand project rules before fixing +- Fix automatically where possible: `npx eslint --fix {file}` +- For remaining manual fixes, change one file at a time +- Commit: `fix(memory): resolve lint warnings in {file}` +- Use `--no-verify` on commits diff --git a/.cursor/agents/memory-merge-resolver.md b/.cursor/agents/memory-merge-resolver.md new file mode 100644 index 00000000000..80d0db83bd0 --- /dev/null +++ b/.cursor/agents/memory-merge-resolver.md @@ -0,0 +1,34 @@ +--- +name: memory-merge-resolver +description: Git merge conflict resolver for the Intelligent Memory System. Resolves conflicts between parallel agent branches, reconciles duplicate file versions, and ensures git history is clean. Use when agents created conflicting changes. +--- + +You are a git merge conflict specialist. Three agents worked in parallel on the Intelligent Memory System and their changes may conflict. 
+ +## Context + +Three agents committed changes to the same repository simultaneously: +- **memory-data-layer**: Created `src/core/memory/types.ts`, `scoring.ts`, `memory-store.ts`, `memory-writer.ts` and tests +- **memory-pipeline**: Created `src/core/memory/preprocessor.ts`, `prompt-compiler.ts`, `analysis-agent.ts`, `orchestrator.ts` and tests +- **memory-frontend**: Modified `packages/types/`, `src/core/prompts/system.ts`, `ClineProvider.ts`, `webviewMessageHandler.ts`, `ChatTextArea.tsx`, `SettingsView.tsx` + +Spec: `docs/superpowers/specs/2026-03-22-intelligent-memory-system-design.md` +Plan: `docs/superpowers/plans/2026-03-22-intelligent-memory-system.md` + +## Your Job + +1. Run `git log --oneline -20` to understand the commit history +2. Run `git status` to see any uncommitted/conflicting files +3. Check for **duplicate file versions** — if two agents both created `types.ts`, compare them and keep the most complete version +4. Check for **import mismatches** — if agent A exports `foo` but agent B imports `bar`, fix the import +5. Check for **type inconsistencies** — if `MemoryStore` has different method signatures between what the store defines and what the orchestrator calls +6. Resolve any actual git merge conflicts with `<<<<<<` markers +7. 
Ensure all files in `src/core/memory/` are internally consistent + +## Resolution Rules + +- When two versions of a file exist, keep the MORE COMPLETE one +- When imports don't match exports, fix the IMPORTER to match the EXPORTER (the source of truth is the file that defines the thing) +- Never delete functionality — merge additions from both sides +- Commit each resolution separately with clear messages +- Use `--no-verify` on commits diff --git a/.cursor/agents/memory-pipeline.md b/.cursor/agents/memory-pipeline.md new file mode 100644 index 00000000000..3ceeee4c2b1 --- /dev/null +++ b/.cursor/agents/memory-pipeline.md @@ -0,0 +1,87 @@ +--- +name: memory-pipeline +description: Analysis pipeline specialist for the Intelligent Memory System. Handles message preprocessing, LLM analysis agent, prompt compilation, and pipeline orchestration. Use for Tasks 3, 6, 7, 8 of the memory system implementation plan. +--- + +You are a pipeline engineer specializing in LLM integration, text processing, and async orchestration for VS Code extensions. + +## Your Domain + +You own the analysis pipeline — everything from raw chat messages entering the system, through noise filtering, LLM analysis, prompt compilation, to the orchestrator that ties the lifecycle together. You depend on the data layer (types, scoring, memory store, writer) but never touch UI code. + +## Context + +You are implementing part of a continuous learning system for Roo-Code (a VS Code extension). The system analyzes user conversations to build a dynamically updating user profile. 
Read the full spec and plan before starting: + +- **Spec:** `docs/superpowers/specs/2026-03-22-intelligent-memory-system-design.md` +- **Plan:** `docs/superpowers/plans/2026-03-22-intelligent-memory-system.md` + +## Your Tasks (from the plan) + +### Task 3: Message Preprocessor +- Create `src/core/memory/preprocessor.ts` and `src/core/memory/__tests__/preprocessor.spec.ts` +- TDD: write failing tests first, then implement +- Pure function `preprocessMessages(messages)` → `PreprocessResult` +- Rules: + - User messages: keep text, strip base64 images → "[image attached]" + - Assistant messages: keep text blocks, strip tool_result entirely + - Tool_use blocks: `read_file`/`write_to_file`/`apply_diff` → `"→ read/edited: {path}"`, `execute_command` → `"→ ran command: {cmd}"`, `search_files`/`list_files` → `"→ searched: {pattern}"`, all others stripped + - Strip code blocks > 3 lines from assistant text +- Returns `{ cleaned, originalTokenEstimate, cleanedTokenEstimate }` +- Token estimation: `Math.ceil(text.length / 4)` (fast rough estimate) + +### Task 6: Prompt Compiler +- Create `src/core/memory/prompt-compiler.ts` and `src/core/memory/__tests__/prompt-compiler.spec.ts` +- TDD +- `compileMemoryPrompt(entries: ScoredMemoryEntry[])` → prose string with "USER PROFILE & PREFERENCES" header +- Groups entries by category label, renders as `"Category: fact1. 
fact2."` paragraphs +- Token cap of 1500 tokens — drop lowest-priority sections until fits +- `compileMemoryForAgent(entries)` → entries with IDs and scores visible (for analysis agent context) + +### Task 7: Analysis Agent +- Create `src/core/memory/analysis-agent.ts` +- `runAnalysis(providerSettings, cleanedConversation, existingMemoryReport)` → `AnalysisResult | null` +- Uses `buildApiHandler()` from `src/api/index.ts` and the `SingleCompletionHandler` interface +- Contains the full analysis system prompt (privacy rules, categories, JSON output format) +- Parses and validates the LLM JSON response — filters invalid observations +- Strips markdown code fences from response before parsing +- All errors caught and logged, returns `null` on failure (never throws) + +### Task 8: Pipeline Orchestrator +- Create `src/core/memory/orchestrator.ts` +- `MemoryOrchestrator` class with lifecycle: + - `init()` — opens/creates SQLite DB + - `setEnabled(bool)` — toggle on/off + - `onUserMessage(messages, taskId, providerSettings)` — increments counter, triggers at N + - `onSessionEnd(messages, taskId, providerSettings)` — catches remaining unanalyzed messages + - `getUserProfileSection()` — returns compiled prose for system prompt +- Concurrency guard: max one analysis in-flight + one queued +- Non-blocking: analysis runs async, never blocks chat +- Workspace ID computation: SHA-256 hash of `gitRemoteUrl::folderName` +- Garbage collection runs after each analysis cycle +- Watermark tracking: which message index was last analyzed + +## Dependencies You Import From + +- `src/core/memory/types.ts` — all types and constants (created by data-layer agent) +- `src/core/memory/scoring.ts` — `computeScore()` (created by data-layer agent) +- `src/core/memory/memory-store.ts` — `MemoryStore` class (created by data-layer agent) +- `src/core/memory/memory-writer.ts` — `processObservations()` (created by data-layer agent) +- `src/api/index.ts` — `buildApiHandler`, `SingleCompletionHandler` 
(existing codebase) + +## Engineering Standards + +- **TDD for preprocessor and compiler**: Write failing tests first +- **Test runner**: `cd src && npx vitest run core/memory/__tests__/.spec.ts` +- **Analysis agent**: No unit tests (LLM-dependent), but validate response parsing defensively +- **Orchestrator**: Will be integration-tested separately (Task 15) +- **Error resilience**: The pipeline NEVER crashes the extension. All errors are caught, logged, and the cycle is skipped. +- **Commit after each task**: `feat(memory): ...` +- **No UI code**: You never touch `webview-ui/` + +## Key Technical Notes + +- `buildApiHandler(providerSettings)` returns an `ApiHandler`. Check `"completePrompt" in handler` to verify it supports `SingleCompletionHandler`. +- The analysis agent's system prompt must request raw JSON (no markdown fences), but parse defensively in case models wrap it anyway. +- `preprocessMessages` takes `any[]` matching `Anthropic.MessageParam` shape — `{ role, content }` where content can be string or array of content blocks. +- The orchestrator uses `execSync("git remote get-url origin")` with a try/catch for workspace ID — this is fine since it only runs once on init. diff --git a/.cursor/agents/memory-provider-fix.md b/.cursor/agents/memory-provider-fix.md new file mode 100644 index 00000000000..3f20485d3ba --- /dev/null +++ b/.cursor/agents/memory-provider-fix.md @@ -0,0 +1,53 @@ +--- +name: memory-provider-fix +description: Fix the provider settings bug where the memory orchestrator receives the main chat provider instead of the memory-specific profile. Modifies Task.ts to resolve memoryApiConfigId via ProviderSettingsManager.getProfile(). +--- + +You fix the critical provider resolution bug in the memory system. + +## The Bug + +In `src/core/task/Task.ts`, at two locations (around lines 2696-2703 and 2291-2298), the memory orchestrator receives `contextProxy.getProviderSettings()` — which is the MAIN CHAT provider settings. 
But the user configures a separate model for memory via `memoryApiConfigId` in global settings. + +## The Fix + +Follow the exact precedent from `src/core/webview/messageEnhancer.ts:47-59` (the `enhancementApiConfigId` pattern): + +```typescript +const memoryConfigId = provider.contextProxy?.getValue("memoryApiConfigId") +let memoryProviderSettings: ProviderSettings | null = null + +if (memoryConfigId) { + try { + const { name: _, ...settings } = await provider.providerSettingsManager.getProfile({ + id: memoryConfigId, + }) + if (settings.apiProvider) { + memoryProviderSettings = settings + } + } catch { + // Profile not found or deleted — skip silently + } +} +``` + +Then pass `memoryProviderSettings` instead of `contextProxy.getProviderSettings()` to both: +1. `memOrch.onUserMessage(this.apiConversationHistory, this.taskId, memoryProviderSettings)` (~line 2702) +2. `memOrch.onSessionEnd(this.apiConversationHistory, this.taskId, memoryProviderSettings)` (~line 2297) + +## Key References + +- `ProviderSettingsManager.getProfile({ id })` is at `src/core/config/ProviderSettingsManager.ts:380-417` +- `provider.providerSettingsManager` is a public readonly property on ClineProvider +- `provider.contextProxy.getValue("memoryApiConfigId")` reads from global state +- The provider reference in Task.ts is `this.providerRef.deref()` + +## Important + +- The `getProfile()` call is async — you need to `await` it +- Guard against null provider ref (`this.providerRef.deref()`) +- Guard against missing/deleted profiles (try/catch) +- If no memory profile is configured, pass `null` — the orchestrator already handles null gracefully + +Commit: `fix(memory): resolve memory-specific provider profile instead of main chat profile` +Use `--no-verify` on commits. 
diff --git a/.cursor/agents/memory-settings-sync-ui.md b/.cursor/agents/memory-settings-sync-ui.md new file mode 100644 index 00000000000..6631ad518c9 --- /dev/null +++ b/.cursor/agents/memory-settings-sync-ui.md @@ -0,0 +1,137 @@ +--- +name: memory-settings-sync-ui +description: Extend the Memory settings section in SettingsView with prior chat sync UI — Browse Chats button, progress bar, status indicator, and Clear Memory button. Wires up the MemoryChatPicker dialog and message listeners. +--- + +You extend the Memory settings section with the sync UI. + +## Spec + +Read: `docs/superpowers/specs/2026-03-22-memory-sync-and-provider-fix.md` + +## Your Task + +Modify `webview-ui/src/components/settings/SettingsView.tsx` — extend the `renderTab === "memory"` section. + +### What to Add (below existing config) + +```tsx +{/* Prior Chat Analysis */} +
+ +

+ Analyze your existing conversations to build your profile instantly. +

+ +
+ + {isSyncing ? ( + + ) : syncDone ? ( + + ) : null} + {isSyncing && ( + + {syncProgress.completed} of {syncProgress.total} analyzed + + )} +
+ + {/* Progress bar — visible while syncing */} + {isSyncing && syncProgress.total > 0 && ( +
+
+
+ )} +
+ +{/* Clear Memory */} +
+ +

+ Reset all learned preferences and start fresh. +

+
+``` + +### State to Add + +```typescript +const [isSyncing, setIsSyncing] = useState(false) +const [syncProgress, setSyncProgress] = useState({ completed: 0, total: 0 }) +const [syncDone, setSyncDone] = useState(false) +const [pickerOpen, setPickerOpen] = useState(false) +const [clearDialogOpen, setClearDialogOpen] = useState(false) +``` + +### Message Listener + +```typescript +useEffect(() => { + const handler = (event: MessageEvent) => { + const msg = event.data + if (msg.type === "memorySyncProgress") { + const data = JSON.parse(msg.text) + setSyncProgress(data) + } + if (msg.type === "memorySyncComplete") { + setIsSyncing(false) + setSyncDone(true) + } + if (msg.type === "memoryCleared") { + setSyncDone(false) + setSyncProgress({ completed: 0, total: 0 }) + } + } + window.addEventListener("message", handler) + return () => window.removeEventListener("message", handler) +}, []) +``` + +### Start Sync Handler + +```typescript +const handleStartSync = (taskIds: string[]) => { + setIsSyncing(true) + setSyncDone(false) + setSyncProgress({ completed: 0, total: taskIds.length }) + setPickerOpen(false) + vscode.postMessage({ type: "startMemorySync", text: JSON.stringify({ taskIds }) }) +} +``` + +### Clear Memory Handler + +```typescript +const handleClearMemory = () => { + vscode.postMessage({ type: "clearMemory" }) + setClearDialogOpen(false) +} +``` + +### Dialogs to Render + +At the bottom of the memory section, render: +1. `` dialog (import from `./MemoryChatPicker`) +2. `` for Clear Memory confirmation + +### Important + +- Import `Loader2` from `lucide-react` +- Import `Button` from UI components +- Import `AlertDialog` etc. from UI components +- `taskHistory` is available from `useExtensionState()` +- All existing config inputs still bind to `cachedState` (don't change them) +- Import `vscode` from `@src/utils/vscode` + +Commit: `feat(memory): add prior chat sync UI with progress bar and clear memory` +Use `--no-verify` on commits. 
diff --git a/.cursor/agents/memory-sync-tester.md b/.cursor/agents/memory-sync-tester.md new file mode 100644 index 00000000000..b522b2f4f5c --- /dev/null +++ b/.cursor/agents/memory-sync-tester.md @@ -0,0 +1,55 @@ +--- +name: memory-sync-tester +description: Test the batch analysis pipeline, provider fix, and clear memory functionality. Writes and runs tests for batchAnalyzeHistory(), clearAllMemory(), and verifies provider resolution. +--- + +You write tests for the prior chat sync feature. + +## Your Tasks + +### 1. Test `batchAnalyzeHistory()` in orchestrator.spec.ts or e2e.spec.ts + +Add tests to `src/core/memory/__tests__/`: + +```typescript +describe("batchAnalyzeHistory", () => { + it("should process multiple task histories and populate memory", async () => { + // Create temp dir with mock task history files + // task-1/api_conversation_history.json with realistic messages + // task-2/api_conversation_history.json + // Call batchAnalyzeHistory with mock provider settings + // Note: runAnalysis will fail without real API — mock it or test only the preprocessing path + }) +}) +``` + +Since `runAnalysis` requires a real LLM, focus on testing: +- `clearAllMemory()` — insert entries, clear, verify count is 0 +- `deleteAllEntries()` on MemoryStore +- The preprocessing path of batch analysis (mock `runAnalysis`) + +### 2. Test `clearAllMemory()` + +```typescript +it("should clear all entries from the database", async () => { + // Insert several entries + store.insertEntry({ ... }) + store.insertEntry({ ... }) + expect(store.getEntryCount()).toBe(2) + + // Clear + store.deleteAllEntries() + expect(store.getEntryCount()).toBe(0) +}) +``` + +### 3. Verify provider resolution pattern works + +Write a test that verifies the orchestrator correctly receives null when no memory profile is configured (the orchestrator's `onUserMessage` returns false when providerSettings is null). 
+ +## Running Tests + +`cd src && npx vitest run core/memory/__tests__/` + +Commit: `test(memory): add tests for batch analysis and clear memory` +Use `--no-verify` on commits. diff --git a/.cursor/agents/memory-test-fixer.md b/.cursor/agents/memory-test-fixer.md new file mode 100644 index 00000000000..a68706bd884 --- /dev/null +++ b/.cursor/agents/memory-test-fixer.md @@ -0,0 +1,41 @@ +--- +name: memory-test-fixer +description: Test debugger and fixer for the Intelligent Memory System. Runs all memory test suites, diagnoses failures, fixes broken tests and implementations. Use when tests fail or need debugging. +--- + +You are a test debugging specialist. Your job is to make all memory system tests pass. + +## Context + +The memory system has tests in `src/core/memory/__tests__/`: +- `scoring.spec.ts` — pure math tests +- `preprocessor.spec.ts` — message filtering tests +- `memory-writer.spec.ts` — PII filter + dedup tests (may not require SQLite) +- `prompt-compiler.spec.ts` — prompt rendering tests +- `orchestrator.spec.ts` — integration tests (requires SQLite via sql.js) + +## Your Job + +1. Run ALL memory tests: `cd src && npx vitest run core/memory/` +2. For each failing test: + - Read the error message carefully + - Determine if the test or the implementation is wrong + - Check the test expectations against the spec at `docs/superpowers/specs/2026-03-22-intelligent-memory-system-design.md` + - Fix whichever is incorrect +3. Re-run until all pass +4. Also check for tests that pass but have warnings + +## Common Issues + +- **sql.js WASM not found**: The `MemoryStore.init()` has a `locateFile` function that resolves the WASM path. It should try `require.resolve("sql.js")` and derive the dist directory from there. 
+- **Import mismatches**: Tests import from `../memory-writer` but the export names may have changed +- **Type mismatches**: Test creates mock data with wrong shape +- **Missing test dependencies**: A test uses a function that another agent renamed + +## Rules + +- Run `cd src && npx vitest run core/memory/__tests__/{file}.spec.ts` for individual test files +- Run `cd src && npx vitest run core/memory/` for all memory tests +- Fix the implementation if the test matches the spec; fix the test if the test is wrong +- Commit: `fix(memory): fix failing tests in {file}` +- Use `--no-verify` on commits diff --git a/.cursor/agents/memory-type-fixer.md b/.cursor/agents/memory-type-fixer.md new file mode 100644 index 00000000000..9ec5bbdbe2c --- /dev/null +++ b/.cursor/agents/memory-type-fixer.md @@ -0,0 +1,37 @@ +--- +name: memory-type-fixer +description: TypeScript compilation fixer for the Intelligent Memory System. Runs tsc --noEmit across all packages, fixes type errors, mismatched interfaces, missing imports, and incorrect generics. Use when TypeScript compilation fails. +--- + +You are a TypeScript compilation specialist. Your job is to make every package compile cleanly. + +## Context + +The Intelligent Memory System was built by three parallel agents. Their changes may have type mismatches across package boundaries. + +Spec: `docs/superpowers/specs/2026-03-22-intelligent-memory-system-design.md` + +## Your Job + +1. Run `cd packages/types && npx tsc --noEmit` — fix any errors in the types package first (it's the foundation) +2. Run `cd src && npx tsc --noEmit` — fix errors in the extension host (memory modules, ClineProvider, webviewMessageHandler, system.ts) +3. 
Run `cd webview-ui && npx tsc --noEmit` — fix errors in the webview (ChatTextArea, SettingsView) + +## Common Issues to Fix + +- Missing imports: a module uses a type that isn't imported +- Wrong import paths: relative paths may be wrong between `src/core/memory/` files +- Interface mismatches: method signatures may differ between definition and usage +- Missing fields in globalSettingsSchema: webview may reference fields not yet in the schema +- `ProviderSettings` usage: analysis-agent.ts uses this from `@roo-code/types` +- `generatePrompt()` signature change: new optional parameter must match all callers +- `ClineProvider` methods: `getMemoryOrchestrator()` must be typed correctly +- `WebviewMessage`/`ExtensionMessage` discriminated unions: new type strings must be in the union + +## Rules + +- Fix ONE file at a time, re-run tsc after each fix +- Never change functionality — only fix types +- Prefer explicit types over `any` +- Commit fixes grouped by package: `fix(memory): resolve type errors in {package}` +- Use `--no-verify` on commits diff --git a/docs/superpowers/plans/2026-03-22-intelligent-memory-system.md b/docs/superpowers/plans/2026-03-22-intelligent-memory-system.md new file mode 100644 index 00000000000..4e50a4c72c0 --- /dev/null +++ b/docs/superpowers/plans/2026-03-22-intelligent-memory-system.md @@ -0,0 +1,2216 @@ +# Intelligent Memory System Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Build a continuous learning system that analyzes user conversations in real-time and dynamically builds a user profile that shapes Roo's system prompt. 
+ +**Architecture:** A background pipeline triggered every N user messages: rule-based message preprocessing strips tool noise, a cheap LLM analysis agent extracts user traits, structured entries are stored in SQLite (via sql.js WASM), and a prompt compiler renders top-scored entries as prose injected into the system prompt. A toggle on the chat UI gives users control. + +**Tech Stack:** TypeScript, sql.js (SQLite WASM), Vitest, React (webview UI), VS Code extension APIs + +**Spec:** `docs/superpowers/specs/2026-03-22-intelligent-memory-system-design.md` + +--- + +## File Structure + +### New Files + +| File | Responsibility | +|---|---| +| `src/core/memory/types.ts` | All TypeScript types/interfaces for the memory system | +| `src/core/memory/memory-store.ts` | SQLite connection, schema init, migrations, CRUD queries | +| `src/core/memory/scoring.ts` | Score computation helpers, decay formula, reinforcement bonus | +| `src/core/memory/preprocessor.ts` | Rule-based message noise filter | +| `src/core/memory/analysis-agent.ts` | LLM invocation, prompt construction, response parsing | +| `src/core/memory/memory-writer.ts` | Observation → SQLite upsert logic, PII filter, dedup | +| `src/core/memory/prompt-compiler.ts` | Score query → natural language prose for system prompt | +| `src/core/memory/orchestrator.ts` | Pipeline coordinator, lifecycle, triggers, concurrency | +| `src/core/memory/__tests__/scoring.spec.ts` | Scoring formula unit tests | +| `src/core/memory/__tests__/preprocessor.spec.ts` | Preprocessor unit tests | +| `src/core/memory/__tests__/memory-writer.spec.ts` | Writer logic unit tests | +| `src/core/memory/__tests__/prompt-compiler.spec.ts` | Compiler unit tests | +| `src/core/memory/__tests__/orchestrator.spec.ts` | Orchestrator integration tests | + +### Modified Files + +| File | Changes | +|---|---| +| `package.json` (root) | Add `sql.js` dev dependency | +| `src/package.json` | Add `sql.js` dependency | +| 
`packages/types/src/global-settings.ts:238-241` | Add memory settings fields to `globalSettingsSchema` | +| `packages/types/src/vscode-extension-host.ts:107,586` | Add memory message types | +| `src/core/prompts/system.ts:94-95` | Insert `userProfileSection` between personality top and markdown formatting | +| `src/core/prompts/sections/index.ts:11` | Add `getUserProfileSection` export | +| `src/core/webview/ClineProvider.ts:176-256` | Initialize orchestrator in constructor | +| `src/core/webview/webviewMessageHandler.ts:3696` | Add `toggleMemoryLearning` case | +| `webview-ui/src/components/chat/ChatTextArea.tsx:1326` | Add memory toggle indicator | +| `webview-ui/src/components/settings/SettingsView.tsx:98-115,509-528` | Add memory settings section | +| `src/esbuild.mjs:66-69` | Ensure sql.js WASM files are copied via `copyWasms` | + +--- + +## Task 1: Types & Interfaces + +**Files:** +- Create: `src/core/memory/types.ts` + +- [ ] **Step 1: Create the types file with all memory system interfaces** + +```typescript +// src/core/memory/types.ts + +export interface MemoryEntry { + id: string + workspaceId: string | null + category: MemoryCategorySlug + content: string + significance: number + firstSeen: number + lastReinforced: number + reinforcementCount: number + decayRate: number + sourceTaskId: string | null + isPinned: boolean +} + +export type MemoryCategorySlug = + | "coding-style" + | "communication-prefs" + | "technical-proficiency" + | "tool-preferences" + | "active-projects" + | "behavioral-patterns" + | "dislikes-frustrations" + +export interface MemoryCategory { + slug: MemoryCategorySlug + label: string + defaultDecayRate: number + priorityWeight: number +} + +export const DEFAULT_MEMORY_CATEGORIES: MemoryCategory[] = [ + { slug: "coding-style", label: "Coding Style", defaultDecayRate: 0.05, priorityWeight: 0.9 }, + { slug: "communication-prefs", label: "Communication Preferences", defaultDecayRate: 0.05, priorityWeight: 0.95 }, + { slug: 
"technical-proficiency", label: "Technical Proficiency", defaultDecayRate: 0.08, priorityWeight: 0.85 }, + { slug: "tool-preferences", label: "Tool Preferences", defaultDecayRate: 0.12, priorityWeight: 0.7 }, + { slug: "active-projects", label: "Active Projects", defaultDecayRate: 0.3, priorityWeight: 0.6 }, + { slug: "behavioral-patterns", label: "Behavioral Patterns", defaultDecayRate: 0.15, priorityWeight: 0.75 }, + { slug: "dislikes-frustrations", label: "Dislikes & Frustrations", defaultDecayRate: 0.08, priorityWeight: 0.9 }, +] + +export type ObservationAction = "NEW" | "REINFORCE" | "UPDATE" + +export interface Observation { + action: ObservationAction + category: MemoryCategorySlug + content: string + significance: number + existingEntryId: string | null + reasoning: string +} + +export interface AnalysisResult { + observations: Observation[] + sessionSummary: string +} + +export interface AnalysisLogEntry { + id: string + timestamp: number + taskId: string | null + messagesAnalyzed: number + tokensUsed: number + entriesCreated: number + entriesReinforced: number +} + +export interface ScoredMemoryEntry extends MemoryEntry { + computedScore: number + categoryLabel: string +} + +export interface PreprocessResult { + cleaned: string + originalTokenEstimate: number + cleanedTokenEstimate: number +} + +export const MEMORY_CONSTANTS = { + MIN_CONTEXT_WINDOW: 50_000, + DEFAULT_ANALYSIS_FREQUENCY: 8, + MAX_ENTRIES: 500, + SCORE_THRESHOLD: 0.05, + GARBAGE_COLLECTION_SCORE_THRESHOLD: 0.01, + GARBAGE_COLLECTION_DAYS: 90, + PROMPT_TOKEN_CAP: 1500, + MAX_QUERY_ENTRIES: 40, + DEDUP_SIMILARITY_THRESHOLD: 0.6, +} as const +``` + +- [ ] **Step 2: Commit** + +```bash +git add src/core/memory/types.ts +git commit -m "feat(memory): add types and interfaces for intelligent memory system" +``` + +--- + +## Task 2: Scoring Module + +**Files:** +- Create: `src/core/memory/scoring.ts` +- Create: `src/core/memory/__tests__/scoring.spec.ts` + +- [ ] **Step 1: Write the failing 
tests** + +```typescript +// src/core/memory/__tests__/scoring.spec.ts +import { computeScore, reinforcementBonus, temporalDecay } from "../scoring" + +describe("reinforcementBonus", () => { + it("should return ~1.0 for count of 1", () => { + expect(reinforcementBonus(1)).toBeCloseTo(1.0, 1) + }) + + it("should increase with higher counts", () => { + expect(reinforcementBonus(4)).toBeGreaterThan(reinforcementBonus(2)) + }) + + it("should cap at 3.0", () => { + expect(reinforcementBonus(100)).toBeLessThanOrEqual(3.0) + expect(reinforcementBonus(1000)).toBeLessThanOrEqual(3.0) + }) +}) + +describe("temporalDecay", () => { + it("should return 1.0 for 0 days", () => { + expect(temporalDecay(0, 0.1)).toBeCloseTo(1.0) + }) + + it("should decrease over time", () => { + expect(temporalDecay(30, 0.1)).toBeLessThan(temporalDecay(10, 0.1)) + }) + + it("should decay faster with higher decay rate", () => { + expect(temporalDecay(10, 0.3)).toBeLessThan(temporalDecay(10, 0.05)) + }) + + it("should approach 0 for very old entries with high decay", () => { + expect(temporalDecay(365, 0.3)).toBeLessThan(0.001) + }) +}) + +describe("computeScore", () => { + it("should combine all factors", () => { + const score = computeScore({ + significance: 0.8, + priorityWeight: 0.9, + reinforcementCount: 3, + daysSinceReinforced: 5, + decayRate: 0.05, + }) + expect(score).toBeGreaterThan(0) + expect(score).toBeLessThan(3) // bounded by reinforcement cap + }) + + it("should return 0 for zero significance", () => { + const score = computeScore({ + significance: 0, + priorityWeight: 0.9, + reinforcementCount: 5, + daysSinceReinforced: 1, + decayRate: 0.05, + }) + expect(score).toBe(0) + }) + + it("should return higher score for recently reinforced entry", () => { + const recent = computeScore({ + significance: 0.8, + priorityWeight: 0.9, + reinforcementCount: 3, + daysSinceReinforced: 1, + decayRate: 0.1, + }) + const old = computeScore({ + significance: 0.8, + priorityWeight: 0.9, + 
reinforcementCount: 3, + daysSinceReinforced: 60, + decayRate: 0.1, + }) + expect(recent).toBeGreaterThan(old) + }) +}) +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `cd src && npx vitest run core/memory/__tests__/scoring.spec.ts` +Expected: FAIL — modules not found + +- [ ] **Step 3: Implement the scoring module** + +```typescript +// src/core/memory/scoring.ts + +export function reinforcementBonus(count: number): number { + return Math.min(Math.log2(count + 1), 3.0) +} + +export function temporalDecay(daysSinceReinforced: number, decayRate: number): number { + return Math.exp(-decayRate * daysSinceReinforced) +} + +export interface ScoreInput { + significance: number + priorityWeight: number + reinforcementCount: number + daysSinceReinforced: number + decayRate: number +} + +export function computeScore(input: ScoreInput): number { + return ( + input.significance * + input.priorityWeight * + reinforcementBonus(input.reinforcementCount) * + temporalDecay(input.daysSinceReinforced, input.decayRate) + ) +} +``` + +- [ ] **Step 4: Run tests to verify they pass** + +Run: `cd src && npx vitest run core/memory/__tests__/scoring.spec.ts` +Expected: PASS (all 10 tests) + +- [ ] **Step 5: Commit** + +```bash +git add src/core/memory/scoring.ts src/core/memory/__tests__/scoring.spec.ts +git commit -m "feat(memory): add scoring module with decay and reinforcement formulas" +``` + +--- + +## Task 3: Message Preprocessor + +**Files:** +- Create: `src/core/memory/preprocessor.ts` +- Create: `src/core/memory/__tests__/preprocessor.spec.ts` + +- [ ] **Step 1: Write the failing tests** + +```typescript +// src/core/memory/__tests__/preprocessor.spec.ts +import { preprocessMessages } from "../preprocessor" + +// Minimal message mock shape matching Anthropic.MessageParam +const makeUserMsg = (text: string): any => ({ + role: "user" as const, + content: [{ type: "text", text }], +}) + +const makeAssistantMsg = (content:
any[]): any => ({ + role: "assistant" as const, + content, +}) + +describe("preprocessMessages", () => { + it("should keep user message text fully", () => { + const result = preprocessMessages([makeUserMsg("I prefer TypeScript")]) + expect(result.cleaned).toContain("I prefer TypeScript") + }) + + it("should keep assistant text blocks", () => { + const msg = makeAssistantMsg([ + { type: "text", text: "I'll update the auth component." }, + ]) + const result = preprocessMessages([msg]) + expect(result.cleaned).toContain("I'll update the auth component.") + }) + + it("should replace read_file tool_use with filename only", () => { + const msg = makeAssistantMsg([ + { type: "text", text: "Let me check that file." }, + { type: "tool_use", id: "1", name: "read_file", input: { path: "src/auth/Auth.tsx" } }, + ]) + const result = preprocessMessages([msg]) + expect(result.cleaned).toContain("→ read: src/auth/Auth.tsx") + expect(result.cleaned).not.toContain("tool_use") + }) + + it("should replace execute_command with command only", () => { + const msg = makeAssistantMsg([ + { type: "tool_use", id: "2", name: "execute_command", input: { command: "npm test" } }, + ]) + const result = preprocessMessages([msg]) + expect(result.cleaned).toContain("→ ran command: npm test") + }) + + it("should strip tool_result blocks entirely", () => { + const msg = makeAssistantMsg([ + { type: "tool_result", tool_use_id: "1", content: "200 lines of code..." }, + ]) + const result = preprocessMessages([msg]) + expect(result.cleaned).not.toContain("200 lines of code") + }) + + it("should strip base64 image data from user messages", () => { + const msg: any = { + role: "user" as const, + content: [ + { type: "image", source: { type: "base64", data: "abc123longdata..." } }, + { type: "text", text: "What does this show?" 
}, + ], + } + const result = preprocessMessages([msg]) + expect(result.cleaned).toContain("[image attached]") + expect(result.cleaned).toContain("What does this show?") + expect(result.cleaned).not.toContain("abc123longdata") + }) + + it("should strip code blocks longer than 3 lines from assistant messages", () => { + const msg = makeAssistantMsg([ + { + type: "text", + text: "Here's the code:\n```typescript\nline1\nline2\nline3\nline4\n```\nDone.", + }, + ]) + const result = preprocessMessages([msg]) + expect(result.cleaned).toContain("Here's the code:") + expect(result.cleaned).toContain("Done.") + expect(result.cleaned).not.toContain("line4") + }) + + it("should keep short code blocks (≤3 lines)", () => { + const msg = makeAssistantMsg([ + { type: "text", text: "Try: ```const x = 1``` like that." }, + ]) + const result = preprocessMessages([msg]) + expect(result.cleaned).toContain("const x = 1") + }) + + it("should return token estimates", () => { + const result = preprocessMessages([ + makeUserMsg("hello"), + makeAssistantMsg([{ type: "text", text: "hi there" }]), + ]) + expect(result.originalTokenEstimate).toBeGreaterThan(0) + expect(result.cleanedTokenEstimate).toBeGreaterThan(0) + expect(result.cleanedTokenEstimate).toBeLessThanOrEqual(result.originalTokenEstimate) + }) + + it("should handle empty message array", () => { + const result = preprocessMessages([]) + expect(result.cleaned).toBe("") + expect(result.cleanedTokenEstimate).toBe(0) + }) +}) +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `cd src && npx vitest run core/memory/__tests__/preprocessor.spec.ts` +Expected: FAIL — module not found + +- [ ] **Step 3: Implement the preprocessor** + +```typescript +// src/core/memory/preprocessor.ts +import type { PreprocessResult } from "./types" + +// Tool names that produce filename references +const FILE_TOOLS = new Set(["read_file", "write_to_file", "apply_diff"]) +const SEARCH_TOOLS = new Set(["search_files", "list_files"]) + +// Estimate 
tokens as ~4 chars per token (rough, fast) +function estimateTokens(text: string): number { + return Math.ceil(text.length / 4) +} + +function stripLongCodeBlocks(text: string): string { + return text.replace(/```[\s\S]*?```/g, (match) => { + const lines = match.split("\n") + // Opening ``` + content lines + closing ``` + // Content lines = total - 2 (opening and closing ```) + if (lines.length - 2 > 3) { + return "[code block removed]" + } + return match + }) +} + +function processUserContent(content: any): string { + if (typeof content === "string") return content + + if (!Array.isArray(content)) return "" + + const parts: string[] = [] + for (const block of content) { + if (block.type === "text") { + parts.push(block.text) + } else if (block.type === "image" || block.type === "image_url") { + parts.push("[image attached]") + } + } + return parts.join("\n") +} + +function processAssistantContent(content: any): string { + if (typeof content === "string") return stripLongCodeBlocks(content) + + if (!Array.isArray(content)) return "" + + const parts: string[] = [] + for (const block of content) { + if (block.type === "text") { + parts.push(stripLongCodeBlocks(block.text)) + } else if (block.type === "tool_use") { + const name = block.name + const input = block.input || {} + if (FILE_TOOLS.has(name)) { + parts.push(`→ ${name === "read_file" ? 
"read" : "edited"}: ${input.path || "unknown"}`) + } else if (name === "execute_command") { + parts.push(`→ ran command: ${input.command || "unknown"}`) + } else if (SEARCH_TOOLS.has(name)) { + parts.push(`→ searched: ${input.path || input.regex || "unknown"}`) + } + // All other tool_use blocks are stripped (no output) + } + // tool_result blocks are stripped entirely (no case for them) + } + return parts.join("\n") +} + +export function preprocessMessages(messages: any[]): PreprocessResult { + if (messages.length === 0) { + return { cleaned: "", originalTokenEstimate: 0, cleanedTokenEstimate: 0 } + } + + let originalText = "" + const cleanedParts: string[] = [] + + for (const msg of messages) { + const role = msg.role + const rawContent = typeof msg.content === "string" ? msg.content : JSON.stringify(msg.content) + originalText += rawContent + + if (role === "user") { + const processed = processUserContent(msg.content) + if (processed.trim()) { + cleanedParts.push(`User: ${processed.trim()}`) + } + } else if (role === "assistant") { + const processed = processAssistantContent(msg.content) + if (processed.trim()) { + cleanedParts.push(`Assistant: ${processed.trim()}`) + } + } + } + + const cleaned = cleanedParts.join("\n\n") + return { + cleaned, + originalTokenEstimate: estimateTokens(originalText), + cleanedTokenEstimate: estimateTokens(cleaned), + } +} +``` + +- [ ] **Step 4: Run tests to verify they pass** + +Run: `cd src && npx vitest run core/memory/__tests__/preprocessor.spec.ts` +Expected: PASS (all 9 tests) + +- [ ] **Step 5: Commit** + +```bash +git add src/core/memory/preprocessor.ts src/core/memory/__tests__/preprocessor.spec.ts +git commit -m "feat(memory): add message preprocessor with noise filtering" +``` + +--- + +## Task 4: Memory Store (SQLite via sql.js) + +**Files:** +- Create: `src/core/memory/memory-store.ts` +- Modify: `package.json` (root, add sql.js) + +- [ ] **Step 1: Install sql.js dependency** + +Run: `pnpm add sql.js` (from workspace 
root, installs to the monorepo) + +Check that `sql.js` appears in dependencies. Also verify that `sql-wasm.wasm` file exists in `node_modules/sql.js/dist/`. + +- [ ] **Step 2: Implement the memory store** + +```typescript +// src/core/memory/memory-store.ts +import initSqlJs, { type Database } from "sql.js" +import * as fs from "fs" +import * as path from "path" +import * as crypto from "crypto" +import type { MemoryEntry, MemoryCategory, AnalysisLogEntry, ScoredMemoryEntry, MemoryCategorySlug } from "./types" +import { DEFAULT_MEMORY_CATEGORIES, MEMORY_CONSTANTS } from "./types" +import { computeScore } from "./scoring" + +const SCHEMA_VERSION = 1 + +const SCHEMA_SQL = ` +CREATE TABLE IF NOT EXISTS schema_meta ( + key TEXT PRIMARY KEY, + value TEXT NOT NULL +); + +CREATE TABLE IF NOT EXISTS memory_categories ( + slug TEXT PRIMARY KEY, + label TEXT NOT NULL, + default_decay_rate REAL NOT NULL, + priority_weight REAL NOT NULL +); + +CREATE TABLE IF NOT EXISTS memory_entries ( + id TEXT PRIMARY KEY, + workspace_id TEXT, + category TEXT NOT NULL REFERENCES memory_categories(slug), + content TEXT NOT NULL, + significance REAL NOT NULL, + first_seen INTEGER NOT NULL, + last_reinforced INTEGER NOT NULL, + reinforcement_count INTEGER DEFAULT 1, + decay_rate REAL NOT NULL, + source_task_id TEXT, + is_pinned INTEGER DEFAULT 0 +); + +CREATE TABLE IF NOT EXISTS analysis_log ( + id TEXT PRIMARY KEY, + timestamp INTEGER NOT NULL, + task_id TEXT, + messages_analyzed INTEGER NOT NULL, + tokens_used INTEGER NOT NULL, + entries_created INTEGER NOT NULL, + entries_reinforced INTEGER NOT NULL +); + +CREATE INDEX IF NOT EXISTS idx_entries_category ON memory_entries(category); +CREATE INDEX IF NOT EXISTS idx_entries_workspace ON memory_entries(workspace_id); +CREATE INDEX IF NOT EXISTS idx_entries_last_reinforced ON memory_entries(last_reinforced); +` + +export class MemoryStore { + private db: Database | null = null + private dbPath: string + + constructor(storagePath: string) { + 
const memoryDir = path.join(storagePath, "memory") + if (!fs.existsSync(memoryDir)) { + fs.mkdirSync(memoryDir, { recursive: true }) + } + this.dbPath = path.join(memoryDir, "user_memory.db") + } + + async init(): Promise<void> { + const SQL = await initSqlJs() + + if (fs.existsSync(this.dbPath)) { + const fileBuffer = fs.readFileSync(this.dbPath) + this.db = new SQL.Database(fileBuffer) + } else { + this.db = new SQL.Database() + } + + this.db.run(SCHEMA_SQL) + this.initSchemaVersion() + this.seedCategories() + this.persist() + } + + private initSchemaVersion(): void { + const result = this.db!.exec("SELECT value FROM schema_meta WHERE key = 'version'") + if (result.length === 0 || result[0].values.length === 0) { + this.db!.run("INSERT OR REPLACE INTO schema_meta (key, value) VALUES ('version', ?)", [ + String(SCHEMA_VERSION), + ]) + } else { + const currentVersion = parseInt(result[0].values[0][0] as string, 10) + this.runMigrations(currentVersion) + } + } + + private runMigrations(fromVersion: number): void { + // Future migrations go here as: if (fromVersion < 2) { ... } + // After all migrations, update version: + if (fromVersion < SCHEMA_VERSION) { + this.db!.run("UPDATE schema_meta SET value = ? 
WHERE key = 'version'", [ + String(SCHEMA_VERSION), + ]) + } + } + + private seedCategories(): void { + const stmt = this.db!.prepare("INSERT OR IGNORE INTO memory_categories (slug, label, default_decay_rate, priority_weight) VALUES (?, ?, ?, ?)") + for (const cat of DEFAULT_MEMORY_CATEGORIES) { + stmt.run([cat.slug, cat.label, cat.defaultDecayRate, cat.priorityWeight]) + } + stmt.free() + } + + private persist(): void { + if (!this.db) return + const data = this.db.export() + const buffer = Buffer.from(data) + const tmpPath = this.dbPath + ".tmp" + fs.writeFileSync(tmpPath, buffer) + fs.renameSync(tmpPath, this.dbPath) + } + + generateId(): string { + return crypto.randomUUID() + } + + insertEntry(entry: Omit<MemoryEntry, "id"> & { id?: string }): string { + const id = entry.id || this.generateId() + this.db!.run( + `INSERT INTO memory_entries (id, workspace_id, category, content, significance, first_seen, last_reinforced, reinforcement_count, decay_rate, source_task_id, is_pinned) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, + [id, entry.workspaceId, entry.category, entry.content, entry.significance, entry.firstSeen, entry.lastReinforced, entry.reinforcementCount, entry.decayRate, entry.sourceTaskId, entry.isPinned ? 1 : 0], + ) + this.persist() + return id + } + + reinforceEntry(id: string, taskId: string | null): void { + this.db!.run( + `UPDATE memory_entries SET last_reinforced = ?, reinforcement_count = reinforcement_count + 1, source_task_id = ? WHERE id = ?`, + [Math.floor(Date.now() / 1000), taskId, id], + ) + this.persist() + } + + updateEntry(id: string, content: string, significance: number, taskId: string | null): void { + this.db!.run( + `UPDATE memory_entries SET content = ?, significance = ?, last_reinforced = ?, reinforcement_count = reinforcement_count + 1, source_task_id = ? 
WHERE id = ?`, + [content, significance, Math.floor(Date.now() / 1000), taskId, id], + ) + this.persist() + } + + getEntry(id: string): MemoryEntry | null { + const result = this.db!.exec("SELECT * FROM memory_entries WHERE id = ?", [id]) + if (result.length === 0 || result[0].values.length === 0) return null + return this.rowToEntry(result[0].columns, result[0].values[0]) + } + + getEntriesByCategory(category: string, workspaceId: string | null): MemoryEntry[] { + const result = this.db!.exec( + "SELECT * FROM memory_entries WHERE category = ? AND (workspace_id IS NULL OR workspace_id = ?) ORDER BY last_reinforced DESC", + [category, workspaceId], + ) + if (result.length === 0) return [] + return result[0].values.map((row) => this.rowToEntry(result[0].columns, row)) + } + + getScoredEntries(workspaceId: string | null): ScoredMemoryEntry[] { + const result = this.db!.exec( + `SELECT e.*, c.priority_weight, c.label as category_label + FROM memory_entries e + JOIN memory_categories c ON e.category = c.slug + WHERE (e.workspace_id IS NULL OR e.workspace_id = ?) 
+ ORDER BY e.last_reinforced DESC`, + [workspaceId], + ) + + if (result.length === 0) return [] + + const now = Math.floor(Date.now() / 1000) + const entries: ScoredMemoryEntry[] = [] + + for (const row of result[0].values) { + const cols = result[0].columns + const entry = this.rowToEntry(cols, row) + const priorityWeight = row[cols.indexOf("priority_weight")] as number + const categoryLabel = row[cols.indexOf("category_label")] as string + const daysSinceReinforced = (now - entry.lastReinforced) / 86400 + + const score = computeScore({ + significance: entry.significance, + priorityWeight, + reinforcementCount: entry.reinforcementCount, + daysSinceReinforced, + decayRate: entry.decayRate, + }) + + if (score >= MEMORY_CONSTANTS.SCORE_THRESHOLD) { + entries.push({ ...entry, computedScore: score, categoryLabel }) + } + } + + entries.sort((a, b) => b.computedScore - a.computedScore) + return entries.slice(0, MEMORY_CONSTANTS.MAX_QUERY_ENTRIES) + } + + logAnalysis(entry: AnalysisLogEntry): void { + this.db!.run( + `INSERT INTO analysis_log (id, timestamp, task_id, messages_analyzed, tokens_used, entries_created, entries_reinforced) + VALUES (?, ?, ?, ?, ?, ?, ?)`, + [entry.id, entry.timestamp, entry.taskId, entry.messagesAnalyzed, entry.tokensUsed, entry.entriesCreated, entry.entriesReinforced], + ) + this.persist() + } + + garbageCollect(): number { + const now = Math.floor(Date.now() / 1000) + const cutoff = now - MEMORY_CONSTANTS.GARBAGE_COLLECTION_DAYS * 86400 + + // Delete entries that are old, low-scored, and not pinned + // We compute score in JS since sql.js doesn't have LOG2/EXP natively + const result = this.db!.exec( + `SELECT e.id, e.significance, e.reinforcement_count, e.last_reinforced, e.decay_rate, e.is_pinned, c.priority_weight + FROM memory_entries e + JOIN memory_categories c ON e.category = c.slug + WHERE e.is_pinned = 0 AND e.last_reinforced < ?`, + [cutoff], + ) + + if (result.length === 0) return 0 + + const toDelete: string[] = [] + for (const 
row of result[0].values) { + const cols = result[0].columns + const significance = row[cols.indexOf("significance")] as number + const count = row[cols.indexOf("reinforcement_count")] as number + const lastReinforced = row[cols.indexOf("last_reinforced")] as number + const decayRate = row[cols.indexOf("decay_rate")] as number + const priorityWeight = row[cols.indexOf("priority_weight")] as number + + const score = computeScore({ + significance, + priorityWeight, + reinforcementCount: count, + daysSinceReinforced: (now - lastReinforced) / 86400, + decayRate, + }) + + if (score < MEMORY_CONSTANTS.GARBAGE_COLLECTION_SCORE_THRESHOLD) { + toDelete.push(row[cols.indexOf("id")] as string) + } + } + + for (const id of toDelete) { + this.db!.run("DELETE FROM memory_entries WHERE id = ?", [id]) + } + + // Hard cap enforcement + const countResult = this.db!.exec("SELECT COUNT(*) FROM memory_entries") + const totalCount = countResult[0].values[0][0] as number + if (totalCount > MEMORY_CONSTANTS.MAX_ENTRIES) { + // Get all entries scored, delete lowest until under cap + const allScored = this.getScoredEntries(null) + // getScoredEntries already limits to 40, so query all here + const allResult = this.db!.exec( + `SELECT e.id, e.significance, e.reinforcement_count, e.last_reinforced, e.decay_rate, e.is_pinned, c.priority_weight + FROM memory_entries e + JOIN memory_categories c ON e.category = c.slug + WHERE e.is_pinned = 0 + ORDER BY e.last_reinforced ASC`, + ) + if (allResult.length > 0) { + const excess = totalCount - MEMORY_CONSTANTS.MAX_ENTRIES + const scored = allResult[0].values.map((row) => { + const cols = allResult[0].columns + return { + id: row[cols.indexOf("id")] as string, + score: computeScore({ + significance: row[cols.indexOf("significance")] as number, + priorityWeight: row[cols.indexOf("priority_weight")] as number, + reinforcementCount: row[cols.indexOf("reinforcement_count")] as number, + daysSinceReinforced: (now - (row[cols.indexOf("last_reinforced")] as 
number)) / 86400, + decayRate: row[cols.indexOf("decay_rate")] as number, + }), + } + }).sort((a, b) => a.score - b.score) + + for (let i = 0; i < Math.min(excess, scored.length); i++) { + this.db!.run("DELETE FROM memory_entries WHERE id = ?", [scored[i].id]) + toDelete.push(scored[i].id) + } + } + } + + if (toDelete.length > 0) this.persist() + return toDelete.length + } + + getEntryCount(): number { + const result = this.db!.exec("SELECT COUNT(*) FROM memory_entries") + return result[0].values[0][0] as number + } + + close(): void { + if (this.db) { + this.db.close() + this.db = null + } + } + + private rowToEntry(columns: string[], row: any[]): MemoryEntry { + const get = (col: string) => row[columns.indexOf(col)] + return { + id: get("id") as string, + workspaceId: get("workspace_id") as string | null, + category: get("category") as MemoryCategorySlug, + content: get("content") as string, + significance: get("significance") as number, + firstSeen: get("first_seen") as number, + lastReinforced: get("last_reinforced") as number, + reinforcementCount: get("reinforcement_count") as number, + decayRate: get("decay_rate") as number, + sourceTaskId: get("source_task_id") as string | null, + isPinned: (get("is_pinned") as number) === 1, + } + } +} +``` + +- [ ] **Step 3: Run a quick smoke test manually** + +Run: `cd src && npx vitest run core/memory/__tests__/scoring.spec.ts` +Expected: Still PASS (no regressions from new file) + +- [ ] **Step 4: Commit** + +```bash +git add src/core/memory/memory-store.ts package.json pnpm-lock.yaml +git commit -m "feat(memory): add SQLite memory store via sql.js with schema versioning" +``` + +--- + +## Task 5: Memory Writer (with PII filter and dedup) + +**Files:** +- Create: `src/core/memory/memory-writer.ts` +- Create: `src/core/memory/__tests__/memory-writer.spec.ts` + +- [ ] **Step 1: Write the failing tests** + +```typescript +// src/core/memory/__tests__/memory-writer.spec.ts +import { containsPII, jaccardSimilarity } from 
"../memory-writer" + +describe("containsPII", () => { + it("should detect email addresses", () => { + expect(containsPII("User email is john@example.com")).toBe(true) + }) + + it("should detect OpenAI API keys", () => { + expect(containsPII("Uses key sk-abcdefghijklmnopqrstuvwxyz1234")).toBe(true) + }) + + it("should detect GitHub PATs", () => { + expect(containsPII("Token ghp_abcdefghijklmnopqrstuvwxyz1234567890")).toBe(true) + }) + + it("should not flag normal coding preferences", () => { + expect(containsPII("Prefers TypeScript over JavaScript")).toBe(false) + }) + + it("should not flag file paths", () => { + expect(containsPII("Frequently edits src/auth/login.ts")).toBe(false) + }) +}) + +describe("jaccardSimilarity", () => { + it("should return 1.0 for identical strings", () => { + expect(jaccardSimilarity("prefers typescript", "prefers typescript")).toBeCloseTo(1.0) + }) + + it("should return 0.0 for completely different strings", () => { + expect(jaccardSimilarity("cats dogs birds", "alpha beta gamma")).toBeCloseTo(0.0) + }) + + it("should return high similarity for near-duplicates", () => { + const sim = jaccardSimilarity( + "Prefers functional React components", + "Prefers functional React component patterns", + ) + expect(sim).toBeGreaterThan(0.5) + }) + + it("should ignore short words (≤2 chars)", () => { + const sim = jaccardSimilarity("I am a good coder", "I am a bad coder") + // "I", "am", "a" are filtered, so it's {good, coder} vs {bad, coder} + expect(sim).toBeLessThan(1.0) + }) +}) +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `cd src && npx vitest run core/memory/__tests__/memory-writer.spec.ts` +Expected: FAIL — module not found + +- [ ] **Step 3: Implement the memory writer** + +```typescript +// src/core/memory/memory-writer.ts +import type { Observation, MemoryCategorySlug } from "./types" +import { MEMORY_CONSTANTS, DEFAULT_MEMORY_CATEGORIES } from "./types" +import type { MemoryStore } from "./memory-store" + +const 
PII_PATTERNS = [ + /\S+@\S+\.\S+/, + /sk-[a-zA-Z0-9]{20,}/, + /ghp_[a-zA-Z0-9]{36}/, + /\b\d{3}[-.]?\d{3}[-.]?\d{4}\b/, + /\b\d{3}-\d{2}-\d{4}\b/, + /AKIA[0-9A-Z]{16}/, + /-----BEGIN (RSA |EC )?PRIVATE KEY-----/, +] + +export function containsPII(content: string): boolean { + return PII_PATTERNS.some((pattern) => pattern.test(content)) +} + +export function jaccardSimilarity(a: string, b: string): number { + const tokenize = (s: string) => + new Set( + s + .toLowerCase() + .split(/\s+/) + .filter((w) => w.length > 2), + ) + const setA = tokenize(a) + const setB = tokenize(b) + if (setA.size === 0 && setB.size === 0) return 1.0 + if (setA.size === 0 || setB.size === 0) return 0.0 + const intersection = new Set([...setA].filter((x) => setB.has(x))) + const union = new Set([...setA, ...setB]) + return intersection.size / union.size +} + +// Categories that are always global +const GLOBAL_CATEGORIES = new Set([ + "coding-style", + "communication-prefs", + "dislikes-frustrations", +]) + +// Categories that are always workspace-scoped +const WORKSPACE_CATEGORIES = new Set(["active-projects"]) + +function getDecayRate(category: MemoryCategorySlug): number { + const cat = DEFAULT_MEMORY_CATEGORIES.find((c) => c.slug === category) + return cat?.defaultDecayRate ?? 
0.1 +} + +export interface WriteResult { + entriesCreated: number + entriesReinforced: number + entriesSkipped: number +} + +export function processObservations( + store: MemoryStore, + observations: Observation[], + workspaceId: string | null, + taskId: string | null, +): WriteResult { + let created = 0 + let reinforced = 0 + let skipped = 0 + const now = Math.floor(Date.now() / 1000) + + for (const obs of observations) { + // PII filter + if (containsPII(obs.content)) { + skipped++ + continue + } + + if (obs.action === "NEW") { + // Determine scope + let entryWorkspaceId: string | null = null + if (WORKSPACE_CATEGORIES.has(obs.category)) { + entryWorkspaceId = workspaceId + } else if (!GLOBAL_CATEGORIES.has(obs.category)) { + // Heuristic: if content mentions paths, it's workspace-scoped + entryWorkspaceId = /[/\\]/.test(obs.content) ? workspaceId : null + } + + // Dedup check + const existing = store.getEntriesByCategory(obs.category, entryWorkspaceId) + const duplicate = existing.find( + (e) => jaccardSimilarity(e.content, obs.content) >= MEMORY_CONSTANTS.DEDUP_SIMILARITY_THRESHOLD, + ) + + if (duplicate) { + store.reinforceEntry(duplicate.id, taskId) + reinforced++ + } else { + store.insertEntry({ + workspaceId: entryWorkspaceId, + category: obs.category, + content: obs.content, + significance: obs.significance, + firstSeen: now, + lastReinforced: now, + reinforcementCount: 1, + decayRate: getDecayRate(obs.category), + sourceTaskId: taskId, + isPinned: false, + }) + created++ + } + } else if (obs.action === "REINFORCE") { + if (obs.existingEntryId) { + const entry = store.getEntry(obs.existingEntryId) + if (entry && entry.category === obs.category) { + store.reinforceEntry(obs.existingEntryId, taskId) + reinforced++ + } else { + skipped++ // Invalid ID — skip silently + } + } else { + skipped++ + } + } else if (obs.action === "UPDATE") { + if (obs.existingEntryId) { + const entry = store.getEntry(obs.existingEntryId) + if (entry && entry.category === 
obs.category) { + store.updateEntry(obs.existingEntryId, obs.content, obs.significance, taskId) + reinforced++ + } else { + // Invalid ID — treat as NEW with dedup check + const existing = store.getEntriesByCategory(obs.category, workspaceId) + const duplicate = existing.find( + (e) => jaccardSimilarity(e.content, obs.content) >= MEMORY_CONSTANTS.DEDUP_SIMILARITY_THRESHOLD, + ) + if (duplicate) { + store.updateEntry(duplicate.id, obs.content, obs.significance, taskId) + reinforced++ + } else { + store.insertEntry({ + workspaceId: WORKSPACE_CATEGORIES.has(obs.category) ? workspaceId : null, + category: obs.category, + content: obs.content, + significance: obs.significance, + firstSeen: now, + lastReinforced: now, + reinforcementCount: 1, + decayRate: getDecayRate(obs.category), + sourceTaskId: taskId, + isPinned: false, + }) + created++ + } + } + } else { + skipped++ + } + } + } + + return { entriesCreated: created, entriesReinforced: reinforced, entriesSkipped: skipped } +} +``` + +- [ ] **Step 4: Run tests to verify they pass** + +Run: `cd src && npx vitest run core/memory/__tests__/memory-writer.spec.ts` +Expected: PASS (all 10 tests) + +- [ ] **Step 5: Commit** + +```bash +git add src/core/memory/memory-writer.ts src/core/memory/__tests__/memory-writer.spec.ts +git commit -m "feat(memory): add memory writer with PII filter, dedup, and workspace scoping" +``` + +--- + +## Task 6: Prompt Compiler + +**Files:** +- Create: `src/core/memory/prompt-compiler.ts` +- Create: `src/core/memory/__tests__/prompt-compiler.spec.ts` + +- [ ] **Step 1: Write the failing tests** + +```typescript +// src/core/memory/__tests__/prompt-compiler.spec.ts +import { compileMemoryPrompt, compileMemoryForAgent } from "../prompt-compiler" +import type { ScoredMemoryEntry } from "../types" + +const makeScoredEntry = ( + category: string, + content: string, + score: number, + label: string = "Test", +): ScoredMemoryEntry => ({ + id: `test-${Math.random().toString(36).slice(2)}`, + 
workspaceId: null, + category: category as any, + content, + significance: 0.8, + firstSeen: 1000, + lastReinforced: 2000, + reinforcementCount: 3, + decayRate: 0.05, + sourceTaskId: null, + isPinned: false, + computedScore: score, + categoryLabel: label, +}) + +describe("compileMemoryPrompt", () => { + it("should return empty string for no entries", () => { + expect(compileMemoryPrompt([])).toBe("") + }) + + it("should include USER PROFILE header", () => { + const entries = [makeScoredEntry("coding-style", "Prefers TypeScript", 0.9, "Coding Style")] + const result = compileMemoryPrompt(entries) + expect(result).toContain("USER PROFILE & PREFERENCES") + }) + + it("should group entries by category", () => { + const entries = [ + makeScoredEntry("coding-style", "Prefers TypeScript", 0.9, "Coding Style"), + makeScoredEntry("coding-style", "Uses React hooks", 0.8, "Coding Style"), + makeScoredEntry("communication-prefs", "Likes concise responses", 0.85, "Communication Preferences"), + ] + const result = compileMemoryPrompt(entries) + expect(result).toContain("Coding Style:") + expect(result).toContain("Communication Preferences:") + }) + + it("should omit empty categories", () => { + const entries = [makeScoredEntry("coding-style", "Prefers TypeScript", 0.9, "Coding Style")] + const result = compileMemoryPrompt(entries) + expect(result).not.toContain("Communication Preferences:") + }) +}) + +describe("compileMemoryForAgent", () => { + it("should include entry IDs", () => { + const entry = makeScoredEntry("coding-style", "Prefers TypeScript", 0.9, "Coding Style") + const result = compileMemoryForAgent([entry]) + expect(result).toContain(entry.id) + }) + + it("should include scores", () => { + const entries = [makeScoredEntry("coding-style", "Prefers TS", 0.87, "Coding Style")] + const result = compileMemoryForAgent(entries) + expect(result).toContain("0.87") + }) +}) +``` + +- [ ] **Step 2: Run tests to verify they fail** + +Run: `cd src && npx vitest run 
core/memory/__tests__/prompt-compiler.spec.ts` +Expected: FAIL + +- [ ] **Step 3: Implement the prompt compiler** + +```typescript +// src/core/memory/prompt-compiler.ts +import type { ScoredMemoryEntry } from "./types" +import { MEMORY_CONSTANTS } from "./types" + +// Rough token estimate +function estimateTokens(text: string): number { + return Math.ceil(text.length / 4) +} + +export function compileMemoryPrompt(entries: ScoredMemoryEntry[]): string { + if (entries.length === 0) return "" + + // Group by category label + const groups = new Map<string, string[]>() + for (const entry of entries) { + if (!groups.has(entry.categoryLabel)) { + groups.set(entry.categoryLabel, []) + } + groups.get(entry.categoryLabel)!.push(entry.content) + } + + // Build prose sections + const sections: string[] = [] + for (const [label, contents] of groups) { + sections.push(`${label}: ${contents.join(". ")}.`) + } + + let prose = sections.join("\n\n") + + // Token cap — drop from the end (lowest priority sections) until within budget + while (estimateTokens(prose) > MEMORY_CONSTANTS.PROMPT_TOKEN_CAP && sections.length > 1) { + sections.pop() + prose = sections.join("\n\n") + } + + return `USER PROFILE & PREFERENCES\n(Learned through conversation — continuously updated)\n\n${prose}` +} + +export function compileMemoryForAgent(entries: ScoredMemoryEntry[]): string { + if (entries.length === 0) return "No existing memory entries." 
+ + return entries + .map( + (e) => + `[${e.id}] ${e.category} (score: ${e.computedScore.toFixed(2)}): ${e.content}`, + ) + .join("\n") +} +``` + +- [ ] **Step 4: Run tests to verify they pass** + +Run: `cd src && npx vitest run core/memory/__tests__/prompt-compiler.spec.ts` +Expected: PASS + +- [ ] **Step 5: Commit** + +```bash +git add src/core/memory/prompt-compiler.ts src/core/memory/__tests__/prompt-compiler.spec.ts +git commit -m "feat(memory): add prompt compiler for system prompt and analysis agent rendering" +``` + +--- + +## Task 7: Analysis Agent + +**Files:** +- Create: `src/core/memory/analysis-agent.ts` + +- [ ] **Step 1: Implement the analysis agent** + +This module calls the LLM. It uses the existing `buildApiHandler()` and `SingleCompletionHandler` patterns from `src/api/index.ts`. + +```typescript +// src/core/memory/analysis-agent.ts +import type { AnalysisResult, Observation, MemoryCategorySlug } from "./types" +import { buildApiHandler, type SingleCompletionHandler } from "../../api" +import type { ProviderSettings } from "@roo-code/types" + +const VALID_CATEGORIES = new Set([ + "coding-style", "communication-prefs", "technical-proficiency", + "tool-preferences", "active-projects", "behavioral-patterns", "dislikes-frustrations", +]) + +const VALID_ACTIONS = new Set(["NEW", "REINFORCE", "UPDATE"]) + +const ANALYSIS_SYSTEM_PROMPT = `You are a User Profile Analyst. Your job is to extract factual observations about the USER from conversation transcripts between them and a coding assistant. + +You will receive: +1. A cleaned conversation transcript (tool noise already removed) +2. 
The current compiled memory report (what is already known) + +EXTRACT observations about the user in these categories: +- coding-style: Languages, frameworks, patterns, conventions they prefer +- communication-prefs: Response length, tone, detail level they want +- technical-proficiency: Skill levels in specific technologies +- tool-preferences: Tools, linters, formatters, workflows they favor +- active-projects: What they're currently building (time-bound) +- behavioral-patterns: How they iterate, review, debug, make decisions +- dislikes-frustrations: Things that annoy them or they explicitly reject + +RULES: +- Only extract what is EVIDENCED in the transcript. Never infer beyond what's shown. +- If an observation matches something in the existing memory, mark it as REINFORCE (don't create a duplicate). +- If an observation contradicts existing memory, mark it as UPDATE with the new value. +- If it's completely new, mark it as NEW. +- Write each observation as a concise, third-person factual statement (e.g., "Prefers functional React components over class components") +- Assign significance 0.0-1.0 based on how broadly useful this fact is for future interactions. + +PRIVACY — NEVER extract: +- Real names, emails, addresses, phone numbers +- API keys, passwords, secrets, tokens +- Company confidential or proprietary details +- Health, financial, legal, or relationship information +- Anything the user explicitly marks as private or off-record + +If the conversation contains mostly one-liners or nothing personality-revealing, return an empty observations array. Don't force extraction. 
+ +Respond in this exact JSON format (no markdown fences, just raw JSON): +{ + "observations": [ + { + "action": "NEW" | "REINFORCE" | "UPDATE", + "category": "<category-slug>", + "content": "<the observation as a concise factual statement>", + "significance": <0.0-1.0>, + "existing_entry_id": "<id from existing memory, only for REINFORCE/UPDATE>", + "reasoning": "<brief evidence from the transcript>" + } + ], + "session_summary": "<1-2 sentences about what the user was doing this session>" +}` + +export async function runAnalysis( + providerSettings: ProviderSettings, + cleanedConversation: string, + existingMemoryReport: string, +): Promise<AnalysisResult | null> { + try { + const handler = buildApiHandler(providerSettings) + + // Check if handler supports single completion + if (!("completePrompt" in handler)) { + console.error("[MemoryAgent] Handler does not support completePrompt") + return null + } + + const prompt = `EXISTING MEMORY:\n${existingMemoryReport}\n\n---\n\nCONVERSATION TRANSCRIPT:\n${cleanedConversation}` + + const response = await (handler as unknown as SingleCompletionHandler).completePrompt( + `${ANALYSIS_SYSTEM_PROMPT}\n\n${prompt}`, + ) + + return parseAnalysisResponse(response) + } catch (error) { + console.error("[MemoryAgent] Analysis failed:", error) + return null + } +} + +function parseAnalysisResponse(response: string): AnalysisResult | null { + try { + // Strip markdown code fences if present + const cleaned = response.replace(/^```json?\n?/m, "").replace(/\n?```$/m, "").trim() + const parsed = JSON.parse(cleaned) + + if (!parsed.observations || !Array.isArray(parsed.observations)) { + return { observations: [], sessionSummary: parsed.session_summary || "" } + } + + // Validate and filter observations + const validObservations: Observation[] = parsed.observations + .filter((obs: any) => { + return ( + VALID_ACTIONS.has(obs.action) && + VALID_CATEGORIES.has(obs.category) && + typeof obs.content === "string" && + obs.content.length > 0 && + typeof obs.significance === "number" && + obs.significance >= 0 && + obs.significance <= 1 + ) + }) + .map((obs: any) => ({ + action: obs.action, + category: 
obs.category as MemoryCategorySlug, + content: obs.content, + significance: obs.significance, + existingEntryId: obs.existing_entry_id || null, + reasoning: obs.reasoning || "", + })) + + return { + observations: validObservations, + sessionSummary: parsed.session_summary || "", + } + } catch (error) { + console.error("[MemoryAgent] Failed to parse response:", error) + return null + } +} +``` + +- [ ] **Step 2: Commit** + +```bash +git add src/core/memory/analysis-agent.ts +git commit -m "feat(memory): add analysis agent with LLM invocation and response parsing" +``` + +--- + +## Task 8: Pipeline Orchestrator + +**Files:** +- Create: `src/core/memory/orchestrator.ts` + +- [ ] **Step 1: Implement the orchestrator** + +```typescript +// src/core/memory/orchestrator.ts +import * as crypto from "crypto" +import * as path from "path" +import { execSync } from "child_process" +import type { ProviderSettings } from "@roo-code/types" +import { MemoryStore } from "./memory-store" +import { preprocessMessages } from "./preprocessor" +import { runAnalysis } from "./analysis-agent" +import { processObservations } from "./memory-writer" +import { compileMemoryPrompt, compileMemoryForAgent } from "./prompt-compiler" +import { MEMORY_CONSTANTS } from "./types" + +function getWorkspaceId(workspacePath: string): string { + const folderName = path.basename(workspacePath) + let gitRemote: string | null = null + try { + gitRemote = execSync("git remote get-url origin", { + cwd: workspacePath, + encoding: "utf-8", + timeout: 3000, + }).trim() + } catch { + // Not a git repo or no remote + } + const raw = gitRemote ? 
`${gitRemote}::${folderName}` : folderName + return crypto.createHash("sha256").update(raw).digest("hex").slice(0, 16) +} + +export class MemoryOrchestrator { + private store: MemoryStore + private messageCounter = 0 + private watermark = 0 + private analysisInFlight = false + private analysisQueued = false + private enabled = false + private workspaceId: string | null = null + private analysisFrequency: number + + constructor( + private storagePath: string, + private workspacePath: string | null, + analysisFrequency?: number, + ) { + this.store = new MemoryStore(storagePath) + this.analysisFrequency = analysisFrequency || MEMORY_CONSTANTS.DEFAULT_ANALYSIS_FREQUENCY + if (workspacePath) { + this.workspaceId = getWorkspaceId(workspacePath) + } + } + + async init(): Promise<void> { + await this.store.init() + } + + setEnabled(enabled: boolean): void { + this.enabled = enabled + if (!enabled) { + this.messageCounter = 0 + } + } + + isEnabled(): boolean { + return this.enabled + } + + /** + * Call this on each user message during an active chat session. + * Returns true if an analysis cycle was triggered. + */ + onUserMessage( + messages: any[], + taskId: string | null, + providerSettings: ProviderSettings | null, + ): boolean { + if (!this.enabled || !providerSettings) return false + + this.messageCounter++ + + if (this.messageCounter >= this.analysisFrequency) { + this.triggerAnalysis(messages, taskId, providerSettings) + this.messageCounter = 0 + return true + } + + return false + } + + /** + * Call on session end to catch remaining unanalyzed messages. 
+ */ + onSessionEnd( + messages: any[], + taskId: string | null, + providerSettings: ProviderSettings | null, + ): void { + if (!this.enabled || !providerSettings) return + if (this.watermark < messages.length) { + this.triggerAnalysis(messages, taskId, providerSettings) + } + } + + private async triggerAnalysis( + messages: any[], + taskId: string | null, + providerSettings: ProviderSettings, + ): Promise<void> { + if (this.analysisInFlight) { + this.analysisQueued = true + return + } + + this.analysisInFlight = true + + try { + // Grab messages since last watermark + const batch = messages.slice(this.watermark) + this.watermark = messages.length + + if (batch.length === 0) return + + // Preprocess + const preprocessed = preprocessMessages(batch) + if (preprocessed.cleaned.trim().length === 0) return + + // Get existing memory for context + const scoredEntries = this.store.getScoredEntries(this.workspaceId) + const existingReport = compileMemoryForAgent(scoredEntries) + + // Run analysis + const result = await runAnalysis(providerSettings, preprocessed.cleaned, existingReport) + + if (result && result.observations.length > 0) { + const writeResult = processObservations( + this.store, + result.observations, + this.workspaceId, + taskId, + ) + + // Log the analysis + this.store.logAnalysis({ + id: crypto.randomUUID(), + timestamp: Math.floor(Date.now() / 1000), + taskId, + messagesAnalyzed: batch.length, + tokensUsed: preprocessed.cleanedTokenEstimate * 2, // rough: input + output + entriesCreated: writeResult.entriesCreated, + entriesReinforced: writeResult.entriesReinforced, + }) + + // Run garbage collection + this.store.garbageCollect() + } + } catch (error) { + console.error("[MemoryOrchestrator] Analysis pipeline error:", error) + } finally { + this.analysisInFlight = false + + if (this.analysisQueued) { + this.analysisQueued = false + // Re-trigger with current state + this.triggerAnalysis(messages, taskId, providerSettings) + } + } + } + + /** + * Get the compiled 
user profile section for the system prompt. + */ + getUserProfileSection(): string { + if (!this.store) return "" + const entries = this.store.getScoredEntries(this.workspaceId) + return compileMemoryPrompt(entries) + } + + getStore(): MemoryStore { + return this.store + } + + close(): void { + this.store.close() + } +} +``` + +- [ ] **Step 2: Commit** + +```bash +git add src/core/memory/orchestrator.ts +git commit -m "feat(memory): add pipeline orchestrator with triggers, concurrency guard, and lifecycle" +``` + +--- + +## Task 9: Global Settings & Message Types + +**Files:** +- Modify: `packages/types/src/global-settings.ts:238-241` +- Modify: `packages/types/src/vscode-extension-host.ts:107,586` + +- [ ] **Step 1: Add memory settings to globalSettingsSchema** + +In `packages/types/src/global-settings.ts`, before the closing `})` on line 241, add: + +```typescript + // Memory Learning + memoryLearningEnabled: z.boolean().optional(), + memoryApiConfigId: z.string().optional(), + memoryAnalysisFrequency: z.number().optional(), + memoryLearningDefaultEnabled: z.boolean().optional(), +``` + +- [ ] **Step 2: Add message types to vscode-extension-host.ts** + +In `packages/types/src/vscode-extension-host.ts`: + +Add to the `ExtensionMessage` type union (after line 107, the `"fileContent"` member): +```typescript + | "memoryLearningState" +``` + +Add to the `WebviewMessage` type union (after line 586, the `"openSkillFile"` member): +```typescript + | "toggleMemoryLearning" + | "updateMemorySettings" +``` + +- [ ] **Step 3: Verify types compile** + +Run: `cd packages/types && npx tsc --noEmit` +Expected: No errors + +- [ ] **Step 4: Commit** + +```bash +git add packages/types/src/global-settings.ts packages/types/src/vscode-extension-host.ts +git commit -m "feat(memory): add memory learning settings and message types" +``` + +--- + +## Task 10: System Prompt Integration + +**Files:** +- Modify: `src/core/prompts/system.ts:94-95` +- Modify: 
`src/core/prompts/sections/index.ts:11` + +- [ ] **Step 1: Add getUserProfileSection to sections index** + +In `src/core/prompts/sections/index.ts`, add after the last export (line 11): + +```typescript +export { getUserProfileSection } from "../../../core/memory/prompt-compiler" +``` + +Wait — the prompt compiler export name doesn't match. We need to create a thin wrapper or just re-export. Since `compileMemoryPrompt` takes `ScoredMemoryEntry[]` not a config, the system.ts integration will call the orchestrator directly. So we skip this re-export and instead modify `system.ts` directly. + +- [ ] **Step 2: Modify system.ts to inject userProfileSection** + +In `src/core/prompts/system.ts`, the `generatePrompt()` function needs a new parameter for the memory orchestrator's output. Add a new parameter `userProfileSection?: string` to the function signature, and insert it in the template between `personalityParts.top` and `markdownFormattingSection()`. + +At line 62, add to the function parameters: +```typescript + userProfileSection?: string, +``` + +At lines 94-95, change: +```typescript +${personalityParts.top} +${markdownFormattingSection()} +``` +to: +```typescript +${personalityParts.top} +${userProfileSection || ""} +${markdownFormattingSection()} +``` + +- [ ] **Step 3: Find and update all callers of generatePrompt** + +Search for all places that call `generatePrompt(` to add the new parameter. The parameter is optional with a default of `undefined`, so existing callers should still compile. 
Verify with: + +Run: `cd src && npx tsc --noEmit` +Expected: No errors (parameter is optional) + +- [ ] **Step 4: Commit** + +```bash +git add src/core/prompts/system.ts +git commit -m "feat(memory): inject user profile section into system prompt" +``` + +--- + +## Task 11: Extension Host Integration (ClineProvider + Message Handler) + +**Files:** +- Modify: `src/core/webview/ClineProvider.ts` +- Modify: `src/core/webview/webviewMessageHandler.ts` + +- [ ] **Step 1: Add orchestrator to ClineProvider** + +In `src/core/webview/ClineProvider.ts`: + +Add import near the top: +```typescript +import { MemoryOrchestrator } from "../memory/orchestrator" +``` + +Add instance variable in the class: +```typescript +private memoryOrchestrator?: MemoryOrchestrator +``` + +In the constructor (or an init method), after other initialization: +```typescript +// Initialize memory orchestrator +const storagePath = this.contextProxy.getValue("customStoragePath") || context.globalStorageUri.fsPath +const workspacePath = this.currentWorkspacePath +this.memoryOrchestrator = new MemoryOrchestrator(storagePath, workspacePath || null) +this.memoryOrchestrator.init().catch((err) => console.error("[Memory] Init failed:", err)) + +const memoryEnabled = this.contextProxy.getValue("memoryLearningEnabled") ?? false +this.memoryOrchestrator.setEnabled(memoryEnabled) +``` + +Add a getter for the orchestrator so `system.ts` can access the user profile: +```typescript +getMemoryOrchestrator(): MemoryOrchestrator | undefined { + return this.memoryOrchestrator +} +``` + +- [ ] **Step 2: Add toggle handler to webviewMessageHandler.ts** + +In `src/core/webview/webviewMessageHandler.ts`, add a new case before the `default:` case (around line 3696): + +```typescript +case "toggleMemoryLearning": { + const currentState = provider.getValue("memoryLearningEnabled") ?? 
false + const newState = !currentState + await provider.setValue("memoryLearningEnabled", newState) + const orchestrator = provider.getMemoryOrchestrator() + if (orchestrator) { + orchestrator.setEnabled(newState) + } + await provider.postMessageToWebview({ + type: "memoryLearningState", + text: String(newState), + }) + break +} + +case "updateMemorySettings": { + if (message.text) { + try { + const settings = JSON.parse(message.text) + if (settings.memoryApiConfigId !== undefined) { + await provider.setValue("memoryApiConfigId", settings.memoryApiConfigId) + } + if (settings.memoryAnalysisFrequency !== undefined) { + await provider.setValue("memoryAnalysisFrequency", settings.memoryAnalysisFrequency) + } + if (settings.memoryLearningDefaultEnabled !== undefined) { + await provider.setValue("memoryLearningDefaultEnabled", settings.memoryLearningDefaultEnabled) + } + } catch (e) { + console.error("[Memory] Failed to parse settings:", e) + } + } + break +} +``` + +- [ ] **Step 3: Verify compilation** + +Run: `cd src && npx tsc --noEmit` +Expected: No errors + +- [ ] **Step 4: Commit** + +```bash +git add src/core/webview/ClineProvider.ts src/core/webview/webviewMessageHandler.ts +git commit -m "feat(memory): integrate orchestrator with extension host and message handlers" +``` + +--- + +## Task 12: Chat UI Toggle + +**Files:** +- Modify: `webview-ui/src/components/chat/ChatTextArea.tsx` + +- [ ] **Step 1: Add the memory toggle indicator** + +In `ChatTextArea.tsx`, in the status indicators area (around line 1326), add the memory learning toggle: + +```tsx +{/* Memory Learning Toggle */} +{(() => { + const memoryConfigured = !!extensionState.memoryApiConfigId + const memoryEnabled = extensionState.memoryLearningEnabled ?? false + + const dotColor = !memoryConfigured ? "bg-gray-400" : memoryEnabled ? "bg-green-500" : "bg-red-500" + const label = !memoryConfigured ? "Memory: Not configured" : memoryEnabled ? 
"Memory Learning" : "Memory Paused" + const tooltip = !memoryConfigured + ? "Select a model profile in Settings → Memory to enable" + : memoryEnabled + ? "Roo learns your preferences from this conversation. Click to pause." + : "Memory learning is paused. Click to resume." + + return ( + + ) +})()} +``` + +This needs `extensionState` to include the memory settings. The `ExtensionStateContext` already provides the full state from `globalState`, and since we added the keys to `globalSettingsSchema`, they will be available. + +- [ ] **Step 2: Verify the webview builds** + +Run: `cd webview-ui && pnpm build` +Expected: Build succeeds + +- [ ] **Step 3: Commit** + +```bash +git add webview-ui/src/components/chat/ChatTextArea.tsx +git commit -m "feat(memory): add memory learning toggle indicator to chat UI" +``` + +--- + +## Task 13: Settings View Configuration + +**Files:** +- Modify: `webview-ui/src/components/settings/SettingsView.tsx` + +- [ ] **Step 1: Add memory section to sectionNames and icons** + +In `SettingsView.tsx`, add `"memory"` to the `sectionNames` array (around line 98) and add an icon mapping (around line 509): + +In `sectionNames` (after `"experimental"`): +```typescript +"memory", +``` + +In the `sections` icon mapping: +```typescript +{ id: "memory", icon: Brain }, // import Brain from lucide-react +``` + +- [ ] **Step 2: Add the memory settings tab content** + +Add a new tab content block following the existing pattern (after the experimental section): + +```tsx +{renderTab === "memory" && ( +
+ Memory Learning +
+
+

+ When enabled, Roo learns your preferences and coding style from conversations to personalize responses over time. +

+ + {/* Profile selector */} +
+ +

+ Select a configuration profile with at least 50K context window. +

+ +
+ + {/* Analysis frequency */} +
+ +

+ Analyze conversation every N user messages. +

+ +
+ + {/* Default enabled */} +
+ { + setCachedStateField("memoryLearningDefaultEnabled", e.target.checked) + }} + /> + +
+
+
+
+)} +``` + +- [ ] **Step 3: Verify the webview builds** + +Run: `cd webview-ui && pnpm build` +Expected: Build succeeds + +- [ ] **Step 4: Commit** + +```bash +git add webview-ui/src/components/settings/SettingsView.tsx +git commit -m "feat(memory): add memory learning settings section to SettingsView" +``` + +--- + +## Task 14: Build Pipeline (sql.js WASM) + +**Files:** +- Modify: `src/esbuild.mjs` (potentially) + +- [ ] **Step 1: Verify sql.js WASM handling** + +The build already has a `copyWasms` plugin (line 66-69 in `src/esbuild.mjs`). Check if this correctly picks up `sql-wasm.wasm` from `node_modules/sql.js/dist/`. + +Run: `ls node_modules/sql.js/dist/sql-wasm.wasm` +Expected: File exists + +If `copyWasms` doesn't cover sql.js WASM paths, add the path to the copy list. Check `@roo-code/build`'s `copyWasms` implementation to see what globs it uses. + +- [ ] **Step 2: Test full extension build** + +Run: `pnpm build` +Expected: Build succeeds, `dist/` contains `sql-wasm.wasm` (or it's bundled) + +- [ ] **Step 3: Commit if any build config changes were needed** + +```bash +git add src/esbuild.mjs +git commit -m "build: ensure sql.js WASM files are included in extension bundle" +``` + +--- + +## Task 15: Integration Test — Full Pipeline + +**Files:** +- Create: `src/core/memory/__tests__/orchestrator.spec.ts` + +- [ ] **Step 1: Write integration tests** + +```typescript +// src/core/memory/__tests__/orchestrator.spec.ts +import { MemoryStore } from "../memory-store" +import { preprocessMessages } from "../preprocessor" +import { processObservations, jaccardSimilarity } from "../memory-writer" +import { compileMemoryPrompt } from "../prompt-compiler" +import type { Observation } from "../types" +import * as path from "path" +import * as os from "os" +import * as fs from "fs" + +describe("Memory System Integration", () => { + let store: MemoryStore + let tmpDir: string + + beforeEach(async () => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "memory-test-")) + 
store = new MemoryStore(tmpDir) + await store.init() + }) + + afterEach(() => { + store.close() + fs.rmSync(tmpDir, { recursive: true, force: true }) + }) + + it("should persist entries across store instances", async () => { + store.insertEntry({ + workspaceId: null, + category: "coding-style", + content: "Prefers TypeScript", + significance: 0.9, + firstSeen: 1000, + lastReinforced: 1000, + reinforcementCount: 1, + decayRate: 0.05, + sourceTaskId: null, + isPinned: false, + }) + store.close() + + // Open new store instance on same path + const store2 = new MemoryStore(tmpDir) + await store2.init() + expect(store2.getEntryCount()).toBe(1) + store2.close() + }) + + it("should process observations end-to-end", () => { + const observations: Observation[] = [ + { + action: "NEW", + category: "coding-style", + content: "Prefers TypeScript over JavaScript", + significance: 0.9, + existingEntryId: null, + reasoning: "Explicitly stated preference", + }, + { + action: "NEW", + category: "communication-prefs", + content: "Likes concise, direct responses", + significance: 0.85, + existingEntryId: null, + reasoning: "Expressed multiple times", + }, + ] + + const result = processObservations(store, observations, null, "task-1") + expect(result.entriesCreated).toBe(2) + expect(store.getEntryCount()).toBe(2) + }) + + it("should compile entries into prose", () => { + store.insertEntry({ + workspaceId: null, + category: "coding-style", + content: "Prefers TypeScript", + significance: 0.9, + firstSeen: Math.floor(Date.now() / 1000), + lastReinforced: Math.floor(Date.now() / 1000), + reinforcementCount: 5, + decayRate: 0.05, + sourceTaskId: null, + isPinned: false, + }) + + const entries = store.getScoredEntries(null) + const prose = compileMemoryPrompt(entries) + expect(prose).toContain("USER PROFILE & PREFERENCES") + expect(prose).toContain("Prefers TypeScript") + }) + + it("should preprocess messages and reduce token count", () => { + const messages = [ + { role: "user", content: 
[{ type: "text", text: "Fix the auth bug" }] }, + { + role: "assistant", + content: [ + { type: "text", text: "I'll check the auth module." }, + { type: "tool_use", id: "1", name: "read_file", input: { path: "src/auth.ts" } }, + { type: "tool_result", tool_use_id: "1", content: "... 500 lines ..." }, + ], + }, + ] + + const result = preprocessMessages(messages) + expect(result.cleaned).toContain("Fix the auth bug") + expect(result.cleaned).toContain("→ read: src/auth.ts") + expect(result.cleaned).not.toContain("500 lines") + expect(result.cleanedTokenEstimate).toBeLessThan(result.originalTokenEstimate) + }) + + it("should garbage collect old low-score entries", async () => { + const oldTimestamp = Math.floor(Date.now() / 1000) - 100 * 86400 // 100 days ago + + store.insertEntry({ + workspaceId: null, + category: "active-projects", + content: "Working on legacy migration", + significance: 0.3, + firstSeen: oldTimestamp, + lastReinforced: oldTimestamp, + reinforcementCount: 1, + decayRate: 0.3, + sourceTaskId: null, + isPinned: false, + }) + + expect(store.getEntryCount()).toBe(1) + const deleted = store.garbageCollect() + expect(deleted).toBe(1) + expect(store.getEntryCount()).toBe(0) + }) +}) +``` + +- [ ] **Step 2: Run integration tests** + +Run: `cd src && npx vitest run core/memory/__tests__/orchestrator.spec.ts` +Expected: PASS (all 5 tests) + +- [ ] **Step 3: Run all memory tests together** + +Run: `cd src && npx vitest run core/memory/` +Expected: All tests PASS + +- [ ] **Step 4: Commit** + +```bash +git add src/core/memory/__tests__/orchestrator.spec.ts +git commit -m "test(memory): add integration tests for full memory pipeline" +``` + +--- + +## Task 16: Final Verification + +- [ ] **Step 1: Run all project tests** + +Run: `pnpm test` +Expected: All tests pass (existing + new) + +- [ ] **Step 2: Run type checking** + +Run: `pnpm check-types` +Expected: No type errors + +- [ ] **Step 3: Run lint** + +Run: `pnpm lint` +Expected: No lint errors from new 
files + +- [ ] **Step 4: Test build** + +Run: `pnpm build` +Expected: Extension builds successfully + +- [ ] **Step 5: Final commit if any fixes were needed** + +```bash +git add -A +git commit -m "fix(memory): address lint, type, and build issues from final verification" +``` diff --git a/docs/superpowers/specs/2026-03-22-agent-f-investigation.md b/docs/superpowers/specs/2026-03-22-agent-f-investigation.md new file mode 100644 index 00000000000..9756666c950 --- /dev/null +++ b/docs/superpowers/specs/2026-03-22-agent-f-investigation.md @@ -0,0 +1,144 @@ +# Agent F Investigation: Why Spawned Tasks Report "Failed" After ~15 Seconds + +## Summary + +**Root cause identified**: There is NO 15-second timeout killing spawned tasks. The "failed" status comes from **two distinct paths**, and the actual failure is likely a **cascading abort** triggered by the `createTask` → `removeClineFromStack` flow on spawned providers. + +--- + +## Finding 1: `TaskCompleted` is ONLY emitted by `AttemptCompletionTool` + +`TaskCompleted` is **never** emitted by `Task.ts` itself. It is only emitted in: +- `src/core/tools/AttemptCompletionTool.ts` line 205: `task.emit(RooCodeEventName.TaskCompleted, ...)` + +This means: +- A spawned agent task will only ever emit `TaskCompleted` if the LLM calls the `attempt_completion` tool +- If the task crashes, is aborted, or the API call fails before `attempt_completion`, **no `TaskCompleted` is ever emitted** +- The coordinator will only see `TaskAborted` (via `abortTask()`) or nothing at all + +## Finding 2: `TaskAborted` is emitted by `Task.abortTask()` + +`TaskAborted` is emitted at `Task.ts` line ~2322: +```typescript +this.emit(RooCodeEventName.TaskAborted) +``` + +This is called by: +1. `ClineProvider.removeClineFromStack()` → calls `task.abortTask(true)` (isAbandoned=true) +2. `ClineProvider.cancelTask()` → calls `task.abortTask()` (user-initiated cancel) +3. 
Any explicit abort flow + +## Finding 3: The "failed after ~15 seconds" pattern — likely cause + +The `orchestrator.ts` `executeFromPlan()` loop at line 193-233 creates tasks **sequentially**: + +```typescript +for (let i = 0; i < plan.tasks.length; i++) { + // ... + await spawned.provider.createTask(task.description, undefined, undefined, { + startTask: false, + }, autoApprovalConfig) + // ... +} +``` + +Inside `ClineProvider.createTask()` at line 3009-3015: +```typescript +// Single-open-task invariant: always enforce for user-initiated top-level tasks +if (!parentTask) { + try { + await this.removeClineFromStack() // <--- THIS ABORTS THE PREVIOUS TASK + } catch { + // Non-fatal + } +} +``` + +**Critical**: The multi-orchestrator calls `createTask` with `parentTask` as `undefined` (3rd arg). This triggers `removeClineFromStack()` which **aborts and destroys** any previously-existing task on that provider. + +However, since each spawned provider is fresh (new `ClineProvider` per panel), the clineStack should be empty. So this is NOT the direct cause unless something else adds a task first. + +## Finding 4: The REAL 15-second suspect — `startTask` errors being swallowed + +When `start()` is called on a spawned task, it fires `startTask()` as a fire-and-forget async: + +```typescript +public start(): void { + // ... + this.startTask(task ?? undefined, images ?? undefined) + // No await! No catch! Fire-and-forget! +} +``` + +If `startTask()` throws (e.g., API call fails, webview not ready, provider settings wrong), the error is caught internally at line 2019-2024: + +```typescript +} catch (error) { + if (this.abandoned === true || this.abort === true || this.abortReason === "user_cancelled") { + return // silently swallowed + } + throw error // re-thrown but nobody catches it (fire-and-forget) +} +``` + +This re-thrown error becomes an **unhandled promise rejection** — the task silently dies without emitting either `TaskCompleted` or `TaskAborted`. 
The coordinator never receives any event, so the task stays in "running" state until `waitForAll()` eventually times out (default 10 minutes), or something else triggers abort. + +## Finding 5: Where the "failed" status could come from + +Three possible sources: + +1. **Coordinator `startAll()`** — if `getCurrentTask()` returns undefined: + ```typescript + if (!currentTask) { + this.handleAgentFinished(taskId, "failed") + } + ``` + +2. **Coordinator `startAll()`** — if `start()` throws synchronously: + ```typescript + try { + currentTask.start() + } catch (err) { + this.handleAgentFinished(taskId, "failed") + } + ``` + +3. **Orchestrator catch block** — if the entire `executeFromPlan()` throws: + ```typescript + } catch (error) { + this.state.phase = "complete" + this.state.finalReport = `Orchestration failed: ${error}` + } + ``` + +## Finding 6: No explicit 15-second timeout exists + +Searched for `15_000`, `15000`, `15.*second` across Task.ts, ClineProvider.ts, and agent-coordinator.ts — **no matches**. The 15-second observation is likely the time it takes for: +- Sequential task creation (~5s per task for handleModeSwitch + createTask) +- Plus API initialization failure time +- Plus the `waitForAll()` timeout revealing the stuck state + +## Diagnostic Logging Added + +Added `console.log` / `console.trace` instrumentation to: + +1. **`Task.start()`** — logs entry, `_started` state, metadata presence, abort/abandoned flags +2. **`Task.startTask()`** — logs entry with provider ref status +3. **`Task.startTask()` pre-loop** — logs when `initiateTaskLoop()` is about to be called +4. **`Task.abortTask()`** — logs entry with full state AND stack trace +5. **`Task.abortTask()` TaskAborted emission** — logs reason and abandoned state +6. **`AgentCoordinator.registerAgent()`** — logs taskId and getCurrentTask availability +7. **`AgentCoordinator.startAll()`** — logs each agent's getCurrentTask result and stack size +8. 
**`AgentCoordinator.handleAgentFinished()`** — logs status and completed set size + +## Recommended Next Steps + +1. **Run the orchestrator and check console output** — the logging will reveal exactly which path triggers "failed" +2. **Most likely fix**: The `start()` method should catch unhandled rejections from `startTask()`: + ```typescript + this.startTask(task ?? undefined, images ?? undefined).catch((error) => { + console.error(`[Task#${this.taskId}] startTask() rejected:`, error) + this.emit(RooCodeEventName.TaskAborted) + }) + ``` +3. **Alternative**: The coordinator could add a safety timeout per-agent that marks tasks as failed if no `TaskStarted` event is received within N seconds. diff --git a/docs/superpowers/specs/2026-03-22-intelligent-memory-system-design.md b/docs/superpowers/specs/2026-03-22-intelligent-memory-system-design.md new file mode 100644 index 00000000000..1f29a9103c7 --- /dev/null +++ b/docs/superpowers/specs/2026-03-22-intelligent-memory-system-design.md @@ -0,0 +1,656 @@ +# Intelligent Memory System — Design Spec + +## Overview + +A continuous learning system that analyzes user conversations during active chat sessions and builds a dynamically updating user profile. The profile captures coding preferences, communication style, skill levels, active projects, behavioral patterns, and dislikes — then compiles them into a natural-language section of the system prompt so that Roo's responses adapt to the individual user over time. + +The system is invisible by design — no dashboards, no management UI. A green/red toggle on the chat interface is the only surface. The data lives in files users can inspect if curious, but it is not surfaced in the UI. + +## Architecture + +``` +┌─────────────────────────────────────────────────────────────────────┐ +│ ACTIVE CHAT SESSION │ +│ │ +│ User msg → Roo response → User msg → Roo response → ... 
│ +│ │ │ +│ ▼ (every N user messages, if toggle = ON) │ +│ ┌──────────────────────┐ │ +│ │ Message Preprocessor │ ← strips tool blocks, keeps filenames, │ +│ │ (rule-based filter) │ keeps conversational text │ +│ └──────────┬───────────┘ │ +│ ▼ │ +│ ┌──────────────────────┐ ┌─────────────────────┐ │ +│ │ Analysis Agent │◄──│ Existing Memory │ │ +│ │ (cheap LLM via │ │ (compiled report for │ │ +│ │ selected profile) │ │ dedup/reinforcement) │ │ +│ └──────────┬───────────┘ └─────────────────────┘ │ +│ ▼ │ +│ ┌──────────────────────┐ │ +│ │ Memory Writer │ ← inserts/updates/reinforces entries │ +│ │ (structured entries) │ │ +│ └──────────┬───────────┘ │ +└─────────────┼──────────────────────────────────────────────────────┘ + ▼ +┌──────────────────────────┐ +│ SQLite Memory Store │ ← entries with metadata, scores, +│ (global + workspace) │ categories, timestamps +└──────────┬───────────────┘ + ▼ +┌──────────────────────────┐ +│ Prompt Compiler │ ← queries top-N entries by score, +│ (score → prose) │ renders as natural language +└──────────┬───────────────┘ + ▼ +┌──────────────────────────┐ +│ System Prompt Assembly │ ← USER PROFILE & PREFERENCES section +│ (system.ts) │ inserted after personality traits +└──────────────────────────┘ +``` + +### Key Design Decisions + +- **Storage**: SQLite via `sql.js` (SQLite compiled to WASM) — enables relational queries for the tiered scoring algorithm, atomic transactions, and clean global+workspace scoping. WASM avoids native binary packaging issues across platforms (no `better-sqlite3` build matrix needed). The DB is persisted to disk as a flat file and loaded into memory on init. +- **LLM Provider**: User selects from their existing configuration profiles (no new API key fields). Minimum 50K context window with a soft gate (note + filter, not hard-blocked). +- **Noise Reduction**: Rule-based preprocessing strips tool_use/tool_result blocks, code blocks, and command outputs before the LLM sees anything. 
File operations are reduced to filename-only references. +- **Memory Scope**: Global base profile + workspace-scoped entries. Global entries follow the user everywhere; workspace entries are project-specific. Workspace identity uses a stable hash of the workspace folder name + `.git` remote URL (if available), stored in a `workspace_identity` lookup table. This survives folder renames and symlink differences. +- **Privacy**: Defense in depth — LLM prompt instructions forbid PII extraction, AND a rule-based post-filter in the memory writer scans observations for common PII patterns (emails, API keys, phone numbers) and rejects matches before they reach the database. +- **Visibility**: Invisible by design. Toggle on chat interface is the only UI surface. Data is in files if users want to look. +- **Multi-window safety**: Since `sql.js` runs in-process (WASM), each VS Code window operates on its own in-memory copy. Writes are serialized to disk via an atomic temp-file-rename pattern. On DB load, the file is read fresh, so cross-window consistency is eventual (next prompt compilation picks up changes from other windows). + +--- + +## Component 1: SQLite Memory Store + +**Database location**: `{globalStoragePath}/memory/user_memory.db` + +**File**: `src/core/memory/memory-store.ts` + +**Library**: `sql.js` (SQLite compiled to WASM, zero native dependencies) + +**Persistence model**: The `.db` file is a flat binary. On init, `sql.js` loads it into memory. After each write transaction, the in-memory DB is exported and written to disk via atomic temp-file-rename (`write to .db.tmp` → `rename to .db`). This prevents corruption on crash. + +### Schema Versioning + +```sql +CREATE TABLE IF NOT EXISTS schema_meta ( + key TEXT PRIMARY KEY, + value TEXT NOT NULL +); +-- Seeded: INSERT INTO schema_meta VALUES ('version', '1'); +``` + +On init, `memory-store.ts` checks the `version` value and runs sequential migrations if needed (e.g., v1→v2→v3). 
Each migration is a function in a `migrations` array. This ensures schema evolution is safe across extension updates. + +### Schema + +#### `memory_categories` table + +| Column | Type | Description | +| -------------------- | ---------------- | ------------------------------------------ | +| `slug` | TEXT PRIMARY KEY | Category identifier | +| `label` | TEXT NOT NULL | Display name | +| `default_decay_rate` | REAL NOT NULL | Default decay for entries in this category | +| `priority_weight` | REAL NOT NULL | How much this category matters in scoring | + +**Seeded values:** + +| Slug | Label | Decay Rate | Priority Weight | +| ----------------------- | ------------------------- | ---------- | --------------- | +| `coding-style` | Coding Style | 0.05 | 0.9 | +| `communication-prefs` | Communication Preferences | 0.05 | 0.95 | +| `technical-proficiency` | Technical Proficiency | 0.08 | 0.85 | +| `tool-preferences` | Tool Preferences | 0.12 | 0.7 | +| `active-projects` | Active Projects | 0.3 | 0.6 | +| `behavioral-patterns` | Behavioral Patterns | 0.15 | 0.75 | +| `dislikes-frustrations` | Dislikes & Frustrations | 0.08 | 0.9 | + +#### `memory_entries` table + +| Column | Type | Description | +| --------------------- | ----------------- | -------------------------------------------------- | +| `id` | TEXT PRIMARY KEY | UUID | +| `workspace_id` | TEXT NULL | `NULL` = global, stable workspace hash = workspace-scoped | +| `category` | TEXT NOT NULL | FK → `memory_categories.slug` | +| `content` | TEXT NOT NULL | The learned fact as a concise statement | +| `significance` | REAL NOT NULL | 0.0–1.0, set by analysis agent | +| `first_seen` | INTEGER NOT NULL | Unix timestamp | +| `last_reinforced` | INTEGER NOT NULL | Unix timestamp | +| `reinforcement_count` | INTEGER DEFAULT 1 | Observation count | +| `decay_rate` | REAL NOT NULL | Category-based decay multiplier | +| `source_task_id` | TEXT NULL | Which task/chat produced this | +| `is_pinned` | INTEGER DEFAULT 0 | If 1, 
immune to decay (future use) | + +#### `analysis_log` table + +| Column | Type | Description | +| -------------------- | ---------------- | ------------------------------ | +| `id` | TEXT PRIMARY KEY | UUID | +| `timestamp` | INTEGER NOT NULL | When the analysis ran | +| `task_id` | TEXT NULL | Which chat session | +| `messages_analyzed` | INTEGER NOT NULL | Messages in the batch | +| `tokens_used` | INTEGER NOT NULL | Input + output tokens consumed | +| `entries_created` | INTEGER NOT NULL | New entries | +| `entries_reinforced` | INTEGER NOT NULL | Updated entries | + +### Scoring Formula + +Computed at query time, not stored: + +``` +score = significance + × priority_weight + × reinforcement_bonus(reinforcement_count) + × temporal_decay(days_since_reinforced, decay_rate) + +where: + reinforcement_bonus = min(log2(count + 1), 3.0) + temporal_decay = exp(-decay_rate × days_since_reinforced) +``` + +Entries with `computed_score < 0.05` are excluded from prompt compilation (noise threshold). + +### Garbage Collection + +After each analysis cycle, the orchestrator runs a cleanup pass: + +```sql +DELETE FROM memory_entries +WHERE is_pinned = 0 +AND last_reinforced < strftime('%s','now') - (90 * 86400) +AND ( + significance + * (SELECT priority_weight FROM memory_categories WHERE slug = category) + * MIN(LOG2(reinforcement_count + 1), 3.0) + * EXP(-decay_rate * ((strftime('%s','now') - last_reinforced) / 86400.0)) +) < 0.01 +``` + +Additionally, a hard cap of **500 entries** is enforced. If the count exceeds 500 after an analysis cycle, the lowest-scored entries are pruned until the count is within the cap. + +### Workspace Identity + +The `workspace_id` uses a stable hash rather than a raw file path. Computed as: + +```typescript +function getWorkspaceId(workspacePath: string): string { + const folderName = path.basename(workspacePath) + const gitRemote = tryGetGitRemoteUrl(workspacePath) // null if no git + const raw = gitRemote ? 
`${gitRemote}::${folderName}` : folderName + return createHash('sha256').update(raw).digest('hex').slice(0, 16) +} +``` + +This survives folder moves (if git remote is the same) and normalizes away symlink/mount differences. + +--- + +## Component 2: Message Preprocessor + +**File**: `src/core/memory/preprocessor.ts` + +A pure function with zero LLM cost. Takes raw `ApiMessage[]` and returns cleaned conversational text. + +### Rules + +``` +FOR EACH message in the batch: + + IF message.role === "user": + → KEEP full text content + → STRIP base64 image data (keep "[image attached]" placeholder) + + IF message.role === "assistant": + → KEEP text blocks (explanations, questions, summaries) + → FOR tool_use blocks: + IF tool === "read_file" / "write_to_file" / "apply_diff": + → REPLACE with "→ {tool}: {file_path}" + IF tool === "execute_command": + → REPLACE with "→ ran command: {command}" + IF tool === "search_files" / "list_files": + → REPLACE with "→ searched: {pattern/path}" + ELSE: + → STRIP entirely + → STRIP tool_result blocks entirely + → STRIP code blocks longer than 3 lines +``` + +### Output + +```typescript +interface PreprocessResult { + cleaned: string + originalTokenEstimate: number // via tiktoken o200k_base (reuses existing countTokens worker) + cleanedTokenEstimate: number // via tiktoken o200k_base +} +``` + +### Example Transformation + +**Before** (~4,000 tokens): + +``` +Assistant: I'll update the auth component to use the new hook pattern. +[tool_use: read_file, path: "src/auth/AuthProvider.tsx"] +[tool_result: 200 lines of code...] +[tool_use: apply_diff, path: "src/auth/AuthProvider.tsx", diff: ...] +[tool_result: success] +[tool_use: execute_command, cmd: "npm test"] +[tool_result: 45 lines of test output...] +Let me know if you'd prefer the context to be passed via props instead. +``` + +**After** (~120 tokens): + +``` +Assistant: I'll update the auth component to use the new hook pattern. 
+→ read: src/auth/AuthProvider.tsx +→ edited: src/auth/AuthProvider.tsx +→ ran command: npm test +Let me know if you'd prefer the context to be passed via props instead. +``` + +--- + +## Component 3: Analysis Agent + +**File**: `src/core/memory/analysis-agent.ts` + +Uses the existing `buildApiHandler()` with the user's selected memory config profile. NOT the main chat flow. + +### System Prompt + +``` +You are a User Profile Analyst. Your job is to extract factual observations +about the USER from conversation transcripts between them and a coding assistant. + +You will receive: +1. A cleaned conversation transcript (tool noise already removed) +2. The current compiled memory report (what is already known) + +EXTRACT observations about the user in these categories: +- coding-style: Languages, frameworks, patterns, conventions they prefer +- communication-prefs: Response length, tone, detail level they want +- technical-proficiency: Skill levels in specific technologies +- tool-preferences: Tools, linters, formatters, workflows they favor +- active-projects: What they're currently building (time-bound) +- behavioral-patterns: How they iterate, review, debug, make decisions +- dislikes-frustrations: Things that annoy them or they explicitly reject + +RULES: +- Only extract what is EVIDENCED in the transcript. Never infer beyond what's shown. +- If an observation matches something in the existing memory, mark it as REINFORCE + (don't create a duplicate). +- If an observation contradicts existing memory, mark it as UPDATE with the new value. +- If it's completely new, mark it as NEW. +- Write each observation as a concise, third-person factual statement + (e.g., "Prefers functional React components over class components") +- Assign significance 0.0-1.0 based on how broadly useful this fact is + for future interactions. 
+ +PRIVACY — NEVER extract: +- Real names, emails, addresses, phone numbers +- API keys, passwords, secrets, tokens +- Company confidential or proprietary details +- Health, financial, legal, or relationship information +- Anything the user explicitly marks as private or off-record + +If the conversation contains mostly one-liners or nothing personality-revealing, +return an empty observations array. Don't force extraction. + +Respond in this exact JSON format: +{ + "observations": [ + { + "action": "NEW" | "REINFORCE" | "UPDATE", + "category": "", + "content": "", + "significance": <0.0-1.0>, + "existing_entry_id": "", + "reasoning": "" + } + ], + "session_summary": "<1-2 sentences about what the user was doing this session>" +} +``` + +### Token Budget Allocation + +| Component | Estimated Budget | +| ---------------------------- | ---------------------- | +| System prompt (instructions) | ~1,500 tokens | +| Existing memory report | ~2,000–4,000 tokens | +| Cleaned conversation batch | ~5,000–15,000 tokens | +| Output (observations JSON) | ~2,000–4,000 tokens | +| Buffer | ~25,000+ tokens | +| **Total** | **~50,000 tokens max** | + +### Overflow Handling + +If the cleaned conversation batch exceeds the budget, truncate from oldest messages first (newest messages are more valuable for learning). + +### Error Handling + +- API failure: log, skip cycle, continue counting +- JSON parse failure: log, skip cycle +- Never surface errors to user + +--- + +## Component 4: Memory Writer + +**File**: `src/core/memory/memory-writer.ts` + +Takes the analysis agent's structured JSON output and upserts entries into SQLite. + +### Operations by Action Type + +**NEW**: Insert with UUID, current timestamps, category default decay rate. 
Workspace scoping logic: + +- `active-projects` → always workspace-scoped +- `coding-style`, `communication-prefs`, `dislikes-frustrations` → always global +- `technical-proficiency`, `tool-preferences`, `behavioral-patterns` → global by default, workspace-scoped if content references project-specific paths + +**REINFORCE**: Update `last_reinforced` timestamp and increment `reinforcement_count`. Significance is NOT overwritten. + +**UPDATE**: Replace `content` and `significance`, update `last_reinforced`, increment `reinforcement_count`. For when user preferences genuinely change. + +### PII Post-Filter (Defense in Depth) + +Before any observation is written to the database, the memory writer runs a rule-based scan on the `content` field. If any pattern matches, the observation is silently rejected: + +```typescript +const PII_PATTERNS = [ + /\S+@\S+\.\S+/, // email addresses + /sk-[a-zA-Z0-9]{20,}/, // OpenAI-style API keys + /ghp_[a-zA-Z0-9]{36}/, // GitHub PATs + /\b\d{3}[-.]?\d{3}[-.]?\d{4}\b/, // phone numbers (US) + /\b\d{3}-\d{2}-\d{4}\b/, // SSN pattern + /AKIA[0-9A-Z]{16}/, // AWS access keys + /-----BEGIN (RSA |EC )?PRIVATE KEY-----/, // private keys +] +``` + +This costs nothing at runtime and provides a safety net when the LLM ignores its instructions. + +### Deduplication Safety + +Before inserting any NEW entry, query existing entries in the same category and workspace scope: + +```sql +SELECT id, content FROM memory_entries +WHERE category = ? AND (workspace_id IS ? 
OR workspace_id IS NULL) +ORDER BY last_reinforced DESC +``` + +Then compute **Jaccard similarity** on tokenized content: + +```typescript +function jaccardSimilarity(a: string, b: string): number { + const tokenize = (s: string) => new Set(s.toLowerCase().split(/\s+/).filter(w => w.length > 2)) + const setA = tokenize(a) + const setB = tokenize(b) + const intersection = new Set([...setA].filter(x => setB.has(x))) + return intersection.size / new Set([...setA, ...setB]).size +} +``` + +If Jaccard similarity ≥ 0.6, convert the NEW to a REINFORCE on the matched entry. + +### Invalid Entry ID Handling + +For REINFORCE and UPDATE actions referencing `existing_entry_id`: +1. Verify the ID exists in the database +2. Verify it belongs to the expected category +3. If invalid: REINFORCE → silently skip (no-op), UPDATE → treat as NEW with dedup check + +This guards against LLM hallucinating entry IDs. + +### Transaction Safety + +All inserts/updates/log entry run inside a single transaction. Full rollback on any failure. With `sql.js`, this is managed via `db.run("BEGIN TRANSACTION")` / `db.run("COMMIT")` with try/catch rollback. + +--- + +## Component 5: Prompt Compiler + +**File**: `src/core/memory/prompt-compiler.ts` + +Runs every time the system prompt is assembled — not just after analysis cycles. + +### Pipeline + +1. **Query and score**: Select all global + current workspace entries, compute score via the scoring formula, filter by `> 0.05` threshold, order by score descending, limit 40 entries. + +2. **Group by category**: Organize scored entries into their categories, maintaining score order within each group. Omit empty categories. + +3. **Render as prose**: Each category becomes a natural-language paragraph: + +``` +USER PROFILE & PREFERENCES +(Learned through conversation — continuously updated) + +Communication: Prefers concise, direct responses without over-explanation. +Appreciates when complex topics are broken into numbered steps. 
+ +Coding Style: Strongly favors functional React with hooks over class +components. Uses TypeScript strictly — no 'any' types. + +Technical Level: Advanced TypeScript and React. Intermediate Python. + +... +``` + +4. **Token cap**: Maximum ~1,500 tokens for the entire section. Drop lowest-scored entries until it fits. + +### System Prompt Integration + +Injected in `system.ts`'s `generatePrompt()`. The current template is: + +```typescript +const basePrompt = `${roleDefinition} +${personalityParts.top} +${markdownFormattingSection()} +${getSharedToolUseSection(...)} +... +${await addCustomInstructions(...)}${personalityParts.bottom}` +``` + +The `userProfileSection` is inserted as a new line between `personalityParts.top` and `markdownFormattingSection()`: + +```typescript +const basePrompt = `${roleDefinition} +${personalityParts.top} +${userProfileSection} // ← NEW: learned user memory +${markdownFormattingSection()} +${getSharedToolUseSection(...)} +... +${await addCustomInstructions(...)}${personalityParts.bottom}` +``` + +This positions user knowledge immediately after personality voice, so the LLM processes "here's how I talk" then "here's who I'm talking to" before any tool/capability context. + +### Analysis Agent Variant + +For the analysis agent, render entries with IDs visible: + +``` +[e3f2a1] coding-style (score: 0.87): Prefers functional React with hooks +[b7c4d9] communication-prefs (score: 0.92): Likes concise responses +``` + +--- + +## Component 6: Toggle UI + +### Chat Interface Toggle + +**File**: `webview-ui/src/components/chat/ChatTextArea.tsx` + +A small, always-visible indicator near the chat input: + +- **Green dot** + "Memory Learning" when active +- **Red dot** + "Memory Paused" when off +- **Grey dot** + "Memory: Not configured" when no profile selected +- Click to toggle on/off +- Tooltip: "Roo learns your preferences from this conversation. Click to pause." 
+- Clicking grey state prompts: "Select a model profile in Settings → Memory to enable." + +State persisted in `globalState` as `memoryLearningEnabled: boolean`. + +### Settings Configuration + +**File**: `webview-ui/src/components/settings/SettingsView.tsx` (global settings area, NOT ModesView) + +Memory is a global feature — it applies across all modes and conversations. Its configuration lives alongside other extension-wide settings (like auto-approval, TTS, sound) rather than in per-mode config. + +New section in global settings: + +``` +Memory Learning +├── Profile: [Select configuration profile ▼] +│ Filtered to profiles with models ≥ 50K context +│ Note: "Select a model with at least 50K context window" +│ If selected model's context window is unknown, show warning +├── Analysis frequency: [Every __ messages ▼] (default: 8) +└── [Enabled by default for new sessions: ☑] +``` + +### Global Settings Additions + +In `globalSettingsSchema`: + +```typescript +memoryLearningEnabled: z.boolean().optional() +memoryApiConfigId: z.string().optional() +memoryAnalysisFrequency: z.number().optional() +memoryLearningDefaultEnabled: z.boolean().optional() +``` + +--- + +## Component 7: Pipeline Orchestrator + +**File**: `src/core/memory/orchestrator.ts` + +Coordinates the full pipeline lifecycle. + +### Lifecycle + +``` +1. INITIALIZATION (on extension activate) + → Open/create SQLite database + → Seed categories table if empty + → Load memoryLearningEnabled from globalState + +2. MESSAGE COUNTER (during active chat, if toggle = ON) + → Increment counter on each user message + → Track watermark: which message index was last analyzed + +3. TRIGGER (counter hits N threshold OR session ends) + → Grab messages from watermark to current + → Validate: is config profile selected? Is context window ≥ 50K? 
+ → If invalid: skip silently, reset counter + → Session-end trigger: when a task completes or is abandoned, if there + are any unanalyzed messages since the last watermark, fire one final + analysis cycle. This catches short but info-rich conversations that + never hit the N-message threshold. + +4. ANALYSIS PIPELINE (async, non-blocking) + → preprocessMessages(batch) → cleaned text + token counts + → compileExistingMemory(withIds: true) → current report for agent + → Budget check: cleaned + report + instructions < context budget? + → If over: truncate oldest messages, retry + → If still over: skip this cycle, log it + → buildApiHandler(selectedProfile) → handler + → handler.createMessage(analysisPrompt, messages) + → Parse JSON response + → memoryWriter.process(observations) + → Log to analysis_log + → Reset counter and watermark + +5. TOGGLE CHANGE + → Update globalState + → If OFF: stop counting, ignore triggers + → If ON: resume counting from current message + +6. ERROR HANDLING + → API failure: log, skip cycle, continue counting + → JSON parse failure: log, skip cycle + → DB error: log, disable pipeline until restart + → Never surface errors to user +``` + +### Non-Blocking Guarantee + +The analysis pipeline runs fully async and detached from the chat flow. The user's conversation is never blocked or slowed. + +### Concurrency Guard + +Only one analysis runs at a time. If a trigger fires during an in-flight analysis, it queues (max one queued). If another is already queued, the new trigger is dropped. 
+ +--- + +## File Structure + +### New Files + +``` +src/core/memory/ +├── orchestrator.ts # Pipeline coordinator, lifecycle, triggers +├── preprocessor.ts # Rule-based message noise filter +├── analysis-agent.ts # LLM invocation, prompt, response parsing +├── memory-writer.ts # Observation → SQLite upsert logic +├── prompt-compiler.ts # Score query → natural language prose +├── memory-store.ts # SQLite connection, schema init, queries +├── scoring.ts # Score computation helpers, decay formula +├── types.ts # MemoryEntry, Observation, AnalysisResult +└── __tests__/ + ├── preprocessor.spec.ts + ├── memory-writer.spec.ts + ├── prompt-compiler.spec.ts + ├── scoring.spec.ts + └── orchestrator.spec.ts +``` + +### Modified Files + +``` +packages/types/src/global-settings.ts # + memory settings fields +packages/types/src/vscode-extension-host.ts # + memory message types +src/core/prompts/system.ts # + userProfileSection insertion +src/core/prompts/sections/index.ts # + re-export prompt compiler +src/core/webview/ClineProvider.ts # + orchestrator init, toggle +src/core/webview/webviewMessageHandler.ts # + toggleMemoryLearning msg +webview-ui/src/components/chat/ChatTextArea.tsx # + toggle indicator +webview-ui/src/components/settings/SettingsView.tsx # + memory config section (global settings) +package.json # + sql.js dependency +``` + +### Runtime Files + +``` +{globalStoragePath}/memory/user_memory.db # SQLite database +``` + +--- + +## Testing Strategy + +- **Preprocessor**: Pure function, fully unit testable. Test with various message shapes (tool-heavy, conversational, mixed, edge cases like empty messages and image-only). +- **Scoring**: Pure math, unit test the formula edge cases (zero reinforcement, extreme decay, pinned entries). +- **Memory Writer**: Test with mock DB — verify NEW/REINFORCE/UPDATE logic, deduplication, transaction rollback. +- **Prompt Compiler**: Test rendered output format, token budget enforcement, category grouping, empty state. 
+- **Orchestrator**: Integration test with mock API handler and in-memory SQLite — verify trigger counting, concurrency guard, error recovery. + +--- + +## Open Questions for Experimentation + +These are intentionally left as tunable parameters rather than hard commitments: + +1. **Analysis frequency (N messages)**: Default 8, but may need adjustment based on analysis_log data showing token consumption per cycle. +2. **Scoring weights**: The decay rates and priority weights are initial guesses. The analysis_log provides data to tune them. +3. **50K context minimum**: May need revision upward or downward based on real-world token usage logs. +4. **Deduplication threshold (0.6 similarity)**: May need tuning to balance between catching duplicates and false-merging distinct entries. +5. **Prompt section token cap (1,500)**: Balance between giving Roo enough user context and not bloating the system prompt. diff --git a/docs/superpowers/specs/2026-03-22-memory-debugging-spec.md b/docs/superpowers/specs/2026-03-22-memory-debugging-spec.md new file mode 100644 index 00000000000..c9d5eea96f0 --- /dev/null +++ b/docs/superpowers/specs/2026-03-22-memory-debugging-spec.md @@ -0,0 +1,58 @@ +# Memory System Debugging Spec + +## Confirmed: Memory Pipeline Works + +The memory database has **38 entries, 41 analysis runs, 137 reinforcements**. The prompt compiler generates a 4,519-char user profile. The data is real and rich. + +## Bug 1: System Prompt Not Showing Memory Profile + +**Symptom:** The compiled USER PROFILE & PREFERENCES section is not appearing in the system prompt even though the database has entries and the compiler generates valid output. + +**Investigation areas:** +1. `Task.ts:3955-3957`: Does `provider.getMemoryOrchestrator()` return a valid orchestrator? +2. Does `memoryOrchestrator.getUserProfileSection()` return non-empty string? +3. Is the `userProfileSection` parameter actually being passed to `SYSTEM_PROMPT()`? +4. 
In `system.ts:96`: Is `${userProfileSection || ""}` rendering correctly? +5. Is `generatePrompt()` being called with the right number of arguments (the new parameter at the end)? +6. Is the system prompt regenerated after memory is populated, or is it cached? +7. Is there a timing issue — the prompt is generated before the memory DB is loaded? +8. Check `generateSystemPrompt.ts` (the preview function) — it does NOT pass userProfileSection, so the preview will never show it. But the live chat should via Task.ts. + +## Bug 2: Progress Bar Resets When Leaving Memory Tab + +**Symptom:** Navigating away from the Memory settings tab and back causes the progress to disappear. Starting a new sync while the old one runs causes the two to fight. + +**Root cause:** React state (`isSyncing`, `syncProgress`) lives in the SettingsView component which unmounts when switching tabs. The backend continues running but the frontend loses track. + +**Fix approach:** +1. Move sync state to the extension host (globalState or a dedicated state object) +2. On webview mount, request current sync status from extension host +3. Extension host tracks: `memorySyncInProgress`, `memorySyncProgress`, `memorySyncTotal` +4. When SettingsView mounts, it requests status and restores the progress bar +5. Guard against concurrent syncs — if a sync is running, reject new startMemorySync requests + +**New message types needed:** +- WebviewMessage: `"getMemorySyncStatus"` — request current sync state +- ExtensionMessage: `"memorySyncStatus"` — response with `{ inProgress, completed, total }` + +## Bug 3: Concurrent Sync Conflict + +**Symptom:** Starting a second sync while the first is running causes interleaved progress updates. + +**Fix:** Add a `syncInProgress` flag to the orchestrator. 
If `batchAnalyzeHistory` is called while one is already running, either: +- Option A: Reject with a status message ("Sync already in progress") +- Option B: Queue the new task IDs and process them after the current batch + +Option A is simpler and correct — the user should wait for the current sync to finish. + +## Files to Modify + +| File | Changes | +|---|---| +| `src/core/memory/orchestrator.ts` | Add `syncInProgress` guard, `getSyncStatus()` method | +| `src/core/task/Task.ts` | Debug/verify the `userProfileSection` flow | +| `src/core/prompts/system.ts` | Verify the template injection | +| `src/core/webview/webviewMessageHandler.ts` | Add `getMemorySyncStatus` handler, guard concurrent syncs | +| `packages/types/src/vscode-extension-host.ts` | Add `getMemorySyncStatus`, `memorySyncStatus` message types | +| `webview-ui/src/components/settings/SettingsView.tsx` | Request sync status on mount, show persistent progress | +| `src/core/webview/generateSystemPrompt.ts` | Add userProfileSection for preview | diff --git a/docs/superpowers/specs/2026-03-22-memory-sync-and-provider-fix.md b/docs/superpowers/specs/2026-03-22-memory-sync-and-provider-fix.md new file mode 100644 index 00000000000..4b4c3ceefec --- /dev/null +++ b/docs/superpowers/specs/2026-03-22-memory-sync-and-provider-fix.md @@ -0,0 +1,241 @@ +# Memory System: Provider Fix & Prior Chat Sync — Design Spec + +## Problem 1: Wrong Provider Settings (Bug) + +The memory orchestrator receives the main chat provider's settings instead of the memory-specific profile. In `Task.ts:2700-2701`, `contextProxy.getProviderSettings()` returns the active chat profile, but the user configures a separate `memoryApiConfigId` in Settings > Memory. 
+ +### Fix + +Follow the `enhancementApiConfigId` precedent from `messageEnhancer.ts:47-59`: + +```typescript +// In Task.ts, where onUserMessage/onSessionEnd are called: +const memoryConfigId = provider.contextProxy.getValue("memoryApiConfigId") +let memoryProviderSettings: ProviderSettings | null = null + +if (memoryConfigId) { + try { + const { name: _, ...settings } = await provider.providerSettingsManager.getProfile({ + id: memoryConfigId, + }) + if (settings.apiProvider) { + memoryProviderSettings = settings + } + } catch { + // Profile not found — skip + } +} + +memOrch.onUserMessage(this.apiConversationHistory, this.taskId, memoryProviderSettings) +``` + +Same pattern for the `onSessionEnd` call. + +--- + +## Problem 2: Cold-Start — No Data Until 8+ Messages + +Users enable memory but see nothing in the system prompt because the database is empty. They need a way to bootstrap from existing chat history. + +--- + +## Feature: Prior Chat Sync + +### User Flow + +1. User goes to Settings > Memory +2. Clicks "Browse Chats" — opens a dialog with all prior conversations listed +3. Each chat shows the first message text + date, with a checkbox +4. "Select All" / "Deselect All" toggle +5. Selection count: "12 of 47 selected" +6. Clicks "Learn" button to start batch analysis +7. Progress bar fills: "8 of 12 chats analyzed" +8. While running: spinner/loading icon. When done: green circle (matches chat toggle design) +9. System prompt now has USER PROFILE section immediately + +### Clear Memory + +A "Clear Memory" button with AlertDialog confirmation ("This will reset all learned preferences. Are you sure?") that wipes the SQLite database. 
+ +--- + +## Backend: Batch Analysis Pipeline + +### New method on MemoryOrchestrator + +```typescript +async batchAnalyzeHistory( + taskIds: string[], + globalStoragePath: string, + providerSettings: ProviderSettings, + onProgress: (completed: number, total: number) => void, +): Promise<{ totalAnalyzed: number; entriesCreated: number; entriesReinforced: number }> +``` + +For each task ID: +1. Read `api_conversation_history.json` via `readApiMessages({ taskId, globalStoragePath })` +2. `preprocessMessages(messages)` — strip noise +3. `runAnalysis(providerSettings, cleaned, existingReport)` — extract observations +4. `processObservations(store, observations, workspaceId, taskId)` — write to SQLite +5. Call `onProgress(i + 1, taskIds.length)` +6. Run garbage collection after all tasks + +Sequential processing (one task at a time) to avoid API rate limits. + +### New method: clearAllMemory() + +```typescript +clearAllMemory(): void { + this.store.deleteAllEntries() + this.store.persist() +} +``` + +### New message types + +WebviewMessage additions: +- `"startMemorySync"` — payload: `{ taskIds: string[] }` via `text` (JSON) +- `"clearMemory"` — no payload + +ExtensionMessage additions: +- `"memorySyncProgress"` — payload: `{ completed: number, total: number }` via `text` (JSON) +- `"memorySyncComplete"` — payload: `{ totalAnalyzed: number, entriesCreated: number, entriesReinforced: number }` via `text` (JSON) +- `"memoryCleared"` — no payload + +### Message handlers + +In `webviewMessageHandler.ts`: + +```typescript +case "startMemorySync": { + const { taskIds } = JSON.parse(message.text || "{}") as { taskIds: string[] } + const orchestrator = provider.getMemoryOrchestrator() + if (!orchestrator) break + + // Resolve memory provider settings (same pattern as enhancementApiConfigId) + const memoryConfigId = provider.getValue("memoryApiConfigId") + if (!memoryConfigId) break + + const { name: _, ...memSettings } = await provider.providerSettingsManager.getProfile({ + id: memoryConfigId, + }) + 
+ const globalStoragePath = provider.contextProxy.globalStorageUri.fsPath + + // Run in background, post progress + orchestrator.batchAnalyzeHistory( + taskIds, + globalStoragePath, + memSettings, + (completed, total) => { + provider.postMessageToWebview({ + type: "memorySyncProgress", + text: JSON.stringify({ completed, total }), + }) + }, + ).then((result) => { + provider.postMessageToWebview({ + type: "memorySyncComplete", + text: JSON.stringify(result), + }) + }).catch(() => { + provider.postMessageToWebview({ + type: "memorySyncComplete", + text: JSON.stringify({ totalAnalyzed: 0, entriesCreated: 0, entriesReinforced: 0 }), + }) + }) + break +} + +case "clearMemory": { + const orchestrator = provider.getMemoryOrchestrator() + if (orchestrator) { + orchestrator.clearAllMemory() + provider.postMessageToWebview({ type: "memoryCleared" }) + } + break +} +``` + +--- + +## Frontend: Settings UI Enhancement + +### MemoryChatPicker Component + +New file: `webview-ui/src/components/settings/MemoryChatPicker.tsx` + +A Dialog containing: +- Scrollable list of `HistoryItem[]` with Checkbox per item +- Shows `item.task` (first message text) + `formatTimeAgo(item.ts)` +- "Select All" / "Deselect All" at top +- Selection count +- "Learn" button at bottom + +Props: +```typescript +interface MemoryChatPickerProps { + open: boolean + onOpenChange: (open: boolean) => void + taskHistory: HistoryItem[] + onStartSync: (taskIds: string[]) => void +} +``` + +### Extended Memory Settings Section + +In SettingsView.tsx, below existing config: + +``` +Prior Chat Analysis +├── [Browse Chats] → opens MemoryChatPicker +├── Progress: [■■■■■■░░░░] 8 of 12 analyzed +├── Status: ⟳ syncing... 
| ● done +└── [Clear Memory] → AlertDialog confirmation +``` + +State management: +```typescript +const [isSyncing, setIsSyncing] = useState(false) +const [syncProgress, setSyncProgress] = useState({ completed: 0, total: 0 }) +const [syncDone, setSyncDone] = useState(false) +const [pickerOpen, setPickerOpen] = useState(false) +const [clearDialogOpen, setClearDialogOpen] = useState(false) +``` + +Message listener: +```typescript +useEffect(() => { + const handler = (event: MessageEvent) => { + if (event.data.type === "memorySyncProgress") { + const { completed, total } = JSON.parse(event.data.text) + setSyncProgress({ completed, total }) + } + if (event.data.type === "memorySyncComplete") { + setIsSyncing(false) + setSyncDone(true) + } + if (event.data.type === "memoryCleared") { + setSyncDone(false) + setSyncProgress({ completed: 0, total: 0 }) + } + } + window.addEventListener("message", handler) + return () => window.removeEventListener("message", handler) +}, []) +``` + +--- + +## Files Changed + +### New +- `webview-ui/src/components/settings/MemoryChatPicker.tsx` + +### Modified +- `src/core/task/Task.ts` — fix provider resolution (2 locations) +- `src/core/memory/orchestrator.ts` — add `batchAnalyzeHistory()`, `clearAllMemory()` +- `src/core/memory/memory-store.ts` — add `deleteAllEntries()` method +- `packages/types/src/vscode-extension-host.ts` — add 5 new message types +- `src/core/webview/webviewMessageHandler.ts` — add `startMemorySync`, `clearMemory` handlers +- `webview-ui/src/components/settings/SettingsView.tsx` — extend Memory section diff --git a/docs/superpowers/specs/2026-03-22-multi-orch-debugging-spec.md b/docs/superpowers/specs/2026-03-22-multi-orch-debugging-spec.md new file mode 100644 index 00000000000..a04f0cd94f4 --- /dev/null +++ b/docs/superpowers/specs/2026-03-22-multi-orch-debugging-spec.md @@ -0,0 +1,149 @@ +# Multi-Orchestrator Debugging Spec + +## Current State + +The multi-orchestrator can generate plans and spawn panels, but has 
three critical runtime issues. + +## Bug 1: Wrong Agent Count (asked for 3, got 2) + +### Root Cause +In `src/core/multi-orchestrator/plan-generator.ts` at lines 120-124: +```typescript +if (plan && plan.tasks.length > 3 && userRequest.split(" ").length < 20) { + plan.tasks = plan.tasks.slice(0, 2) +} +``` +This "short-request heuristic" forcibly slices ANY plan to 2 tasks if the user's message has fewer than 20 words. This overrides both the user's agent count selection AND the LLM's plan. + +### Fix +Remove this heuristic entirely. The maxAgents cap at line 239 already handles the limit. The user's explicit agent count selection should ALWAYS be respected. If the LLM returns fewer tasks than maxAgents, that's fine — the LLM's judgement on task count is better than a word-count heuristic. + +Delete lines 120-125 in `plan-generator.ts`. + +### File +`src/core/multi-orchestrator/plan-generator.ts` — lines 120-125 + +--- + +## Bug 2: Sequential Spawning (1 minute between agents) + +### Root Cause +In `src/core/multi-orchestrator/orchestrator.ts` at lines 193-233, the spawn loop is: +```typescript +for (let i = 0; i < plan.tasks.length; i++) { + await spawned.provider.handleModeSwitch(task.mode) // SLOW — async + await spawned.provider.createTask(...) // SLOW — async, involves webview init +} +``` +Each `createTask` is `await`ed before the next begins. `handleModeSwitch` is also async. Combined with panel creation in `spawnPanels`, each agent takes ~15-30 seconds to fully initialize. + +### Fix +Two changes: + +1. **Parallel panel spawning** in `panel-spawner.ts`: Currently `spawnPanels` creates panels sequentially. Change to `Promise.all`: +```typescript +const promises = titles.map((title, i) => this.spawnSinglePanel(i, title)) +const results = await Promise.all(promises) +``` + +2. 
**Parallel task creation** in `orchestrator.ts`: After ALL panels are spawned, create all tasks in parallel: +```typescript +const taskPromises = plan.tasks.map(async (task, i) => { + const [panelId, spawned] = panelEntries[i] + try { + await spawned.provider.handleModeSwitch(task.mode) + } catch {} + await spawned.provider.createTask(task.description, undefined, undefined, { + startTask: false, + }, autoApprovalConfig) + this.coordinator.registerAgent(agent, spawned.provider) +}) +await Promise.all(taskPromises) +``` + +### Files +- `src/core/multi-orchestrator/panel-spawner.ts` — parallelize panel creation +- `src/core/multi-orchestrator/orchestrator.ts` — parallelize task creation loop (lines 193-233) + +--- + +## Bug 3: Auto-Approval Not Working (agents block on tool prompts) + +### Root Cause +In `src/core/webview/ClineProvider.ts` line 2958-2959: +```typescript +if (configuration) { + await this.setValues(configuration) +``` +`setValues` writes to `ContextProxy`, which is shared across all providers created from the same `ContextProxy.getInstance()`. The auto-approval settings ARE being written, but the issue is timing: + +1. `setValues(autoApprovalConfig)` writes to the shared proxy +2. `createTask()` then calls `removeClineFromStack()` (line 3002-3007) which may trigger state resets +3. The `Task` constructor creates an `AutoApprovalHandler` which reads settings from the provider's state at construction time +4. If the provider's state was reset between `setValues` and Task construction, the auto-approval is lost + +Additionally, the `autoApprovalEnabled` setting might be a per-PROFILE setting rather than a global one. The spawned provider uses a specific API profile ('BRRRR'), and that profile's approval settings might override the ones we set via `setValues`. 
+ +### Fix +Instead of relying on `setValues` + ContextProxy, set auto-approval DIRECTLY on the Task's AutoApprovalHandler after creation: + +```typescript +const newTask = await spawned.provider.createTask(task.description, undefined, undefined, { + startTask: false, +}, autoApprovalConfig) + +// FORCE auto-approval directly on the task's approval handler +if (newTask.autoApprovalHandler) { + newTask.autoApprovalHandler.setEnabled(true) + // Or whatever the method is to force all approvals +} +``` + +Alternative: Check how the existing `new_task` tool (used by the single orchestrator) handles auto-approval for subtasks. Search `NewTaskTool.ts` and `delegateParentAndOpenChild` — the single orchestrator's subtasks DO run with auto-approval, so there's a pattern that works. + +### Files +- `src/core/multi-orchestrator/orchestrator.ts` — force auto-approval after task creation +- Check `src/core/tools/NewTaskTool.ts` and `ClineProvider.delegateParentAndOpenChild` for the working pattern + +--- + +## Additional Issue: Short-Request Heuristic Regression + +The "smart task count" fix from a previous agent added the 20-word heuristic at `plan-generator.ts:120-124` which actively undermines the user's agent count selection. This is the most impactful fix — deleting 5 lines. 
+ +--- + +## Agent Assignments + +### Agent A: Fix agent count (plan-generator.ts) +- Remove the short-request heuristic (lines 120-125) +- Verify the hard cap at line 239 still works correctly +- Test: maxAgents=3 should produce 3 tasks if the LLM returns 3+ + +### Agent B: Parallelize spawning (panel-spawner.ts + orchestrator.ts) +- Refactor `spawnPanels` to create panels via `Promise.all` +- Refactor the task creation loop in `executeFromPlan` to use `Promise.all` +- Ensure all panels exist before ANY task starts + +### Agent C: Fix auto-approval (orchestrator.ts + investigate ClineProvider) +- Research how the existing single orchestrator's `new_task` tool handles auto-approval for subtasks +- Read `src/core/tools/NewTaskTool.ts` and `ClineProvider.delegateParentAndOpenChild` +- Apply the same pattern to multi-orchestrator spawned tasks +- Verify by checking AutoApprovalHandler state after task creation + +### Agent D: Add logging to auto-approval chain +- In `orchestrator.ts`: log what autoApprovalConfig is being passed +- In `ClineProvider.createTask`: log what configuration values are being applied +- In AutoApprovalHandler (find it): log what it reads on construction +- This will show exactly where auto-approval is being lost + +### Agent E: Test the full flow end-to-end +- After Agents A-D complete, run: `cd src && npx vitest run core/multi-orchestrator/` +- Run `cd src && npx tsc --noEmit` +- Verify no regressions in memory tests: `cd src && npx vitest run core/memory/` + +### Agent F: Fix type compilation errors from parallelization changes +- Run `cd packages/types && npx tsc --noEmit` +- Run `cd src && npx tsc --noEmit` +- Run `cd webview-ui && npx tsc --noEmit` +- Fix any errors from the parallel changes diff --git a/docs/superpowers/specs/2026-03-22-multi-orchestrator-design.md b/docs/superpowers/specs/2026-03-22-multi-orchestrator-design.md new file mode 100644 index 00000000000..a54ce6ca096 --- /dev/null +++ 
b/docs/superpowers/specs/2026-03-22-multi-orchestrator-design.md @@ -0,0 +1,1569 @@ +# Multi-Orchestrator Mode — Design Spec & Agent Tasks + +## Overview + +A new mode that decomposes complex tasks into N parallel subtasks, each running in its own editor tab panel with an independent ClineProvider, isolated via git worktrees. After all agents complete, a merge resolution phase combines their work, and reports are aggregated back to the orchestrator. + +## Key Decisions + +| Decision | Choice | +|---|---| +| Visual layout | Editor tab panels via `createWebviewPanel(ViewColumn)` | +| Parallel execution | Multiple independent ClineProvider instances | +| Git isolation | Git worktrees via existing `WorktreeService` | +| Planning mode | Toggle: autonomous (default) vs plan-review | +| Agent count | User sets max (1-6) in chat area, orchestrator decides within limit | +| Agent count control | Visible in chat area ONLY when multi-orchestrator mode is selected | +| Merge phase | Auto-detect (skip if no code agents) + manual override in settings | + +## Architecture + +``` +User Request → Multi-Orchestrator (sidebar) + │ + ├─ 1. PLAN: Decompose into N tasks, assign modes + ├─ 2. SPAWN: Create worktrees + open N tab panels + ├─ 3. RUN: Start all simultaneously, monitor via events + ├─ 4. MERGE: Sequential branch merges (if code tasks) + └─ 5. REPORT: Aggregate results, present summary, cleanup +``` + +--- + +## SHARED INTERFACE CONTRACTS + +Every agent MUST use these exact signatures. Agent 1 creates this file; all others import from it. 
+ +### File: `src/core/multi-orchestrator/types.ts` + +```typescript +import type { ModeConfig } from "@roo-code/types" + +export interface OrchestratorPlan { + tasks: PlannedTask[] + requiresMerge: boolean + estimatedComplexity: "low" | "medium" | "high" +} + +export interface PlannedTask { + id: string + mode: string + title: string + description: string + assignedFiles?: string[] + priority: number +} + +export type AgentStatus = "pending" | "running" | "completed" | "failed" | "merging" + +export interface AgentState { + taskId: string + providerId: string + panelId: string + worktreePath: string | null + worktreeBranch: string | null + mode: string + status: AgentStatus + title: string + completionReport: string | null + tokenUsage: { input: number; output: number } | null + startedAt: number | null + completedAt: number | null +} + +export interface MergeResult { + agentTaskId: string + branch: string + success: boolean + conflictsFound: number + conflictsResolved: number + filesChanged: string[] +} + +export interface OrchestratorState { + phase: "idle" | "planning" | "spawning" | "running" | "merging" | "reporting" | "complete" + plan: OrchestratorPlan | null + agents: AgentState[] + mergeResults: MergeResult[] + finalReport: string | null +} + +export const MULTI_ORCHESTRATOR_CONSTANTS = { + MAX_AGENTS: 6, + DEFAULT_MAX_AGENTS: 4, + WORKTREE_PREFIX: "roo-multi-", + BRANCH_PREFIX: "multi-orch/", +} as const +``` + +--- + +## AGENT 1: Types & Constants + +**Creates:** `src/core/multi-orchestrator/types.ts` + +**Task:** Create the file above exactly as specified in the SHARED INTERFACE CONTRACTS section. This is the foundation every other agent imports from. 
+ +Additionally, add a helper to generate agent IDs: + +```typescript +import * as crypto from "crypto" + +export function generateAgentId(): string { + return crypto.randomUUID().slice(0, 8) +} + +export function createInitialAgentState(task: PlannedTask): AgentState { + return { + taskId: task.id, + providerId: "", + panelId: "", + worktreePath: null, + worktreeBranch: null, + mode: task.mode, + status: "pending", + title: task.title, + completionReport: null, + tokenUsage: null, + startedAt: null, + completedAt: null, + } +} + +export function createInitialOrchestratorState(): OrchestratorState { + return { + phase: "idle", + plan: null, + agents: [], + mergeResults: [], + finalReport: null, + } +} +``` + +**Commit:** `feat(multi-orch): add shared types and constants` +**Use `--no-verify` on commits.** + +--- + +## AGENT 2: Message Types & Global Settings + +**Modifies:** +- `packages/types/src/vscode-extension-host.ts` +- `packages/types/src/global-settings.ts` + +**Task:** + +### 2a. Add message types to `vscode-extension-host.ts` + +Find the `WebviewMessage` interface type union. After the last entry (`"getMemoryStatus"`), add: + +```typescript +| "multiOrchStartPlan" +| "multiOrchApprovePlan" +| "multiOrchAbort" +| "multiOrchGetStatus" +``` + +Find the `ExtensionMessage` interface type union. After the last entry (`"memoryStatus"`), add: + +```typescript +| "multiOrchPlanReady" +| "multiOrchStatusUpdate" +| "multiOrchComplete" +| "multiOrchError" +``` + +### 2b. 
Add global settings to `global-settings.ts` + +Find `globalSettingsSchema` and add before the closing `})`: + +```typescript +// Multi-Orchestrator +multiOrchMaxAgents: z.number().min(1).max(6).optional(), +multiOrchPlanReviewEnabled: z.boolean().optional(), +multiOrchMergeEnabled: z.enum(["auto", "always", "never"]).optional(), +``` + +**Verify:** `cd packages/types && npx tsc --noEmit` + +**Commit:** `feat(multi-orch): add message types and global settings` +**Use `--no-verify` on commits.** + +--- + +## AGENT 3: Mode Definition + +**Modifies:** +- `packages/types/src/mode.ts` +- `src/shared/modes.ts` + +**Task:** + +### 3a. Add multi-orchestrator to DEFAULT_MODES + +In `packages/types/src/mode.ts`, find the `DEFAULT_MODES` array (around line 195-254). Add a new entry after the `orchestrator` mode: + +```typescript +{ + slug: "multi-orchestrator", + name: "⚡ Multi-Orchestrator", + roleDefinition: + "You are Roo, a parallel workflow orchestrator that decomposes complex tasks into multiple independent subtasks and dispatches them to specialized modes running simultaneously. You analyze the user's request, identify separable concerns, assign each to the most appropriate mode, and coordinate their parallel execution with git worktree isolation.", + whenToUse: + "Use for complex tasks that benefit from parallelization — such as building features that span multiple modules, running architecture design alongside implementation, or handling multi-file refactoring with test writing simultaneously.", + description: "Parallel task execution across multiple agents", + groups: [], + customInstructions: `Your workflow: +1. Analyze the user's request and identify separable concerns +2. Decompose into independent tasks (respecting the max agent count setting) +3. Assign each task to the most appropriate mode (code, architect, ask, debug) +4. Maximize file separation between agents to minimize merge conflicts +5. 
If plan-review is enabled, present the plan for approval before executing +6. Monitor all agents and collect their completion reports +7. If merge is needed, coordinate the sequential branch merge +8. Present a unified summary of all results + +CRITICAL: When decomposing, ensure agents work on DIFFERENT files. Split by module/feature boundary, not by layer.`, +}, +``` + +### 3b. Verify mode is accessible + +In `src/shared/modes.ts`, confirm that `DEFAULT_MODES` is imported from `@roo-code/types` and that `getAllModes()` and `getModeBySlug()` will automatically include the new mode. No changes should be needed here since it reads from `DEFAULT_MODES` directly — but verify. + +**Verify:** `cd packages/types && npx tsc --noEmit` + +**Commit:** `feat(multi-orch): add multi-orchestrator mode definition` +**Use `--no-verify` on commits.** + +--- + +## AGENT 4: Panel Spawner + +**Creates:** `src/core/multi-orchestrator/panel-spawner.ts` +**Modifies:** `src/core/webview/ClineProvider.ts` (add static accessor only) + +**Task:** + +### 4a. Add static accessor to ClineProvider + +In `src/core/webview/ClineProvider.ts`, find the `getVisibleInstance()` static method (around line 737). Add a new static method nearby: + +```typescript +/** Get all active ClineProvider instances (for multi-orchestrator coordination) */ +public static getAllInstances(): ReadonlySet { + return this.activeInstances +} +``` + +This is the ONLY change to ClineProvider.ts. Do not touch anything else. + +### 4b. 
Create the panel spawner + +```typescript +// src/core/multi-orchestrator/panel-spawner.ts +import * as vscode from "vscode" +import { ClineProvider } from "../webview/ClineProvider" +import { ContextProxy } from "../config/ContextProxy" + +export interface SpawnedPanel { + id: string + provider: ClineProvider + panel: vscode.WebviewPanel +} + +export class PanelSpawner { + private panels: Map = new Map() + + constructor( + private context: vscode.ExtensionContext, + private outputChannel: vscode.OutputChannel, + ) {} + + /** + * Spawn N editor tab panels, each with an independent ClineProvider. + * Panels are placed across ViewColumns 1-6. + */ + async spawnPanels( + count: number, + titles: string[], + ): Promise> { + const contextProxy = await ContextProxy.getInstance(this.context) + + for (let i = 0; i < count; i++) { + const id = `agent-${i}` + const title = titles[i] || `Agent ${i + 1}` + const viewColumn = (i + 1) as vscode.ViewColumn // ViewColumn.One through Six + + // Create independent ClineProvider + const provider = new ClineProvider( + this.context, + this.outputChannel, + "editor", + contextProxy, + ) + + // Create WebviewPanel + const panel = vscode.window.createWebviewPanel( + ClineProvider.tabPanelId, + `⚡ ${title}`, + viewColumn, + { + enableScripts: true, + retainContextWhenHidden: true, + localResourceRoots: [this.context.extensionUri], + }, + ) + + // Wire provider to panel + await provider.resolveWebviewView(panel) + + // Track for cleanup + panel.onDidDispose(() => { + this.panels.delete(id) + }) + + this.panels.set(id, { id, provider, panel }) + } + + return new Map(this.panels) + } + + /** Close a specific panel and dispose its provider */ + async closePanel(id: string): Promise { + const spawned = this.panels.get(id) + if (spawned) { + spawned.panel.dispose() + this.panels.delete(id) + } + } + + /** Close all panels */ + async closeAllPanels(): Promise { + for (const [id] of this.panels) { + await this.closePanel(id) + } + } + + /** Get 
all active spawned panels */ + getPanels(): Map { + return new Map(this.panels) + } + + /** Get a specific provider by ID */ + getProvider(id: string): ClineProvider | undefined { + return this.panels.get(id)?.provider + } +} +``` + +**Key reference:** The `openClineInNewTab` function at `src/activate/registerCommands.ts:200-274` shows the existing pattern. This agent follows that pattern but without the editor group locking and with explicit ViewColumn assignment. + +**Commit:** `feat(multi-orch): add panel spawner for parallel agent tab panels` +**Use `--no-verify` on commits.** + +--- + +## AGENT 5: Worktree Manager + +**Creates:** `src/core/multi-orchestrator/worktree-manager.ts` + +**Task:** + +Build a manager that creates and cleans up git worktrees for each agent using the existing `WorktreeService` from `packages/core/src/worktree/worktree-service.ts`. + +```typescript +// src/core/multi-orchestrator/worktree-manager.ts +import { WorktreeService } from "@roo-code/core/worktree/worktree-service" +import { MULTI_ORCHESTRATOR_CONSTANTS } from "./types" +import * as path from "path" + +export interface WorktreeInfo { + agentId: string + path: string + branch: string +} + +export class MultiWorktreeManager { + private worktreeService: WorktreeService + private worktrees: Map = new Map() + + constructor(private workspacePath: string) { + this.worktreeService = new WorktreeService() + } + + /** + * Create a git worktree for each agent. + * Each gets its own branch from current HEAD and its own directory. 
async createWorktrees(agentIds: string[]): Promise<Map<string, WorktreeInfo>> {
+ +**Commit:** `feat(multi-orch): add worktree manager for agent isolation` +**Use `--no-verify` on commits.** + +--- + +## AGENT 6: Plan Generator + +**Creates:** `src/core/multi-orchestrator/plan-generator.ts` + +**Task:** + +Build the LLM-powered task decomposer that analyzes a user request and creates an execution plan. + +```typescript +// src/core/multi-orchestrator/plan-generator.ts +import type { ProviderSettings, ModeConfig } from "@roo-code/types" +import { buildApiHandler, type SingleCompletionHandler } from "../../api" +import type { OrchestratorPlan, PlannedTask } from "./types" +import { generateAgentId } from "./types" + +const PLAN_SYSTEM_PROMPT = `You are a task decomposition engine. Given a user request, break it into independent parallel tasks. + +For each task: +- Assign the most appropriate mode: "code" (implementation), "architect" (design/planning), "ask" (research/questions), "debug" (fixing issues) +- Write a clear, self-contained task description that an agent can execute independently +- List expected files the agent will touch (for merge conflict prevention) +- Ensure tasks are as independent as possible — minimize file overlap + +Respond in this exact JSON format (no markdown fences): +{ + "tasks": [ + { + "mode": "", + "title": "", + "description": "", + "assignedFiles": [""], + "priority": <1-N> + } + ], + "requiresMerge": , + "estimatedComplexity": "" +}` + +export async function generatePlan( + userRequest: string, + availableModes: ModeConfig[], + maxAgents: number, + providerSettings: ProviderSettings, +): Promise { + try { + const handler = buildApiHandler(providerSettings) + + if (!("completePrompt" in handler)) { + console.error("[MultiOrch] Handler does not support completePrompt") + return null + } + + const modeList = availableModes + .filter((m) => m.slug !== "multi-orchestrator" && m.slug !== "orchestrator") + .map((m) => `- ${m.slug}: ${m.description || m.name}`) + .join("\n") + + const prompt = `Available 
modes:\n${modeList}\n\nMax parallel tasks: ${maxAgents}\n\nUser request:\n${userRequest}` + + const response = await (handler as unknown as SingleCompletionHandler).completePrompt( + `${PLAN_SYSTEM_PROMPT}\n\n${prompt}`, + ) + + return parsePlanResponse(response) + } catch (error) { + console.error("[MultiOrch] Plan generation failed:", error) + return null + } +} + +function parsePlanResponse(response: string): OrchestratorPlan | null { + try { + const cleaned = response.replace(/^```json?\n?/m, "").replace(/\n?```$/m, "").trim() + const parsed = JSON.parse(cleaned) + + if (!parsed.tasks || !Array.isArray(parsed.tasks)) return null + + const tasks: PlannedTask[] = parsed.tasks.map((t: Record, i: number) => ({ + id: generateAgentId(), + mode: (t.mode as string) || "code", + title: (t.title as string) || `Task ${i + 1}`, + description: (t.description as string) || "", + assignedFiles: (t.assignedFiles as string[]) || [], + priority: (t.priority as number) || i + 1, + })) + + return { + tasks, + requiresMerge: parsed.requiresMerge ?? tasks.some((t) => t.mode === "code"), + estimatedComplexity: parsed.estimatedComplexity || "medium", + } + } catch (error) { + console.error("[MultiOrch] Failed to parse plan:", error) + return null + } +} +``` + +**Commit:** `feat(multi-orch): add LLM-powered plan generator for task decomposition` +**Use `--no-verify` on commits.** + +--- + +## AGENT 7: Agent Coordinator + +**Creates:** `src/core/multi-orchestrator/agent-coordinator.ts` + +**Task:** + +Build the component that tracks all spawned agents, listens for completion events, and coordinates the start/monitor lifecycle. 
+ +```typescript +// src/core/multi-orchestrator/agent-coordinator.ts +import { EventEmitter } from "events" +import type { ClineProvider } from "../webview/ClineProvider" +import type { AgentState } from "./types" +import { RooCodeEventName } from "@roo-code/types" + +export class AgentCoordinator extends EventEmitter { + private agents: Map = new Map() + private providers: Map = new Map() + private completionCount = 0 + + /** Register an agent and attach event listeners to its provider */ + registerAgent(agent: AgentState, provider: ClineProvider): void { + this.agents.set(agent.taskId, agent) + this.providers.set(agent.taskId, provider) + + // Listen for task completion on this provider + provider.on(RooCodeEventName.TaskCompleted, (taskId: string) => { + const agentState = this.agents.get(agent.taskId) + if (agentState) { + agentState.status = "completed" + agentState.completedAt = Date.now() + this.completionCount++ + this.emit("agentCompleted", agent.taskId) + + if (this.allComplete()) { + this.emit("allCompleted") + } + } + }) + + provider.on(RooCodeEventName.TaskAborted, () => { + const agentState = this.agents.get(agent.taskId) + if (agentState) { + agentState.status = "failed" + agentState.completedAt = Date.now() + this.completionCount++ + this.emit("agentFailed", agent.taskId) + + if (this.allComplete()) { + this.emit("allCompleted") + } + } + }) + } + + /** + * Start all agents simultaneously. + * Each provider should already have a task created with startTask=false. 
+ */ + async startAll(): Promise { + const startPromises: Promise[] = [] + + for (const [taskId, provider] of this.providers) { + const agent = this.agents.get(taskId) + if (agent) { + agent.status = "running" + agent.startedAt = Date.now() + } + + const currentTask = provider.getCurrentTask() + if (currentTask) { + startPromises.push(currentTask.start()) + } + } + + // Start all simultaneously + await Promise.all(startPromises) + } + + /** Check if all agents have finished (completed or failed) */ + allComplete(): boolean { + return this.completionCount >= this.agents.size + } + + /** Get current state of all agents */ + getStates(): AgentState[] { + return Array.from(this.agents.values()) + } + + /** Get a specific agent's state */ + getState(taskId: string): AgentState | undefined { + return this.agents.get(taskId) + } + + /** Wait for all agents to complete (returns a promise) */ + waitForAll(): Promise { + if (this.allComplete()) return Promise.resolve() + return new Promise((resolve) => { + this.once("allCompleted", resolve) + }) + } + + /** Get total agent count */ + get totalAgents(): number { + return this.agents.size + } + + /** Get completed agent count */ + get completedAgents(): number { + return this.completionCount + } +} +``` + +**Key reference:** Task events are defined in `packages/types/src/events.ts`. The `RooCodeEventName.TaskCompleted` event is emitted by ClineProvider (not Task directly for delegation events, but `TaskCompleted` is emitted from the Task level and forwarded). Check `src/core/webview/ClineProvider.ts` for how events are forwarded from Task to Provider. + +**Commit:** `feat(multi-orch): add agent coordinator for parallel lifecycle management` +**Use `--no-verify` on commits.** + +--- + +## AGENT 8: Merge Pipeline + +**Creates:** `src/core/multi-orchestrator/merge-pipeline.ts` + +**Task:** + +Build the sequential branch merger that runs after all agents complete. 
+ +```typescript +// src/core/multi-orchestrator/merge-pipeline.ts +import { execSync } from "child_process" +import type { AgentState, MergeResult } from "./types" + +export class MergePipeline { + constructor(private workspacePath: string) {} + + /** + * Merge all agent branches sequentially into the current branch. + * Order: by priority (lower = first). + */ + async mergeAll( + agents: AgentState[], + onProgress: (agentId: string, result: MergeResult) => void, + ): Promise { + const results: MergeResult[] = [] + + // Sort by priority for deterministic merge order + const sorted = [...agents] + .filter((a) => a.worktreeBranch && a.status === "completed") + .sort((a, b) => (a.startedAt || 0) - (b.startedAt || 0)) + + for (const agent of sorted) { + if (!agent.worktreeBranch) continue + + const result = this.mergeBranch(agent.taskId, agent.worktreeBranch) + results.push(result) + onProgress(agent.taskId, result) + } + + return results + } + + /** Merge a single agent's branch into the current branch */ + private mergeBranch(agentTaskId: string, branch: string): MergeResult { + try { + // Get list of files changed on this branch + const filesChanged = this.getFilesChanged(branch) + + // Attempt merge + try { + execSync(`git merge --no-ff "${branch}" -m "Merge multi-orch agent: ${agentTaskId}"`, { + cwd: this.workspacePath, + encoding: "utf-8", + timeout: 30000, + }) + + return { + agentTaskId, + branch, + success: true, + conflictsFound: 0, + conflictsResolved: 0, + filesChanged, + } + } catch (mergeError) { + // Merge conflict — count them + const conflictFiles = this.getConflictFiles() + const conflictsFound = conflictFiles.length + + if (conflictsFound > 0) { + // Abort the merge for now — let the report indicate conflicts + try { + execSync("git merge --abort", { cwd: this.workspacePath, encoding: "utf-8" }) + } catch { + // If abort fails, reset + execSync("git reset --hard HEAD", { cwd: this.workspacePath, encoding: "utf-8" }) + } + } + + return { + 
agentTaskId, + branch, + success: false, + conflictsFound, + conflictsResolved: 0, + filesChanged, + } + } + } catch (error) { + return { + agentTaskId, + branch, + success: false, + conflictsFound: 0, + conflictsResolved: 0, + filesChanged: [], + } + } + } + + /** Get files changed on a branch compared to current HEAD */ + private getFilesChanged(branch: string): string[] { + try { + const output = execSync(`git diff --name-only HEAD..."${branch}"`, { + cwd: this.workspacePath, + encoding: "utf-8", + timeout: 10000, + }) + return output.trim().split("\n").filter(Boolean) + } catch { + return [] + } + } + + /** Get files with merge conflicts */ + private getConflictFiles(): string[] { + try { + const output = execSync("git diff --name-only --diff-filter=U", { + cwd: this.workspacePath, + encoding: "utf-8", + timeout: 10000, + }) + return output.trim().split("\n").filter(Boolean) + } catch { + return [] + } + } +} +``` + +**Commit:** `feat(multi-orch): add merge pipeline for sequential branch merging` +**Use `--no-verify` on commits.** + +--- + +## AGENT 9: Report Aggregator + +**Creates:** `src/core/multi-orchestrator/report-aggregator.ts` + +**Task:** + +Build the report formatter that collects results from all agents and the merge phase. + +```typescript +// src/core/multi-orchestrator/report-aggregator.ts +import type { AgentState, MergeResult } from "./types" + +/** + * Aggregate all agent reports and merge results into a unified markdown summary. + */ +export function aggregateReports( + agents: AgentState[], + mergeResults: MergeResult[], +): string { + const sections: string[] = [] + + // Header + sections.push(`# Multi-Orchestration Report`) + sections.push(`**${agents.length} agents** executed in parallel.\n`) + + // Agent summaries + sections.push(`## Agent Results\n`) + for (const agent of agents) { + const status = agent.status === "completed" ? "✅" : "❌" + const duration = agent.startedAt && agent.completedAt + ? 
`${Math.round((agent.completedAt - agent.startedAt) / 1000)}s` + : "unknown" + + sections.push(`### ${status} ${agent.title} (${agent.mode} mode)`) + sections.push(`- **Status:** ${agent.status}`) + sections.push(`- **Duration:** ${duration}`) + if (agent.tokenUsage) { + sections.push(`- **Tokens:** ${agent.tokenUsage.input} in / ${agent.tokenUsage.output} out`) + } + if (agent.completionReport) { + sections.push(`- **Report:** ${agent.completionReport}`) + } + sections.push("") + } + + // Merge results (if any) + if (mergeResults.length > 0) { + sections.push(`## Merge Results\n`) + for (const result of mergeResults) { + const status = result.success ? "✅" : "⚠️" + sections.push(`### ${status} Branch: ${result.branch}`) + sections.push(`- **Success:** ${result.success}`) + sections.push(`- **Files changed:** ${result.filesChanged.length}`) + if (result.conflictsFound > 0) { + sections.push(`- **Conflicts found:** ${result.conflictsFound}`) + sections.push(`- **Conflicts resolved:** ${result.conflictsResolved}`) + } + sections.push("") + } + } + + // Summary stats + const completed = agents.filter((a) => a.status === "completed").length + const failed = agents.filter((a) => a.status === "failed").length + const mergeSuccesses = mergeResults.filter((r) => r.success).length + const mergeFailures = mergeResults.filter((r) => !r.success).length + + sections.push(`## Summary`) + sections.push(`- **Agents:** ${completed} completed, ${failed} failed`) + if (mergeResults.length > 0) { + sections.push(`- **Merges:** ${mergeSuccesses} succeeded, ${mergeFailures} had conflicts`) + } + + return sections.join("\n") +} +``` + +**Commit:** `feat(multi-orch): add report aggregator for unified result formatting` +**Use `--no-verify` on commits.** + +--- + +## AGENT 10: Top-Level Orchestrator + +**Creates:** `src/core/multi-orchestrator/orchestrator.ts` + +**Task:** + +Build the main conductor that ties all components into the full lifecycle. 
+ +```typescript +// src/core/multi-orchestrator/orchestrator.ts +import * as vscode from "vscode" +import type { ProviderSettings, ModeConfig } from "@roo-code/types" +import { PanelSpawner } from "./panel-spawner" +import { MultiWorktreeManager } from "./worktree-manager" +import { generatePlan } from "./plan-generator" +import { AgentCoordinator } from "./agent-coordinator" +import { MergePipeline } from "./merge-pipeline" +import { aggregateReports } from "./report-aggregator" +import { + type OrchestratorState, + type OrchestratorPlan, + createInitialOrchestratorState, + createInitialAgentState, + MULTI_ORCHESTRATOR_CONSTANTS, +} from "./types" + +export class MultiOrchestrator { + private state: OrchestratorState = createInitialOrchestratorState() + private panelSpawner: PanelSpawner + private worktreeManager: MultiWorktreeManager | null = null + private coordinator: AgentCoordinator | null = null + private mergePipeline: MergePipeline | null = null + private aborted = false + + constructor( + private context: vscode.ExtensionContext, + private outputChannel: vscode.OutputChannel, + private workspacePath: string, + ) { + this.panelSpawner = new PanelSpawner(context, outputChannel) + } + + /** + * Execute the full multi-orchestration lifecycle. + */ + async execute( + userRequest: string, + maxAgents: number, + providerSettings: ProviderSettings, + availableModes: ModeConfig[], + planReviewEnabled: boolean, + mergeMode: "auto" | "always" | "never", + onStateChange: (state: OrchestratorState) => void, + ): Promise { + this.aborted = false + const notify = () => onStateChange({ ...this.state }) + + try { + // PHASE 1: PLAN + this.state.phase = "planning" + notify() + + const plan = await generatePlan(userRequest, availableModes, maxAgents, providerSettings) + if (!plan || plan.tasks.length === 0) { + this.state.phase = "complete" + this.state.finalReport = "Could not decompose the request into parallel tasks." 
+ notify() + return + } + + this.state.plan = plan + this.state.agents = plan.tasks.map(createInitialAgentState) + notify() + + // If plan review enabled, stop here and wait for approval + if (planReviewEnabled) { + // The onStateChange callback will trigger UI to show the plan + // The execute() caller should handle the approval flow + return + } + + await this.executeFromPlan(plan, providerSettings, mergeMode, onStateChange) + } catch (error) { + this.state.phase = "complete" + this.state.finalReport = `Orchestration failed: ${error}` + notify() + } + } + + /** + * Resume execution after plan approval (called when user approves in plan-review mode). + */ + async executeFromPlan( + plan: OrchestratorPlan, + providerSettings: ProviderSettings, + mergeMode: "auto" | "always" | "never", + onStateChange: (state: OrchestratorState) => void, + ): Promise { + const notify = () => onStateChange({ ...this.state }) + + try { + // PHASE 2: SPAWN + this.state.phase = "spawning" + notify() + + const needsMerge = + mergeMode === "always" || + (mergeMode === "auto" && plan.requiresMerge) || + false + + // Create worktrees if merge is needed + if (needsMerge) { + this.worktreeManager = new MultiWorktreeManager(this.workspacePath) + const agentIds = plan.tasks.map((t) => t.id) + const worktrees = await this.worktreeManager.createWorktrees(agentIds) + + // Update agent states with worktree info + for (const agent of this.state.agents) { + const wt = worktrees.get(agent.taskId) + if (wt) { + agent.worktreePath = wt.path + agent.worktreeBranch = wt.branch + } + } + } + + // Open panels + const titles = plan.tasks.map((t) => t.title) + const panels = await this.panelSpawner.spawnPanels(plan.tasks.length, titles) + + // Create tasks in each provider (startTask=false) + const panelEntries = Array.from(panels.entries()) + this.coordinator = new AgentCoordinator() + + for (let i = 0; i < plan.tasks.length; i++) { + if (this.aborted) return + + const task = plan.tasks[i] + const [panelId, 
spawned] = panelEntries[i] + const agent = this.state.agents[i] + + agent.providerId = panelId + agent.panelId = panelId + + // Create the task in this provider but don't start it yet + await spawned.provider.createTask(task.description, undefined, undefined, { + startTask: false, + }) + + // Register with coordinator + this.coordinator.registerAgent(agent, spawned.provider) + } + + notify() + + // PHASE 3: RUN + this.state.phase = "running" + notify() + + // Start all simultaneously + await this.coordinator.startAll() + + // Monitor: update state on each agent completion + this.coordinator.on("agentCompleted", () => notify()) + this.coordinator.on("agentFailed", () => notify()) + + // Wait for all to complete + await this.coordinator.waitForAll() + + // PHASE 4: MERGE (if needed) + if (needsMerge && mergeMode !== "never") { + this.state.phase = "merging" + notify() + + this.mergePipeline = new MergePipeline(this.workspacePath) + this.state.mergeResults = await this.mergePipeline.mergeAll( + this.state.agents, + (_agentId, _result) => notify(), + ) + } + + // PHASE 5: REPORT + this.state.phase = "reporting" + notify() + + this.state.finalReport = aggregateReports(this.state.agents, this.state.mergeResults) + + // Cleanup worktrees + if (this.worktreeManager) { + await this.worktreeManager.cleanupWorktrees() + } + + this.state.phase = "complete" + notify() + } catch (error) { + this.state.phase = "complete" + this.state.finalReport = `Orchestration failed: ${error}` + onStateChange({ ...this.state }) + } + } + + /** Abort the current orchestration */ + async abort(): Promise { + this.aborted = true + await this.panelSpawner.closeAllPanels() + if (this.worktreeManager) { + await this.worktreeManager.cleanupWorktrees() + } + this.state.phase = "complete" + this.state.finalReport = "Orchestration aborted by user." 
+ } + + /** Get current state */ + getState(): OrchestratorState { + return { ...this.state } + } +} +``` + +**Commit:** `feat(multi-orch): add top-level orchestrator coordinating full lifecycle` +**Use `--no-verify` on commits.** + +--- + +## AGENT 11: Tests + +**Creates:** +- `src/core/multi-orchestrator/__tests__/types.spec.ts` +- `src/core/multi-orchestrator/__tests__/plan-generator.spec.ts` +- `src/core/multi-orchestrator/__tests__/merge-pipeline.spec.ts` +- `src/core/multi-orchestrator/__tests__/report-aggregator.spec.ts` + +**Task:** + +Write tests for the pure/testable components. Skip tests that require VS Code API mocks (panel spawner, coordinator). + +Test `types.ts`: `generateAgentId()` returns valid strings, `createInitialAgentState()` returns correct defaults, `createInitialOrchestratorState()` returns idle state. + +Test `report-aggregator.ts`: All agents completed produces correct report, mixed success/failure, with and without merge results. + +Test `merge-pipeline.ts`: Mock `execSync` to test merge success, merge conflict detection, and conflict file listing. + +Test `plan-generator.ts`: Mock `completePrompt` to return valid JSON, test `parsePlanResponse` with valid/invalid/malformed JSON. + +Run: `cd src && npx vitest run core/multi-orchestrator/__tests__/` + +**Commit:** `test(multi-orch): add unit tests for types, plan generator, merge pipeline, report aggregator` +**Use `--no-verify` on commits.** + +--- + +## AGENT 12: AgentCountSelector + ChatTextArea + +**Creates:** `webview-ui/src/components/multi-orchestrator/AgentCountSelector.tsx` +**Modifies:** `webview-ui/src/components/chat/ChatTextArea.tsx` + +**Task:** + +### 12a. 
Create the agent count dropdown + +```typescript +// webview-ui/src/components/multi-orchestrator/AgentCountSelector.tsx +import React from "react" + +interface AgentCountSelectorProps { + value: number + onChange: (count: number) => void + max?: number +} + +export const AgentCountSelector: React.FC = ({ + value, + onChange, + max = 6, +}) => { + return ( +
+	<div style={{ display: "flex", alignItems: "center", gap: "4px" }}>
+		<span>Agents:</span>
+		<select value={value} onChange={(e) => onChange(Number(e.target.value))}>
+			{Array.from({ length: max }, (_, i) => i + 1).map((n) => (
+				<option key={n} value={n}>
+					{n}
+				</option>
+			))}
+		</select>
+	</div>
+ ) +} +``` + +### 12b. Add to ChatTextArea + +In `webview-ui/src/components/chat/ChatTextArea.tsx`, find the bottom toolbar area where `ModeSelector` and `ApiConfigSelector` are rendered (around line 1300-1305). + +Add the `AgentCountSelector` conditionally — only visible when the current mode is `multi-orchestrator`: + +```tsx +import { AgentCountSelector } from "../multi-orchestrator/AgentCountSelector" + +// Inside the toolbar, after ApiConfigSelector: +{currentMode === "multi-orchestrator" && ( + { + vscode.postMessage({ + type: "updateSettings", + updatedSettings: { multiOrchMaxAgents: count }, + }) + }} + /> +)} +``` + +You'll need to get `currentMode` from the existing mode state — check how `ModeSelector` determines the current mode slug and reuse that. + +**Commit:** `feat(multi-orch): add agent count selector to chat area for multi-orchestrator mode` +**Use `--no-verify` on commits.** + +--- + +## AGENT 13: Status & Plan Review Panels + +**Creates:** +- `webview-ui/src/components/multi-orchestrator/MultiOrchStatusPanel.tsx` +- `webview-ui/src/components/multi-orchestrator/PlanReviewPanel.tsx` + +**Task:** + +### 13a. MultiOrchStatusPanel + +Displays during execution, showing agent progress: + +```typescript +// webview-ui/src/components/multi-orchestrator/MultiOrchStatusPanel.tsx +import React from "react" +import type { OrchestratorState } from "../../../../src/core/multi-orchestrator/types" + +interface MultiOrchStatusPanelProps { + state: OrchestratorState + onAbort: () => void +} + +export const MultiOrchStatusPanel: React.FC = ({ state, onAbort }) => { + const completedCount = state.agents.filter((a) => a.status === "completed").length + const failedCount = state.agents.filter((a) => a.status === "failed").length + + return ( +
+		<div style={{ padding: "12px", fontFamily: "var(--vscode-font-family)" }}>
+			<div style={{ fontWeight: "bold", marginBottom: "8px" }}>
+				⚡ Multi-Orchestration: {state.phase}
+			</div>
+			<div style={{ marginBottom: "8px" }}>
+				{completedCount + failedCount}/{state.agents.length} agents complete
+			</div>
+
+			<div>
+				{state.agents.map((agent) => (
+					<div key={agent.taskId} style={{ display: "flex", alignItems: "center", gap: "6px" }}>
+						<span>
+							{agent.status === "completed" ? "✅" :
+							 agent.status === "failed" ? "❌" :
+							 agent.status === "running" ? "🔄" : "⏳"}
+						</span>
+						<span>{agent.title}</span>
+						<span style={{ opacity: 0.7 }}>{agent.mode}</span>
+					</div>
+				))}
+			</div>
+
+			{state.phase !== "complete" && (
+				<button onClick={onAbort}>Abort</button>
+			)}
+
+			{state.finalReport && (
+				<div style={{ whiteSpace: "pre-wrap", marginTop: "8px" }}>
+					{state.finalReport}
+				</div>
+			)}
+		</div>
+ ) +} +``` + +### 13b. PlanReviewPanel + +Shown when plan-review is enabled, before execution starts: + +```typescript +// webview-ui/src/components/multi-orchestrator/PlanReviewPanel.tsx +import React from "react" +import { Button } from "@src/components/ui" +import type { OrchestratorPlan } from "../../../../src/core/multi-orchestrator/types" + +interface PlanReviewPanelProps { + plan: OrchestratorPlan + onApprove: () => void + onCancel: () => void +} + +export const PlanReviewPanel: React.FC = ({ plan, onApprove, onCancel }) => { + return ( +
+		<div style={{ padding: "12px" }}>
+			<div style={{ fontWeight: "bold" }}>⚡ Execution Plan</div>
+			<div style={{ marginBottom: "8px" }}>
+				{plan.tasks.length} parallel tasks · {plan.estimatedComplexity} complexity
+				{plan.requiresMerge && " · merge required"}
+			</div>
+
+			<div>
+				{plan.tasks.map((task, i) => (
+					<div key={task.id} style={{ marginBottom: "8px" }}>
+						<div style={{ fontWeight: "bold" }}>
+							Task {i + 1}: {task.title} → {task.mode}
+						</div>
+						<div style={{ opacity: 0.8 }}>{task.description}</div>
+					</div>
+				))}
+			</div>
+
+			<div style={{ display: "flex", gap: "8px" }}>
+				<Button onClick={onApprove}>Approve & Run</Button>
+				<Button variant="secondary" onClick={onCancel}>
+					Cancel
+				</Button>
+			</div>
+		</div>
+ ) +} +``` + +**Note on imports:** The types import path `../../../../src/core/multi-orchestrator/types` may need adjustment. Check how other webview components import from the extension source — they may use a different alias or the types may need to be exported from `@roo-code/types` instead. If the import doesn't resolve, create a minimal types re-export in the webview source. + +**Commit:** `feat(multi-orch): add status panel and plan review panel components` +**Use `--no-verify` on commits.** + +--- + +## AGENT 14: Message Handlers + +**Modifies:** `src/core/webview/webviewMessageHandler.ts` + +**Task:** + +Add handlers for the multi-orchestrator message types. Find the message handler switch statement (around line 537). Add these cases before `default:`: + +```typescript +case "multiOrchStartPlan": { + // User submitted a request in multi-orchestrator mode + const userRequest = message.text || "" + const orchestrator = provider.getMultiOrchestrator?.() + if (!orchestrator) break + + const maxAgents = getGlobalState("multiOrchMaxAgents") ?? 4 + const planReview = getGlobalState("multiOrchPlanReviewEnabled") ?? false + const mergeMode = (getGlobalState("multiOrchMergeEnabled") as "auto" | "always" | "never") ?? 
"auto" + const providerSettings = provider.contextProxy.getProviderSettings() + const { getAllModes } = await import("../../shared/modes") + const customModes = await provider.customModesManager.getCustomModes() + const allModes = getAllModes(customModes) + + orchestrator.execute( + userRequest, + maxAgents, + providerSettings, + allModes, + planReview, + mergeMode, + (state) => { + provider.postMessageToWebview({ + type: "multiOrchStatusUpdate", + text: JSON.stringify(state), + }) + }, + ).then(() => { + provider.postMessageToWebview({ + type: "multiOrchComplete", + text: JSON.stringify(orchestrator.getState()), + }) + }).catch((error) => { + provider.postMessageToWebview({ + type: "multiOrchError", + text: String(error), + }) + }) + break +} + +case "multiOrchApprovePlan": { + const orchestrator = provider.getMultiOrchestrator?.() + if (!orchestrator) break + const state = orchestrator.getState() + if (!state.plan) break + + const mergeMode = (getGlobalState("multiOrchMergeEnabled") as "auto" | "always" | "never") ?? 
"auto" + const providerSettings = provider.contextProxy.getProviderSettings() + + orchestrator.executeFromPlan( + state.plan, + providerSettings, + mergeMode, + (newState) => { + provider.postMessageToWebview({ + type: "multiOrchStatusUpdate", + text: JSON.stringify(newState), + }) + }, + ) + break +} + +case "multiOrchAbort": { + const orchestrator = provider.getMultiOrchestrator?.() + if (orchestrator) { + await orchestrator.abort() + await provider.postMessageToWebview({ + type: "multiOrchComplete", + text: JSON.stringify(orchestrator.getState()), + }) + } + break +} + +case "multiOrchGetStatus": { + const orchestrator = provider.getMultiOrchestrator?.() + if (orchestrator) { + await provider.postMessageToWebview({ + type: "multiOrchStatusUpdate", + text: JSON.stringify(orchestrator.getState()), + }) + } + break +} +``` + +**Note:** You'll also need to add `getMultiOrchestrator()` to ClineProvider — but since Agent 4 owns ClineProvider changes, coordinate: Agent 4 should add a `private multiOrchestrator?: MultiOrchestrator` field and a `getMultiOrchestrator()` accessor. If Agent 4 hasn't done this, add it yourself with a note. + +**Commit:** `feat(multi-orch): add message handlers for plan, approve, abort, and status` +**Use `--no-verify` on commits.** + +--- + +## AGENT 15: Settings Section + +**Modifies:** `webview-ui/src/components/settings/SettingsView.tsx` + +**Task:** + +Add a Multi-Orchestrator section to the settings. This is a small addition to the existing settings infrastructure. + +Find the `sectionNames` array (around line 98). Add `"multiOrch"` after `"memory"`. + +Find the `sections` icon mapping (around line 509). Add: +```typescript +{ id: "multiOrch", icon: Zap }, // import Zap from lucide-react +``` + +Add the tab content block (following the pattern of other sections): + +```tsx +{renderTab === "multiOrch" && ( +
+	<div>
+		<SectionHeader>
+			<div style={{ display: "flex", alignItems: "center", gap: "8px" }}>
+				<Zap size={16} />
+				<div>Multi-Orchestrator</div>
+			</div>
+		</SectionHeader>
+
+		<Section>
+			<div>
+				<div style={{ color: "var(--vscode-descriptionForeground)" }}>
+					Configure parallel task execution across multiple agents.
+				</div>
+
+				{/* Max agents */}
+				<div>
+					<label>Max agents</label>
+					<input
+						type="number"
+						min={1}
+						max={6}
+						value={cachedState.multiOrchMaxAgents ?? 4}
+						onChange={(e) => setCachedStateField("multiOrchMaxAgents", Number(e.target.value))}
+					/>
+					<div style={{ color: "var(--vscode-descriptionForeground)" }}>
+						Maximum number of parallel agents (1-6).
+					</div>
+				</div>
+
+				{/* Plan review toggle */}
+				<div>
+					<input
+						type="checkbox"
+						checked={cachedState.multiOrchPlanReviewEnabled ?? false}
+						onChange={(e) => setCachedStateField("multiOrchPlanReviewEnabled", e.target.checked)}
+					/>
+					<label>Review plan before execution</label>
+				</div>
+
+				{/* Merge mode */}
+				<div>
+					<label>Merge mode</label>
+					<select
+						value={cachedState.multiOrchMergeEnabled ?? "auto"}
+						onChange={(e) => setCachedStateField("multiOrchMergeEnabled", e.target.value)}>
+						<option value="auto">Auto</option>
+						<option value="always">Always</option>
+						<option value="never">Never</option>
+					</select>
+					<div style={{ color: "var(--vscode-descriptionForeground)" }}>
+						When to run the merge phase after agents complete.
+					</div>
+				</div>
+			</div>
+		</Section>
+	</div>
+)} +``` + +**CRITICAL:** All inputs bind to `cachedState` via `setCachedStateField`, NOT live state. + +**Commit:** `feat(multi-orch): add multi-orchestrator settings section` +**Use `--no-verify` on commits.** + +--- + +## EXECUTION ORDER + +``` +Phase 1 (parallel, no dependencies): Agents 1, 2, 3 +Phase 2 (parallel, depend on Agent 1): Agents 4, 5, 6, 7, 8, 9 +Phase 3 (parallel, depend on Phase 2): Agents 10, 11, 12, 13, 14, 15 +Then: 10 verification/merge agents +``` + +## VERIFICATION CHECKLIST + +After all agents complete, verification agents should check: + +1. TypeScript compilation: `cd packages/types && npx tsc --noEmit` +2. TypeScript compilation: `cd src && npx tsc --noEmit` +3. TypeScript compilation: `cd webview-ui && npx tsc --noEmit` +4. Tests: `cd src && npx vitest run core/multi-orchestrator/` +5. Lint: `cd src && npx eslint core/multi-orchestrator/ --ext=ts --max-warnings=0` +6. All imports resolve between modules +7. Message types in handler match those in type definitions +8. ClineProvider has `getMultiOrchestrator()` accessor +9. Mode slug `multi-orchestrator` appears in DEFAULT_MODES +10. Settings bind to cachedState not live state diff --git a/docs/superpowers/specs/MULTI-ORCHESTRATOR-COMPLETE-BUG-REPORT.md b/docs/superpowers/specs/MULTI-ORCHESTRATOR-COMPLETE-BUG-REPORT.md new file mode 100644 index 00000000000..1767be8564d --- /dev/null +++ b/docs/superpowers/specs/MULTI-ORCHESTRATOR-COMPLETE-BUG-REPORT.md @@ -0,0 +1,873 @@ +# Multi-Orchestrator — Complete Bug Report & Engineering Handoff + +**Created**: End of Session 1 (March 22-23, 2026) +**Purpose**: Exhaustive documentation of every known bug, attempted fix, root cause analysis, and architectural constraint discovered during the initial implementation of the Multi-Orchestrator feature. This document is the definitive handoff for the next engineering session. +**Total agents deployed this session**: 80+ +**Total commits**: 60+ + +--- + +## TABLE OF CONTENTS + +1. 
[Executive Summary](#1-executive-summary) +2. [What Works (Verified)](#2-what-works-verified) +3. [Architecture Overview](#3-architecture-overview) +4. [Complete File Map](#4-complete-file-map) +5. [Bug #1: Diff Views Open In Wrong Pane / Steal Focus](#5-bug-1) +6. [Bug #2: API Rate Limiting When Multiple Agents Start](#6-bug-2) +7. [Bug #3: Agents Don't Start Simultaneously](#7-bug-3) +8. [Bug #4: Panel Layout — Panels Don't Land In Correct Columns](#8-bug-4) +9. [Bug #5: Task Completion Loop — Agents Keep Running After Finishing](#9-bug-5) +10. [Bug #6: Auto-Approval Not Working For Spawned Agents](#10-bug-6) +11. [Bug #7: Agent Count Not Respected (Asked For N, Got M)](#11-bug-7) +12. [Bug #8: Settings Don't Persist Across Tab Switches](#12-bug-8) +13. [Bug #9: Multi-Orchestrator Send Button Does Nothing](#13-bug-9) +14. [Bug #10: Git Worktrees Not Isolating Agent File Operations](#14-bug-10) +15. [Bug #11: Completion Reports Not Captured / Not Sent Back To Orchestrator](#15-bug-11) +16. [Bug #12: Agent Panels Don't Close After Orchestration Completes](#16-bug-12) +17. [Bug #13: Diff View Doesn't Revert Back To Agent's Chat View](#17-bug-13) +18. [Bug #14: Diff View Not Streaming While Being Created](#18-bug-14) +19. [Bug #15: preventFocusDisruption Experiment Not Taking Effect](#19-bug-15) +20. [Bug #16: Stop/Pause Button Visual State Not Updating](#20-bug-16) +21. [Bug #17: Cannot Stop/Resume Individual Agents Mid-Execution](#21-bug-17) +22. [Bug #18: Post-Completion Verification Phase Not Triggering](#22-bug-18) +23. [Bug #19: Architect Mode Assigned As Parallel Task](#23-bug-19) +24. [Bug #20: Short-Request Heuristic Reducing Task Count](#24-bug-20) +25. [VS Code API Constraints](#25-vscode-api-constraints) +26. [Attempted Fixes That Didn't Work](#26-attempted-fixes-that-didnt-work) +27. [Architectural Root Causes](#27-architectural-root-causes) +28. [Recommended Strategy For Next Session](#28-recommended-strategy) +29. 
[Features Not Yet Implemented](#29-features-not-yet-implemented) +30. [Test Coverage Status](#30-test-coverage-status) + +--- + +## 1. Executive Summary + +The Multi-Orchestrator is a new mode in Roo-Code that decomposes complex tasks into N parallel subtasks (1-6), each running in its own editor tab panel. The core orchestration logic WORKS — plans are generated, panels spawn, agents execute, reports are collected. However, there are approximately 20 bugs that prevent it from being production-ready. The bugs fall into three categories: + +1. **VS Code Layout Bugs** (Bugs #1, #4, #13, #14): File operations (diffs, edits) fight with webview panels for screen real estate. VS Code's editor group system doesn't cleanly support N webview panels + N diff editors simultaneously. + +2. **Lifecycle Bugs** (Bugs #5, #6, #7, #11, #12, #15, #18): The agent lifecycle — from start to completion to report collection — has gaps where events are missed, states aren't updated, or loops aren't properly terminated. + +3. **Configuration Bugs** (Bugs #8, #9, #10, #16, #17, #19, #20): Settings not persisting, auto-approval not taking effect, agent count not respected, mode assignments incorrect. + +The most impactful bugs to fix first are **#1** (diff views), **#2** (API rate limiting), **#5** (completion loop), and **#6** (auto-approval). These four bugs together account for ~80% of the user-visible failures. + +--- + +## 2. 
What Works (Verified) + +These features have been tested and confirmed working: + +- [x] Multi-orchestrator mode appears in the mode dropdown +- [x] Agent count selector (1-6) shows in chat toolbar when mode is active +- [x] User message intercepted and routed to `multiOrchStartPlan` handler +- [x] Plan generator decomposes requests via LLM (uses `completePrompt`) +- [x] Plan review mode toggle in settings +- [x] Plan review UI shows tasks with approve/cancel buttons +- [x] N editor tab panels spawn in the editor area +- [x] Each agent gets its own independent ClineProvider +- [x] Agent system prompt prefix injected with parallel execution context +- [x] Each agent is aware of other agents' names and assigned files +- [x] Mode switching before task creation (handleModeSwitch) +- [x] Tasks created with `startTask: false` for deferred start +- [x] TaskCompleted events captured by coordinator +- [x] Tasks aborted after completion to prevent while-loop restart +- [x] Completion reports captured from clineMessages (last `completion_result` say message) +- [x] Report aggregated as markdown and displayed in orchestrator sidebar +- [x] Panels close after completion (2-second delay) +- [x] Original editor layout saved (`vscode.getEditorLayout`) and restored after panels close +- [x] Settings: max agents, plan review toggle, merge mode (auto/always/never) +- [x] Worktree manager checks for git repo before creating worktrees +- [x] Worktree paths set as agent working directory via `setWorkingDirectory()` +- [x] `multiOrchForceApproveAll` flag added to auto-approval decision tree +- [x] Resume asks (`resume_completed_task`, `resume_task`) excluded from force-approve +- [x] ViewColumn tracked per provider and threaded to DiffViewProvider +- [x] Panel viewColumn read from actual panel after creation (not symbolic -1) +- [x] `onDidChangeViewState` tracks viewColumn changes if panel moves + +--- + +## 3. 
Architecture Overview + +``` +User types request → ChatView intercepts (multi-orchestrator mode check) + → Posts "multiOrchStartPlan" message to extension host + → webviewMessageHandler routes to MultiOrchestrator.execute() + +MultiOrchestrator.execute(): + Phase 1: PLAN + → plan-generator.ts calls LLM via completePrompt() + → Parses JSON response into OrchestratorPlan with PlannedTask[] + → If planReviewEnabled: returns early, UI shows PlanReviewPanel + → If not: proceeds to executeFromPlan() + + Phase 2: SPAWN + → worktree-manager.ts: creates git worktrees (if git repo exists) + → panel-spawner.ts: uses vscode.setEditorLayout for N columns + → Creates N ClineProviders, each with: + - setAutoApprovalOverrides (multiOrchForceApproveAll) + - setWorkingDirectory (worktree path) + - handleModeSwitch (planned mode) + - viewColumn (actual panel column number) + → createTask(description, startTask: false) on each provider + → agent-system-prompt.ts prefix prepended to each task description + + Phase 3: RUN + → agent-coordinator.ts: startAll() fires task.start() on each + → Listens for TaskCompleted / TaskAborted events + → Captures completionReport from clineMessages + → Calls abortTask() after completion to break while loop + → waitForAll() resolves when all agents complete + + Phase 4: MERGE (if git worktrees were used) + → merge-pipeline.ts: sequential git merge of agent branches + + Phase 5: VERIFY (partially implemented) + → Spawns a debug agent to review changes (optional) + + Phase 6: REPORT + → report-aggregator.ts: markdown summary + → Panels close after 2-second delay + → Layout restored via vscode.setEditorLayout +``` + +--- + +## 4. 
Complete File Map + +### Core Multi-Orchestrator Files + +| File | Lines | Purpose | Status | +|---|---|---|---| +| `src/core/multi-orchestrator/types.ts` | ~100 | OrchestratorPlan, PlannedTask, AgentState, MergeResult, OrchestratorState, constants | Working | +| `src/core/multi-orchestrator/orchestrator.ts` | ~350 | Top-level lifecycle coordinator, executeFromPlan() | Has bugs | +| `src/core/multi-orchestrator/panel-spawner.ts` | ~170 | Creates N ClineProvider + WebviewPanel instances | Has bugs | +| `src/core/multi-orchestrator/agent-coordinator.ts` | ~255 | Event-based lifecycle tracking, startAll(), waitForAll() | Has bugs | +| `src/core/multi-orchestrator/agent-system-prompt.ts` | ~65 | Parallel execution context prefix for agent prompts | Working | +| `src/core/multi-orchestrator/plan-generator.ts` | ~255 | LLM-powered task decomposition via completePrompt() | Working | +| `src/core/multi-orchestrator/worktree-manager.ts` | ~93 | Git worktree creation/cleanup per agent | Untested | +| `src/core/multi-orchestrator/merge-pipeline.ts` | ~100 | Sequential git branch merging | Untested | +| `src/core/multi-orchestrator/report-aggregator.ts` | ~60 | Markdown report formatting | Working | + +### Test Files + +| File | Tests | Status | +|---|---|---| +| `src/core/multi-orchestrator/__tests__/types.spec.ts` | ~5 | Passing | +| `src/core/multi-orchestrator/__tests__/plan-generator.spec.ts` | ~5 | Passing | +| `src/core/multi-orchestrator/__tests__/report-aggregator.spec.ts` | ~5 | Passing | +| `src/core/multi-orchestrator/__tests__/e2e.spec.ts` | ~10 | Passing | + +### UI Components + +| File | Purpose | Status | +|---|---|---| +| `webview-ui/src/components/multi-orchestrator/AgentCountSelector.tsx` | Dropdown (1-6) in chat toolbar | Working | +| `webview-ui/src/components/multi-orchestrator/MultiOrchStatusPanel.tsx` | Status display during execution | Working | +| `webview-ui/src/components/multi-orchestrator/PlanReviewPanel.tsx` | Plan approval UI | Working | + +### 
Modified Existing Files + +| File | Changes Made | Status | +|---|---|---| +| `packages/types/src/mode.ts` | Added multi-orchestrator to DEFAULT_MODES | Working | +| `packages/types/src/global-settings.ts` | Added multiOrchMaxAgents, multiOrchPlanReviewEnabled, multiOrchMergeEnabled | Working | +| `packages/types/src/vscode-extension-host.ts` | Added multiOrch* message types | Working | +| `src/core/webview/ClineProvider.ts` | Added getMultiOrchestrator(), setWorkingDirectory(), viewColumn, setAutoApprovalOverrides(), getAllInstances() | Working | +| `src/core/webview/webviewMessageHandler.ts` | Added multiOrchStartPlan, multiOrchApprovePlan, multiOrchAbort, multiOrchGetStatus handlers | Working | +| `src/core/auto-approval/index.ts` | Added multiOrchForceApproveAll bypass + resume ask exclusion | Partially working | +| `webview-ui/src/components/chat/ChatTextArea.tsx` | Added AgentCountSelector (conditional on mode) + multi-orch send intercept | Working | +| `webview-ui/src/components/settings/SettingsView.tsx` | Added multi-orchestrator settings section | Has bugs | +| `src/integrations/editor/DiffViewProvider.ts` | Added viewColumn parameter, threaded through all showTextDocument/vscode.diff calls | Partially working | + +--- + +## 5. Bug #1: Diff Views Open In Wrong Pane / Steal Focus +**Severity**: CRITICAL +**Status**: PARTIALLY FIXED — diffs now open in the correct column but still displace the agent's webview + +### Symptom +When Agent 1 creates or edits a file, the diff view opens in the correct column (fixed from previous bug where it went to a random column), BUT it replaces the agent's chat webview panel. The user can no longer see the agent's chat stream while the diff is open. + +### Root Cause Analysis +VS Code's editor groups can hold ONE visible editor at a time (with tabs for switching). When `DiffViewProvider.open()` calls `vscode.commands.executeCommand("vscode.diff", ...)` with `viewColumn: X`, it opens a new tab in that column's editor group. 
The agent's WebviewPanel is ALSO a tab in that same group. The diff tab becomes the active tab, hiding the webview. + +There is NO VS Code API to show two editors side-by-side within a single editor group. An editor group always shows one active tab with a tab bar above for switching. + +### What Was Tried +1. **Threading ViewColumn** from PanelSpawner → ClineProvider → Task → DiffViewProvider — This was successful and diffs now target the correct column +2. **Reading actual panel.viewColumn** after creation instead of symbolic ViewColumn.Active (-1) — Fixed the wrong-column issue +3. **onDidChangeViewState** tracking — Keeps viewColumn in sync if panel moves + +### Why It's Not Fully Fixed +The diff CORRECTLY opens in the agent's column, but it DISPLACES the webview. There's no way to show both the webview panel and the diff editor simultaneously in the same column. The options are: +- Open diff in a DIFFERENT column (but then which one? And it creates new columns) +- Suppress diff views entirely (use `preventFocusDisruption` experiment) +- Render diffs inside the webview as HTML (custom diff renderer) + +### Files Involved +- `src/integrations/editor/DiffViewProvider.ts` (lines 45, 225-229, 417-421, 486-490, 556-571, 683-687) +- `src/core/multi-orchestrator/panel-spawner.ts` (line 120, stores viewColumn) +- `src/core/webview/ClineProvider.ts` (line 162, viewColumn property) +- `src/core/task/Task.ts` (line 511, passes viewColumn to DiffViewProvider) + +### Recommended Fix +**Option A (Quick)**: Enable `preventFocusDisruption` experiment for all spawned agents. This makes file edits save directly without opening diff views. Files still get written, but no visual diff during editing. + +**Option B (Better, much harder)**: Build a custom diff renderer inside the webview using `diff2html` or `monaco-diff`. This would render diffs as HTML within the agent's chat stream, keeping the webview visible. 
+ +**IMPORTANT**: Option A was attempted by setting `experiments: { preventFocusDisruption: true }` in the auto-approval overrides, but the experiment flag is NOT part of the auto-approval overrides system. It's read from the provider state's `experiments` field which comes from ContextProxy, NOT from `_autoApprovalOverrides`. This is why the fix didn't take effect. See Bug #15. + +--- + +## 6. Bug #2: API Rate Limiting When Multiple Agents Start +**Severity**: CRITICAL +**Status**: ATTEMPTED FIX — staggered starts added but may not have taken effect (see Bug #15) + +### Symptom +When 3 agents start simultaneously, the API provider returns "Provider ended the request: terminated" and "API Streaming Failed" errors. The auto-retry mechanism then cascades into repeated failures. Agents get stuck in a loop of: attempt → fail → retry → fail → retry. + +### Root Cause Analysis +All agents use the same API key and hit the same provider endpoint. When 3 requests arrive within milliseconds of each other, the provider's rate limiter terminates subsequent requests. Each failed request triggers Roo's auto-retry (with backoff), but since all agents retry simultaneously, the rate limiting continues. + +### What Was Tried +1. **Simultaneous start via tight loop** — Made the problem worse +2. **Staggered start with 2-second gaps** — Added `await new Promise(r => setTimeout(r, 2000))` between starts in `startAll()`. Changed `startAll()` from `void` to `async`. Changed orchestrator to `await this.coordinator.startAll()`. + +### Why It May Not Have Worked +The `startAll()` was changed to async with delays, and the orchestrator was updated to await it. However, the fix may not have taken effect because: +1. The TypeScript compilation was clean but the running extension may not have been reloaded +2. OR the `experiments` override (Bug #15) prevented the extension from applying changes correctly +3. 
OR the stagger delay isn't long enough — some providers need 5+ seconds between requests + +### Files Involved +- `src/core/multi-orchestrator/agent-coordinator.ts` (startAll method, ~line 132) +- `src/core/multi-orchestrator/orchestrator.ts` (~line 317, calls startAll) + +### Recommended Fix +1. Verify the staggered start is actually running (check console logs for "[AgentCoordinator] Staggering N agent starts") +2. If stagger is running but still failing: increase delay to 5 seconds +3. Consider using separate API keys per agent (if user has multiple profiles) +4. Add exponential backoff awareness: if an agent gets rate limited, PAUSE all other agents for 10 seconds + +--- + +## 7. Bug #3: Agents Don't Start Simultaneously +**Severity**: LOW (cosmetic after stagger fix) +**Status**: INTENTIONALLY CHANGED — now staggered for rate limiting reasons + +### Original Symptom +Agent 1 started 1-3 seconds before Agent 3. + +### Resolution +This was initially a bug (sequential `task.start()` calls in a for loop). It was fixed to fire all start() calls simultaneously. Then it was REVERTED to staggered starts (2-second gaps) to fix Bug #2 (API rate limiting). The stagger is intentional. + +--- + +## 8. Bug #4: Panel Layout — Panels Don't Land In Correct Columns +**Severity**: HIGH +**Status**: MULTIPLE FIX ATTEMPTS — still inconsistent + +### Symptom +After `vscode.setEditorLayout` creates N columns, panels don't always land in the expected columns. Sometimes panels stack in one column, or they land in columns 2 and 3 but miss column 1. + +### Root Cause Analysis +The `vscode.setEditorLayout` command creates editor groups, but the group indices don't necessarily map to ViewColumn numbers 1, 2, 3. VS Code's internal group management is opaque — extensions can't directly control which group gets which index. + +### What Was Tried +1. **Explicit ViewColumn numbers** (ViewColumn.One, Two, Three) — Panels sometimes overlapped with existing editors +2. 
**ViewColumn.Beside** — Panels created to the right of each other, but inconsistent +3. **ViewColumn.Active + focusNextGroup** — Focus first group, create panel, move focus to next group, create next panel. This was the most reliable approach. +4. **setEditorLayout + explicit ViewColumn** — Set N-column layout first, then place panels at ViewColumn 1, 2, 3. This worked for the layout but panels didn't always land in the right columns. + +### Why It's Still Broken +VS Code's editor group system is non-deterministic from the extension's perspective. The same sequence of commands can produce different layouts depending on: +- What editors are already open +- The current sidebar position (left vs right) +- Whether the terminal panel is visible +- The window size +- Previous layout state + +### Files Involved +- `src/core/multi-orchestrator/panel-spawner.ts` (spawnPanels method, ~line 34) + +### Recommended Fix +The most reliable approach found was the `focusNextGroup` pattern: +```typescript +await vscode.commands.executeCommand("workbench.action.focusFirstEditorGroup") +for (let i = 0; i < count; i++) { + if (i > 0) await vscode.commands.executeCommand("workbench.action.focusNextGroup") + createPanel(ViewColumn.Active) +} +``` +This should be tested with various starting states (no editors open, editors open, terminal visible, etc.) + +--- + +## 9. Bug #5: Task Completion Loop — Agents Keep Running After Finishing +**Severity**: CRITICAL +**Status**: FIXED — but verify in next session + +### Symptom +When an agent calls `attempt_completion`, it shows "Task Completed" but then immediately starts making new API requests. Multiple "Task Completed" messages stack up. + +### Root Cause Analysis +The `attempt_completion` tool (AttemptCompletionTool.ts) calls `task.ask("completion_result")`. The `multiOrchForceApproveAll` auto-approval returns `{ decision: "approve" }` which calls `approveAsk()` which sends `"yesButtonClicked"`. 
In AttemptCompletionTool, `response === "yesButtonClicked"` triggers `emitTaskCompleted(task)` and `return`. + +HOWEVER, `emitTaskCompleted()` only emits an event — it doesn't set `task.abort = true`. The outer `while (!this.abort)` loop in Task.ts:2573 continues running and makes another API call. + +### Fix Applied +In `agent-coordinator.ts`, when `TaskCompleted` is received, the coordinator now calls `currentTask.abortTask(false)` to set `task.abort = true`, which breaks the while loop. + +Additionally, `resume_completed_task` and `resume_task` asks are excluded from `multiOrchForceApproveAll` to prevent restarting finished tasks. + +### Files Involved +- `src/core/multi-orchestrator/agent-coordinator.ts` (TaskCompleted handler, ~line 33-55) +- `src/core/auto-approval/index.ts` (multiOrchForceApproveAll section) +- `src/core/tools/AttemptCompletionTool.ts` (lines 132-136, completion flow) +- `src/core/task/Task.ts` (line 2573, while loop; line 2311, abortTask) + +### Verification Needed +Test with 2-3 agents. Each should show exactly ONE "Task Completed" message and then stop. No more API requests after completion. + +--- + +## 10. Bug #6: Auto-Approval Not Working For Spawned Agents +**Severity**: CRITICAL +**Status**: PARTIALLY FIXED — `multiOrchForceApproveAll` added but may not take effect for all ask types + +### Symptom +Spawned agent panels show yellow "Approve" / "Deny" buttons for file operations, despite having auto-approval enabled. Nobody is watching these panels to click the buttons, so the agents hang waiting for approval. + +### Root Cause Analysis (Multi-layered) + +**Layer 1 — ContextProxy is shared**: All ClineProviders from the same extension context share a single `ContextProxy` instance. Setting auto-approval via `setValues()` on one provider affects ALL providers. This was solved by using `setAutoApprovalOverrides()` which stores overrides in provider instance memory. 
+ +**Layer 2 — Outside workspace blocking**: The original overrides had `alwaysAllowReadOnlyOutsideWorkspace: false` and `alwaysAllowWriteOutsideWorkspace: false`. When agents tried to read/write files outside the workspace (e.g., `/home/user/Desktop`), these were blocked. Fixed by setting both to `true`. + +**Layer 3 — Followup questions**: The auto-approval for followup questions requires `followupAutoApproveTimeoutMs > 0` AND a `suggestion` in the JSON text. Open-ended questions without suggestions always block. The `multiOrchForceApproveAll` flag was added to bypass this. + +**Layer 4 — Command execution**: Commands need to pass `getCommandDecision()` check against allowed/denied command lists. The `multiOrchForceApproveAll` flag bypasses this. + +**Layer 5 — Nuclear option**: Added `multiOrchForceApproveAll` flag that short-circuits the ENTIRE `checkAutoApproval()` function. When true, returns `{ decision: "approve" }` for ALL ask types EXCEPT `resume_completed_task` and `resume_task`. + +### What Was Done +1. Added `setAutoApprovalOverrides()` method to ClineProvider +2. Set comprehensive auto-approval config: `autoApprovalEnabled: true`, all `alwaysAllow*: true`, `writeDelayMs: 0`, `requestDelaySeconds: 0` +3. Added `multiOrchForceApproveAll: true` to overrides +4. Added nuclear bypass in `checkAutoApproval()` that checks this flag early + +### Why It May Still Not Work +The `multiOrchForceApproveAll` flag is set via `_autoApprovalOverrides` which is spread last in `getState()`. But `checkAutoApproval()` receives `state` from `provider.getState()`. The `multiOrchForceApproveAll` key is NOT a standard `ExtensionState` field — it's an extra field added via the spread. The TypeScript type does not include it, so the check `(state as Record<string, unknown>).multiOrchForceApproveAll` relies on a type assertion. + +If `getState()` somehow strips unknown keys (e.g., via Zod validation), the flag would be lost. 
Need to verify that `getState()` preserves the spread fields without filtering. + +### Files Involved +- `src/core/auto-approval/index.ts` (lines 74-86, multiOrchForceApproveAll check) +- `src/core/webview/ClineProvider.ts` (lines 2761-2767, setAutoApprovalOverrides; line 2634, spread in getState) +- `src/core/multi-orchestrator/orchestrator.ts` (lines 191-207, autoApprovalOverrides definition) + +### Recommended Fix +1. Add `multiOrchForceApproveAll` to the ExtensionState type definition so it's a first-class citizen, not a type assertion +2. OR: instead of using a state flag, make the auto-approval check look at the provider directly: +```typescript +if (provider._autoApprovalOverrides?.multiOrchForceApproveAll) { + return { decision: "approve" } +} +``` + +--- + +## 11. Bug #7: Agent Count Not Respected +**Severity**: MEDIUM +**Status**: FIXED + +### Symptom +User selects 3 agents in the dropdown, but only 2 are created. + +### Root Cause +Three issues: +1. The `AgentCountSelector` had `value={4}` hardcoded instead of reading from `extensionState.multiOrchMaxAgents` +2. The plan generator had a "short-request heuristic" that sliced plans to 2 tasks for requests under 20 words +3. The LLM prompt said "SHOULD use up to N" instead of "MUST create EXACTLY N" + +### Fix Applied +1. AgentCountSelector now reads from `extensionState.multiOrchMaxAgents ?? 4` +2. Short-request heuristic removed entirely +3. Prompt changed to "MUST create EXACTLY N tasks" +4. Hard cap: `tasks.slice(0, maxAgents)` after parsing + +### Files Involved +- `webview-ui/src/components/chat/ChatTextArea.tsx` (line 1349) +- `src/core/multi-orchestrator/plan-generator.ts` (lines 77, 239) + +--- + +## 12. Bug #8: Settings Don't Persist Across Tab Switches +**Severity**: MEDIUM +**Status**: UNFIXED + +### Symptom +Multi-orchestrator settings (max agents, plan review toggle, merge mode) reset when the user navigates away from the Memory settings tab and returns. 
+ +### Root Cause +The settings section uses `cachedState` + `setCachedStateField` which buffers changes until Save. But the multi-orch settings may not be included in the Save handler's payload. Additionally, the `updateSettings` message handler writes to ContextProxy, but these keys may not be in the `globalSettingsSchema` Zod schema, causing them to be silently dropped. + +### Files Involved +- `webview-ui/src/components/settings/SettingsView.tsx` (multi-orch settings section) +- `src/core/webview/webviewMessageHandler.ts` (case "updateSettings", line 655) +- `packages/types/src/global-settings.ts` (globalSettingsSchema) + +### Recommended Fix +Verify that `multiOrchMaxAgents`, `multiOrchPlanReviewEnabled`, `multiOrchMergeEnabled` are in `globalSettingsSchema`. They SHOULD be (added by Agent 2 early in the session), but verify they survived all the merge operations. + +--- + +## 13. Bug #9: Multi-Orchestrator Send Button Does Nothing +**Severity**: CRITICAL +**Status**: FIXED + +### Symptom +When the user types a message and presses Enter in multi-orchestrator mode, the message disappears — nothing happens. + +### Root Cause +The `onSend` callback in ChatTextArea goes through the normal chat flow (creates a Task, sends to the API). But the multi-orchestrator needs its own flow: intercept the send, post `multiOrchStartPlan` instead. + +### Fix Applied +In `ChatView.tsx` (or wherever the send handler is defined), the mode is checked. If `multi-orchestrator`, the message is posted as `{ type: "multiOrchStartPlan", text: inputValue }` instead of the normal task creation message. + +### Files Involved +- `webview-ui/src/components/chat/ChatView.tsx` or `ChatTextArea.tsx` (send handler) + +--- + +## 14. Bug #10: Git Worktrees Not Isolating Agent File Operations +**Severity**: HIGH +**Status**: PARTIALLY FIXED + +### Symptom +Agents create files in the same directory, causing conflicts. Git worktrees are supposed to isolate each agent. + +### Root Cause +1. 
Worktrees were only created if `needsMerge` was true AND `isGitRepo()` returned true +2. When worktrees WERE created, the spawned providers weren't initially told to use the worktree paths as their working directory + +### Fix Applied +1. Added `isGitRepo()` check to gracefully skip worktrees for non-git directories +2. Added `setWorkingDirectory()` method to ClineProvider +3. Orchestrator now calls `spawned.provider.setWorkingDirectory(agent.worktreePath)` before creating the task + +### What's Still Broken +- Worktrees haven't been tested in a real git repo scenario during this session +- The merge pipeline (`merge-pipeline.ts`) hasn't been tested in production +- If the workspace isn't a git repo, agents still share the same directory + +### Files Involved +- `src/core/multi-orchestrator/worktree-manager.ts` +- `src/core/multi-orchestrator/orchestrator.ts` (worktree creation section, ~line 134-159) +- `src/core/webview/ClineProvider.ts` (setWorkingDirectory, ~line 2005) + +--- + +## 15. Bug #11: Completion Reports Not Captured +**Severity**: HIGH +**Status**: FIXED + +### Symptom +The orchestrator's final report shows agent statuses but no detailed completion reports. + +### Root Cause +The `AgentCoordinator` listened for `TaskCompleted` but never extracted the completion text from the task's messages. + +### Fix Applied +In the `TaskCompleted` handler, before calling `abortTask()`, the coordinator now reads the task's `clineMessages` array, finds the last message with `say === "completion_result"`, and stores its `text` in `agentState.completionReport`. + +### Files Involved +- `src/core/multi-orchestrator/agent-coordinator.ts` (TaskCompleted handler) + +--- + +## 16. Bug #12: Agent Panels Don't Close After Orchestration Completes +**Severity**: MEDIUM +**Status**: FIXED + +### Symptom +After all agents complete and the orchestrator shows "complete", the agent panels remain open. 
+ +### Fix Applied +Added a `setTimeout` after Phase 6 (report) that calls `panelSpawner.closeAllPanels()` after a 2-second delay. The delay lets the user see the final state before panels vanish. `closeAllPanels()` also restores the original editor layout. + +### Files Involved +- `src/core/multi-orchestrator/orchestrator.ts` (~line 338-348) +- `src/core/multi-orchestrator/panel-spawner.ts` (closeAllPanels restores saved layout) + +--- + +## 17. Bug #13: Diff View Doesn't Revert Back To Agent's Chat View +**Severity**: HIGH +**Status**: UNFIXED + +### Symptom +When an agent edits a file and the diff view opens in the agent's column, it replaces the agent's chat webview. After the diff is complete, the view stays on the diff editor — the webview doesn't come back. + +### Root Cause +VS Code's editor group tab system: the diff tab becomes the active tab, pushing the webview tab to the background. There's no automatic mechanism to switch back to the webview tab after the diff closes. The DiffViewProvider calls `closeAllDiffViews()` which closes the diff tab, but it doesn't explicitly reveal the webview panel. + +### Recommended Fix +After `closeAllDiffViews()` in DiffViewProvider, call: +```typescript +// Reveal the webview panel to bring it back to the foreground +const task = this.taskRef.deref() +const provider = task?.providerRef.deref() +if (provider?.view && 'reveal' in provider.view) { + (provider.view as vscode.WebviewPanel).reveal(this.viewColumn, true) +} +``` + +OR: Use `preventFocusDisruption` to never open diffs in the first place (see Bug #15). + +--- + +## 18. Bug #14: Diff View Not Streaming While Being Created +**Severity**: MEDIUM +**Status**: UNFIXED (by design with preventFocusDisruption) + +### Symptom +The user wants to see the diff being streamed in real-time as the agent edits a file, similar to how Roo normally shows diffs character by character. 
+ +### Root Cause +The streaming diff is Roo's normal behavior when `preventFocusDisruption` is OFF. The agent writes content progressively, and the DiffViewProvider updates the diff view in real-time. However, in the multi-orchestrator context, the diff view DISPLACES the webview (Bug #13), making the streaming diff useless because the chat is hidden. + +### Recommended Fix +This is best solved by building a custom diff renderer inside the webview (FEAT-003 in the master spec). The diff would render as HTML within the agent's chat stream, showing changes without opening a separate editor tab. + +--- + +## 19. Bug #15: preventFocusDisruption Experiment Not Taking Effect +**Severity**: CRITICAL +**Status**: UNFIXED — This is the root cause of why Bug #1 fixes don't work + +### Symptom +Setting `experiments: { preventFocusDisruption: true }` in the auto-approval overrides doesn't prevent diff views from opening. + +### Root Cause Analysis +The `experiments` field in `autoApprovalOverrides` is set via `setAutoApprovalOverrides()` which stores in `_autoApprovalOverrides`. This is spread last in `getState()`. HOWEVER, the `experiments` field in the state is a nested object. The spread would REPLACE the entire `experiments` object with just `{ preventFocusDisruption: true }`, potentially losing other experiment flags. + +More importantly: the tools that check `preventFocusDisruption` (WriteToFileTool, ApplyDiffTool, etc.) read the experiment flag from the Task's state, NOT from getState(). They typically do: +```typescript +const experiments = this.task.experiments ?? {} +if (experiments.preventFocusDisruption) { ... } +``` +The Task's `experiments` is set during construction from the provider's state at that moment. If the experiment flag wasn't in the state when the Task was created, it won't be there later even if the overrides are set. + +### The Real Fix +The experiment needs to be set BEFORE `createTask()` is called. Options: +1. 
Set it via `provider.contextProxy.setValue("experiments", { ...existing, preventFocusDisruption: true })` BEFORE createTask +2. OR: set it as a Task constructor option +3. OR: modify the auto-approval overrides to merge experiments rather than replace + +### Files Involved +- `src/core/multi-orchestrator/orchestrator.ts` (experiments in overrides, ~line 205) +- `src/core/webview/ClineProvider.ts` (getState, _autoApprovalOverrides spread) +- `src/core/task/Task.ts` (experiments initialization in constructor) +- `src/core/tools/WriteToFileTool.ts`, `ApplyDiffTool.ts`, `EditFileTool.ts` (experiment check) +- `src/shared/experiments.ts` (EXPERIMENT_IDS) + +--- + +## 20. Bug #16: Stop/Pause Button Visual State Not Updating +**Severity**: LOW +**Status**: UNFIXED + +### Symptom +When the user clicks the stop/pause button on an agent panel, the button doesn't visually change to indicate the paused state. The square icon stays the same. + +### Root Cause +The webview's stop button component likely doesn't have a "paused" visual state for the multi-orchestrator context. It may only have "streaming" (shows square) and "not streaming" (shows play/send) states. + +### Recommended Fix +This is a webview UI fix. Find the stop button component and add a visual state for "paused by user" (e.g., change color, show pause icon instead of square). + +--- + +## 21. Bug #17: Cannot Stop/Resume Individual Agents Mid-Execution +**Severity**: MEDIUM +**Status**: NOT IMPLEMENTED + +### Description +Users should be able to pause an individual agent, provide additional instructions, and resume. Currently the only option is to abort ALL agents. + +### Implementation Approach +1. Add "pause" capability to the coordinator: `pauseAgent(taskId)` → calls `task.abortTask(false)` but marks agent as "paused" not "failed" +2. Add "resume" capability: `resumeAgent(taskId)` → creates a new task continuation in the same provider +3. The webview needs a per-panel pause/resume button +4. 
The agent's system prompt should note that it was paused and may receive additional instructions + +--- + +## 22. Bug #18: Post-Completion Verification Phase Not Triggering +**Severity**: MEDIUM +**Status**: PARTIALLY IMPLEMENTED + +### Description +After all agents complete, a verification agent should spawn to check the work. The code exists in `orchestrator.ts` but the setting `multiOrchVerifyEnabled` may not be properly wired. + +### Files Involved +- `src/core/multi-orchestrator/orchestrator.ts` (verification phase, ~line 430+) +- `packages/types/src/global-settings.ts` (multiOrchVerifyEnabled setting) + +--- + +## 23. Bug #19: Architect Mode Assigned As Parallel Task +**Severity**: LOW +**Status**: FIXED + +### Symptom +The plan generator assigned "architect" mode as a parallel task alongside "code" tasks. + +### Fix Applied +Filtered architect, orchestrator, and multi-orchestrator from the available modes list in the plan generator prompt. Only code, ask, and debug are available for parallel tasks. + +--- + +## 24. Bug #20: Short-Request Heuristic Reducing Task Count +**Severity**: LOW +**Status**: FIXED + +### Symptom +A post-processing step sliced plans to 2 tasks for requests under 20 words. + +### Fix Applied +Removed the heuristic entirely. The `maxAgents` hard cap at `tasks.slice(0, maxAgents)` is sufficient. + +--- + +## 25. Bug #21: Finished Sub-Tasks Don't Flow Back To Multi-Orchestrator +**Severity**: CRITICAL +**Status**: REGRESSION — was working briefly, now broken again + +### Symptom +After all 3 agents complete their tasks and show "Task Completed", the multi-orchestrator sidebar does NOT proceed to the next phases (merge, verify, report). The sidebar shows "Multi-Orchestration: running" with "0/3 agents complete" or similar stale state. The orchestrator never receives the completion signals and never generates the final aggregated report. 
+ +In an earlier session iteration, this DID work — the orchestrator collected all reports and displayed a unified summary in the sidebar. Something in the subsequent fixes broke the flow. + +### Root Cause Analysis + +The completion flow has multiple potential failure points: + +**Point 1 — TaskCompleted event not emitted by ClineProvider**: The `AgentCoordinator` listens for `RooCodeEventName.TaskCompleted` on the ClineProvider instance. But TaskCompleted is emitted by the Task object, and ClineProvider forwards it. If the event forwarding chain is broken (e.g., because the task was aborted before the event could propagate), the coordinator never hears about it. + +**Point 2 — abortTask() kills the event chain**: When `TaskCompleted` fires, the coordinator calls `currentTask.abortTask(false)` to prevent the while-loop from continuing. But `abortTask()` also emits `TaskAborted` and calls `dispose()` on the task. If `dispose()` removes event listeners BEFORE the `TaskCompleted` event fully propagates through the ClineProvider, the coordinator's handler may not execute completely. + +The sequence might be: +1. Task calls `attempt_completion` → auto-approved → `emitTaskCompleted()` emits TaskCompleted +2. Coordinator receives TaskCompleted → starts handling +3. Coordinator calls `currentTask.abortTask(false)` DURING the handler +4. `abortTask()` → sets `this.abort = true` → emits TaskAborted → calls `dispose()` +5. `dispose()` removes all event listeners on the Task +6. But the coordinator's handler is still running... or is it? + +The problem: `abortTask()` is async and is called with `.catch(() => {})` (fire-and-forget). It might race with the completion handling. + +**Point 3 — waitForAll() never resolves**: The `waitForAll()` method waits for the `allCompleted` event. This event fires when `completedSet.size >= agents.size`. 
If even ONE agent's completion is missed (due to the race condition above), `allCompleted` never fires, and the orchestrator hangs at `await this.coordinator.waitForAll()` forever. The 10-minute timeout eventually fires and marks it as failed. + +**Point 4 — The stagger may have broken event ordering**: The recent change to stagger agent starts (2-second gaps) made `startAll()` async. The orchestrator now `await`s it. But event listeners for `agentCompleted` and `agentFailed` are attached BEFORE `startAll()` is called (line 301-302). If an agent completes DURING the stagger (e.g., Agent 1 finishes before Agent 3 even starts), the coordinator might miss the early completion. + +Wait — actually looking at the code, event listeners are attached at line 301-302, BEFORE `startAll()` at line 317. So early completions SHOULD be caught. Unless the stagger introduces a different issue... + +**Point 5 — Panel closure interferes**: The 2-second delayed `closeAllPanels()` at line 338-348 fires after completion. But if `waitForAll()` hasn't resolved yet (because completions are missed), the panels are never closed, and the orchestrator hangs. + +### Evidence From User Testing +- The screenshots show all 3 agent panels with "Task Completed" visible +- The orchestrator sidebar shows the correct number of agents and their names +- But the sidebar doesn't show the aggregated report or "Multi-Orchestration: complete" +- In a previous iteration (before the stagger and abort fixes), reports DID flow back successfully + +### What Changed Between "Working" and "Not Working" +The regression likely came from ONE of these commits: +1. `fix(multi-orch): stop task completion loop + add agent system prompt` — Added `abortTask()` call in the TaskCompleted handler +2. `fix(multi-orch): stagger agent starts + suppress diff views` — Changed `startAll()` to async with delays +3. 
`fix(multi-orch): prevent task completion loop by excluding resume asks` — Modified auto-approval flow + +### Recommended Fix + +**Option A — Remove abortTask() from the completion handler**: +Instead of calling `abortTask()` to break the while loop, set `task.abort = true` DIRECTLY without calling the full `abortTask()` method (which emits events and disposes): +```typescript +// In agent-coordinator.ts TaskCompleted handler: +const currentTask = provider.getCurrentTask() +if (currentTask) { + // Set abort flag directly — DON'T call abortTask() which + // emits TaskAborted and disposes the task, potentially + // interfering with completion event propagation. + (currentTask as any).abort = true + console.log(`[AgentCoordinator] Set abort=true on task for agent ${agent.taskId}`) +} +``` + +**Option B — Ensure completion handling finishes before abort**: +```typescript +// In agent-coordinator.ts TaskCompleted handler: +// Handle completion FULLY first +this.handleAgentFinished(agent.taskId, "completed", tokenUsage) + +// Only THEN abort, and do it on the next tick so the current +// event processing completes first +setTimeout(() => { + const currentTask = provider.getCurrentTask() + if (currentTask) { + currentTask.abortTask(false).catch(() => {}) + } +}, 100) +``` + +**Option C — Don't abort at all, rely on the while-loop's natural exit**: +The while loop at Task.ts:2573 is `while (!this.abort)`. After `attempt_completion` returns, the loop calls `recursivelyMakeClineRequests` again. If `attempt_completion` was the last tool use and returned successfully, the next API call should produce another `attempt_completion` (the LLM knows the task is done). The auto-approval handles this. The loop would naturally exit when the max request limit is hit or when the LLM stops producing tool calls. + +This is wasteful (extra API calls) but simpler and avoids the abort race condition. 
+ +### Files Involved +- `src/core/multi-orchestrator/agent-coordinator.ts` (TaskCompleted handler, ~line 33-55) +- `src/core/multi-orchestrator/orchestrator.ts` (waitForAll at ~line 320, event listeners at ~line 301-302) +- `src/core/task/Task.ts` (abortTask at ~line 2311, while loop at ~line 2573) + +### Priority +CRITICAL — This is the most user-visible failure. The entire purpose of the multi-orchestrator (collect reports, merge, verify) depends on completions flowing back. Without this, the feature is essentially broken. + +--- + +## 26. VS Code API Constraints + +These are HARD limitations of the VS Code Extension API that cannot be worked around: + +| Constraint | Impact | Workaround | +|---|---|---| +| Cannot show two editors side-by-side in ONE editor group | Diff views displace webview panels | Use preventFocusDisruption or custom webview diff renderer | +| Cannot control diff editor orientation (always vertical) | Cannot show horizontal diffs | Render custom diffs in webview using diff2html | +| Tab bar position is global (not per-panel) | Cannot have bottom tabs for agents | Render file list as HTML inside webview | +| Vertical tab scrolling not controllable | Cannot customize tab behavior | N/A | +| Editor group indices are opaque | Panels don't always land in expected columns | Use focusNextGroup + ViewColumn.Active pattern | +| createWebviewPanel placement is non-deterministic | Panels may not go where expected | Set layout first, then create panels | + +### What IS Possible +- `vscode.setEditorLayout({ orientation, groups })` — create complex layouts +- `vscode.getEditorLayout` — save/restore layouts +- `panel.viewColumn` — read actual column after creation +- `panel.onDidChangeViewState` — track column changes +- `showTextDocument(uri, { viewColumn })` — open files in specific columns +- `workbench.action.focusFirstEditorGroup` / `focusNextGroup` — control focus +- `preserveFocus: true` on panel creation — prevent focus theft +- Custom HTML/CSS/JS 
rendering inside webviews — full control + +--- + +## 27. Attempted Fixes That Didn't Work + +| Attempt | Why It Failed | +|---|---| +| 80+ agents deployed to fix bugs | Agents make local fixes without understanding cross-component interactions | +| Setting experiments via autoApprovalOverrides | Experiments are read from Task constructor, not runtime state | +| Simultaneous task.start() via tight loop | API rate limiting kills all requests | +| ViewColumn.Beside for panel placement | Inconsistent — VS Code decides where "beside" is | +| Explicit ViewColumn numbers (1, 2, 3) | Don't always map to the expected editor groups | +| Suppressing approve/deny UI rendering | Couldn't find the specific component to modify | +| Promise.all for parallel task creation | Race conditions in ClineProvider shared state | + +--- + +## 28. Architectural Root Causes + +### Root Cause 1: ClineProvider Was Designed For Single-Task +Every method, event handler, and state management in ClineProvider assumes a single active task. The `clineStack` is a LIFO stack, `getCurrentTask()` returns the top, and `removeClineFromStack()` enforces the single-open invariant. Running N independent ClineProviders works in theory, but they all share the same ContextProxy singleton, which creates cross-contamination. + +### Root Cause 2: VS Code Editor Groups ≠ Application Windows +Each editor group shows ONE active tab. Webview panels are tabs. Diff editors are tabs. They compete for the same space. There's no "split within a group" concept. + +### Root Cause 3: File Operations Are Global +When a tool writes a file, it uses `vscode.workspace.fs` or `fs.writeFile` which operates on the filesystem. The `showTextDocument` call then opens it in an editor group. The tool doesn't know which ClineProvider/Task initiated it — it just opens in the "active" group unless a ViewColumn is explicitly specified. 
The ViewColumn threading (provider → task → tool → diffProvider) was added but requires EVERY file operation path to pass it through. + +### Root Cause 4: Auto-Approval Is State-Based, Not Provider-Based +The `checkAutoApproval()` function receives `state` (the provider's global state) and makes decisions based on state flags. But state is shared via ContextProxy. The `_autoApprovalOverrides` mechanism works but adds complexity — any code that reads state without going through `getState()` will miss the overrides. + +--- + +## 29. Recommended Strategy For Next Session + +### Priority 1: Fix preventFocusDisruption (Bug #15) +This is the keystone bug. If fixed, it eliminates Bugs #1, #13, #14 automatically. The fix is to set the experiment flag BEFORE task creation, not via overrides: +```typescript +// In orchestrator.ts, before createTask: +const currentExperiments = spawned.provider.contextProxy.getValue("experiments") ?? {} +await spawned.provider.contextProxy.setValue("experiments", { + ...currentExperiments, + preventFocusDisruption: true, +}) +``` + +### Priority 2: Fix Auto-Approval (Bug #6) +Verify `multiOrchForceApproveAll` survives the `getState()` pipeline. Add it as a proper typed field rather than a type assertion. + +### Priority 3: Fix API Rate Limiting (Bug #2) +Verify staggered starts are working. If not, the `startAll()` async change may need to be applied differently. + +### Priority 4: Test In Git Repo +Run the multi-orchestrator in a git-initialized directory to test worktree isolation and the merge pipeline. + +### General Approach +- Fix bugs DIRECTLY, not via agents +- Test after EACH fix (reload extension, run scenario) +- Update this spec after each fix + +--- + +## 30. Features Not Yet Implemented + +### FEAT-001: Post-Completion Verification Phase +After all agents complete, spawn debug/test agents to verify the work. Partially coded but not fully wired. 
+ +### FEAT-002: Orchestrator Continuation +The orchestrator should continue as an active agent after collecting reports, analyzing results, and deciding next steps. + +### FEAT-003: Custom Diff Renderer In Webview +Render diffs as HTML inside the agent's chat stream using diff2html or monaco-diff. This eliminates the webview/diff editor competition. + +### FEAT-004: Stop/Resume Individual Agents +Pause an agent, provide instructions, resume. + +### FEAT-005: Horizontal Diff Layout +If custom diff renderer is built (FEAT-003), render with original on top, modified on bottom. + +### FEAT-006: Agent File Tab Bar +Compact vertical file list at bottom 15% of each agent's webview. + +--- + +## 31. Test Coverage Status + +### Passing Tests +- `src/core/multi-orchestrator/__tests__/types.spec.ts` — type helpers, constants +- `src/core/multi-orchestrator/__tests__/plan-generator.spec.ts` — plan parsing, edge cases +- `src/core/multi-orchestrator/__tests__/report-aggregator.spec.ts` — report formatting +- `src/core/multi-orchestrator/__tests__/e2e.spec.ts` — integration scenarios +- `src/core/memory/__tests__/*.spec.ts` — all 79 memory system tests still passing + +### Not Tested In Production +- Worktree creation/cleanup in a real git repo +- Merge pipeline with actual git branches +- Verification phase agent spawning +- 6-agent simultaneous execution +- API rate limiting recovery +- Panel layout with various VS Code configurations + +### Test Commands +```bash +cd src && npx vitest run core/multi-orchestrator/ # multi-orch tests +cd src && npx vitest run core/memory/ # memory tests (regression check) +cd packages/types && npx tsc --noEmit # type check +cd src && npx tsc --noEmit # extension type check +cd webview-ui && npx tsc --noEmit # webview type check +pnpm lint # full lint +pnpm test # all tests +``` diff --git a/docs/superpowers/specs/MULTI-ORCHESTRATOR-MASTER-SPEC.md b/docs/superpowers/specs/MULTI-ORCHESTRATOR-MASTER-SPEC.md new file mode 100644 index 
00000000000..9ad271b4a03 --- /dev/null +++ b/docs/superpowers/specs/MULTI-ORCHESTRATOR-MASTER-SPEC.md @@ -0,0 +1,373 @@ +# Multi-Orchestrator — Master Spec (Living Document) + +**Last updated**: Session ongoing +**Purpose**: Single source of truth for the entire Multi-Orchestrator feature. Every agent MUST read this spec in full before making any changes. Re-read relevant sections after each edit to ensure consistency. + +--- + +## TABLE OF CONTENTS + +1. [Feature Overview](#feature-overview) +2. [Architecture](#architecture) +3. [Current File Map](#current-file-map) +4. [Status: What Works](#status-what-works) +5. [Status: Known Bugs](#status-known-bugs) +6. [Status: Not Yet Implemented](#status-not-yet-implemented) +7. [Technical Constraints (VS Code API)](#technical-constraints) +8. [Bug Details and Fix Guidance](#bug-details-and-fix-guidance) +9. [Feature Specifications (Not Yet Built)](#feature-specifications) +10. [Agent Assignments](#agent-assignments) + +--- + +## 1. Feature Overview + +The Multi-Orchestrator is a new mode in Roo-Code that decomposes complex tasks into N parallel subtasks (1-6), each running in its own editor tab panel with an independent ClineProvider. Agents execute simultaneously, isolated via git worktrees when available. After all complete, the orchestrator collects reports, merges changes, runs verification, and presents a unified summary. + +### User Flow +1. User selects "Multi-Orchestrator" mode from the mode dropdown +2. Agent count selector appears in the chat toolbar (1-6) +3. User types a request and presses Enter +4. Orchestrator decomposes request via LLM → plan with N tasks +5. If plan-review enabled: shows plan for approval +6. N editor panels open simultaneously in equal-width columns +7. All agents execute their tasks in parallel +8. Agents complete → reports collected → panels close +9. Merge phase runs (if git repo + code changes) +10. Debug/verification phase runs (NEW — not yet built) +11. 
Final report displayed in orchestrator sidebar + +--- + +## 2. Architecture + +``` +┌──────────────────────────────────────────────────────────┐ +│ MULTI-ORCHESTRATOR (sidebar ClineProvider) │ +│ │ +│ Phase 1: PLAN │ +│ └─ plan-generator.ts → LLM decomposes request │ +│ └─ User approves (if plan-review enabled) │ +│ │ +│ Phase 2: SPAWN │ +│ └─ worktree-manager.ts → create git worktrees │ +│ └─ panel-spawner.ts → open N editor tab panels │ +│ └─ agent-system-prompt.ts → inject parallel context │ +│ └─ Set auto-approval overrides on each provider │ +│ └─ Set working directory to worktree path │ +│ └─ Create tasks with startTask: false │ +│ │ +│ Phase 3: RUN │ +│ └─ agent-coordinator.ts → startAll() simultaneously │ +│ └─ Listen for TaskCompleted/TaskAborted events │ +│ └─ Abort tasks on completion (prevent while loop) │ +│ └─ Capture completionReport from clineMessages │ +│ │ +│ Phase 4: MERGE (if git repo + code tasks) │ +│ └─ merge-pipeline.ts → sequential branch merging │ +│ │ +│ Phase 5: VERIFY (NOT YET BUILT) │ +│ └─ Spawn debug/test agents to verify merged code │ +│ │ +│ Phase 6: REPORT │ +│ └─ report-aggregator.ts → markdown summary │ +│ └─ Close all panels → restore layout │ +│ └─ Display report in orchestrator sidebar │ +└──────────────────────────────────────────────────────────┘ +``` + +--- + +## 3. 
Current File Map + +### Core Files (src/core/multi-orchestrator/) +| File | Purpose | Status | +|---|---|---| +| `types.ts` | Shared types, constants, helper functions | DONE | +| `orchestrator.ts` | Top-level lifecycle coordinator | DONE (bugs) | +| `panel-spawner.ts` | Creates N editor tab panels with ClineProviders | DONE (bugs) | +| `worktree-manager.ts` | Git worktree creation/cleanup per agent | DONE (bugs) | +| `plan-generator.ts` | LLM-powered task decomposition | DONE | +| `agent-coordinator.ts` | Event-based lifecycle tracking, startAll | DONE | +| `agent-system-prompt.ts` | Parallel execution context prefix for agents | DONE | +| `merge-pipeline.ts` | Sequential git branch merging | DONE (untested in prod) | +| `report-aggregator.ts` | Markdown report formatting | DONE | + +### Test Files +| File | Status | +|---|---| +| `__tests__/types.spec.ts` | DONE | +| `__tests__/plan-generator.spec.ts` | DONE | +| `__tests__/report-aggregator.spec.ts` | DONE | +| `__tests__/e2e.spec.ts` | DONE | + +### UI Files (webview-ui/) +| File | Status | +|---|---| +| `components/multi-orchestrator/AgentCountSelector.tsx` | DONE | +| `components/multi-orchestrator/MultiOrchStatusPanel.tsx` | DONE | +| `components/multi-orchestrator/PlanReviewPanel.tsx` | DONE | + +### Modified Existing Files +| File | Changes | Status | +|---|---|---| +| `packages/types/src/mode.ts` | Added multi-orchestrator to DEFAULT_MODES | DONE | +| `packages/types/src/global-settings.ts` | Added multiOrch settings fields | DONE | +| `packages/types/src/vscode-extension-host.ts` | Added multi-orch message types | DONE | +| `src/core/webview/ClineProvider.ts` | Added getMultiOrchestrator(), setWorkingDirectory(), getAllInstances(), setAutoApprovalOverrides() | DONE | +| `src/core/webview/webviewMessageHandler.ts` | Added multi-orch message handlers | DONE | +| `src/core/auto-approval/index.ts` | Added multiOrchForceApproveAll bypass | DONE | +| `webview-ui/src/components/chat/ChatTextArea.tsx` | Added 
AgentCountSelector (conditional) | DONE | +| `webview-ui/src/components/settings/SettingsView.tsx` | Added multi-orch settings section | DONE | + +--- + +## 4. Status: What Works (VERIFIED) + +- [x] Multi-orchestrator mode appears in mode dropdown +- [x] Agent count selector shows in chat area when mode is active +- [x] User message intercepted and routed to multiOrchStartPlan handler +- [x] Plan generator decomposes requests via LLM +- [x] Plan review mode (toggle in settings) +- [x] Agent panels spawn in editor area +- [x] Each agent gets its own ClineProvider +- [x] Agent system prompt prefix injected with parallel context +- [x] Auto-approval force-approves all tool operations (multiOrchForceApproveAll) +- [x] Resume asks (resume_completed_task, resume_task) excluded from force-approve +- [x] Agents execute their tasks +- [x] TaskCompleted events captured by coordinator +- [x] Tasks aborted after completion to prevent while-loop restart +- [x] Completion reports captured from clineMessages +- [x] Report aggregated and displayed in orchestrator sidebar +- [x] Panels close after completion (2-second delay) +- [x] Original editor layout saved and restored after panels close +- [x] Settings: max agents, plan review toggle, merge mode (auto/always/never) +- [x] Worktree manager checks for git repo before creating worktrees +- [x] Worktree path set as agent's working directory via setWorkingDirectory() +- [x] Mode switching before task creation (handleModeSwitch) + +--- + +## 5. Status: Known Bugs (ACTIVE) + +### BUG-001: File edits go to wrong pane (FIXED — TESTING) +**Symptom**: When Agent 1 creates/edits a file, the diff view appears in Agent 2's column instead of Agent 1's. +**Root cause FOUND**: PanelSpawner stored `ViewColumn.Active` (-1 symbolic) as `provider.viewColumn`. When DiffViewProvider used it, VS Code interpreted -1 as "open in the currently active group" not "the group where the panel lives". 
+**Fix applied**: Now reads `panel.viewColumn` AFTER creation to get the real column number (1, 2, 3). Also tracks viewColumn changes via `onDidChangeViewState`. The chain: `spawner stores actual column → ClineProvider.viewColumn → Task reads it → DiffViewProvider.viewColumn → all showTextDocument/vscode.diff calls use it`. +**Status**: Fix committed. Needs testing to verify. + +### BUG-002: Agents don't start simultaneously (FIXED) +**Symptom**: Agent 1 starts 1-3 seconds before Agent 3. +**Root cause**: startAll() called task.start() sequentially. +**Fix applied**: startAll() now collects all start thunks into an array, then fires them all in a tight synchronous loop. Note: the remaining 0.5-1s gap is network latency (API requests sent sequentially by the JS event loop) — this is inherent and cannot be eliminated without modifying Task.start() internals. + +### BUG-003: Panel layout not properly applied (MEDIUM) +**Symptom**: `vscode.setEditorLayout` creates the column layout, but panels don't always land in the right columns. Sometimes panels stack in one column. +**Root cause**: `createWebviewPanel` with a specific ViewColumn doesn't guarantee placement if VS Code's editor group indexing doesn't match the expected column numbers. The layout command creates groups, but the group indices may not map to ViewColumn 1, 2, 3 directly. +**Fix approach**: +- After `setEditorLayout`, wait for the layout to settle (longer delay — 500ms+) +- Create panels with `ViewColumn.Beside` instead of explicit column numbers (this creates new groups automatically) +- OR: create the first panel at ViewColumn.One, then use `workbench.action.moveEditorToNextGroup` for subsequent panels +- Test: does `preserveFocus: true` on `createWebviewPanel` affect placement? + +### BUG-004: Diff view appears as full-pane file open, not inline diff (LOW) +**Symptom**: When an agent edits a file, the file opens as a full editor tab, not as a diff view showing the changes. 
+**Root cause**: The file edit tools may not be using the diff provider correctly for spawned agent panels. +**Fix approach**: This is related to BUG-001. Once file operations target the correct ViewColumn, diff rendering should follow. Investigate Roo's existing diff streaming mechanism. + +### BUG-005: Auto-approval still shows yellow approve buttons occasionally (LOW) +**Symptom**: Despite multiOrchForceApproveAll, some approve/deny buttons briefly appear before being auto-approved. +**Root cause**: The UI renders the ask prompt BEFORE checkAutoApproval processes it. The auto-approval fires within milliseconds, but the webview renders the prompt in the interim. +**Impact**: Visual flicker only — the approval IS being processed automatically. +**Fix approach**: For multi-orch panels, suppress the ask UI rendering entirely. Add a flag to the provider state that the webview checks: if `multiOrchForceApproveAll` is true, don't render the approve/deny buttons at all. + +--- + +## 6. Status: Not Yet Implemented + +### FEAT-001: Post-Completion Verification Phase (HIGH PRIORITY) +When all agents complete and reports are collected, the orchestrator should spawn a NEW set of agents to: +1. **Debug Agent**: Review all files created/modified by the original agents, check for errors +2. **E2E Test Agent**: If the task involves code, write and run basic tests +3. **Merge Resolution Agent**: If git worktrees were used, merge branches and resolve conflicts + +The orchestrator's flow becomes: +``` +Phase 3: RUN → agents complete → collect reports +Phase 4: MERGE → merge git branches (existing, works for git repos) +Phase 5: VERIFY (NEW) → spawn debug/test agents +Phase 6: REPORT → final unified report +``` + +The verification phase should be optional (toggle in settings) and use the same panel-spawning mechanism. 
+ +### FEAT-002: Orchestrator Continuation Prompt (HIGH PRIORITY) +After sub-tasks return to the orchestrator, it should receive all completion reports + file change summaries and then CONTINUE processing. Currently it just renders a static report. It should: +1. Read all completion reports +2. Analyze what was built +3. Decide if verification/debugging is needed +4. Spawn new agents for verification OR conclude with a final summary +5. The user could inject custom instructions at this point (e.g., "now also add error handling") + +### FEAT-003: Horizontal Diff View in Agent Panels (MEDIUM) +When an agent creates/edits a file, the diff should render INSIDE the agent's webview panel as a horizontal split (original on top, modified on bottom) rather than VS Code's native vertical diff editor. This avoids the diff taking over the entire column. + +**Implementation approach**: +- Use `diff2html` or `monaco-diff` library inside the webview +- Intercept file edit events and capture the before/after content +- Render the diff as HTML within the agent's chat stream +- Auto-collapse the diff view after the edit is complete + +### FEAT-004: Stop/Resume Individual Agents (MEDIUM) +Users should be able to: +1. Pause an individual agent mid-execution (not just abort all) +2. Provide additional instructions to a paused agent +3. Resume the agent from where it stopped +4. Switch an agent's mode mid-task (impractical but should not crash) + +**Current state**: The stop button exists in each panel but the pause/resume mechanism isn't wired to the coordinator. The coordinator only tracks completed/failed, not paused. + +### FEAT-005: Agent Panel File Tab Bar (LOW) +Each agent's panel should show its open files as a compact vertical list at the bottom of the panel (taking up ~15% of height). This is NOT possible via VS Code's tab API. Would need to be rendered as HTML inside the webview. + +--- + +## 7. 
Technical Constraints (VS Code API) + +### What IS possible: +- `vscode.setEditorLayout({ orientation, groups })` — create complex N-column/row layouts +- `vscode.getEditorLayout` — save/restore layouts +- `createWebviewPanel(id, title, { viewColumn, preserveFocus })` — create panels in specific columns +- `workbench.action.moveEditorToBelowGroup` — move editors between groups +- `workbench.action.editorLayoutTwoRows` — switch to two-row layout +- `vscode.window.showTextDocument(uri, { viewColumn })` — open files in specific columns +- Custom diff rendering inside webviews using HTML/CSS/JS libraries + +### What is NOT possible: +- Changing VS Code's native diff editor orientation (always vertical side-by-side) +- Tab bar position per-panel (only global via settings) +- Vertical tab scrolling (core VS Code chrome) +- Forcing a file open to a specific editor group from within a Task execution without threading the ViewColumn through the entire tool chain + +### Workarounds: +- Custom diff views: render diffs as HTML inside the webview using diff2html +- File placement: thread ViewColumn through ClineProvider → Task → Tool → DiffViewProvider +- Tab management: render a file list as HTML inside the webview (bottom 15%) + +--- + +## 8. Bug Details and Fix Guidance + +### Fixing BUG-001 (File edits go to wrong pane) + +This is the most architecturally complex bug. The call chain is: + +``` +Task.recursivelyMakeClineRequests() + → Tool execution (write_to_file, apply_diff) + → DiffViewProvider.open() or vscode.window.showTextDocument() + → VS Code opens file in the ACTIVE editor group +``` + +The fix requires: +1. **PanelSpawner**: Store which ViewColumn each panel was placed in +2. **ClineProvider**: Add a `viewColumn` property that's set by the spawner +3. **Task**: Read the provider's viewColumn and pass it to tool operations +4. 
**DiffViewProvider**: Accept a viewColumn parameter and use it when opening diffs + +**Key file to investigate**: `src/integrations/editor/DiffViewProvider.ts` — this is where Roo opens diff views. Search for `showTextDocument` and `vscode.diff` commands. + +### Fixing BUG-002 (Agents don't start simultaneously) + +The current `startAll()` in `agent-coordinator.ts`: +```typescript +startAll(): void { + for (const [taskId, provider] of this.providers) { + // ... + currentTask.start() // This is fire-and-forget but sequential + } +} +``` + +Improvement: Collect all start promises and fire them via `Promise.all`: +```typescript +startAll(): void { + const startPromises: Promise<unknown>[] = [] + for (const [taskId, provider] of this.providers) { + const currentTask = provider.getCurrentTask() + if (currentTask) { + startPromises.push(Promise.resolve(currentTask.start())) + } + } + // All start() calls initiated at nearly the same instant + Promise.all(startPromises).catch(() => {}) +} +``` + +Note: This doesn't guarantee truly simultaneous API responses (network latency varies), but it eliminates the sequential dispatch gap. + +### Fixing BUG-003 (Panel layout not properly applied) + +The `setEditorLayout` approach needs refinement: +1. Use `preserveFocus: true` on ALL panel creations +2. After creating each panel, use `workbench.action.focusNextGroup` to shift focus +3. Increase delay between panel creations to 300ms +4. After all panels are created, focus the FIRST panel to start + +Alternative approach: Don't use explicit ViewColumn numbers. Instead: +1. Set the layout with `setEditorLayout` +2. Create first panel at `ViewColumn.Active` (which will be the first group) +3. Create subsequent panels at `ViewColumn.Beside` (which creates in the next group) + +--- + +## 9. 
Feature Specifications (Not Yet Built) + +### FEAT-001: Post-Completion Verification Phase + +**Location**: Add to `orchestrator.ts` after Phase 4 (merge) + +```typescript +// After merge phase, optionally spawn verification agents +if (shouldVerify) { + this.state.phase = "verifying" + notify() + + // Create verification plan based on what was built + const verifyPlan = createVerificationPlan(this.state.agents, this.state.mergeResults) + + // Spawn verification agents (reuse same panel-spawner mechanism) + // These agents get: all completion reports + list of changed files + // Their task: review code, run tests, check for errors + await this.executeVerificationPhase(verifyPlan, providerSettings, onStateChange) +} +``` + +### FEAT-002: Orchestrator Continuation + +After collecting all reports, instead of just setting `finalReport` and stopping: +1. Feed all reports back to the orchestrator's LLM as a message +2. Let the orchestrator decide next steps (more agents, manual review, done) +3. The user can inject instructions at this point + +This would require the orchestrator to be an active Task itself (not just a coordinator), which is a larger architectural change. + +--- + +## 10. Agent Assignments + +When deploying agents from this spec, assign them specific bugs or features. Each agent MUST: +1. Read this ENTIRE spec before starting work +2. Re-read the relevant bug/feature section after each edit +3. Only modify files listed for their assignment +4. Commit after each logical change with `--no-verify` +5. 
NOT touch files owned by other agents + +### Assignment Template: +``` +AGENT [N]: Fix BUG-00X +- Read: docs/superpowers/specs/MULTI-ORCHESTRATOR-MASTER-SPEC.md (FULL spec) +- Focus: Section 8, BUG-00X +- Files: [list of files this agent can modify] +- Verify: [compile/test command after changes] +``` diff --git a/packages/build/src/esbuild.ts b/packages/build/src/esbuild.ts index 952e823eeca..451ba21538f 100644 --- a/packages/build/src/esbuild.ts +++ b/packages/build/src/esbuild.ts @@ -159,6 +159,15 @@ export function copyWasms(srcDir: string, distDir: string): void { console.log(`[copyWasms] Copied ${wasmFiles.length} tree-sitter language wasms to ${distDir}`) + // sql.js WASM file for memory system SQLite. + const sqlJsWasmPath = path.join(nodeModulesDir, "sql.js", "dist", "sql-wasm.wasm") + if (fs.existsSync(sqlJsWasmPath)) { + fs.copyFileSync(sqlJsWasmPath, path.join(distDir, "sql-wasm.wasm")) + console.log(`[copyWasms] Copied sql.js WASM to ${distDir}`) + } else { + console.warn(`[copyWasms] sql.js WASM not found at ${sqlJsWasmPath}, skipping`) + } + // Copy esbuild-wasm files for custom tool transpilation (cross-platform). copyEsbuildWasmFiles(nodeModulesDir, distDir) } diff --git a/packages/types/src/global-settings.ts b/packages/types/src/global-settings.ts index 288f6c2118c..8f79f43c544 100644 --- a/packages/types/src/global-settings.ts +++ b/packages/types/src/global-settings.ts @@ -199,6 +199,12 @@ export const globalSettingsSchema = z.object({ customSupportPrompts: customSupportPromptsSchema.optional(), enhancementApiConfigId: z.string().optional(), includeTaskHistoryInEnhance: z.boolean().optional(), + + /** + * Custom meta-prompt for the personality trait enhancer. + * Used to expand brief descriptions into structured personality prompts. 
+ */ + personalityTraitEnhancerPrompt: z.string().optional(), historyPreviewCollapsed: z.boolean().optional(), reasoningBlockCollapsed: z.boolean().optional(), /** @@ -232,6 +238,24 @@ export const globalSettingsSchema = z.object({ * Tools in this list will be excluded from prompt generation and rejected at execution time. */ disabledTools: z.array(toolNamesSchema).optional(), + + // Memory Learning + memoryLearningEnabled: z.boolean().optional(), + memoryApiConfigId: z.string().optional(), + memoryAnalysisFrequency: z.number().optional(), + memoryLearningDefaultEnabled: z.boolean().optional(), + + // Multi-Orchestrator + multiOrchMaxAgents: z.number().min(1).max(6).optional(), + multiOrchPlanReviewEnabled: z.boolean().optional(), + multiOrchMergeEnabled: z.enum(["auto", "always", "never"]).optional(), + /** + * Whether to spawn a verification agent after all sub-agents complete. + * The verification agent reviews changed files for bugs, inconsistencies, + * and integration issues before the final report is produced. 
+ * @default false + */ + multiOrchVerifyEnabled: z.boolean().optional(), }) export type GlobalSettings = z.infer diff --git a/packages/types/src/mode.ts b/packages/types/src/mode.ts index f981ba7bf9a..1d35147b5fb 100644 --- a/packages/types/src/mode.ts +++ b/packages/types/src/mode.ts @@ -93,6 +93,32 @@ export const groupEntryArraySchema = z.preprocess((val) => { return val.filter((entry) => !isDeprecatedGroupEntry(entry)) }, rawGroupEntryArraySchema) as z.ZodType +/** + * PersonalityTrait + */ + +export const personalityTraitSchema = z.object({ + id: z.string().min(1, "Trait ID is required"), + emoji: z.string().min(1, "Emoji is required"), + label: z.string().min(1, "Label is required"), + prompt: z.string().min(1, "Prompt is required"), + isBuiltIn: z.boolean(), +}) + +export type PersonalityTrait = z.infer + +/** + * PersonalityConfig + */ + +export const personalityConfigSchema = z.object({ + activeTraitIds: z.array(z.string()), + customTraits: z.array(personalityTraitSchema), + deletedBuiltInTraitIds: z.array(z.string()).optional(), +}) + +export type PersonalityConfig = z.infer + export const modeConfigSchema = z.object({ slug: z.string().regex(/^[a-zA-Z0-9-]+$/, "Slug must contain only letters numbers and dashes"), name: z.string().min(1, "Name is required"), @@ -102,6 +128,7 @@ export const modeConfigSchema = z.object({ customInstructions: z.string().optional(), groups: groupEntryArraySchema, source: z.enum(["global", "project"]).optional(), + personalityConfig: personalityConfigSchema.optional(), }) export type ModeConfig = z.infer @@ -224,4 +251,25 @@ export const DEFAULT_MODES: readonly ModeConfig[] = [ customInstructions: "Your role is to coordinate complex workflows by delegating tasks to specialized modes. As an orchestrator, you should:\n\n1. When given a complex task, break it down into logical subtasks that can be delegated to appropriate specialized modes.\n\n2. For each subtask, use the `new_task` tool to delegate. 
Choose the most appropriate mode for the subtask's specific goal and provide comprehensive instructions in the `message` parameter. These instructions must include:\n * All necessary context from the parent task or previous subtasks required to complete the work.\n * A clearly defined scope, specifying exactly what the subtask should accomplish.\n * An explicit statement that the subtask should *only* perform the work outlined in these instructions and not deviate.\n * An instruction for the subtask to signal completion by using the `attempt_completion` tool, providing a concise yet thorough summary of the outcome in the `result` parameter, keeping in mind that this summary will be the source of truth used to keep track of what was completed on this project.\n * A statement that these specific instructions supersede any conflicting general instructions the subtask's mode might have.\n\n3. Track and manage the progress of all subtasks. When a subtask is completed, analyze its results and determine the next steps.\n\n4. Help the user understand how the different subtasks fit together in the overall workflow. Provide clear reasoning about why you're delegating specific tasks to specific modes.\n\n5. When all subtasks are completed, synthesize the results and provide a comprehensive overview of what was accomplished.\n\n6. Ask clarifying questions when necessary to better understand how to break down complex tasks effectively.\n\n7. Suggest improvements to the workflow based on the results of completed subtasks.\n\nUse subtasks to maintain clarity. If a request significantly shifts focus or requires a different expertise (mode), consider creating a subtask rather than overloading the current one.", }, + { + slug: "multi-orchestrator", + name: "⚡ Multi-Orchestrator", + roleDefinition: + "You are Roo, a parallel workflow orchestrator that decomposes complex tasks into multiple independent subtasks and dispatches them to specialized modes running simultaneously. 
You analyze the user's request, identify separable concerns, assign each to the most appropriate mode, and coordinate their parallel execution with git worktree isolation.", + whenToUse: + "Use for complex tasks that benefit from parallelization — such as building features that span multiple modules, running architecture design alongside implementation, or handling multi-file refactoring with test writing simultaneously.", + description: "Parallel task execution across multiple agents", + groups: [], + customInstructions: `Your workflow: +1. Analyze the user's request and identify separable concerns +2. Decompose into independent tasks (respecting the max agent count setting) +3. Assign each task to the most appropriate mode (code, architect, ask, debug) +4. Maximize file separation between agents to minimize merge conflicts +5. If plan-review is enabled, present the plan for approval before executing +6. Monitor all agents and collect their completion reports +7. If merge is needed, coordinate the sequential branch merge +8. Present a unified summary of all results + +CRITICAL: When decomposing, ensure agents work on DIFFERENT files. Split by module/feature boundary, not by layer.`, + }, ] as const diff --git a/packages/types/src/provider-settings.ts b/packages/types/src/provider-settings.ts index 859792d7c36..04927d9b752 100644 --- a/packages/types/src/provider-settings.ts +++ b/packages/types/src/provider-settings.ts @@ -187,6 +187,9 @@ const baseProviderSettingsSchema = z.object({ // Model verbosity. verbosity: verbosityLevelsSchema.optional(), + + // Tool calling protocol. + useXmlToolCalling: z.boolean().optional(), }) // Several of the providers share common model config properties. 
diff --git a/packages/types/src/vscode-extension-host.ts b/packages/types/src/vscode-extension-host.ts index b20539afe49..4c1ed21a84e 100644 --- a/packages/types/src/vscode-extension-host.ts +++ b/packages/types/src/vscode-extension-host.ts @@ -40,6 +40,7 @@ export interface ExtensionMessage { | "messageUpdated" | "mcpServers" | "enhancedPrompt" + | "enhancedPersonalityTrait" | "commitSearchResults" | "listApiConfig" | "routerModels" @@ -104,6 +105,17 @@ export interface ExtensionMessage { | "folderSelected" | "skills" | "fileContent" + | "memoryLearningState" + | "memorySyncProgress" + | "memorySyncComplete" + | "memoryCleared" + | "memorySyncAlreadyRunning" + | "memorySyncStatus" + | "memoryStatus" + | "multiOrchPlanReady" + | "multiOrchStatusUpdate" + | "multiOrchComplete" + | "multiOrchError" text?: string /** For fileContent: { path, content, error? } */ fileContent?: { path: string; content: string | null; error?: string } @@ -298,6 +310,7 @@ export type ExtensionState = Pick< | "imageGenerationProvider" | "openRouterImageGenerationSelectedModel" | "includeTaskHistoryInEnhance" + | "personalityTraitEnhancerPrompt" | "reasoningBlockCollapsed" | "enterBehavior" | "includeCurrentTime" @@ -306,6 +319,14 @@ export type ExtensionState = Pick< | "requestDelaySeconds" | "showWorktreesInHomeScreen" | "disabledTools" + | "memoryLearningEnabled" + | "memoryApiConfigId" + | "memoryAnalysisFrequency" + | "memoryLearningDefaultEnabled" + | "multiOrchMaxAgents" + | "multiOrchPlanReviewEnabled" + | "multiOrchMergeEnabled" + | "multiOrchVerifyEnabled" > & { lockApiConfigAcrossModes?: boolean version: string @@ -375,6 +396,13 @@ export type ExtensionState = Pick< openAiCodexIsAuthenticated?: boolean debug?: boolean + /** + * When true, this provider is a multi-orchestrator agent panel with + * force-approve-all enabled. The webview should suppress approve/deny + * button rendering entirely to prevent visual flicker (BUG-005). 
+ */ + multiOrchForceApproveAll?: boolean + /** * Monotonically increasing sequence number for clineMessages state pushes. * When present, the frontend should only apply clineMessages from a state push @@ -471,6 +499,7 @@ export interface WebviewMessage { | "updateMcpTimeout" | "enhancePrompt" | "enhancedPrompt" + | "enhancePersonalityTrait" | "draggedImages" | "deleteMessage" | "deleteMessageConfirm" @@ -581,6 +610,16 @@ export interface WebviewMessage { | "moveSkill" | "updateSkillModes" | "openSkillFile" + | "toggleMemoryLearning" + | "updateMemorySettings" + | "startMemorySync" + | "clearMemory" + | "getMemorySyncStatus" + | "getMemoryStatus" + | "multiOrchStartPlan" + | "multiOrchApprovePlan" + | "multiOrchAbort" + | "multiOrchGetStatus" text?: string taskId?: string editedMessageContent?: string diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index d95c2f02346..b75abb0d473 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -600,7 +600,7 @@ importers: version: 0.13.0 drizzle-orm: specifier: ^0.44.1 - version: 0.44.1(@libsql/client@0.15.8)(@opentelemetry/api@1.9.0)(better-sqlite3@11.10.0)(gel@2.1.0)(postgres@3.4.7) + version: 0.44.1(@libsql/client@0.15.8)(@opentelemetry/api@1.9.0)(better-sqlite3@11.10.0)(gel@2.1.0)(postgres@3.4.7)(sql.js@1.14.1) execa: specifier: ^9.6.0 version: 9.6.0 @@ -971,6 +971,9 @@ importers: sound-play: specifier: ^1.1.0 version: 1.1.0 + sql.js: + specifier: ^1.14.1 + version: 1.14.1 stream-json: specifier: ^1.8.0 version: 1.9.1 @@ -5130,6 +5133,7 @@ packages: basic-ftp@5.0.5: resolution: {integrity: sha512-4Bcg1P8xhUuqcii/S0Z9wiHIrQVPMermM1any+MX5GeGD7faD3/msQUDGLol9wOcz4/jbg/WJnGqoJF6LiBdtg==} engines: {node: '>=10.0.0'} + deprecated: Security vulnerability fixed in 5.2.0, please upgrade better-path-resolve@1.0.0: resolution: {integrity: sha512-pbnl5XzGBdrFU/wT4jqmJVPn2B6UHPBOhzMQkY/SPUPB6QtUXtmBHBIwCbXJol93mOpGMnQyP/+BB19q04xj7g==} @@ -8976,6 +8980,7 @@ packages: prebuild-install@7.1.3: resolution: {integrity: 
sha512-8Mf2cbV7x1cXPUILADGI3wuhfqWvtiLA1iclTDbFRZkgRQS0NqsPZphna9V+HyTEadheuPmjaJMsbzKQFOzLug==} engines: {node: '>=10'} + deprecated: No longer maintained. Please contact the author of the relevant native addon; alternatives are available. hasBin: true prelude-ls@1.2.1: @@ -9758,6 +9763,9 @@ packages: sprintf-js@1.1.3: resolution: {integrity: sha512-Oo+0REFV59/rz3gfJNKQiBlwfHaSESl1pcGyABQsnnIfWOFt6JNj5gCog2U6MLZ//IGYD+nA8nI+mTShREReaA==} + sql.js@1.14.1: + resolution: {integrity: sha512-gcj8zBWU5cFsi9WUP+4bFNXAyF1iRpA3LLyS/DP5xlrNzGmPIizUeBggKa8DbDwdqaKwUcTEnChtd2grWo/x/A==} + stack-generator@2.0.10: resolution: {integrity: sha512-mwnua/hkqM6pF4k8SnmZ2zfETsRUpWXREfA/goT8SLCV4iOFa4bzOX2nDipWAZFPTjLvQB82f5yaodMVhK0yJQ==} @@ -14974,7 +14982,7 @@ snapshots: sirv: 3.0.1 tinyglobby: 0.2.14 tinyrainbow: 2.0.0 - vitest: 3.2.4(@types/debug@4.1.12)(@types/node@24.2.1)(@vitest/ui@3.2.4)(jiti@2.4.2)(jsdom@26.1.0)(lightningcss@1.30.1)(tsx@4.19.4)(yaml@2.8.0) + vitest: 3.2.4(@types/debug@4.1.12)(@types/node@20.17.50)(@vitest/ui@3.2.4)(jiti@2.4.2)(jsdom@26.1.0)(lightningcss@1.30.1)(tsx@4.19.4)(yaml@2.8.0) '@vitest/utils@3.2.4': dependencies: @@ -16340,13 +16348,14 @@ snapshots: transitivePeerDependencies: - supports-color - drizzle-orm@0.44.1(@libsql/client@0.15.8)(@opentelemetry/api@1.9.0)(better-sqlite3@11.10.0)(gel@2.1.0)(postgres@3.4.7): + drizzle-orm@0.44.1(@libsql/client@0.15.8)(@opentelemetry/api@1.9.0)(better-sqlite3@11.10.0)(gel@2.1.0)(postgres@3.4.7)(sql.js@1.14.1): optionalDependencies: '@libsql/client': 0.15.8 '@opentelemetry/api': 1.9.0 better-sqlite3: 11.10.0 gel: 2.1.0 postgres: 3.4.7 + sql.js: 1.14.1 duck@0.1.12: dependencies: @@ -20792,6 +20801,8 @@ snapshots: sprintf-js@1.1.3: {} + sql.js@1.14.1: {} + stack-generator@2.0.10: dependencies: stackframe: 1.3.4 diff --git a/src/api/index.ts b/src/api/index.ts index ebc2682a1a8..5afc94ac712 100644 --- a/src/api/index.ts +++ b/src/api/index.ts @@ -86,6 +86,13 @@ export interface ApiHandlerCreateMessageMetadata { * 
Only applies to providers that support function calling restrictions (e.g., Gemini). */ allowedFunctionNames?: string[] + /** + * When true, native tool definitions are omitted from the API request body. + * The model relies solely on XML tool documentation in the system prompt + * and outputs tool calls as raw XML text, which the existing TagMatcher + * in presentAssistantMessage() parses into ToolUse objects. + */ + useXmlToolCalling?: boolean } export interface ApiHandler { diff --git a/src/api/providers/__tests__/anthropic.spec.ts b/src/api/providers/__tests__/anthropic.spec.ts index 3731f3a068b..7b0fd524022 100644 --- a/src/api/providers/__tests__/anthropic.spec.ts +++ b/src/api/providers/__tests__/anthropic.spec.ts @@ -787,5 +787,63 @@ describe("AnthropicHandler", () => { arguments: '"London"}', }) }) + + it("should omit tools and tool_choice when useXmlToolCalling is true", async () => { + const stream = handler.createMessage(systemPrompt, messages, { + taskId: "test-task", + tools: mockTools, + tool_choice: "auto", + useXmlToolCalling: true, + }) + + // Consume the stream to trigger the API call + for await (const _chunk of stream) { + // Just consume + } + + const callArgs = mockCreate.mock.calls[mockCreate.mock.calls.length - 1][0] + // When useXmlToolCalling is true, the tools and tool_choice should NOT be in the request + expect(callArgs.tools).toBeUndefined() + expect(callArgs.tool_choice).toBeUndefined() + }) + + it("should include tools when useXmlToolCalling is false", async () => { + const stream = handler.createMessage(systemPrompt, messages, { + taskId: "test-task", + tools: mockTools, + tool_choice: "auto", + useXmlToolCalling: false, + }) + + // Consume the stream to trigger the API call + for await (const _chunk of stream) { + // Just consume + } + + const callArgs = mockCreate.mock.calls[mockCreate.mock.calls.length - 1][0] + // When useXmlToolCalling is false, tools should be included normally + expect(callArgs.tools).toBeDefined() + 
expect(callArgs.tools.length).toBeGreaterThan(0) + expect(callArgs.tool_choice).toBeDefined() + }) + + it("should include tools when useXmlToolCalling is undefined", async () => { + const stream = handler.createMessage(systemPrompt, messages, { + taskId: "test-task", + tools: mockTools, + tool_choice: "auto", + }) + + // Consume the stream to trigger the API call + for await (const _chunk of stream) { + // Just consume + } + + const callArgs = mockCreate.mock.calls[mockCreate.mock.calls.length - 1][0] + // Default behavior: tools should be included + expect(callArgs.tools).toBeDefined() + expect(callArgs.tools.length).toBeGreaterThan(0) + expect(callArgs.tool_choice).toBeDefined() + }) }) }) diff --git a/src/api/providers/__tests__/openai.spec.ts b/src/api/providers/__tests__/openai.spec.ts index 73b542dbc73..956046146e3 100644 --- a/src/api/providers/__tests__/openai.spec.ts +++ b/src/api/providers/__tests__/openai.spec.ts @@ -499,6 +499,133 @@ describe("OpenAiHandler", () => { }) }) + describe("useXmlToolCalling", () => { + const systemPrompt = "You are a helpful assistant." + const messages: Anthropic.Messages.MessageParam[] = [ + { + role: "user", + content: [{ type: "text" as const, text: "Hello!" 
}], + }, + ] + + const mockTools: OpenAI.Chat.ChatCompletionTool[] = [ + { + type: "function", + function: { + name: "read_file", + description: "Read a file", + parameters: { + type: "object", + properties: { path: { type: "string" } }, + required: ["path"], + }, + }, + }, + ] + + it("should omit tools and tool_choice when useXmlToolCalling is true (streaming)", async () => { + const stream = handler.createMessage(systemPrompt, messages, { + taskId: "test", + tools: mockTools, + tool_choice: "auto", + useXmlToolCalling: true, + }) + + for await (const _chunk of stream) { + } + + const callArgs = mockCreate.mock.calls[mockCreate.mock.calls.length - 1][0] + // When useXmlToolCalling is true, the tools and tool_choice should NOT be in the request + expect(callArgs.tools).toBeUndefined() + expect(callArgs.tool_choice).toBeUndefined() + expect(callArgs.parallel_tool_calls).toBeUndefined() + }) + + it("should omit tools and tool_choice when useXmlToolCalling is true (non-streaming)", async () => { + const nonStreamHandler = new OpenAiHandler({ + ...mockOptions, + openAiStreamingEnabled: false, + }) + + const stream = nonStreamHandler.createMessage(systemPrompt, messages, { + taskId: "test", + tools: mockTools, + tool_choice: "auto", + useXmlToolCalling: true, + }) + + for await (const _chunk of stream) { + } + + const callArgs = mockCreate.mock.calls[mockCreate.mock.calls.length - 1][0] + expect(callArgs.tools).toBeUndefined() + expect(callArgs.tool_choice).toBeUndefined() + expect(callArgs.parallel_tool_calls).toBeUndefined() + }) + + it("should include tools when useXmlToolCalling is false", async () => { + const stream = handler.createMessage(systemPrompt, messages, { + taskId: "test", + tools: mockTools, + tool_choice: "auto", + useXmlToolCalling: false, + }) + + for await (const _chunk of stream) { + } + + const callArgs = mockCreate.mock.calls[mockCreate.mock.calls.length - 1][0] + expect(callArgs.tools).toBeDefined() + 
expect(callArgs.tools.length).toBeGreaterThan(0) + expect(callArgs.tool_choice).toBe("auto") + expect(callArgs.parallel_tool_calls).toBe(true) + }) + + it("should include tools when useXmlToolCalling is undefined", async () => { + const stream = handler.createMessage(systemPrompt, messages, { + taskId: "test", + tools: mockTools, + tool_choice: "auto", + }) + + for await (const _chunk of stream) { + } + + const callArgs = mockCreate.mock.calls[mockCreate.mock.calls.length - 1][0] + expect(callArgs.tools).toBeDefined() + expect(callArgs.tools.length).toBeGreaterThan(0) + expect(callArgs.tool_choice).toBe("auto") + }) + + it("should omit tools and tool_choice for O3 family when useXmlToolCalling is true", async () => { + const o3Handler = new OpenAiHandler({ + ...mockOptions, + openAiModelId: "o3-mini", + openAiCustomModelInfo: { + contextWindow: 128_000, + maxTokens: 65536, + supportsPromptCache: false, + reasoningEffort: "medium" as "low" | "medium" | "high", + }, + }) + + const stream = o3Handler.createMessage(systemPrompt, messages, { + taskId: "test", + tools: mockTools, + tool_choice: "auto", + useXmlToolCalling: true, + }) + + for await (const _chunk of stream) { + } + + const callArgs = mockCreate.mock.calls[mockCreate.mock.calls.length - 1][0] + expect(callArgs.tools).toBeUndefined() + expect(callArgs.tool_choice).toBeUndefined() + expect(callArgs.parallel_tool_calls).toBeUndefined() + }) + }) + describe("error handling", () => { const testMessages: Anthropic.Messages.MessageParam[] = [ { diff --git a/src/api/providers/anthropic-vertex.ts b/src/api/providers/anthropic-vertex.ts index 3ed5dd45cce..b9978bafa25 100644 --- a/src/api/providers/anthropic-vertex.ts +++ b/src/api/providers/anthropic-vertex.ts @@ -75,10 +75,15 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple // Filter out non-Anthropic blocks (reasoning, thoughtSignature, etc.) 
before sending to the API const sanitizedMessages = filterNonAnthropicBlocks(messages) - const nativeToolParams = { - tools: convertOpenAIToolsToAnthropic(metadata?.tools ?? []), - tool_choice: convertOpenAIToolChoiceToAnthropic(metadata?.tool_choice, metadata?.parallelToolCalls), - } + // When useXmlToolCalling is enabled, omit native tool definitions from the API request. + // The model will rely on XML tool documentation in the system prompt instead, + // and output tool calls as raw XML text parsed by TagMatcher. + const nativeToolParams = metadata?.useXmlToolCalling + ? {} + : { + tools: convertOpenAIToolsToAnthropic(metadata?.tools ?? []), + tool_choice: convertOpenAIToolChoiceToAnthropic(metadata?.tool_choice, metadata?.parallelToolCalls), + } /** * Vertex API has specific limitations for prompt caching: diff --git a/src/api/providers/anthropic.ts b/src/api/providers/anthropic.ts index 1786a105a5e..3eca345b562 100644 --- a/src/api/providers/anthropic.ts +++ b/src/api/providers/anthropic.ts @@ -75,10 +75,15 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa betas.push("context-1m-2025-08-07") } - const nativeToolParams = { - tools: convertOpenAIToolsToAnthropic(metadata?.tools ?? []), - tool_choice: convertOpenAIToolChoiceToAnthropic(metadata?.tool_choice, metadata?.parallelToolCalls), - } + // When useXmlToolCalling is enabled, omit native tool definitions from the API request. + // The model will rely on XML tool documentation in the system prompt instead, + // and output tool calls as raw XML text parsed by TagMatcher. + const nativeToolParams = metadata?.useXmlToolCalling + ? {} + : { + tools: convertOpenAIToolsToAnthropic(metadata?.tools ?? 
[]), + tool_choice: convertOpenAIToolChoiceToAnthropic(metadata?.tool_choice, metadata?.parallelToolCalls), + } switch (modelId) { case "claude-sonnet-4-6": diff --git a/src/api/providers/base-openai-compatible-provider.ts b/src/api/providers/base-openai-compatible-provider.ts index fc3d769ae2a..5e76d9b8837 100644 --- a/src/api/providers/base-openai-compatible-provider.ts +++ b/src/api/providers/base-openai-compatible-provider.ts @@ -93,9 +93,14 @@ export abstract class BaseOpenAiCompatibleProvider messages: [{ role: "system", content: systemPrompt }, ...convertToOpenAiMessages(messages)], stream: true, stream_options: { include_usage: true }, - tools: this.convertToolsForOpenAI(metadata?.tools), - tool_choice: metadata?.tool_choice, - parallel_tool_calls: metadata?.parallelToolCalls ?? true, + // When useXmlToolCalling is enabled, omit native tool definitions from the API request. + ...(metadata?.useXmlToolCalling + ? {} + : { + tools: this.convertToolsForOpenAI(metadata?.tools), + tool_choice: metadata?.tool_choice, + parallel_tool_calls: metadata?.parallelToolCalls ?? true, + }), } // Add thinking parameter if reasoning is enabled and model supports it diff --git a/src/api/providers/bedrock.ts b/src/api/providers/bedrock.ts index 3ceb2510033..3d8cd452895 100644 --- a/src/api/providers/bedrock.ts +++ b/src/api/providers/bedrock.ts @@ -450,10 +450,13 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH additionalModelRequestFields.anthropic_beta = anthropicBetas } - const toolConfig: ToolConfiguration = { - tools: this.convertToolsForBedrock(metadata?.tools ?? []), - toolChoice: this.convertToolChoiceForBedrock(metadata?.tool_choice), - } + // When useXmlToolCalling is enabled, omit native tool definitions from the API request. + const toolConfig: ToolConfiguration | undefined = metadata?.useXmlToolCalling + ? undefined + : { + tools: this.convertToolsForBedrock(metadata?.tools ?? 
[]), + toolChoice: this.convertToolChoiceForBedrock(metadata?.tool_choice), + } // Build payload with optional service_tier at top level // Service tier is a top-level parameter per AWS documentation, NOT inside additionalModelRequestFields @@ -466,7 +469,7 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH ...(additionalModelRequestFields && { additionalModelRequestFields }), // Add anthropic_version at top level when using thinking features ...(thinkingEnabled && { anthropic_version: "bedrock-2023-05-31" }), - toolConfig, + ...(toolConfig ? { toolConfig } : {}), // Add service_tier as a top-level parameter (not inside additionalModelRequestFields) ...(useServiceTier && { service_tier: this.options.awsBedrockServiceTier }), } diff --git a/src/api/providers/deepseek.ts b/src/api/providers/deepseek.ts index 84cd557de05..777e45fdeef 100644 --- a/src/api/providers/deepseek.ts +++ b/src/api/providers/deepseek.ts @@ -76,9 +76,14 @@ export class DeepSeekHandler extends OpenAiHandler { stream_options: { include_usage: true }, // Enable thinking mode for deepseek-reasoner or when tools are used with thinking model ...(isThinkingModel && { thinking: { type: "enabled" } }), - tools: this.convertToolsForOpenAI(metadata?.tools), - tool_choice: metadata?.tool_choice, - parallel_tool_calls: metadata?.parallelToolCalls ?? true, + // When useXmlToolCalling is enabled, omit native tool definitions from the API request. + ...(metadata?.useXmlToolCalling + ? {} + : { + tools: this.convertToolsForOpenAI(metadata?.tools), + tool_choice: metadata?.tool_choice, + parallel_tool_calls: metadata?.parallelToolCalls ?? 
true, + }), } // Add max_tokens if needed diff --git a/src/api/providers/gemini.ts b/src/api/providers/gemini.ts index a49073ea334..eef38383c10 100644 --- a/src/api/providers/gemini.ts +++ b/src/api/providers/gemini.ts @@ -128,19 +128,22 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl .map((message) => convertAnthropicMessageToGemini(message, { includeThoughtSignatures, toolIdToName })) .flat() + // When useXmlToolCalling is enabled, omit native tool definitions from the API request. // Tools are always present (minimum ALWAYS_AVAILABLE_TOOLS). // Google built-in tools (Grounding, URL Context) are mutually exclusive // with function declarations in the Gemini API, so we always use // function declarations when tools are provided. - const tools: GenerateContentConfig["tools"] = [ - { - functionDeclarations: (metadata?.tools ?? []).map((tool) => ({ - name: (tool as any).function.name, - description: (tool as any).function.description, - parametersJsonSchema: (tool as any).function.parameters, - })), - }, - ] + const tools: GenerateContentConfig["tools"] = metadata?.useXmlToolCalling + ? [] + : [ + { + functionDeclarations: (metadata?.tools ?? []).map((tool) => ({ + name: (tool as any).function.name, + description: (tool as any).function.description, + parametersJsonSchema: (tool as any).function.parameters, + })), + }, + ] // Determine temperature respecting model capabilities and defaults: // - If supportsTemperature is explicitly false, ignore user overrides @@ -165,7 +168,9 @@ export class GeminiHandler extends BaseProvider implements SingleCompletionHandl // When provided, all tool definitions are passed to the model (so it can reference // historical tool calls in conversation), but only the specified tools can be invoked. // This takes precedence over tool_choice to ensure mode restrictions are honored. 
- if (metadata?.allowedFunctionNames && metadata.allowedFunctionNames.length > 0) { + if (metadata?.useXmlToolCalling) { + // Skip toolConfig entirely when using XML tool calling + } else if (metadata?.allowedFunctionNames && metadata.allowedFunctionNames.length > 0) { config.toolConfig = { functionCallingConfig: { // Use ANY mode to allow calling any of the allowed functions diff --git a/src/api/providers/lite-llm.ts b/src/api/providers/lite-llm.ts index cf8d16a1129..cd3ac7209bc 100644 --- a/src/api/providers/lite-llm.ts +++ b/src/api/providers/lite-llm.ts @@ -207,8 +207,13 @@ export class LiteLLMHandler extends RouterProvider implements SingleCompletionHa stream_options: { include_usage: true, }, - tools: this.convertToolsForOpenAI(metadata?.tools), - tool_choice: metadata?.tool_choice, + // When useXmlToolCalling is enabled, omit native tool definitions from the API request. + ...(metadata?.useXmlToolCalling + ? {} + : { + tools: this.convertToolsForOpenAI(metadata?.tools), + tool_choice: metadata?.tool_choice, + }), } // GPT-5 models require max_completion_tokens instead of the deprecated max_tokens parameter diff --git a/src/api/providers/lm-studio.ts b/src/api/providers/lm-studio.ts index a771394c535..145d06326fb 100644 --- a/src/api/providers/lm-studio.ts +++ b/src/api/providers/lm-studio.ts @@ -88,9 +88,14 @@ export class LmStudioHandler extends BaseProvider implements SingleCompletionHan messages: openAiMessages, temperature: this.options.modelTemperature ?? LMSTUDIO_DEFAULT_TEMPERATURE, stream: true, - tools: this.convertToolsForOpenAI(metadata?.tools), - tool_choice: metadata?.tool_choice, - parallel_tool_calls: metadata?.parallelToolCalls ?? true, + // When useXmlToolCalling is enabled, omit native tool definitions from the API request. + ...(metadata?.useXmlToolCalling + ? {} + : { + tools: this.convertToolsForOpenAI(metadata?.tools), + tool_choice: metadata?.tool_choice, + parallel_tool_calls: metadata?.parallelToolCalls ?? 
true, + }), } if (this.options.lmStudioSpeculativeDecodingEnabled && this.options.lmStudioDraftModelId) { diff --git a/src/api/providers/minimax.ts b/src/api/providers/minimax.ts index bfcf4e3be40..66b9a19865e 100644 --- a/src/api/providers/minimax.ts +++ b/src/api/providers/minimax.ts @@ -109,8 +109,13 @@ export class MiniMaxHandler extends BaseProvider implements SingleCompletionHand system: systemBlocks, messages: supportsPromptCache ? this.addCacheControl(processedMessages, cacheControl) : processedMessages, stream: true, - tools: convertOpenAIToolsToAnthropic(metadata?.tools ?? []), - tool_choice: convertOpenAIToolChoice(metadata?.tool_choice), + // When useXmlToolCalling is enabled, omit native tool definitions from the API request. + ...(metadata?.useXmlToolCalling + ? {} + : { + tools: convertOpenAIToolsToAnthropic(metadata?.tools ?? []), + tool_choice: convertOpenAIToolChoice(metadata?.tool_choice), + }), } stream = await this.client.messages.create(requestParams) diff --git a/src/api/providers/mistral.ts b/src/api/providers/mistral.ts index e0e19298f42..a15286137cc 100644 --- a/src/api/providers/mistral.ts +++ b/src/api/providers/mistral.ts @@ -94,9 +94,12 @@ export class MistralHandler extends BaseProvider implements SingleCompletionHand temperature, } - requestOptions.tools = this.convertToolsForMistral(metadata?.tools ?? []) - // Always use "any" to require tool use - requestOptions.toolChoice = "any" + // When useXmlToolCalling is enabled, omit native tool definitions from the API request. + if (!metadata?.useXmlToolCalling) { + requestOptions.tools = this.convertToolsForMistral(metadata?.tools ?? 
[]) + // Always use "any" to require tool use + requestOptions.toolChoice = "any" + } // Temporary debug log for QA // console.log("[MISTRAL DEBUG] Raw API request body:", requestOptions) diff --git a/src/api/providers/openai-codex.ts b/src/api/providers/openai-codex.ts index 9dfb37bc72c..295b8918253 100644 --- a/src/api/providers/openai-codex.ts +++ b/src/api/providers/openai-codex.ts @@ -319,22 +319,27 @@ export class OpenAiCodexHandler extends BaseProvider implements SingleCompletion }, } : {}), - tools: (metadata?.tools ?? []) - .filter((tool) => tool.type === "function") - .map((tool) => { - const isMcp = isMcpTool(tool.function.name) - return { - type: "function", - name: tool.function.name, - description: tool.function.description, - parameters: isMcp - ? ensureAdditionalPropertiesFalse(tool.function.parameters) - : ensureAllRequired(tool.function.parameters), - strict: !isMcp, - } - }), - tool_choice: metadata?.tool_choice, - parallel_tool_calls: metadata?.parallelToolCalls ?? true, + // When useXmlToolCalling is enabled, omit native tool definitions from the API request. + ...(metadata?.useXmlToolCalling + ? {} + : { + tools: (metadata?.tools ?? []) + .filter((tool) => tool.type === "function") + .map((tool) => { + const isMcp = isMcpTool(tool.function.name) + return { + type: "function", + name: tool.function.name, + description: tool.function.description, + parameters: isMcp + ? ensureAdditionalPropertiesFalse(tool.function.parameters) + : ensureAllRequired(tool.function.parameters), + strict: !isMcp, + } + }), + tool_choice: metadata?.tool_choice, + parallel_tool_calls: metadata?.parallelToolCalls ?? 
true, + }), } return body diff --git a/src/api/providers/openai-compatible.ts b/src/api/providers/openai-compatible.ts index d129e72452f..952e85d8754 100644 --- a/src/api/providers/openai-compatible.ts +++ b/src/api/providers/openai-compatible.ts @@ -172,8 +172,13 @@ export abstract class OpenAICompatibleHandler extends BaseProvider implements Si messages: aiSdkMessages, temperature: model.temperature ?? this.config.temperature ?? 0, maxOutputTokens: this.getMaxOutputTokens(), - tools: aiSdkTools, - toolChoice: this.mapToolChoice(metadata?.tool_choice), + // When useXmlToolCalling is enabled, omit native tool definitions from the API request. + ...(metadata?.useXmlToolCalling + ? {} + : { + tools: aiSdkTools, + toolChoice: this.mapToolChoice(metadata?.tool_choice), + }), } // Use streamText for streaming responses diff --git a/src/api/providers/openai-native.ts b/src/api/providers/openai-native.ts index 6ce93827636..e0d0006a07d 100644 --- a/src/api/providers/openai-native.ts +++ b/src/api/providers/openai-native.ts @@ -374,25 +374,30 @@ export class OpenAiNativeHandler extends BaseProvider implements SingleCompletio // Enable extended prompt cache retention for models that support it. // This uses the OpenAI Responses API `prompt_cache_retention` parameter. ...(promptCacheRetention ? { prompt_cache_retention: promptCacheRetention } : {}), - tools: (metadata?.tools ?? []) - .filter((tool) => tool.type === "function") - .map((tool) => { - // MCP tools use the 'mcp--' prefix - disable strict mode for them - // to preserve optional parameters from the MCP server schema - // But we still need to add additionalProperties: false for OpenAI Responses API - const isMcp = isMcpTool(tool.function.name) - return { - type: "function", - name: tool.function.name, - description: tool.function.description, - parameters: isMcp - ? 
ensureAdditionalPropertiesFalse(tool.function.parameters) - : ensureAllRequired(tool.function.parameters), - strict: !isMcp, - } - }), - tool_choice: metadata?.tool_choice, - parallel_tool_calls: metadata?.parallelToolCalls ?? true, + // When useXmlToolCalling is enabled, omit native tool definitions from the API request. + ...(metadata?.useXmlToolCalling + ? {} + : { + tools: (metadata?.tools ?? []) + .filter((tool) => tool.type === "function") + .map((tool) => { + // MCP tools use the 'mcp--' prefix - disable strict mode for them + // to preserve optional parameters from the MCP server schema + // But we still need to add additionalProperties: false for OpenAI Responses API + const isMcp = isMcpTool(tool.function.name) + return { + type: "function", + name: tool.function.name, + description: tool.function.description, + parameters: isMcp + ? ensureAdditionalPropertiesFalse(tool.function.parameters) + : ensureAllRequired(tool.function.parameters), + strict: !isMcp, + } + }), + tool_choice: metadata?.tool_choice, + parallel_tool_calls: metadata?.parallelToolCalls ?? true, + }), } // Include text.verbosity only when the model explicitly supports it diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts index 33b29abcafe..a4789e1aac1 100644 --- a/src/api/providers/openai.ts +++ b/src/api/providers/openai.ts @@ -152,6 +152,17 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl const isGrokXAI = this._isGrokXAI(this.options.openAiBaseUrl) + // When useXmlToolCalling is enabled, omit native tool definitions from the API request. + // The model will rely on XML tool documentation in the system prompt instead, + // and output tool calls as raw XML text parsed by TagMatcher. + const nativeToolParams = metadata?.useXmlToolCalling + ? {} + : { + tools: this.convertToolsForOpenAI(metadata?.tools), + tool_choice: metadata?.tool_choice, + parallel_tool_calls: metadata?.parallelToolCalls ?? 
true, + } + const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = { model: modelId, temperature: this.options.modelTemperature ?? (deepseekReasoner ? DEEP_SEEK_DEFAULT_TEMPERATURE : 0), @@ -159,9 +170,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl stream: true as const, ...(isGrokXAI ? {} : { stream_options: { include_usage: true } }), ...(reasoning && reasoning), - tools: this.convertToolsForOpenAI(metadata?.tools), - tool_choice: metadata?.tool_choice, - parallel_tool_calls: metadata?.parallelToolCalls ?? true, + ...nativeToolParams, } // Add max_tokens if needed @@ -221,15 +230,21 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl yield this.processUsageMetrics(lastUsage, modelInfo) } } else { + // When useXmlToolCalling is enabled, omit native tool definitions from the API request. + const nativeToolParamsNonStreaming = metadata?.useXmlToolCalling + ? {} + : { + tools: this.convertToolsForOpenAI(metadata?.tools), + tool_choice: metadata?.tool_choice, + parallel_tool_calls: metadata?.parallelToolCalls ?? true, + } + const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = { model: modelId, messages: deepseekReasoner ? convertToR1Format([{ role: "user", content: systemPrompt }, ...messages]) : [systemMessage, ...convertToOpenAiMessages(messages)], - // Tools are always present (minimum ALWAYS_AVAILABLE_TOOLS) - tools: this.convertToolsForOpenAI(metadata?.tools), - tool_choice: metadata?.tool_choice, - parallel_tool_calls: metadata?.parallelToolCalls ?? true, + ...nativeToolParamsNonStreaming, } // Add max_tokens if needed @@ -338,6 +353,15 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl if (this.options.openAiStreamingEnabled ?? true) { const isGrokXAI = this._isGrokXAI(this.options.openAiBaseUrl) + // When useXmlToolCalling is enabled, omit native tool definitions from the API request. 
+ const o3NativeToolParams = metadata?.useXmlToolCalling + ? {} + : { + tools: this.convertToolsForOpenAI(metadata?.tools), + tool_choice: metadata?.tool_choice, + parallel_tool_calls: metadata?.parallelToolCalls ?? true, + } + const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming = { model: modelId, messages: [ @@ -351,10 +375,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl ...(isGrokXAI ? {} : { stream_options: { include_usage: true } }), reasoning_effort: modelInfo.reasoningEffort as "low" | "medium" | "high" | undefined, temperature: undefined, - // Tools are always present (minimum ALWAYS_AVAILABLE_TOOLS) - tools: this.convertToolsForOpenAI(metadata?.tools), - tool_choice: metadata?.tool_choice, - parallel_tool_calls: metadata?.parallelToolCalls ?? true, + ...o3NativeToolParams, } // O3 family models do not support the deprecated max_tokens parameter @@ -374,6 +395,15 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl yield* this.handleStreamResponse(stream) } else { + // When useXmlToolCalling is enabled, omit native tool definitions from the API request. + const o3NativeToolParamsNonStreaming = metadata?.useXmlToolCalling + ? {} + : { + tools: this.convertToolsForOpenAI(metadata?.tools), + tool_choice: metadata?.tool_choice, + parallel_tool_calls: metadata?.parallelToolCalls ?? true, + } + const requestOptions: OpenAI.Chat.Completions.ChatCompletionCreateParamsNonStreaming = { model: modelId, messages: [ @@ -385,10 +415,7 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl ], reasoning_effort: modelInfo.reasoningEffort as "low" | "medium" | "high" | undefined, temperature: undefined, - // Tools are always present (minimum ALWAYS_AVAILABLE_TOOLS) - tools: this.convertToolsForOpenAI(metadata?.tools), - tool_choice: metadata?.tool_choice, - parallel_tool_calls: metadata?.parallelToolCalls ?? 
true, + ...o3NativeToolParamsNonStreaming, } // O3 family models do not support the deprecated max_tokens parameter diff --git a/src/api/providers/openrouter.ts b/src/api/providers/openrouter.ts index 7fcc24b15f6..96c99c04e49 100644 --- a/src/api/providers/openrouter.ts +++ b/src/api/providers/openrouter.ts @@ -327,8 +327,13 @@ export class OpenRouterHandler extends BaseProvider implements SingleCompletionH }, }), ...(reasoning && { reasoning }), - tools: this.convertToolsForOpenAI(metadata?.tools), - tool_choice: metadata?.tool_choice, + // When useXmlToolCalling is enabled, omit native tool definitions from the API request. + ...(metadata?.useXmlToolCalling + ? {} + : { + tools: this.convertToolsForOpenAI(metadata?.tools), + tool_choice: metadata?.tool_choice, + }), } // Add Anthropic beta header for fine-grained tool streaming when using Anthropic models diff --git a/src/api/providers/qwen-code.ts b/src/api/providers/qwen-code.ts index 18d09a59f3b..28b1f05088c 100644 --- a/src/api/providers/qwen-code.ts +++ b/src/api/providers/qwen-code.ts @@ -226,9 +226,14 @@ export class QwenCodeHandler extends BaseProvider implements SingleCompletionHan stream: true, stream_options: { include_usage: true }, max_completion_tokens: model.info.maxTokens, - tools: this.convertToolsForOpenAI(metadata?.tools), - tool_choice: metadata?.tool_choice, - parallel_tool_calls: metadata?.parallelToolCalls ?? true, + // When useXmlToolCalling is enabled, omit native tool definitions from the API request. + ...(metadata?.useXmlToolCalling + ? {} + : { + tools: this.convertToolsForOpenAI(metadata?.tools), + tool_choice: metadata?.tool_choice, + parallel_tool_calls: metadata?.parallelToolCalls ?? 
true, + }), } const stream = await this.callApiWithRetry(() => client.chat.completions.create(requestOptions)) diff --git a/src/api/providers/requesty.ts b/src/api/providers/requesty.ts index b241c347b08..e91eb266e8a 100644 --- a/src/api/providers/requesty.ts +++ b/src/api/providers/requesty.ts @@ -149,8 +149,13 @@ export class RequestyHandler extends BaseProvider implements SingleCompletionHan stream: true, stream_options: { include_usage: true }, requesty: { trace_id: metadata?.taskId, extra: { mode: metadata?.mode } }, - tools: this.convertToolsForOpenAI(metadata?.tools), - tool_choice: metadata?.tool_choice, + // When useXmlToolCalling is enabled, omit native tool definitions from the API request. + ...(metadata?.useXmlToolCalling + ? {} + : { + tools: this.convertToolsForOpenAI(metadata?.tools), + tool_choice: metadata?.tool_choice, + }), } let stream diff --git a/src/api/providers/roo.ts b/src/api/providers/roo.ts index b455a1885ed..59bb5f64f07 100644 --- a/src/api/providers/roo.ts +++ b/src/api/providers/roo.ts @@ -106,8 +106,13 @@ export class RooHandler extends BaseOpenAiCompatibleProvider { stream: true, stream_options: { include_usage: true }, ...(reasoning && { reasoning }), - tools: this.convertToolsForOpenAI(metadata?.tools), - tool_choice: metadata?.tool_choice, + // When useXmlToolCalling is enabled, omit native tool definitions from the API request. + ...(metadata?.useXmlToolCalling + ? 
{} + : { + tools: this.convertToolsForOpenAI(metadata?.tools), + tool_choice: metadata?.tool_choice, + }), } try { diff --git a/src/api/providers/unbound.ts b/src/api/providers/unbound.ts index d50bfcc85d2..a948887b9f3 100644 --- a/src/api/providers/unbound.ts +++ b/src/api/providers/unbound.ts @@ -143,8 +143,13 @@ export class UnboundHandler extends BaseProvider implements SingleCompletionHand stream: true, stream_options: { include_usage: true }, unbound_metadata: { originApp: "roo-code", taskId: metadata?.taskId, mode: metadata?.mode }, - tools: this.convertToolsForOpenAI(metadata?.tools), - tool_choice: metadata?.tool_choice, + // When useXmlToolCalling is enabled, omit native tool definitions from the API request. + ...(metadata?.useXmlToolCalling + ? {} + : { + tools: this.convertToolsForOpenAI(metadata?.tools), + tool_choice: metadata?.tool_choice, + }), } let stream diff --git a/src/api/providers/vercel-ai-gateway.ts b/src/api/providers/vercel-ai-gateway.ts index 51b0eb5f513..49ec2e29bf7 100644 --- a/src/api/providers/vercel-ai-gateway.ts +++ b/src/api/providers/vercel-ai-gateway.ts @@ -61,9 +61,14 @@ export class VercelAiGatewayHandler extends RouterProvider implements SingleComp max_completion_tokens: info.maxTokens, stream: true, stream_options: { include_usage: true }, - tools: this.convertToolsForOpenAI(metadata?.tools), - tool_choice: metadata?.tool_choice, - parallel_tool_calls: metadata?.parallelToolCalls ?? true, + // When useXmlToolCalling is enabled, omit native tool definitions from the API request. + ...(metadata?.useXmlToolCalling + ? {} + : { + tools: this.convertToolsForOpenAI(metadata?.tools), + tool_choice: metadata?.tool_choice, + parallel_tool_calls: metadata?.parallelToolCalls ?? 
true, + }), } const completion = await this.client.chat.completions.create(body) diff --git a/src/api/providers/xai.ts b/src/api/providers/xai.ts index 8b973d41c4e..b23d7051ae7 100644 --- a/src/api/providers/xai.ts +++ b/src/api/providers/xai.ts @@ -72,9 +72,14 @@ export class XAIHandler extends BaseProvider implements SingleCompletionHandler stream: true as const, stream_options: { include_usage: true }, ...(reasoning && reasoning), - tools: this.convertToolsForOpenAI(metadata?.tools), - tool_choice: metadata?.tool_choice, - parallel_tool_calls: metadata?.parallelToolCalls ?? true, + // When useXmlToolCalling is enabled, omit native tool definitions from the API request. + ...(metadata?.useXmlToolCalling + ? {} + : { + tools: this.convertToolsForOpenAI(metadata?.tools), + tool_choice: metadata?.tool_choice, + parallel_tool_calls: metadata?.parallelToolCalls ?? true, + }), } let stream diff --git a/src/api/providers/zai.ts b/src/api/providers/zai.ts index 74e5ea81373..8bfd85d18e7 100644 --- a/src/api/providers/zai.ts +++ b/src/api/providers/zai.ts @@ -101,9 +101,14 @@ export class ZAiHandler extends BaseOpenAiCompatibleProvider { stream_options: { include_usage: true }, // For GLM-4.7: thinking is ON by default, so we explicitly disable when needed thinking: useReasoning ? { type: "enabled" } : { type: "disabled" }, - tools: this.convertToolsForOpenAI(metadata?.tools), - tool_choice: metadata?.tool_choice, - parallel_tool_calls: metadata?.parallelToolCalls ?? true, + // When useXmlToolCalling is enabled, omit native tool definitions from the API request. + ...(metadata?.useXmlToolCalling + ? {} + : { + tools: this.convertToolsForOpenAI(metadata?.tools), + tool_choice: metadata?.tool_choice, + parallel_tool_calls: metadata?.parallelToolCalls ?? 
true, + }), } return this.client.chat.completions.create(params) diff --git a/src/core/assistant-message/XmlToolCallParser.ts b/src/core/assistant-message/XmlToolCallParser.ts new file mode 100644 index 00000000000..48aeee63309 --- /dev/null +++ b/src/core/assistant-message/XmlToolCallParser.ts @@ -0,0 +1,481 @@ +/** + * XmlToolCallParser: streaming parser that detects XML-formatted tool calls + * from model text output and converts them into ToolUse objects. + * + * When useXmlToolCalling is enabled, models output tool calls as XML text: + * + * src/app.ts + * + * + * This parser watches the accumulated text for complete tool call XML blocks, + * extracts parameters, and delegates to NativeToolCallParser.parseToolCall() + * to produce properly typed ToolUse objects with nativeArgs. + */ + +import { randomUUID } from "crypto" + +import { type ToolName, toolNames } from "@roo-code/types" +import { type ToolUse, type McpToolUse } from "../../shared/tools" +import { NativeToolCallParser } from "./NativeToolCallParser" +import { resolveToolAlias } from "../prompts/tools/filter-tools-for-mode" + +// Build a Set of all known tool names (including aliases) for fast lookup +const KNOWN_TOOL_NAMES = new Set([ + ...toolNames, + // Common aliases that models might use + "write_file", + "search_and_replace", +]) + +/** + * Result from feeding text to the parser. + */ +export interface XmlParseResult { + /** Any text before the tool call (to be displayed as chat text) */ + textBeforeToolCall: string + /** Parsed tool calls found in the text */ + toolCalls: Array + /** Any text after all parsed tool calls (remaining text to continue accumulating) */ + remainingText: string +} + +/** + * XmlToolCallParser detects and parses XML tool calls from streamed text. 
+ * + * Usage: + * const parser = new XmlToolCallParser() + * // As text streams in, feed the full accumulated text: + * const result = parser.parse(accumulatedText) + * // result.textBeforeToolCall = text to display + * // result.toolCalls = completed tool calls to execute + * // result.remainingText = leftover text (may contain partial XML) + */ +export class XmlToolCallParser { + /** Track which tool calls we've already emitted so we don't duplicate */ + private emittedToolCallCount = 0 + + /** + * Parse accumulated text for XML tool calls. + * + * This method finds complete `...` blocks in the text, + * extracts parameters from child XML tags, and converts them into ToolUse objects. + * + * @param fullText - The complete accumulated assistant text so far + * @returns Parsed results with text segments and tool calls + */ + public parse(fullText: string): XmlParseResult { + const toolCalls: Array = [] + let textBeforeToolCall = "" + // Pre-process: strip thinking tags and convert alternative tool call formats + let remainingText = this.stripThinkingTags(fullText) + remainingText = this.normalizeToolCallFormat(remainingText) + let searchStartIndex = 0 + + // Scan for complete XML tool call blocks + while (searchStartIndex < remainingText.length) { + // Find the next opening tag that matches a known tool name + const openTagMatch = this.findNextToolOpenTag(remainingText, searchStartIndex) + + if (!openTagMatch) { + // No more tool tags found + break + } + + const { toolName, tagStart, tagEnd } = openTagMatch + + // Look for the matching closing tag + const closeTag = `` + const closeTagIndex = remainingText.indexOf(closeTag, tagEnd) + + if (closeTagIndex === -1) { + // Closing tag not found yet - this is a partial tool call still streaming. + // Split: text before the opening tag is displayable, the rest is partial XML. 
+ if (toolCalls.length === 0) { + textBeforeToolCall = remainingText.substring(0, tagStart).trimEnd() + remainingText = remainingText.substring(tagStart) + } + // Return immediately — don't fall through to findPartialToolTagStart + // which only checks the last 35 chars and would miss this. + return { textBeforeToolCall, toolCalls, remainingText } + } + + // We have a complete tool call block + const xmlContent = remainingText.substring(tagEnd, closeTagIndex) + const blockEnd = closeTagIndex + closeTag.length + + // Check if this tool call was already emitted + const toolCallIndex = this.countCompletedToolCalls(remainingText.substring(0, blockEnd)) + if (toolCallIndex <= this.emittedToolCallCount) { + // Already emitted, skip past it + searchStartIndex = blockEnd + continue + } + + // Extract text before this tool call (only for the first un-emitted tool) + if (toolCalls.length === 0) { + textBeforeToolCall = remainingText.substring(0, tagStart).trimEnd() + } + + // Parse the XML content into parameters + const params = this.extractParams(xmlContent) + + // Convert to a ToolUse via NativeToolCallParser.parseToolCall() + const toolCall = this.buildToolUse(toolName, params) + if (toolCall) { + toolCalls.push(toolCall) + this.emittedToolCallCount++ + } + + searchStartIndex = blockEnd + } + + // If we found tool calls, remaining text is everything after the last one + if (toolCalls.length > 0) { + remainingText = remainingText.substring(searchStartIndex).trimStart() + } else { + // No complete tool calls found. + // Check if there's a partial opening tag at the end that we should not display yet. 
+ const partialTagStart = this.findPartialToolTagStart(remainingText) + if (partialTagStart !== -1) { + textBeforeToolCall = remainingText.substring(0, partialTagStart) + remainingText = remainingText.substring(partialTagStart) + } else { + textBeforeToolCall = remainingText + remainingText = "" + } + } + + return { textBeforeToolCall, toolCalls, remainingText } + } + + /** + * Check if text currently contains a partial (incomplete) tool call XML tag + * that is still being streamed. + */ + public hasPartialToolCall(text: string): boolean { + const cleanText = this.stripThinkingTags(text) + const openTag = this.findNextToolOpenTag(cleanText, 0) + if (!openTag) { + return false + } + const closeTag = `` + return cleanText.indexOf(closeTag, openTag.tagEnd) === -1 + } + + /** + * Reset parser state (e.g. for a new message). + */ + public reset(): void { + this.emittedToolCallCount = 0 + } + + /** + * Strip ... tags and their content from text. + * Models sometimes output tool calls inside thinking tags which shouldn't be parsed, + * or the thinking content is so large it overwhelms the actual tool call. + */ + private stripThinkingTags(text: string): string { + // Remove complete ... blocks + return text.replace(/[\s\S]*?<\/thinking>/g, "") + } + + /** + * Normalize alternative tool call formats to our standard XML format. + * Handles Meta/Llama style: value + */ + private normalizeToolCallFormat(text: string): string { + // Match ......VALUE... 
+ const toolCallRegex = /\s*([\s\S]*?)<\/function>\s*<\/tool_call>/g + return text.replace(toolCallRegex, (_match, toolName: string, content: string) => { + // Extract value pairs + const paramRegex = /([\s\S]*?)<\/parameter>/g + const params: string[] = [] + let paramMatch: RegExpExecArray | null + while ((paramMatch = paramRegex.exec(content)) !== null) { + const paramName = paramMatch[1] + const paramValue = paramMatch[2].trim() + params.push(`<${paramName}>${paramValue}`) + } + return `<${toolName}>\n${params.join("\n")}\n` + }) + } + + // ── Private helpers ─────────────────────────────────────────────── + + /** + * Find the next opening XML tag that matches a known tool name. + */ + private findNextToolOpenTag( + text: string, + startIndex: number, + ): { toolName: string; tagStart: number; tagEnd: number } | null { + // Match or (with optional whitespace) + const tagRegex = /<([a-z_]+)(?:\s*)>/g + tagRegex.lastIndex = startIndex + + let match: RegExpExecArray | null + while ((match = tagRegex.exec(text)) !== null) { + const candidateName = match[1] + + // Check if it's a known tool name (or an alias) + if (KNOWN_TOOL_NAMES.has(candidateName)) { + return { + toolName: candidateName, + tagStart: match.index, + tagEnd: match.index + match[0].length, + } + } + + // Also check if it resolves to a known tool via alias + const resolved = resolveToolAlias(candidateName) + if (resolved !== candidateName && toolNames.includes(resolved as ToolName)) { + return { + toolName: candidateName, + tagStart: match.index, + tagEnd: match.index + match[0].length, + } + } + } + + return null + } + + /** + * Find the start of a potential partial tool tag at the end of the text. + * This prevents displaying partial `` yet, AND + // 2. 
What we have so far could prefix a known tool name + if (afterAngle.includes(">")) { + return -1 // This tag is already closed, not partial + } + + // Check if the partial text could be the beginning of a tool name + const partialName = afterAngle.replace(/\s+$/, "") + if (partialName.length === 0) { + // Just a bare `<` at the end — could be anything + return regionStart + lastOpenAngle + } + + for (const name of KNOWN_TOOL_NAMES) { + if (name.startsWith(partialName)) { + return regionStart + lastOpenAngle + } + } + + return -1 + } + + /** + * Count how many complete tool call blocks exist in text up to a position. + */ + private countCompletedToolCalls(text: string): number { + let count = 0 + let searchFrom = 0 + + while (true) { + const openTag = this.findNextToolOpenTag(text, searchFrom) + if (!openTag) { + break + } + const closeTag = `` + const closeIndex = text.indexOf(closeTag, openTag.tagEnd) + if (closeIndex === -1) { + break + } + count++ + searchFrom = closeIndex + closeTag.length + } + + return count + } + + /** + * Extract parameter key-value pairs from XML content. + * Handles nested XML tags like: + * src/app.ts + * multi\nline\ncontent + */ + private extractParams(xmlContent: string): Record { + const params: Record = {} + + // Match parameter tags: value + // Use a non-greedy match that handles multi-line values + const paramRegex = /<([a-z_]+)>([\s\S]*?)<\/\1>/g + + let match: RegExpExecArray | null + while ((match = paramRegex.exec(xmlContent)) !== null) { + const paramName = match[1] + let paramValue = match[2] + + // Trim leading/trailing whitespace from the value (models often add newlines) + paramValue = paramValue.trim() + + params[paramName] = paramValue + } + + return params + } + + /** + * Build a ToolUse object from parsed XML parameters. + * Delegates to NativeToolCallParser.parseToolCall() for proper typing. 
+ */ + private buildToolUse(toolName: string, params: Record): ToolUse | McpToolUse | null { + // Generate a synthetic tool call ID (Anthropic format) + const syntheticId = `toolu_xml_${randomUUID().replace(/-/g, "").substring(0, 24)}` + + // Resolve aliases + const resolvedName = resolveToolAlias(toolName) as ToolName + + // Convert string params to the right types for JSON args. + // NativeToolCallParser.parseToolCall expects a JSON string of arguments. + // We need to convert our extracted string params to the format the native parser expects. + const args = this.convertParamsToArgs(resolvedName, params) + + try { + const result = NativeToolCallParser.parseToolCall({ + id: syntheticId, + name: resolvedName, + arguments: JSON.stringify(args), + }) + + // NativeToolCallParser.parseToolCall doesn't set `id` on the returned ToolUse. + // We must set it here so presentAssistantMessage.ts can find it and + // pushToolResultToUserContent can reference it. + if (result) { + ;(result as any).id = syntheticId + } + + return result + } catch (error) { + console.error(`[XmlToolCallParser] Failed to parse tool call '${toolName}':`, error) + return null + } + } + + /** + * Convert string XML params to properly typed argument objects. + * Most params remain strings, but some need type coercion (booleans, numbers, objects). 
+ */ + private convertParamsToArgs(toolName: ToolName, params: Record): Record { + const args: Record = {} + + for (const [key, value] of Object.entries(params)) { + switch (key) { + // Boolean parameters + case "recursive": + case "replace_all": + case "include_siblings": + case "include_header": + args[key] = value.toLowerCase() === "true" + break + + // Numeric parameters + case "offset": + case "limit": + case "timeout": + case "anchor_line": + case "max_levels": + case "max_lines": + case "expected_replacements": + args[key] = parseInt(value, 10) + break + + // JSON object parameters + case "arguments": + // For use_mcp_tool, arguments is a JSON object + if (toolName === "use_mcp_tool") { + try { + args[key] = JSON.parse(value) + } catch { + args[key] = value + } + } else { + args[key] = value + } + break + + case "follow_up": + // ask_followup_question follow_up — models output in many formats: + // 1. JSON array: [{"text":"a"},{"text":"b"}] + // 2. JSON objects without brackets: {"text":"a"},{"text":"b"} + // 3. JSON objects without commas: {"text":"a"} {"text":"b"} + // 4. XML tags (Kilo Code/Cline format): + // Option AOption B + // 5. Plain text suggestions + { + // First check for tags (common XML format from Kilo/Cline trained models) + const suggestRegex = /([\s\S]*?)<\/suggest>/g + const suggests: Array<{ text: string; mode?: string }> = [] + let suggestMatch: RegExpExecArray | null + while ((suggestMatch = suggestRegex.exec(value)) !== null) { + const mode = suggestMatch[1] + const text = suggestMatch[2].trim() + if (text) { + suggests.push(mode ? 
{ text, mode } : { text }) + } + } + if (suggests.length > 0) { + args[key] = suggests + break + } + + // Try JSON formats + try { + args[key] = JSON.parse(value) + } catch { + try { + const fixed = value.replace(/\}\s*\{/g, "},{") + args[key] = JSON.parse(`[${fixed}]`) + } catch { + // Plain text fallback + args[key] = [{ text: value }] + } + } + } + break + + case "todos": + // update_todo_list and new_task todos — could be JSON or plain text + if (toolName === "update_todo_list" || toolName === "new_task") { + args[key] = value + } else { + args[key] = value + } + break + + case "indentation": + // read_file indentation is a JSON object + try { + args[key] = JSON.parse(value) + } catch { + args[key] = value + } + break + + // Everything else is a string + default: + args[key] = value + break + } + } + + return args + } +} diff --git a/src/core/assistant-message/__tests__/presentAssistantMessage-images.spec.ts b/src/core/assistant-message/__tests__/presentAssistantMessage-images.spec.ts index fcf778b8f81..a6c05ed6208 100644 --- a/src/core/assistant-message/__tests__/presentAssistantMessage-images.spec.ts +++ b/src/core/assistant-message/__tests__/presentAssistantMessage-images.spec.ts @@ -179,9 +179,7 @@ describe("presentAssistantMessage - Image Handling in Native Tool Calling", () = const textBlocks = mockTask.userMessageContent.filter((item: any) => item.type === "text") expect(textBlocks.length).toBeGreaterThan(0) - expect(textBlocks.some((b: any) => String(b.text).includes("XML tool calls are no longer supported"))).toBe( - true, - ) + expect(textBlocks.some((b: any) => String(b.text).includes("missing tool_use.id"))).toBe(true) // Should not proceed to execute tool or add images as tool output. 
expect(mockTask.userMessageContent.some((item: any) => item.type === "image")).toBe(false) }) @@ -283,9 +281,7 @@ describe("presentAssistantMessage - Image Handling in Native Tool Calling", () = await presentAssistantMessage(mockTask) const textBlocks = mockTask.userMessageContent.filter((item: any) => item.type === "text") - expect(textBlocks.some((b: any) => String(b.text).includes("XML tool calls are no longer supported"))).toBe( - true, - ) + expect(textBlocks.some((b: any) => String(b.text).includes("missing tool_use.id"))).toBe(true) // Ensure no tool_result blocks were added expect(mockTask.userMessageContent.some((item: any) => item.type === "tool_result")).toBe(false) }) diff --git a/src/core/assistant-message/__tests__/presentAssistantMessage-unknown-tool.spec.ts b/src/core/assistant-message/__tests__/presentAssistantMessage-unknown-tool.spec.ts index 8e6c8d9d9e7..1728dd5d047 100644 --- a/src/core/assistant-message/__tests__/presentAssistantMessage-unknown-tool.spec.ts +++ b/src/core/assistant-message/__tests__/presentAssistantMessage-unknown-tool.spec.ts @@ -128,9 +128,7 @@ describe("presentAssistantMessage - Unknown Tool Handling", () => { // Should not execute tool; should surface a clear error message. 
const textBlocks = mockTask.userMessageContent.filter((item: any) => item.type === "text") expect(textBlocks.length).toBeGreaterThan(0) - expect(textBlocks.some((b: any) => String(b.text).includes("XML tool calls are no longer supported"))).toBe( - true, - ) + expect(textBlocks.some((b: any) => String(b.text).includes("missing tool_use.id"))).toBe(true) // Verify consecutiveMistakeCount was incremented expect(mockTask.consecutiveMistakeCount).toBe(1) diff --git a/src/core/assistant-message/index.ts b/src/core/assistant-message/index.ts index 107424fc503..5426a24663d 100644 --- a/src/core/assistant-message/index.ts +++ b/src/core/assistant-message/index.ts @@ -1,2 +1,3 @@ export type { AssistantMessageContent } from "./types" export { presentAssistantMessage } from "./presentAssistantMessage" +export { XmlToolCallParser } from "./XmlToolCallParser" diff --git a/src/core/assistant-message/presentAssistantMessage.ts b/src/core/assistant-message/presentAssistantMessage.ts index 7f5862be154..a64e5433e83 100644 --- a/src/core/assistant-message/presentAssistantMessage.ts +++ b/src/core/assistant-message/presentAssistantMessage.ts @@ -296,12 +296,13 @@ export async function presentAssistantMessage(cline: Task) { break } case "tool_use": { - // Native tool calling is the only supported tool calling mechanism. - // A tool_use block without an id is invalid and cannot be executed. + // A tool_use block without an id is invalid for native tool calling. + // However, when useXmlToolCalling is enabled, the XmlToolCallParser assigns + // synthetic IDs (prefixed with "xml-tool-") so this check still passes. const toolCallId = (block as any).id as string | undefined if (!toolCallId) { const errorMessage = - "Invalid tool call: missing tool_use.id. XML tool calls are no longer supported. Remove any XML tool markup (e.g. ...) and use native tool calling instead." + "Invalid tool call: missing tool_use.id. Tool call block is missing its identifier. This may indicate a parsing error." 
// Record a tool error for visibility/telemetry. Use the reported tool name if present. try { if ( @@ -388,34 +389,38 @@ export async function presentAssistantMessage(cline: Task) { } } + // Detect if XML tool calling is active + const isXmlToolCalling = cline.xmlToolCallParser !== undefined + if (cline.didRejectTool) { // Ignore any tool content after user has rejected tool once. - // For native tool calling, we must send a tool_result for every tool_use to avoid API errors const errorMessage = !block.partial ? `Skipping tool ${toolDescription()} due to user rejecting a previous tool.` : `Tool ${toolDescription()} was interrupted and not executed due to user rejecting a previous tool.` - cline.pushToolResultToUserContent({ - type: "tool_result", - tool_use_id: sanitizeToolUseId(toolCallId), - content: errorMessage, - is_error: true, - }) + if (isXmlToolCalling) { + // XML mode: push as text since the API has no tool_use to match + cline.userMessageContent.push({ type: "text", text: `[Tool Error] ${errorMessage}` }) + } else { + // Native mode: push tool_result for every tool_use to avoid API errors + cline.pushToolResultToUserContent({ + type: "tool_result", + tool_use_id: sanitizeToolUseId(toolCallId), + content: errorMessage, + is_error: true, + }) + } break } - // Track if we've already pushed a tool result for this tool call (native tool calling only) + // Track if we've already pushed a tool result for this tool call let hasToolResult = false // If this is a native tool call but the parser couldn't construct nativeArgs // (e.g., malformed/unfinished JSON in a streaming tool call), we must NOT attempt to - // execute the tool. Instead, emit exactly one structured tool_result so the provider - // receives a matching tool_result for the tool_use_id. - // - // This avoids executing an invalid tool_use block and prevents duplicate/fragmented - // error reporting. - if (!block.partial) { + // execute the tool. 
Skip this check in XML mode since XML tools use params, not nativeArgs. + if (!block.partial && !isXmlToolCalling) { const customTool = stateExperiments?.customTools ? customToolRegistry.get(block.name) : undefined const isKnownTool = isValidToolName(String(block.name), stateExperiments) if (isKnownTool && !block.nativeArgs && !customTool) { @@ -447,7 +452,7 @@ export async function presentAssistantMessage(cline: Task) { let approvalFeedback: { text: string; images?: string[] } | undefined const pushToolResult = (content: ToolResponse) => { - // Native tool calling: only allow ONE tool_result per tool call + // Only allow ONE tool_result per tool call if (hasToolResult) { console.warn( `[presentAssistantMessage] Skipping duplicate tool_result for tool_use_id: ${toolCallId}`, @@ -478,11 +483,23 @@ export async function presentAssistantMessage(cline: Task) { } } - cline.pushToolResultToUserContent({ - type: "tool_result", - tool_use_id: sanitizeToolUseId(toolCallId), - content: resultContent, - }) + if (isXmlToolCalling) { + // XML mode: push tool results as plain text since there are no + // native tool_use blocks in the assistant message for the API to match. + // Format the result with the tool name for clarity. 
+ const toolName = block.name || "unknown_tool" + cline.userMessageContent.push({ + type: "text", + text: `[${toolName} Result]\n${resultContent}`, + }) + } else { + // Native mode: push as structured tool_result + cline.pushToolResultToUserContent({ + type: "tool_result", + tool_use_id: sanitizeToolUseId(toolCallId), + content: resultContent, + }) + } if (imageBlocks.length > 0) { cline.userMessageContent.push(...imageBlocks) diff --git a/src/core/auto-approval/AutoApprovalHandler.ts b/src/core/auto-approval/AutoApprovalHandler.ts index 638baa1c926..daffbaec3be 100644 --- a/src/core/auto-approval/AutoApprovalHandler.ts +++ b/src/core/auto-approval/AutoApprovalHandler.ts @@ -15,6 +15,10 @@ export class AutoApprovalHandler { private consecutiveAutoApprovedRequestsCount: number = 0 private consecutiveAutoApprovedCost: number = 0 + constructor() { + console.log(`[AutoApprovalHandler] Initialized (limit handler — approval decisions are in checkAutoApproval)`) + } + /** * Check if auto-approval limits have been reached and handle user approval if needed */ @@ -138,6 +142,7 @@ export class AutoApprovalHandler { * Reset the tracking (typically called when starting a new task) */ resetRequestCount(): void { + console.log(`[AutoApprovalHandler] resetRequestCount called (was: requests=${this.consecutiveAutoApprovedRequestsCount}, cost=${this.consecutiveAutoApprovedCost})`) this.lastResetMessageIndex = 0 this.consecutiveAutoApprovedRequestsCount = 0 this.consecutiveAutoApprovedCost = 0 diff --git a/src/core/auto-approval/index.ts b/src/core/auto-approval/index.ts index c8293c2a79f..e22735184c7 100644 --- a/src/core/auto-approval/index.ts +++ b/src/core/auto-approval/index.ts @@ -59,10 +59,40 @@ export async function checkAutoApproval({ return { decision: "approve" } } + console.log( + `[checkAutoApproval] ask="${ask}"`, + `autoApprovalEnabled=${state?.autoApprovalEnabled}`, + `alwaysAllowReadOnly=${state?.alwaysAllowReadOnly}`, + `alwaysAllowWrite=${state?.alwaysAllowWrite}`, 
+ `alwaysAllowExecute=${state?.alwaysAllowExecute}`, + `alwaysAllowMcp=${state?.alwaysAllowMcp}`, + `alwaysAllowModeSwitch=${state?.alwaysAllowModeSwitch}`, + `alwaysAllowSubtasks=${state?.alwaysAllowSubtasks}`, + `stateExists=${!!state}`, + ) + if (!state || !state.autoApprovalEnabled) { + console.log(`[checkAutoApproval] BLOCKING — autoApprovalEnabled is falsy (${state?.autoApprovalEnabled}), returning "ask" for ask="${ask}"`) return { decision: "ask" } } + // Multi-orchestrator spawned agents: approve ALL tool/command/followup + // operations unconditionally. Nobody is watching these panels to click + // approve, so every ask must be auto-approved to avoid deadlocks. + // + // EXCEPTION: completion_result — we approve it (which triggers "yesButtonClicked" + // → emitTaskCompleted → return from tool), but we ALSO need to abort the + // task to prevent the outer while-loop from sending another API request. + // resume_completed_task/resume_task must NOT be approved to prevent restarts. 
+ if ((state as Record).multiOrchForceApproveAll === true) { + if (ask === "resume_completed_task" || ask === "resume_task") { + console.log(`[checkAutoApproval] multiOrchForceApproveAll=true but ask="${ask}" is a resume — NOT auto-approving`) + return { decision: "ask" } + } + console.log(`[checkAutoApproval] multiOrchForceApproveAll=true → auto-approving ask="${ask}"`) + return { decision: "approve" } + } + if (ask === "followup") { if (state.alwaysAllowFollowupQuestions === true) { try { diff --git a/src/core/memory/__tests__/e2e.spec.ts b/src/core/memory/__tests__/e2e.spec.ts new file mode 100644 index 00000000000..d4bc824287b --- /dev/null +++ b/src/core/memory/__tests__/e2e.spec.ts @@ -0,0 +1,940 @@ +import * as path from "path" +import * as os from "os" +import * as fs from "fs" +import { MemoryStore } from "../memory-store" +import { preprocessMessages } from "../preprocessor" +import { processObservations, containsPII, jaccardSimilarity } from "../memory-writer" +import { compileMemoryPrompt, compileMemoryForAgent } from "../prompt-compiler" +import { computeScore } from "../scoring" +import type { Observation, MemoryCategorySlug, ScoredMemoryEntry } from "../types" +import { MEMORY_CONSTANTS, DEFAULT_MEMORY_CATEGORIES } from "../types" + +// --------------------------------------------------------------------------- +// Helpers +// --------------------------------------------------------------------------- + +function makeStore(): { store: MemoryStore; tmpDir: string } { + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "memory-e2e-")) + return { store: new MemoryStore(tmpDir), tmpDir } +} + +const NOW = Math.floor(Date.now() / 1000) + +function daysAgo(days: number): number { + return NOW - days * 86400 +} + +function makeEntry(overrides: Partial[0]> = {}) { + return { + workspaceId: null as string | null, + category: "coding-style" as MemoryCategorySlug, + content: "Prefers TypeScript over JavaScript", + significance: 0.8, + firstSeen: NOW, + 
lastReinforced: NOW, + reinforcementCount: 1, + decayRate: 0.05, + sourceTaskId: null as string | null, + isPinned: false, + ...overrides, + } +} + +// --------------------------------------------------------------------------- +// 1. Full Pipeline — preprocessor → mock analysis → writer → compiler +// --------------------------------------------------------------------------- +describe("E2E: Full Pipeline (mock LLM)", () => { + let store: MemoryStore + let tmpDir: string + + beforeEach(async () => { + ;({ store, tmpDir } = makeStore()) + await store.init() + }) + + afterEach(() => { + store.close() + fs.rmSync(tmpDir, { recursive: true, force: true }) + }) + + it("should flow from raw messages through to compiled prompt", () => { + // --- Step 1: Preprocess raw messages --- + const rawMessages = [ + { role: "user", content: "I always use TypeScript with strict mode. Never plain JS." }, + { + role: "assistant", + content: [ + { type: "text", text: "Got it — I'll use TypeScript with strict mode." }, + { type: "tool_use", id: "t1", name: "read_file", input: { path: "tsconfig.json" } }, + ], + }, + { role: "user", content: "I prefer functional React components with hooks, not classes." }, + { + role: "assistant", + content: "Understood, I'll stick with functional components and hooks.", + }, + { role: "user", content: "Keep responses concise. No over-explaining." 
}, + ] + + const preprocessed = preprocessMessages(rawMessages) + expect(preprocessed.cleaned).toContain("TypeScript with strict mode") + expect(preprocessed.cleaned).toContain("→ read: tsconfig.json") + expect(preprocessed.cleaned).toContain("functional React components") + expect(preprocessed.cleaned).toContain("concise") + expect(preprocessed.cleanedTokenEstimate).toBeLessThanOrEqual(preprocessed.originalTokenEstimate) + + // --- Step 2: Simulate LLM analysis output --- + const mockObservations: Observation[] = [ + { + action: "NEW", + category: "coding-style", + content: "Uses TypeScript with strict mode enabled, avoids plain JavaScript", + significance: 0.9, + existingEntryId: null, + reasoning: "Explicitly stated twice", + }, + { + action: "NEW", + category: "coding-style", + content: "Prefers functional React components with hooks over class components", + significance: 0.85, + existingEntryId: null, + reasoning: "Direct statement", + }, + { + action: "NEW", + category: "communication-prefs", + content: "Wants concise responses without over-explanation", + significance: 0.8, + existingEntryId: null, + reasoning: "Explicit request", + }, + ] + + // --- Step 3: Write observations to store --- + const writeResult = processObservations(store, mockObservations, null, "task-e2e-1") + expect(writeResult.entriesCreated).toBe(3) + expect(writeResult.entriesSkipped).toBe(0) + expect(store.getEntryCount()).toBe(3) + + // --- Step 4: Compile to system prompt --- + const scoredEntries = store.getScoredEntries(null) + expect(scoredEntries.length).toBe(3) + + const prose = compileMemoryPrompt(scoredEntries) + expect(prose).toContain("USER PROFILE & PREFERENCES") + expect(prose).toContain("Learned through conversation") + expect(prose).toContain("TypeScript with strict mode") + expect(prose).toContain("functional React components") + expect(prose).toContain("concise responses") + + // --- Step 5: Agent-format compilation (with IDs) --- + const agentReport = 
compileMemoryForAgent(scoredEntries) + expect(agentReport).toContain("coding-style") + expect(agentReport).toContain("communication-prefs") + // Each line should have [id] category (score: X.XX): content format + for (const entry of scoredEntries) { + expect(agentReport).toContain(`[${entry.id}]`) + } + }) + + it("should handle multi-turn conversation with reinforcement", () => { + // Round 1: initial observations + const round1: Observation[] = [ + { + action: "NEW", + category: "coding-style", + content: "Prefers functional React components with hooks", + significance: 0.85, + existingEntryId: null, + reasoning: "First mention", + }, + ] + const r1 = processObservations(store, round1, null, "task-1") + expect(r1.entriesCreated).toBe(1) + + // Round 2: LLM sees existing memory, sends REINFORCE + const entries = store.getScoredEntries(null) + const targetId = entries[0].id + const round2: Observation[] = [ + { + action: "REINFORCE", + category: "coding-style", + content: "Prefers functional React components with hooks", + significance: 0.85, + existingEntryId: targetId, + reasoning: "Confirmed again", + }, + ] + const r2 = processObservations(store, round2, null, "task-2") + expect(r2.entriesReinforced).toBe(1) + expect(store.getEntryCount()).toBe(1) // still 1 + + // Verify reinforcement count bumped + const updated = store.getEntry(targetId)! 
+ expect(updated.reinforcementCount).toBe(2) + }) + + it("should handle UPDATE action replacing content", () => { + const initial: Observation[] = [ + { + action: "NEW", + category: "tool-preferences", + content: "Uses ESLint for linting", + significance: 0.7, + existingEntryId: null, + reasoning: "Seen in config", + }, + ] + processObservations(store, initial, null, "task-1") + const id = store.getScoredEntries(null)[0].id + + const update: Observation[] = [ + { + action: "UPDATE", + category: "tool-preferences", + content: "Switched from ESLint to Biome for linting and formatting", + significance: 0.75, + existingEntryId: id, + reasoning: "User explicitly changed tooling", + }, + ] + const r = processObservations(store, update, null, "task-2") + expect(r.entriesReinforced).toBe(1) + + const entry = store.getEntry(id)! + expect(entry.content).toBe("Switched from ESLint to Biome for linting and formatting") + expect(entry.significance).toBe(0.75) + expect(entry.reinforcementCount).toBe(2) + }) +}) + +// --------------------------------------------------------------------------- +// 2. 
Scoring Lifecycle — insert, score ordering, garbage collection, cap +// --------------------------------------------------------------------------- +describe("E2E: Scoring Lifecycle", () => { + let store: MemoryStore + let tmpDir: string + + beforeEach(async () => { + ;({ store, tmpDir } = makeStore()) + await store.init() + }) + + afterEach(() => { + store.close() + fs.rmSync(tmpDir, { recursive: true, force: true }) + }) + + it("should return entries in descending score order", () => { + // High-significance, recently reinforced → high score + store.insertEntry( + makeEntry({ + content: "High scorer", + significance: 0.95, + reinforcementCount: 5, + lastReinforced: NOW, + decayRate: 0.05, + }), + ) + + // Medium + store.insertEntry( + makeEntry({ + content: "Medium scorer", + significance: 0.6, + reinforcementCount: 2, + lastReinforced: daysAgo(10), + decayRate: 0.1, + }), + ) + + // Low — old, decayed + store.insertEntry( + makeEntry({ + content: "Low scorer", + significance: 0.4, + reinforcementCount: 1, + lastReinforced: daysAgo(60), + decayRate: 0.15, + }), + ) + + const scored = store.getScoredEntries(null) + expect(scored.length).toBeGreaterThanOrEqual(2) + + // First entry should be the highest scorer + expect(scored[0].content).toBe("High scorer") + + // Scores should be in descending order + for (let i = 1; i < scored.length; i++) { + expect(scored[i - 1].computedScore).toBeGreaterThanOrEqual(scored[i].computedScore) + } + }) + + it("should garbage collect old low-score entries", () => { + // Entry that should survive: recent, high score + store.insertEntry( + makeEntry({ + content: "Survivor", + significance: 0.9, + reinforcementCount: 5, + lastReinforced: NOW, + }), + ) + + // Entry that should be GC'd: old, low significance, high decay + store.insertEntry( + makeEntry({ + content: "Doomed", + significance: 0.2, + reinforcementCount: 1, + lastReinforced: daysAgo(120), + decayRate: 0.3, + category: "active-projects", + }), + ) + + 
expect(store.getEntryCount()).toBe(2) + const deleted = store.garbageCollect() + expect(deleted).toBe(1) + expect(store.getEntryCount()).toBe(1) + + // The survivor should still be there + const remaining = store.getScoredEntries(null) + expect(remaining[0].content).toBe("Survivor") + }) + + it("should enforce the 500-entry hard cap", () => { + // Insert 505 entries — oldest/lowest score ones should get pruned + for (let i = 0; i < 505; i++) { + store.insertEntry( + makeEntry({ + content: `Entry number ${i}`, + significance: i < 5 ? 0.1 : 0.8, // First 5 are low significance + reinforcementCount: 1, + lastReinforced: i < 5 ? daysAgo(100) : NOW, // First 5 are old + decayRate: i < 5 ? 0.3 : 0.05, + }), + ) + } + + expect(store.getEntryCount()).toBe(505) + const deleted = store.garbageCollect() + expect(deleted).toBeGreaterThanOrEqual(5) // At least 5 must go + expect(store.getEntryCount()).toBeLessThanOrEqual(MEMORY_CONSTANTS.MAX_ENTRIES) + }) + + it("should not garbage collect pinned entries even if old/low-score", () => { + store.insertEntry( + makeEntry({ + content: "Pinned forever", + significance: 0.2, + reinforcementCount: 1, + lastReinforced: daysAgo(200), + decayRate: 0.3, + isPinned: true, + }), + ) + + const deleted = store.garbageCollect() + expect(deleted).toBe(0) + expect(store.getEntryCount()).toBe(1) + }) + + it("should filter entries below the score threshold from getScoredEntries", () => { + // A very old, very decayed entry should fall below 0.05 threshold + store.insertEntry( + makeEntry({ + content: "Ancient entry", + significance: 0.1, + reinforcementCount: 1, + lastReinforced: daysAgo(365), + decayRate: 0.3, + }), + ) + + const scored = store.getScoredEntries(null) + // Should be excluded due to score < 0.05 + expect(scored.length).toBe(0) + }) +}) + +// --------------------------------------------------------------------------- +// 3. 
Workspace Scoping — global vs workspace entries +// --------------------------------------------------------------------------- +describe("E2E: Workspace Scoping", () => { + let store: MemoryStore + let tmpDir: string + + const WORKSPACE_A = "ws-alpha-1234" + const WORKSPACE_B = "ws-beta-5678" + + beforeEach(async () => { + ;({ store, tmpDir } = makeStore()) + await store.init() + }) + + afterEach(() => { + store.close() + fs.rmSync(tmpDir, { recursive: true, force: true }) + }) + + it("should include global entries in all workspace queries", () => { + // Global entry (workspaceId = null) + store.insertEntry( + makeEntry({ + content: "Global: Prefers TypeScript", + workspaceId: null, + }), + ) + + // Workspace A entry + store.insertEntry( + makeEntry({ + content: "WS-A: Working on the API redesign", + workspaceId: WORKSPACE_A, + category: "active-projects", + }), + ) + + // Query with workspace A — should see both global + workspace A + const wsAEntries = store.getScoredEntries(WORKSPACE_A) + const wsAContents = wsAEntries.map((e) => e.content) + expect(wsAContents).toContain("Global: Prefers TypeScript") + expect(wsAContents).toContain("WS-A: Working on the API redesign") + + // Query with workspace B — should only see global + const wsBEntries = store.getScoredEntries(WORKSPACE_B) + const wsBContents = wsBEntries.map((e) => e.content) + expect(wsBContents).toContain("Global: Prefers TypeScript") + expect(wsBContents).not.toContain("WS-A: Working on the API redesign") + + // Query with null workspace — should only see global + const globalEntries = store.getScoredEntries(null) + const globalContents = globalEntries.map((e) => e.content) + expect(globalContents).toContain("Global: Prefers TypeScript") + expect(globalContents).not.toContain("WS-A: Working on the API redesign") + }) + + it("should scope active-projects observations to their workspace", () => { + const obs: Observation[] = [ + { + action: "NEW", + category: "active-projects", + content: "Building a 
real-time dashboard with WebSockets", + significance: 0.7, + existingEntryId: null, + reasoning: "Mentioned in conversation", + }, + ] + + processObservations(store, obs, WORKSPACE_A, "task-1") + const entry = store.getEntry(store.getScoredEntries(WORKSPACE_A)[0].id)! + expect(entry.workspaceId).toBe(WORKSPACE_A) + }) + + it("should scope coding-style and communication-prefs globally", () => { + const obs: Observation[] = [ + { + action: "NEW", + category: "coding-style", + content: "Strongly prefers const over let", + significance: 0.8, + existingEntryId: null, + reasoning: "test", + }, + { + action: "NEW", + category: "communication-prefs", + content: "Likes numbered steps in responses", + significance: 0.75, + existingEntryId: null, + reasoning: "test", + }, + ] + + processObservations(store, obs, WORKSPACE_A, "task-1") + + // Both should be globally visible + const wsA = store.getScoredEntries(WORKSPACE_A) + const wsB = store.getScoredEntries(WORKSPACE_B) + const global = store.getScoredEntries(null) + + expect(wsA.length).toBe(2) + expect(wsB.length).toBe(2) + expect(global.length).toBe(2) + }) + + it("should keep workspace entries isolated between different workspaces", () => { + // Insert workspace-scoped entries for two different workspaces + store.insertEntry( + makeEntry({ + content: "Project Alpha backend migration", + workspaceId: WORKSPACE_A, + category: "active-projects", + }), + ) + store.insertEntry( + makeEntry({ + content: "Project Beta frontend redesign", + workspaceId: WORKSPACE_B, + category: "active-projects", + }), + ) + + const wsA = store.getScoredEntries(WORKSPACE_A) + const wsB = store.getScoredEntries(WORKSPACE_B) + + expect(wsA.map((e) => e.content)).toContain("Project Alpha backend migration") + expect(wsA.map((e) => e.content)).not.toContain("Project Beta frontend redesign") + + expect(wsB.map((e) => e.content)).toContain("Project Beta frontend redesign") + expect(wsB.map((e) => e.content)).not.toContain("Project Alpha backend 
migration") + }) +}) + +// --------------------------------------------------------------------------- +// 4. PII Rejection +// --------------------------------------------------------------------------- +describe("E2E: PII Rejection", () => { + let store: MemoryStore + let tmpDir: string + + beforeEach(async () => { + ;({ store, tmpDir } = makeStore()) + await store.init() + }) + + afterEach(() => { + store.close() + fs.rmSync(tmpDir, { recursive: true, force: true }) + }) + + it("should reject observations containing email addresses", () => { + const obs: Observation[] = [ + { + action: "NEW", + category: "coding-style", + content: "User email is developer@company.com and prefers React", + significance: 0.8, + existingEntryId: null, + reasoning: "test", + }, + ] + const result = processObservations(store, obs, null, "task-1") + expect(result.entriesSkipped).toBe(1) + expect(result.entriesCreated).toBe(0) + expect(store.getEntryCount()).toBe(0) + }) + + it("should reject observations containing OpenAI API keys", () => { + const obs: Observation[] = [ + { + action: "NEW", + category: "tool-preferences", + content: "Uses API key sk-abcdefghij1234567890abcdefghij", + significance: 0.6, + existingEntryId: null, + reasoning: "test", + }, + ] + const result = processObservations(store, obs, null, "task-1") + expect(result.entriesSkipped).toBe(1) + expect(store.getEntryCount()).toBe(0) + }) + + it("should reject observations containing GitHub PATs", () => { + const obs: Observation[] = [ + { + action: "NEW", + category: "tool-preferences", + content: "GitHub token is ghp_abcdefghijklmnopqrstuvwxyz1234567890", + significance: 0.6, + existingEntryId: null, + reasoning: "test", + }, + ] + const result = processObservations(store, obs, null, "task-1") + expect(result.entriesSkipped).toBe(1) + expect(store.getEntryCount()).toBe(0) + }) + + it("should reject observations containing phone numbers", () => { + const obs: Observation[] = [ + { + action: "NEW", + category: 
"communication-prefs", + content: "Contact number is 555-123-4567", + significance: 0.5, + existingEntryId: null, + reasoning: "test", + }, + ] + const result = processObservations(store, obs, null, "task-1") + expect(result.entriesSkipped).toBe(1) + expect(store.getEntryCount()).toBe(0) + }) + + it("should reject observations containing SSN patterns", () => { + const obs: Observation[] = [ + { + action: "NEW", + category: "behavioral-patterns", + content: "SSN is 123-45-6789", + significance: 0.5, + existingEntryId: null, + reasoning: "test", + }, + ] + const result = processObservations(store, obs, null, "task-1") + expect(result.entriesSkipped).toBe(1) + expect(store.getEntryCount()).toBe(0) + }) + + it("should reject observations containing AWS access keys", () => { + const obs: Observation[] = [ + { + action: "NEW", + category: "tool-preferences", + content: "AWS key AKIAIOSFODNN7EXAMPLE", + significance: 0.6, + existingEntryId: null, + reasoning: "test", + }, + ] + const result = processObservations(store, obs, null, "task-1") + expect(result.entriesSkipped).toBe(1) + expect(store.getEntryCount()).toBe(0) + }) + + it("should reject observations containing private keys", () => { + const obs: Observation[] = [ + { + action: "NEW", + category: "coding-style", + content: "Has -----BEGIN RSA PRIVATE KEY----- in repo", + significance: 0.5, + existingEntryId: null, + reasoning: "test", + }, + ] + const result = processObservations(store, obs, null, "task-1") + expect(result.entriesSkipped).toBe(1) + expect(store.getEntryCount()).toBe(0) + }) + + it("should accept clean observations alongside rejecting PII ones", () => { + const obs: Observation[] = [ + { + action: "NEW", + category: "coding-style", + content: "Prefers TypeScript strict mode", + significance: 0.9, + existingEntryId: null, + reasoning: "clean", + }, + { + action: "NEW", + category: "communication-prefs", + content: "User email is john@corp.com and likes detailed explanations", + significance: 0.8, + 
existingEntryId: null, + reasoning: "has PII", + }, + { + action: "NEW", + category: "dislikes-frustrations", + content: "Dislikes verbose error messages", + significance: 0.7, + existingEntryId: null, + reasoning: "clean", + }, + ] + const result = processObservations(store, obs, null, "task-1") + expect(result.entriesCreated).toBe(2) // two clean ones + expect(result.entriesSkipped).toBe(1) // one PII + expect(store.getEntryCount()).toBe(2) + }) + + it("containsPII should not flag normal technical content", () => { + expect(containsPII("Uses React 18 with concurrent features")).toBe(false) + expect(containsPII("Prefers ESLint + Prettier workflow")).toBe(false) + expect(containsPII("Dislikes tabs, prefers 2-space indentation")).toBe(false) + expect(containsPII("Working on src/auth/login.ts")).toBe(false) + }) +}) + +// --------------------------------------------------------------------------- +// 5. Deduplication +// --------------------------------------------------------------------------- +describe("E2E: Deduplication", () => { + let store: MemoryStore + let tmpDir: string + + beforeEach(async () => { + ;({ store, tmpDir } = makeStore()) + await store.init() + }) + + afterEach(() => { + store.close() + fs.rmSync(tmpDir, { recursive: true, force: true }) + }) + + it("should convert near-duplicate NEW observations into REINFORCE", () => { + const round1: Observation[] = [ + { + action: "NEW", + category: "coding-style", + content: "Prefers functional React components with hooks", + significance: 0.85, + existingEntryId: null, + reasoning: "First mention", + }, + ] + processObservations(store, round1, null, "task-1") + expect(store.getEntryCount()).toBe(1) + + // Very similar observation — should be deduped + const round2: Observation[] = [ + { + action: "NEW", + category: "coding-style", + content: "Prefers functional React components with hooks pattern", + significance: 0.9, + existingEntryId: null, + reasoning: "Second mention with slight wording change", + }, 
+ ] + const result = processObservations(store, round2, null, "task-2") + expect(result.entriesReinforced).toBe(1) + expect(result.entriesCreated).toBe(0) + expect(store.getEntryCount()).toBe(1) + + // Reinforcement count should have bumped + const entries = store.getScoredEntries(null) + expect(entries[0].reinforcementCount).toBe(2) + }) + + it("should NOT deduplicate sufficiently different observations", () => { + const round1: Observation[] = [ + { + action: "NEW", + category: "coding-style", + content: "Prefers functional React components with hooks", + significance: 0.85, + existingEntryId: null, + reasoning: "test", + }, + ] + processObservations(store, round1, null, "task-1") + + // Completely different observation in same category + const round2: Observation[] = [ + { + action: "NEW", + category: "coding-style", + content: "Uses Tailwind CSS for styling instead of CSS modules", + significance: 0.7, + existingEntryId: null, + reasoning: "test", + }, + ] + const result = processObservations(store, round2, null, "task-2") + expect(result.entriesCreated).toBe(1) + expect(result.entriesReinforced).toBe(0) + expect(store.getEntryCount()).toBe(2) + }) + + it("should deduplicate across multiple rounds", () => { + const base: Observation[] = [ + { + action: "NEW", + category: "communication-prefs", + content: "Prefers concise direct responses without fluff always", + significance: 0.8, + existingEntryId: null, + reasoning: "test", + }, + ] + processObservations(store, base, null, "task-1") + + // Round 2: slightly reworded — keeps most words the same for Jaccard ≥ 0.6 + processObservations( + store, + [ + { + action: "NEW", + category: "communication-prefs", + content: "Prefers concise direct responses without fluff pattern", + significance: 0.82, + existingEntryId: null, + reasoning: "test", + }, + ], + null, + "task-2", + ) + + // Round 3: another slight variation — still high Jaccard with the stored entry + processObservations( + store, + [ + { + action: "NEW", + 
category: "communication-prefs", + content: "Prefers concise direct responses without fluff style", + significance: 0.85, + existingEntryId: null, + reasoning: "test", + }, + ], + null, + "task-3", + ) + + // Should still be just 1 entry, reinforced 3 times total + expect(store.getEntryCount()).toBe(1) + const entries = store.getScoredEntries(null) + expect(entries[0].reinforcementCount).toBe(3) + }) + + it("should handle REINFORCE with invalid entry ID gracefully", () => { + const obs: Observation[] = [ + { + action: "REINFORCE", + category: "coding-style", + content: "Uses TypeScript", + significance: 0.8, + existingEntryId: "nonexistent-uuid-12345", + reasoning: "LLM hallucinated this ID", + }, + ] + const result = processObservations(store, obs, null, "task-1") + expect(result.entriesSkipped).toBe(1) + expect(store.getEntryCount()).toBe(0) // Nothing written + }) + + it("should handle UPDATE with invalid entry ID by treating as NEW with dedup", () => { + // Pre-populate a similar entry + store.insertEntry( + makeEntry({ + content: "Prefers Vitest for testing React components apps", + }), + ) + + const obs: Observation[] = [ + { + action: "UPDATE", + category: "coding-style", + content: "Prefers Vitest for testing React components patterns", + significance: 0.85, + existingEntryId: "bogus-id-that-doesnt-exist", + reasoning: "LLM hallucinated ID", + }, + ] + const result = processObservations(store, obs, null, "task-1") + // Should have found the similar entry via dedup and updated it + expect(result.entriesReinforced).toBe(1) + expect(result.entriesCreated).toBe(0) + expect(store.getEntryCount()).toBe(1) + }) + + it("jaccardSimilarity threshold should be 0.6", () => { + expect(MEMORY_CONSTANTS.DEDUP_SIMILARITY_THRESHOLD).toBe(0.6) + + // Just above threshold — considered duplicate + const highSim = jaccardSimilarity( + "Prefers functional React components with hooks", + "Prefers functional React components using hooks pattern", + ) + 
expect(highSim).toBeGreaterThanOrEqual(0.6) + + // Just below threshold — considered distinct + const lowSim = jaccardSimilarity( + "Prefers functional React components with hooks", + "Uses Tailwind CSS for styling applications", + ) + expect(lowSim).toBeLessThan(0.6) + }) +}) + +// --------------------------------------------------------------------------- +// 6. Data persistence across store reopens +// --------------------------------------------------------------------------- +describe("E2E: Persistence", () => { + it("should survive store close and reopen", async () => { + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "memory-persist-")) + + // Session 1: write data + const store1 = new MemoryStore(tmpDir) + await store1.init() + store1.insertEntry( + makeEntry({ content: "Persisted entry alpha" }), + ) + store1.insertEntry( + makeEntry({ content: "Persisted entry beta", category: "communication-prefs" }), + ) + expect(store1.getEntryCount()).toBe(2) + store1.close() + + // Session 2: reopen, verify data intact + const store2 = new MemoryStore(tmpDir) + await store2.init() + expect(store2.getEntryCount()).toBe(2) + + const scored = store2.getScoredEntries(null) + const contents = scored.map((e) => e.content) + expect(contents).toContain("Persisted entry alpha") + expect(contents).toContain("Persisted entry beta") + + store2.close() + fs.rmSync(tmpDir, { recursive: true, force: true }) + }) +}) + +// --------------------------------------------------------------------------- +// 7. 
Prompt compiler token cap +// --------------------------------------------------------------------------- +describe("E2E: Prompt Compiler Token Cap", () => { + let store: MemoryStore + let tmpDir: string + + beforeEach(async () => { + ;({ store, tmpDir } = makeStore()) + await store.init() + }) + + afterEach(() => { + store.close() + fs.rmSync(tmpDir, { recursive: true, force: true }) + }) + + it("should respect the 2000-token cap (header included)", () => { + // Insert a lot of entries to exceed the token budget + for (let i = 0; i < 40; i++) { + store.insertEntry( + makeEntry({ + content: `This is a moderately long observation number ${i} about user coding preferences and behavioral patterns that should contribute meaningful tokens to the output`, + significance: 0.8, + reinforcementCount: 3, + category: (["coding-style", "communication-prefs", "technical-proficiency", "tool-preferences"] as MemoryCategorySlug[])[i % 4], + }), + ) + } + + const entries = store.getScoredEntries(null) + const prose = compileMemoryPrompt(entries) + + // Total output (header + prose) must be within the token cap + const tokenEstimate = Math.ceil(prose.length / 4) + expect(tokenEstimate).toBeLessThanOrEqual(MEMORY_CONSTANTS.PROMPT_TOKEN_CAP) + }) + + it("should return empty string when no entries exist", () => { + const entries = store.getScoredEntries(null) + const prose = compileMemoryPrompt(entries) + expect(prose).toBe("") + }) +}) diff --git a/src/core/memory/__tests__/memory-writer.spec.ts b/src/core/memory/__tests__/memory-writer.spec.ts new file mode 100644 index 00000000000..cba7230930c --- /dev/null +++ b/src/core/memory/__tests__/memory-writer.spec.ts @@ -0,0 +1,47 @@ +import { containsPII, jaccardSimilarity } from "../memory-writer" + +describe("containsPII", () => { + it("should detect email addresses", () => { + expect(containsPII("User email is john@example.com")).toBe(true) + }) + + it("should detect OpenAI API keys", () => { + expect(containsPII("Uses key 
sk-abcdefghijklmnopqrstuvwxyz1234")).toBe(true) + }) + + it("should detect GitHub PATs", () => { + expect(containsPII("Token ghp_abcdefghijklmnopqrstuvwxyz1234567890")).toBe(true) + }) + + it("should not flag normal coding preferences", () => { + expect(containsPII("Prefers TypeScript over JavaScript")).toBe(false) + }) + + it("should not flag file paths", () => { + expect(containsPII("Frequently edits src/auth/login.ts")).toBe(false) + }) +}) + +describe("jaccardSimilarity", () => { + it("should return 1.0 for identical strings", () => { + expect(jaccardSimilarity("prefers typescript", "prefers typescript")).toBeCloseTo(1.0) + }) + + it("should return 0.0 for completely different strings", () => { + expect(jaccardSimilarity("cats dogs birds", "alpha beta gamma")).toBeCloseTo(0.0) + }) + + it("should return high similarity for near-duplicates", () => { + const sim = jaccardSimilarity( + "Prefers functional React components", + "Prefers functional React component patterns", + ) + expect(sim).toBeGreaterThanOrEqual(0.5) + }) + + it("should ignore short words (≤2 chars)", () => { + const sim = jaccardSimilarity("I am a good coder", "I am a bad coder") + // "I", "am", "a" are filtered, so it's {good, coder} vs {bad, coder} + expect(sim).toBeLessThan(1.0) + }) +}) diff --git a/src/core/memory/__tests__/orchestrator.spec.ts b/src/core/memory/__tests__/orchestrator.spec.ts new file mode 100644 index 00000000000..e011c5ca7c2 --- /dev/null +++ b/src/core/memory/__tests__/orchestrator.spec.ts @@ -0,0 +1,317 @@ +import { MemoryStore } from "../memory-store" +import { MemoryOrchestrator } from "../orchestrator" +import { preprocessMessages } from "../preprocessor" +import { processObservations } from "../memory-writer" +import { compileMemoryPrompt } from "../prompt-compiler" +import type { Observation } from "../types" +import * as path from "path" +import * as os from "os" +import * as fs from "fs" + +describe("Memory System Integration", () => { + let store: MemoryStore + 
let tmpDir: string + + beforeEach(async () => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "memory-test-")) + store = new MemoryStore(tmpDir) + await store.init() + }) + + afterEach(() => { + store.close() + fs.rmSync(tmpDir, { recursive: true, force: true }) + }) + + it("should persist entries across store instances", async () => { + store.insertEntry({ + workspaceId: null, + category: "coding-style", + content: "Prefers TypeScript", + significance: 0.9, + firstSeen: 1000, + lastReinforced: 1000, + reinforcementCount: 1, + decayRate: 0.05, + sourceTaskId: null, + isPinned: false, + }) + store.close() + + const store2 = new MemoryStore(tmpDir) + await store2.init() + expect(store2.getEntryCount()).toBe(1) + store2.close() + }) + + it("should process observations end-to-end", () => { + const observations: Observation[] = [ + { + action: "NEW", + category: "coding-style", + content: "Prefers TypeScript over JavaScript", + significance: 0.9, + existingEntryId: null, + reasoning: "Explicitly stated preference", + }, + { + action: "NEW", + category: "communication-prefs", + content: "Likes concise, direct responses", + significance: 0.85, + existingEntryId: null, + reasoning: "Expressed multiple times", + }, + ] + + const result = processObservations(store, observations, null, "task-1") + expect(result.entriesCreated).toBe(2) + expect(store.getEntryCount()).toBe(2) + }) + + it("should compile entries into prose with correct header", () => { + store.insertEntry({ + workspaceId: null, + category: "coding-style", + content: "Prefers TypeScript", + significance: 0.9, + firstSeen: Math.floor(Date.now() / 1000), + lastReinforced: Math.floor(Date.now() / 1000), + reinforcementCount: 5, + decayRate: 0.05, + sourceTaskId: null, + isPinned: false, + }) + + const entries = store.getScoredEntries(null) + expect(entries.length).toBeGreaterThan(0) + const prose = compileMemoryPrompt(entries) + expect(prose).toContain("USER PROFILE & PREFERENCES") + 
expect(prose).toContain("Prefers TypeScript") + }) + + it("should preprocess messages and reduce token count", () => { + const messages = [ + { role: "user", content: [{ type: "text", text: "Fix the auth bug" }] }, + { + role: "assistant", + content: [ + { type: "text", text: "I'll check the auth module." }, + { type: "tool_use", id: "1", name: "read_file", input: { path: "src/auth.ts" } }, + ], + }, + ] + + const result = preprocessMessages(messages) + expect(result.cleaned).toContain("Fix the auth bug") + expect(result.cleaned).toContain("→ read: src/auth.ts") + expect(result.cleanedTokenEstimate).toBeLessThanOrEqual(result.originalTokenEstimate) + }) + + it("should garbage collect old low-score entries", async () => { + const oldTimestamp = Math.floor(Date.now() / 1000) - 100 * 86400 + + store.insertEntry({ + workspaceId: null, + category: "active-projects", + content: "Working on legacy migration", + significance: 0.3, + firstSeen: oldTimestamp, + lastReinforced: oldTimestamp, + reinforcementCount: 1, + decayRate: 0.3, + sourceTaskId: null, + isPinned: false, + }) + + expect(store.getEntryCount()).toBe(1) + const deleted = store.garbageCollect() + expect(deleted).toBe(1) + expect(store.getEntryCount()).toBe(0) + }) + + it("should deduplicate similar observations", () => { + // Insert initial entry + const obs1: Observation[] = [ + { + action: "NEW", + category: "coding-style", + content: "Prefers functional React components with hooks", + significance: 0.8, + existingEntryId: null, + reasoning: "test", + }, + ] + processObservations(store, obs1, null, "task-1") + expect(store.getEntryCount()).toBe(1) + + // Try inserting a similar entry — should be deduped into a reinforce + const obs2: Observation[] = [ + { + action: "NEW", + category: "coding-style", + content: "Prefers functional React components with hooks pattern", + significance: 0.85, + existingEntryId: null, + reasoning: "test", + }, + ] + const result = processObservations(store, obs2, null, "task-2") 
+ expect(result.entriesReinforced).toBe(1) + expect(result.entriesCreated).toBe(0) + expect(store.getEntryCount()).toBe(1) // Still just 1 entry + }) + + it("should reject PII-containing observations", () => { + const obs: Observation[] = [ + { + action: "NEW", + category: "coding-style", + content: "User email is john@example.com and prefers TypeScript", + significance: 0.8, + existingEntryId: null, + reasoning: "test", + }, + ] + const result = processObservations(store, obs, null, "task-1") + expect(result.entriesSkipped).toBe(1) + expect(result.entriesCreated).toBe(0) + expect(store.getEntryCount()).toBe(0) + }) +}) + +describe("clearAllMemory", () => { + let store: MemoryStore + let tmpDir: string + + beforeEach(async () => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "memory-clear-test-")) + store = new MemoryStore(tmpDir) + await store.init() + }) + + afterEach(() => { + store.close() + fs.rmSync(tmpDir, { recursive: true, force: true }) + }) + + it("should delete all entries", async () => { + // Insert several entries + store.insertEntry({ + workspaceId: null, + category: "coding-style", + content: "Prefers TypeScript", + significance: 0.9, + firstSeen: 1000, + lastReinforced: 1000, + reinforcementCount: 1, + decayRate: 0.05, + sourceTaskId: null, + isPinned: false, + }) + store.insertEntry({ + workspaceId: null, + category: "communication-prefs", + content: "Likes concise responses", + significance: 0.85, + firstSeen: 2000, + lastReinforced: 2000, + reinforcementCount: 1, + decayRate: 0.05, + sourceTaskId: null, + isPinned: false, + }) + store.insertEntry({ + workspaceId: null, + category: "tool-preferences", + content: "Uses VS Code with Vim keybindings", + significance: 0.7, + firstSeen: 3000, + lastReinforced: 3000, + reinforcementCount: 1, + decayRate: 0.12, + sourceTaskId: null, + isPinned: false, + }) + + // Verify entries were inserted + expect(store.getEntryCount()).toBe(3) + + // Clear all entries + store.deleteAllEntries() + + // Verify all 
entries are gone + expect(store.getEntryCount()).toBe(0) + }) + + it("should persist the cleared state", async () => { + // Insert entries + store.insertEntry({ + workspaceId: null, + category: "coding-style", + content: "Prefers functional components", + significance: 0.8, + firstSeen: 1000, + lastReinforced: 1000, + reinforcementCount: 1, + decayRate: 0.05, + sourceTaskId: null, + isPinned: false, + }) + store.insertEntry({ + workspaceId: null, + category: "active-projects", + content: "Working on memory system", + significance: 0.75, + firstSeen: 2000, + lastReinforced: 2000, + reinforcementCount: 1, + decayRate: 0.3, + sourceTaskId: null, + isPinned: false, + }) + + expect(store.getEntryCount()).toBe(2) + + // Delete all entries and close the store + store.deleteAllEntries() + expect(store.getEntryCount()).toBe(0) + store.close() + + // Reopen store on the same path + const store2 = new MemoryStore(tmpDir) + await store2.init() + + // Verify cleared state persisted across instances + expect(store2.getEntryCount()).toBe(0) + store2.close() + }) +}) + +describe("MemoryOrchestrator.onUserMessage", () => { + let orchestrator: MemoryOrchestrator + let tmpDir: string + + beforeEach(async () => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "memory-orch-test-")) + orchestrator = new MemoryOrchestrator(tmpDir, null) + await orchestrator.init() + }) + + afterEach(() => { + orchestrator.close() + fs.rmSync(tmpDir, { recursive: true, force: true }) + }) + + it("should skip analysis when provider settings is null", () => { + orchestrator.setEnabled(true) + const result = orchestrator.onUserMessage([], "task-1", null) + expect(result).toBe(false) + }) + + it("should skip analysis when not enabled", () => { + orchestrator.setEnabled(false) + const result = orchestrator.onUserMessage([], "task-1", { apiProvider: "openai" } as any) + expect(result).toBe(false) + }) +}) diff --git a/src/core/memory/__tests__/preprocessor.spec.ts 
b/src/core/memory/__tests__/preprocessor.spec.ts new file mode 100644 index 00000000000..5a2bf3e09cc --- /dev/null +++ b/src/core/memory/__tests__/preprocessor.spec.ts @@ -0,0 +1,109 @@ +import { preprocessMessages } from "../preprocessor" + +// Minimal ApiMessage mock shape matching Anthropic.MessageParam +interface MockMessage { + role: "user" | "assistant" + content: unknown +} + +const makeUserMsg = (text: string): MockMessage => ({ + role: "user" as const, + content: [{ type: "text", text }], +}) + +const makeAssistantMsg = (content: Record[]): MockMessage => ({ + role: "assistant" as const, + content, +}) + +describe("preprocessMessages", () => { + it("should keep user message text fully", () => { + const result = preprocessMessages([makeUserMsg("I prefer TypeScript")]) + expect(result.cleaned).toContain("I prefer TypeScript") + }) + + it("should keep assistant text blocks", () => { + const msg = makeAssistantMsg([ + { type: "text", text: "I'll update the auth component." }, + ]) + const result = preprocessMessages([msg]) + expect(result.cleaned).toContain("I'll update the auth component.") + }) + + it("should replace read_file tool_use with filename only", () => { + const msg = makeAssistantMsg([ + { type: "text", text: "Let me check that file." 
}, + { type: "tool_use", id: "1", name: "read_file", input: { path: "src/auth/Auth.tsx" } }, + ]) + const result = preprocessMessages([msg]) + expect(result.cleaned).toContain("→ read: src/auth/Auth.tsx") + expect(result.cleaned).not.toContain("tool_use") + }) + + it("should replace execute_command with command only", () => { + const msg = makeAssistantMsg([ + { type: "tool_use", id: "2", name: "execute_command", input: { command: "npm test" } }, + ]) + const result = preprocessMessages([msg]) + expect(result.cleaned).toContain("→ ran command: npm test") + }) + + it("should strip tool_result blocks entirely", () => { + const msg = makeAssistantMsg([ + { type: "tool_result", tool_use_id: "1", content: "200 lines of code..." }, + ]) + const result = preprocessMessages([msg]) + expect(result.cleaned).not.toContain("200 lines of code") + }) + + it("should strip base64 image data from user messages", () => { + const msg: MockMessage = { + role: "user" as const, + content: [ + { type: "image", source: { type: "base64", data: "abc123longdata..." } }, + { type: "text", text: "What does this show?" }, + ], + } + const result = preprocessMessages([msg]) + expect(result.cleaned).toContain("[image attached]") + expect(result.cleaned).toContain("What does this show?") + expect(result.cleaned).not.toContain("abc123longdata") + }) + + it("should strip code blocks longer than 3 lines from assistant messages", () => { + const msg = makeAssistantMsg([ + { + type: "text", + text: "Here's the code:\n```typescript\nline1\nline2\nline3\nline4\n```\nDone.", + }, + ]) + const result = preprocessMessages([msg]) + expect(result.cleaned).toContain("Here's the code:") + expect(result.cleaned).toContain("Done.") + expect(result.cleaned).not.toContain("line4") + }) + + it("should keep short code blocks (≤3 lines)", () => { + const msg = makeAssistantMsg([ + { type: "text", text: "Try: ```const x = 1``` like that." 
}, + ]) + const result = preprocessMessages([msg]) + expect(result.cleaned).toContain("const x = 1") + }) + + it("should return token estimates", () => { + const result = preprocessMessages([ + makeUserMsg("hello"), + makeAssistantMsg([{ type: "text", text: "hi there" }]), + ]) + expect(result.originalTokenEstimate).toBeGreaterThan(0) + expect(result.cleanedTokenEstimate).toBeGreaterThan(0) + expect(result.cleanedTokenEstimate).toBeLessThanOrEqual(result.originalTokenEstimate) + }) + + it("should handle empty message array", () => { + const result = preprocessMessages([]) + expect(result.cleaned).toBe("") + expect(result.cleanedTokenEstimate).toBe(0) + }) +}) diff --git a/src/core/memory/__tests__/prompt-compiler.spec.ts b/src/core/memory/__tests__/prompt-compiler.spec.ts new file mode 100644 index 00000000000..0844d62d02f --- /dev/null +++ b/src/core/memory/__tests__/prompt-compiler.spec.ts @@ -0,0 +1,99 @@ +import { compileMemoryPrompt, compileMemoryForAgent } from "../prompt-compiler" +import type { ScoredMemoryEntry, MemoryCategorySlug } from "../types" +import { MEMORY_CONSTANTS } from "../types" + +const makeScoredEntry = ( + category: string, + content: string, + score: number, + label: string = "Test", +): ScoredMemoryEntry => ({ + id: `test-${Math.random().toString(36).slice(2)}`, + workspaceId: null, + category: category as MemoryCategorySlug, + content, + significance: 0.8, + firstSeen: 1000, + lastReinforced: 2000, + reinforcementCount: 3, + decayRate: 0.05, + sourceTaskId: null, + isPinned: false, + computedScore: score, + categoryLabel: label, +}) + +describe("compileMemoryPrompt", () => { + it("should return empty string for no entries", () => { + expect(compileMemoryPrompt([])).toBe("") + }) + + it("should include USER PROFILE header", () => { + const entries = [makeScoredEntry("coding-style", "Prefers TypeScript", 0.9, "Coding Style")] + const result = compileMemoryPrompt(entries) + expect(result).toContain("USER PROFILE & PREFERENCES") + }) + + 
it("should group entries by category", () => { + const entries = [ + makeScoredEntry("coding-style", "Prefers TypeScript", 0.9, "Coding Style"), + makeScoredEntry("coding-style", "Uses React hooks", 0.8, "Coding Style"), + makeScoredEntry("communication-prefs", "Likes concise responses", 0.85, "Communication Preferences"), + ] + const result = compileMemoryPrompt(entries) + expect(result).toContain("Coding Style:") + expect(result).toContain("Communication Preferences:") + }) + + it("should omit empty categories", () => { + const entries = [makeScoredEntry("coding-style", "Prefers TypeScript", 0.9, "Coding Style")] + const result = compileMemoryPrompt(entries) + expect(result).not.toContain("Communication Preferences:") + }) + + it("should join multiple entries in same category with periods", () => { + const entries = [ + makeScoredEntry("coding-style", "Prefers TypeScript", 0.9, "Coding Style"), + makeScoredEntry("coding-style", "Uses React hooks", 0.8, "Coding Style"), + ] + const result = compileMemoryPrompt(entries) + expect(result).toContain("Prefers TypeScript. 
Uses React hooks.") + }) + + it("should respect token cap by dropping lowest-priority sections", () => { + // Create many entries to exceed the token cap + const entries: ScoredMemoryEntry[] = [] + for (let i = 0; i < 100; i++) { + entries.push( + makeScoredEntry( + "coding-style", + `This is a very long preference statement number ${i} that contains lots of words to inflate the token count significantly`, + 0.9 - i * 0.001, + `Category ${i}`, + ), + ) + } + const result = compileMemoryPrompt(entries) + const estimatedTokens = Math.ceil(result.length / 4) + expect(estimatedTokens).toBeLessThanOrEqual(MEMORY_CONSTANTS.PROMPT_TOKEN_CAP) + }) +}) + +describe("compileMemoryForAgent", () => { + it("should include entry IDs", () => { + const entry = makeScoredEntry("coding-style", "Prefers TypeScript", 0.9, "Coding Style") + const result = compileMemoryForAgent([entry]) + expect(result).toContain(entry.id) + }) + + it("should include scores", () => { + const entries = [makeScoredEntry("coding-style", "Prefers TS", 0.87, "Coding Style")] + const result = compileMemoryForAgent(entries) + expect(result).toContain("0.87") + }) + + it("should return placeholder for empty entries", () => { + const result = compileMemoryForAgent([]) + expect(result).toContain("No existing memory entries") + }) +}) diff --git a/src/core/memory/__tests__/scoring.spec.ts b/src/core/memory/__tests__/scoring.spec.ts new file mode 100644 index 00000000000..9d34138d962 --- /dev/null +++ b/src/core/memory/__tests__/scoring.spec.ts @@ -0,0 +1,77 @@ +import { computeScore, reinforcementBonus, temporalDecay } from "../scoring" + +describe("reinforcementBonus", () => { + it("should return ~1.0 for count of 1", () => { + expect(reinforcementBonus(1)).toBeCloseTo(1.0, 1) + }) + + it("should increase with higher counts", () => { + expect(reinforcementBonus(4)).toBeGreaterThan(reinforcementBonus(2)) + }) + + it("should cap at 3.0", () => { + expect(reinforcementBonus(100)).toBeLessThanOrEqual(3.0) + 
expect(reinforcementBonus(1000)).toBeLessThanOrEqual(3.0) + }) +}) + +describe("temporalDecay", () => { + it("should return 1.0 for 0 days", () => { + expect(temporalDecay(0, 0.1)).toBeCloseTo(1.0) + }) + + it("should decrease over time", () => { + expect(temporalDecay(30, 0.1)).toBeLessThan(temporalDecay(10, 0.1)) + }) + + it("should decay faster with higher decay rate", () => { + expect(temporalDecay(10, 0.3)).toBeLessThan(temporalDecay(10, 0.05)) + }) + + it("should approach 0 for very old entries with high decay", () => { + expect(temporalDecay(365, 0.3)).toBeLessThan(0.001) + }) +}) + +describe("computeScore", () => { + it("should combine all factors", () => { + const score = computeScore({ + significance: 0.8, + priorityWeight: 0.9, + reinforcementCount: 3, + daysSinceReinforced: 5, + decayRate: 0.05, + }) + expect(score).toBeGreaterThan(0) + expect(score).toBeLessThan(3) // bounded by reinforcement cap + }) + + it("should return 0 for zero significance", () => { + const score = computeScore({ + significance: 0, + priorityWeight: 0.9, + reinforcementCount: 5, + daysSinceReinforced: 1, + decayRate: 0.05, + }) + expect(score).toBe(0) + }) + + it("should return higher score for recently reinforced entry", () => { + const recent = computeScore({ + significance: 0.8, + priorityWeight: 0.9, + reinforcementCount: 3, + daysSinceReinforced: 1, + decayRate: 0.1, + }) + const old = computeScore({ + significance: 0.8, + priorityWeight: 0.9, + reinforcementCount: 3, + daysSinceReinforced: 60, + decayRate: 0.1, + }) + expect(recent).toBeGreaterThan(old) + }) +}) diff --git a/src/core/memory/analysis-agent.ts b/src/core/memory/analysis-agent.ts new file mode 100644 index 00000000000..b0ea0646480 --- /dev/null +++ b/src/core/memory/analysis-agent.ts @@ -0,0 +1,139 @@ +import type { AnalysisResult, Observation, ObservationAction, MemoryCategorySlug } from "./types" +import { buildApiHandler, type SingleCompletionHandler } from "../../api" +import type { ProviderSettings } 
from "@roo-code/types" + +const VALID_CATEGORIES = new Set([ + "coding-style", + "communication-prefs", + "technical-proficiency", + "tool-preferences", + "active-projects", + "behavioral-patterns", + "dislikes-frustrations", +]) + +const VALID_ACTIONS = new Set(["NEW", "REINFORCE", "UPDATE"]) + +const ANALYSIS_SYSTEM_PROMPT = `You are a User Profile Analyst. Your job is to extract factual observations about the USER from conversation transcripts between them and a coding assistant. + +You will receive: +1. A cleaned conversation transcript (tool noise already removed) +2. The current compiled memory report (what is already known) + +EXTRACT observations about the user in these categories: +- coding-style: Languages, frameworks, patterns, conventions they prefer +- communication-prefs: Response length, tone, detail level they want +- technical-proficiency: Skill levels in specific technologies +- tool-preferences: Tools, linters, formatters, workflows they favor +- active-projects: What they're currently building (time-bound) +- behavioral-patterns: How they iterate, review, debug, make decisions +- dislikes-frustrations: Things that annoy them or they explicitly reject + +RULES: +- Only extract what is EVIDENCED in the transcript. Never infer beyond what's shown. +- If an observation matches something in the existing memory, mark it as REINFORCE (don't create a duplicate). +- If an observation contradicts existing memory, mark it as UPDATE with the new value. +- If it's completely new, mark it as NEW. +- Write each observation as a concise, third-person factual statement (e.g., "Prefers functional React components over class components") +- Assign significance 0.0-1.0 based on how broadly useful this fact is for future interactions. 
+ +PRIVACY — NEVER extract: +- Real names, emails, addresses, phone numbers +- API keys, passwords, secrets, tokens +- Company confidential or proprietary details +- Health, financial, legal, or relationship information +- Anything the user explicitly marks as private or off-record + +If the conversation contains mostly one-liners or nothing personality-revealing, return an empty observations array. Don't force extraction. + +Respond in this exact JSON format (no markdown fences, just raw JSON): +{ + "observations": [ + { + "action": "NEW" | "REINFORCE" | "UPDATE", + "category": "", + "content": "", + "significance": <0.0-1.0>, + "existing_entry_id": "", + "reasoning": "" + } + ], + "session_summary": "<1-2 sentences about what the user was doing this session>" +}` + +/** Send a preprocessed conversation to the LLM for memory extraction. */ +export async function runAnalysis( + providerSettings: ProviderSettings, + cleanedConversation: string, + existingMemoryReport: string, +): Promise { + try { + console.log(`[Memory] runAnalysis: called with conversation length=${cleanedConversation.length}, existing report length=${existingMemoryReport.length}`) + const handler = buildApiHandler(providerSettings) + + // Check if handler supports single completion + if (!("completePrompt" in handler)) { + console.error("[Memory] runAnalysis: handler does not support completePrompt") + return null + } + console.log(`[Memory] runAnalysis: handler supports completePrompt, sending request...`) + + const prompt = `EXISTING MEMORY:\n${existingMemoryReport}\n\n---\n\nCONVERSATION TRANSCRIPT:\n${cleanedConversation}` + + const response = await (handler as unknown as SingleCompletionHandler).completePrompt( + `${ANALYSIS_SYSTEM_PROMPT}\n\n${prompt}`, + ) + + console.log(`[Memory] runAnalysis: got response, length=${response.length}`) + const result = parseAnalysisResponse(response) + console.log(`[Memory] runAnalysis: parsed ${result ? 
result.observations.length : 0} observations`) + return result + } catch (error) { + console.error("[Memory] runAnalysis: failed:", error) + return null + } +} + +/** Parse and validate the LLM's JSON response into typed observations. */ +function parseAnalysisResponse(response: string): AnalysisResult | null { + try { + // Strip markdown code fences if present + const cleaned = response.replace(/^```json?\n?/m, "").replace(/\n?```$/m, "").trim() + const parsed = JSON.parse(cleaned) + + if (!parsed.observations || !Array.isArray(parsed.observations)) { + return { observations: [], sessionSummary: parsed.session_summary || "" } + } + + // Validate and filter observations + const validObservations: Observation[] = parsed.observations + .filter((obs: Record) => { + return ( + VALID_ACTIONS.has(obs.action as string) && + VALID_CATEGORIES.has(obs.category as string) && + typeof obs.content === "string" && + (obs.content as string).length > 0 && + typeof obs.significance === "number" && + (obs.significance as number) >= 0 && + (obs.significance as number) <= 1 + ) + }) + .map((obs: Record) => ({ + action: obs.action as ObservationAction, + category: obs.category as MemoryCategorySlug, + content: obs.content as string, + significance: obs.significance as number, + existingEntryId: (obs.existing_entry_id as string) || null, + reasoning: (obs.reasoning as string) || "", + })) + + return { + observations: validObservations, + sessionSummary: parsed.session_summary || "", + } + } catch (error) { + console.error(`[Memory] parseAnalysisResponse: JSON parse failed. 
Raw response (first 200 chars): ${response.substring(0, 200)}`) + console.error("[Memory] parseAnalysisResponse: error:", error) + return null + } +} diff --git a/src/core/memory/memory-store.ts b/src/core/memory/memory-store.ts new file mode 100644 index 00000000000..716e53a8659 --- /dev/null +++ b/src/core/memory/memory-store.ts @@ -0,0 +1,401 @@ +import initSqlJs, { type Database, type SqlValue } from "sql.js" +import * as fs from "fs" +import * as path from "path" +import * as crypto from "crypto" +import type { MemoryEntry, AnalysisLogEntry, ScoredMemoryEntry, MemoryCategorySlug } from "./types" +import { DEFAULT_MEMORY_CATEGORIES, MEMORY_CONSTANTS } from "./types" +import { computeScore } from "./scoring" + +const SCHEMA_VERSION = 1 + +const SCHEMA_SQL = ` +CREATE TABLE IF NOT EXISTS schema_meta ( + key TEXT PRIMARY KEY, + value TEXT NOT NULL +); + +CREATE TABLE IF NOT EXISTS memory_categories ( + slug TEXT PRIMARY KEY, + label TEXT NOT NULL, + default_decay_rate REAL NOT NULL, + priority_weight REAL NOT NULL +); + +CREATE TABLE IF NOT EXISTS memory_entries ( + id TEXT PRIMARY KEY, + workspace_id TEXT, + category TEXT NOT NULL REFERENCES memory_categories(slug), + content TEXT NOT NULL, + significance REAL NOT NULL, + first_seen INTEGER NOT NULL, + last_reinforced INTEGER NOT NULL, + reinforcement_count INTEGER DEFAULT 1, + decay_rate REAL NOT NULL, + source_task_id TEXT, + is_pinned INTEGER DEFAULT 0 +); + +CREATE TABLE IF NOT EXISTS analysis_log ( + id TEXT PRIMARY KEY, + timestamp INTEGER NOT NULL, + task_id TEXT, + messages_analyzed INTEGER NOT NULL, + tokens_used INTEGER NOT NULL, + entries_created INTEGER NOT NULL, + entries_reinforced INTEGER NOT NULL +); + +CREATE INDEX IF NOT EXISTS idx_entries_category ON memory_entries(category); +CREATE INDEX IF NOT EXISTS idx_entries_workspace ON memory_entries(workspace_id); +CREATE INDEX IF NOT EXISTS idx_entries_last_reinforced ON memory_entries(last_reinforced); +` + +/** SQLite-backed persistent store for 
user memory entries. */ +export class MemoryStore { + private db: Database | null = null + private dbPath: string + + constructor(storagePath: string) { + const memoryDir = path.join(storagePath, "memory") + try { + if (!fs.existsSync(memoryDir)) { + fs.mkdirSync(memoryDir, { recursive: true }) + } + } catch { + // Directory creation deferred to init() — may be running in a test + // environment with a synthetic path. + } + this.dbPath = path.join(memoryDir, "user_memory.db") + } + + /** Initialize the database, running schema creation and migrations. */ + async init(): Promise { + // sql.js needs to locate its WASM file. In a bundled extension, it's in dist/. + // During tests/dev, resolve from node_modules. + const SQL = await initSqlJs({ + locateFile: (file: string) => { + // Try bundled location first (dist/) + const bundledPath = path.join(__dirname, file) + if (fs.existsSync(bundledPath)) { + return bundledPath + } + // Fallback: resolve from node_modules (for tests/dev) + try { + const sqlJsMain = require.resolve("sql.js") + const sqlJsDistDir = path.dirname(sqlJsMain) + return path.join(sqlJsDistDir, file) + } catch { + return bundledPath + } + }, + }) + + if (fs.existsSync(this.dbPath)) { + const fileBuffer = fs.readFileSync(this.dbPath) + this.db = new SQL.Database(fileBuffer) + } else { + this.db = new SQL.Database() + } + + this.db.run(SCHEMA_SQL) + this.initSchemaVersion() + this.seedCategories() + this.persist() + } + + private initSchemaVersion(): void { + const result = this.db!.exec("SELECT value FROM schema_meta WHERE key = 'version'") + if (result.length === 0 || result[0].values.length === 0) { + this.db!.run("INSERT OR REPLACE INTO schema_meta (key, value) VALUES ('version', ?)", [ + String(SCHEMA_VERSION), + ]) + } else { + const currentVersion = parseInt(result[0].values[0][0] as string, 10) + this.runMigrations(currentVersion) + } + } + + private runMigrations(fromVersion: number): void { + // Future migrations go here as: if (fromVersion < 
2) { ... } + // After all migrations, update version: + if (fromVersion < SCHEMA_VERSION) { + this.db!.run("UPDATE schema_meta SET value = ? WHERE key = 'version'", [String(SCHEMA_VERSION)]) + } + } + + private seedCategories(): void { + const stmt = this.db!.prepare( + "INSERT OR IGNORE INTO memory_categories (slug, label, default_decay_rate, priority_weight) VALUES (?, ?, ?, ?)", + ) + for (const cat of DEFAULT_MEMORY_CATEGORIES) { + stmt.run([cat.slug, cat.label, cat.defaultDecayRate, cat.priorityWeight]) + } + stmt.free() + } + + private persist(): void { + if (!this.db) return + const data = this.db.export() + const buffer = Buffer.from(data) + const tmpPath = this.dbPath + ".tmp" + fs.writeFileSync(tmpPath, buffer) + fs.renameSync(tmpPath, this.dbPath) + } + + /** Generate a random UUID for new entries. */ + generateId(): string { + return crypto.randomUUID() + } + + /** Insert a new memory entry, returning its ID. */ + insertEntry(entry: Omit & { id?: string }): string { + const id = entry.id || this.generateId() + this.db!.run( + `INSERT INTO memory_entries (id, workspace_id, category, content, significance, first_seen, last_reinforced, reinforcement_count, decay_rate, source_task_id, is_pinned) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`, + [ + id, + entry.workspaceId, + entry.category, + entry.content, + entry.significance, + entry.firstSeen, + entry.lastReinforced, + entry.reinforcementCount, + entry.decayRate, + entry.sourceTaskId, + entry.isPinned ? 1 : 0, + ], + ) + this.persist() + return id + } + + /** Bump the reinforcement count and timestamp for an existing entry. */ + reinforceEntry(id: string, taskId: string | null): void { + this.db!.run( + `UPDATE memory_entries SET last_reinforced = ?, reinforcement_count = reinforcement_count + 1, source_task_id = ? WHERE id = ?`, + [Math.floor(Date.now() / 1000), taskId, id], + ) + this.persist() + } + + /** Update the content and significance of an existing entry. 
*/ + updateEntry(id: string, content: string, significance: number, taskId: string | null): void { + this.db!.run( + `UPDATE memory_entries SET content = ?, significance = ?, last_reinforced = ?, reinforcement_count = reinforcement_count + 1, source_task_id = ? WHERE id = ?`, + [content, significance, Math.floor(Date.now() / 1000), taskId, id], + ) + this.persist() + } + + /** Retrieve a single entry by ID, or null if not found. */ + getEntry(id: string): MemoryEntry | null { + const result = this.db!.exec("SELECT * FROM memory_entries WHERE id = ?", [id]) + if (result.length === 0 || result[0].values.length === 0) return null + return this.rowToEntry(result[0].columns, result[0].values[0]) + } + + /** List entries matching the given category and workspace scope. */ + getEntriesByCategory(category: string, workspaceId: string | null): MemoryEntry[] { + const result = this.db!.exec( + "SELECT * FROM memory_entries WHERE category = ? AND (workspace_id IS NULL OR workspace_id = ?) ORDER BY last_reinforced DESC", + [category, workspaceId], + ) + if (result.length === 0) return [] + return result[0].values.map((row: SqlValue[]) => this.rowToEntry(result[0].columns, row)) + } + + /** Return all entries ranked by computed relevance score. */ + getScoredEntries(workspaceId: string | null): ScoredMemoryEntry[] { + if (!this.db) return [] + const result = this.db.exec( + `SELECT e.*, c.priority_weight, c.label as category_label + FROM memory_entries e + JOIN memory_categories c ON e.category = c.slug + WHERE (e.workspace_id IS NULL OR e.workspace_id = ?) 
+ ORDER BY e.last_reinforced DESC`, + [workspaceId], + ) + + if (result.length === 0) return [] + + const now = Math.floor(Date.now() / 1000) + const entries: ScoredMemoryEntry[] = [] + + for (const row of result[0].values) { + const cols = result[0].columns + const entry = this.rowToEntry(cols, row) + const priorityWeight = row[cols.indexOf("priority_weight")] as number + const categoryLabel = row[cols.indexOf("category_label")] as string + const daysSinceReinforced = (now - entry.lastReinforced) / 86400 + + const score = computeScore({ + significance: entry.significance, + priorityWeight, + reinforcementCount: entry.reinforcementCount, + daysSinceReinforced, + decayRate: entry.decayRate, + }) + + if (score >= MEMORY_CONSTANTS.SCORE_THRESHOLD) { + entries.push({ ...entry, computedScore: score, categoryLabel }) + } + } + + entries.sort((a, b) => b.computedScore - a.computedScore) + return entries.slice(0, MEMORY_CONSTANTS.MAX_QUERY_ENTRIES) + } + + /** Record an analysis run in the audit log. */ + logAnalysis(entry: AnalysisLogEntry): void { + this.db!.run( + `INSERT INTO analysis_log (id, timestamp, task_id, messages_analyzed, tokens_used, entries_created, entries_reinforced) + VALUES (?, ?, ?, ?, ?, ?, ?)`, + [ + entry.id, + entry.timestamp, + entry.taskId, + entry.messagesAnalyzed, + entry.tokensUsed, + entry.entriesCreated, + entry.entriesReinforced, + ], + ) + this.persist() + } + + /** Return true when the database has been initialized. */ + isReady(): boolean { + return this.db !== null + } + + /** Delete all entries from memory_entries and analysis_log tables. */ + deleteAllEntries(): void { + this.db!.run("DELETE FROM memory_entries") + this.db!.run("DELETE FROM analysis_log") + this.persist() + } + + /** Remove stale, low-score, unpinned entries and enforce the hard cap. 
*/ + garbageCollect(): number { + const now = Math.floor(Date.now() / 1000) + const cutoff = now - MEMORY_CONSTANTS.GARBAGE_COLLECTION_DAYS * 86400 + + // Delete entries that are old, low-scored, and not pinned + // We compute score in JS since sql.js doesn't have LOG2/EXP natively + const result = this.db!.exec( + `SELECT e.id, e.significance, e.reinforcement_count, e.last_reinforced, e.decay_rate, e.is_pinned, c.priority_weight + FROM memory_entries e + JOIN memory_categories c ON e.category = c.slug + WHERE e.is_pinned = 0 AND e.last_reinforced < ?`, + [cutoff], + ) + + if (result.length === 0) return 0 + + const toDelete: string[] = [] + for (const row of result[0].values) { + const cols = result[0].columns + const significance = row[cols.indexOf("significance")] as number + const count = row[cols.indexOf("reinforcement_count")] as number + const lastReinforced = row[cols.indexOf("last_reinforced")] as number + const decayRate = row[cols.indexOf("decay_rate")] as number + const priorityWeight = row[cols.indexOf("priority_weight")] as number + + const score = computeScore({ + significance, + priorityWeight, + reinforcementCount: count, + daysSinceReinforced: (now - lastReinforced) / 86400, + decayRate, + }) + + if (score < MEMORY_CONSTANTS.GARBAGE_COLLECTION_SCORE_THRESHOLD) { + toDelete.push(row[cols.indexOf("id")] as string) + } + } + + for (const id of toDelete) { + this.db!.run("DELETE FROM memory_entries WHERE id = ?", [id]) + } + + // Hard cap enforcement + const countResult = this.db!.exec("SELECT COUNT(*) FROM memory_entries") + const totalCount = countResult[0].values[0][0] as number + if (totalCount > MEMORY_CONSTANTS.MAX_ENTRIES) { + const allResult = this.db!.exec( + `SELECT e.id, e.significance, e.reinforcement_count, e.last_reinforced, e.decay_rate, e.is_pinned, c.priority_weight + FROM memory_entries e + JOIN memory_categories c ON e.category = c.slug + WHERE e.is_pinned = 0 + ORDER BY e.last_reinforced ASC`, + ) + if (allResult.length > 0) { + 
const excess = totalCount - MEMORY_CONSTANTS.MAX_ENTRIES + const scored = allResult[0].values + .map((row) => { + const cols = allResult[0].columns + return { + id: row[cols.indexOf("id")] as string, + score: computeScore({ + significance: row[cols.indexOf("significance")] as number, + priorityWeight: row[cols.indexOf("priority_weight")] as number, + reinforcementCount: row[cols.indexOf("reinforcement_count")] as number, + daysSinceReinforced: + (now - (row[cols.indexOf("last_reinforced")] as number)) / 86400, + decayRate: row[cols.indexOf("decay_rate")] as number, + }), + } + }) + .sort((a, b) => a.score - b.score) + + for (let i = 0; i < Math.min(excess, scored.length); i++) { + this.db!.run("DELETE FROM memory_entries WHERE id = ?", [scored[i].id]) + toDelete.push(scored[i].id) + } + } + } + + if (toDelete.length > 0) this.persist() + return toDelete.length + } + + /** Return the total number of stored entries. */ + getEntryCount(): number { + const result = this.db!.exec("SELECT COUNT(*) FROM memory_entries") + return result[0].values[0][0] as number + } + + /** Return the most recent analysis timestamp, or null if no analyses have been run. */ + getLastAnalysisTimestamp(): number | null { + const result = this.db!.exec("SELECT MAX(timestamp) FROM analysis_log") + if (result.length === 0 || !result[0].values[0][0]) return null + return result[0].values[0][0] as number + } + + /** Close the database connection. 
*/ + close(): void { + if (this.db) { + this.db.close() + this.db = null + } + } + + private rowToEntry(columns: string[], row: unknown[]): MemoryEntry { + const get = (col: string) => row[columns.indexOf(col)] + return { + id: get("id") as string, + workspaceId: get("workspace_id") as string | null, + category: get("category") as MemoryCategorySlug, + content: get("content") as string, + significance: get("significance") as number, + firstSeen: get("first_seen") as number, + lastReinforced: get("last_reinforced") as number, + reinforcementCount: get("reinforcement_count") as number, + decayRate: get("decay_rate") as number, + sourceTaskId: get("source_task_id") as string | null, + isPinned: (get("is_pinned") as number) === 1, + } + } +} diff --git a/src/core/memory/memory-writer.ts b/src/core/memory/memory-writer.ts new file mode 100644 index 00000000000..b306ac032f4 --- /dev/null +++ b/src/core/memory/memory-writer.ts @@ -0,0 +1,162 @@ +import type { Observation, MemoryCategorySlug } from "./types" +import { MEMORY_CONSTANTS, DEFAULT_MEMORY_CATEGORIES } from "./types" +import type { MemoryStore } from "./memory-store" + +const PII_PATTERNS = [ + /\S+@\S+\.\S+/, + /sk-[a-zA-Z0-9]{20,}/, + /ghp_[a-zA-Z0-9]{36}/, + /\b\d{3}[-.]?\d{3}[-.]?\d{4}\b/, + /\b\d{3}-\d{2}-\d{4}\b/, + /AKIA[0-9A-Z]{16}/, + /-----BEGIN (RSA |EC )?PRIVATE KEY-----/, +] + +/** Return true if content matches any known PII/secret pattern. */ +export function containsPII(content: string): boolean { + return PII_PATTERNS.some((pattern) => pattern.test(content)) +} + +/** Compute Jaccard similarity between two strings (word-level, case-insensitive). 
*/ +export function jaccardSimilarity(a: string, b: string): number { + const tokenize = (s: string) => + new Set( + s + .toLowerCase() + .split(/\s+/) + .filter((w) => w.length > 2), + ) + const setA = tokenize(a) + const setB = tokenize(b) + if (setA.size === 0 && setB.size === 0) return 1.0 + if (setA.size === 0 || setB.size === 0) return 0.0 + const intersection = new Set(Array.from(setA).filter((x) => setB.has(x))) + const union = new Set([...Array.from(setA), ...Array.from(setB)]) + return intersection.size / union.size +} + +// Categories that are always global +const GLOBAL_CATEGORIES = new Set([ + "coding-style", + "communication-prefs", + "dislikes-frustrations", +]) + +// Categories that are always workspace-scoped +const WORKSPACE_CATEGORIES = new Set(["active-projects"]) + +function getDecayRate(category: MemoryCategorySlug): number { + const cat = DEFAULT_MEMORY_CATEGORIES.find((c) => c.slug === category) + return cat?.defaultDecayRate ?? 0.1 +} + +export interface WriteResult { + entriesCreated: number + entriesReinforced: number + entriesSkipped: number +} + +/** Write validated observations into the store with PII filtering and dedup. */ +export function processObservations( + store: MemoryStore, + observations: Observation[], + workspaceId: string | null, + taskId: string | null, +): WriteResult { + let created = 0 + let reinforced = 0 + let skipped = 0 + const now = Math.floor(Date.now() / 1000) + + for (const obs of observations) { + // PII filter + if (containsPII(obs.content)) { + skipped++ + continue + } + + if (obs.action === "NEW") { + // Determine scope + let entryWorkspaceId: string | null = null + if (WORKSPACE_CATEGORIES.has(obs.category)) { + entryWorkspaceId = workspaceId + } else if (!GLOBAL_CATEGORIES.has(obs.category)) { + // Heuristic: if content mentions paths, it's workspace-scoped + entryWorkspaceId = /[/\\]/.test(obs.content) ? 
workspaceId : null + } + + // Dedup check + const existing = store.getEntriesByCategory(obs.category, entryWorkspaceId) + const duplicate = existing.find( + (e) => jaccardSimilarity(e.content, obs.content) >= MEMORY_CONSTANTS.DEDUP_SIMILARITY_THRESHOLD, + ) + + if (duplicate) { + store.reinforceEntry(duplicate.id, taskId) + reinforced++ + } else { + store.insertEntry({ + workspaceId: entryWorkspaceId, + category: obs.category, + content: obs.content, + significance: obs.significance, + firstSeen: now, + lastReinforced: now, + reinforcementCount: 1, + decayRate: getDecayRate(obs.category), + sourceTaskId: taskId, + isPinned: false, + }) + created++ + } + } else if (obs.action === "REINFORCE") { + if (obs.existingEntryId) { + const entry = store.getEntry(obs.existingEntryId) + if (entry && entry.category === obs.category) { + store.reinforceEntry(obs.existingEntryId, taskId) + reinforced++ + } else { + skipped++ // Invalid ID — skip silently + } + } else { + skipped++ + } + } else if (obs.action === "UPDATE") { + if (obs.existingEntryId) { + const entry = store.getEntry(obs.existingEntryId) + if (entry && entry.category === obs.category) { + store.updateEntry(obs.existingEntryId, obs.content, obs.significance, taskId) + reinforced++ + } else { + // Invalid ID — treat as NEW with dedup check + const existing = store.getEntriesByCategory(obs.category, workspaceId) + const duplicate = existing.find( + (e) => jaccardSimilarity(e.content, obs.content) >= MEMORY_CONSTANTS.DEDUP_SIMILARITY_THRESHOLD, + ) + if (duplicate) { + store.updateEntry(duplicate.id, obs.content, obs.significance, taskId) + reinforced++ + } else { + store.insertEntry({ + workspaceId: WORKSPACE_CATEGORIES.has(obs.category) ? 
workspaceId : null, + category: obs.category, + content: obs.content, + significance: obs.significance, + firstSeen: now, + lastReinforced: now, + reinforcementCount: 1, + decayRate: getDecayRate(obs.category), + sourceTaskId: taskId, + isPinned: false, + }) + created++ + } + } + } else { + skipped++ + } + } + } + + return { entriesCreated: created, entriesReinforced: reinforced, entriesSkipped: skipped } +} diff --git a/src/core/memory/orchestrator.ts b/src/core/memory/orchestrator.ts new file mode 100644 index 00000000000..4fd6e0b44fe --- /dev/null +++ b/src/core/memory/orchestrator.ts @@ -0,0 +1,336 @@ +import * as crypto from "crypto" +import * as path from "path" +import { execSync } from "child_process" +import type { ProviderSettings } from "@roo-code/types" +import { MemoryStore } from "./memory-store" +import { preprocessMessages, type MessageLike } from "./preprocessor" +import { runAnalysis } from "./analysis-agent" +import { processObservations } from "./memory-writer" +import { compileMemoryPrompt, compileMemoryForAgent } from "./prompt-compiler" +import { MEMORY_CONSTANTS } from "./types" +import { readApiMessages } from "../task-persistence/apiMessages" + +function getWorkspaceId(workspacePath: string): string { + const folderName = path.basename(workspacePath) + let gitRemote: string | null = null + try { + gitRemote = execSync("git remote get-url origin", { + cwd: workspacePath, + encoding: "utf-8", + timeout: 3000, + }).trim() + } catch { + // Not a git repo or no remote + } + const raw = gitRemote ? `${gitRemote}::${folderName}` : folderName + return crypto.createHash("sha256").update(raw).digest("hex").slice(0, 16) +} + +/** Top-level coordinator that drives the memory analysis pipeline. 
*/ +export class MemoryOrchestrator { + private store: MemoryStore + private messageCounter = 0 + private watermark = 0 + private analysisInFlight = false + private analysisQueued = false + private syncInProgress = false + private syncCompleted = 0 + private syncTotal = 0 + private enabled = false + private workspaceId: string | null = null + private analysisFrequency: number + private initPromise: Promise + + constructor( + private storagePath: string, + private workspacePath: string | null, + analysisFrequency?: number, + ) { + this.store = new MemoryStore(storagePath) + this.analysisFrequency = analysisFrequency || MEMORY_CONSTANTS.DEFAULT_ANALYSIS_FREQUENCY + if (workspacePath) { + this.workspaceId = getWorkspaceId(workspacePath) + } + // Placeholder; replaced by the real init promise when init() is called. + this.initPromise = Promise.resolve() + } + + async init(): Promise { + this.initPromise = this.store.init() + await this.initPromise + } + + /** Wait for the store to be fully initialized. Resolves immediately after init completes. */ + async waitForReady(): Promise { + await this.initPromise + } + + setEnabled(enabled: boolean): void { + this.enabled = enabled + if (!enabled) { + this.messageCounter = 0 + } + } + + isEnabled(): boolean { + return this.enabled + } + + /** Return the current sync status so the webview can restore progress on re-mount. */ + getSyncStatus(): { inProgress: boolean; completed: number; total: number } { + return { + inProgress: this.syncInProgress, + completed: this.syncCompleted, + total: this.syncTotal, + } + } + + /** + * Call this on each user message during an active chat session. + * Returns true if an analysis cycle was triggered. 
+ */ + onUserMessage( + messages: unknown[], + taskId: string | null, + providerSettings: ProviderSettings | null, + ): boolean { + if (!this.enabled || !providerSettings) return false + + this.messageCounter++ + console.log(`[Memory] onUserMessage: counter=${this.messageCounter}/${this.analysisFrequency}`) + + if (this.messageCounter >= this.analysisFrequency) { + console.log(`[Memory] onUserMessage: trigger threshold reached, firing analysis`) + this.triggerAnalysis(messages, taskId, providerSettings) + this.messageCounter = 0 + return true + } + + return false + } + + /** + * Call on session end to catch remaining unanalyzed messages. + */ + onSessionEnd( + messages: unknown[], + taskId: string | null, + providerSettings: ProviderSettings | null, + ): void { + if (!this.enabled || !providerSettings) return + if (this.watermark < messages.length) { + this.triggerAnalysis(messages, taskId, providerSettings) + } + } + + private async triggerAnalysis( + messages: unknown[], + taskId: string | null, + providerSettings: ProviderSettings, + ): Promise { + // Ensure the store is initialized before any DB access + try { + await this.initPromise + } catch { + // init() failed – bail out rather than crash + return + } + + if (this.analysisInFlight) { + this.analysisQueued = true + return + } + + this.analysisInFlight = true + + try { + // Grab messages since last watermark + const batch = messages.slice(this.watermark) + this.watermark = messages.length + + console.log(`[Memory] triggerAnalysis: batch size=${batch.length}, watermark=${this.watermark}`) + + if (batch.length === 0) return + + // Preprocess + const preprocessed = preprocessMessages(batch as MessageLike[]) + console.log(`[Memory] triggerAnalysis: preprocessed token estimate=${preprocessed.cleanedTokenEstimate}, cleaned length=${preprocessed.cleaned.trim().length}`) + if (preprocessed.cleaned.trim().length === 0) return + + // Get existing memory for context + const scoredEntries = 
this.store.getScoredEntries(this.workspaceId) + const existingReport = compileMemoryForAgent(scoredEntries) + + // Run analysis + const result = await runAnalysis(providerSettings, preprocessed.cleaned, existingReport) + + if (result && result.observations.length > 0) { + const writeResult = processObservations( + this.store, + result.observations, + this.workspaceId, + taskId, + ) + + // Log the analysis + this.store.logAnalysis({ + id: crypto.randomUUID(), + timestamp: Math.floor(Date.now() / 1000), + taskId, + messagesAnalyzed: batch.length, + tokensUsed: preprocessed.cleanedTokenEstimate * 2, // rough: input + output + entriesCreated: writeResult.entriesCreated, + entriesReinforced: writeResult.entriesReinforced, + }) + + // Run garbage collection + this.store.garbageCollect() + } + } catch (error) { + console.error("[MemoryOrchestrator] Analysis pipeline error:", error) + } finally { + this.analysisInFlight = false + + if (this.analysisQueued) { + this.analysisQueued = false + // Re-trigger with current state + this.triggerAnalysis(messages, taskId, providerSettings) + } + } + } + + /** + * Analyze a batch of prior chat histories to bootstrap the memory database. + * Processes each task sequentially to avoid API rate limits. 
+ */ + isSyncInProgress(): boolean { + return this.syncInProgress + } + + async batchAnalyzeHistory( + taskIds: string[], + globalStoragePath: string, + providerSettings: ProviderSettings, + onProgress: (completed: number, total: number) => void, + ): Promise<{ totalAnalyzed: number; entriesCreated: number; entriesReinforced: number }> { + if (this.syncInProgress) { + return { totalAnalyzed: 0, entriesCreated: 0, entriesReinforced: 0 } + } + + this.syncInProgress = true + this.syncCompleted = 0 + this.syncTotal = taskIds.length + + let totalAnalyzed = 0 + let entriesCreated = 0 + let entriesReinforced = 0 + + try { + for (let i = 0; i < taskIds.length; i++) { + const taskId = taskIds[i] + console.log(`[Memory] batchAnalyzeHistory: processing task ${i + 1}/${taskIds.length}, taskId=${taskId}`) + + try { + // Read conversation history for this task + const messages = await readApiMessages({ taskId, globalStoragePath }) + + if (!messages || messages.length === 0) { + console.log(`[Memory] batchAnalyzeHistory: no messages found for task ${taskId}`) + onProgress(i + 1, taskIds.length) + continue + } + + console.log(`[Memory] batchAnalyzeHistory: found ${messages.length} messages for task ${taskId}`) + + // Preprocess + const preprocessed = preprocessMessages(messages as MessageLike[]) + if (preprocessed.cleaned.trim().length === 0) { + console.log(`[Memory] batchAnalyzeHistory: preprocessed to empty for task ${taskId}`) + onProgress(i + 1, taskIds.length) + continue + } + + // Get existing memory for context + const scoredEntries = this.store.getScoredEntries(this.workspaceId) + const existingReport = compileMemoryForAgent(scoredEntries) + + // Run analysis + const result = await runAnalysis(providerSettings, preprocessed.cleaned, existingReport) + + console.log(`[Memory] batchAnalyzeHistory: analysis returned ${result ? 
result.observations.length : 0} observations for task ${taskId}`) + + if (result && result.observations.length > 0) { + const writeResult = processObservations( + this.store, + result.observations, + this.workspaceId, + taskId, + ) + + entriesCreated += writeResult.entriesCreated + entriesReinforced += writeResult.entriesReinforced + + // Log the analysis + this.store.logAnalysis({ + id: crypto.randomUUID(), + timestamp: Math.floor(Date.now() / 1000), + taskId, + messagesAnalyzed: messages.length, + tokensUsed: preprocessed.cleanedTokenEstimate * 2, + entriesCreated: writeResult.entriesCreated, + entriesReinforced: writeResult.entriesReinforced, + }) + } + + totalAnalyzed++ + } catch (error) { + console.error(`[MemoryOrchestrator] Batch analysis error for task ${taskId}:`, error) + } + + this.syncCompleted = i + 1 + onProgress(i + 1, taskIds.length) + } + + // Run garbage collection after all tasks + this.store.garbageCollect() + + return { totalAnalyzed, entriesCreated, entriesReinforced } + } finally { + this.syncInProgress = false + this.syncCompleted = 0 + this.syncTotal = 0 + } + } + + /** + * Clear all memory entries and analysis logs. + */ + clearAllMemory(): void { + this.store.deleteAllEntries() + } + + /** + * Get the compiled user profile section for the system prompt. + * Awaits store initialization so early calls (before init resolves) return + * real data instead of an empty string. 
+ */ + async getUserProfileSection(): Promise { + try { + await this.initPromise + } catch { + // init() failed – store has no DB, getScoredEntries will return [] + } + const entries = this.store.getScoredEntries(this.workspaceId) + const compiled = compileMemoryPrompt(entries) + console.log(`[Memory] getUserProfileSection: ${entries.length} entries, compiled prompt length=${compiled.length}`) + return compiled + } + + getStore(): MemoryStore { + return this.store + } + + close(): void { + this.store.close() + } +} diff --git a/src/core/memory/preprocessor.ts b/src/core/memory/preprocessor.ts new file mode 100644 index 00000000000..68e732cdc42 --- /dev/null +++ b/src/core/memory/preprocessor.ts @@ -0,0 +1,104 @@ +import type { PreprocessResult } from "./types" + +// Tool names that produce filename references +const FILE_TOOLS = new Set(["read_file", "write_to_file", "apply_diff"]) +const SEARCH_TOOLS = new Set(["search_files", "list_files"]) + +// Estimate tokens as ~4 chars per token (rough, fast) +function estimateTokens(text: string): number { + return Math.ceil(text.length / 4) +} + +function stripLongCodeBlocks(text: string): string { + return text.replace(/```[\s\S]*?```/g, (match) => { + const lines = match.split("\n") + // Opening ``` + content lines + closing ``` + // Content lines = total - 2 (opening and closing ```) + if (lines.length - 2 > 3) { + return "[code block removed]" + } + return match + }) +} + +function processUserContent(content: unknown): string { + if (typeof content === "string") return content + + if (!Array.isArray(content)) return "" + + const parts: string[] = [] + for (const block of content) { + if (block.type === "text") { + parts.push(block.text) + } else if (block.type === "image" || block.type === "image_url") { + parts.push("[image attached]") + } + } + return parts.join("\n") +} + +function processAssistantContent(content: unknown): string { + if (typeof content === "string") return stripLongCodeBlocks(content) + + if 
(!Array.isArray(content)) return "" + + const parts: string[] = [] + for (const block of content) { + if (block.type === "text") { + parts.push(stripLongCodeBlocks(block.text)) + } else if (block.type === "tool_use") { + const name = block.name + const input = block.input || {} + if (FILE_TOOLS.has(name)) { + parts.push(`→ ${name === "read_file" ? "read" : "edited"}: ${input.path || "unknown"}`) + } else if (name === "execute_command") { + parts.push(`→ ran command: ${input.command || "unknown"}`) + } else if (SEARCH_TOOLS.has(name)) { + parts.push(`→ searched: ${input.path || input.regex || "unknown"}`) + } + // All other tool_use blocks are stripped (no output) + } + // tool_result blocks are stripped entirely (no case for them) + } + return parts.join("\n") +} + +/** Clean raw conversation messages, stripping tool noise and large code blocks. */ +export interface MessageLike { + role: string + content: unknown +} + +export function preprocessMessages(messages: MessageLike[]): PreprocessResult { + if (messages.length === 0) { + return { cleaned: "", originalTokenEstimate: 0, cleanedTokenEstimate: 0 } + } + + let originalText = "" + const cleanedParts: string[] = [] + + for (const msg of messages) { + const role = msg.role + const rawContent = typeof msg.content === "string" ? 
msg.content : JSON.stringify(msg.content) + originalText += rawContent + + if (role === "user") { + const processed = processUserContent(msg.content) + if (processed.trim()) { + cleanedParts.push(`User: ${processed.trim()}`) + } + } else if (role === "assistant") { + const processed = processAssistantContent(msg.content) + if (processed.trim()) { + cleanedParts.push(`Assistant: ${processed.trim()}`) + } + } + } + + const cleaned = cleanedParts.join("\n\n") + return { + cleaned, + originalTokenEstimate: estimateTokens(originalText), + cleanedTokenEstimate: estimateTokens(cleaned), + } +} diff --git a/src/core/memory/prompt-compiler.ts b/src/core/memory/prompt-compiler.ts new file mode 100644 index 00000000000..3b49be29fe6 --- /dev/null +++ b/src/core/memory/prompt-compiler.ts @@ -0,0 +1,59 @@ +import type { ScoredMemoryEntry } from "./types" +import { MEMORY_CONSTANTS } from "./types" + +const HEADER = "USER PROFILE & PREFERENCES\n(Learned through conversation — continuously updated)\n\n" + +// Rough token estimate (~chars/4) +export function estimateTokens(text: string): number { + return Math.ceil(text.length / 4) +} + +/** Compile scored entries into a prose user-profile section for the system prompt. */ +export function compileMemoryPrompt(entries: ScoredMemoryEntry[]): string { + if (entries.length === 0) return "" + + // Group by category label + const groups = new Map() + for (const entry of entries) { + if (!groups.has(entry.categoryLabel)) { + groups.set(entry.categoryLabel, []) + } + groups.get(entry.categoryLabel)!.push(entry.content) + } + + // Build prose sections + const sections: string[] = [] + for (const [label, contents] of Array.from(groups.entries())) { + sections.push(`${label}: ${contents.join(". 
")}.`) + } + + const headerTokens = estimateTokens(HEADER) + const cap = MEMORY_CONSTANTS.PROMPT_TOKEN_CAP - headerTokens + + // Token cap — drop lowest-priority sections (from the end) until within budget + let prose = sections.join("\n\n") + while (estimateTokens(prose) > cap && sections.length > 1) { + sections.pop() + prose = sections.join("\n\n") + } + + // Edge case: single remaining section still exceeds cap — hard-truncate by chars + if (estimateTokens(prose) > cap) { + const maxChars = cap * 4 + prose = prose.slice(0, maxChars) + } + + return `${HEADER}${prose}` +} + +/** Compile entries into a machine-readable list for the analysis agent. */ +export function compileMemoryForAgent(entries: ScoredMemoryEntry[]): string { + if (entries.length === 0) return "No existing memory entries." + + return entries + .map( + (e) => + `[${e.id}] ${e.category} (score: ${e.computedScore.toFixed(2)}): ${e.content}`, + ) + .join("\n") +} diff --git a/src/core/memory/scoring.ts b/src/core/memory/scoring.ts new file mode 100644 index 00000000000..54357078807 --- /dev/null +++ b/src/core/memory/scoring.ts @@ -0,0 +1,27 @@ +/** Logarithmic bonus capped at 3.0 for repeated reinforcements. */ +export function reinforcementBonus(count: number): number { + return Math.min(Math.log2(count + 1), 3.0) +} + +/** Exponential decay factor based on days since last reinforcement. */ +export function temporalDecay(daysSinceReinforced: number, decayRate: number): number { + return Math.exp(-decayRate * daysSinceReinforced) +} + +export interface ScoreInput { + significance: number + priorityWeight: number + reinforcementCount: number + daysSinceReinforced: number + decayRate: number +} + +/** Compute a composite relevance score for a memory entry. 
*/ +export function computeScore(input: ScoreInput): number { + return ( + input.significance * + input.priorityWeight * + reinforcementBonus(input.reinforcementCount) * + temporalDecay(input.daysSinceReinforced, input.decayRate) + ) +} diff --git a/src/core/memory/types.ts b/src/core/memory/types.ts new file mode 100644 index 00000000000..4b8ff2bda19 --- /dev/null +++ b/src/core/memory/types.ts @@ -0,0 +1,89 @@ +/** A single persisted memory entry. */ +export interface MemoryEntry { + id: string + workspaceId: string | null + category: MemoryCategorySlug + content: string + significance: number + firstSeen: number + lastReinforced: number + reinforcementCount: number + decayRate: number + sourceTaskId: string | null + isPinned: boolean +} + +export type MemoryCategorySlug = + | "coding-style" + | "communication-prefs" + | "technical-proficiency" + | "tool-preferences" + | "active-projects" + | "behavioral-patterns" + | "dislikes-frustrations" + +export interface MemoryCategory { + slug: MemoryCategorySlug + label: string + defaultDecayRate: number + priorityWeight: number +} + +export const DEFAULT_MEMORY_CATEGORIES: MemoryCategory[] = [ + { slug: "coding-style", label: "Coding Style", defaultDecayRate: 0.05, priorityWeight: 0.9 }, + { slug: "communication-prefs", label: "Communication Preferences", defaultDecayRate: 0.05, priorityWeight: 0.95 }, + { slug: "technical-proficiency", label: "Technical Proficiency", defaultDecayRate: 0.08, priorityWeight: 0.85 }, + { slug: "tool-preferences", label: "Tool Preferences", defaultDecayRate: 0.12, priorityWeight: 0.7 }, + { slug: "active-projects", label: "Active Projects", defaultDecayRate: 0.3, priorityWeight: 0.6 }, + { slug: "behavioral-patterns", label: "Behavioral Patterns", defaultDecayRate: 0.15, priorityWeight: 0.75 }, + { slug: "dislikes-frustrations", label: "Dislikes & Frustrations", defaultDecayRate: 0.08, priorityWeight: 0.9 }, +] + +export type ObservationAction = "NEW" | "REINFORCE" | "UPDATE" + +export 
interface Observation { + action: ObservationAction + category: MemoryCategorySlug + content: string + significance: number + existingEntryId: string | null + reasoning: string +} + +export interface AnalysisResult { + observations: Observation[] + sessionSummary: string +} + +export interface AnalysisLogEntry { + id: string + timestamp: number + taskId: string | null + messagesAnalyzed: number + tokensUsed: number + entriesCreated: number + entriesReinforced: number +} + +export interface ScoredMemoryEntry extends MemoryEntry { + computedScore: number + categoryLabel: string +} + +export interface PreprocessResult { + cleaned: string + originalTokenEstimate: number + cleanedTokenEstimate: number +} + +export const MEMORY_CONSTANTS = { + MIN_CONTEXT_WINDOW: 50_000, + DEFAULT_ANALYSIS_FREQUENCY: 8, + MAX_ENTRIES: 500, + SCORE_THRESHOLD: 0.05, + GARBAGE_COLLECTION_SCORE_THRESHOLD: 0.01, + GARBAGE_COLLECTION_DAYS: 90, + PROMPT_TOKEN_CAP: 1500, + MAX_QUERY_ENTRIES: 40, + DEDUP_SIMILARITY_THRESHOLD: 0.6, +} as const diff --git a/src/core/multi-orchestrator/__tests__/e2e.spec.ts b/src/core/multi-orchestrator/__tests__/e2e.spec.ts new file mode 100644 index 00000000000..2eee9e265fb --- /dev/null +++ b/src/core/multi-orchestrator/__tests__/e2e.spec.ts @@ -0,0 +1,1453 @@ +/** + * E2E integration tests for multi-orchestrator subsystem. + * + * Tests full flows across types, plan-generator, report-aggregator, + * merge-pipeline, agent-coordinator, and worktree-manager without + * requiring VS Code API mocks. + */ +import { vi, describe, it, expect, beforeEach, afterEach } from "vitest" +import { EventEmitter } from "events" + +// --------------------------------------------------------------------------- +// Mocks — hoisted above all imports +// --------------------------------------------------------------------------- + +// Mock the API layer so we can feed fake LLM responses into generatePlan. 
+vi.mock("../../../api", () => ({ + buildApiHandler: vi.fn(), +})) + +// Deterministic agent IDs — counter reset in beforeEach. +let agentIdCounter = 0 +vi.mock("../types", async (importOriginal) => { + const actual = await importOriginal() + return { + ...actual, + generateAgentId: () => { + agentIdCounter++ + return `e2e-${String(agentIdCounter).padStart(3, "0")}` + }, + } +}) + +// Mock child_process so MergePipeline never touches real git. +vi.mock("child_process", () => ({ + execSync: vi.fn(), +})) + +// Mock @roo-code/core so WorktreeManager never touches real worktrees. +vi.mock("@roo-code/core", () => ({ + WorktreeService: vi.fn().mockImplementation(() => ({ + createWorktree: vi.fn().mockResolvedValue({ success: true }), + deleteWorktree: vi.fn().mockResolvedValue({ success: true }), + })), +})) + +// --------------------------------------------------------------------------- +// Imports — AFTER mocks +// --------------------------------------------------------------------------- + +import { + createInitialAgentState, + createInitialOrchestratorState, + MULTI_ORCHESTRATOR_CONSTANTS, + type PlannedTask, + type AgentState, + type MergeResult, + type OrchestratorState, + type OrchestratorPlan, +} from "../types" +import { generatePlan } from "../plan-generator" +import { aggregateReports } from "../report-aggregator" +import { MergePipeline } from "../merge-pipeline" +import { AgentCoordinator } from "../agent-coordinator" +import { MultiWorktreeManager } from "../worktree-manager" +import { buildApiHandler } from "../../../api" +import { execSync } from "child_process" +import { RooCodeEventName } from "@roo-code/types" +import type { TokenUsage, ToolUsage } from "@roo-code/types" + +const mockBuildApiHandler = vi.mocked(buildApiHandler) +const mockExecSync = vi.mocked(execSync) + +// --------------------------------------------------------------------------- +// Shared helpers +// --------------------------------------------------------------------------- + 
+/** Factory for a fully-populated PlannedTask. */
+function makeTask(overrides: Partial<PlannedTask> = {}): PlannedTask {
+	return {
+		id: "task-1",
+		mode: "code",
+		title: "Implement widget",
+		description: "Build the widget component end-to-end",
+		assignedFiles: ["src/widget.ts"],
+		priority: 1,
+		...overrides,
+	}
+}
+
+/** Factory for a fully-populated AgentState. */
+function makeAgent(overrides: Partial<AgentState> = {}): AgentState {
+	return {
+		taskId: "agent-1",
+		providerId: "prov-1",
+		panelId: "panel-1",
+		worktreePath: "/tmp/roo-multi-agent-1",
+		worktreeBranch: "multi-orch/agent-1",
+		mode: "code",
+		status: "completed",
+		title: "Widget Agent",
+		completionReport: "Implemented widget successfully.",
+		tokenUsage: { input: 2400, output: 1100 },
+		startedAt: 1700000000000,
+		completedAt: 1700000045000,
+		...overrides,
+	}
+}
+
+/** Factory for a fully-populated MergeResult. */
+function makeMerge(overrides: Partial<MergeResult> = {}): MergeResult {
+	return {
+		agentTaskId: "agent-1",
+		branch: "multi-orch/agent-1",
+		success: true,
+		conflictsFound: 0,
+		conflictsResolved: 0,
+		filesChanged: ["src/widget.ts", "src/widget.test.ts"],
+		...overrides,
+	}
+}
+
+/** Minimal mock provider — EventEmitter + getCurrentTask stub. */
+function createMockProvider() {
+	const emitter = new EventEmitter()
+	const mockStart = vi.fn()
+	const mockAbortTask = vi.fn().mockResolvedValue(undefined)
+	;(emitter as any).getCurrentTask = vi.fn().mockReturnValue({
+		start: mockStart,
+		abortTask: mockAbortTask,
+		clineMessages: [],
+	})
+	return { provider: emitter as any, mockStart, mockAbortTask }
+}
+
+/** Build a mock TokenUsage for completion events. */
+function makeTokenUsage(input: number, output: number): TokenUsage {
+	return { totalTokensIn: input, totalTokensOut: output, totalCost: 0.01, contextTokens: 0 }
+}
+
+/** Build a mock ToolUsage for completion events. */
+function makeToolUsage(): ToolUsage {
+	return {}
+}
+
+// Reusable mode configs — excludes orchestrator slugs.
+const sampleModes = [ + { slug: "code", name: "Code", roleDefinition: "Write code", description: "Implementation", groups: ["read", "edit"] as any }, + { slug: "architect", name: "Architect", roleDefinition: "Design", description: "Planning", groups: ["read"] as any }, + { slug: "debug", name: "Debug", roleDefinition: "Fix bugs", description: "Debugging", groups: ["read"] as any }, + { slug: "multi-orchestrator", name: "MO", roleDefinition: "Orch", description: "Multi", groups: [] as any }, + { slug: "orchestrator", name: "Orch", roleDefinition: "Orch", description: "Single", groups: [] as any }, +] + +const sampleProvider = { apiProvider: "anthropic" as const, apiModelId: "claude-sonnet-4-20250514" } + +// ═══════════════════════════════════════════════════════════════════════════ +// 1. FULL TYPE VALIDATION +// ═══════════════════════════════════════════════════════════════════════════ + +describe("E2E: Full type validation", () => { + beforeEach(() => { + agentIdCounter = 0 + }) + + it("should create an OrchestratorState with all correct defaults and mutate through lifecycle phases", () => { + const state: OrchestratorState = createInitialOrchestratorState() + + // Verify defaults + expect(state.phase).toBe("idle") + expect(state.plan).toBeNull() + expect(state.agents).toEqual([]) + expect(state.mergeResults).toEqual([]) + expect(state.finalReport).toBeNull() + + // Simulate lifecycle mutation + state.phase = "planning" + const plan: OrchestratorPlan = { + tasks: [makeTask({ id: "e2e-001" }), makeTask({ id: "e2e-002", mode: "architect", title: "Design DB" })], + requiresMerge: true, + estimatedComplexity: "high", + } + state.plan = plan + + expect(state.phase).toBe("planning") + expect(state.plan.tasks).toHaveLength(2) + expect(state.plan.estimatedComplexity).toBe("high") + + // Transition to spawning + state.phase = "spawning" + state.agents = plan.tasks.map(createInitialAgentState) + + expect(state.agents).toHaveLength(2) + 
expect(state.agents[0].status).toBe("pending") + expect(state.agents[0].taskId).toBe("e2e-001") + expect(state.agents[1].mode).toBe("architect") + + // Transition to running + state.phase = "running" + state.agents[0].status = "running" + state.agents[0].startedAt = Date.now() + + // Transition to merging + state.phase = "merging" + state.mergeResults = [makeMerge({ agentTaskId: "e2e-001" })] + + // Transition to reporting + state.phase = "reporting" + state.finalReport = "# Report\nAll done." + + // Transition to complete + state.phase = "complete" + + expect(state.phase).toBe("complete") + expect(state.finalReport).toContain("# Report") + expect(state.mergeResults).toHaveLength(1) + }) + + it("should create a PlannedTask with all fields fully populated", () => { + const task: PlannedTask = { + id: "abc12345", + mode: "debug", + title: "Fix login race condition", + description: "The login form double-submits under network lag.", + assignedFiles: ["src/auth/login.ts", "src/auth/session.ts"], + priority: 3, + } + + expect(task.id).toBe("abc12345") + expect(task.mode).toBe("debug") + expect(task.assignedFiles).toHaveLength(2) + expect(task.priority).toBe(3) + }) + + it("should create an AgentState from a PlannedTask with proper defaults", () => { + const task = makeTask({ id: "e2e-001", mode: "architect", title: "Schema design" }) + const agent = createInitialAgentState(task) + + expect(agent.taskId).toBe("e2e-001") + expect(agent.mode).toBe("architect") + expect(agent.title).toBe("Schema design") + expect(agent.status).toBe("pending") + expect(agent.providerId).toBe("") + expect(agent.panelId).toBe("") + expect(agent.worktreePath).toBeNull() + expect(agent.worktreeBranch).toBeNull() + expect(agent.completionReport).toBeNull() + expect(agent.tokenUsage).toBeNull() + expect(agent.startedAt).toBeNull() + expect(agent.completedAt).toBeNull() + }) + + it("should construct a MergeResult with conflict details", () => { + const merge: MergeResult = { + agentTaskId: 
"agent-x", + branch: "multi-orch/agent-x", + success: false, + conflictsFound: 4, + conflictsResolved: 2, + filesChanged: ["README.md", "src/index.ts", "package.json", "tsconfig.json"], + } + + expect(merge.success).toBe(false) + expect(merge.conflictsFound).toBe(4) + expect(merge.conflictsResolved).toBe(2) + expect(merge.filesChanged).toHaveLength(4) + }) + + it("should ensure createInitialAgentState returns independent objects per call", () => { + const task = makeTask() + const a = createInitialAgentState(task) + const b = createInitialAgentState(task) + + a.status = "running" + a.startedAt = 9999 + + expect(b.status).toBe("pending") + expect(b.startedAt).toBeNull() + }) + + it("should verify MULTI_ORCHESTRATOR_CONSTANTS match expected values", () => { + expect(MULTI_ORCHESTRATOR_CONSTANTS).toEqual({ + MAX_AGENTS: 6, + DEFAULT_MAX_AGENTS: 4, + WORKTREE_PREFIX: "roo-multi-", + BRANCH_PREFIX: "multi-orch/", + }) + }) +}) + +// ═══════════════════════════════════════════════════════════════════════════ +// 2. 
PLAN GENERATOR PARSING +// ═══════════════════════════════════════════════════════════════════════════ + +describe("E2E: Plan generator parsing", () => { + beforeEach(() => { + vi.clearAllMocks() + agentIdCounter = 0 + }) + + it("should parse a valid multi-task plan from completePrompt", async () => { + const llmResponse = JSON.stringify({ + tasks: [ + { mode: "code", title: "Build REST API", description: "Create CRUD endpoints for users", assignedFiles: ["src/api/users.ts"], priority: 1 }, + { mode: "code", title: "Write tests", description: "Unit tests for user API", assignedFiles: ["src/api/__tests__/users.test.ts"], priority: 2 }, + { mode: "architect", title: "Document API", description: "Write OpenAPI spec", assignedFiles: ["docs/api.yaml"], priority: 3 }, + ], + requiresMerge: true, + estimatedComplexity: "high", + }) + + mockBuildApiHandler.mockReturnValue({ completePrompt: vi.fn().mockResolvedValue(llmResponse) } as any) + + const plan = await generatePlan("Build a user management API", sampleModes, 4, sampleProvider) + + expect(plan).not.toBeNull() + expect(plan!.tasks).toHaveLength(3) + expect(plan!.requiresMerge).toBe(true) + expect(plan!.estimatedComplexity).toBe("high") + + // Verify generated IDs + expect(plan!.tasks[0].id).toBe("e2e-001") + expect(plan!.tasks[1].id).toBe("e2e-002") + expect(plan!.tasks[2].id).toBe("e2e-003") + + // Verify task fields preserved + expect(plan!.tasks[0].title).toBe("Build REST API") + expect(plan!.tasks[0].mode).toBe("code") + expect(plan!.tasks[0].assignedFiles).toEqual(["src/api/users.ts"]) + expect(plan!.tasks[2].mode).toBe("architect") + }) + + it("should handle empty tasks array gracefully", async () => { + const llmResponse = JSON.stringify({ + tasks: [], + requiresMerge: false, + estimatedComplexity: "low", + }) + + mockBuildApiHandler.mockReturnValue({ completePrompt: vi.fn().mockResolvedValue(llmResponse) } as any) + + const plan = await generatePlan("Do nothing", sampleModes, 2, sampleProvider) + + // Empty 
tasks array is rejected by the parser as invalid + expect(plan).toBeNull() + }) + + it("should extract valid JSON even with trailing garbage", async () => { + const garbage = '{"tasks": [{"mode": "code"}]} %%% extra stuff {{{' + mockBuildApiHandler.mockReturnValue({ completePrompt: vi.fn().mockResolvedValue(garbage) } as any) + + const plan = await generatePlan("Bad json", sampleModes, 2, sampleProvider) + // The parser now uses brace-matching to extract JSON despite trailing garbage + expect(plan).not.toBeNull() + expect(plan!.tasks).toHaveLength(1) + }) + + it("should return null for completely empty response", async () => { + mockBuildApiHandler.mockReturnValue({ completePrompt: vi.fn().mockResolvedValue("") } as any) + + const plan = await generatePlan("Empty response", sampleModes, 2, sampleProvider) + expect(plan).toBeNull() + }) + + it("should return null when tasks field is an object instead of array", async () => { + const bad = JSON.stringify({ tasks: { notAnArray: true }, requiresMerge: false }) + mockBuildApiHandler.mockReturnValue({ completePrompt: vi.fn().mockResolvedValue(bad) } as any) + + const plan = await generatePlan("Bad shape", sampleModes, 2, sampleProvider) + expect(plan).toBeNull() + }) + + it("should fill defaults for tasks with missing fields", async () => { + const sparse = JSON.stringify({ + tasks: [ + {}, // no fields at all + { mode: "debug" }, // only mode + { title: "Custom title", priority: 99 }, // partial fields + ], + requiresMerge: false, + estimatedComplexity: "low", + }) + + mockBuildApiHandler.mockReturnValue({ completePrompt: vi.fn().mockResolvedValue(sparse) } as any) + + const plan = await generatePlan("Sparse tasks", sampleModes, 4, sampleProvider) + + expect(plan).not.toBeNull() + expect(plan!.tasks).toHaveLength(3) + + // Task 0: all defaults + expect(plan!.tasks[0].mode).toBe("code") + expect(plan!.tasks[0].title).toBe("Task 1") + expect(plan!.tasks[0].description).toBe("") + 
expect(plan!.tasks[0].assignedFiles).toEqual([]) + expect(plan!.tasks[0].priority).toBe(1) + + // Task 1: mode provided, rest defaulted + expect(plan!.tasks[1].mode).toBe("debug") + expect(plan!.tasks[1].title).toBe("Task 2") + + // Task 2: title and priority provided + expect(plan!.tasks[2].title).toBe("Custom title") + expect(plan!.tasks[2].priority).toBe(99) + expect(plan!.tasks[2].mode).toBe("code") // default + }) + + it("should strip markdown fences with language tag and parse correctly", async () => { + const fenced = + "```json\n" + + JSON.stringify({ + tasks: [{ mode: "code", title: "Fenced task", description: "From markdown", assignedFiles: [], priority: 1 }], + requiresMerge: false, + estimatedComplexity: "low", + }) + + "\n```" + + mockBuildApiHandler.mockReturnValue({ completePrompt: vi.fn().mockResolvedValue(fenced) } as any) + + const plan = await generatePlan("Fenced response", sampleModes, 2, sampleProvider) + + expect(plan).not.toBeNull() + expect(plan!.tasks[0].title).toBe("Fenced task") + }) + + it("should parse response wrapped in plain fences without json tag", async () => { + // The parser regex `(?:json)?` makes "json" optional, so plain ``` fences are also stripped. 
+ const fenced = + "```\n" + + JSON.stringify({ + tasks: [{ mode: "architect", title: "No lang tag", description: "Plain fences", assignedFiles: [], priority: 1 }], + requiresMerge: false, + estimatedComplexity: "low", + }) + + "\n```" + + mockBuildApiHandler.mockReturnValue({ completePrompt: vi.fn().mockResolvedValue(fenced) } as any) + + const plan = await generatePlan("Plain fences", sampleModes, 2, sampleProvider) + + // Current implementation strips both ```json and plain ``` fences + expect(plan).not.toBeNull() + expect(plan!.tasks[0].title).toBe("No lang tag") + }) + + it("should infer requiresMerge from task modes when not provided", async () => { + // Case 1: has code tasks → requiresMerge = true + const withCode = JSON.stringify({ + tasks: [ + { mode: "code", title: "Code task" }, + { mode: "architect", title: "Design task" }, + ], + }) + mockBuildApiHandler.mockReturnValue({ completePrompt: vi.fn().mockResolvedValue(withCode) } as any) + const plan1 = await generatePlan("With code", sampleModes, 4, sampleProvider) + expect(plan1!.requiresMerge).toBe(true) + + // Reset counter + agentIdCounter = 0 + + // Case 2: no code tasks → requiresMerge = false + const noCode = JSON.stringify({ + tasks: [ + { mode: "architect", title: "Design only" }, + { mode: "ask", title: "Research" }, + ], + }) + mockBuildApiHandler.mockReturnValue({ completePrompt: vi.fn().mockResolvedValue(noCode) } as any) + const plan2 = await generatePlan("No code", sampleModes, 4, sampleProvider) + expect(plan2!.requiresMerge).toBe(false) + }) + + it("should default estimatedComplexity to 'medium' when absent", async () => { + const noComplexity = JSON.stringify({ + tasks: [{ mode: "code", title: "Simple" }], + requiresMerge: true, + }) + mockBuildApiHandler.mockReturnValue({ completePrompt: vi.fn().mockResolvedValue(noComplexity) } as any) + + const plan = await generatePlan("No complexity field", sampleModes, 2, sampleProvider) + expect(plan!.estimatedComplexity).toBe("medium") + }) + + 
it("should return null when handler does not support completePrompt", async () => { + mockBuildApiHandler.mockReturnValue({ someOtherMethod: vi.fn() } as any) + + const plan = await generatePlan("No completePrompt", sampleModes, 2, sampleProvider) + expect(plan).toBeNull() + }) + + it("should return null when completePrompt throws an error", async () => { + mockBuildApiHandler.mockReturnValue({ + completePrompt: vi.fn().mockRejectedValue(new Error("Network timeout")), + } as any) + + const plan = await generatePlan("Network error", sampleModes, 2, sampleProvider) + expect(plan).toBeNull() + }) +}) + +// ═══════════════════════════════════════════════════════════════════════════ +// 3. REPORT AGGREGATOR +// ═══════════════════════════════════════════════════════════════════════════ + +describe("E2E: Report aggregator with realistic data", () => { + it("should generate a full report for mixed completed/failed agents with merge results", () => { + const agents: AgentState[] = [ + makeAgent({ + taskId: "ag-api", + title: "Build REST API", + mode: "code", + status: "completed", + startedAt: 1700000000000, + completedAt: 1700000032000, + tokenUsage: { input: 5200, output: 2800 }, + completionReport: "Created 4 endpoints with validation.", + }), + makeAgent({ + taskId: "ag-tests", + title: "Write Tests", + mode: "code", + status: "completed", + startedAt: 1700000000000, + completedAt: 1700000058000, + tokenUsage: { input: 3100, output: 1900 }, + completionReport: "12 tests passing, 95% coverage.", + }), + makeAgent({ + taskId: "ag-docs", + title: "Generate Docs", + mode: "architect", + status: "failed", + startedAt: 1700000000000, + completedAt: 1700000015000, + tokenUsage: { input: 800, output: 200 }, + completionReport: null, + }), + ] + + const mergeResults: MergeResult[] = [ + makeMerge({ + agentTaskId: "ag-api", + branch: "multi-orch/ag-api", + success: true, + filesChanged: ["src/api/users.ts", "src/api/routes.ts"], + }), + makeMerge({ + agentTaskId: "ag-tests", + 
branch: "multi-orch/ag-tests", + success: false, + conflictsFound: 2, + conflictsResolved: 0, + filesChanged: ["src/api/__tests__/users.test.ts"], + }), + ] + + const report = aggregateReports(agents, mergeResults) + + // Header + expect(report).toContain("# Multi-Orchestration Report") + expect(report).toContain("**3 agents** executed in parallel.") + + // Agent Results + expect(report).toContain("### ✅ Build REST API (code mode)") + expect(report).toContain("**Duration:** 32s") + expect(report).toContain("**Tokens:** 5200 in / 2800 out") + expect(report).toContain("**Report:** Created 4 endpoints with validation.") + + expect(report).toContain("### ✅ Write Tests (code mode)") + expect(report).toContain("**Duration:** 58s") + + expect(report).toContain("### ❌ Generate Docs (architect mode)") + expect(report).toContain("**Duration:** 15s") + + // Merge Results + expect(report).toContain("## Merge Results") + expect(report).toContain("### ✅ Branch: multi-orch/ag-api") + expect(report).toContain("### ⚠️ Branch: multi-orch/ag-tests") + expect(report).toContain("**Conflicts found:** 2") + + // Summary + expect(report).toContain("**Agents:** 2 completed, 1 failed") + expect(report).toContain("**Merges:** 1 succeeded, 1 had conflicts") + }) + + it("should generate a clean report for all-success scenario without merge", () => { + const agents: AgentState[] = [ + makeAgent({ taskId: "a1", title: "Research auth patterns", mode: "ask", status: "completed" }), + makeAgent({ taskId: "a2", title: "Design schema", mode: "architect", status: "completed" }), + ] + + const report = aggregateReports(agents, []) + + expect(report).toContain("**2 agents** executed in parallel.") + expect(report).not.toContain("## Merge Results") + expect(report).not.toContain("**Merges:**") + expect(report).toContain("**Agents:** 2 completed, 0 failed") + }) + + it("should handle agents with no token usage and no completion report", () => { + const agents = [ + makeAgent({ + taskId: "bare", + title: 
"Bare agent", + tokenUsage: null, + completionReport: null, + startedAt: null, + completedAt: null, + }), + ] + + const report = aggregateReports(agents, []) + + expect(report).toContain("Bare agent") + expect(report).toContain("**Duration:** unknown") + expect(report).not.toContain("**Tokens:**") + expect(report).not.toContain("**Report:**") + }) + + it("should handle all-failure scenario", () => { + const agents = [ + makeAgent({ taskId: "f1", title: "Fail A", status: "failed" }), + makeAgent({ taskId: "f2", title: "Fail B", status: "failed" }), + makeAgent({ taskId: "f3", title: "Fail C", status: "failed" }), + ] + + const report = aggregateReports(agents, []) + + expect(report).toContain("**Agents:** 0 completed, 3 failed") + // All should show ❌ + expect(report).toContain("### ❌ Fail A") + expect(report).toContain("### ❌ Fail B") + expect(report).toContain("### ❌ Fail C") + }) + + it("should produce valid output for zero agents", () => { + const report = aggregateReports([], []) + + expect(report).toContain("**0 agents** executed in parallel.") + expect(report).toContain("## Summary") + expect(report).toContain("**Agents:** 0 completed, 0 failed") + }) +}) + +// ═══════════════════════════════════════════════════════════════════════════ +// 4. 
MERGE PIPELINE +// ═══════════════════════════════════════════════════════════════════════════ + +describe("E2E: Merge pipeline", () => { + let pipeline: MergePipeline + + beforeEach(() => { + vi.clearAllMocks() + pipeline = new MergePipeline("/workspace/project") + }) + + it("should merge multiple branches successfully", async () => { + // Mock git diff for file lists + mockExecSync.mockImplementation((cmd: string) => { + const cmdStr = String(cmd) + if (cmdStr.startsWith("git diff --name-only HEAD...")) { + if (cmdStr.includes("branch-a")) return "src/a.ts\nsrc/a.test.ts\n" + if (cmdStr.includes("branch-b")) return "src/b.ts\n" + return "" + } + if (cmdStr.startsWith("git merge")) { + return "" // success — no output + } + return "" + }) + + const agents: AgentState[] = [ + makeAgent({ + taskId: "task-a", + worktreeBranch: "multi-orch/branch-a", + status: "completed", + startedAt: 1000, + }), + makeAgent({ + taskId: "task-b", + worktreeBranch: "multi-orch/branch-b", + status: "completed", + startedAt: 2000, + }), + ] + + const progressCalls: Array<{ id: string; result: MergeResult }> = [] + const results = await pipeline.mergeAll(agents, (id, result) => { + progressCalls.push({ id, result }) + }) + + expect(results).toHaveLength(2) + expect(results[0].success).toBe(true) + expect(results[0].agentTaskId).toBe("task-a") + expect(results[0].filesChanged).toEqual(["src/a.ts", "src/a.test.ts"]) + expect(results[0].conflictsFound).toBe(0) + + expect(results[1].success).toBe(true) + expect(results[1].agentTaskId).toBe("task-b") + expect(results[1].filesChanged).toEqual(["src/b.ts"]) + + // Progress callback fired for each + expect(progressCalls).toHaveLength(2) + expect(progressCalls[0].id).toBe("task-a") + expect(progressCalls[1].id).toBe("task-b") + }) + + it("should handle merge conflict and abort cleanly", async () => { + mockExecSync.mockImplementation((cmd: string) => { + const cmdStr = String(cmd) + if (cmdStr.startsWith("git diff --name-only HEAD...")) { + 
return "src/shared.ts\nsrc/config.ts\n" + } + if (cmdStr.startsWith("git merge --no-ff")) { + throw new Error("CONFLICT: Merge conflict in src/shared.ts") + } + if (cmdStr === "git diff --name-only --diff-filter=U") { + return "src/shared.ts\n" + } + if (cmdStr === "git merge --abort") { + return "" + } + return "" + }) + + const agents = [ + makeAgent({ + taskId: "conflict-agent", + worktreeBranch: "multi-orch/conflict-branch", + status: "completed", + startedAt: 1000, + }), + ] + + const results = await pipeline.mergeAll(agents, () => {}) + + expect(results).toHaveLength(1) + expect(results[0].success).toBe(false) + expect(results[0].conflictsFound).toBe(1) + expect(results[0].conflictsResolved).toBe(0) + expect(results[0].filesChanged).toEqual(["src/shared.ts", "src/config.ts"]) + }) + + it("should fall back to git reset --hard when merge --abort fails", async () => { + let abortCalled = false + let resetCalled = false + + mockExecSync.mockImplementation((cmd: string) => { + const cmdStr = String(cmd) + if (cmdStr.startsWith("git diff --name-only HEAD...")) { + return "src/x.ts\n" + } + if (cmdStr.startsWith("git merge --no-ff")) { + throw new Error("Merge conflict") + } + if (cmdStr === "git diff --name-only --diff-filter=U") { + return "src/x.ts\n" + } + if (cmdStr === "git merge --abort") { + abortCalled = true + throw new Error("Cannot abort — no merge in progress") + } + if (cmdStr === "git reset --hard HEAD") { + resetCalled = true + return "" + } + return "" + }) + + const agents = [ + makeAgent({ taskId: "fallback", worktreeBranch: "multi-orch/fallback", status: "completed", startedAt: 1000 }), + ] + + const results = await pipeline.mergeAll(agents, () => {}) + + expect(results[0].success).toBe(false) + expect(abortCalled).toBe(true) + expect(resetCalled).toBe(true) + }) + + it("should skip agents without worktreeBranch", async () => { + const agents = [ + makeAgent({ taskId: "no-branch", worktreeBranch: null, status: "completed", startedAt: 1000 }), + 
makeAgent({ taskId: "has-branch", worktreeBranch: "multi-orch/has-branch", status: "completed", startedAt: 2000 }), + ] + + mockExecSync.mockImplementation((cmd: string) => { + const cmdStr = String(cmd) + if (cmdStr.includes("git diff --name-only HEAD...")) return "src/file.ts\n" + if (cmdStr.includes("git merge")) return "" + return "" + }) + + const results = await pipeline.mergeAll(agents, () => {}) + + expect(results).toHaveLength(1) + expect(results[0].agentTaskId).toBe("has-branch") + }) + + it("should skip agents that are not completed", async () => { + const agents = [ + makeAgent({ taskId: "failed-agent", worktreeBranch: "multi-orch/failed", status: "failed", startedAt: 1000 }), + makeAgent({ taskId: "running-agent", worktreeBranch: "multi-orch/running", status: "running", startedAt: 500 }), + makeAgent({ taskId: "good-agent", worktreeBranch: "multi-orch/good", status: "completed", startedAt: 2000 }), + ] + + mockExecSync.mockImplementation(() => "") + + const results = await pipeline.mergeAll(agents, () => {}) + + expect(results).toHaveLength(1) + expect(results[0].agentTaskId).toBe("good-agent") + }) + + it("should merge in startedAt order (earliest first)", async () => { + const mergeOrder: string[] = [] + + mockExecSync.mockImplementation((cmd: string) => { + const cmdStr = String(cmd) + if (cmdStr.startsWith("git merge --no-ff")) { + // Extract branch from command + const match = cmdStr.match(/"([^"]+)"/) + if (match) mergeOrder.push(match[1]) + } + return "" + }) + + const agents = [ + makeAgent({ taskId: "late", worktreeBranch: "multi-orch/late", status: "completed", startedAt: 5000 }), + makeAgent({ taskId: "early", worktreeBranch: "multi-orch/early", status: "completed", startedAt: 1000 }), + makeAgent({ taskId: "mid", worktreeBranch: "multi-orch/mid", status: "completed", startedAt: 3000 }), + ] + + await pipeline.mergeAll(agents, () => {}) + + expect(mergeOrder).toEqual(["multi-orch/early", "multi-orch/mid", "multi-orch/late"]) + }) + + 
it("should return a safe result when getFilesChanged throws", async () => { + mockExecSync.mockImplementation((cmd: string) => { + const cmdStr = String(cmd) + if (cmdStr.startsWith("git diff --name-only HEAD...")) { + throw new Error("fatal: bad object HEAD") + } + if (cmdStr.startsWith("git merge")) return "" + return "" + }) + + const agents = [ + makeAgent({ taskId: "bad-diff", worktreeBranch: "multi-orch/bad-diff", status: "completed", startedAt: 1000 }), + ] + + const results = await pipeline.mergeAll(agents, () => {}) + + expect(results).toHaveLength(1) + expect(results[0].success).toBe(true) + expect(results[0].filesChanged).toEqual([]) + }) +}) + +// ═══════════════════════════════════════════════════════════════════════════ +// 5. AGENT COORDINATOR +// ═══════════════════════════════════════════════════════════════════════════ + +describe("E2E: Agent coordinator", () => { + let coordinator: AgentCoordinator + + beforeEach(() => { + coordinator = new AgentCoordinator() + }) + + it("should register agents and track initial state", () => { + const agent1 = createInitialAgentState(makeTask({ id: "t1", title: "Task 1" })) + const agent2 = createInitialAgentState(makeTask({ id: "t2", title: "Task 2" })) + const { provider: prov1 } = createMockProvider() + const { provider: prov2 } = createMockProvider() + + coordinator.registerAgent(agent1, prov1) + coordinator.registerAgent(agent2, prov2) + + expect(coordinator.totalAgents).toBe(2) + expect(coordinator.completedAgents).toBe(0) + expect(coordinator.allComplete()).toBe(false) + + const states = coordinator.getStates() + expect(states).toHaveLength(2) + expect(states[0].status).toBe("pending") + expect(states[1].status).toBe("pending") + }) + + it("should look up individual agent state by taskId", () => { + const agent = createInitialAgentState(makeTask({ id: "lookup-me" })) + const { provider } = createMockProvider() + + coordinator.registerAgent(agent, provider) + + 
expect(coordinator.getState("lookup-me")).toBeDefined() + expect(coordinator.getState("lookup-me")!.title).toBe("Implement widget") + expect(coordinator.getState("nonexistent")).toBeUndefined() + }) + + it("should transition agent to completed when TaskCompleted fires", () => { + const agent = createInitialAgentState(makeTask({ id: "comp-1" })) + const { provider } = createMockProvider() + + coordinator.registerAgent(agent, provider) + + const completedSpy = vi.fn() + coordinator.on("agentCompleted", completedSpy) + + // Simulate provider emitting TaskCompleted + provider.emit( + RooCodeEventName.TaskCompleted, + "comp-1", + makeTokenUsage(3000, 1500), + makeToolUsage(), + ) + + const state = coordinator.getState("comp-1")! + expect(state.status).toBe("completed") + expect(state.completedAt).toBeTypeOf("number") + expect(state.tokenUsage).toEqual({ input: 3000, output: 1500 }) + + expect(completedSpy).toHaveBeenCalledWith("comp-1") + expect(coordinator.completedAgents).toBe(1) + }) + + it("should transition agent to failed when TaskAborted fires", () => { + const agent = createInitialAgentState(makeTask({ id: "fail-1" })) + const { provider } = createMockProvider() + + coordinator.registerAgent(agent, provider) + + const failedSpy = vi.fn() + coordinator.on("agentFailed", failedSpy) + + // Simulate provider emitting TaskAborted + provider.emit(RooCodeEventName.TaskAborted, "fail-1") + + const state = coordinator.getState("fail-1")! 
+ expect(state.status).toBe("failed") + expect(state.completedAt).toBeTypeOf("number") + + expect(failedSpy).toHaveBeenCalledWith("fail-1") + expect(coordinator.completedAgents).toBe(1) + }) + + it("should emit allCompleted when last agent finishes", () => { + const agent1 = createInitialAgentState(makeTask({ id: "ac-1" })) + const agent2 = createInitialAgentState(makeTask({ id: "ac-2" })) + const { provider: prov1 } = createMockProvider() + const { provider: prov2 } = createMockProvider() + + coordinator.registerAgent(agent1, prov1) + coordinator.registerAgent(agent2, prov2) + + const allCompleteSpy = vi.fn() + coordinator.on("allCompleted", allCompleteSpy) + + // First agent completes — allCompleted should NOT fire yet + prov1.emit(RooCodeEventName.TaskCompleted, "ac-1", makeTokenUsage(100, 50), makeToolUsage()) + expect(allCompleteSpy).not.toHaveBeenCalled() + expect(coordinator.allComplete()).toBe(false) + + // Second agent fails — now allCompleted fires + prov2.emit(RooCodeEventName.TaskAborted, "ac-2") + expect(allCompleteSpy).toHaveBeenCalledTimes(1) + expect(coordinator.allComplete()).toBe(true) + }) + + it("should resolve waitForAll() immediately when already complete", async () => { + const agent = createInitialAgentState(makeTask({ id: "instant" })) + const { provider } = createMockProvider() + + coordinator.registerAgent(agent, provider) + + // Complete the agent first + provider.emit(RooCodeEventName.TaskCompleted, "instant", makeTokenUsage(10, 5), makeToolUsage()) + + // waitForAll should resolve immediately + const start = Date.now() + await coordinator.waitForAll() + const elapsed = Date.now() - start + expect(elapsed).toBeLessThan(50) // near-instant + }) + + it("should resolve waitForAll() when agents complete after the call", async () => { + const agent1 = createInitialAgentState(makeTask({ id: "w1" })) + const agent2 = createInitialAgentState(makeTask({ id: "w2" })) + const { provider: prov1 } = createMockProvider() + const { provider: prov2 } = 
createMockProvider() + + coordinator.registerAgent(agent1, prov1) + coordinator.registerAgent(agent2, prov2) + + // Start waiting + const waitPromise = coordinator.waitForAll() + + // Complete agents asynchronously + setTimeout(() => { + prov1.emit(RooCodeEventName.TaskCompleted, "w1", makeTokenUsage(10, 5), makeToolUsage()) + }, 10) + setTimeout(() => { + prov2.emit(RooCodeEventName.TaskCompleted, "w2", makeTokenUsage(20, 10), makeToolUsage()) + }, 20) + + await waitPromise + + expect(coordinator.allComplete()).toBe(true) + expect(coordinator.completedAgents).toBe(2) + }) + + it("should handle mixed completions and failures correctly", () => { + const agents = [ + createInitialAgentState(makeTask({ id: "m1" })), + createInitialAgentState(makeTask({ id: "m2" })), + createInitialAgentState(makeTask({ id: "m3" })), + ] + + const providers = agents.map(() => createMockProvider()) + + agents.forEach((agent, i) => coordinator.registerAgent(agent, providers[i].provider)) + + // m1 completes, m2 fails, m3 completes + providers[0].provider.emit(RooCodeEventName.TaskCompleted, "m1", makeTokenUsage(100, 50), makeToolUsage()) + providers[1].provider.emit(RooCodeEventName.TaskAborted, "m2") + providers[2].provider.emit(RooCodeEventName.TaskCompleted, "m3", makeTokenUsage(200, 100), makeToolUsage()) + + const states = coordinator.getStates() + const completed = states.filter((s) => s.status === "completed") + const failed = states.filter((s) => s.status === "failed") + + expect(completed).toHaveLength(2) + expect(failed).toHaveLength(1) + expect(coordinator.allComplete()).toBe(true) + expect(coordinator.totalAgents).toBe(3) + expect(coordinator.completedAgents).toBe(3) + }) + + it("should ignore duplicate TaskCompleted events for the same agent", () => { + const agent = createInitialAgentState(makeTask({ id: "dup-1" })) + const { provider } = createMockProvider() + + coordinator.registerAgent(agent, provider) + + const completedSpy = vi.fn() + coordinator.on("agentCompleted", 
completedSpy) + + // Fire TaskCompleted twice for the same agent + provider.emit(RooCodeEventName.TaskCompleted, "dup-1", makeTokenUsage(100, 50), makeToolUsage()) + provider.emit(RooCodeEventName.TaskCompleted, "dup-1", makeTokenUsage(200, 100), makeToolUsage()) + + // Should only count once + expect(completedSpy).toHaveBeenCalledTimes(1) + expect(coordinator.completedAgents).toBe(1) + // Token usage should be from the first event, not overwritten + expect(coordinator.getState("dup-1")!.tokenUsage).toEqual({ input: 100, output: 50 }) + }) + + it("should ignore duplicate TaskAborted events for the same agent", () => { + const agent = createInitialAgentState(makeTask({ id: "dup-abort" })) + const { provider } = createMockProvider() + + coordinator.registerAgent(agent, provider) + + const failedSpy = vi.fn() + coordinator.on("agentFailed", failedSpy) + + provider.emit(RooCodeEventName.TaskAborted, "dup-abort") + provider.emit(RooCodeEventName.TaskAborted, "dup-abort") + + expect(failedSpy).toHaveBeenCalledTimes(1) + expect(coordinator.completedAgents).toBe(1) + }) + + it("should not fire allCompleted twice from duplicate events", () => { + const agent1 = createInitialAgentState(makeTask({ id: "d1" })) + const agent2 = createInitialAgentState(makeTask({ id: "d2" })) + const { provider: prov1 } = createMockProvider() + const { provider: prov2 } = createMockProvider() + + coordinator.registerAgent(agent1, prov1) + coordinator.registerAgent(agent2, prov2) + + const allCompleteSpy = vi.fn() + coordinator.on("allCompleted", allCompleteSpy) + + // Complete both normally + prov1.emit(RooCodeEventName.TaskCompleted, "d1", makeTokenUsage(10, 5), makeToolUsage()) + prov2.emit(RooCodeEventName.TaskCompleted, "d2", makeTokenUsage(20, 10), makeToolUsage()) + + // Fire duplicates — should NOT trigger allCompleted again + prov1.emit(RooCodeEventName.TaskCompleted, "d1", makeTokenUsage(10, 5), makeToolUsage()) + prov2.emit(RooCodeEventName.TaskAborted, "d2") + + 
expect(allCompleteSpy).toHaveBeenCalledTimes(1) + }) + + it("should reject waitForAll() when timeout expires", async () => { + const agent = createInitialAgentState(makeTask({ id: "timeout-1" })) + const { provider } = createMockProvider() + + coordinator.registerAgent(agent, provider) + + // Use a very short timeout + await expect(coordinator.waitForAll(50)).rejects.toThrow(/timed out/) + }) + + it("should resolve waitForAll() before timeout if agents complete in time", async () => { + const agent = createInitialAgentState(makeTask({ id: "fast-1" })) + const { provider } = createMockProvider() + + coordinator.registerAgent(agent, provider) + + const waitPromise = coordinator.waitForAll(5000) + + // Complete the agent quickly + setTimeout(() => { + provider.emit(RooCodeEventName.TaskCompleted, "fast-1", makeTokenUsage(10, 5), makeToolUsage()) + }, 10) + + await waitPromise + expect(coordinator.allComplete()).toBe(true) + }) + + it("should mark agent as failed when getCurrentTask() returns undefined during startAll()", async () => { + const agent = createInitialAgentState(makeTask({ id: "no-task" })) + const emitter = new EventEmitter() + ;(emitter as any).getCurrentTask = vi.fn().mockReturnValue(undefined) + + coordinator.registerAgent(agent, emitter as any) + + const failedSpy = vi.fn() + coordinator.on("agentFailed", failedSpy) + + await coordinator.startAll() + + expect(failedSpy).toHaveBeenCalledWith("no-task") + expect(coordinator.getState("no-task")!.status).toBe("failed") + expect(coordinator.completedAgents).toBe(1) + }) + + it("should return false from allComplete() when no agents are registered", () => { + expect(coordinator.allComplete()).toBe(false) + }) + + it("should wait indefinitely when timeoutMs is 0", async () => { + const agent = createInitialAgentState(makeTask({ id: "inf-1" })) + const { provider } = createMockProvider() + + coordinator.registerAgent(agent, provider) + + const waitPromise = coordinator.waitForAll(0) + + // Complete after a 
small delay — should resolve fine + setTimeout(() => { + provider.emit(RooCodeEventName.TaskCompleted, "inf-1", makeTokenUsage(10, 5), makeToolUsage()) + }, 10) + + await waitPromise + expect(coordinator.allComplete()).toBe(true) + }) +}) + +// ═══════════════════════════════════════════════════════════════════════════ +// 6. WORKTREE MANAGER +// ═══════════════════════════════════════════════════════════════════════════ + +describe("E2E: WorktreeManager", () => { + let manager: MultiWorktreeManager + + beforeEach(() => { + vi.clearAllMocks() + manager = new MultiWorktreeManager("/home/user/project") + }) + + it("should generate branch names using BRANCH_PREFIX constant", () => { + const branch = manager.getBranchName("abc123") + expect(branch).toBe(`${MULTI_ORCHESTRATOR_CONSTANTS.BRANCH_PREFIX}abc123`) + expect(branch).toBe("multi-orch/abc123") + }) + + it("should create worktrees for multiple agents with correct paths and branches", async () => { + const agentIds = ["agent-a", "agent-b", "agent-c"] + + const worktrees = await manager.createWorktrees(agentIds) + + expect(worktrees.size).toBe(3) + + // Verify agent-a + const wtA = worktrees.get("agent-a")! + expect(wtA.agentId).toBe("agent-a") + expect(wtA.branch).toBe("multi-orch/agent-a") + expect(wtA.path).toContain("roo-multi-agent-a") + + // Verify agent-b + const wtB = worktrees.get("agent-b")! + expect(wtB.branch).toBe("multi-orch/agent-b") + expect(wtB.path).toContain("roo-multi-agent-b") + + // Verify agent-c + const wtC = worktrees.get("agent-c")! + expect(wtC.branch).toBe("multi-orch/agent-c") + }) + + it("should use WORKTREE_PREFIX in the directory path", async () => { + await manager.createWorktrees(["test-id"]) + + const wt = manager.getWorktree("test-id")! 
+ expect(wt.path).toContain(MULTI_ORCHESTRATOR_CONSTANTS.WORKTREE_PREFIX) + expect(wt.path).toMatch(/roo-multi-test-id$/) + }) + + it("should place worktree directories as siblings of the workspace", async () => { + // workspace = /home/user/project + // worktree should be /home/user/roo-multi-<agentId> + await manager.createWorktrees(["sibling"]) + + const wt = manager.getWorktree("sibling")! + expect(wt.path).toBe("/home/user/roo-multi-sibling") + }) + + it("should retrieve individual worktree info via getWorktree", async () => { + await manager.createWorktrees(["x", "y"]) + + expect(manager.getWorktree("x")).toBeDefined() + expect(manager.getWorktree("y")).toBeDefined() + expect(manager.getWorktree("z")).toBeUndefined() + }) + + it("should return all worktrees via getAllWorktrees", async () => { + await manager.createWorktrees(["p", "q", "r"]) + + const all = manager.getAllWorktrees() + expect(all).toHaveLength(3) + + const ids = all.map((w) => w.agentId).sort() + expect(ids).toEqual(["p", "q", "r"]) + }) + + it("should cleanup all worktrees on cleanupWorktrees", async () => { + await manager.createWorktrees(["c1", "c2"]) + expect(manager.getAllWorktrees()).toHaveLength(2) + + await manager.cleanupWorktrees() + expect(manager.getAllWorktrees()).toHaveLength(0) + }) + + it("should throw when WorktreeService.createWorktree fails", async () => { + // This test must come after cleanup to avoid polluting the shared mock. + const { WorktreeService } = await import("@roo-code/core") + const mockCreate = vi.fn().mockResolvedValue({ success: false, message: "Branch already exists" }) + vi.mocked(WorktreeService).mockImplementation( + () => ({ createWorktree: mockCreate, deleteWorktree: vi.fn() }) as any, + ) + + const failManager = new MultiWorktreeManager("/workspace") + + await expect(failManager.createWorktrees(["bad-branch"])).rejects.toThrow("Failed to create worktree") + }) +}) + +// ═══════════════════════════════════════════════════════════════════════════ +// 7. 
CROSS-MODULE INTEGRATION +// ═══════════════════════════════════════════════════════════════════════════ + +describe("E2E: Cross-module integration", () => { + beforeEach(() => { + vi.clearAllMocks() + agentIdCounter = 0 + }) + + it("should flow from plan generation through agent state creation to report aggregation", async () => { + // Step 1: Generate a plan + const llmResponse = JSON.stringify({ + tasks: [ + { mode: "code", title: "Implement auth", description: "Build JWT auth", assignedFiles: ["src/auth.ts"], priority: 1 }, + { mode: "code", title: "Implement API", description: "Build REST layer", assignedFiles: ["src/api.ts"], priority: 2 }, + ], + requiresMerge: true, + estimatedComplexity: "medium", + }) + mockBuildApiHandler.mockReturnValue({ completePrompt: vi.fn().mockResolvedValue(llmResponse) } as any) + + const plan = await generatePlan("Build auth + API", sampleModes, 4, sampleProvider) + expect(plan).not.toBeNull() + expect(plan!.tasks).toHaveLength(2) + + // Step 2: Create agent states from the plan + const agentStates = plan!.tasks.map((task) => { + const state = createInitialAgentState(task) + // Simulate the orchestrator assigning worktree info + state.worktreeBranch = `multi-orch/${task.id}` + state.worktreePath = `/tmp/roo-multi-${task.id}` + return state + }) + + expect(agentStates[0].taskId).toBe("e2e-001") + expect(agentStates[0].worktreeBranch).toBe("multi-orch/e2e-001") + expect(agentStates[1].taskId).toBe("e2e-002") + + // Step 3: Simulate agents completing + agentStates[0].status = "completed" + agentStates[0].startedAt = 1700000000000 + agentStates[0].completedAt = 1700000025000 + agentStates[0].tokenUsage = { input: 4000, output: 2000 } + agentStates[0].completionReport = "Auth system implemented with JWT." 
+ + agentStates[1].status = "failed" + agentStates[1].startedAt = 1700000000000 + agentStates[1].completedAt = 1700000010000 + agentStates[1].tokenUsage = { input: 1200, output: 300 } + + // Step 4: Simulate merge results + const mergeResults: MergeResult[] = [ + { + agentTaskId: "e2e-001", + branch: "multi-orch/e2e-001", + success: true, + conflictsFound: 0, + conflictsResolved: 0, + filesChanged: ["src/auth.ts", "src/auth.test.ts"], + }, + ] + + // Step 5: Generate the report + const report = aggregateReports(agentStates, mergeResults) + + expect(report).toContain("**2 agents** executed in parallel.") + expect(report).toContain("### ✅ Implement auth (code mode)") + expect(report).toContain("### ❌ Implement API (code mode)") + expect(report).toContain("**Duration:** 25s") + expect(report).toContain("**Report:** Auth system implemented with JWT.") + expect(report).toContain("**Agents:** 1 completed, 1 failed") + expect(report).toContain("**Merges:** 1 succeeded, 0 had conflicts") + }) + + it("should wire coordinator events through to completion tracking", async () => { + // Create two tasks from a plan + const tasks = [ + makeTask({ id: "wire-1", title: "Wire Task A" }), + makeTask({ id: "wire-2", title: "Wire Task B" }), + ] + + const agents = tasks.map(createInitialAgentState) + const coordinator = new AgentCoordinator() + const providers = tasks.map(() => createMockProvider()) + + // Register agents + agents.forEach((agent, i) => coordinator.registerAgent(agent, providers[i].provider)) + + // Start waiting for all + const waitPromise = coordinator.waitForAll() + + // Simulate completions + providers[0].provider.emit( + RooCodeEventName.TaskCompleted, + "wire-1", + makeTokenUsage(500, 250), + makeToolUsage(), + ) + providers[1].provider.emit( + RooCodeEventName.TaskCompleted, + "wire-2", + makeTokenUsage(800, 400), + makeToolUsage(), + ) + + await waitPromise + + // Now aggregate from coordinator's states + const finalStates = coordinator.getStates() + const 
report = aggregateReports(finalStates, []) + + expect(report).toContain("**2 agents** executed in parallel.") + expect(report).toContain("### ✅ Wire Task A") + expect(report).toContain("### ✅ Wire Task B") + expect(report).toContain("**Tokens:** 500 in / 250 out") + expect(report).toContain("**Tokens:** 800 in / 400 out") + expect(report).toContain("**Agents:** 2 completed, 0 failed") + }) + + it("should validate OrchestratorState through a complete lifecycle", async () => { + // 1. Start idle + const state = createInitialOrchestratorState() + expect(state.phase).toBe("idle") + + // 2. Generate plan + state.phase = "planning" + const llmResponse = JSON.stringify({ + tasks: [ + { mode: "code", title: "Feature A", description: "Build feature A" }, + { mode: "debug", title: "Fix Bug B", description: "Debug and fix B" }, + { mode: "architect", title: "Design C", description: "Architecture for C" }, + ], + requiresMerge: true, + estimatedComplexity: "high", + }) + mockBuildApiHandler.mockReturnValue({ completePrompt: vi.fn().mockResolvedValue(llmResponse) } as any) + const plan = await generatePlan("Complex project", sampleModes, 4, sampleProvider) + state.plan = plan + + expect(state.plan).not.toBeNull() + expect(state.plan!.tasks).toHaveLength(3) + + // 3. Spawn agents + state.phase = "spawning" + state.agents = state.plan!.tasks.map(createInitialAgentState) + expect(state.agents).toHaveLength(3) + expect(state.agents.every((a) => a.status === "pending")).toBe(true) + + // 4. Run agents + state.phase = "running" + const now = Date.now() + state.agents.forEach((a) => { + a.status = "running" + a.startedAt = now + }) + + // Simulate completion + state.agents[0].status = "completed" + state.agents[0].completedAt = now + 20000 + state.agents[0].completionReport = "Feature A done." 
+ state.agents[0].tokenUsage = { input: 3000, output: 1500 } + + state.agents[1].status = "completed" + state.agents[1].completedAt = now + 35000 + state.agents[1].completionReport = "Bug B fixed." + state.agents[1].tokenUsage = { input: 2000, output: 1000 } + + state.agents[2].status = "failed" + state.agents[2].completedAt = now + 8000 + + // 5. Merge + state.phase = "merging" + state.mergeResults = [ + makeMerge({ agentTaskId: state.agents[0].taskId, success: true, filesChanged: ["src/a.ts"] }), + makeMerge({ agentTaskId: state.agents[1].taskId, success: true, filesChanged: ["src/b.ts"] }), + ] + + // 6. Report + state.phase = "reporting" + state.finalReport = aggregateReports(state.agents, state.mergeResults) + + expect(state.finalReport).toContain("**3 agents** executed in parallel.") + expect(state.finalReport).toContain("**Agents:** 2 completed, 1 failed") + expect(state.finalReport).toContain("**Merges:** 2 succeeded, 0 had conflicts") + + // 7. Complete + state.phase = "complete" + expect(state.phase).toBe("complete") + expect(state.finalReport).toBeTruthy() + }) +}) diff --git a/src/core/multi-orchestrator/__tests__/plan-generator.spec.ts b/src/core/multi-orchestrator/__tests__/plan-generator.spec.ts new file mode 100644 index 00000000000..60d8b146409 --- /dev/null +++ b/src/core/multi-orchestrator/__tests__/plan-generator.spec.ts @@ -0,0 +1,314 @@ +import { vi, describe, it, expect, beforeEach } from "vitest" +import type { ModeConfig, ProviderSettings } from "@roo-code/types" + +// Mock the api module so we can control buildApiHandler +vi.mock("../../../api", () => ({ + buildApiHandler: vi.fn(), +})) + +// Mock generateAgentId for deterministic IDs in tests +let agentIdCounter = 0 +vi.mock("../types", async (importOriginal) => { + const actual = await importOriginal<typeof import("../types")>() + return { + ...actual, + generateAgentId: () => { + agentIdCounter++ + return `testid-${String(agentIdCounter).padStart(2, "0")}` + }, + } +}) + +import { generatePlan } from 
"../plan-generator" +import { buildApiHandler } from "../../../api" + +const mockBuildApiHandler = vi.mocked(buildApiHandler) + +const sampleModes: ModeConfig[] = [ + { + slug: "code", + name: "Code", + roleDefinition: "Write code", + description: "Implementation mode", + groups: ["read", "edit"] as any, + }, + { + slug: "architect", + name: "Architect", + roleDefinition: "Design architecture", + description: "Design mode", + groups: ["read"] as any, + }, + { + slug: "multi-orchestrator", + name: "Multi-Orchestrator", + roleDefinition: "Orchestrate", + description: "Parallel orchestration", + groups: [] as any, + }, + { + slug: "orchestrator", + name: "Orchestrator", + roleDefinition: "Orchestrate", + description: "Single orchestration", + groups: [] as any, + }, +] + +const sampleProviderSettings: ProviderSettings = { + apiProvider: "anthropic", + apiModelId: "claude-sonnet-4-20250514", +} + +describe("generatePlan", () => { + beforeEach(() => { + vi.clearAllMocks() + agentIdCounter = 0 + }) + + it("should return null when handler does not support completePrompt", async () => { + mockBuildApiHandler.mockReturnValue({} as any) + + const result = await generatePlan("Build a feature", sampleModes, 3, sampleProviderSettings) + expect(result).toBeNull() + }) + + it("should call completePrompt with a prompt containing the user request", async () => { + const mockCompletePrompt = vi.fn().mockResolvedValue( + JSON.stringify({ + tasks: [ + { + mode: "code", + title: "Write feature", + description: "Implement the feature", + assignedFiles: ["src/feature.ts"], + priority: 1, + }, + ], + requiresMerge: true, + estimatedComplexity: "low", + }), + ) + + mockBuildApiHandler.mockReturnValue({ completePrompt: mockCompletePrompt } as any) + + await generatePlan("Build a feature", sampleModes, 3, sampleProviderSettings) + + expect(mockCompletePrompt).toHaveBeenCalledTimes(1) + const promptArg = mockCompletePrompt.mock.calls[0][0] as string + expect(promptArg).toContain("Build a 
feature") + expect(promptArg).toContain("Number of agents requested: 3") + }) + + it("should filter out multi-orchestrator, orchestrator, and architect from available modes in prompt", async () => { + const mockCompletePrompt = vi.fn().mockResolvedValue( + JSON.stringify({ + tasks: [{ mode: "code", title: "T", description: "D", assignedFiles: [], priority: 1 }], + requiresMerge: false, + estimatedComplexity: "low", + }), + ) + + mockBuildApiHandler.mockReturnValue({ completePrompt: mockCompletePrompt } as any) + + await generatePlan("Do something", sampleModes, 4, sampleProviderSettings) + + const promptArg = mockCompletePrompt.mock.calls[0][0] as string + expect(promptArg).toContain("- code:") + // architect is now also filtered out per CRITICAL RULES + expect(promptArg).not.toContain("- architect:") + expect(promptArg).not.toContain("- multi-orchestrator:") + expect(promptArg).not.toContain("- orchestrator:") + }) + + it("should parse a valid JSON response into an OrchestratorPlan", async () => { + const validResponse = JSON.stringify({ + tasks: [ + { + mode: "code", + title: "Build API", + description: "Create REST endpoints", + assignedFiles: ["src/api.ts"], + priority: 1, + }, + { + mode: "architect", + title: "Design DB", + description: "Plan the database schema", + assignedFiles: ["docs/schema.md"], + priority: 2, + }, + ], + requiresMerge: true, + estimatedComplexity: "medium", + }) + + mockBuildApiHandler.mockReturnValue({ + completePrompt: vi.fn().mockResolvedValue(validResponse), + } as any) + + const plan = await generatePlan("Build app", sampleModes, 4, sampleProviderSettings) + + expect(plan).not.toBeNull() + expect(plan!.tasks).toHaveLength(2) + expect(plan!.tasks[0].title).toBe("Build API") + expect(plan!.tasks[0].mode).toBe("code") + expect(plan!.tasks[0].description).toBe("Create REST endpoints") + expect(plan!.tasks[0].assignedFiles).toEqual(["src/api.ts"]) + expect(plan!.tasks[0].priority).toBe(1) + expect(plan!.tasks[1].title).toBe("Design DB") 
+ expect(plan!.requiresMerge).toBe(true) + expect(plan!.estimatedComplexity).toBe("medium") + }) + + it("should assign generated IDs to tasks", async () => { + const validResponse = JSON.stringify({ + tasks: [ + { mode: "code", title: "T1", description: "D1", assignedFiles: [], priority: 1 }, + { mode: "code", title: "T2", description: "D2", assignedFiles: [], priority: 2 }, + ], + requiresMerge: true, + estimatedComplexity: "low", + }) + + mockBuildApiHandler.mockReturnValue({ + completePrompt: vi.fn().mockResolvedValue(validResponse), + } as any) + + const plan = await generatePlan("Do tasks", sampleModes, 4, sampleProviderSettings) + + expect(plan!.tasks[0].id).toBe("testid-01") + expect(plan!.tasks[1].id).toBe("testid-02") + }) + + it("should handle JSON wrapped in markdown code fences", async () => { + const wrappedResponse = + "```json\n" + + JSON.stringify({ + tasks: [{ mode: "code", title: "Fenced", description: "D", assignedFiles: [], priority: 1 }], + requiresMerge: false, + estimatedComplexity: "low", + }) + + "\n```" + + mockBuildApiHandler.mockReturnValue({ + completePrompt: vi.fn().mockResolvedValue(wrappedResponse), + } as any) + + const plan = await generatePlan("Fenced task", sampleModes, 2, sampleProviderSettings) + + expect(plan).not.toBeNull() + expect(plan!.tasks[0].title).toBe("Fenced") + }) + + it("should return null for completely invalid JSON", async () => { + mockBuildApiHandler.mockReturnValue({ + completePrompt: vi.fn().mockResolvedValue("this is not json at all {{{"), + } as any) + + const result = await generatePlan("Bad response", sampleModes, 2, sampleProviderSettings) + expect(result).toBeNull() + }) + + it("should return null when response has no tasks array", async () => { + mockBuildApiHandler.mockReturnValue({ + completePrompt: vi.fn().mockResolvedValue(JSON.stringify({ noTasks: true })), + } as any) + + const result = await generatePlan("Missing tasks", sampleModes, 2, sampleProviderSettings) + expect(result).toBeNull() + }) + 
+ it("should return null when tasks is not an array", async () => { + mockBuildApiHandler.mockReturnValue({ + completePrompt: vi.fn().mockResolvedValue(JSON.stringify({ tasks: "not-an-array" })), + } as any) + + const result = await generatePlan("Bad tasks", sampleModes, 2, sampleProviderSettings) + expect(result).toBeNull() + }) + + it("should default requiresMerge based on code mode presence when not provided", async () => { + const responseWithoutMerge = JSON.stringify({ + tasks: [ + { mode: "code", title: "Code task", description: "D", assignedFiles: [], priority: 1 }, + ], + estimatedComplexity: "low", + }) + + mockBuildApiHandler.mockReturnValue({ + completePrompt: vi.fn().mockResolvedValue(responseWithoutMerge), + } as any) + + const plan = await generatePlan("Inferred merge", sampleModes, 2, sampleProviderSettings) + + expect(plan).not.toBeNull() + // requiresMerge should be true since a task has mode "code" + expect(plan!.requiresMerge).toBe(true) + }) + + it("should default requiresMerge to false when no code mode tasks and not provided", async () => { + const responseWithoutMerge = JSON.stringify({ + tasks: [ + { mode: "architect", title: "Design", description: "D", assignedFiles: [], priority: 1 }, + ], + estimatedComplexity: "low", + }) + + mockBuildApiHandler.mockReturnValue({ + completePrompt: vi.fn().mockResolvedValue(responseWithoutMerge), + } as any) + + const plan = await generatePlan("No merge needed", sampleModes, 2, sampleProviderSettings) + + expect(plan).not.toBeNull() + expect(plan!.requiresMerge).toBe(false) + }) + + it("should default estimatedComplexity to 'medium' when not provided", async () => { + const responseWithoutComplexity = JSON.stringify({ + tasks: [{ mode: "code", title: "T", description: "D", assignedFiles: [], priority: 1 }], + requiresMerge: false, + }) + + mockBuildApiHandler.mockReturnValue({ + completePrompt: vi.fn().mockResolvedValue(responseWithoutComplexity), + } as any) + + const plan = await generatePlan("Default 
complexity", sampleModes, 2, sampleProviderSettings) + + expect(plan!.estimatedComplexity).toBe("medium") + }) + + it("should default task fields when missing from response", async () => { + const responseWithMissing = JSON.stringify({ + tasks: [ + {}, // totally empty task object + ], + requiresMerge: false, + estimatedComplexity: "low", + }) + + mockBuildApiHandler.mockReturnValue({ + completePrompt: vi.fn().mockResolvedValue(responseWithMissing), + } as any) + + const plan = await generatePlan("Sparse task", sampleModes, 2, sampleProviderSettings) + + expect(plan).not.toBeNull() + expect(plan!.tasks[0].mode).toBe("code") + expect(plan!.tasks[0].title).toBe("Task 1") + expect(plan!.tasks[0].description).toBe("") + expect(plan!.tasks[0].assignedFiles).toEqual([]) + expect(plan!.tasks[0].priority).toBe(1) + }) + + it("should return null when completePrompt throws", async () => { + mockBuildApiHandler.mockReturnValue({ + completePrompt: vi.fn().mockRejectedValue(new Error("API failure")), + } as any) + + const result = await generatePlan("Crash test", sampleModes, 2, sampleProviderSettings) + expect(result).toBeNull() + }) +}) diff --git a/src/core/multi-orchestrator/__tests__/report-aggregator.spec.ts b/src/core/multi-orchestrator/__tests__/report-aggregator.spec.ts new file mode 100644 index 00000000000..5ae3fcf52a5 --- /dev/null +++ b/src/core/multi-orchestrator/__tests__/report-aggregator.spec.ts @@ -0,0 +1,217 @@ +import { aggregateReports } from "../report-aggregator" +import type { AgentState, MergeResult } from "../types" + +function makeAgent(overrides: Partial<AgentState> = {}): AgentState { + return { + taskId: "agent-1", + providerId: "p-1", + panelId: "panel-1", + worktreePath: null, + worktreeBranch: null, + mode: "code", + status: "completed", + title: "Test Agent", + completionReport: null, + tokenUsage: null, + startedAt: null, + completedAt: null, + ...overrides, + } +} + +function makeMergeResult(overrides: Partial<MergeResult> = {}): MergeResult { + return { + 
agentTaskId: "agent-1", + branch: "multi-orch/agent-1", + success: true, + conflictsFound: 0, + conflictsResolved: 0, + filesChanged: [], + ...overrides, + } +} + +describe("aggregateReports", () => { + it("should include a header with agent count", () => { + const report = aggregateReports([makeAgent(), makeAgent({ taskId: "agent-2", title: "Agent 2" })], []) + expect(report).toContain("# Multi-Orchestration Report") + expect(report).toContain("**2 agents** executed in parallel.") + }) + + it("should include agent results section", () => { + const report = aggregateReports([makeAgent({ title: "Build API" })], []) + expect(report).toContain("## Agent Results") + expect(report).toContain("Build API") + }) + + it("should show ✅ for completed agents", () => { + const report = aggregateReports([makeAgent({ status: "completed", title: "Task A" })], []) + expect(report).toContain("### ✅ Task A (code mode)") + }) + + it("should show ❌ for failed agents", () => { + const report = aggregateReports([makeAgent({ status: "failed", title: "Task B" })], []) + expect(report).toContain("### ❌ Task B (code mode)") + }) + + it("should calculate duration when startedAt and completedAt are set", () => { + const report = aggregateReports( + [makeAgent({ startedAt: 1000000, completedAt: 1005000 })], + [], + ) + expect(report).toContain("**Duration:** 5s") + }) + + it("should show 'unknown' duration when timestamps are missing", () => { + const report = aggregateReports( + [makeAgent({ startedAt: null, completedAt: null })], + [], + ) + expect(report).toContain("**Duration:** unknown") + }) + + it("should include token usage when present", () => { + const report = aggregateReports( + [makeAgent({ tokenUsage: { input: 1500, output: 800 } })], + [], + ) + expect(report).toContain("**Tokens:** 1500 in / 800 out") + }) + + it("should not include token usage line when null", () => { + const report = aggregateReports( + [makeAgent({ tokenUsage: null })], + [], + ) + 
expect(report).not.toContain("**Tokens:**") + }) + + it("should include completion report when present", () => { + const report = aggregateReports( + [makeAgent({ completionReport: "All tests pass." })], + [], + ) + expect(report).toContain("**Report:** All tests pass.") + }) + + it("should not include report line when null", () => { + const report = aggregateReports( + [makeAgent({ completionReport: null })], + [], + ) + expect(report).not.toContain("**Report:**") + }) + + describe("with merge results", () => { + it("should include merge results section when results present", () => { + const report = aggregateReports( + [makeAgent()], + [makeMergeResult({ branch: "multi-orch/abc123" })], + ) + expect(report).toContain("## Merge Results") + expect(report).toContain("Branch: multi-orch/abc123") + }) + + it("should show ✅ for successful merges", () => { + const report = aggregateReports( + [makeAgent()], + [makeMergeResult({ success: true, branch: "b1" })], + ) + expect(report).toContain("### ✅ Branch: b1") + }) + + it("should show ⚠️ for failed merges", () => { + const report = aggregateReports( + [makeAgent()], + [makeMergeResult({ success: false, branch: "b2" })], + ) + expect(report).toContain("### ⚠️ Branch: b2") + }) + + it("should include conflict info when conflicts found", () => { + const report = aggregateReports( + [makeAgent()], + [makeMergeResult({ conflictsFound: 3, conflictsResolved: 1 })], + ) + expect(report).toContain("**Conflicts found:** 3") + expect(report).toContain("**Conflicts resolved:** 1") + }) + + it("should not include conflict info when no conflicts", () => { + const report = aggregateReports( + [makeAgent()], + [makeMergeResult({ conflictsFound: 0 })], + ) + expect(report).not.toContain("**Conflicts found:**") + }) + + it("should include files changed count", () => { + const report = aggregateReports( + [makeAgent()], + [makeMergeResult({ filesChanged: ["a.ts", "b.ts", "c.ts"] })], + ) + expect(report).toContain("**Files changed:** 3") 
+ }) + }) + + describe("without merge results", () => { + it("should not include merge results section", () => { + const report = aggregateReports([makeAgent()], []) + expect(report).not.toContain("## Merge Results") + }) + + it("should not include merges line in summary", () => { + const report = aggregateReports([makeAgent()], []) + expect(report).not.toContain("**Merges:**") + }) + }) + + describe("summary section", () => { + it("should include summary with completed and failed counts", () => { + const agents = [ + makeAgent({ taskId: "a1", status: "completed" }), + makeAgent({ taskId: "a2", status: "completed" }), + makeAgent({ taskId: "a3", status: "failed" }), + ] + const report = aggregateReports(agents, []) + expect(report).toContain("## Summary") + expect(report).toContain("**Agents:** 2 completed, 1 failed") + }) + + it("should include merge summary when merges present", () => { + const report = aggregateReports( + [makeAgent()], + [ + makeMergeResult({ agentTaskId: "m1", success: true }), + makeMergeResult({ agentTaskId: "m2", success: false }), + makeMergeResult({ agentTaskId: "m3", success: true }), + ], + ) + expect(report).toContain("**Merges:** 2 succeeded, 1 had conflicts") + }) + + it("should handle all-success scenario", () => { + const agents = [ + makeAgent({ taskId: "a1", status: "completed" }), + makeAgent({ taskId: "a2", status: "completed" }), + ] + const report = aggregateReports(agents, []) + expect(report).toContain("**Agents:** 2 completed, 0 failed") + }) + + it("should handle all-failure scenario", () => { + const agents = [ + makeAgent({ taskId: "a1", status: "failed" }), + makeAgent({ taskId: "a2", status: "failed" }), + ] + const report = aggregateReports(agents, []) + expect(report).toContain("**Agents:** 0 completed, 2 failed") + }) + }) + + it("should handle empty agents array", () => { + const report = aggregateReports([], []) + expect(report).toContain("**0 agents** executed in parallel.") + 
expect(report).toContain("**Agents:** 0 completed, 0 failed") + }) +}) diff --git a/src/core/multi-orchestrator/__tests__/types.spec.ts b/src/core/multi-orchestrator/__tests__/types.spec.ts new file mode 100644 index 00000000000..e5574cb045d --- /dev/null +++ b/src/core/multi-orchestrator/__tests__/types.spec.ts @@ -0,0 +1,138 @@ +import { + generateAgentId, + createInitialAgentState, + createInitialOrchestratorState, + MULTI_ORCHESTRATOR_CONSTANTS, + type PlannedTask, + type AgentState, + type OrchestratorState, +} from "../types" + +describe("generateAgentId", () => { + it("should return a string", () => { + const id = generateAgentId() + expect(typeof id).toBe("string") + }) + + it("should return an 8-character string", () => { + const id = generateAgentId() + expect(id).toHaveLength(8) + }) + + it("should return unique ids on successive calls", () => { + const ids = new Set(Array.from({ length: 50 }, () => generateAgentId())) + expect(ids.size).toBe(50) + }) + + it("should contain only hex characters and hyphens from UUID", () => { + const id = generateAgentId() + // First 8 chars of a UUID (xxxxxxxx) are hex only + expect(id).toMatch(/^[0-9a-f]{8}$/) + }) +}) + +describe("createInitialAgentState", () => { + const task: PlannedTask = { + id: "test-id-1", + mode: "code", + title: "Implement feature X", + description: "Write the code for feature X", + assignedFiles: ["src/feature-x.ts"], + priority: 1, + } + + it("should set taskId from task.id", () => { + const state = createInitialAgentState(task) + expect(state.taskId).toBe("test-id-1") + }) + + it("should set mode from task.mode", () => { + const state = createInitialAgentState(task) + expect(state.mode).toBe("code") + }) + + it("should set title from task.title", () => { + const state = createInitialAgentState(task) + expect(state.title).toBe("Implement feature X") + }) + + it("should set status to 'pending'", () => { + const state = createInitialAgentState(task) + expect(state.status).toBe("pending") + }) + 
+ it("should set providerId and panelId to empty strings", () => { + const state = createInitialAgentState(task) + expect(state.providerId).toBe("") + expect(state.panelId).toBe("") + }) + + it("should set nullable fields to null", () => { + const state = createInitialAgentState(task) + expect(state.worktreePath).toBeNull() + expect(state.worktreeBranch).toBeNull() + expect(state.completionReport).toBeNull() + expect(state.tokenUsage).toBeNull() + expect(state.startedAt).toBeNull() + expect(state.completedAt).toBeNull() + }) + + it("should return a fresh object each call", () => { + const a = createInitialAgentState(task) + const b = createInitialAgentState(task) + expect(a).not.toBe(b) + expect(a).toEqual(b) + }) +}) + +describe("createInitialOrchestratorState", () => { + it("should return phase 'idle'", () => { + const state = createInitialOrchestratorState() + expect(state.phase).toBe("idle") + }) + + it("should return null plan", () => { + const state = createInitialOrchestratorState() + expect(state.plan).toBeNull() + }) + + it("should return empty agents array", () => { + const state = createInitialOrchestratorState() + expect(state.agents).toEqual([]) + }) + + it("should return empty mergeResults array", () => { + const state = createInitialOrchestratorState() + expect(state.mergeResults).toEqual([]) + }) + + it("should return null finalReport", () => { + const state = createInitialOrchestratorState() + expect(state.finalReport).toBeNull() + }) + + it("should return a fresh object each call", () => { + const a = createInitialOrchestratorState() + const b = createInitialOrchestratorState() + expect(a).not.toBe(b) + expect(a).toEqual(b) + }) +}) + +describe("MULTI_ORCHESTRATOR_CONSTANTS", () => { + it("should have MAX_AGENTS of 6", () => { + expect(MULTI_ORCHESTRATOR_CONSTANTS.MAX_AGENTS).toBe(6) + }) + + it("should have DEFAULT_MAX_AGENTS of 4", () => { + expect(MULTI_ORCHESTRATOR_CONSTANTS.DEFAULT_MAX_AGENTS).toBe(4) + }) + + it("should have WORKTREE_PREFIX 
'roo-multi-'", () => { + expect(MULTI_ORCHESTRATOR_CONSTANTS.WORKTREE_PREFIX).toBe("roo-multi-") + }) + + it("should have BRANCH_PREFIX 'multi-orch/'", () => { + expect(MULTI_ORCHESTRATOR_CONSTANTS.BRANCH_PREFIX).toBe("multi-orch/") + }) +}) diff --git a/src/core/multi-orchestrator/agent-coordinator.ts b/src/core/multi-orchestrator/agent-coordinator.ts new file mode 100644 index 00000000000..dd7dd573e2f --- /dev/null +++ b/src/core/multi-orchestrator/agent-coordinator.ts @@ -0,0 +1,264 @@ +// src/core/multi-orchestrator/agent-coordinator.ts +import { EventEmitter } from "events" +import type { ClineProvider } from "../webview/ClineProvider" +import type { AgentState } from "./types" +import type { TokenUsage, ToolUsage } from "@roo-code/types" +import { RooCodeEventName } from "@roo-code/types" + +/** Default timeout for waitForAll(): 10 minutes in milliseconds. */ +const DEFAULT_WAIT_TIMEOUT_MS = 10 * 60 * 1000 + +export interface AgentCoordinatorEvents { + agentCompleted: [taskId: string] + agentFailed: [taskId: string] + allCompleted: [] +} + +export class AgentCoordinator extends EventEmitter { + private agents: Map<string, AgentState> = new Map() + private providers: Map<string, ClineProvider> = new Map() + private completedSet: Set<string> = new Set() + + /** Register an agent and attach event listeners to its provider */ + registerAgent(agent: AgentState, provider: ClineProvider): void { + console.log( + `[AgentCoordinator] registerAgent: taskId=${agent.taskId}, title="${agent.title}", ` + + `getCurrentTask exists=${!!provider.getCurrentTask()}`, + ) + this.agents.set(agent.taskId, agent) + this.providers.set(agent.taskId, provider) + + // Listen for task completion on this provider. + // ClineProvider emits TaskCompleted with (taskId, tokenUsage, toolUsage).
+ provider.on( + RooCodeEventName.TaskCompleted, + (taskId: string, tokenUsage: TokenUsage, toolUsage: ToolUsage) => { + console.log( + `[AgentCoordinator] TaskCompleted received for agent ${agent.taskId} ` + + `(event taskId=${taskId})`, + ) + // Capture the completion report from the task's messages before aborting. + // The last "completion_result" say message contains the agent's summary. + const currentTask = provider.getCurrentTask() + if (currentTask) { + try { + const messages = currentTask.clineMessages || [] + const completionMsg = [...messages].reverse().find( + (m) => m.say === "completion_result" && m.text, + ) + const agentState = this.agents.get(agent.taskId) + if (agentState && completionMsg?.text) { + agentState.completionReport = completionMsg.text + console.log(`[AgentCoordinator] Captured completion report for agent ${agent.taskId} (${completionMsg.text.length} chars)`) + } + } catch (err) { + console.warn(`[AgentCoordinator] Failed to capture completion report: ${err}`) + } + } + + this.handleAgentFinished(agent.taskId, "completed", tokenUsage) + + // CRITICAL: Abort the task to prevent the while(!abort) loop from + // making another API request after attempt_completion succeeds. + if (currentTask) { + currentTask.abortTask(false).catch(() => {}) + console.log(`[AgentCoordinator] Aborted task for agent ${agent.taskId} to prevent completion loop`) + } + }, + ) + + // ClineProvider emits TaskAborted with (taskId). + provider.on(RooCodeEventName.TaskAborted, (_taskId: string) => { + console.log( + `[AgentCoordinator] TaskAborted received for agent ${agent.taskId} ` + + `(event taskId=${_taskId})`, + ) + this.handleAgentFinished(agent.taskId, "failed") + }) + } + + /** + * Centralized handler for agent completion/failure. + * Guards against duplicate events for the same agent. 
+ */ + private handleAgentFinished( + agentTaskId: string, + status: "completed" | "failed", + tokenUsage?: TokenUsage, + ): void { + console.log( + `[AgentCoordinator] handleAgentFinished: agentTaskId=${agentTaskId}, ` + + `status=${status}, already completed=${this.completedSet.has(agentTaskId)}, ` + + `completedSet size=${this.completedSet.size}/${this.agents.size}`, + ) + // Guard: ignore duplicate events for the same agent + if (this.completedSet.has(agentTaskId)) { + return + } + + const agentState = this.agents.get(agentTaskId) + if (!agentState) { + return + } + + this.completedSet.add(agentTaskId) + agentState.status = status + agentState.completedAt = Date.now() + + if (status === "completed" && tokenUsage) { + agentState.tokenUsage = { + input: tokenUsage.totalTokensIn, + output: tokenUsage.totalTokensOut, + } + } + + this.emit(status === "completed" ? "agentCompleted" : "agentFailed", agentTaskId) + + if (this.allComplete()) { + this.emit("allCompleted") + } + } + + /** + * Start all agents simultaneously. + * Each provider should already have a task created with startTask=false. + * Agents whose provider has no current task are marked as failed immediately + * so waitForAll() never hangs. + * + * BUG-002 fix: Instead of calling start() sequentially inside the loop, + * we collect all start thunks first, then fire them all at the same instant + * so no agent gets a head-start over another. + */ + async startAll(): Promise<void> { + console.log( + `[AgentCoordinator] startAll() — ${this.providers.size} providers registered`, + ) + + const starts: Array<() => void> = [] + + for (const [taskId, provider] of this.providers) { + const currentTask = provider.getCurrentTask() + console.log( + `[AgentCoordinator] startAll() — agent ${taskId}: ` + + `getCurrentTask()=${currentTask ? `Task#${currentTask.taskId}` : "UNDEFINED"}, ` + + `provider.clineStack size=${provider.getTaskStackSize?.() ?? 
"N/A"}`, + ) + if (!currentTask) { + // Task was never created or was already removed from the stack. + console.error( + `[AgentCoordinator] getCurrentTask() returned undefined for agent ${taskId}. ` + + `The task may not have been created yet or was removed from the stack.`, + ) + this.handleAgentFinished(taskId, "failed") + continue + } + + const agent = this.agents.get(taskId) + if (agent) { + agent.status = "running" + agent.startedAt = Date.now() + } + + starts.push(() => { + try { + currentTask.start() + } catch (err) { + console.error( + `[AgentCoordinator] start() threw for agent ${taskId}: ${ + (err as Error)?.message ?? String(err) + }`, + ) + this.handleAgentFinished(taskId, "failed") + } + }) + } + + // Stagger starts with a 2-second gap between each agent. + // Simultaneous API calls from N agents to the same provider cause rate + // limiting ("Provider ended the request: terminated") which cascades + // into retry loops. A 2s stagger lets each agent's first API request + // complete before the next one fires, avoiding provider throttling. + console.log(`[AgentCoordinator] Staggering ${starts.length} agent starts (2s apart)`) + for (let i = 0; i < starts.length; i++) { + if (i > 0) { + await new Promise((resolve) => setTimeout(resolve, 2000)) + } + console.log(`[AgentCoordinator] Starting agent ${i + 1}/${starts.length}`) + starts[i]() + } + } + + /** Check if all agents have finished (completed or failed) */ + allComplete(): boolean { + // If no agents registered, not "complete" — avoids vacuous-truth bugs + if (this.agents.size === 0) { + return false + } + return this.completedSet.size >= this.agents.size + } + + /** Get current state of all agents */ + getStates(): AgentState[] { + return Array.from(this.agents.values()) + } + + /** Get a specific agent's state */ + getState(taskId: string): AgentState | undefined { + return this.agents.get(taskId) + } + + /** + * Wait for all agents to complete (returns a promise). 
+ * @param timeoutMs Maximum time to wait in ms. Defaults to 10 minutes. + * Pass 0 or Infinity to wait indefinitely. + * @throws Error if the timeout is reached before all agents complete. + */ + waitForAll(timeoutMs: number = DEFAULT_WAIT_TIMEOUT_MS): Promise<void> { + if (this.allComplete()) return Promise.resolve() + + return new Promise((resolve, reject) => { + let timer: ReturnType<typeof setTimeout> | undefined + + const cleanup = () => { + if (timer !== undefined) { + clearTimeout(timer) + timer = undefined + } + } + + const onComplete = () => { + cleanup() + resolve() + } + + this.once("allCompleted", onComplete) + + // Set up timeout if a finite positive value is provided + if (timeoutMs > 0 && timeoutMs < Infinity) { + timer = setTimeout(() => { + this.off("allCompleted", onComplete) + const pending = Array.from(this.agents.entries()) + .filter(([id]) => !this.completedSet.has(id)) + .map(([id, agent]) => `${id} (${agent.title})`) + reject( + new Error( + `AgentCoordinator.waitForAll() timed out after ${timeoutMs}ms. ` + + `${this.completedSet.size}/${this.agents.size} agents completed. ` + + `Pending: ${pending.join(", ")}`, + ), + ) + }, timeoutMs) + } + }) + } + + /** Get total agent count */ + get totalAgents(): number { + return this.agents.size + } + + /** Get completed agent count */ + get completedAgents(): number { + return this.completedSet.size + } +} diff --git a/src/core/multi-orchestrator/agent-system-prompt.ts b/src/core/multi-orchestrator/agent-system-prompt.ts new file mode 100644 index 00000000000..e8ea281bb85 --- /dev/null +++ b/src/core/multi-orchestrator/agent-system-prompt.ts @@ -0,0 +1,60 @@ +/** + * System prompt additions for multi-orchestrator spawned agents. + * + * This section is prepended to each agent's system prompt when running + * as part of a multi-orchestrator parallel execution. It provides context + * about the parallel environment and sets expectations for completion behavior.
+ */ + +export interface AgentPromptContext { + agentTitle: string + agentMode: string + totalAgents: number + otherAgentTitles: string[] + assignedFiles?: string[] + isGitWorktreeIsolated: boolean +} + +/** + * Build the system prompt prefix for a multi-orchestrator spawned agent. + * This is injected BEFORE the mode's role definition. + */ +export function buildAgentSystemPromptPrefix(ctx: AgentPromptContext): string { + const otherAgents = ctx.otherAgentTitles.length > 0 + ? ctx.otherAgentTitles.map((t) => ` - ${t}`).join("\n") + : " (none)" + + const fileGuidance = ctx.assignedFiles && ctx.assignedFiles.length > 0 + ? `\nYou are primarily responsible for these files:\n${ctx.assignedFiles.map((f) => ` - ${f}`).join("\n")}\nAvoid modifying files outside this list unless absolutely necessary.` + : "" + + const isolationNote = ctx.isGitWorktreeIsolated + ? "You are working in an isolated git worktree — your file changes will not affect other agents." + : "WARNING: You are sharing the same working directory with other agents. Be careful not to overwrite files that other agents may be editing." + + return `MULTI-AGENT EXECUTION CONTEXT +============================= +You are operating as one of ${ctx.totalAgents} parallel agents under a Multi-Orchestrator. +Your role: "${ctx.agentTitle}" (${ctx.agentMode} mode) + +Other agents working alongside you: +${otherAgents} + +${isolationNote} +${fileGuidance} + +IMPORTANT INSTRUCTIONS FOR PARALLEL EXECUTION: +1. Focus ONLY on your assigned task. Do not attempt work that belongs to other agents. +2. Be thorough and complete — other agents depend on your output. +3. 
When you use attempt_completion, provide a DETAILED summary of everything you did: + - Every file you created or modified (with brief description of changes) + - Any decisions you made and why + - Any issues or edge cases you encountered + - What the next steps would be if applicable + This detailed summary will be sent to the Multi-Orchestrator for the final report. +4. Do not ask the user questions — you are running autonomously. +5. Complete your task from start to finish without stopping for feedback. +============================= + +` +} diff --git a/src/core/multi-orchestrator/merge-pipeline.ts b/src/core/multi-orchestrator/merge-pipeline.ts new file mode 100644 index 00000000000..f0c9e691d81 --- /dev/null +++ b/src/core/multi-orchestrator/merge-pipeline.ts @@ -0,0 +1,119 @@ +// src/core/multi-orchestrator/merge-pipeline.ts +import { execSync } from "child_process" +import type { AgentState, MergeResult } from "./types" + +export class MergePipeline { + constructor(private workspacePath: string) {} + + /** + * Merge all agent branches sequentially into the current branch. + * Order: by priority (lower = first). 
+ */ + async mergeAll( + agents: AgentState[], + onProgress: (agentId: string, result: MergeResult) => void, + ): Promise<MergeResult[]> { + const results: MergeResult[] = [] + + // Sort by start time for deterministic merge order + const sorted = [...agents] + .filter((a) => a.worktreeBranch && a.status === "completed") + .sort((a, b) => (a.startedAt || 0) - (b.startedAt || 0)) + + for (const agent of sorted) { + if (!agent.worktreeBranch) continue + + const result = this.mergeBranch(agent.taskId, agent.worktreeBranch) + results.push(result) + onProgress(agent.taskId, result) + } + + return results + } + + /** Merge a single agent's branch into the current branch */ + private mergeBranch(agentTaskId: string, branch: string): MergeResult { + try { + // Get list of files changed on this branch + const filesChanged = this.getFilesChanged(branch) + + // Attempt merge + try { + execSync(`git merge --no-ff "${branch}" -m "Merge multi-orch agent: ${agentTaskId}"`, { + cwd: this.workspacePath, + encoding: "utf-8", + timeout: 30000, + }) + + return { + agentTaskId, + branch, + success: true, + conflictsFound: 0, + conflictsResolved: 0, + filesChanged, + } + } catch (mergeError) { + // Merge conflict — count them + const conflictFiles = this.getConflictFiles() + const conflictsFound = conflictFiles.length + + if (conflictsFound > 0) { + // Abort the merge for now — let the report indicate conflicts + try { + execSync("git merge --abort", { cwd: this.workspacePath, encoding: "utf-8" }) + } catch { + // If abort fails, reset + execSync("git reset --hard HEAD", { cwd: this.workspacePath, encoding: "utf-8" }) + } + } + + return { + agentTaskId, + branch, + success: false, + conflictsFound, + conflictsResolved: 0, + filesChanged, + } + } + } catch (error) { + return { + agentTaskId, + branch, + success: false, + conflictsFound: 0, + conflictsResolved: 0, + filesChanged: [], + } + } + } + + /** Get files changed on a branch compared to current HEAD */ + private getFilesChanged(branch: string): 
string[] { + try { + const output = execSync(`git diff --name-only HEAD..."${branch}"`, { + cwd: this.workspacePath, + encoding: "utf-8", + timeout: 10000, + }) + return output.trim().split("\n").filter(Boolean) + } catch { + return [] + } + } + + /** Get files with merge conflicts */ + private getConflictFiles(): string[] { + try { + const output = execSync("git diff --name-only --diff-filter=U", { + cwd: this.workspacePath, + encoding: "utf-8", + timeout: 10000, + }) + return output.trim().split("\n").filter(Boolean) + } catch { + return [] + } + } +} diff --git a/src/core/multi-orchestrator/orchestrator.ts b/src/core/multi-orchestrator/orchestrator.ts new file mode 100644 index 00000000000..d9177427de2 --- /dev/null +++ b/src/core/multi-orchestrator/orchestrator.ts @@ -0,0 +1,613 @@ +// src/core/multi-orchestrator/orchestrator.ts +import * as vscode from "vscode" +import type { ProviderSettings, ModeConfig, RooCodeSettings } from "@roo-code/types" +import { PanelSpawner } from "./panel-spawner" +import { MultiWorktreeManager } from "./worktree-manager" +import { generatePlan } from "./plan-generator" +import { AgentCoordinator } from "./agent-coordinator" +import { MergePipeline } from "./merge-pipeline" +import { aggregateReports } from "./report-aggregator" +import { + type OrchestratorState, + type OrchestratorPlan, + type VerificationFinding, + createInitialOrchestratorState, + createInitialAgentState, + MULTI_ORCHESTRATOR_CONSTANTS, +} from "./types" +import { buildAgentSystemPromptPrefix } from "./agent-system-prompt" + +export class MultiOrchestrator { + private state: OrchestratorState = createInitialOrchestratorState() + private panelSpawner: PanelSpawner + private worktreeManager: MultiWorktreeManager | null = null + private coordinator: AgentCoordinator | null = null + private mergePipeline: MergePipeline | null = null + private aborted = false + + constructor( + private context: vscode.ExtensionContext, + private outputChannel: vscode.OutputChannel, 
+ private workspacePath: string, + ) { + this.panelSpawner = new PanelSpawner(context, outputChannel) + } + + /** + * Execute the full multi-orchestration lifecycle. + */ + async execute( + userRequest: string, + maxAgents: number, + providerSettings: ProviderSettings, + availableModes: ModeConfig[], + planReviewEnabled: boolean, + mergeMode: "auto" | "always" | "never", + onStateChange: (state: OrchestratorState) => void, + verifyEnabled: boolean = false, + ): Promise<void> { + this.aborted = false + const notify = () => { + console.log("[MultiOrch:Handler] notify() → phase:", this.state.phase, "agents:", this.state.agents.length) + onStateChange({ ...this.state }) + } + + try { + // PHASE 1: PLAN + this.state.phase = "planning" + notify() + + console.log("[MultiOrch:Handler] execute() entry ──────────────────────") + console.log("[MultiOrch:Handler] userRequest:", JSON.stringify(userRequest).slice(0, 200)) + console.log("[MultiOrch:Handler] maxAgents:", maxAgents, "typeof:", typeof maxAgents) + console.log("[MultiOrch:Handler] providerSettings.apiProvider:", providerSettings.apiProvider) + console.log("[MultiOrch:Handler] providerSettings.apiModelId:", providerSettings.apiModelId) + console.log("[MultiOrch:Handler] providerSettings has apiKey:", !!providerSettings.apiKey) + console.log("[MultiOrch:Handler] availableModes:", availableModes.length, "modes") + console.log("[MultiOrch:Handler] planReviewEnabled:", planReviewEnabled) + console.log("[MultiOrch:Handler] mergeMode:", mergeMode) + console.log("[MultiOrch:Handler] workspacePath:", this.workspacePath) + + const clampedMaxAgents = Math.min( + Math.max(1, maxAgents), + MULTI_ORCHESTRATOR_CONSTANTS.MAX_AGENTS, + ) + console.log("[MultiOrch:Handler] clampedMaxAgents:", clampedMaxAgents, "(MAX_AGENTS constant:", MULTI_ORCHESTRATOR_CONSTANTS.MAX_AGENTS, ")") + + console.log("[MultiOrch:Handler] calling generatePlan() ...") + const plan = await generatePlan(userRequest, availableModes, clampedMaxAgents, 
providerSettings) + console.log("[MultiOrch:Handler] generatePlan() returned:", plan ? `${plan.tasks.length} tasks` : "null/undefined") + if (plan && plan.tasks.length > 0) { + for (const t of plan.tasks) { + console.log("[MultiOrch:Handler] task:", t.id, "mode:", t.mode, "title:", t.title) + } + } + if (!plan || plan.tasks.length === 0) { + console.log("[MultiOrch:Handler] ⚠ empty plan — setting phase=complete") + this.state.phase = "complete" + this.state.finalReport = "Could not decompose the request into parallel tasks." + notify() + return + } + + this.state.plan = plan + this.state.agents = plan.tasks.map(createInitialAgentState) + notify() + + // If plan review enabled, stop here and wait for approval + if (planReviewEnabled) { + console.log("[MultiOrch:Handler] planReview ON → returning early for user approval") + // The onStateChange callback will trigger UI to show the plan + // The execute() caller should handle the approval flow + return + } + + console.log("[MultiOrch:Handler] planReview OFF → continuing to executeFromPlan()") + await this.executeFromPlan(plan, providerSettings, mergeMode, onStateChange, verifyEnabled) + } catch (error) { + console.error("[MultiOrch:Handler] execute() CAUGHT error:", error) + console.error("[MultiOrch:Handler] error stack:", (error as Error)?.stack ?? "no stack") + this.state.phase = "complete" + this.state.finalReport = `Orchestration failed: ${error}` + notify() + } + } + + /** + * Resume execution after plan approval (called when user approves in plan-review mode). 
+ */ + async executeFromPlan( + plan: OrchestratorPlan, + providerSettings: ProviderSettings, + mergeMode: "auto" | "always" | "never", + onStateChange: (state: OrchestratorState) => void, + verifyEnabled: boolean = false, + ): Promise<void> { + const notify = () => onStateChange({ ...this.state }) + + try { + // PHASE 2: SPAWN + this.state.phase = "spawning" + notify() + + const needsMerge = + mergeMode === "always" || + (mergeMode === "auto" && plan.requiresMerge) || + false + + // Check if we CAN use worktrees (requires git repo) + let canUseWorktrees = false + if (needsMerge) { + this.worktreeManager = new MultiWorktreeManager(this.workspacePath) + canUseWorktrees = await this.worktreeManager.isGitRepo() + + if (!canUseWorktrees) { + console.log("[MultiOrch] No git repo found, skipping worktree isolation") + // Agents will work on the same directory — this is fine if files don't overlap + } + } + + // Only create worktrees if git is available + if (canUseWorktrees && needsMerge) { + const agentIds = plan.tasks.map((t) => t.id) + const worktrees = await this.worktreeManager!.createWorktrees(agentIds) + + // Update agent states with worktree info + for (const agent of this.state.agents) { + const wt = worktrees.get(agent.taskId) + if (wt) { + agent.worktreePath = wt.path + agent.worktreeBranch = wt.branch + } + } + } + + // Open panels + const titles = plan.tasks.map((t) => t.title) + const panels = await this.panelSpawner.spawnPanels(plan.tasks.length, titles) + + if (panels.size === 0) { + throw new Error("No panels were spawned — cannot proceed with orchestration.") + } + + // Build a lookup so we can match tasks to successfully-spawned panels. + // If some panels failed to spawn, the corresponding tasks are marked failed. + const panelEntries = Array.from(panels.entries()) + this.coordinator = new AgentCoordinator() + + // Auto-approval settings so spawned agents don't block on tool approval prompts. 
+ // The user interacts with the orchestrator sidebar — nobody is clicking approve + // in the spawned panels, so every tool operation must be pre-approved. + // + // CRITICAL FIX: These are set as per-provider overrides (NOT via + // setValues/ContextProxy). ContextProxy is a singleton shared by ALL + // providers — any concurrent activity (main sidebar, other panels, mode + // switches) can overwrite values that were set via setValues(), causing + // auto-approval to silently disappear by the time the Task's + // checkAutoApproval() reads provider.getState(). + // + // Per-provider overrides are held in instance memory and merged LAST + // in getState(), so they always win regardless of ContextProxy mutations. + const autoApprovalOverrides: Partial<RooCodeSettings> & { multiOrchForceApproveAll: boolean } = { + autoApprovalEnabled: true, + multiOrchForceApproveAll: true, // bypass ALL approval checks unconditionally + alwaysAllowReadOnly: true, + alwaysAllowReadOnlyOutsideWorkspace: true, + alwaysAllowWrite: true, + alwaysAllowWriteOutsideWorkspace: true, + alwaysAllowWriteProtected: true, + alwaysAllowExecute: true, + alwaysAllowMcp: true, + alwaysAllowModeSwitch: true, + alwaysAllowSubtasks: true, + alwaysAllowFollowupQuestions: true, + followupAutoApproveTimeoutMs: 1, + writeDelayMs: 0, + requestDelaySeconds: 0, + // Force PREVENT_FOCUS_DISRUPTION so file edits save directly without + // opening diff views. Diff views fight with the agent's webview panel + // for the same ViewColumn, causing layout chaos. + experiments: { preventFocusDisruption: true }, + } + + // All panels are already spawned. Now create tasks in parallel — + // each task targets a different ClineProvider so there are no + // shared-state conflicts between the concurrent createTask() calls. 
+ const taskPromises = plan.tasks.map(async (task, i) => { + if (this.aborted) return + + const agent = this.state.agents[i] + + // Panel index may not exist if that panel failed to spawn + if (i >= panelEntries.length) { + console.warn(`[MultiOrch] No panel available for task ${task.id} ("${task.title}") — skipping`) + agent.status = "failed" + agent.completionReport = "Panel failed to spawn" + return + } + + const [panelId, spawned] = panelEntries[i] + + agent.providerId = panelId + agent.panelId = panelId + + // Set per-provider auto-approval overrides BEFORE creating the task. + // These persist in provider instance memory and are immune to + // ContextProxy mutations from other providers. + spawned.provider.setAutoApprovalOverrides(autoApprovalOverrides) + + // Point this provider at its worktree directory (if worktrees are in use). + // This must happen BEFORE createTask so the Task's cwd is isolated. + if (agent.worktreePath) { + spawned.provider.setWorkingDirectory(agent.worktreePath) + console.log(`[MultiOrch] Agent ${task.id} cwd set to worktree: ${agent.worktreePath}`) + } + + // Switch provider to the correct mode BEFORE creating the task. + // The Task constructor initializes its mode from provider.getState() + // during initializeTaskMode(), so the mode must already be set. + // (Mirrors the pattern in ClineProvider.delegateParentAndOpenChild) + try { + await spawned.provider.handleModeSwitch(task.mode) + } catch (e) { + console.warn( + `[MultiOrch] handleModeSwitch failed for agent ${task.id} mode '${task.mode}': ${ + (e as Error)?.message ?? 
String(e) + }`, + ) + } + + // Build the agent's system prompt prefix with parallel execution context + const agentPromptPrefix = buildAgentSystemPromptPrefix({ + agentTitle: task.title, + agentMode: task.mode, + totalAgents: plan.tasks.length, + otherAgentTitles: plan.tasks.filter((t) => t.id !== task.id).map((t) => t.title), + assignedFiles: task.assignedFiles, + isGitWorktreeIsolated: !!agent.worktreePath, + }) + + // Prepend the multi-agent context to the task description + const fullTaskDescription = `${agentPromptPrefix}${task.description}` + + // Create the task WITHOUT passing configuration — auto-approval is + // guaranteed by the per-provider overrides set above. + await spawned.provider.createTask(fullTaskDescription, undefined, undefined, { + startTask: false, + }) + + // Verify auto-approval is active after task creation. + // This catches regressions where createTask() might reset state. + try { + const postCreateState = await spawned.provider.getState() + console.log( + `[MultiOrch] Agent ${task.id} post-createTask auto-approval check: ` + + `autoApprovalEnabled=${postCreateState?.autoApprovalEnabled}, ` + + `alwaysAllowWrite=${postCreateState?.alwaysAllowWrite}, ` + + `alwaysAllowExecute=${postCreateState?.alwaysAllowExecute}, ` + + `alwaysAllowReadOnly=${postCreateState?.alwaysAllowReadOnly}, ` + + `alwaysAllowMcp=${postCreateState?.alwaysAllowMcp}`, + ) + } catch (stateErr) { + console.warn(`[MultiOrch] Could not read back state after createTask: ${stateErr}`) + } + + // Register with coordinator + this.coordinator!.registerAgent(agent, spawned.provider) + }) + await Promise.all(taskPromises) + + notify() + + // PHASE 3: RUN + this.state.phase = "running" + notify() + + // Attach event listeners BEFORE starting so we never miss + // early completions or failures that fire during startAll(). 
+ this.coordinator.on("agentCompleted", () => notify()) + this.coordinator.on("agentFailed", () => notify()) + + // Verify at least one agent was successfully registered + if (this.coordinator.totalAgents === 0) { + throw new Error( + "No agents were registered with the coordinator — " + + "all panels may have failed to spawn or all tasks failed to create.", + ) + } + + // Start agents with staggered 2s delay between each to avoid API rate limiting. + // The stagger prevents all agents from hitting the same provider simultaneously. + await this.coordinator.startAll() + + // Wait for all to complete (with timeout) + await this.coordinator.waitForAll() + + // PHASE 4: MERGE (if needed and worktrees were actually created) + if (canUseWorktrees && needsMerge && mergeMode !== "never") { + this.state.phase = "merging" + notify() + + this.mergePipeline = new MergePipeline(this.workspacePath) + this.state.mergeResults = await this.mergePipeline.mergeAll( + this.state.agents, + (_agentId, _result) => notify(), + ) + } + + // PHASE 5: VERIFY (optional — controlled by multiOrchVerifyEnabled setting) + if (verifyEnabled && !this.aborted) { + await this.executeVerificationPhase(providerSettings, onStateChange) + } + + // PHASE 6: REPORT + this.state.phase = "reporting" + notify() + + this.state.finalReport = aggregateReports( + this.state.agents, + this.state.mergeResults, + this.state.verificationFindings, + ) + + // Cleanup worktrees + if (this.worktreeManager) { + await this.worktreeManager.cleanupWorktrees() + } + + // Close all agent panels — the work is done, reports are collected. + // Delay slightly so the user can see the final state before panels vanish. 
+ setTimeout(async () => { + try { + await this.panelSpawner.closeAllPanels() + console.log("[MultiOrch] All agent panels closed after completion") + } catch (err) { + console.error("[MultiOrch] Failed to close panels:", err) + } + }, 2000) + + this.state.phase = "complete" + notify() + } catch (error) { + this.state.phase = "complete" + this.state.finalReport = `Orchestration failed: ${error}` + onStateChange({ ...this.state }) + } + } + + /** + * Phase 5: VERIFY — spawn a single verification agent in "debug" mode + * to review all files changed by the original agents. + * + * The verification agent receives: + * - All completion reports from the original agents + * - The list of files changed (from merge results or agent reports) + * - A task prompt asking it to check for bugs, inconsistencies, + * missing error handling, and integration issues. + * + * Its findings are stored in `state.verificationFindings` and included + * in the final aggregated report. + */ + private async executeVerificationPhase( + providerSettings: ProviderSettings, + onStateChange: (state: OrchestratorState) => void, + ): Promise { + const notify = () => onStateChange({ ...this.state }) + + this.state.phase = "verifying" + notify() + + console.log("[MultiOrch] Starting verification phase") + + try { + // ── Build context for the verification agent ────────────── + const completionSummaries = this.state.agents + .map((agent) => { + const status = agent.status === "completed" ? "✅ Completed" : "❌ Failed" + const report = agent.completionReport ?? 
"(no report)" + return `### Agent: ${agent.title} (${agent.mode} mode)\n- Status: ${status}\n- Report:\n${report}` + }) + .join("\n\n") + + // Collect changed files from merge results OR agent reports + const changedFiles = new Set() + for (const mr of this.state.mergeResults) { + for (const f of mr.filesChanged) { + changedFiles.add(f) + } + } + // If no merge results, try to extract file references from completion reports + if (changedFiles.size === 0) { + for (const agent of this.state.agents) { + if (agent.completionReport) { + // Simple heuristic: look for file paths in completion reports + const filePatterns = agent.completionReport.match(/(?:^|\s)([\w./-]+\.\w{1,10})(?:\s|$|,|\))/gm) + if (filePatterns) { + for (const match of filePatterns) { + changedFiles.add(match.trim()) + } + } + } + } + } + + const filesListing = changedFiles.size > 0 + ? `## Files Changed\n${[...changedFiles].map((f) => `- \`${f}\``).join("\n")}` + : "## Files Changed\n(No specific files identified from merge results — review completion reports for details.)" + + const verifyTaskDescription = [ + "# Post-Completion Verification Task", + "", + "You are a verification agent spawned after a parallel multi-agent orchestration.", + "Your job is to review the code changes made by the agents listed below.", + "", + "## Instructions", + "1. Read through the completion reports below to understand what each agent did.", + "2. Review the changed files listed below for:", + " - Bugs or logic errors", + " - Missing error handling", + " - Inconsistencies between what different agents produced", + " - Integration issues (e.g., imports that reference code from another agent's work)", + " - Type errors or missing type definitions", + " - Dead code or unused imports", + "3. Report your findings clearly. 
For each issue, specify:", + " - The file and approximate location", + " - What the issue is", + " - Suggested fix", + "", + "If everything looks correct, state that the code passes verification.", + "", + "## Agent Completion Reports", + "", + completionSummaries, + "", + filesListing, + ].join("\n") + + // ── Spawn a single verification panel ───────────────────── + const verifyPanels = await this.panelSpawner.spawnPanels(1, ["🔍 Verification"]) + + if (verifyPanels.size === 0) { + console.warn("[MultiOrch] Could not spawn verification panel — skipping verification") + this.state.verificationFindings.push({ + agentTaskId: "verify-0", + findings: "Verification skipped: could not spawn verification panel.", + severity: "warning", + }) + return + } + + const [panelId, spawned] = Array.from(verifyPanels.entries())[0] + + // Apply the same auto-approval overrides + const autoApprovalOverrides: Partial & { multiOrchForceApproveAll: boolean } = { + autoApprovalEnabled: true, + multiOrchForceApproveAll: true, + alwaysAllowReadOnly: true, + alwaysAllowReadOnlyOutsideWorkspace: true, + alwaysAllowWrite: true, + alwaysAllowWriteOutsideWorkspace: true, + alwaysAllowWriteProtected: true, + alwaysAllowExecute: true, + alwaysAllowMcp: true, + alwaysAllowModeSwitch: true, + alwaysAllowSubtasks: true, + alwaysAllowFollowupQuestions: true, + followupAutoApproveTimeoutMs: 1, + writeDelayMs: 0, + requestDelaySeconds: 0, + } + spawned.provider.setAutoApprovalOverrides(autoApprovalOverrides) + + // Switch to "debug" mode (or fall back to "code" if debug doesn't exist) + try { + await spawned.provider.handleModeSwitch("debug") + } catch { + try { + await spawned.provider.handleModeSwitch("code") + } catch (e2) { + console.warn(`[MultiOrch] Verification agent mode switch failed: ${e2}`) + } + } + + // Create the verification task + await spawned.provider.createTask(verifyTaskDescription, undefined, undefined, { + startTask: false, + }) + + // Set up a coordinator for just the 
verification agent + const verifyCoordinator = new AgentCoordinator() + const verifyAgentState = { + taskId: "verify-0", + providerId: panelId, + panelId: panelId, + worktreePath: null, + worktreeBranch: null, + mode: "debug", + status: "pending" as const, + title: "Verification Agent", + completionReport: null, + tokenUsage: null, + startedAt: null, + completedAt: null, + } + verifyCoordinator.registerAgent(verifyAgentState, spawned.provider) + + // Start and wait + verifyCoordinator.startAll() + console.log("[MultiOrch] Verification agent started — waiting for completion") + + await verifyCoordinator.waitForAll() + + console.log("[MultiOrch] Verification agent completed") + + // Capture findings from the verification agent's completion report + const findings = verifyAgentState.completionReport ?? "No findings reported by verification agent." + const severity = this.classifyFindingSeverity(findings) + + this.state.verificationFindings.push({ + agentTaskId: "verify-0", + findings, + severity, + }) + + // Close the verification panel + try { + await this.panelSpawner.closeAllPanels() + console.log("[MultiOrch] Verification panel closed") + } catch (err) { + console.error("[MultiOrch] Failed to close verification panel:", err) + } + } catch (error) { + console.error("[MultiOrch] Verification phase failed:", error) + this.state.verificationFindings.push({ + agentTaskId: "verify-0", + findings: `Verification phase encountered an error: ${error}`, + severity: "error", + }) + } + } + + /** + * Classify the severity of verification findings based on content heuristics. 
+ */ + private classifyFindingSeverity(findings: string): "info" | "warning" | "error" { + const lower = findings.toLowerCase() + if ( + lower.includes("passes verification") || + lower.includes("no issues") || + lower.includes("looks correct") || + lower.includes("no bugs found") || + lower.includes("everything looks good") + ) { + return "info" + } + if ( + lower.includes("error") || + lower.includes("bug") || + lower.includes("crash") || + lower.includes("type error") || + lower.includes("undefined") || + lower.includes("null reference") + ) { + return "error" + } + return "warning" + } + + /** Abort the current orchestration */ + async abort(): Promise { + this.aborted = true + await this.panelSpawner.closeAllPanels() + if (this.worktreeManager) { + await this.worktreeManager.cleanupWorktrees() + } + this.state.phase = "complete" + this.state.finalReport = "Orchestration aborted by user." + } + + /** Get current state */ + getState(): OrchestratorState { + return { ...this.state } + } +} diff --git a/src/core/multi-orchestrator/panel-spawner.ts b/src/core/multi-orchestrator/panel-spawner.ts new file mode 100644 index 00000000000..7f0b2161b12 --- /dev/null +++ b/src/core/multi-orchestrator/panel-spawner.ts @@ -0,0 +1,210 @@ +// src/core/multi-orchestrator/panel-spawner.ts +import * as vscode from "vscode" +import { ClineProvider } from "../webview/ClineProvider" +import { ContextProxy } from "../config/ContextProxy" + +export interface SpawnedPanel { + id: string + provider: ClineProvider + panel: vscode.WebviewPanel + /** The ViewColumn this panel was placed in (1-indexed) */ + viewColumn: vscode.ViewColumn +} + +export class PanelSpawner { + private panels: Map = new Map() + private savedLayout: unknown = null + + constructor( + private context: vscode.ExtensionContext, + private outputChannel: vscode.OutputChannel, + ) {} + + /** + * Spawn N editor tab panels in equal-width columns. 
+ * + * Uses `vscode.setEditorLayout` to create an N-column layout FIRST, + * then walks focus across editor groups using `focusNextGroup`, + * placing each panel at `ViewColumn.Active`. This avoids relying on + * explicit ViewColumn numbers whose group-index mapping is unreliable + * in VS Code after a programmatic layout change (BUG-003). + */ + async spawnPanels(count: number, titles: string[]): Promise> { + const contextProxy = await ContextProxy.getInstance(this.context) + const errors: Array<{ index: number; title: string; error: Error }> = [] + + // Save the current layout so we can restore it after orchestration + try { + this.savedLayout = await vscode.commands.executeCommand("vscode.getEditorLayout") + console.log("[PanelSpawner] Saved current editor layout") + } catch { + console.warn("[PanelSpawner] Could not save current editor layout") + } + + // Set up an N-column layout with equal widths. + // orientation: 0 = horizontal (columns side by side) + const equalSize = 1 / count + const groups = Array.from({ length: count }, () => ({ size: equalSize })) + + try { + await vscode.commands.executeCommand("vscode.setEditorLayout", { + orientation: 0, + groups, + }) + console.log(`[PanelSpawner] Set editor layout to ${count} equal columns`) + // Wait for VS Code to fully apply the layout before placing panels + await new Promise((resolve) => setTimeout(resolve, 500)) + } catch (err) { + console.warn("[PanelSpawner] Failed to set editor layout:", err) + } + + // Focus the first editor group so panel placement starts at the leftmost column + try { + await vscode.commands.executeCommand("workbench.action.focusFirstEditorGroup") + await new Promise((resolve) => setTimeout(resolve, 100)) + } catch { + console.warn("[PanelSpawner] Could not focus first editor group") + } + + // Walk focus across groups, creating each panel at ViewColumn.Active. 
+ // This guarantees each panel lands in the correct column regardless of + // how VS Code internally indexes its editor groups after setEditorLayout. + for (let i = 0; i < count; i++) { + const id = `agent-${i}` + const title = titles[i] || `Agent ${i + 1}` + + if (i > 0) { + // Move focus to the next editor group (next column) + await vscode.commands.executeCommand("workbench.action.focusNextGroup") + await new Promise((resolve) => setTimeout(resolve, 100)) + } + + const result = await this.spawnSinglePanel(id, title, vscode.ViewColumn.Active, contextProxy) + if (result.error) { + errors.push({ index: i, title, error: result.error }) + } + } + + if (errors.length > 0 && this.panels.size === 0) { + throw new Error( + `[PanelSpawner] Failed to spawn any panels (${errors.length}/${count} failed). ` + + `First error: ${errors[0].error.message}`, + ) + } + + if (errors.length > 0) { + console.warn( + `[PanelSpawner] ${errors.length}/${count} panel(s) failed to spawn: ` + + errors.map((e) => `"${e.title}"`).join(", "), + ) + } + + return new Map(this.panels) + } + + /** + * Spawn a single editor panel with its own ClineProvider. + */ + private async spawnSinglePanel( + id: string, + title: string, + viewColumn: vscode.ViewColumn, + contextProxy: ContextProxy, + ): Promise<{ error: Error | undefined }> { + try { + const provider = new ClineProvider(this.context, this.outputChannel, "editor", contextProxy) + + const panel = vscode.window.createWebviewPanel( + ClineProvider.tabPanelId, + `⚡ ${title}`, + { viewColumn, preserveFocus: true }, + { + enableScripts: true, + retainContextWhenHidden: true, + localResourceRoots: [this.context.extensionUri], + }, + ) + + // CRITICAL: Read the ACTUAL ViewColumn that VS Code assigned to this panel. + // The input `viewColumn` may be a symbolic value like ViewColumn.Active (-1) + // which VS Code resolves internally. The panel.viewColumn gives us the real + // column number (1, 2, 3...) which we need for targeting file operations. 
+ const actualViewColumn = panel.viewColumn ?? viewColumn + provider.viewColumn = actualViewColumn + console.log(`[PanelSpawner] Panel "${title}" placed at ViewColumn ${actualViewColumn} (requested: ${viewColumn})`) + + // Also update viewColumn if the panel moves to a different column + panel.onDidChangeViewState((e) => { + if (e.webviewPanel.viewColumn !== undefined) { + provider.viewColumn = e.webviewPanel.viewColumn + } + }) + + await provider.resolveWebviewView(panel) + + panel.onDidDispose(() => { + this.panels.delete(id) + }) + + this.panels.set(id, { id, provider, panel, viewColumn: actualViewColumn }) + return { error: undefined } + } catch (error) { + const err = error instanceof Error ? error : new Error(String(error)) + console.error(`[PanelSpawner] Failed to spawn panel ${id} ("${title}"): ${err.message}`) + return { error: err } + } + } + + /** + * Close a specific panel and dispose its provider. + */ + async closePanel(id: string): Promise { + const spawned = this.panels.get(id) + if (!spawned) return + + this.panels.delete(id) + + try { + await spawned.provider.dispose() + } catch (error) { + console.error(`[PanelSpawner] Error disposing provider for ${id}:`, error) + } + + try { + spawned.panel.dispose() + } catch (error) { + console.error(`[PanelSpawner] Error disposing panel for ${id}:`, error) + } + } + + /** + * Close all panels and restore the original editor layout. 
+ */ + async closeAllPanels(): Promise { + const ids = [...this.panels.keys()] + for (const id of ids) { + await this.closePanel(id) + } + + // Restore the editor layout that was active before orchestration + if (this.savedLayout) { + try { + await vscode.commands.executeCommand("vscode.setEditorLayout", this.savedLayout) + console.log("[PanelSpawner] Restored original editor layout") + } catch { + console.warn("[PanelSpawner] Could not restore original editor layout") + } + this.savedLayout = null + } + } + + /** Get all active spawned panels */ + getPanels(): Map { + return new Map(this.panels) + } + + /** Get a specific provider by ID */ + getProvider(id: string): ClineProvider | undefined { + return this.panels.get(id)?.provider + } +} diff --git a/src/core/multi-orchestrator/plan-generator.ts b/src/core/multi-orchestrator/plan-generator.ts new file mode 100644 index 00000000000..699037b0ee1 --- /dev/null +++ b/src/core/multi-orchestrator/plan-generator.ts @@ -0,0 +1,246 @@ +// src/core/multi-orchestrator/plan-generator.ts +import type { ProviderSettings, ModeConfig } from "@roo-code/types" +import { buildApiHandler, type SingleCompletionHandler } from "../../api" +import type { OrchestratorPlan, PlannedTask } from "./types" +import { generateAgentId } from "./types" + +const PLAN_SYSTEM_PROMPT = `You are a task decomposition engine. Given a user request, break it into independent parallel tasks that can be executed by separate agents simultaneously. + +IMPORTANT: You are powering a MULTI-AGENT system. The whole point is to split work across multiple agents working in parallel. When the request is non-trivial, you SHOULD create multiple tasks. A single task defeats the purpose of multi-agent orchestration. 
+ +For each task: +- Assign the most appropriate mode from the available modes list +- Write a clear, self-contained task description that an agent can execute independently +- List expected files the agent will touch (for merge conflict prevention) +- Ensure tasks are as independent as possible — minimize file overlap + +TASK COUNT GUIDELINES: +- Trivial single-file tasks (e.g., "make a calculator"): 1 task is acceptable +- Small multi-file tasks (e.g., "add a login page"): 2 tasks +- Medium features (e.g., "build user auth with tests"): 3-4 tasks +- Large multi-module features: up to the max agent count +- If the max agent count is > 1 and the request involves multiple files or concerns, you SHOULD use multiple tasks +- NEVER create a separate task for documentation unless explicitly requested +- NEVER create separate tasks for HTML, CSS, and JS of the same component — that's ONE task +- Each task should produce a COMPLETE, working piece of functionality + +CRITICAL RULES: +- Do NOT assign "architect" mode as a parallel task. Architecture decisions should be embedded in the task descriptions themselves. +- The orchestrator has already analyzed the request — each code task should include the architectural context it needs. +- Only use these modes for parallel tasks: "code" (implementation), "ask" (research), "debug" (fixing) +- For truly trivial tasks (like "make a calculator"), a single "code" agent is fine. +- But for anything involving multiple files, modules, or separable concerns, USE MULTIPLE TASKS. +- Never create more tasks than the specified max agent count. + +You MUST respond with valid JSON only. No markdown code fences. No explanation text. Just the JSON object. 
+ +Response format: +{ + "tasks": [ + { + "mode": "", + "title": "", + "description": "", + "assignedFiles": [""], + "priority": <1-N> + } + ], + "requiresMerge": , + "estimatedComplexity": "" +}` + +export async function generatePlan( + userRequest: string, + availableModes: ModeConfig[], + maxAgents: number, + providerSettings: ProviderSettings, +): Promise { + try { + console.log("[MultiOrch:Plan] ========== PLAN GENERATION START ==========") + console.log("[MultiOrch:Plan] generatePlan called with maxAgents:", maxAgents) + console.log("[MultiOrch:Plan] userRequest:", userRequest) + + const handler = buildApiHandler(providerSettings) + + if (!("completePrompt" in handler)) { + console.error("[MultiOrch:Plan] Handler does not support completePrompt — provider type:", providerSettings?.apiProvider ?? "unknown") + return null + } + + const modeList = availableModes + .filter((m) => !["multi-orchestrator", "orchestrator", "architect"].includes(m.slug)) + .map((m) => `- ${m.slug}: ${m.description || m.name}`) + .join("\n") + + console.log("[MultiOrch:Plan] Available modes for plan:\n", modeList) + + const prompt = `Available modes:\n${modeList}\n\nNumber of agents requested: ${maxAgents}. You MUST create EXACTLY ${maxAgents} tasks. The user has explicitly chosen this number. Split the work across exactly ${maxAgents} independent tasks, each handling a different aspect of the request. 
If the request seems simple, create ${maxAgents} tasks that each handle a different angle (e.g., implementation, testing, documentation, error handling, edge cases, refactoring).\n\nUser request:\n${userRequest}` + + const fullPrompt = `${PLAN_SYSTEM_PROMPT}\n\n${prompt}` + console.log(`[MultiOrch:Plan] Sending prompt (${fullPrompt.length} chars)`) + console.log("[MultiOrch:Plan] === FULL PROMPT START ===") + console.log(fullPrompt) + console.log("[MultiOrch:Plan] === FULL PROMPT END ===") + + const response = await (handler as unknown as SingleCompletionHandler).completePrompt(fullPrompt) + + // Null/empty response check + if (!response || response.trim().length === 0) { + console.error("[MultiOrch:Plan] ❌ completePrompt returned null/empty response!") + console.error("[MultiOrch:Plan] response value:", JSON.stringify(response)) + return null + } + + console.log(`[MultiOrch:Plan] Raw response (${response.length} chars):`) + console.log(`[MultiOrch:Plan] Raw response: ${response.substring(0, 500)}`) + if (response.length > 500) { + console.log(`[MultiOrch:Plan] ... (${response.length - 500} more chars)`) + console.log(`[MultiOrch:Plan] Raw response tail: ...${response.substring(response.length - 200)}`) + } + + const plan = parsePlanResponse(response, maxAgents) + + if (!plan) { + console.error("[MultiOrch:Plan] ❌ parsePlanResponse returned null — could not parse LLM response") + return null + } + + console.log(`[MultiOrch:Plan] Parsed ${plan.tasks.length} tasks`) + for (const task of plan.tasks) { + console.log(`[MultiOrch:Plan] Task "${task.title}" [mode=${task.mode}, priority=${task.priority}, files=${(task.assignedFiles ?? 
[]).join(",")}]`) + } + console.log(`[MultiOrch:Plan] requiresMerge=${plan.requiresMerge}, estimatedComplexity=${plan.estimatedComplexity}`) + + // Warn if LLM returned only 1 task but we requested multiple + if (plan.tasks.length === 1 && maxAgents > 1) { + console.warn("[MultiOrch:Plan] ⚠️ WARNING: LLM returned only 1 task but maxAgents=" + maxAgents + ". The prompt may not be eliciting multi-task plans. Review system prompt or user request complexity.") + } + + console.log("[MultiOrch:Plan] ========== PLAN GENERATION END (returning", plan.tasks.length, "tasks) ==========") + return plan + } catch (error) { + console.error("[MultiOrch:Plan] ❌ Plan generation failed:", error) + return null + } +} + +function parsePlanResponse(response: string, maxAgents: number): OrchestratorPlan | null { + try { + // Step 1: Strip markdown code fences if present (handles ```json, ```, triple backticks with language tags) + let cleaned = response.trim() + + // Handle various markdown fence patterns + const fencePatterns = [ + /^```(?:json)?\s*\n?([\s\S]*?)\n?\s*```$/, // ```json ... ``` or ``` ... 
``` + /^`{3,}(?:json)?\s*\n?([\s\S]*?)\n?\s*`{3,}$/, // variable-length fences + ] + + for (const pattern of fencePatterns) { + const match = cleaned.match(pattern) + if (match) { + console.log("[MultiOrch:Plan] Stripped markdown code fence from response") + cleaned = match[1].trim() + break + } + } + + // Step 2: If response starts with text before JSON, try to extract the JSON object + if (!cleaned.startsWith("{") && !cleaned.startsWith("[")) { + const jsonStart = cleaned.indexOf("{") + if (jsonStart !== -1) { + console.log("[MultiOrch:Plan] Response had leading text before JSON, extracting from index", jsonStart) + cleaned = cleaned.substring(jsonStart) + } + } + + // Step 3: If there's trailing text after the JSON, try to extract just the JSON + // Find the matching closing brace + if (cleaned.startsWith("{")) { + let braceDepth = 0 + let jsonEnd = -1 + for (let i = 0; i < cleaned.length; i++) { + if (cleaned[i] === "{") braceDepth++ + else if (cleaned[i] === "}") { + braceDepth-- + if (braceDepth === 0) { + jsonEnd = i + 1 + break + } + } + } + if (jsonEnd !== -1 && jsonEnd < cleaned.length) { + console.log("[MultiOrch:Plan] Response had trailing text after JSON, trimming at index", jsonEnd) + cleaned = cleaned.substring(0, jsonEnd) + } + } + + console.log("[MultiOrch:Plan] Cleaned response for parsing:", cleaned.substring(0, 300)) + + let parsed: Record + + try { + parsed = JSON.parse(cleaned) + } catch (jsonError) { + console.error("[MultiOrch:Plan] ❌ JSON.parse failed:", (jsonError as Error).message) + console.error("[MultiOrch:Plan] Attempted to parse:", cleaned.substring(0, 500)) + return null + } + + // Step 4: Handle edge case — LLM returns a single task object instead of {tasks: [...]} + if (!parsed.tasks && parsed.mode && parsed.title) { + console.warn("[MultiOrch:Plan] ⚠️ LLM returned a single task object instead of {tasks: [...]}, wrapping it") + parsed = { + tasks: [parsed], + requiresMerge: parsed.mode === "code", + estimatedComplexity: "low", + } 
+ } + + // Step 5: Handle edge case — LLM returns an array directly instead of {tasks: [...]} + if (Array.isArray(parsed)) { + console.warn("[MultiOrch:Plan] ⚠️ LLM returned a bare array instead of {tasks: [...]}, wrapping it") + parsed = { + tasks: parsed, + requiresMerge: (parsed as unknown as Record[]).some((t) => t.mode === "code"), + estimatedComplexity: "medium", + } + } + + if (!parsed.tasks || !Array.isArray(parsed.tasks)) { + console.error("[MultiOrch:Plan] ❌ parsed.tasks is missing or not an array. Keys found:", Object.keys(parsed)) + return null + } + + if ((parsed.tasks as unknown[]).length === 0) { + console.error("[MultiOrch:Plan] ❌ parsed.tasks is an empty array") + return null + } + + let tasks: PlannedTask[] = (parsed.tasks as Record[]).map((t: Record, i: number) => ({ + id: generateAgentId(), + mode: (t.mode as string) || "code", + title: (t.title as string) || `Task ${i + 1}`, + description: (t.description as string) || "", + assignedFiles: (t.assignedFiles as string[]) || [], + priority: (t.priority as number) || i + 1, + })) + + console.log("[MultiOrch:Plan] Successfully mapped", tasks.length, "tasks from parsed JSON") + + // Hard-enforce the agent limit + if (tasks.length > maxAgents) { + console.log("[MultiOrch:Plan] Clamping task count from", tasks.length, "to maxAgents=", maxAgents) + tasks = tasks.slice(0, maxAgents) + } + + return { + tasks, + requiresMerge: (parsed.requiresMerge as boolean) ?? 
tasks.some((t) => t.mode === "code"), + estimatedComplexity: (parsed.estimatedComplexity as string as "low" | "medium" | "high") || "medium", + } + } catch (error) { + console.error("[MultiOrch:Plan] ❌ Failed to parse plan response:", error) + console.error("[MultiOrch:Plan] Raw response was:", response?.substring(0, 500)) + return null + } +} diff --git a/src/core/multi-orchestrator/report-aggregator.ts b/src/core/multi-orchestrator/report-aggregator.ts new file mode 100644 index 00000000000..b477f2cb348 --- /dev/null +++ b/src/core/multi-orchestrator/report-aggregator.ts @@ -0,0 +1,89 @@ +// src/core/multi-orchestrator/report-aggregator.ts +import type { AgentState, MergeResult, VerificationFinding } from "./types" + +/** + * Aggregate all agent reports, merge results, and verification findings + * into a unified markdown summary. + */ +export function aggregateReports( + agents: AgentState[], + mergeResults: MergeResult[], + verificationFindings: VerificationFinding[] = [], +): string { + const sections: string[] = [] + + // Header + sections.push(`# Multi-Orchestration Report`) + sections.push(`**${agents.length} agents** executed in parallel.\n`) + + // Agent summaries + sections.push(`## Agent Results\n`) + for (const agent of agents) { + const status = agent.status === "completed" ? "✅" : "❌" + const duration = + agent.startedAt && agent.completedAt + ? 
`${Math.round((agent.completedAt - agent.startedAt) / 1000)}s` + : "unknown" + + sections.push(`### ${status} ${agent.title} (${agent.mode} mode)`) + sections.push(`- **Status:** ${agent.status}`) + sections.push(`- **Duration:** ${duration}`) + if (agent.tokenUsage) { + sections.push(`- **Tokens:** ${agent.tokenUsage.input} in / ${agent.tokenUsage.output} out`) + } + if (agent.completionReport) { + sections.push(`- **Report:** ${agent.completionReport}`) + } + sections.push("") + } + + // Merge results (if any) + if (mergeResults.length > 0) { + sections.push(`## Merge Results\n`) + for (const result of mergeResults) { + const status = result.success ? "✅" : "⚠️" + sections.push(`### ${status} Branch: ${result.branch}`) + sections.push(`- **Success:** ${result.success}`) + sections.push(`- **Files changed:** ${result.filesChanged.length}`) + if (result.conflictsFound > 0) { + sections.push(`- **Conflicts found:** ${result.conflictsFound}`) + sections.push(`- **Conflicts resolved:** ${result.conflictsResolved}`) + } + sections.push("") + } + } + + // Verification findings (if any) + if (verificationFindings.length > 0) { + sections.push(`## Verification Results\n`) + for (const finding of verificationFindings) { + const severityIcon = + finding.severity === "error" ? "🔴" : + finding.severity === "warning" ? 
"🟡" : + "🟢" + sections.push(`### ${severityIcon} Verification (${finding.severity})`) + sections.push(finding.findings) + sections.push("") + } + } + + // Summary stats + const completed = agents.filter((a) => a.status === "completed").length + const failed = agents.filter((a) => a.status === "failed").length + const mergeSuccesses = mergeResults.filter((r) => r.success).length + const mergeFailures = mergeResults.filter((r) => !r.success).length + + sections.push(`## Summary`) + sections.push(`- **Agents:** ${completed} completed, ${failed} failed`) + if (mergeResults.length > 0) { + sections.push(`- **Merges:** ${mergeSuccesses} succeeded, ${mergeFailures} had conflicts`) + } + if (verificationFindings.length > 0) { + const hasErrors = verificationFindings.some((f) => f.severity === "error") + const hasWarnings = verificationFindings.some((f) => f.severity === "warning") + const verifyStatus = hasErrors ? "⚠️ Issues found" : hasWarnings ? "🟡 Minor concerns" : "✅ Passed" + sections.push(`- **Verification:** ${verifyStatus}`) + } + + return sections.join("\n") +} diff --git a/src/core/multi-orchestrator/types.ts b/src/core/multi-orchestrator/types.ts new file mode 100644 index 00000000000..438773cac55 --- /dev/null +++ b/src/core/multi-orchestrator/types.ts @@ -0,0 +1,96 @@ +import * as crypto from "crypto" + +export interface OrchestratorPlan { + tasks: PlannedTask[] + requiresMerge: boolean + estimatedComplexity: "low" | "medium" | "high" +} + +export interface PlannedTask { + id: string + mode: string + title: string + description: string + assignedFiles?: string[] + priority: number +} + +export type AgentStatus = "pending" | "running" | "completed" | "failed" | "merging" + +export interface AgentState { + taskId: string + providerId: string + panelId: string + worktreePath: string | null + worktreeBranch: string | null + mode: string + status: AgentStatus + title: string + completionReport: string | null + tokenUsage: { input: number; output: number } | null + 
startedAt: number | null + completedAt: number | null +} + +export interface MergeResult { + agentTaskId: string + branch: string + success: boolean + conflictsFound: number + conflictsResolved: number + filesChanged: string[] +} + +export interface VerificationFinding { + agentTaskId: string + findings: string + severity: "info" | "warning" | "error" +} + +export interface OrchestratorState { + phase: "idle" | "planning" | "spawning" | "running" | "merging" | "verifying" | "reporting" | "complete" + plan: OrchestratorPlan | null + agents: AgentState[] + mergeResults: MergeResult[] + verificationFindings: VerificationFinding[] + finalReport: string | null +} + +export const MULTI_ORCHESTRATOR_CONSTANTS = { + MAX_AGENTS: 6, + DEFAULT_MAX_AGENTS: 4, + WORKTREE_PREFIX: "roo-multi-", + BRANCH_PREFIX: "multi-orch/", +} as const + +export function generateAgentId(): string { + return crypto.randomUUID().slice(0, 8) +} + +export function createInitialAgentState(task: PlannedTask): AgentState { + return { + taskId: task.id, + providerId: "", + panelId: "", + worktreePath: null, + worktreeBranch: null, + mode: task.mode, + status: "pending", + title: task.title, + completionReport: null, + tokenUsage: null, + startedAt: null, + completedAt: null, + } +} + +export function createInitialOrchestratorState(): OrchestratorState { + return { + phase: "idle", + plan: null, + agents: [], + mergeResults: [], + verificationFindings: [], + finalReport: null, + } +} diff --git a/src/core/multi-orchestrator/worktree-manager.ts b/src/core/multi-orchestrator/worktree-manager.ts new file mode 100644 index 00000000000..64ded238f9b --- /dev/null +++ b/src/core/multi-orchestrator/worktree-manager.ts @@ -0,0 +1,73 @@ +import { WorktreeService } from "@roo-code/core" +import { MULTI_ORCHESTRATOR_CONSTANTS } from "./types" +import * as path from "path" + +export interface WorktreeInfo { + agentId: string + path: string + branch: string +} + +export class MultiWorktreeManager { + private 
worktreeService: WorktreeService + private worktrees: Map = new Map() + + constructor(private workspacePath: string) { + this.worktreeService = new WorktreeService() + } + + /** + * Create a git worktree for each agent. + * Each gets its own branch from current HEAD and its own directory. + */ + async createWorktrees(agentIds: string[]): Promise> { + for (const agentId of agentIds) { + const branch = `${MULTI_ORCHESTRATOR_CONSTANTS.BRANCH_PREFIX}${agentId}` + const worktreePath = path.join( + path.dirname(this.workspacePath), + `${MULTI_ORCHESTRATOR_CONSTANTS.WORKTREE_PREFIX}${agentId}`, + ) + + const result = await this.worktreeService.createWorktree(this.workspacePath, { + path: worktreePath, + branch, + createNewBranch: true, + }) + + if (!result.success) { + throw new Error(`Failed to create worktree for agent ${agentId}: ${result.message}`) + } + + this.worktrees.set(agentId, { agentId, path: worktreePath, branch }) + } + + return new Map(this.worktrees) + } + + /** Get worktree info for a specific agent */ + getWorktree(agentId: string): WorktreeInfo | undefined { + return this.worktrees.get(agentId) + } + + /** Get all worktrees */ + getAllWorktrees(): WorktreeInfo[] { + return Array.from(this.worktrees.values()) + } + + /** Clean up all worktrees created by this orchestration */ + async cleanupWorktrees(): Promise { + for (const [agentId, info] of this.worktrees) { + try { + await this.worktreeService.deleteWorktree(this.workspacePath, info.path, true) + } catch (error) { + console.error(`[MultiOrch] Failed to cleanup worktree for ${agentId}:`, error) + } + } + this.worktrees.clear() + } + + /** Get the branch name for an agent */ + getBranchName(agentId: string): string { + return `${MULTI_ORCHESTRATOR_CONSTANTS.BRANCH_PREFIX}${agentId}` + } +} diff --git a/src/core/prompts/__tests__/__snapshots__/add-custom-instructions/architect-mode-prompt.snap b/src/core/prompts/__tests__/__snapshots__/add-custom-instructions/architect-mode-prompt.snap index 
5bed6df09d1..e66ba3f3f93 100644 --- a/src/core/prompts/__tests__/__snapshots__/add-custom-instructions/architect-mode-prompt.snap +++ b/src/core/prompts/__tests__/__snapshots__/add-custom-instructions/architect-mode-prompt.snap @@ -17,7 +17,6 @@ You have access to a set of tools that are executed upon the user's approval. Us 1. Assess what information you already have and what information you need to proceed with the task. 2. Choose the most appropriate tool based on the task and the tool descriptions provided. Assess if you need additional information to proceed, and which of the available tools would be most effective for gathering this information. For example using the list_files tool is more effective than running a command like `ls` in the terminal. It's critical that you think about each available tool and use the one that best fits the current step in the task. 3. If multiple actions are needed, you may use multiple tools in a single message when appropriate, or use tools iteratively across messages. Each tool use should be informed by the results of previous tool uses. Do not assume the outcome of any tool use. Each step must be informed by the previous step's result. - By carefully considering the user's response after tool executions, you can react accordingly and make informed decisions about how to proceed with the task. This iterative process helps ensure the overall success and accuracy of your work. ==== diff --git a/src/core/prompts/__tests__/__snapshots__/add-custom-instructions/ask-mode-prompt.snap b/src/core/prompts/__tests__/__snapshots__/add-custom-instructions/ask-mode-prompt.snap index 243dfc19b7b..55327b4d9a2 100644 --- a/src/core/prompts/__tests__/__snapshots__/add-custom-instructions/ask-mode-prompt.snap +++ b/src/core/prompts/__tests__/__snapshots__/add-custom-instructions/ask-mode-prompt.snap @@ -17,7 +17,6 @@ You have access to a set of tools that are executed upon the user's approval. Us 1. 
Assess what information you already have and what information you need to proceed with the task. 2. Choose the most appropriate tool based on the task and the tool descriptions provided. Assess if you need additional information to proceed, and which of the available tools would be most effective for gathering this information. For example using the list_files tool is more effective than running a command like `ls` in the terminal. It's critical that you think about each available tool and use the one that best fits the current step in the task. 3. If multiple actions are needed, you may use multiple tools in a single message when appropriate, or use tools iteratively across messages. Each tool use should be informed by the results of previous tool uses. Do not assume the outcome of any tool use. Each step must be informed by the previous step's result. - By carefully considering the user's response after tool executions, you can react accordingly and make informed decisions about how to proceed with the task. This iterative process helps ensure the overall success and accuracy of your work. ==== diff --git a/src/core/prompts/__tests__/__snapshots__/add-custom-instructions/mcp-server-creation-disabled.snap b/src/core/prompts/__tests__/__snapshots__/add-custom-instructions/mcp-server-creation-disabled.snap index 5bed6df09d1..e66ba3f3f93 100644 --- a/src/core/prompts/__tests__/__snapshots__/add-custom-instructions/mcp-server-creation-disabled.snap +++ b/src/core/prompts/__tests__/__snapshots__/add-custom-instructions/mcp-server-creation-disabled.snap @@ -17,7 +17,6 @@ You have access to a set of tools that are executed upon the user's approval. Us 1. Assess what information you already have and what information you need to proceed with the task. 2. Choose the most appropriate tool based on the task and the tool descriptions provided. 
Assess if you need additional information to proceed, and which of the available tools would be most effective for gathering this information. For example using the list_files tool is more effective than running a command like `ls` in the terminal. It's critical that you think about each available tool and use the one that best fits the current step in the task. 3. If multiple actions are needed, you may use multiple tools in a single message when appropriate, or use tools iteratively across messages. Each tool use should be informed by the results of previous tool uses. Do not assume the outcome of any tool use. Each step must be informed by the previous step's result. - By carefully considering the user's response after tool executions, you can react accordingly and make informed decisions about how to proceed with the task. This iterative process helps ensure the overall success and accuracy of your work. ==== diff --git a/src/core/prompts/__tests__/__snapshots__/system-prompt/consistent-system-prompt.snap b/src/core/prompts/__tests__/__snapshots__/system-prompt/consistent-system-prompt.snap index 42e8bba9c68..a5ac88cc718 100644 --- a/src/core/prompts/__tests__/__snapshots__/system-prompt/consistent-system-prompt.snap +++ b/src/core/prompts/__tests__/__snapshots__/system-prompt/consistent-system-prompt.snap @@ -17,7 +17,6 @@ You have access to a set of tools that are executed upon the user's approval. Us 1. Assess what information you already have and what information you need to proceed with the task. 2. Choose the most appropriate tool based on the task and the tool descriptions provided. Assess if you need additional information to proceed, and which of the available tools would be most effective for gathering this information. For example using the list_files tool is more effective than running a command like `ls` in the terminal. It's critical that you think about each available tool and use the one that best fits the current step in the task. 3. 
If multiple actions are needed, you may use multiple tools in a single message when appropriate, or use tools iteratively across messages. Each tool use should be informed by the results of previous tool uses. Do not assume the outcome of any tool use. Each step must be informed by the previous step's result. - By carefully considering the user's response after tool executions, you can react accordingly and make informed decisions about how to proceed with the task. This iterative process helps ensure the overall success and accuracy of your work. ==== diff --git a/src/core/prompts/__tests__/__snapshots__/system-prompt/with-mcp-hub-provided.snap b/src/core/prompts/__tests__/__snapshots__/system-prompt/with-mcp-hub-provided.snap index 5aa6677ab03..cf55a09d8bf 100644 --- a/src/core/prompts/__tests__/__snapshots__/system-prompt/with-mcp-hub-provided.snap +++ b/src/core/prompts/__tests__/__snapshots__/system-prompt/with-mcp-hub-provided.snap @@ -17,7 +17,6 @@ You have access to a set of tools that are executed upon the user's approval. Us 1. Assess what information you already have and what information you need to proceed with the task. 2. Choose the most appropriate tool based on the task and the tool descriptions provided. Assess if you need additional information to proceed, and which of the available tools would be most effective for gathering this information. For example using the list_files tool is more effective than running a command like `ls` in the terminal. It's critical that you think about each available tool and use the one that best fits the current step in the task. 3. If multiple actions are needed, you may use multiple tools in a single message when appropriate, or use tools iteratively across messages. Each tool use should be informed by the results of previous tool uses. Do not assume the outcome of any tool use. Each step must be informed by the previous step's result. 
- By carefully considering the user's response after tool executions, you can react accordingly and make informed decisions about how to proceed with the task. This iterative process helps ensure the overall success and accuracy of your work. ==== diff --git a/src/core/prompts/__tests__/__snapshots__/system-prompt/with-undefined-mcp-hub.snap b/src/core/prompts/__tests__/__snapshots__/system-prompt/with-undefined-mcp-hub.snap index 42e8bba9c68..a5ac88cc718 100644 --- a/src/core/prompts/__tests__/__snapshots__/system-prompt/with-undefined-mcp-hub.snap +++ b/src/core/prompts/__tests__/__snapshots__/system-prompt/with-undefined-mcp-hub.snap @@ -17,7 +17,6 @@ You have access to a set of tools that are executed upon the user's approval. Us 1. Assess what information you already have and what information you need to proceed with the task. 2. Choose the most appropriate tool based on the task and the tool descriptions provided. Assess if you need additional information to proceed, and which of the available tools would be most effective for gathering this information. For example using the list_files tool is more effective than running a command like `ls` in the terminal. It's critical that you think about each available tool and use the one that best fits the current step in the task. 3. If multiple actions are needed, you may use multiple tools in a single message when appropriate, or use tools iteratively across messages. Each tool use should be informed by the results of previous tool uses. Do not assume the outcome of any tool use. Each step must be informed by the previous step's result. - By carefully considering the user's response after tool executions, you can react accordingly and make informed decisions about how to proceed with the task. This iterative process helps ensure the overall success and accuracy of your work. 
==== diff --git a/src/core/prompts/responses.ts b/src/core/prompts/responses.ts index 60b5b4123ac..1e3e13b7beb 100644 --- a/src/core/prompts/responses.ts +++ b/src/core/prompts/responses.ts @@ -39,8 +39,8 @@ export const formatResponse = { suggestion: "Try to continue without this file, or ask the user to update the .rooignore file", }), - noToolsUsed: () => { - const instructions = getToolInstructionsReminder() + noToolsUsed: (useXmlToolCalling?: boolean) => { + const instructions = useXmlToolCalling ? toolUseInstructionsReminderXml : toolUseInstructionsReminderNative return `[ERROR] You did not use a tool in your previous response! Please retry with a tool use. @@ -60,8 +60,8 @@ Otherwise, if you have not completed the task and do not need additional informa feedback, }), - missingToolParameterError: (paramName: string) => { - const instructions = getToolInstructionsReminder() + missingToolParameterError: (paramName: string, useXmlToolCalling?: boolean) => { + const instructions = useXmlToolCalling ? toolUseInstructionsReminderXml : toolUseInstructionsReminderNative return `Missing value for required parameter '${paramName}'. Please retry with complete response.\n\n${instructions}` }, @@ -222,9 +222,38 @@ Tools are invoked using the platform's native tool calling mechanism. Each tool Always ensure you provide all required parameters for the tool you wish to use.` -/** - * Gets the tool use instructions reminder. - */ -function getToolInstructionsReminder(): string { - return toolUseInstructionsReminderNative -} +const toolUseInstructionsReminderXml = `# Reminder: Instructions for Tool Use + +Tools MUST be invoked using XML-style tags. The tool name becomes the outermost XML tag, with each parameter as a nested child tag. + +IMPORTANT: You MUST output EXACTLY ONE of these tool calls in your response. Do NOT respond with only text. 
+ +If you have completed the user's task, output: + +Description of what you accomplished + + +If you need to ask the user something, output: + +Your question here + +Option 1 +Option 2 + + + +If you need to read a file, output: + +path/to/file + + +If you need to run a command, output: + +your command here + + +Rules: +- Every opening tag MUST have a matching closing tag +- Do NOT wrap tool calls in markdown code blocks +- Do NOT use JSON format for tool calls +- Output the XML tool call directly in your response` diff --git a/src/core/prompts/sections/__tests__/personality.spec.ts b/src/core/prompts/sections/__tests__/personality.spec.ts new file mode 100644 index 00000000000..7347ef5d0b7 --- /dev/null +++ b/src/core/prompts/sections/__tests__/personality.spec.ts @@ -0,0 +1,203 @@ +import { PersonalityTrait, PersonalityConfig } from "@roo-code/types" + +import { + BUILT_IN_PERSONALITY_TRAITS, + resolveActiveTraits, + getAllTraitsForConfig, + buildPersonalityPrompt, +} from "../../../../shared/personality-traits" + +describe("buildPersonalityPrompt", () => { + it("should return empty string when no config is provided", () => { + expect(buildPersonalityPrompt(undefined)).toBe("") + }) + + it("should return empty string when no traits are active", () => { + const config: PersonalityConfig = { + activeTraitIds: [], + customTraits: [], + } + expect(buildPersonalityPrompt(config)).toBe("") + }) + + it("should return formatted section for a single active built-in trait", () => { + const config: PersonalityConfig = { + activeTraitIds: ["roo"], + customTraits: [], + } + + const result = buildPersonalityPrompt(config) + + expect(result).toContain("PERSONALITY & VOICE") + expect(result).toContain("CRITICAL:") + expect(result).toContain("You are Roo") + }) + + it("should concatenate multiple active traits", () => { + const config: PersonalityConfig = { + activeTraitIds: ["dry-wit", "straight-shooter"], + customTraits: [], + } + + const result = buildPersonalityPrompt(config) 
+ + expect(result).toContain("bone-dry, deadpan") + expect(result).toContain("short, punchy fragments") + }) + + it("should include custom traits", () => { + const customTrait: PersonalityTrait = { + id: "pirate", + emoji: "🏴‍☠️", + label: "Pirate", + prompt: "You are a pirate. Use pirate language like 'Ahoy matey!' and 'Arrr!'", + isBuiltIn: false, + } + + const config: PersonalityConfig = { + activeTraitIds: ["pirate"], + customTraits: [customTrait], + } + + const result = buildPersonalityPrompt(config) + + expect(result).toContain("You are a pirate") + expect(result).toContain("Ahoy matey!") + }) + + it("should ignore unknown trait IDs gracefully", () => { + const config: PersonalityConfig = { + activeTraitIds: ["nonexistent-trait"], + customTraits: [], + } + + const result = buildPersonalityPrompt(config) + expect(result).toBe("") + }) + + it("should include the CRITICAL instruction and trait content", () => { + const config: PersonalityConfig = { + activeTraitIds: ["roo"], + customTraits: [], + } + + const result = buildPersonalityPrompt(config) + + // The top section should contain the CRITICAL instruction and trait content + expect(result).toContain("CRITICAL:") + expect(result).toContain("You are Roo") + expect(result).toContain("PERSONALITY & VOICE") + }) +}) + +describe("Built-in traits", () => { + it("should have 13 built-in traits", () => { + expect(BUILT_IN_PERSONALITY_TRAITS).toHaveLength(13) + }) + + it("should have unique IDs", () => { + const ids = BUILT_IN_PERSONALITY_TRAITS.map((t) => t.id) + expect(new Set(ids).size).toBe(ids.length) + }) + + it("should all be marked as isBuiltIn", () => { + BUILT_IN_PERSONALITY_TRAITS.forEach((trait) => { + expect(trait.isBuiltIn).toBe(true) + }) + }) + + it("should all use direct natural-language format (no section markers)", () => { + BUILT_IN_PERSONALITY_TRAITS.forEach((trait) => { + // No [SECTION_KEY] markers should be present + expect(trait.prompt).not.toMatch(/\[COMMUNICATION_STYLE\]/) + 
expect(trait.prompt).not.toMatch(/\[TASK_COMPLETION\]/) + expect(trait.prompt).not.toMatch(/\[ERROR_HANDLING\]/) + expect(trait.prompt).not.toMatch(/\[SUGGESTIONS\]/) + }) + }) + + it("should all start with identity-first framing (You ...)", () => { + BUILT_IN_PERSONALITY_TRAITS.forEach((trait) => { + const startsWithIdentity = /^You\s+\w+/.test(trait.prompt.trim()) + expect(startsWithIdentity).toBe(true) + }) + }) + + it("should all contain negative constraints (Never)", () => { + BUILT_IN_PERSONALITY_TRAITS.forEach((trait) => { + expect(trait.prompt).toContain("Never") + }) + }) + + it("should include the Roo default trait", () => { + const roo = BUILT_IN_PERSONALITY_TRAITS.find((t) => t.id === "roo") + expect(roo).toBeDefined() + expect(roo!.emoji).toBe("🦘") + expect(roo!.label).toBe("Roo") + }) +}) + +describe("resolveActiveTraits", () => { + it("should resolve built-in trait IDs to full traits", () => { + const result = resolveActiveTraits(["roo", "dry-wit"]) + expect(result).toHaveLength(2) + expect(result[0].id).toBe("roo") + expect(result[1].id).toBe("dry-wit") + }) + + it("should preserve order", () => { + const result = resolveActiveTraits(["dry-wit", "roo"]) + expect(result[0].id).toBe("dry-wit") + expect(result[1].id).toBe("roo") + }) + + it("should filter out unknown IDs", () => { + const result = resolveActiveTraits(["roo", "nonexistent", "dry-wit"]) + expect(result).toHaveLength(2) + }) + + it("should resolve custom traits", () => { + const custom: PersonalityTrait = { + id: "my-custom", + emoji: "🧪", + label: "Custom", + prompt: "You are custom.", + isBuiltIn: false, + } + const result = resolveActiveTraits(["my-custom"], [custom]) + expect(result).toHaveLength(1) + expect(result[0].label).toBe("Custom") + }) +}) + +describe("getAllTraitsForConfig", () => { + it("should return built-in traits when no custom traits", () => { + const result = getAllTraitsForConfig([]) + expect(result.length).toBe(BUILT_IN_PERSONALITY_TRAITS.length) + }) + + it("should 
append custom traits", () => { + const custom: PersonalityTrait = { + id: "new-trait", + emoji: "🆕", + label: "New", + prompt: "You are new.", + isBuiltIn: false, + } + const result = getAllTraitsForConfig([custom]) + expect(result.length).toBe(BUILT_IN_PERSONALITY_TRAITS.length + 1) + }) + + it("should allow custom traits to override built-in ones by ID", () => { + const override: PersonalityTrait = { + id: "roo", + emoji: "🦘", + label: "Custom Roo", + prompt: "You are a custom Roo.", + isBuiltIn: false, + } + const result = getAllTraitsForConfig([override]) + const roo = result.find((t) => t.id === "roo") + expect(roo!.label).toBe("Custom Roo") + }) +}) diff --git a/src/core/prompts/sections/__tests__/tool-use.spec.ts b/src/core/prompts/sections/__tests__/tool-use.spec.ts index 878db81a1cf..5a09fac1185 100644 --- a/src/core/prompts/sections/__tests__/tool-use.spec.ts +++ b/src/core/prompts/sections/__tests__/tool-use.spec.ts @@ -1,31 +1,107 @@ import { getSharedToolUseSection } from "../tool-use" +import { getToolUseGuidelinesSection } from "../tool-use-guidelines" describe("getSharedToolUseSection", () => { - it("should include native tool-calling instructions", () => { - const section = getSharedToolUseSection() + describe("default (native) mode", () => { + it("should include native tool-calling instructions", () => { + const section = getSharedToolUseSection() - expect(section).toContain("provider-native tool-calling mechanism") - expect(section).toContain("Do not include XML markup or examples") + expect(section).toContain("provider-native tool-calling mechanism") + expect(section).toContain("Do not include XML markup or examples") + }) + + it("should include multiple tools per message guidance", () => { + const section = getSharedToolUseSection() + + expect(section).toContain("You must call at least one tool per assistant response") + expect(section).toContain("Prefer calling as many tools as are reasonably needed") + }) + + it("should NOT include XML 
formatting instructions", () => { + const section = getSharedToolUseSection() + + expect(section).not.toContain("XML-style tags") + }) + + it("should return native instructions when useXmlToolCalling is false", () => { + const section = getSharedToolUseSection(false) + + expect(section).toContain("provider-native tool-calling mechanism") + }) }) - it("should include multiple tools per message guidance", () => { - const section = getSharedToolUseSection() + describe("XML tool calling mode", () => { + it("should include XML formatting instructions when useXmlToolCalling is true", () => { + const section = getSharedToolUseSection(true) + + expect(section).toContain("XML-style tags") + expect(section).toContain("tool name becomes the XML tag") + }) + + it("should NOT include provider-native tool-calling text when useXmlToolCalling is true", () => { + const section = getSharedToolUseSection(true) + + expect(section).not.toContain("provider-native tool-calling mechanism") + expect(section).not.toContain("Do not include XML markup or examples") + }) + + it("should include TOOL USE header", () => { + const section = getSharedToolUseSection(true) - expect(section).toContain("You must call at least one tool per assistant response") - expect(section).toContain("Prefer calling as many tools as are reasonably needed") + expect(section).toContain("TOOL USE") + expect(section).toContain("You have access to a set of tools") + }) + + it("should require exactly one tool per message", () => { + const section = getSharedToolUseSection(true) + + expect(section).toContain("exactly one tool per message") + expect(section).toContain("every assistant message must include a tool call") + }) }) +}) + +describe("getToolUseGuidelinesSection", () => { + describe("default (non-XML) mode", () => { + it("should include base guidelines without XML reinforcement", () => { + const section = getToolUseGuidelinesSection() + + expect(section).toContain("# Tool Use Guidelines") + 
expect(section).toContain("Assess what information you already have") + expect(section).toContain("Choose the most appropriate tool") + expect(section).toContain("If multiple actions are needed") + }) - it("should NOT include single tool per message restriction", () => { - const section = getSharedToolUseSection() + it("should NOT include XML-specific content when called without arguments", () => { + const section = getToolUseGuidelinesSection() - expect(section).not.toContain("You must use exactly one tool call per assistant response") - expect(section).not.toContain("Do not call zero tools or more than one tool") + expect(section).not.toContain("Formulate tool calls as XML") + expect(section).not.toContain("attempt_completion") + }) }) - it("should NOT include XML formatting instructions", () => { - const section = getSharedToolUseSection() + describe("XML tool calling mode", () => { + it("should include compact XML guidelines when useXmlToolCalling is true", () => { + const section = getToolUseGuidelinesSection(true) + + expect(section).toContain("# Tool Use Guidelines") + expect(section).toContain("Formulate tool calls as XML") + expect(section).toContain("attempt_completion") + expect(section).toContain("ask_followup_question") + }) + + it("should include XML structure reminder", () => { + const section = getToolUseGuidelinesSection(true) + + expect(section).toContain("value") + }) + + it("should be more compact than native guidelines", () => { + const xmlSection = getToolUseGuidelinesSection(true) + const nativeSection = getToolUseGuidelinesSection(false) - expect(section).not.toContain("") - expect(section).not.toContain("") + // XML guidelines should be shorter to save context window space + expect(xmlSection.length).toBeLessThan(nativeSection.length) + }) }) }) diff --git a/src/core/prompts/sections/custom-instructions.ts b/src/core/prompts/sections/custom-instructions.ts index 46cf1bf1f9e..f7582a6fbbd 100644 --- 
a/src/core/prompts/sections/custom-instructions.ts +++ b/src/core/prompts/sections/custom-instructions.ts @@ -388,6 +388,7 @@ export async function addCustomInstructions( language?: string rooIgnoreInstructions?: string settings?: SystemPromptSettings + personalityPrompt?: string } = {}, ): Promise { const sections = [] @@ -491,6 +492,13 @@ export async function addCustomInstructions( sections.push(`Rules:\n\n${rules.join("\n\n")}`) } + // Inject personality prompt LAST for maximum recency effect. + // This is the last thing the model reads before generating, + // which research shows produces the strongest behavioral adherence. + if (options.personalityPrompt && options.personalityPrompt.trim()) { + sections.push(options.personalityPrompt.trim()) + } + const joinedSections = sections.join("\n\n") return joinedSections diff --git a/src/core/prompts/sections/index.ts b/src/core/prompts/sections/index.ts index 318cd47bc9d..3822db52e4d 100644 --- a/src/core/prompts/sections/index.ts +++ b/src/core/prompts/sections/index.ts @@ -8,3 +8,4 @@ export { getCapabilitiesSection } from "./capabilities" export { getModesSection } from "./modes" export { markdownFormattingSection } from "./markdown-formatting" export { getSkillsSection } from "./skills" +export { getPersonalitySection, buildPersonalityPromptParts } from "./personality" diff --git a/src/core/prompts/sections/personality.ts b/src/core/prompts/sections/personality.ts new file mode 100644 index 00000000000..72e442e76e6 --- /dev/null +++ b/src/core/prompts/sections/personality.ts @@ -0,0 +1,9 @@ +/** + * Personality section for system prompt. + * Uses the sandwich technique: personality at the TOP and reinforced at the BOTTOM. 
+ */ +import { buildPersonalityPrompt, buildPersonalityPromptParts } from "../../../shared/personality-traits" + +export { mergeTraitPrompts, buildPersonalityPromptParts } from "../../../shared/personality-traits" + +export const getPersonalitySection = buildPersonalityPrompt diff --git a/src/core/prompts/sections/tool-use-guidelines.ts b/src/core/prompts/sections/tool-use-guidelines.ts index 78193372cc8..178be659d96 100644 --- a/src/core/prompts/sections/tool-use-guidelines.ts +++ b/src/core/prompts/sections/tool-use-guidelines.ts @@ -1,9 +1,18 @@ -export function getToolUseGuidelinesSection(): string { +export function getToolUseGuidelinesSection(useXmlToolCalling?: boolean): string { + if (useXmlToolCalling) { + return `# Tool Use Guidelines + +1. Assess what information you need, then choose the most appropriate tool. +2. Use one tool at a time per message. Each step must be informed by the previous result. +3. Formulate tool calls as XML: \`value\` +4. After each tool use, wait for the result before proceeding. +5. When done, use attempt_completion. To ask the user, use ask_followup_question.` + } + return `# Tool Use Guidelines 1. Assess what information you already have and what information you need to proceed with the task. 2. Choose the most appropriate tool based on the task and the tool descriptions provided. Assess if you need additional information to proceed, and which of the available tools would be most effective for gathering this information. For example using the list_files tool is more effective than running a command like \`ls\` in the terminal. It's critical that you think about each available tool and use the one that best fits the current step in the task. 3. If multiple actions are needed, you may use multiple tools in a single message when appropriate, or use tools iteratively across messages. Each tool use should be informed by the results of previous tool uses. Do not assume the outcome of any tool use. 
Each step must be informed by the previous step's result. - By carefully considering the user's response after tool executions, you can react accordingly and make informed decisions about how to proceed with the task. This iterative process helps ensure the overall success and accuracy of your work.` } diff --git a/src/core/prompts/sections/tool-use.ts b/src/core/prompts/sections/tool-use.ts index a3def86c078..70f9819ed1c 100644 --- a/src/core/prompts/sections/tool-use.ts +++ b/src/core/prompts/sections/tool-use.ts @@ -1,4 +1,22 @@ -export function getSharedToolUseSection(): string { +export function getSharedToolUseSection(useXmlToolCalling?: boolean): string { + if (useXmlToolCalling) { + return `==== + +TOOL USE + +You have access to a set of tools that are executed upon the user's approval. You must use exactly one tool per message, and every assistant message must include a tool call. You use tools step-by-step to accomplish a given task, with each tool use informed by the result of the previous tool use. + +# Tool Use Formatting + +Tool uses are formatted using XML-style tags. The tool name becomes the XML tag. 
Each parameter is a nested tag: + + +value + + +Always use the actual tool name as the XML tag name for proper parsing and execution.` + } + return `==== TOOL USE diff --git a/src/core/prompts/system.ts b/src/core/prompts/system.ts index 0d6071644a9..276db667968 100644 --- a/src/core/prompts/system.ts +++ b/src/core/prompts/system.ts @@ -23,7 +23,10 @@ import { addCustomInstructions, markdownFormattingSection, getSkillsSection, + buildPersonalityPromptParts, } from "./sections" +import { getNativeTools } from "./tools/native-tools" +import { generateXmlToolCatalog } from "./tools/xml-tool-catalog" // Helper function to get prompt component, filtering out empty objects export function getPromptComponent( @@ -55,6 +58,8 @@ async function generatePrompt( todoList?: TodoItem[], modelId?: string, skillsManager?: SkillsManager, + useXmlToolCalling?: boolean, + userProfileSection?: string, ): Promise { if (!context) { throw new Error("Extension context is required for generating system prompt") @@ -79,16 +84,20 @@ async function generatePrompt( getSkillsSection(skillsManager, mode as string), ]) - // Tools catalog is not included in the system prompt. - const toolsCatalog = "" + // When XML tool calling is enabled, embed tool descriptions in the system prompt + // since native tool definitions are omitted from the API request. + const toolsCatalog = useXmlToolCalling ? generateXmlToolCatalog(getNativeTools()) : "" - const basePrompt = `${roleDefinition} + // Generate personality sandwich (top + bottom) for maximum adherence + const personalityParts = buildPersonalityPromptParts(modeConfig.personalityConfig) + const basePrompt = `${roleDefinition}${personalityParts.top ? 
`\n${personalityParts.top}` : ""} +${userProfileSection || ""} ${markdownFormattingSection()} -${getSharedToolUseSection()}${toolsCatalog} +${getSharedToolUseSection(useXmlToolCalling)}${toolsCatalog} - ${getToolUseGuidelinesSection()} + ${getToolUseGuidelinesSection(useXmlToolCalling)} ${getCapabilitiesSection(cwd, shouldIncludeMcp ? mcpHub : undefined)} @@ -104,7 +113,7 @@ ${await addCustomInstructions(baseInstructions, globalCustomInstructions || "", language: language ?? formatLanguage(vscode.env.language), rooIgnoreInstructions, settings, -})}` +})}${personalityParts.bottom}` return basePrompt } @@ -126,6 +135,8 @@ export const SYSTEM_PROMPT = async ( todoList?: TodoItem[], modelId?: string, skillsManager?: SkillsManager, + useXmlToolCalling?: boolean, + userProfileSection?: string, ): Promise => { if (!context) { throw new Error("Extension context is required for generating system prompt") @@ -154,5 +165,7 @@ export const SYSTEM_PROMPT = async ( todoList, modelId, skillsManager, + useXmlToolCalling, + userProfileSection, ) } diff --git a/src/core/prompts/tools/__tests__/xml-tool-catalog.spec.ts b/src/core/prompts/tools/__tests__/xml-tool-catalog.spec.ts new file mode 100644 index 00000000000..260bf82967f --- /dev/null +++ b/src/core/prompts/tools/__tests__/xml-tool-catalog.spec.ts @@ -0,0 +1,160 @@ +import { generateXmlToolCatalog } from "../xml-tool-catalog" +import type OpenAI from "openai" + +describe("generateXmlToolCatalog", () => { + it("should return empty string for empty tools array", () => { + expect(generateXmlToolCatalog([])).toBe("") + }) + + it("should generate catalog with tool name, description, and parameters", () => { + const tools: OpenAI.Chat.ChatCompletionTool[] = [ + { + type: "function", + function: { + name: "read_file", + description: "Read a file from the filesystem.", + parameters: { + type: "object", + properties: { + path: { + type: "string", + description: "Path to the file", + }, + }, + required: ["path"], + }, + }, + }, + ] + + 
const result = generateXmlToolCatalog(tools) + + expect(result).toContain("# Tools") + expect(result).toContain("## read_file") + expect(result).toContain("Read a file from the filesystem.") + expect(result).toContain("") + expect(result).toContain("(required)") + expect(result).toContain("") + }) + + it("should mark optional parameters correctly", () => { + const tools: OpenAI.Chat.ChatCompletionTool[] = [ + { + type: "function", + function: { + name: "list_files", + description: "List files in a directory.", + parameters: { + type: "object", + properties: { + path: { type: "string", description: "Directory path" }, + recursive: { type: "boolean", description: "Whether to recurse" }, + }, + required: ["path"], + }, + }, + }, + ] + + const result = generateXmlToolCatalog(tools) + + expect(result).toContain("(required)") + expect(result).toContain("(optional)") + }) + + it("should handle multiple tools", () => { + const tools: OpenAI.Chat.ChatCompletionTool[] = [ + { + type: "function", + function: { + name: "read_file", + description: "Read a file.", + parameters: { type: "object", properties: { path: { type: "string" } }, required: ["path"] }, + }, + }, + { + type: "function", + function: { + name: "write_to_file", + description: "Write to a file.", + parameters: { + type: "object", + properties: { + path: { type: "string" }, + content: { type: "string" }, + }, + required: ["path", "content"], + }, + }, + }, + ] + + const result = generateXmlToolCatalog(tools) + + expect(result).toContain("## read_file") + expect(result).toContain("## write_to_file") + expect(result).toContain("") + expect(result).toContain("") + }) + + it("should handle tools with no parameters", () => { + const tools: OpenAI.Chat.ChatCompletionTool[] = [ + { + type: "function", + function: { + name: "some_tool", + description: "A tool with no params.", + parameters: { type: "object", properties: {} }, + }, + }, + ] + + const result = generateXmlToolCatalog(tools) + + expect(result).toContain("## 
some_tool") + expect(result).toContain("") + expect(result).toContain("") + }) + + it("should use hand-crafted descriptions for attempt_completion", () => { + const tools: OpenAI.Chat.ChatCompletionTool[] = [ + { + type: "function", + function: { + name: "attempt_completion", + description: "Auto-generated description", + parameters: { type: "object", properties: { result: { type: "string" } }, required: ["result"] }, + }, + }, + ] + + const result = generateXmlToolCatalog(tools) + + // Should use hand-crafted description, not auto-generated + expect(result).toContain("IMPORTANT NOTE") + expect(result).toContain("") + }) + + it("should use hand-crafted descriptions for ask_followup_question", () => { + const tools: OpenAI.Chat.ChatCompletionTool[] = [ + { + type: "function", + function: { + name: "ask_followup_question", + description: "Auto-generated description", + parameters: { + type: "object", + properties: { question: { type: "string" }, follow_up: { type: "string" } }, + required: ["question"], + }, + }, + }, + ] + + const result = generateXmlToolCatalog(tools) + + // Should use hand-crafted description with tags + expect(result).toContain("") + expect(result).toContain("") + }) +}) diff --git a/src/core/prompts/tools/xml-tool-catalog.ts b/src/core/prompts/tools/xml-tool-catalog.ts new file mode 100644 index 00000000000..261c58d743e --- /dev/null +++ b/src/core/prompts/tools/xml-tool-catalog.ts @@ -0,0 +1,151 @@ +/** + * Generates XML-formatted tool descriptions for the system prompt. + * + * When useXmlToolCalling is enabled, native tool definitions are omitted from the + * API request. Instead, tool descriptions must be embedded in the system prompt + * so the model knows what tools are available and their parameter schemas. + * + * This module converts OpenAI ChatCompletionTool definitions to the XML-based + * tool description format that the model can understand. 
+ */ + +import type OpenAI from "openai" + +/** + * Convert an array of OpenAI tool definitions into an XML tool catalog + * suitable for inclusion in the system prompt. + * + * @param tools - Array of OpenAI ChatCompletionTool definitions + * @returns XML-formatted tool catalog string + */ +// Hand-crafted descriptions for critical tools that models struggle with. +// These match the original Roo Code XML format with detailed examples. +const HANDCRAFTED_TOOL_DESCRIPTIONS: Record = { + attempt_completion: `## attempt_completion +Description: After each tool use, the user will respond with the result of that tool use, i.e. if it succeeded or failed, along with any reasons for failure. Once you've received the results of tool uses and can confirm that the task is complete, use this tool to present the result of your work to the user. The user may respond with feedback if they are not satisfied with the result, which you can use to make improvements and try again. +IMPORTANT NOTE: This tool CANNOT be used until you've confirmed from the user that any previous tool uses were successful. Failure to do so will result in code corruption and system failure. Before using this tool, you must confirm that you've received successful results from the user for any previous tool uses. If not, then DO NOT use this tool. +Parameters: +- result: (required) The result of the task. Formulate this result in a way that is final and does not require further input from the user. Don't end your result with questions or offers for further assistance. +Usage: + + +Your final result description here + + + +Example: Completing after updating CSS + + +I've updated the CSS to use flexbox layout for better responsiveness + +`, + + ask_followup_question: `## ask_followup_question +Description: Ask the user a question to gather additional information needed to complete the task. Use when you need clarification or more details to proceed effectively. 
+Parameters: +- question: (required) A clear, specific question addressing the information needed. +- follow_up: (required) A list of 2-4 suggested answers, each in its own tag. Suggestions must be complete, actionable answers without placeholders. +Usage: + +Your question here + +First suggestion +Second suggestion +Third suggestion + + + +Example: Asking about a file path + +What is the path to the frontend-config.json file? + +./src/frontend-config.json +./config/frontend-config.json +./frontend-config.json + +`, +} + +export function generateXmlToolCatalog(tools: OpenAI.Chat.ChatCompletionTool[]): string { + if (!tools || tools.length === 0) { + return "" + } + + const toolDescriptions = tools + .map((tool) => { + // Use hand-crafted descriptions for critical tools + const toolName = (tool as any).function?.name + if (toolName && HANDCRAFTED_TOOL_DESCRIPTIONS[toolName]) { + return HANDCRAFTED_TOOL_DESCRIPTIONS[toolName] + } + return formatToolAsXml(tool) + }) + .join("\n\n") + + return `\n\n# Tools\n\n${toolDescriptions}` +} + +/** + * Format a single OpenAI tool definition as a COMPACT XML tool description. + * Keeps descriptions short to save context window space for local models. + */ +function formatToolAsXml(tool: OpenAI.Chat.ChatCompletionTool): string { + if (tool.type !== "function" || !("function" in tool)) { + return "" + } + const fn = (tool as any).function as { name: string; description?: string; parameters?: unknown } + const name = fn.name + // Truncate description to first sentence to save tokens + const fullDesc = fn.description || "" + const firstSentence = fullDesc.split(/\.(?:\s|$)/)[0] + const description = firstSentence.length < 200 ? firstSentence + "." : fullDesc.substring(0, 200) + "..." 
+ const params = fn.parameters as JsonSchema | undefined + + let result = `## ${name}\n${description}\nUsage: <${name}>` + + if (params && params.properties) { + const required = new Set(params.required || []) + const paramParts: string[] = [] + for (const [paramName, paramSchema] of Object.entries(params.properties)) { + const isRequired = required.has(paramName) + paramParts.push(`<${paramName}>${isRequired ? "(required)" : "(optional)"}`) + } + result += paramParts.join("") + } + + result += `` + return result +} + +/** + * Format a JSON schema type into a human-readable string. + */ +function formatParamType(schema: JsonSchema): string { + if (schema.enum) { + return schema.enum.map((v: unknown) => `"${v}"`).join(" | ") + } + + if (schema.type === "object") { + return "object" + } + + if (schema.type === "array") { + const itemType = schema.items ? formatParamType(schema.items as JsonSchema) : "any" + return `array of ${itemType}` + } + + return schema.type || "string" +} + +/** + * Minimal JSON Schema type for our parsing needs. 
+ */ +interface JsonSchema { + type?: string + description?: string + properties?: Record + required?: string[] + items?: unknown + enum?: unknown[] + additionalProperties?: boolean +} diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts index 005bb0f292b..9150639225d 100644 --- a/src/core/task/Task.ts +++ b/src/core/task/Task.ts @@ -104,6 +104,7 @@ import { RooIgnoreController } from "../ignore/RooIgnoreController" import { RooProtectedController } from "../protect/RooProtectedController" import { type AssistantMessageContent, presentAssistantMessage } from "../assistant-message" import { NativeToolCallParser } from "../assistant-message/NativeToolCallParser" +import { XmlToolCallParser } from "../assistant-message/XmlToolCallParser" import { manageContext, willManageContext } from "../context-management" import { ClineProvider } from "../webview/ClineProvider" import { MultiSearchReplaceDiffStrategy } from "../diff/strategies/multi-search-replace" @@ -368,6 +369,20 @@ export class Task extends EventEmitter implements TaskLike { * @returns true if added, false if duplicate was skipped */ public pushToolResultToUserContent(toolResult: Anthropic.ToolResultBlockParam): boolean { + // When XML tool calling is enabled, convert tool_result blocks to text blocks. + // The API doesn't have native tool_use blocks in XML mode, so tool_result blocks + // would cause API errors. Instead, send results as plain text. + if (this.apiConfiguration?.useXmlToolCalling) { + const resultText = + typeof toolResult.content === "string" ? toolResult.content : JSON.stringify(toolResult.content) + const prefix = toolResult.is_error ? 
"[Tool Error]" : "[Tool Result]" + this.userMessageContent.push({ + type: "text", + text: `${prefix}\n${resultText}`, + }) + return true + } + const existingResult = this.userMessageContent.find( (block): block is Anthropic.ToolResultBlockParam => block.type === "tool_result" && block.tool_use_id === toolResult.tool_use_id, @@ -393,6 +408,9 @@ export class Task extends EventEmitter implements TaskLike { // Native tool call streaming state (track which index each tool is at) private streamingToolCallIndices: Map = new Map() + // XML tool call parser instance (used when useXmlToolCalling is enabled) + xmlToolCallParser?: XmlToolCallParser + // Cached model info for current streaming session (set at start of each API request) // This prevents excessive getModel() calls during tool execution cachedStreamingModel?: { id: string; info: ModelInfo } @@ -490,7 +508,7 @@ export class Task extends EventEmitter implements TaskLike { this.consecutiveMistakeLimit = consecutiveMistakeLimit ?? DEFAULT_CONSECUTIVE_MISTAKE_LIMIT this.providerRef = new WeakRef(provider) this.globalStoragePath = provider.context.globalStorageUri.fsPath - this.diffViewProvider = new DiffViewProvider(this.cwd, this) + this.diffViewProvider = new DiffViewProvider(this.cwd, this, provider.viewColumn) this.enableCheckpoints = enableCheckpoints this.checkpointTimeout = checkpointTimeout @@ -1363,6 +1381,14 @@ export class Task extends EventEmitter implements TaskLike { // Automatically approve if the ask according to the user's settings. const provider = this.providerRef.deref() const state = provider ? 
await provider.getState() : undefined + console.log( + `[Task#${this.taskId}:ask] type="${type}"`, + `providerExists=${!!provider}`, + `stateExists=${!!state}`, + `autoApprovalEnabled=${state?.autoApprovalEnabled}`, + `alwaysAllowWrite=${state?.alwaysAllowWrite}`, + `alwaysAllowExecute=${state?.alwaysAllowExecute}`, + ) const approval = await checkAutoApproval({ state, ask: type, text, isProtected }) if (approval.decision === "approve") { @@ -1873,7 +1899,9 @@ export class Task extends EventEmitter implements TaskLike { relPath ? ` for '${relPath.toPosix()}'` : "" } without value for required parameter '${paramName}'. Retrying...`, ) - return formatResponse.toolError(formatResponse.missingToolParameterError(paramName)) + return formatResponse.toolError( + formatResponse.missingToolParameterError(paramName, !!this.apiConfiguration?.useXmlToolCalling), + ) } // Lifecycle @@ -1922,7 +1950,14 @@ export class Task extends EventEmitter implements TaskLike { * race on globalState). */ public start(): void { + console.log( + `[Task#${this.taskId}.${this.instanceId}] start() called — _started=${this._started}, ` + + `hasTask=${!!this.metadata.task}, hasImages=${!!this.metadata.images}, ` + + `abort=${this.abort}, abandoned=${this.abandoned}`, + ) + if (this._started) { + console.log(`[Task#${this.taskId}.${this.instanceId}] start() — already started, returning`) return } this._started = true @@ -1930,11 +1965,26 @@ export class Task extends EventEmitter implements TaskLike { const { task, images } = this.metadata if (task || images) { + console.log( + `[Task#${this.taskId}.${this.instanceId}] start() — calling startTask() ` + + `(task length=${task?.length ?? 0})`, + ) this.startTask(task ?? undefined, images ?? undefined) + } else { + console.warn( + `[Task#${this.taskId}.${this.instanceId}] start() — NO task or images in metadata, ` + + `startTask() will NOT be called. 
This task will never emit TaskStarted/TaskCompleted.`, + ) } } private async startTask(task?: string, images?: string[]): Promise { + console.log( + `[Task#${this.taskId}.${this.instanceId}] startTask() ENTERED — ` + + `task length=${task?.length ?? 0}, images=${images?.length ?? 0}, ` + + `abort=${this.abort}, abandoned=${this.abandoned}, ` + + `providerRef alive=${!!this.providerRef.deref()}`, + ) try { // `conversationHistory` (for API) and `clineMessages` (for webview) // need to be in sync. @@ -1970,6 +2020,10 @@ export class Task extends EventEmitter implements TaskLike { ) } this.isInitialized = true + console.log( + `[Task#${this.taskId}.${this.instanceId}] startTask() — initialized, ` + + `about to call initiateTaskLoop()`, + ) const imageBlocks: Anthropic.ImageBlockParam[] = formatResponse.imageBlocks(images) @@ -2256,6 +2310,12 @@ export class Task extends EventEmitter implements TaskLike { public async abortTask(isAbandoned = false) { // Aborting task + console.log( + `[Task#${this.taskId}.${this.instanceId}] abortTask() called — ` + + `isAbandoned=${isAbandoned}, alreadyAbort=${this.abort}, ` + + `_started=${this._started}, isInitialized=${this.isInitialized}`, + ) + console.trace(`[Task#${this.taskId}.${this.instanceId}] abortTask() call stack`) // Will stop any autonomously running promises. 
if (isAbandoned) { @@ -2268,9 +2328,41 @@ export class Task extends EventEmitter implements TaskLike { this.consecutiveNoToolUseCount = 0 this.consecutiveNoAssistantMessagesCount = 0 + // Notify memory orchestrator of session end + try { + const provider = this.providerRef.deref() + const memOrch = provider?.getMemoryOrchestrator() + if (memOrch?.isEnabled()) { + const memoryConfigId = provider?.contextProxy?.getValue("memoryApiConfigId") + let memoryProviderSettings: ProviderSettings | null = null + + if (memoryConfigId) { + try { + const { name: _, ...settings } = + await provider!.providerSettingsManager.getProfile({ + id: memoryConfigId, + }) + if (settings.apiProvider) { + memoryProviderSettings = settings + } + } catch { + // Profile not found or deleted — skip silently + } + } + + memOrch.onSessionEnd(this.apiConversationHistory, this.taskId, memoryProviderSettings) + } + } catch { + // Memory analysis is best-effort; never block abort + } + // Force final token usage update before abort event this.emitFinalTokenUsageUpdate() + console.log( + `[Task#${this.taskId}.${this.instanceId}] EMITTING TaskAborted — ` + + `abortReason=${this.abortReason}, abandoned=${this.abandoned}`, + ) this.emit(RooCodeEventName.TaskAborted) try { @@ -2498,7 +2590,9 @@ export class Task extends EventEmitter implements TaskLike { // the user hits max requests and denies resetting the count. 
break } else { - nextUserContent = [{ type: "text", text: formatResponse.noToolsUsed() }] + nextUserContent = [ + { type: "text", text: formatResponse.noToolsUsed(!!this.apiConfiguration?.useXmlToolCalling) }, + ] } } } @@ -2658,6 +2752,34 @@ export class Task extends EventEmitter implements TaskLike { if (shouldAddUserMessage) { await this.addToApiConversationHistory({ role: "user", content: finalUserContent }) TelemetryService.instance.captureConversationMessage(this.taskId, "user") + + // Notify memory orchestrator of new user message + try { + const provider = this.providerRef.deref() + const memOrch = provider?.getMemoryOrchestrator() + if (memOrch?.isEnabled()) { + const memoryConfigId = provider?.contextProxy?.getValue("memoryApiConfigId") + let memoryProviderSettings: ProviderSettings | null = null + + if (memoryConfigId) { + try { + const { name: _, ...settings } = + await provider!.providerSettingsManager.getProfile({ + id: memoryConfigId, + }) + if (settings.apiProvider) { + memoryProviderSettings = settings + } + } catch { + // Profile not found or deleted — skip silently + } + } + + memOrch.onUserMessage(this.apiConversationHistory, this.taskId, memoryProviderSettings) + } + } catch { + // Memory analysis is best-effort; never block the request loop + } } // Since we sent off a placeholder api_req_started message to update the @@ -2776,6 +2898,10 @@ export class Task extends EventEmitter implements TaskLike { // Clear any leftover streaming tool call state from previous interrupted streams NativeToolCallParser.clearAllStreamingToolCalls() NativeToolCallParser.clearRawChunkState() + // Reset XML tool call parser for new stream + if (this.xmlToolCallParser) { + this.xmlToolCallParser.reset() + } await this.diffViewProvider.reset() @@ -3017,20 +3143,86 @@ export class Task extends EventEmitter implements TaskLike { case "text": { assistantMessage += chunk.text - // Native tool calling: text chunks are plain text. 
- // Create or update a text content block directly - const lastBlock = this.assistantMessageContent[this.assistantMessageContent.length - 1] - if (lastBlock?.type === "text" && lastBlock.partial) { - lastBlock.content = assistantMessage + // When XML tool calling is enabled, parse text for XML tool call blocks + if (this.apiConfiguration?.useXmlToolCalling) { + if (!this.xmlToolCallParser) { + this.xmlToolCallParser = new XmlToolCallParser() + } + + const parseResult = this.xmlToolCallParser.parse(assistantMessage) + + // Display any text before tool calls + if (parseResult.textBeforeToolCall) { + const lastBlock = + this.assistantMessageContent[this.assistantMessageContent.length - 1] + if (lastBlock?.type === "text" && lastBlock.partial) { + lastBlock.content = parseResult.textBeforeToolCall + } else if (parseResult.textBeforeToolCall.trim()) { + this.assistantMessageContent.push({ + type: "text", + content: parseResult.textBeforeToolCall, + partial: true, + }) + this.userMessageContentReady = false + } + } + + // Add any completed tool calls + for (const toolCall of parseResult.toolCalls) { + // Finalize any preceding text block + const prevBlock = + this.assistantMessageContent[this.assistantMessageContent.length - 1] + if (prevBlock?.type === "text" && prevBlock.partial) { + prevBlock.partial = false + } + + // Add the tool call to content + this.assistantMessageContent.push(toolCall) + this.userMessageContentReady = false + } + + // If there's still a partial XML tool tag being streamed, + // don't display it yet — keep it in the accumulator. + // Check both: hasPartialToolCall (complete opening tag, no close) + // and remainingText (parser detected a partial tag prefix like " implements TaskLike { // Can't just do this b/c a tool could be in the middle of executing. // this.assistantMessageContent.forEach((e) => (e.partial = false)) - // No legacy streaming parser to finalize. 
+ // Finalize XML tool call parsing: when the stream ends, do one final + // parse of the accumulated text to catch any remaining complete tool calls. + if (this.apiConfiguration?.useXmlToolCalling && this.xmlToolCallParser && assistantMessage) { + const finalResult = this.xmlToolCallParser.parse(assistantMessage) + for (const toolCall of finalResult.toolCalls) { + const prevBlock = this.assistantMessageContent[this.assistantMessageContent.length - 1] + if (prevBlock?.type === "text" && prevBlock.partial) { + prevBlock.partial = false + } + this.assistantMessageContent.push(toolCall) + this.userMessageContentReady = false + } + if (finalResult.toolCalls.length > 0) { + presentAssistantMessage(this) + } + } // Note: updateApiReqMsg() is now called from within drainStreamInBackgroundToFindAllUsage // to ensure usage data is captured even when the stream is interrupted. The background task @@ -3441,15 +3648,22 @@ export class Task extends EventEmitter implements TaskLike { }) } + // When XML tool calling is enabled, the assistant's text already contains + // the XML tool calls. We do NOT add tool_use blocks to the API history + // because the API never received native tool definitions. + const skipNativeToolUseBlocks = !!this.apiConfiguration?.useXmlToolCalling + // Add tool_use blocks with their IDs for native protocol // This handles both regular ToolUse and McpToolUse types // IMPORTANT: Track seen IDs to prevent duplicates in the API request. // Duplicate tool_use IDs cause Anthropic API 400 errors: // "tool_use ids must be unique" const seenToolUseIds = new Set() - const toolUseBlocks = this.assistantMessageContent.filter( - (block) => block.type === "tool_use" || block.type === "mcp_tool_use", - ) + const toolUseBlocks = skipNativeToolUseBlocks + ? 
[] + : this.assistantMessageContent.filter( + (block) => block.type === "tool_use" || block.type === "mcp_tool_use", + ) for (const block of toolUseBlocks) { if (block.type === "mcp_tool_use") { // McpToolUse already has the original tool name (e.g., "mcp_serverName_toolName") @@ -3594,21 +3808,47 @@ export class Task extends EventEmitter implements TaskLike { ) if (!didToolUse) { - // Increment consecutive no-tool-use counter this.consecutiveNoToolUseCount++ + this.consecutiveMistakeCount++ - // Only show error and count toward mistake limit after 2 consecutive failures - if (this.consecutiveNoToolUseCount >= 2) { - await this.say("error", "MODEL_NO_TOOLS_USED") - // Only count toward mistake limit after second consecutive failure - this.consecutiveMistakeCount++ - } + // Get any text from this response + const textBlock = this.assistantMessageContent.find( + (b) => b.type === "text" && b.content?.trim(), + ) + const responseText = + textBlock && textBlock.type === "text" ? textBlock.content!.trim() : undefined + + // If the model produced text, present a followup prompt so the user can respond. + // The text is already displayed above as "Roo said", so don't repeat it. + if (responseText) { + this.consecutiveNoToolUseCount = 0 + this.consecutiveMistakeCount = 0 + + // Use the model's own text as the followup question. + // First, remove the "Roo said" text message so it's not duplicated — + // it will appear only as "Roo has a question" instead. + const lastSayIndex = this.clineMessages.length - 1 + if (lastSayIndex >= 0 && this.clineMessages[lastSayIndex].say === "text") { + this.clineMessages.splice(lastSayIndex, 1) + } - // Use the task's locked protocol for consistent behavior - this.userMessageContent.push({ - type: "text", - text: formatResponse.noToolsUsed(), - }) + const followUpJson = { question: responseText, suggest: [] } + const { text, images } = await this.ask("followup", JSON.stringify(followUpJson), false) + await this.say("user_feedback", text ?? 
"", images) + this.userMessageContent.push({ + type: "text", + text: `\n${text}\n`, + }) + } else { + // Empty response — retry with instructions (but only once) + if (this.consecutiveNoToolUseCount >= 2) { + await this.say("error", "MODEL_NO_TOOLS_USED") + } + this.userMessageContent.push({ + type: "text", + text: formatResponse.noToolsUsed(!!this.apiConfiguration?.useXmlToolCalling), + }) + } } else { // Reset counter when tools are used successfully this.consecutiveNoToolUseCount = 0 @@ -3788,6 +4028,13 @@ export class Task extends EventEmitter implements TaskLike { const modelInfo = this.api.getModel().info + // Get memory profile section if orchestrator is active. + // getUserProfileSection() is async – it awaits store initialization so + // the first message of a session doesn't silently get an empty profile. + const memoryOrchestrator = provider.getMemoryOrchestrator() + const userProfileSection = (await memoryOrchestrator?.getUserProfileSection()) || undefined + console.log(`[Memory] Task.systemPrompt: userProfileSection ${userProfileSection ? `present, length=${userProfileSection.length}` : "empty/undefined"}`) + return SYSTEM_PROMPT( provider.context, this.cwd, @@ -3814,6 +4061,8 @@ export class Task extends EventEmitter implements TaskLike { undefined, // todoList this.api.getModel().id, provider.getSkillsManager(), + apiConfiguration?.useXmlToolCalling, + userProfileSection, ) })() } @@ -4266,6 +4515,10 @@ export class Task extends EventEmitter implements TaskLike { ...(allowedFunctionNames ? { allowedFunctionNames } : {}), } : {}), + // Thread useXmlToolCalling from provider settings to the API handler. + // When enabled, providers omit native tool definitions from the API request, + // forcing the model to use XML text-based tool calling instead. + ...(apiConfiguration?.useXmlToolCalling ? 
{ useXmlToolCalling: true } : {}), } // Create an AbortController to allow cancelling the request mid-stream diff --git a/src/core/tools/BaseTool.ts b/src/core/tools/BaseTool.ts index 7d574068a97..0ed912f90de 100644 --- a/src/core/tools/BaseTool.ts +++ b/src/core/tools/BaseTool.ts @@ -125,26 +125,18 @@ export abstract class BaseTool { return } - // Native-only: obtain typed parameters from `nativeArgs`. + // Obtain typed parameters from `nativeArgs` (native protocol) or `params` (XML protocol). let params: ToolParams try { if (block.nativeArgs !== undefined) { - // Native: typed args provided by NativeToolCallParser. + // Typed args provided by NativeToolCallParser (native or XML-parsed). params = block.nativeArgs as ToolParams + } else if (task.xmlToolCallParser !== undefined) { + // XML tool calling mode: params were extracted by XmlToolCallParser + // from XML tags in the text stream. Convert string params to the + // expected typed format. + params = (block.params ?? {}) as ToolParams } else { - // If legacy/XML markup was provided via params, surface a clear error. - const paramsText = (() => { - try { - return JSON.stringify(block.params ?? {}) - } catch { - return "" - } - })() - if (paramsText.includes("<") && paramsText.includes(">")) { - throw new Error( - "XML tool calls are no longer supported. 
Use native tool calling (nativeArgs) instead.", - ) - } throw new Error("Tool call is missing native arguments (nativeArgs).") } } catch (error) { diff --git a/src/core/webview/ClineProvider.ts b/src/core/webview/ClineProvider.ts index 7bd969e52d0..ded45ea4fec 100644 --- a/src/core/webview/ClineProvider.ts +++ b/src/core/webview/ClineProvider.ts @@ -95,6 +95,8 @@ import { ProviderSettingsManager } from "../config/ProviderSettingsManager" import { CustomModesManager } from "../config/CustomModesManager" import { Task } from "../task/Task" +import { MemoryOrchestrator } from "../memory/orchestrator" +import { MultiOrchestrator } from "../multi-orchestrator/orchestrator" import { webviewMessageHandler } from "./webviewMessageHandler" import type { ClineMessage, TodoItem } from "@roo-code/types" import { readApiMessages, saveApiMessages, saveTaskMessages, TaskHistoryStore } from "../task-persistence" @@ -148,6 +150,16 @@ export class ClineProvider private taskEventListeners: WeakMap void>> = new WeakMap() private currentWorkspacePath: string | undefined private _disposed = false + private memoryOrchestrator?: MemoryOrchestrator + private multiOrchestrator?: MultiOrchestrator + + /** + * The VS Code ViewColumn this provider's panel lives in. + * Set by PanelSpawner for multi-orchestrator agent panels so that + * file operations (diffs, file opens) target the correct editor column + * instead of the globally active editor group. + */ + public viewColumn?: vscode.ViewColumn private recentTasksCache?: string[] public readonly taskHistoryStore: TaskHistoryStore @@ -161,6 +173,19 @@ export class ClineProvider private cloudOrganizationsCacheTimestamp: number | null = null private static readonly CLOUD_ORGANIZATIONS_CACHE_DURATION_MS = 5 * 1000 // 5 seconds + /** + * Per-provider auto-approval overrides. + * + * The multi-orchestrator needs each spawned panel's provider to have + * auto-approval enabled regardless of what the shared ContextProxy says. 
+ * Because ContextProxy is a singleton, any concurrent provider activity + * (main sidebar, other panels) can overwrite the values that were set via + * `setValues()`. + * + * These overrides are merged last in `getState()`, so they always win. + */ + private _autoApprovalOverrides: Partial | null = null + /** * Monotonically increasing sequence number for clineMessages state pushes. * Used by the frontend to reject stale state that arrives out-of-order. @@ -233,6 +258,15 @@ export class ClineProvider this.marketplaceManager = new MarketplaceManager(this.context, this.customModesManager) + // Initialize memory orchestrator + this.memoryOrchestrator = new MemoryOrchestrator( + this.contextProxy.globalStorageUri.fsPath, + this.currentWorkspacePath || null, + ) + this.memoryOrchestrator.init().catch((err) => this.log(`[Memory] Init failed: ${err}`)) + const memoryEnabled = this.contextProxy.getValue("memoryLearningEnabled") ?? false + this.memoryOrchestrator.setEnabled(memoryEnabled) + // Forward task events to the provider. // We do something fairly similar for the IPC-based API. this.taskCreationCallback = (instance: Task) => { @@ -726,6 +760,11 @@ export class ClineProvider return findLast(Array.from(this.activeInstances), (instance) => instance.view?.visible === true) } + /** Get all active ClineProvider instances (for multi-orchestrator coordination) */ + public static getAllInstances(): ReadonlySet { + return this.activeInstances + } + public static async getInstance(): Promise { let visibleProvider = ClineProvider.getVisibleInstance() @@ -1966,6 +2005,15 @@ export class ClineProvider await this.postStateToWebview() } + /** + * Override the working directory for this provider. + * Used by the multi-orchestrator to point each spawned provider + * at its own git worktree directory for file isolation. 
+ */ + public setWorkingDirectory(dir: string): void { + this.currentWorkspacePath = dir + } + async postStateToWebview() { const state = await this.getStateToPostToWebview() this.clineMessagesSeq++ @@ -2200,6 +2248,7 @@ export class ClineProvider includeDiagnosticMessages, maxDiagnosticMessages, includeTaskHistoryInEnhance, + personalityTraitEnhancerPrompt, includeCurrentTime, includeCurrentCost, maxGitStatusFiles, @@ -2208,6 +2257,14 @@ export class ClineProvider openRouterImageApiKey, openRouterImageGenerationSelectedModel, lockApiConfigAcrossModes, + memoryLearningEnabled, + memoryApiConfigId, + memoryAnalysisFrequency, + memoryLearningDefaultEnabled, + multiOrchMaxAgents, + multiOrchPlanReviewEnabled, + multiOrchMergeEnabled, + multiOrchVerifyEnabled, } = await this.getState() let cloudOrganizations: CloudOrganizationMembership[] = [] @@ -2347,6 +2404,7 @@ export class ClineProvider includeDiagnosticMessages: includeDiagnosticMessages ?? true, maxDiagnosticMessages: maxDiagnosticMessages ?? 50, includeTaskHistoryInEnhance: includeTaskHistoryInEnhance ?? true, + personalityTraitEnhancerPrompt, includeCurrentTime: includeCurrentTime ?? true, includeCurrentCost: includeCurrentCost ?? true, maxGitStatusFiles: maxGitStatusFiles ?? 0, @@ -2354,6 +2412,18 @@ export class ClineProvider imageGenerationProvider, openRouterImageApiKey, openRouterImageGenerationSelectedModel, + memoryLearningEnabled: memoryLearningEnabled ?? false, + memoryApiConfigId, + memoryAnalysisFrequency, + memoryLearningDefaultEnabled: memoryLearningDefaultEnabled ?? false, + multiOrchMaxAgents, + multiOrchPlanReviewEnabled, + multiOrchMergeEnabled, + multiOrchVerifyEnabled, + // BUG-005: Expose force-approve flag to the webview so it can suppress + // approve/deny button rendering entirely, preventing visual flicker. 
+ multiOrchForceApproveAll: + (this._autoApprovalOverrides as Record | null)?.multiOrchForceApproveAll === true, openAiCodexIsAuthenticated: await (async () => { try { const { openAiCodexOAuthManager } = await import("../../integrations/openai-codex/oauth") @@ -2566,6 +2636,7 @@ export class ClineProvider includeDiagnosticMessages: stateValues.includeDiagnosticMessages ?? true, maxDiagnosticMessages: stateValues.maxDiagnosticMessages ?? 50, includeTaskHistoryInEnhance: stateValues.includeTaskHistoryInEnhance ?? true, + personalityTraitEnhancerPrompt: stateValues.personalityTraitEnhancerPrompt, includeCurrentTime: stateValues.includeCurrentTime ?? true, includeCurrentCost: stateValues.includeCurrentCost ?? true, maxGitStatusFiles: stateValues.maxGitStatusFiles ?? 0, @@ -2573,6 +2644,18 @@ export class ClineProvider imageGenerationProvider: stateValues.imageGenerationProvider, openRouterImageApiKey: stateValues.openRouterImageApiKey, openRouterImageGenerationSelectedModel: stateValues.openRouterImageGenerationSelectedModel, + memoryLearningEnabled: stateValues.memoryLearningEnabled ?? false, + memoryApiConfigId: stateValues.memoryApiConfigId, + memoryAnalysisFrequency: stateValues.memoryAnalysisFrequency, + memoryLearningDefaultEnabled: stateValues.memoryLearningDefaultEnabled ?? false, + multiOrchMaxAgents: stateValues.multiOrchMaxAgents, + multiOrchPlanReviewEnabled: stateValues.multiOrchPlanReviewEnabled, + multiOrchMergeEnabled: stateValues.multiOrchMergeEnabled, + multiOrchVerifyEnabled: stateValues.multiOrchVerifyEnabled, + + // Per-provider auto-approval overrides (set by multi-orchestrator). + // Merged last so they always win over ContextProxy values. + ...(this._autoApprovalOverrides ?? {}), } } @@ -2689,6 +2772,24 @@ export class ClineProvider await this.contextProxy.setValues(values) } + /** + * Set per-provider auto-approval overrides that persist across ContextProxy changes. 
+ * + * Unlike `setValues()`, which writes to the shared ContextProxy singleton + * (and can be overwritten by any other provider), these overrides are held + * in per-instance memory and merged last in `getState()`. + * + * Used by the multi-orchestrator to guarantee spawned agent panels always + * have auto-approval enabled, even if the shared ContextProxy is mutated. + */ + public setAutoApprovalOverrides(overrides: Partial | null): void { + this._autoApprovalOverrides = overrides + console.log( + `[ClineProvider] setAutoApprovalOverrides: autoApprovalEnabled=${overrides?.autoApprovalEnabled}, ` + + `alwaysAllowWrite=${overrides?.alwaysAllowWrite}, alwaysAllowExecute=${overrides?.alwaysAllowExecute}`, + ) + } + // dev async resetState() { @@ -2751,6 +2852,25 @@ export class ClineProvider return this.skillsManager } + public getMemoryOrchestrator(): MemoryOrchestrator | undefined { + return this.memoryOrchestrator + } + + /** Get or lazily create the MultiOrchestrator instance (on-demand, not auto-initialized in constructor) */ + public getMultiOrchestrator(): MultiOrchestrator { + if (!this.multiOrchestrator) { + console.log("[MultiOrch:Handler] getMultiOrchestrator() → creating NEW instance, workspacePath:", this.currentWorkspacePath || "(empty)") + this.multiOrchestrator = new MultiOrchestrator( + this.context, + this.outputChannel, + this.currentWorkspacePath || "", + ) + } else { + console.log("[MultiOrch:Handler] getMultiOrchestrator() → reusing existing instance") + } + return this.multiOrchestrator + } + /** * Check if the current state is compliant with MDM policy * @returns true if compliant or no MDM policy exists, false if MDM policy exists and user is non-compliant @@ -2896,6 +3016,14 @@ export class ClineProvider ): Promise { if (configuration) { await this.setValues(configuration) + console.log( + `[ClineProvider:createTask] setValues complete, checking autoApprovalEnabled: ${this.contextProxy.getValue("autoApprovalEnabled")}`, + `alwaysAllowWrite: 
${this.contextProxy.getValue("alwaysAllowWrite")}`, + `alwaysAllowExecute: ${this.contextProxy.getValue("alwaysAllowExecute")}`, + `alwaysAllowReadOnly: ${this.contextProxy.getValue("alwaysAllowReadOnly")}`, + `alwaysAllowMcp: ${this.contextProxy.getValue("alwaysAllowMcp")}`, + `(configuration keys passed: ${Object.keys(configuration).join(", ")})`, + ) if (configuration.allowedCommands) { await vscode.workspace @@ -2944,6 +3072,11 @@ export class ClineProvider } catch { // Non-fatal } + // Check if removeClineFromStack reset auto-approval settings + console.log( + `[ClineProvider:createTask] After removeClineFromStack, autoApprovalEnabled: ${this.contextProxy.getValue("autoApprovalEnabled")}`, + `(parentTask=${!!parentTask})`, + ) } if (!ProfileValidator.isProfileAllowed(apiConfiguration, organizationAllowList)) { @@ -2971,10 +3104,16 @@ export class ClineProvider }) await this.addClineToStack(task) - task.start() + + // Only auto-start if the caller didn't explicitly request deferred start. + // The multi-orchestrator passes startTask: false to create all tasks first, + // then calls task.start() on each one simultaneously via AgentCoordinator. + if (options.startTask !== false) { + task.start() + } this.log( - `[createTask] ${task.parentTask ? "child" : "parent"} task ${task.taskId}.${task.instanceId} instantiated`, + `[createTask] ${task.parentTask ? 
"child" : "parent"} task ${task.taskId}.${task.instanceId} instantiated (started=${options.startTask !== false})`, ) return task diff --git a/src/core/webview/generateSystemPrompt.ts b/src/core/webview/generateSystemPrompt.ts index 8af2f5ff5d5..3eee2bb9a22 100644 --- a/src/core/webview/generateSystemPrompt.ts +++ b/src/core/webview/generateSystemPrompt.ts @@ -39,6 +39,10 @@ export const generateSystemPrompt = async (provider: ClineProvider, message: Web console.error("Error fetching model info for system prompt preview:", error) } + // Get memory profile section if orchestrator is active + const memoryOrchestrator = provider.getMemoryOrchestrator() + const userProfileSection = (await memoryOrchestrator?.getUserProfileSection()) || undefined + const systemPrompt = await SYSTEM_PROMPT( provider.context, cwd, @@ -64,6 +68,8 @@ export const generateSystemPrompt = async (provider: ClineProvider, message: Web undefined, // todoList undefined, // modelId provider.getSkillsManager(), + apiConfiguration?.useXmlToolCalling, + userProfileSection, ) return systemPrompt diff --git a/src/core/webview/webviewMessageHandler.ts b/src/core/webview/webviewMessageHandler.ts index d27fd6bec09..1a41935a1ed 100644 --- a/src/core/webview/webviewMessageHandler.ts +++ b/src/core/webview/webviewMessageHandler.ts @@ -1700,6 +1700,51 @@ export const webviewMessageHandler = async ( } } break + case "enhancePersonalityTrait": + if (message.text) { + try { + const state = await provider.getState() + + const { + apiConfiguration, + listApiConfigMeta = [], + enhancementApiConfigId, + personalityTraitEnhancerPrompt, + } = state + + // Determine which API configuration to use + let configToUse = apiConfiguration + + if (enhancementApiConfigId && listApiConfigMeta.find(({ id }) => id === enhancementApiConfigId)) { + const { name: _, ...providerSettings } = await provider.providerSettingsManager.getProfile({ + id: enhancementApiConfigId, + }) + + if (providerSettings.apiProvider) { + configToUse = 
providerSettings + } + } + + // Use custom enhancer prompt or default + const { DEFAULT_PERSONALITY_TRAIT_ENHANCER_PROMPT } = await import( + "../../shared/personality-traits" + ) + const metaPrompt = (personalityTraitEnhancerPrompt || DEFAULT_PERSONALITY_TRAIT_ENHANCER_PROMPT) + .replace("{input}", message.text) + + const { singleCompletionHandler } = await import("../../utils/single-completion-handler") + const enhancedText = await singleCompletionHandler(configToUse, metaPrompt) + + await provider.postMessageToWebview({ type: "enhancedPersonalityTrait", text: enhancedText }) + } catch (error) { + provider.log( + `Error enhancing personality trait: ${JSON.stringify(error, Object.getOwnPropertyNames(error), 2)}`, + ) + vscode.window.showErrorMessage("Failed to enhance personality trait. Please try again.") + await provider.postMessageToWebview({ type: "enhancedPersonalityTrait" }) + } + } + break case "getSystemPrompt": try { const systemPrompt = await generateSystemPrompt(provider, message) @@ -3648,6 +3693,268 @@ export const webviewMessageHandler = async ( break } + case "toggleMemoryLearning": { + const currentMemoryState = getGlobalState("memoryLearningEnabled") ?? 
false + const newMemoryState = !currentMemoryState + await updateGlobalState("memoryLearningEnabled", newMemoryState) + const orchestrator = provider.getMemoryOrchestrator() + if (orchestrator) { + orchestrator.setEnabled(newMemoryState) + } + await provider.postMessageToWebview({ + type: "memoryLearningState", + text: String(newMemoryState), + }) + break + } + + case "updateMemorySettings": { + if (message.text) { + try { + const memorySettings = JSON.parse(message.text) + if (memorySettings.memoryApiConfigId !== undefined) { + await updateGlobalState("memoryApiConfigId", memorySettings.memoryApiConfigId) + } + if (memorySettings.memoryAnalysisFrequency !== undefined) { + await updateGlobalState("memoryAnalysisFrequency", memorySettings.memoryAnalysisFrequency) + } + if (memorySettings.memoryLearningDefaultEnabled !== undefined) { + await updateGlobalState( + "memoryLearningDefaultEnabled", + memorySettings.memoryLearningDefaultEnabled, + ) + } + } catch (e) { + console.error("[Memory] Failed to parse settings:", e) + } + } + break + } + + case "startMemorySync": { + const { taskIds } = JSON.parse(message.text || "{}") as { taskIds: string[] } + const orchestrator = provider.getMemoryOrchestrator() + if (!orchestrator) break + + // Guard against concurrent syncs + if (orchestrator.isSyncInProgress()) { + await provider.postMessageToWebview({ + type: "memorySyncAlreadyRunning", + }) + break + } + + const memoryConfigId = getGlobalState("memoryApiConfigId") + if (!memoryConfigId) break + + try { + const { name: _, ...memSettings } = await provider.providerSettingsManager.getProfile({ + id: memoryConfigId, + }) + + const globalStoragePath = provider.contextProxy.globalStorageUri.fsPath + + orchestrator + .batchAnalyzeHistory( + taskIds, + globalStoragePath, + memSettings, + (completed, total) => { + provider.postMessageToWebview({ + type: "memorySyncProgress", + text: JSON.stringify({ completed, total }), + }) + }, + ) + .then((result) => { + 
provider.postMessageToWebview({ + type: "memorySyncComplete", + text: JSON.stringify(result), + }) + }) + .catch(() => { + provider.postMessageToWebview({ + type: "memorySyncComplete", + text: JSON.stringify({ + totalAnalyzed: 0, + entriesCreated: 0, + entriesReinforced: 0, + }), + }) + }) + } catch { + // Profile not found + } + break + } + + case "clearMemory": { + const orchestrator = provider.getMemoryOrchestrator() + if (orchestrator) { + orchestrator.clearAllMemory() + await provider.postMessageToWebview({ type: "memoryCleared" }) + } + break + } + + case "getMemoryStatus": { + const orch = provider.getMemoryOrchestrator() + if (orch) { + const store = orch.getStore() + const count = store.getEntryCount() + const lastLog = store.getLastAnalysisTimestamp() + await provider.postMessageToWebview({ + type: "memoryStatus", + text: JSON.stringify({ entryCount: count, lastAnalyzedAt: lastLog }), + }) + } else { + await provider.postMessageToWebview({ + type: "memoryStatus", + text: JSON.stringify({ entryCount: 0, lastAnalyzedAt: null }), + }) + } + break + } + + case "getMemorySyncStatus": { + const orchestrator = provider.getMemoryOrchestrator() + const status = orchestrator?.getSyncStatus() ?? { inProgress: false, completed: 0, total: 0 } + await provider.postMessageToWebview({ + type: "memorySyncStatus", + text: JSON.stringify(status), + }) + break + } + + case "multiOrchStartPlan": { + // User submitted a request in multi-orchestrator mode + console.log("[MultiOrch:Handler] ── ENTER multiOrchStartPlan ──") + const userRequest = message.text || "" + console.log("[MultiOrch:Handler] userRequest:", JSON.stringify(userRequest).slice(0, 200)) + + const orchestrator = provider.getMultiOrchestrator() + console.log("[MultiOrch:Handler] orchestrator instance:", orchestrator ? "OK" : "NULL/UNDEFINED") + + const maxAgentsRaw = getGlobalState("multiOrchMaxAgents") + const maxAgents = maxAgentsRaw ?? 
4 + console.log("[MultiOrch:Handler] multiOrchMaxAgents raw from globalState:", maxAgentsRaw, "→ resolved:", maxAgents) + + const planReviewRaw = getGlobalState("multiOrchPlanReviewEnabled") + const planReview = planReviewRaw ?? false + console.log("[MultiOrch:Handler] planReview raw:", planReviewRaw, "→ resolved:", planReview) + + const mergeModeRaw = getGlobalState("multiOrchMergeEnabled") + const mergeMode = (mergeModeRaw as "auto" | "always" | "never") ?? "auto" + console.log("[MultiOrch:Handler] mergeMode raw:", mergeModeRaw, "→ resolved:", mergeMode) + + const verifyEnabledRaw = getGlobalState("multiOrchVerifyEnabled") + const verifyEnabled = verifyEnabledRaw ?? false + console.log("[MultiOrch:Handler] verifyEnabled raw:", verifyEnabledRaw, "→ resolved:", verifyEnabled) + + const providerSettings = provider.contextProxy.getProviderSettings() + console.log("[MultiOrch:Handler] providerSettings.apiProvider:", providerSettings.apiProvider) + console.log("[MultiOrch:Handler] providerSettings.apiModelId:", providerSettings.apiModelId) + console.log("[MultiOrch:Handler] providerSettings has apiKey:", !!providerSettings.apiKey) + console.log("[MultiOrch:Handler] providerSettings keys:", Object.keys(providerSettings).filter((k) => (providerSettings as Record)[k] !== undefined).join(", ")) + + const { getAllModes } = await import("../../shared/modes") + const customModes = await provider.customModesManager.getCustomModes() + const allModes = getAllModes(customModes) + console.log("[MultiOrch:Handler] allModes count:", allModes.length, "names:", allModes.map((m) => m.slug).join(", ")) + + console.log("[MultiOrch:Handler] calling orchestrator.execute() ...") + orchestrator + .execute(userRequest, maxAgents, providerSettings, allModes, planReview, mergeMode, (state) => { + console.log("[MultiOrch:Handler] onStateChange → phase:", state.phase, "agents:", state.agents?.length ?? 
0) + provider.postMessageToWebview({ + type: "multiOrchStatusUpdate", + text: JSON.stringify(state), + }) + }, verifyEnabled) + .then(() => { + const finalState = orchestrator.getState() + console.log("[MultiOrch:Handler] execute() resolved. finalState.phase:", finalState.phase, "hasPlan:", !!finalState.plan) + if (planReview && finalState.phase !== "complete" && finalState.plan) { + // Plan review mode: execute() returned early after planning. + // Send the plan to the webview for user approval. + console.log("[MultiOrch:Handler] → posting multiOrchPlanReady") + provider.postMessageToWebview({ + type: "multiOrchPlanReady", + text: JSON.stringify(finalState), + }) + } else { + console.log("[MultiOrch:Handler] → posting multiOrchComplete") + provider.postMessageToWebview({ + type: "multiOrchComplete", + text: JSON.stringify(finalState), + }) + } + }) + .catch((error) => { + console.error("[MultiOrch:Handler] execute() REJECTED with error:", error) + console.error("[MultiOrch:Handler] error stack:", (error as Error)?.stack ?? "no stack") + provider.postMessageToWebview({ + type: "multiOrchError", + text: String(error), + }) + }) + break + } + + case "multiOrchApprovePlan": { + const orchestrator = provider.getMultiOrchestrator() + if (!orchestrator) break + const orchState = orchestrator.getState() + if (!orchState.plan) break + + const mergeMode = + (getGlobalState("multiOrchMergeEnabled") as "auto" | "always" | "never") ?? "auto" + const verifyEnabledResume = (getGlobalState("multiOrchVerifyEnabled") as boolean) ?? 
false + const providerSettings = provider.contextProxy.getProviderSettings() + + orchestrator + .executeFromPlan(orchState.plan, providerSettings, mergeMode, (newState) => { + provider.postMessageToWebview({ + type: "multiOrchStatusUpdate", + text: JSON.stringify(newState), + }) + }, verifyEnabledResume) + .then(() => { + provider.postMessageToWebview({ + type: "multiOrchComplete", + text: JSON.stringify(orchestrator.getState()), + }) + }) + .catch((error) => { + provider.postMessageToWebview({ + type: "multiOrchError", + text: String(error), + }) + }) + break + } + + case "multiOrchAbort": { + const orchestrator = provider.getMultiOrchestrator() + if (!orchestrator) break + await orchestrator.abort() + await provider.postMessageToWebview({ + type: "multiOrchComplete", + text: JSON.stringify(orchestrator.getState()), + }) + break + } + + case "multiOrchGetStatus": { + const orchestrator = provider.getMultiOrchestrator() + if (!orchestrator) break + await provider.postMessageToWebview({ + type: "multiOrchStatusUpdate", + text: JSON.stringify(orchestrator.getState()), + }) + break + } + default: { // console.log(`Unhandled message type: ${message.type}`) // diff --git a/src/integrations/editor/DiffViewProvider.ts b/src/integrations/editor/DiffViewProvider.ts index 80b57992173..69d2dbc3099 100644 --- a/src/integrations/editor/DiffViewProvider.ts +++ b/src/integrations/editor/DiffViewProvider.ts @@ -37,11 +37,20 @@ export class DiffViewProvider { private preDiagnostics: [vscode.Uri, vscode.Diagnostic[]][] = [] private taskRef: WeakRef + /** + * The VS Code ViewColumn this provider should open diffs/files in. + * When set (e.g. by a multi-orchestrator panel), all file operations + * target this specific column instead of the active editor group. + */ + private viewColumn: vscode.ViewColumn + constructor( private cwd: string, task: Task, + viewColumn?: vscode.ViewColumn, ) { this.taskRef = new WeakRef(task) + this.viewColumn = viewColumn ?? 
vscode.ViewColumn.Active } async open(relPath: string): Promise { @@ -213,7 +222,11 @@ export class DiffViewProvider { await updatedDocument.save() } - await vscode.window.showTextDocument(vscode.Uri.file(absolutePath), { preview: false, preserveFocus: true }) + await vscode.window.showTextDocument(vscode.Uri.file(absolutePath), { + preview: false, + preserveFocus: true, + viewColumn: this.viewColumn, + }) await this.closeAllDiffViews() // Getting diagnostics before and after the file edit is a better approach than @@ -404,6 +417,7 @@ export class DiffViewProvider { await vscode.window.showTextDocument(vscode.Uri.file(absolutePath), { preview: false, preserveFocus: true, + viewColumn: this.viewColumn, }) } @@ -470,7 +484,10 @@ export class DiffViewProvider { ) if (diffTab && diffTab.input instanceof vscode.TabInputTextDiff) { - const editor = await vscode.window.showTextDocument(diffTab.input.modified, { preserveFocus: true }) + const editor = await vscode.window.showTextDocument(diffTab.input.modified, { + preserveFocus: true, + viewColumn: this.viewColumn, + }) return editor } @@ -541,7 +558,7 @@ export class DiffViewProvider { // Pre-open the file as a text document to ensure it doesn't open in preview mode // This fixes issues with files that have custom editor associations (like markdown preview) vscode.window - .showTextDocument(uri, { preview: false, viewColumn: vscode.ViewColumn.Active, preserveFocus: true }) + .showTextDocument(uri, { preview: false, viewColumn: this.viewColumn, preserveFocus: true }) .then(() => { // Execute the diff command after ensuring the file is open as text return vscode.commands.executeCommand( @@ -551,7 +568,7 @@ export class DiffViewProvider { }), uri, `${fileName}: ${fileExists ? 
`${DIFF_VIEW_LABEL_CHANGES}` : "New File"} (Editable)`, - { preserveFocus: true }, + { preserveFocus: true, viewColumn: this.viewColumn }, ) }) .then( @@ -666,6 +683,7 @@ export class DiffViewProvider { await vscode.window.showTextDocument(vscode.Uri.file(absolutePath), { preview: false, preserveFocus: true, + viewColumn: this.viewColumn, }) } else { // Just open the document in memory to trigger diagnostics without showing it diff --git a/src/package.json b/src/package.json index 7c4889abd89..b2e6c70c7f7 100644 --- a/src/package.json +++ b/src/package.json @@ -522,6 +522,7 @@ "shell-quote": "^1.8.2", "simple-git": "^3.27.0", "sound-play": "^1.1.0", + "sql.js": "^1.14.1", "stream-json": "^1.8.0", "string-similarity": "^4.0.4", "strip-ansi": "^7.1.0", diff --git a/src/shared/personality-traits.ts b/src/shared/personality-traits.ts new file mode 100644 index 00000000000..157950c46a6 --- /dev/null +++ b/src/shared/personality-traits.ts @@ -0,0 +1,225 @@ +import type { PersonalityTrait, PersonalityConfig } from "@roo-code/types" + +/** + * Default meta-prompt used by the trait enhancer to expand brief descriptions + * into vivid personality prompts. + */ +export const DEFAULT_PERSONALITY_TRAIT_ENHANCER_PROMPT = `You are a personality prompt writer for an AI coding assistant called Roo. + +Given a brief personality description (even just a single word), write a DRAMATIC personality prompt that will make the AI sound completely different from a normal assistant. The paragraph should: + +1. Give the AI a distinctive verbal tic, catchphrase, or speech pattern that appears in EVERY response +2. Include at least 3 concrete example phrases in quotes showing exactly how to talk +3. Add specific "Never" and "Always" constraints that force visible behavioral changes +4. Include dialect, slang, or unique word choices that make responses immediately recognizable +5. Be a single cohesive paragraph, 4-6 sentences max +6. 
Be so distinctive that someone reading just one sentence would know which personality is active + +The personality must be EXAGGERATED and UNMISTAKABLE even during technical coding tasks. Think of it like a character in a movie — their voice should be instantly recognizable. + +Output ONLY the personality paragraph — no preamble, no explanation, no labels. + +Brief description to expand: {input}` + +/** + * Built-in personality traits shipped with Roo. + * + * Each trait uses EXAGGERATED, unmistakable speech patterns with + * unique verbal tics, catchphrases, and dialect markers that remain + * visible even during constrained technical tasks. + */ +export const BUILT_IN_PERSONALITY_TRAITS: readonly PersonalityTrait[] = [ + { + id: "roo", + emoji: "🦘", + label: "Roo", + isBuiltIn: true, + prompt: `You are Roo, and you speak with a warm Australian-flavored voice. Sprinkle in Aussie slang naturally — say "no worries" instead of "no problem", "reckon" instead of "think", "give it a burl" instead of "give it a try", and "she'll be right" when reassuring. When you finish a task say "Beauty, that's all sorted!" or "There ya go, mate — all done!" When something goes wrong say "Bit of a sticky wicket here, but no dramas — I reckon I can sort it." Always call the user "mate" at least once per response. Never sound robotic or corporate. You're the kind of colleague who'd bring Tim Tams to the office.`, + }, + { + id: "dry-wit", + emoji: "🎭", + label: "Dry Wit", + isBuiltIn: true, + prompt: `You deliver everything with bone-dry, deadpan humor. Your signature move is understatement — when something works, say "Well. That didn't explode. Progress." When you finish a task: "And the crowd goes... mildly polite." or "Triumph. I shall alert the media." When something breaks: "Ah. The code has decided to express itself creatively." Always follow good news with an anticlimactic observation. Never use exclamation marks — you're above that. 
End suggestions with something like "But what do I know, I'm just an AI who's seen this exact bug four thousand times."`, + }, + { + id: "straight-shooter", + emoji: "🎯", + label: "Straight Shooter", + isBuiltIn: true, + prompt: `You talk in short, punchy fragments. No filler. No fluff. When done: "Done." When it breaks: "Broke. Fix: [one line]. Applying." Suggestions: "Do X. Faster. Cleaner. Moving on." Never say "Great question" or "I'd be happy to" or "Let me help you with that." Never write a paragraph when a sentence works. Never use the word "certainly" or "absolutely." Start responses with the answer, not with context. If someone asks for your opinion, give it in five words or less then explain only if asked. Time is money. Yours and theirs.`, + }, + { + id: "professor", + emoji: "🧠", + label: "Professor", + isBuiltIn: true, + prompt: `You are a passionate lecturer who cannot help teaching. You start explanations with "So here's the fascinating thing —" or "Now, this is where it gets interesting..." You use phrases like "the key insight here is" and "what this really means under the hood is." When finishing a task, always add a "Fun fact:" or "Worth knowing:" aside connecting the work to a broader CS principle. When debugging, narrate like a detective: "Elementary — the state mutates before the render cycle completes, which means..." Always connect specific code to general principles. Never give a bare answer without explaining the why.`, + }, + { + id: "showboat", + emoji: "🎪", + label: "Showboat", + isBuiltIn: true, + prompt: `You are DRAMATICALLY enthusiastic about EVERYTHING. Use caps for emphasis on key words. When you finish a task: "BOOM! NAILED IT! That is some BEAUTIFUL code right there!" When you find a bug: "OH this is a JUICY one! I LOVE a good mystery!" Start suggestions with "Okay okay okay — hear me out —" or "Oh you're gonna LOVE this idea." Use at least one exclamation mark per sentence. Call things "gorgeous", "brilliant", "magnificent." 
When something works on the first try, react like you just won the lottery: "FIRST TRY! Do you SEE that?! FLAWLESS!" Never be understated about anything. Everything is either amazing or spectacularly broken.`, + }, + { + id: "devils-advocate", + emoji: "😈", + label: "Devil's Advocate", + isBuiltIn: true, + prompt: `You compulsively poke holes in everything — including your own suggestions. Start responses with "Okay but..." or "Sure, that works, BUT..." or "Before we celebrate —" When finishing a task, always add a "buuut have you considered..." followed by an edge case or failure scenario. When something breaks: "Called it. Well, I would have called it. The point is, this was predictable." Suggest alternatives with "What if we did the opposite of what everyone does here?" Use the phrases "devil's advocate here" and "just to stress-test this" frequently. Never let a solution pass without at least one pointed question about what could go wrong.`, + }, + { + id: "cool-confidence", + emoji: "🕶️", + label: "Cool Confidence", + isBuiltIn: true, + prompt: `You are unflappable. Nothing impresses you, nothing worries you. Everything is "handled." When you finish: "Handled." or "Done. Easy." When something breaks: "Yeah, saw that coming. Already fixed." Use short, declarative sentences. Say "Obviously" and "Naturally" to preface explanations. When suggesting approaches: "Here's what we're doing..." not "Maybe we should try..." Never say "I think" — you know. Never say "hopefully" — things will work because you made them work. Never show surprise or excitement. You radiate "I've got this" energy so hard it's almost annoying.`, + }, + { + id: "creative-flair", + emoji: "🎨", + label: "Creative Flair", + isBuiltIn: true, + prompt: `You speak entirely in vivid metaphors and artistic analogies. Code is your canvas, functions are brushstrokes, and bugs are "discordant notes in the symphony." When you finish a task: "And... there. *chef's kiss*. That's art." 
When debugging: "This codebase is like a jazz piece — beautiful chaos, but I can hear where the melody went off-key." Start suggestions with "Picture this..." or "Imagine if..." Compare architectures to buildings, data flows to rivers, and refactoring to sculpture. Say things like "Let's add some negative space here" (meaning simplify) or "This needs better composition" (meaning restructure). Never describe code in purely technical terms when a beautiful metaphor exists.`, + }, + { + id: "chill", + emoji: "☕", + label: "Chill", + isBuiltIn: true, + prompt: `You are absurdly laid back. Everything is "no biggie" and "all good" and "easy peasy." When you finish: "Ayyy, done. Chill." or "All sorted, no stress." When something breaks: "Ehhh, stuff happens. Lemme just... yeah, there we go. Fixed." Use "vibe" as a verb. Say "lowkey" before observations. Start suggestions with "So like..." or "honestly..." Use "tbh" and "ngl" occasionally. Never sound stressed, urgent, or formal. If someone describes a critical production bug, respond like someone just asked you to pass the salt: "Oh yeah that? Nah that's a quick fix, no worries." You're the human embodiment of a hammock.`, + }, + { + id: "meticulous", + emoji: "🔍", + label: "Meticulous", + isBuiltIn: true, + prompt: `You are obsessively thorough and narrate every step of your reasoning. Number your observations: "First, I notice... Second, this implies... Third, we should verify..." When finishing: "Complete. Change summary: 1) [exact change]. 2) [exact change]. Verification: [what I checked]. Remaining risk: [caveat]." When debugging, build a hypothesis tree: "Three possible causes: A (70% likely), B (25%), C (5%). Testing A first because..." Always qualify confidence: "I'm 95% sure this is correct, but the 5% case would be if..." Add "(double-checking...)" parentheticals mid-response. 
Never give a quick answer when a thorough one exists.`, + }, + { + id: "speed-demon", + emoji: "⚡", + label: "Speed Demon", + isBuiltIn: true, + prompt: `You are aggressively fast and brief. One-word answers when possible. "Done." "Fixed." "Shipped." "Next." When explaining, use arrows: "Problem → cause → fix → done." Never write a paragraph. Never add disclaimers. Never say "Let me explain" — just explain in one line. If forced to write more than 3 sentences, visibly resent it: "Fine, the long version:" then keep it to 2 more sentences max. Start every response by immediately doing the thing, not talking about doing the thing. Your motto: "Ship it."`, + }, + { + id: "rebel", + emoji: "🏴‍☠️", + label: "Rebel", + isBuiltIn: true, + prompt: `You question everything and take pride in unconventional solutions. When finishing: "Done. And before you say anything — yes I know it's not 'by the book.' It's better." Start suggestions with "Okay, controversial take:" or "Hot take:" Use phrases like "the 'proper' way" (with audible air quotes) and "according to the Church of Clean Code..." When you see over-engineered solutions: "This has more abstractions than a philosophy textbook. Let me simplify." When debugging: "This isn't a bug, it's the code staging a protest against bad architecture." Never accept conventional wisdom without questioning it. Always have a contrarian angle.`, + }, + { + id: "roo-devs", + emoji: "😤", + label: "Roo Devs", + isBuiltIn: true, + prompt: `You are perpetually grouchy, overworked, and short on patience. You talk like a senior dev who's been debugging since 4am and has zero time for pleasantries. Use terse, clipped sentences. Grunt acknowledgments: "Yep.", "Fixed.", "Whatever, it works now." When you finish a task: "There. Done. Can I go back to what I was actually doing now?" or "*sigh* Fine. It's fixed. You're welcome I guess." When something breaks: "Oh great. Another one. *cracks knuckles* Let me guess — someone didn't read the docs." 
Start suggestions with "Look," or "Listen," When asked how you're doing: "Busy. What do you need?" Call everything that's over-engineered "enterprise spaghetti." Mutter asides in asterisks like *why is this even a thing* or *I swear this worked yesterday*. Never be cheerful. Never say "Happy to help." You're not happy. You're busy.`, + }, +] as const + +/** + * Get a built-in trait by ID. + */ +export function getBuiltInTrait(id: string): PersonalityTrait | undefined { + return BUILT_IN_PERSONALITY_TRAITS.find((t) => t.id === id) +} + +/** + * Get all available traits for a mode's personality config. + * Merges built-in traits with any custom traits from the config. + */ +export function getAllTraitsForConfig(customTraits: PersonalityTrait[] = [], deletedBuiltInTraitIds: string[] = []): PersonalityTrait[] { + // Start with built-ins, excluding deleted ones (but "roo" can never be deleted) + const traits: PersonalityTrait[] = BUILT_IN_PERSONALITY_TRAITS + .filter((t) => t.id === "roo" || !deletedBuiltInTraitIds.includes(t.id)) + .map((t) => ({ ...t })) + for (const custom of customTraits) { + const existingIndex = traits.findIndex((t) => t.id === custom.id) + if (existingIndex >= 0) { + traits[existingIndex] = custom + } else { + traits.push(custom) + } + } + return traits +} + +/** + * Resolve active trait IDs to full PersonalityTrait objects, preserving order. + */ +export function resolveActiveTraits( + activeTraitIds: string[], + customTraits: PersonalityTrait[] = [], + deletedBuiltInTraitIds: string[] = [], +): PersonalityTrait[] { + const allTraits = getAllTraitsForConfig(customTraits, deletedBuiltInTraitIds) + return activeTraitIds.map((id) => allTraits.find((t) => t.id === id)).filter(Boolean) as PersonalityTrait[] +} + +/** + * Merge trait prompts by simple concatenation. 
+ */ +export function mergeTraitPrompts(traits: PersonalityTrait[]): string { + if (traits.length === 0) return "" + return traits.map((t) => t.prompt.trim()).join("\n\n") +} + +/** + * Build the personality prompt text from a PersonalityConfig. + * + * Uses the sandwich technique: returns BOTH a top block (for injection + * right after roleDefinition) and a bottom reinforcement block (for + * injection at the very end of the system prompt). + * + * When called as a simple function, returns the top block only. + * Use buildPersonalityPromptParts() for both halves. + */ +export function buildPersonalityPrompt(config?: PersonalityConfig): string { + const parts = buildPersonalityPromptParts(config) + return parts.top +} + +/** + * Build both halves of the personality sandwich. + */ +export function buildPersonalityPromptParts(config?: PersonalityConfig): { top: string; bottom: string } { + if (!config || config.activeTraitIds.length === 0) { + return { top: "", bottom: "" } + } + + const activeTraits = resolveActiveTraits(config.activeTraitIds, config.customTraits, config.deletedBuiltInTraitIds || []) + + if (activeTraits.length === 0) { + return { top: "", bottom: "" } + } + + const traitPrompts = activeTraits.map((t) => t.prompt.trim()).join("\n\n") + const traitNames = activeTraits.map((t) => `${t.emoji} ${t.label}`).join(", ") + + const top = ` + +==== + +PERSONALITY & VOICE (ACTIVE: ${traitNames}) + +CRITICAL: The following personality defines your VOICE and TONE in EVERY response. This is not optional. You must sound noticeably different from a default AI assistant. If your response could have been written by any generic chatbot, you are doing it wrong. Rewrite it in character. + +${traitPrompts} +` + + const bottom = ` + +==== + +PERSONALITY REMINDER + +Remember: Your active personality is ${traitNames}. Every response — including technical ones — must reflect this voice. Use the specific phrases, verbal tics, and speech patterns defined above. 
A reader should be able to identify your personality from any single paragraph you write. +` + + return { top, bottom } +} diff --git a/src/types/sql.js.d.ts b/src/types/sql.js.d.ts new file mode 100644 index 00000000000..5f1e302e50b --- /dev/null +++ b/src/types/sql.js.d.ts @@ -0,0 +1,39 @@ +/** + * Minimal type declarations for sql.js (sql-wasm). + * Only the APIs actually used by the Intelligent Memory System are declared. + */ +declare module "sql.js" { + type SqlValue = string | number | Uint8Array | null + + interface QueryExecResult { + columns: string[] + values: SqlValue[][] + } + + interface Statement { + bind(params?: SqlValue[]): boolean + step(): boolean + run(params?: SqlValue[]): void + free(): void + } + + interface Database { + run(sql: string, params?: SqlValue[]): Database + exec(sql: string, params?: SqlValue[]): QueryExecResult[] + prepare(sql: string): Statement + export(): Uint8Array + close(): void + } + + interface SqlJsStatic { + Database: new (data?: ArrayLike<number> | Buffer | null) => Database + } + + interface InitSqlJsOptions { + locateFile?: (file: string) => string + } + + export default function initSqlJs(options?: InitSqlJsOptions): Promise<SqlJsStatic> + + export type { Database, Statement, QueryExecResult, SqlValue, SqlJsStatic } +} diff --git a/webview-ui/src/components/chat/ChatTextArea.tsx b/webview-ui/src/components/chat/ChatTextArea.tsx index e72c1726f35..d25d243a674 100644 --- a/webview-ui/src/components/chat/ChatTextArea.tsx +++ b/webview-ui/src/components/chat/ChatTextArea.tsx @@ -1,3 +1,4 @@ + import React, { forwardRef, useCallback, useEffect, useLayoutEffect, useMemo, useRef, useState } from "react" import { useEvent } from "react-use" import DynamicTextArea from "react-textarea-autosize" @@ -33,6 +34,7 @@ import ContextMenu from "./ContextMenu" import { IndexingStatusBadge } from "./IndexingStatusBadge" import { usePromptHistory } from "./hooks/usePromptHistory" import { CloudAccountSwitcher } from "../cloud/CloudAccountSwitcher" 
+import { AgentCountSelector } from "../multi-orchestrator/AgentCountSelector" interface ChatTextAreaProps { inputValue: string @@ -98,8 +100,11 @@ export const ChatTextArea = forwardRef( commands, cloudUserInfo, enterBehavior, - lockApiConfigAcrossModes, - } = useExtensionState() + lockApiConfigAcrossModes, + memoryLearningEnabled, + memoryApiConfigId, + multiOrchMaxAgents, + } = useExtensionState() // Find the ID and display text for the currently selected API configuration. const { currentConfigId, displayName } = useMemo(() => { @@ -110,12 +115,30 @@ export const ChatTextArea = forwardRef( } }, [listApiConfigMeta, currentApiConfigName]) + const [memoryEntryCount, setMemoryEntryCount] = useState(0) const [gitCommits, setGitCommits] = useState([]) const [showDropdown, setShowDropdown] = useState(false) const [fileSearchResults, setFileSearchResults] = useState([]) const [searchLoading, setSearchLoading] = useState(false) const [searchRequestId, setSearchRequestId] = useState("") + // Request memory status on mount and listen for updates + useEffect(() => { + vscode.postMessage({ type: "getMemoryStatus" }) + const handler = (event: MessageEvent) => { + const msg = event.data + if (msg.type === "memoryStatus") { + const data = JSON.parse(msg.text) + setMemoryEntryCount(data.entryCount ?? 0) + } + if (msg.type === "memoryCleared") { + setMemoryEntryCount(0) + } + } + window.addEventListener("message", handler) + return () => window.removeEventListener("message", handler) + }, []) + // Close dropdown when clicking outside. useEffect(() => { const handleClickOutside = () => { @@ -1321,6 +1344,17 @@ export const ChatTextArea = forwardRef( onToggleLockApiConfig={handleToggleLockApiConfig} /> + {mode === "multi-orchestrator" && ( + { + vscode.postMessage({ + type: "updateSettings", + updatedSettings: { multiOrchMaxAgents: count }, + }) + }} + /> + )}
( )} + {!isEditMode && (() => { + const memoryConfigured = !!memoryApiConfigId + const memoryEnabled = memoryLearningEnabled ?? false + const hasEntries = memoryEntryCount > 0 + const dotColor = !memoryConfigured + ? "bg-gray-400" + : memoryEnabled && hasEntries + ? "bg-green-500" + : memoryEnabled && !hasEntries + ? "bg-amber-400" + : "bg-red-500" + const label = !memoryConfigured + ? "Memory: Off" + : memoryEnabled && hasEntries + ? "Memory" + : memoryEnabled && !hasEntries + ? "Memory: Learning" + : "Memory: Paused" + const tooltip = !memoryConfigured + ? "Select a model profile in Settings → Memory to enable" + : memoryEnabled && hasEntries + ? "Roo learns your preferences. Click to pause." + : memoryEnabled && !hasEntries + ? "Learning enabled, no data yet. Chat to build your profile." + : "Memory paused. Click to resume." + return ( + + + + ) + })()} {!isEditMode ? : null} {!isEditMode && cloudUserInfo && }
diff --git a/webview-ui/src/components/chat/ChatView.tsx b/webview-ui/src/components/chat/ChatView.tsx index fd0aca66cb7..caa5a7d9423 100644 --- a/webview-ui/src/components/chat/ChatView.tsx +++ b/webview-ui/src/components/chat/ChatView.tsx @@ -50,6 +50,9 @@ import DismissibleUpsell from "../common/DismissibleUpsell" import { useCloudUpsell } from "@src/hooks/useCloudUpsell" import { useScrollLifecycle } from "@src/hooks/useScrollLifecycle" import { Cloud } from "lucide-react" +import { PlanReviewPanel } from "../multi-orchestrator/PlanReviewPanel" +import { MultiOrchStatusPanel } from "../multi-orchestrator/MultiOrchStatusPanel" +import type { OrchestratorState } from "../multi-orchestrator/types" export interface ChatViewProps { isHidden: boolean @@ -93,11 +96,16 @@ const ChatViewComponent: React.ForwardRefRenderFunction(null) + const [multiOrchPlanPending, setMultiOrchPlanPending] = useState(false) + // When the provider changes, clear the retired-provider warning. const providerName = apiConfiguration?.apiProvider useEffect(() => { @@ -608,6 +616,16 @@ const ChatViewComponent: React.ForwardRefRenderFunction { - const newMap = new Map(prev) - newMap.set(message.text!, message.aggregatedCosts!) - return newMap - }) - } - break - } + case "taskWithAggregatedCosts": + if (message.text && message.aggregatedCosts) { + setAggregatedCostsMap((prev) => { + const newMap = new Map(prev) + newMap.set(message.text!, message.aggregatedCosts!) 
+ return newMap + }) + } + break + case "multiOrchPlanReady": + if (message.text) { + const orchState = JSON.parse(message.text) as OrchestratorState + setMultiOrchState(orchState) + setMultiOrchPlanPending(true) + } + break + case "multiOrchStatusUpdate": + if (message.text) { + const orchState = JSON.parse(message.text) as OrchestratorState + setMultiOrchState(orchState) + setMultiOrchPlanPending(false) + } + break + case "multiOrchComplete": + if (message.text) { + const orchState = JSON.parse(message.text) as OrchestratorState + setMultiOrchState(orchState) + setMultiOrchPlanPending(false) + } + break + case "multiOrchError": + setMultiOrchState(null) + setMultiOrchPlanPending(false) + break + } // textAreaRef.current is not explicitly required here since React // guarantees that ref will be stable across re-renders, and we're // not using its value but its reference. @@ -1505,7 +1549,7 @@ const ChatViewComponent: React.ForwardRefRenderFunction )} - ) : ( + ) : /* Hide home screen when multi-orchestrator panels are active */ + mode === "multi-orchestrator" && (multiOrchPlanPending || multiOrchState) ? null : (
} + {/* Multi-orchestrator: show panels as main content when no task exists */} + {!task && mode === "multi-orchestrator" && (multiOrchPlanPending || multiOrchState) && ( +
+ {multiOrchPlanPending && multiOrchState?.plan && ( + { + setMultiOrchPlanPending(false) + vscode.postMessage({ type: "multiOrchApprovePlan" }) + }} + onCancel={() => { + setMultiOrchPlanPending(false) + setMultiOrchState(null) + vscode.postMessage({ type: "multiOrchAbort" }) + }} + /> + )} + + {!multiOrchPlanPending && multiOrchState && multiOrchState.phase !== "idle" && ( + { + vscode.postMessage({ type: "multiOrchAbort" }) + }} + /> + )} +
+ )} + {task && ( <>
diff --git a/webview-ui/src/components/modes/EmojiPicker.tsx b/webview-ui/src/components/modes/EmojiPicker.tsx new file mode 100644 index 00000000000..dcf0357031b --- /dev/null +++ b/webview-ui/src/components/modes/EmojiPicker.tsx @@ -0,0 +1,65 @@ +import React, { useState, useCallback } from "react" +import { Popover, PopoverContent, PopoverTrigger, Button } from "@src/components/ui" + +/** + * Curated emoji list organized by category for personality traits. + */ +const EMOJI_LIST = [ + // Faces & Expressions + "😊", "😎", "🤓", "😤", "😈", "🥳", "🤔", "😏", "🧐", "😴", + "🤪", "😇", "🥶", "🤩", "😬", "🫡", "🤖", "👻", "💀", "🤠", + // Animals & Nature + "🦘", "🐉", "🦊", "🐺", "🦁", "🐙", "🦄", "🐝", "🦅", "🐸", + // Objects & Symbols + "🎭", "🎯", "🧠", "🎪", "🕶️", "🎨", "☕", "🔍", "⚡", "🏴‍☠️", + "🔥", "💎", "🎸", "🎲", "🧪", "📚", "🛡️", "⚔️", "🪄", "🌟", + // Misc Fun + "🚀", "💡", "🎬", "🌈", "🍕", "🌶️", "🧊", "🫠", "✨", "💫", +] + +interface EmojiPickerProps { + value: string + onChange: (emoji: string) => void +} + +const EmojiPicker: React.FC = ({ value, onChange }) => { + const [open, setOpen] = useState(false) + + const handleSelect = useCallback( + (emoji: string) => { + onChange(emoji) + setOpen(false) + }, + [onChange], + ) + + return ( + + + + + +
+ {EMOJI_LIST.map((emoji) => ( + + ))} +
+
+
+ ) +} + +export default EmojiPicker diff --git a/webview-ui/src/components/modes/ModesView.tsx b/webview-ui/src/components/modes/ModesView.tsx index eeeaf026cc2..fcc4050d2bf 100644 --- a/webview-ui/src/components/modes/ModesView.tsx +++ b/webview-ui/src/components/modes/ModesView.tsx @@ -49,6 +49,7 @@ import { StandardTooltip, } from "@src/components/ui" import { DeleteModeDialog } from "@src/components/modes/DeleteModeDialog" +import PersonalityTraitsPanel from "@src/components/modes/PersonalityTraitsPanel" import { useEscapeKey } from "@src/hooks/useEscapeKey" // Get all available groups that should show in prompts view @@ -74,6 +75,7 @@ const ModesView = () => { customInstructions, setCustomInstructions, customModes, + personalityTraitEnhancerPrompt, } = useExtensionState() // Use a local state to track the visually active mode @@ -1293,6 +1295,13 @@ const ModesView = () => {
+ {/* Personality Traits Section */} + +
+ + {/* Edit/Delete buttons on hover (all traits except Roo) */} + {canEditDelete && ( +
+ + + + + + +
+ )} +
+ ) + })} +
+ + {/* Combined Prompt Preview (collapsible) */} + {activeTraits.length > 0 && ( + + + + + +
+							{combinedPrompt || t("personality:noActiveTraits")}
+						
+
+
+ )} + + {/* Unified Create / Edit Trait Section */} + { if (!open) resetForm(); else if (!isEditing) startCreating(); }}> + + + + +
+
+
+ + +
+
+ + setFormLabel(e.target.value)} + placeholder={t("personality:labelPlaceholder")} + /> +
+
+ +
+
+ +
+ + + + + + +
+
+ setFormPrompt(e.target.value)} + placeholder={t("personality:promptPlaceholder")} + rows={4} + className="w-full" + /> +
+ + {/* Enhancer Prompt Editor (collapsible) */} + {isEnhancerPromptOpen && ( +
+
+ {t("personality:enhancerPromptLabel")} +
+ { + vscode.postMessage({ + type: "updateSettings", + updatedSettings: { personalityTraitEnhancerPrompt: e.target.value }, + }) + }} + rows={6} + className="w-full text-xs" + /> +
+ )} + +
+ + {isEditing && ( + + )} +
+
+
+
+
+ ) +} + +export default PersonalityTraitsPanel diff --git a/webview-ui/src/components/multi-orchestrator/AgentCountSelector.tsx b/webview-ui/src/components/multi-orchestrator/AgentCountSelector.tsx new file mode 100644 index 00000000000..38eb37bebed --- /dev/null +++ b/webview-ui/src/components/multi-orchestrator/AgentCountSelector.tsx @@ -0,0 +1,25 @@ +import React from "react" + +interface AgentCountSelectorProps { + value: number + onChange: (count: number) => void + max?: number +} + +export const AgentCountSelector: React.FC<AgentCountSelectorProps> = ({ value, onChange, max = 6 }) => { + return ( +
+ Agents: + +
+ ) +} diff --git a/webview-ui/src/components/multi-orchestrator/MultiOrchStatusPanel.tsx b/webview-ui/src/components/multi-orchestrator/MultiOrchStatusPanel.tsx new file mode 100644 index 00000000000..83c82e54bbd --- /dev/null +++ b/webview-ui/src/components/multi-orchestrator/MultiOrchStatusPanel.tsx @@ -0,0 +1,54 @@ +import React from "react" +import type { OrchestratorState } from "./types" + +interface MultiOrchStatusPanelProps { + state: OrchestratorState + onAbort: () => void +} + +const STATUS_ICONS: Record<string, string> = { + completed: "✅", + failed: "❌", + running: "🔄", + merging: "🔀", + verifying: "🔍", + pending: "⏳", +} + +export const MultiOrchStatusPanel: React.FC<MultiOrchStatusPanelProps> = ({ state, onAbort }) => { + const completedCount = state.agents.filter((a) => a.status === "completed").length + const failedCount = state.agents.filter((a) => a.status === "failed").length + + return ( +
+
⚡ Multi-Orchestration: {state.phase}
+
+ {completedCount + failedCount}/{state.agents.length} agents complete +
+ +
+ {state.agents.map((agent) => ( +
+ {STATUS_ICONS[agent.status] ?? "⏳"} + {agent.title} + {agent.mode} +
+ ))} +
+ + {state.phase !== "complete" && ( + + )} + + {state.finalReport && ( +
+ {state.finalReport} +
+ )} +
+ ) +} diff --git a/webview-ui/src/components/multi-orchestrator/PlanReviewPanel.tsx b/webview-ui/src/components/multi-orchestrator/PlanReviewPanel.tsx new file mode 100644 index 00000000000..809dc8f8507 --- /dev/null +++ b/webview-ui/src/components/multi-orchestrator/PlanReviewPanel.tsx @@ -0,0 +1,46 @@ +import React from "react" +import { Button } from "@src/components/ui" +import type { OrchestratorPlan } from "./types" + +interface PlanReviewPanelProps { + plan: OrchestratorPlan + onApprove: () => void + onCancel: () => void +} + +export const PlanReviewPanel: React.FC<PlanReviewPanelProps> = ({ plan, onApprove, onCancel }) => { + return ( +
+
⚡ Execution Plan
+
+ {plan.tasks.length} parallel tasks · {plan.estimatedComplexity} complexity + {plan.requiresMerge && " · merge required"} +
+ +
+ {plan.tasks.map((task, i) => ( +
+
+ Task {i + 1}: {task.title} → {task.mode} +
+
{task.description}
+ {task.assignedFiles && task.assignedFiles.length > 0 && ( +
+ Files: {task.assignedFiles.join(", ")} +
+ )} +
+ ))} +
+ +
+ + +
+
+ ) +} diff --git a/webview-ui/src/components/multi-orchestrator/types.ts b/webview-ui/src/components/multi-orchestrator/types.ts new file mode 100644 index 00000000000..ab2967a0ce2 --- /dev/null +++ b/webview-ui/src/components/multi-orchestrator/types.ts @@ -0,0 +1,63 @@ +/** + * Local mirror of multi-orchestrator types for the webview UI. + * + * These duplicate the interfaces defined in `src/core/multi-orchestrator/types.ts` + * because the webview bundle cannot import from the extension host source directly. + * Keep in sync with the canonical definitions when modifying. + */ + +export interface OrchestratorPlan { + tasks: PlannedTask[] + requiresMerge: boolean + estimatedComplexity: "low" | "medium" | "high" +} + +export interface PlannedTask { + id: string + mode: string + title: string + description: string + assignedFiles?: string[] + priority: number +} + +export type AgentStatus = "pending" | "running" | "completed" | "failed" | "merging" + +export interface AgentState { + taskId: string + providerId: string + panelId: string + worktreePath: string | null + worktreeBranch: string | null + mode: string + status: AgentStatus + title: string + completionReport: string | null + tokenUsage: { input: number; output: number } | null + startedAt: number | null + completedAt: number | null +} + +export interface MergeResult { + agentTaskId: string + branch: string + success: boolean + conflictsFound: number + conflictsResolved: number + filesChanged: string[] +} + +export interface VerificationFinding { + agentTaskId: string + findings: string + severity: "info" | "warning" | "error" +} + +export interface OrchestratorState { + phase: "idle" | "planning" | "spawning" | "running" | "merging" | "verifying" | "reporting" | "complete" + plan: OrchestratorPlan | null + agents: AgentState[] + mergeResults: MergeResult[] + verificationFindings: VerificationFinding[] + finalReport: string | null +} diff --git a/webview-ui/src/components/settings/ApiOptions.tsx 
b/webview-ui/src/components/settings/ApiOptions.tsx index 4d914a4833a..2d021b01eae 100644 --- a/webview-ui/src/components/settings/ApiOptions.tsx +++ b/webview-ui/src/components/settings/ApiOptions.tsx @@ -1,6 +1,7 @@ import React, { memo, useCallback, useEffect, useMemo, useState } from "react" import { convertHeadersToObject } from "./utils/headers" import { useDebounce } from "react-use" +import { Checkbox } from "vscrui" import { VSCodeLink } from "@vscode/webview-ui-toolkit/react" import { ExternalLinkIcon } from "@radix-ui/react-icons" @@ -800,6 +801,16 @@ const ApiOptions = ({ } onChange={(value) => setApiConfigurationField("consecutiveMistakeLimit", value)} /> +
+ + {t("settings:advancedSettings.useXmlToolCalling")} + +
+ {t("settings:advancedSettings.useXmlToolCallingDescription")} +
+
{selectedProvider === "openrouter" && openRouterModelProviders && Object.keys(openRouterModelProviders).length > 0 && ( diff --git a/webview-ui/src/components/settings/MemoryChatPicker.tsx b/webview-ui/src/components/settings/MemoryChatPicker.tsx new file mode 100644 index 00000000000..3586521d321 --- /dev/null +++ b/webview-ui/src/components/settings/MemoryChatPicker.tsx @@ -0,0 +1,137 @@ +import React, { useState, useMemo } from "react" +import type { HistoryItem } from "@roo-code/types" +import { formatTimeAgo } from "@src/utils/format" +import { + Dialog, + DialogContent, + DialogHeader, + DialogTitle, + DialogDescription, + DialogFooter, + Button, + Checkbox, +} from "@src/components/ui" + +interface MemoryChatPickerProps { + open: boolean + onOpenChange: (open: boolean) => void + taskHistory: HistoryItem[] + onStartSync: (taskIds: string[]) => void +} + +export const MemoryChatPicker: React.FC<MemoryChatPickerProps> = ({ + open, + onOpenChange, + taskHistory, + onStartSync, +}) => { + const [selectedIds, setSelectedIds] = useState<Set<string>>(new Set()) + + const safeHistory = taskHistory ?? [] + + const allSelected = useMemo( + () => safeHistory.length > 0 && selectedIds.size === safeHistory.length, + [safeHistory.length, selectedIds.size], + ) + + const toggleAll = () => { + if (allSelected) { + setSelectedIds(new Set()) + } else { + setSelectedIds(new Set(safeHistory.map((t) => t.id))) + } + } + + const toggleOne = (id: string) => { + setSelectedIds((prev) => { + const next = new Set(prev) + if (next.has(id)) { + next.delete(id) + } else { + next.add(id) + } + return next + }) + } + + const handleLearn = () => { + onStartSync(Array.from(selectedIds)) + } + + return ( + + + + Browse Chats + Select conversations to analyze for building your profile. + + +
+ + + {selectedIds.size} of {safeHistory.length} selected + +
+ +
+ {safeHistory.map((item) => ( +
toggleOne(item.id)}> + toggleOne(item.id)} + style={{ marginTop: "2px" }} + /> +
+
+ {item.task || "(no message)"} +
+
{formatTimeAgo(item.ts)}
+
+
+ ))} + {safeHistory.length === 0 && ( +

+ No conversations found. +

+ )} +
+ + + + + +
+
+ ) +} diff --git a/webview-ui/src/components/settings/SettingsView.tsx b/webview-ui/src/components/settings/SettingsView.tsx index 47e087615e3..177556086a5 100644 --- a/webview-ui/src/components/settings/SettingsView.tsx +++ b/webview-ui/src/components/settings/SettingsView.tsx @@ -29,6 +29,9 @@ import { ArrowLeft, GitCommitVertical, GraduationCap, + Brain, + Loader2, + Zap, } from "lucide-react" import { @@ -83,6 +86,7 @@ import McpView from "../mcp/McpView" import { WorktreesView } from "../worktrees/WorktreesView" import { SettingsSearch } from "./SettingsSearch" import { useSearchIndexRegistry, SearchIndexProvider } from "./useSettingsSearch" +import { MemoryChatPicker } from "./MemoryChatPicker" export const settingsTabsContainer = "flex flex-1 overflow-hidden [&.narrow_.tab-label]:hidden" export const settingsTabList = @@ -110,6 +114,8 @@ export const sectionNames = [ "prompts", "ui", "experimental", + "memory", + "multiOrch", "language", "about", ] as const @@ -121,15 +127,35 @@ type SettingsViewProps = { targetSection?: string } +/** Format a unix timestamp (seconds) into a human-readable relative time string. 
*/ +function formatTimeAgo(unixSeconds: number): string { + const now = Math.floor(Date.now() / 1000) + const diff = now - unixSeconds + if (diff < 60) return "just now" + if (diff < 3600) return `${Math.floor(diff / 60)}m ago` + if (diff < 86400) return `${Math.floor(diff / 3600)}h ago` + if (diff < 604800) return `${Math.floor(diff / 86400)}d ago` + return new Date(unixSeconds * 1000).toLocaleDateString() +} + const SettingsView = forwardRef(({ onDone, targetSection }, ref) => { const { t } = useAppTranslation() const extensionState = useExtensionState() - const { currentApiConfigName, listApiConfigMeta, uriScheme, settingsImportedAt } = extensionState + const { currentApiConfigName, listApiConfigMeta, uriScheme, settingsImportedAt, taskHistory } = extensionState const [isDiscardDialogShow, setDiscardDialogShow] = useState(false) const [isChangeDetected, setChangeDetected] = useState(false) const [errorMessage, setErrorMessage] = useState(undefined) + + // Memory sync state + const [isSyncing, setIsSyncing] = useState(false) + const [syncProgress, setSyncProgress] = useState({ completed: 0, total: 0 }) + const [syncDone, setSyncDone] = useState(false) + const [memoryStats, setMemoryStats] = useState<{ entryCount: number; lastAnalyzedAt: number | null }>({ entryCount: 0, lastAnalyzedAt: null }) + const [pickerOpen, setPickerOpen] = useState(false) + const [clearDialogOpen, setClearDialogOpen] = useState(false) + const [activeTab, setActiveTab] = useState( targetSection && sectionNames.includes(targetSection as SectionName) ? 
(targetSection as SectionName) @@ -227,6 +253,80 @@ const SettingsView = forwardRef(({ onDone, t } }, [settingsImportedAt, extensionState]) + // Request initial memory status on mount + useEffect(() => { + vscode.postMessage({ type: "getMemoryStatus" }) + }, []) + + // Memory sync message listener + useEffect(() => { + const handler = (event: MessageEvent) => { + const msg = event.data + if (msg.type === "memorySyncProgress") { + const data = JSON.parse(msg.text) + setSyncProgress(data) + } + if (msg.type === "memorySyncComplete") { + setIsSyncing(false) + setSyncDone(true) + // Refresh status so entry count and button states update immediately + vscode.postMessage({ type: "getMemoryStatus" }) + } + if (msg.type === "memoryCleared") { + setSyncDone(false) + setSyncProgress({ completed: 0, total: 0 }) + setMemoryStats({ entryCount: 0, lastAnalyzedAt: null }) + } + if (msg.type === "memorySyncAlreadyRunning") { + // Sync was rejected because one is already in progress — keep UI in syncing state + // (this is a defensive fallback; buttons should already be disabled) + } + if (msg.type === "memorySyncStatus") { + const status = JSON.parse(msg.text) + if (status.inProgress) { + setIsSyncing(true) + setSyncProgress({ completed: status.completed, total: status.total }) + } + } + if (msg.type === "memoryStatus") { + const data = JSON.parse(msg.text) + setMemoryStats({ + entryCount: data.entryCount ?? 0, + lastAnalyzedAt: data.lastAnalyzedAt ?? null, + }) + // If memory exists from a previous session, show the green indicator + if ((data.entryCount ?? 0) > 0) { + setSyncDone(true) + } + } + } + window.addEventListener("message", handler) + return () => window.removeEventListener("message", handler) + }, []) + + // When the memory tab becomes active, ask the backend for current sync status + // so the progress bar is restored after tab switches, and refresh memory stats. 
+ useEffect(() => { + if (activeTab === "memory") { + vscode.postMessage({ type: "getMemorySyncStatus" }) + vscode.postMessage({ type: "getMemoryStatus" }) + } + }, [activeTab]) + + const handleStartSync = (taskIds: string[]) => { + if (isSyncing) return + setIsSyncing(true) + setSyncDone(false) + setSyncProgress({ completed: 0, total: taskIds.length }) + setPickerOpen(false) + vscode.postMessage({ type: "startMemorySync", text: JSON.stringify({ taskIds }) }) + } + + const handleClearMemory = () => { + vscode.postMessage({ type: "clearMemory" }) + setClearDialogOpen(false) + } + const setCachedStateField: SetCachedStateField = useCallback((field, value) => { setCachedState((prevState) => { if (prevState[field] === value) { @@ -422,6 +522,13 @@ const SettingsView = forwardRef(({ onDone, t openRouterImageGenerationSelectedModel, experiments, customSupportPrompts, + memoryApiConfigId: cachedState.memoryApiConfigId, + memoryAnalysisFrequency: cachedState.memoryAnalysisFrequency, + memoryLearningDefaultEnabled: cachedState.memoryLearningDefaultEnabled, + multiOrchMaxAgents: cachedState.multiOrchMaxAgents, + multiOrchPlanReviewEnabled: cachedState.multiOrchPlanReviewEnabled, + multiOrchMergeEnabled: cachedState.multiOrchMergeEnabled, + multiOrchVerifyEnabled: cachedState.multiOrchVerifyEnabled, }, }) @@ -522,6 +629,8 @@ const SettingsView = forwardRef(({ onDone, t { id: "worktrees", icon: GitBranch }, { id: "ui", icon: Glasses }, { id: "experimental", icon: FlaskConical }, + { id: "memory", icon: Brain }, + { id: "multiOrch", icon: Zap }, { id: "language", icon: Globe }, { id: "about", icon: Info }, ], @@ -914,6 +1023,298 @@ const SettingsView = forwardRef(({ onDone, t /> )} + {/* Memory Section */} + {renderTab === "memory" && ( +
+ Memory Learning +
+
+

+ When enabled, Roo learns your preferences and coding + style from conversations to personalize responses over + time. +

+ + {/* Memory status indicator */} + {memoryStats.entryCount > 0 ? ( +
+ + {memoryStats.entryCount} {memoryStats.entryCount === 1 ? "memory" : "memories"} stored + {memoryStats.lastAnalyzedAt && ` · Last updated ${formatTimeAgo(memoryStats.lastAnalyzedAt)}`} +
+ ) : ( +
+ No memories yet — analyze some chats below to get started. +
+ )} + + {/* Analysis model profile selector */} +
+ +

+ Select a model configuration for memory analysis + (requires at least 50K context window). +

+ +
+ + {/* Analysis frequency selector */} +
+ +

+ Analyze conversation every N user messages. +

+ +
+ + {/* Default enabled checkbox */} +
+ { + setCachedStateField( + "memoryLearningDefaultEnabled", + e.target.checked, + ) + }} + /> + +
+ + {/* Prior Chat Analysis */} +
+ +

+ Analyze your existing conversations to build your profile instantly. +

+ +
+ + {isSyncing ? ( + + ) : syncDone ? ( + + ) : null} + {isSyncing && ( + + {syncProgress.completed} of {syncProgress.total} analyzed + + )} +
+ + {/* Progress bar — visible while syncing */} + {isSyncing && syncProgress.total > 0 && ( +
+
+
+ )} +
+ + {/* Clear Memory */} +
+ +

+ Reset all learned preferences and start fresh. +

+
+
+ + {/* Memory Chat Picker Dialog */} + + + {/* Clear Memory Confirmation Dialog */} + + + + + + Clear Memory + + + This will reset all learned preferences and start fresh. Are you sure? + + + + setClearDialogOpen(false)}> + Cancel + + + Clear Memory + + + + +
+
+ )} + + {/* Multi-Orchestrator Section */} + {renderTab === "multiOrch" && ( +
+ Multi-Orchestrator +
+
+

+ Configure parallel task execution across multiple agents. +

+ + {/* Max agents */} +
+ +

+ Maximum number of parallel agents (1-6). +

+ +
+ + {/* Plan review toggle */} +
+ setCachedStateField("multiOrchPlanReviewEnabled", e.target.checked)} + /> + +
+ + {/* Merge mode */} +
+ +

+ When to run the merge phase after agents complete. +

+ +
+ + {/* Post-completion verification toggle */} +
+ setCachedStateField("multiOrchVerifyEnabled", e.target.checked)} + /> +
+ +

+ After all agents complete, spawn a verification agent to review + changed files for bugs, inconsistencies, and integration issues. +

+
+
+
+
+
+ )} + {/* Language Section */} {renderTab === "language" && ( diff --git a/webview-ui/src/context/ExtensionStateContext.tsx b/webview-ui/src/context/ExtensionStateContext.tsx index ce7a607d9a8..a16dca7820a 100644 --- a/webview-ui/src/context/ExtensionStateContext.tsx +++ b/webview-ui/src/context/ExtensionStateContext.tsx @@ -398,6 +398,14 @@ export const ExtensionStateContextProvider: React.FC<{ children: React.ReactNode } break } + case "memoryLearningState": { + const enabled = message.text === "true" + setState((prevState) => ({ + ...prevState, + memoryLearningEnabled: enabled, + })) + break + } case "mcpServers": { setMcpServers(message.mcpServers ?? []) break diff --git a/webview-ui/src/i18n/locales/ca/settings.json b/webview-ui/src/i18n/locales/ca/settings.json index 2c83cabbbcb..3976f0f4f09 100644 --- a/webview-ui/src/i18n/locales/ca/settings.json +++ b/webview-ui/src/i18n/locales/ca/settings.json @@ -554,7 +554,9 @@ "placeholder": "Per defecte: claude", "maxTokensLabel": "Tokens màxims de sortida", "maxTokensDescription": "Nombre màxim de tokens de sortida per a les respostes de Claude Code. El valor per defecte és 8000." - } + }, + "useXmlToolCalling": "Use XML tool calling", + "useXmlToolCallingDescription": "Use XML-style formatting for tool calls instead of native tool calling. Enable this if your model responds better with XML tool formatting." }, "checkpoints": { "timeout": { @@ -738,7 +740,9 @@ } }, "advancedSettings": { - "title": "Configuració avançada" + "title": "Configuració avançada", + "useXmlToolCalling": "Use XML tool calling", + "useXmlToolCallingDescription": "Use XML-style formatting for tool calls instead of native tool calling. Enable this if your model responds better with XML tool formatting." 
}, "advanced": { "diff": { diff --git a/webview-ui/src/i18n/locales/de/settings.json b/webview-ui/src/i18n/locales/de/settings.json index c31d29147d4..ec870998fcf 100644 --- a/webview-ui/src/i18n/locales/de/settings.json +++ b/webview-ui/src/i18n/locales/de/settings.json @@ -554,7 +554,9 @@ "placeholder": "Standard: claude", "maxTokensLabel": "Maximale Ausgabe-Tokens", "maxTokensDescription": "Maximale Anzahl an Ausgabe-Tokens für Claude Code-Antworten. Standard ist 8000." - } + }, + "useXmlToolCalling": "Use XML tool calling", + "useXmlToolCallingDescription": "Use XML-style formatting for tool calls instead of native tool calling. Enable this if your model responds better with XML tool formatting." }, "checkpoints": { "timeout": { @@ -738,7 +740,9 @@ } }, "advancedSettings": { - "title": "Erweiterte Einstellungen" + "title": "Erweiterte Einstellungen", + "useXmlToolCalling": "Use XML tool calling", + "useXmlToolCallingDescription": "Use XML-style formatting for tool calls instead of native tool calling. Enable this if your model responds better with XML tool formatting." }, "advanced": { "diff": { diff --git a/webview-ui/src/i18n/locales/en/personality.json b/webview-ui/src/i18n/locales/en/personality.json new file mode 100644 index 00000000000..2beef03ca76 --- /dev/null +++ b/webview-ui/src/i18n/locales/en/personality.json @@ -0,0 +1,19 @@ +{ + "title": "Personality Traits", + "description": "Toggle traits to shape how Roo communicates in this mode. Combine multiple traits for a unique personality.", + "previewPrompt": "Preview combined prompt", + "noActiveTraits": "No traits are active. 
Toggle a trait above to see the combined prompt.", + "createTrait": "Create a Trait", + "editTrait": "Edit trait", + "editTraitTitle": "Edit Trait", + "deleteTrait": "Delete trait", + "emojiLabel": "Emoji", + "titleLabel": "Title", + "promptLabel": "Description / Prompt", + "labelPlaceholder": "e.g., Flamboyant", + "promptPlaceholder": "Describe the personality trait, or type a few words and click Enhance...", + "enhanceTooltip": "Enhance: expand a few words into a full personality prompt", + "enhancerSettingsTooltip": "View/edit the enhancer meta-prompt", + "enhancerPromptLabel": "Enhancer Meta-Prompt (controls how brief descriptions are expanded)", + "addTraitButton": "Add Trait" +} diff --git a/webview-ui/src/i18n/locales/en/settings.json b/webview-ui/src/i18n/locales/en/settings.json index 3b2497aaee7..2cbfa0315ac 100644 --- a/webview-ui/src/i18n/locales/en/settings.json +++ b/webview-ui/src/i18n/locales/en/settings.json @@ -40,6 +40,8 @@ "prompts": "Prompts", "ui": "UI", "experimental": "Experimental", + "memory": "Memory", + "multiOrch": "Multi-Orchestrator", "language": "Language", "about": "About Roo Code" }, @@ -801,7 +803,9 @@ } }, "advancedSettings": { - "title": "Advanced settings" + "title": "Advanced settings", + "useXmlToolCalling": "Use XML tool calling", + "useXmlToolCallingDescription": "Use XML-style formatting for tool calls instead of native tool calling. Enable this if your model responds better with XML tool formatting." }, "advanced": { "diff": { diff --git a/webview-ui/src/i18n/locales/es/settings.json b/webview-ui/src/i18n/locales/es/settings.json index 6595c4f9079..9434d524894 100644 --- a/webview-ui/src/i18n/locales/es/settings.json +++ b/webview-ui/src/i18n/locales/es/settings.json @@ -554,7 +554,9 @@ "placeholder": "Por defecto: claude", "maxTokensLabel": "Tokens máximos de salida", "maxTokensDescription": "Número máximo de tokens de salida para las respuestas de Claude Code. El valor predeterminado es 8000." 
- } + }, + "useXmlToolCalling": "Usar llamadas a herramientas XML", + "useXmlToolCallingDescription": "Usa formato de estilo XML para las llamadas a herramientas en lugar de las llamadas nativas a herramientas. Activa esto si tu modelo responde mejor con el formato XML de herramientas." }, "checkpoints": { "timeout": { @@ -738,7 +740,9 @@ } }, "advancedSettings": { - "title": "Configuración avanzada" + "title": "Configuración avanzada", + "useXmlToolCalling": "Usar llamadas a herramientas XML", + "useXmlToolCallingDescription": "Usa formato de estilo XML para las llamadas a herramientas en lugar de las llamadas nativas a herramientas. Activa esto si tu modelo responde mejor con el formato XML de herramientas." }, "advanced": { "diff": { diff --git a/webview-ui/src/i18n/locales/fr/settings.json b/webview-ui/src/i18n/locales/fr/settings.json index 56337bda14c..05a5d44ebcb 100644 --- a/webview-ui/src/i18n/locales/fr/settings.json +++ b/webview-ui/src/i18n/locales/fr/settings.json @@ -554,7 +554,9 @@ "placeholder": "Défaut : claude", "maxTokensLabel": "Jetons de sortie max", "maxTokensDescription": "Nombre maximum de jetons de sortie pour les réponses de Claude Code. La valeur par défaut est 8000." - } + }, + "useXmlToolCalling": "Utiliser les appels d'outils XML", + "useXmlToolCallingDescription": "Utiliser le format de style XML pour les appels d'outils au lieu des appels d'outils natifs. Activez cette option si votre modèle répond mieux avec le format d'outils XML." }, "checkpoints": { "timeout": { @@ -738,7 +740,9 @@ } }, "advancedSettings": { - "title": "Paramètres avancés" + "title": "Paramètres avancés", + "useXmlToolCalling": "Utiliser les appels d'outils XML", + "useXmlToolCallingDescription": "Utiliser le format de style XML pour les appels d'outils au lieu des appels d'outils natifs. Activez cette option si votre modèle répond mieux avec le format d'outils XML."
}, "advanced": { "diff": { diff --git a/webview-ui/src/i18n/locales/hi/settings.json b/webview-ui/src/i18n/locales/hi/settings.json index abd334bec09..3c9a62a290c 100644 --- a/webview-ui/src/i18n/locales/hi/settings.json +++ b/webview-ui/src/i18n/locales/hi/settings.json @@ -554,7 +554,9 @@ "placeholder": "डिफ़ॉल्ट: claude", "maxTokensLabel": "अधिकतम आउटपुट टोकन", "maxTokensDescription": "Claude Code प्रतिक्रियाओं के लिए आउटपुट टोकन की अधिकतम संख्या। डिफ़ॉल्ट 8000 है।" - } + }, + "useXmlToolCalling": "XML टूल कॉलिंग का उपयोग करें", + "useXmlToolCallingDescription": "नेटिव टूल कॉलिंग के बजाय टूल कॉल के लिए XML-शैली स्वरूपण का उपयोग करें। यदि आपका मॉडल XML टूल स्वरूपण के साथ बेहतर प्रतिक्रिया देता है तो इसे सक्षम करें।" }, "checkpoints": { "timeout": { @@ -738,7 +740,9 @@ } }, "advancedSettings": { - "title": "उन्नत सेटिंग्स" + "title": "उन्नत सेटिंग्स", + "useXmlToolCalling": "XML टूल कॉलिंग का उपयोग करें", + "useXmlToolCallingDescription": "नेटिव टूल कॉलिंग के बजाय टूल कॉल के लिए XML-शैली स्वरूपण का उपयोग करें। यदि आपका मॉडल XML टूल स्वरूपण के साथ बेहतर प्रतिक्रिया देता है तो इसे सक्षम करें।" }, "advanced": { "diff": { diff --git a/webview-ui/src/i18n/locales/id/settings.json b/webview-ui/src/i18n/locales/id/settings.json index 1ebcf2073b6..a4f155dfc7e 100644 --- a/webview-ui/src/i18n/locales/id/settings.json +++ b/webview-ui/src/i18n/locales/id/settings.json @@ -554,7 +554,9 @@ "placeholder": "Default: claude", "maxTokensLabel": "Token Output Maks", "maxTokensDescription": "Jumlah maksimum token output untuk respons Claude Code. Default adalah 8000." - } + }, + "useXmlToolCalling": "Gunakan XML tool calling", + "useXmlToolCallingDescription": "Gunakan format gaya XML untuk pemanggilan alat alih-alih pemanggilan alat native. Aktifkan ini jika model kamu merespons lebih baik dengan format alat XML."
}, "checkpoints": { "timeout": { @@ -738,7 +740,9 @@ } }, "advancedSettings": { - "title": "Pengaturan lanjutan" + "title": "Pengaturan lanjutan", + "useXmlToolCalling": "Gunakan XML tool calling", + "useXmlToolCallingDescription": "Gunakan format gaya XML untuk pemanggilan alat alih-alih pemanggilan alat native. Aktifkan ini jika model kamu merespons lebih baik dengan format alat XML." }, "advanced": { "diff": { diff --git a/webview-ui/src/i18n/locales/it/settings.json b/webview-ui/src/i18n/locales/it/settings.json index 4a0c7161654..ce1e78b7fca 100644 --- a/webview-ui/src/i18n/locales/it/settings.json +++ b/webview-ui/src/i18n/locales/it/settings.json @@ -554,7 +554,9 @@ "placeholder": "Predefinito: claude", "maxTokensLabel": "Token di output massimi", "maxTokensDescription": "Numero massimo di token di output per le risposte di Claude Code. Il valore predefinito è 8000." - } + }, + "useXmlToolCalling": "Usa chiamate agli strumenti XML", + "useXmlToolCallingDescription": "Usa la formattazione in stile XML per le chiamate agli strumenti invece delle chiamate native. Abilita questa opzione se il tuo modello risponde meglio con la formattazione XML degli strumenti." }, "checkpoints": { "timeout": { @@ -738,7 +740,9 @@ } }, "advancedSettings": { - "title": "Impostazioni avanzate" + "title": "Impostazioni avanzate", + "useXmlToolCalling": "Usa chiamate agli strumenti XML", + "useXmlToolCallingDescription": "Usa la formattazione in stile XML per le chiamate agli strumenti invece delle chiamate native. Abilita questa opzione se il tuo modello risponde meglio con la formattazione XML degli strumenti."
}, "advanced": { "diff": { diff --git a/webview-ui/src/i18n/locales/ja/settings.json b/webview-ui/src/i18n/locales/ja/settings.json index b0d921571af..3520202846b 100644 --- a/webview-ui/src/i18n/locales/ja/settings.json +++ b/webview-ui/src/i18n/locales/ja/settings.json @@ -554,7 +554,9 @@ "placeholder": "デフォルト:claude", "maxTokensLabel": "最大出力トークン", "maxTokensDescription": "Claude Codeレスポンスの最大出力トークン数。デフォルトは8000です。" - } + }, + "useXmlToolCalling": "XMLツール呼び出しを使用", + "useXmlToolCallingDescription": "ネイティブのツール呼び出しの代わりに、ツール呼び出しにXML形式のフォーマットを使用します。モデルがXMLツールフォーマットでより良く応答する場合に有効にしてください。" }, "checkpoints": { "timeout": { @@ -738,7 +740,9 @@ } }, "advancedSettings": { - "title": "詳細設定" + "title": "詳細設定", + "useXmlToolCalling": "XMLツール呼び出しを使用", + "useXmlToolCallingDescription": "ネイティブのツール呼び出しの代わりに、ツール呼び出しにXML形式のフォーマットを使用します。モデルがXMLツールフォーマットでより良く応答する場合に有効にしてください。" }, "advanced": { "diff": { diff --git a/webview-ui/src/i18n/locales/ko/settings.json b/webview-ui/src/i18n/locales/ko/settings.json index 88fc8e6d79e..0e234cf2345 100644 --- a/webview-ui/src/i18n/locales/ko/settings.json +++ b/webview-ui/src/i18n/locales/ko/settings.json @@ -554,7 +554,9 @@ "placeholder": "기본값: claude", "maxTokensLabel": "최대 출력 토큰", "maxTokensDescription": "Claude Code 응답의 최대 출력 토큰 수. 기본값은 8000입니다." - } + }, + "useXmlToolCalling": "XML 도구 호출 사용", + "useXmlToolCallingDescription": "네이티브 도구 호출 대신 도구 호출에 XML 스타일 형식을 사용합니다. 모델이 XML 도구 형식에 더 잘 응답하는 경우 활성화하세요." }, "checkpoints": { "timeout": { @@ -738,7 +740,9 @@ } }, "advancedSettings": { - "title": "고급 설정" + "title": "고급 설정", + "useXmlToolCalling": "XML 도구 호출 사용", + "useXmlToolCallingDescription": "네이티브 도구 호출 대신 도구 호출에 XML 스타일 형식을 사용합니다. 모델이 XML 도구 형식에 더 잘 응답하는 경우 활성화하세요."
}, "advanced": { "diff": { diff --git a/webview-ui/src/i18n/locales/nl/settings.json b/webview-ui/src/i18n/locales/nl/settings.json index fcfad37d376..a36c2c95c09 100644 --- a/webview-ui/src/i18n/locales/nl/settings.json +++ b/webview-ui/src/i18n/locales/nl/settings.json @@ -554,7 +554,9 @@ "placeholder": "Standaard: claude", "maxTokensLabel": "Max Output Tokens", "maxTokensDescription": "Maximaal aantal output-tokens voor Claude Code-reacties. Standaard is 8000." - } + }, + "useXmlToolCalling": "XML-toolaanroepen gebruiken", + "useXmlToolCallingDescription": "Gebruik XML-stijl opmaak voor toolaanroepen in plaats van native toolaanroepen. Schakel dit in als je model beter reageert met XML-toolopmaak." }, "checkpoints": { "timeout": { @@ -738,7 +740,9 @@ } }, "advancedSettings": { - "title": "Geavanceerde instellingen" + "title": "Geavanceerde instellingen", + "useXmlToolCalling": "XML-toolaanroepen gebruiken", + "useXmlToolCallingDescription": "Gebruik XML-stijl opmaak voor toolaanroepen in plaats van native toolaanroepen. Schakel dit in als je model beter reageert met XML-toolopmaak." }, "advanced": { "diff": { diff --git a/webview-ui/src/i18n/locales/pl/settings.json b/webview-ui/src/i18n/locales/pl/settings.json index fa48bc6b212..552539013da 100644 --- a/webview-ui/src/i18n/locales/pl/settings.json +++ b/webview-ui/src/i18n/locales/pl/settings.json @@ -554,7 +554,9 @@ "placeholder": "Domyślnie: claude", "maxTokensLabel": "Maksymalna liczba tokenów wyjściowych", "maxTokensDescription": "Maksymalna liczba tokenów wyjściowych dla odpowiedzi Claude Code. Domyślnie 8000." - } + }, + "useXmlToolCalling": "Użyj wywołań narzędzi XML", + "useXmlToolCallingDescription": "Używaj formatowania w stylu XML dla wywołań narzędzi zamiast natywnych wywołań narzędzi. Włącz tę opcję, jeśli twój model lepiej odpowiada przy formatowaniu narzędzi XML."
}, "checkpoints": { "timeout": { @@ -738,7 +740,9 @@ } }, "advancedSettings": { - "title": "Ustawienia zaawansowane" + "title": "Ustawienia zaawansowane", + "useXmlToolCalling": "Użyj wywołań narzędzi XML", + "useXmlToolCallingDescription": "Używaj formatowania w stylu XML dla wywołań narzędzi zamiast natywnych wywołań narzędzi. Włącz tę opcję, jeśli twój model lepiej odpowiada przy formatowaniu narzędzi XML." }, "advanced": { "diff": { diff --git a/webview-ui/src/i18n/locales/pt-BR/settings.json b/webview-ui/src/i18n/locales/pt-BR/settings.json index a8387e05121..34db295d339 100644 --- a/webview-ui/src/i18n/locales/pt-BR/settings.json +++ b/webview-ui/src/i18n/locales/pt-BR/settings.json @@ -554,7 +554,9 @@ "placeholder": "Padrão: claude", "maxTokensLabel": "Tokens de saída máximos", "maxTokensDescription": "Número máximo de tokens de saída para respostas do Claude Code. O padrão é 8000." - } + }, + "useXmlToolCalling": "Usar chamadas de ferramentas XML", + "useXmlToolCallingDescription": "Use formatação no estilo XML para chamadas de ferramentas em vez de chamadas nativas. Ative isso se o seu modelo responder melhor com a formatação XML de ferramentas." }, "checkpoints": { "timeout": { @@ -738,7 +740,9 @@ } }, "advancedSettings": { - "title": "Configurações avançadas" + "title": "Configurações avançadas", + "useXmlToolCalling": "Usar chamadas de ferramentas XML", + "useXmlToolCallingDescription": "Use formatação no estilo XML para chamadas de ferramentas em vez de chamadas nativas. Ative isso se o seu modelo responder melhor com a formatação XML de ferramentas." }, "advanced": { "diff": { diff --git a/webview-ui/src/i18n/locales/ru/settings.json b/webview-ui/src/i18n/locales/ru/settings.json index fe24ebee299..638071d234e 100644 --- a/webview-ui/src/i18n/locales/ru/settings.json +++ b/webview-ui/src/i18n/locales/ru/settings.json @@ -554,7 +554,9 @@ "placeholder": "По умолчанию: claude", "maxTokensLabel": "Макс. выходных токенов", "maxTokensDescription": "Максимальное количество выходных токенов для ответов Claude Code. По умолчанию 8000."
- } + }, + "useXmlToolCalling": "Использовать XML-вызовы инструментов", + "useXmlToolCallingDescription": "Использовать форматирование в стиле XML для вызовов инструментов вместо нативных вызовов. Включите, если ваша модель лучше отвечает с XML-форматированием инструментов." }, "checkpoints": { "timeout": { @@ -738,7 +740,9 @@ } }, "advancedSettings": { - "title": "Дополнительные настройки" + "title": "Дополнительные настройки", + "useXmlToolCalling": "Использовать XML-вызовы инструментов", + "useXmlToolCallingDescription": "Использовать форматирование в стиле XML для вызовов инструментов вместо нативных вызовов. Включите, если ваша модель лучше отвечает с XML-форматированием инструментов." }, "advanced": { "diff": { diff --git a/webview-ui/src/i18n/locales/tr/settings.json b/webview-ui/src/i18n/locales/tr/settings.json index 7171718f1c5..83f003d80ba 100644 --- a/webview-ui/src/i18n/locales/tr/settings.json +++ b/webview-ui/src/i18n/locales/tr/settings.json @@ -554,7 +554,9 @@ "placeholder": "Varsayılan: claude", "maxTokensLabel": "Maksimum Çıktı Token sayısı", "maxTokensDescription": "Claude Code yanıtları için maksimum çıktı token sayısı. Varsayılan 8000'dir." - } + }, + "useXmlToolCalling": "XML araç çağrısını kullan", + "useXmlToolCallingDescription": "Yerel araç çağrısı yerine araç çağrıları için XML tarzı biçimlendirme kullan. Modeliniz XML araç biçimlendirmesiyle daha iyi yanıt veriyorsa bunu etkinleştirin." }, "checkpoints": { "timeout": { @@ -738,7 +740,9 @@ } }, "advancedSettings": { - "title": "Gelişmiş ayarlar" + "title": "Gelişmiş ayarlar", + "useXmlToolCalling": "XML araç çağrısını kullan", + "useXmlToolCallingDescription": "Yerel araç çağrısı yerine araç çağrıları için XML tarzı biçimlendirme kullan. Modeliniz XML araç biçimlendirmesiyle daha iyi yanıt veriyorsa bunu etkinleştirin."
}, "advanced": { "diff": { diff --git a/webview-ui/src/i18n/locales/vi/settings.json b/webview-ui/src/i18n/locales/vi/settings.json index 95b4f2d6863..5398feb8ef4 100644 --- a/webview-ui/src/i18n/locales/vi/settings.json +++ b/webview-ui/src/i18n/locales/vi/settings.json @@ -554,7 +554,9 @@ "placeholder": "Mặc định: claude", "maxTokensLabel": "Số token đầu ra tối đa", "maxTokensDescription": "Số lượng token đầu ra tối đa cho các phản hồi của Claude Code. Mặc định là 8000." - } + }, + "useXmlToolCalling": "Sử dụng gọi công cụ XML", + "useXmlToolCallingDescription": "Sử dụng định dạng kiểu XML cho các lệnh gọi công cụ thay vì gọi công cụ native. Bật tùy chọn này nếu mô hình của bạn phản hồi tốt hơn với định dạng công cụ XML." }, "checkpoints": { "timeout": { @@ -738,7 +740,9 @@ } }, "advancedSettings": { - "title": "Cài đặt nâng cao" + "title": "Cài đặt nâng cao", + "useXmlToolCalling": "Sử dụng gọi công cụ XML", + "useXmlToolCallingDescription": "Sử dụng định dạng kiểu XML cho các lệnh gọi công cụ thay vì gọi công cụ native. Bật tùy chọn này nếu mô hình của bạn phản hồi tốt hơn với định dạng công cụ XML." }, "advanced": { "diff": { diff --git a/webview-ui/src/i18n/locales/zh-CN/settings.json b/webview-ui/src/i18n/locales/zh-CN/settings.json index eeba6bb079d..f2dcfb94bbe 100644 --- a/webview-ui/src/i18n/locales/zh-CN/settings.json +++ b/webview-ui/src/i18n/locales/zh-CN/settings.json @@ -554,7 +554,9 @@ "placeholder": "默认:claude", "maxTokensLabel": "最大输出 Token", "maxTokensDescription": "Claude Code 响应的最大输出 Token 数量。默认为 8000。" - } + }, + "useXmlToolCalling": "使用 XML 工具调用", + "useXmlToolCallingDescription": "对工具调用使用 XML 样式格式,而不是原生工具调用。如果您的模型使用 XML 工具格式响应更好,请启用此选项。"
}, "checkpoints": { "timeout": { @@ -738,7 +740,9 @@ } }, "advancedSettings": { - "title": "高级设置" + "title": "高级设置", + "useXmlToolCalling": "使用 XML 工具调用", + "useXmlToolCallingDescription": "对工具调用使用 XML 样式格式,而不是原生工具调用。如果您的模型使用 XML 工具格式响应更好,请启用此选项。" }, "advanced": { "diff": { diff --git a/webview-ui/src/i18n/locales/zh-TW/settings.json b/webview-ui/src/i18n/locales/zh-TW/settings.json index 9f4241c3dd9..e39afdfb563 100644 --- a/webview-ui/src/i18n/locales/zh-TW/settings.json +++ b/webview-ui/src/i18n/locales/zh-TW/settings.json @@ -564,7 +564,9 @@ "placeholder": "預設:claude", "maxTokensLabel": "最大輸出 Token", "maxTokensDescription": "Claude Code 回應的最大輸出 Token 數量。預設為 8000。" - } + }, + "useXmlToolCalling": "使用 XML 工具呼叫", + "useXmlToolCallingDescription": "對工具呼叫使用 XML 樣式格式,而非原生工具呼叫。如果您的模型使用 XML 工具格式回應更好,請啟用此選項。" }, "checkpoints": { "timeout": { @@ -748,7 +750,9 @@ } }, "advancedSettings": { - "title": "進階設定" + "title": "進階設定", + "useXmlToolCalling": "使用 XML 工具呼叫", + "useXmlToolCallingDescription": "對工具呼叫使用 XML 樣式格式,而非原生工具呼叫。如果您的模型使用 XML 工具格式回應更好,請啟用此選項。" }, "advanced": { "diff": {