From 497d4f74d4121e1cad84a8ad8ea3e054eef3a654 Mon Sep 17 00:00:00 2001
From: Roomote <roomote@roocode.com>
Date: Thu, 14 May 2026 01:34:10 +0000
Subject: [PATCH 1/2] test: unskip settings regression tests

---
 webview-ui/AGENTS.md                          |  8 ++++++
 .../settings/__tests__/ApiOptions.spec.tsx    | 27 +++++++++++++++----
 .../SettingsView.unsaved-changes.spec.tsx     | 19 +++++--------
 3 files changed, 36 insertions(+), 18 deletions(-)
 create mode 100644 webview-ui/AGENTS.md
diff --git a/webview-ui/AGENTS.md b/webview-ui/AGENTS.md
new file mode 100644
index 0000000000..adab48e9e0
--- /dev/null
+++ b/webview-ui/AGENTS.md
@@ -0,0 +1,8 @@
+# AGENTS.md
+
+This file provides guidance to agents working in `webview-ui/`.
+
+- Prefer local `webview-ui` tests for React/webview behavior. If a change is about component rendering, local state, hooks, form dirty-state, validation, or prop wiring inside the webview, add or update Vitest coverage under `webview-ui/src/**/__tests__` instead of reaching for `apps/vscode-e2e`.
+- Use `apps/vscode-e2e` only when the behavior depends on the real VS Code extension environment: extension-host to webview messaging, VS Code workspace APIs, task execution flows, or other end-to-end behavior that needs `@vscode/test-electron`.
+- When a regression can be proven with a component or webview integration test, keep it in `webview-ui`. Do not promote it to e2e just because the UI is hosted inside VS Code.
+- For `SettingsView`, preserve the cached-state pattern from the repo root guidance: inputs should operate on local `cachedState` until the user saves, and tests should distinguish automatic initialization from real user edits.
diff --git a/webview-ui/src/components/settings/__tests__/ApiOptions.spec.tsx b/webview-ui/src/components/settings/__tests__/ApiOptions.spec.tsx
index 6ef39e3d3d..d1c923ebd8 100644
--- a/webview-ui/src/components/settings/__tests__/ApiOptions.spec.tsx
+++ b/webview-ui/src/components/settings/__tests__/ApiOptions.spec.tsx
@@ -1,6 +1,6 @@
 // npx vitest src/components/settings/__tests__/ApiOptions.spec.tsx
 
-import { render, screen, fireEvent } from "@/utils/test-utils"
+import { render, screen, fireEvent, within } from "@/utils/test-utils"
 import { QueryClient, QueryClientProvider } from "@tanstack/react-query"
 
 import { type ModelInfo, type ProviderSettings, openAiModelInfoSaneDefaults } from "@roo-code/types"
@@ -176,7 +176,7 @@ vi.mock("../TodoListSettingsControl", () => ({
 
 // Mock ThinkingBudget component
 vi.mock("../ThinkingBudget", () => ({
-	ThinkingBudget: ({ modelInfo }: any) => {
+	ThinkingBudget: ({ modelInfo, apiConfiguration, setApiConfigurationField }: any) => {
 		// Only render if model supports reasoning budget (thinking models)
 		if (modelInfo?.supportsReasoningBudget || modelInfo?.requiredReasoningBudget) {
 			return (
@@ -186,6 +186,22 @@ vi.mock("../ThinkingBudget", () => ({
 				</div>
 			)
 		}
+
+		if (modelInfo?.supportsReasoningEffort) {
+			return (
+				<div data-testid="reasoning-effort">
+					<select
+						value={apiConfiguration?.reasoningEffort || ""}
+						onChange={(e) => setApiConfigurationField("reasoningEffort", e.target.value)}>
+						<option value="">Select...</option>
+						<option value="low">Low</option>
+						<option value="medium">Medium</option>
+						<option value="high">High</option>
+					</select>
+				</div>
+			)
+		}
+
 		return null
 	},
 }))
@@ -459,7 +475,7 @@ describe("ApiOptions", () => {
 			// However, we've tested the state update call.
 		})
 
-		it.skip("updates reasoningEffort in openAiCustomModelInfo when select value changes", () => {
+		it("updates reasoningEffort in openAiCustomModelInfo when select value changes", () => {
 			const mockSetApiConfigurationField = vi.fn()
 			const initialConfig = {
 				apiProvider: "openai" as const,
@@ -484,10 +500,11 @@ describe("ApiOptions", () => {
 			const selectContainer = screen.getByTestId("reasoning-effort")
 			expect(selectContainer).toBeInTheDocument()
 
-			console.log(selectContainer.querySelector("select")?.value)
+			const reasoningSelect = within(selectContainer).getByRole("combobox")
+			expect(reasoningSelect).toHaveValue("low")
 
 			// Simulate changing the reasoning effort to 'high'
-			fireEvent.change(selectContainer.querySelector("select")!, { target: { value: "high" } })
+			fireEvent.change(reasoningSelect, { target: { value: "high" } })
 
 			// Check if setApiConfigurationField was called correctly for openAiCustomModelInfo
 			expect(mockSetApiConfigurationField).toHaveBeenCalledWith(
diff --git a/webview-ui/src/components/settings/__tests__/SettingsView.unsaved-changes.spec.tsx b/webview-ui/src/components/settings/__tests__/SettingsView.unsaved-changes.spec.tsx
index 83be2509d0..567838cc30 100644
--- a/webview-ui/src/components/settings/__tests__/SettingsView.unsaved-changes.spec.tsx
+++ b/webview-ui/src/components/settings/__tests__/SettingsView.unsaved-changes.spec.tsx
@@ -65,7 +65,7 @@ vi.mock("@src/components/ui", () => ({
 			{...props}
 		/>
 	),
-	AlertDialog: ({ children }: any) => <div>{children}</div>,
+	AlertDialog: ({ open, children }: any) => (open ? <div data-testid="alert-dialog">{children}</div> : null),
 	AlertDialogContent: ({ children }: any) => <div>{children}</div>,
 	AlertDialogTitle: ({ children }: any) => <div>{children}</div>,
 	AlertDialogDescription: ({ children }: any) => <div>{children}</div>,
@@ -325,10 +325,7 @@ describe("SettingsView - Unsaved Changes Detection", () => {
 		;(useExtensionState as any).mockReturnValue(defaultExtensionState)
 	})
 
-	// TODO: Fix underlying issue - dialog appears even when no user changes have been made
-	// This happens because some component is triggering setCachedStateField during initialization
-	// without properly marking it as a non-user action
-	it.skip("should not show unsaved changes when settings are automatically initialized", async () => {
+	it("should not show unsaved changes when settings are automatically initialized", async () => {
 		const onDone = vi.fn()
 
 		render(
@@ -361,8 +358,7 @@ describe("SettingsView - Unsaved Changes Detection", () => {
 		expect(screen.queryByText("settings:unsavedChangesDialog.title")).not.toBeInTheDocument()
 	})
 
-	// TODO: Fix underlying issue - see above
-	it.skip("should not trigger unsaved changes for automatic model initialization", async () => {
+	it("should not trigger unsaved changes for automatic model initialization", async () => {
 		const onDone = vi.fn()
 
 		// Mock ApiOptions to simulate ModelPicker initialization
@@ -460,8 +456,7 @@ describe("SettingsView - Unsaved Changes Detection", () => {
 		expect(onDone).not.toHaveBeenCalled()
 	})
 
-	// TODO: Fix underlying issue - see above
-	it.skip("should handle initialization from undefined to value without triggering unsaved changes", async () => {
+	it("should handle initialization from undefined to value without triggering unsaved changes", async () => {
 		const onDone = vi.fn()
 
 		// Start with undefined apiModelId
@@ -504,8 +499,7 @@ describe("SettingsView - Unsaved Changes Detection", () => {
 		expect(screen.queryByText("settings:unsavedChangesDialog.title")).not.toBeInTheDocument()
 	})
 
-	// TODO: Fix underlying issue - see above
-	it.skip("should handle initialization from null to value without triggering unsaved changes", async () => {
+	it("should handle initialization from null to value without triggering unsaved changes", async () => {
 		const onDone = vi.fn()
 
 		// Start with null apiModelId
@@ -548,8 +542,7 @@ describe("SettingsView - Unsaved Changes Detection", () => {
 		expect(screen.queryByText("settings:unsavedChangesDialog.title")).not.toBeInTheDocument()
 	})
 
-	// TODO: Fix underlying issue - see above
-	it.skip("should not trigger changes when ApiOptions syncs model IDs during mount", async () => {
+	it("should not trigger changes when ApiOptions syncs model IDs during mount", async () => {
 		const onDone = vi.fn()
 
 		// This specifically tests the bug we fixed where ApiOptions' useEffect

From 52788109fbe26843b3bbe8907de1ac03c39b9ca2 Mon Sep 17 00:00:00 2001
From: Elliott de Launay <edelauna@gmail.com>
Date: Thu, 14 May 2026 02:12:39 +0000
Subject: [PATCH 2/2] docs(AGENTS): adding testing instructions - also hoping
 coderabbit references this in reviews

---
 AGENTS.md                 | 11 +++++++++++
 apps/vscode-e2e/AGENTS.md |  2 ++
 2 files changed, 13 insertions(+)

diff --git a/AGENTS.md b/AGENTS.md
index ae09fd9b30..b93d6a76cc 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -3,3 +3,14 @@
 This file provides guidance to agents when working with code in this repository.
 
 - Settings View Pattern: When working on `SettingsView`, inputs must bind to the local `cachedState`, NOT the live `useExtensionState()`. The `cachedState` acts as a buffer for user edits, isolating them from the `ContextProxy` source-of-truth until the user explicitly clicks "Save". Wiring inputs directly to the live state causes race conditions.
+
+## Test Placement Guidance
+
+Prefer the narrowest test layer that proves the behavior. This follows standard test-pyramid guidance: keep most coverage in fast, focused tests; add integration tests for cross-module contracts; reserve end-to-end tests for full workflow confidence.
+
+- Use package-local unit tests for pure logic, parsing, state transitions, validation, serialization, request construction, retry decisions, and error handling.
+- Use integration tests when behavior depends on multiple internal modules working together, but does not require the real VS Code extension host or browser/webview runtime.
+- Use `webview-ui` tests for React rendering, hooks, component state, forms, validation, and webview UI wiring.
+- Use `apps/vscode-e2e` only when the behavior depends on the real VS Code extension host, VS Code workspace APIs, extension activation, webview/extension messaging, file watcher behavior, or a complete user workflow.
+- Keep e2e tests focused on high-value smoke coverage across boundaries. Avoid placing detailed protocol, parsing, storage, retry, or edge-case assertions in e2e when they can be covered reliably at a lower layer.
+- When fixing a regression, add the regression test at the lowest layer that would have failed for the bug. Add an e2e test only if lower-level tests cannot represent the failure mode.
diff --git a/apps/vscode-e2e/AGENTS.md b/apps/vscode-e2e/AGENTS.md
index 2bdf4f5d9c..dfebb00e04 100644
--- a/apps/vscode-e2e/AGENTS.md
+++ b/apps/vscode-e2e/AGENTS.md
@@ -2,6 +2,8 @@
 
 E2E tests run against `@copilotkit/aimock` (`LLMock`) — a local HTTP server that replays recorded LLM responses. This makes tests free, deterministic, and CI-friendly.
 
+Before adding an e2e test, check whether the regression can be proven with a package-local unit or integration test. E2E tests should cover real extension-host boundaries and full workflow smoke checks, not detailed assertions that belong to service, protocol, or UI component tests.
+
 ## How aimock matching works
 
 Fixtures are matched by **substring**: `incoming_last_user_message.includes(fixture.match.userMessage)`. A fixture fires if its match string appears _anywhere_ in the last user message of the API request.