From 1eadaea4bd702514281a5b8093e18fa64bf70498 Mon Sep 17 00:00:00 2001
From: Bertan Ari <bertanari@microsoft.com>
Date: Tue, 23 Jun 2026 17:45:17 -0700
Subject: [PATCH 1/5] fix(vscode-lm): reliable auto context condensing

Port of simurg79/Roo-Code#11 into Zoo-Code.
---
 .changeset/vscode-lm-condense-fix.md          |   5 +
 .../types/src/__tests__/vscode-llm.spec.ts    |  36 +++
 packages/types/src/providers/vscode-llm.ts    | 231 ++++++++++--------
 src/api/index.ts                              |  11 +
 src/api/providers/__tests__/vscode-lm.spec.ts |  50 ++++
 src/api/providers/vscode-lm.ts                |  24 +-
 .../__tests__/context-management.spec.ts      | 165 ++++++++++++-
 src/core/context-management/index.ts          |  27 +-
 src/core/task/Task.ts                         |  42 +++-
 webview-ui/src/components/chat/TaskHeader.tsx |   3 +-
 .../chat/__tests__/TaskHeader.spec.tsx        |  14 ++
 .../hooks/__tests__/useSelectedModel.spec.ts  |  55 +++++
 .../components/ui/hooks/useSelectedModel.ts   |  17 +-
 13 files changed, 549 insertions(+), 131 deletions(-)
 create mode 100644 .changeset/vscode-lm-condense-fix.md
 create mode 100644 packages/types/src/__tests__/vscode-llm.spec.ts

diff --git a/.changeset/vscode-lm-condense-fix.md b/.changeset/vscode-lm-condense-fix.md
new file mode 100644
index 0000000000..a592361786
--- /dev/null
+++ b/.changeset/vscode-lm-condense-fix.md
@@ -0,0 +1,5 @@
+---
+"zoo-code": patch
+---
+
+Fix unreliable automatic context condensing on the VS Code LM (vscode-lm) provider. The condense gate now handles the provider's `maxTokens: -1` (unlimited) explicitly — it falls back to the default output reserve for the truncation limit and to a zero reserve for the usage percentage — and measures usage against the available input space. A new optional `getCondenseContextWindow()` hook makes the gate use the curated model `maxInputTokens` instead of the inflated live window. Also refreshes the VS Code LM model catalog and default model.
diff --git a/packages/types/src/__tests__/vscode-llm.spec.ts b/packages/types/src/__tests__/vscode-llm.spec.ts
new file mode 100644
index 0000000000..7a2eabddf7
--- /dev/null
+++ b/packages/types/src/__tests__/vscode-llm.spec.ts
@@ -0,0 +1,36 @@
+import { describe, it, expect } from "vitest"
+import { vscodeLlmModels, vscodeLlmDefaultModelId } from "../providers/vscode-llm.js"
+
+describe("vscodeLlmModels", () => {
+	it("exposes the opus-4.8 row with its measured maxInputTokens and contextWindow", () => {
+		// The VS Code LM API exposes only maxInputTokens, and that is the field the UI reads from
+		// this table (useSelectedModel.ts). For claude-opus-4.8 the two fields intentionally DIVERGE:
+		// maxInputTokens (197897) is the enforced input ceiling, while contextWindow (679560) records
+		// the larger advertised window. Pin both on-disk literals so that an accidental edit which
+		// collapses them to a single value (or swaps them) fails this test.
+		expect(vscodeLlmModels).toHaveProperty("claude-opus-4.8")
+		expect(vscodeLlmModels["claude-opus-4.8"].contextWindow).toBe(679560)
+		expect(vscodeLlmModels["claude-opus-4.8"].maxInputTokens).toBe(197897)
+	})
+	it("preserves the real window for models captured with a smaller maxInputTokens", () => {
+		expect(vscodeLlmModels["gpt-4o-mini"].maxInputTokens).toBe(12078)
+		expect(vscodeLlmModels["gpt-4o-mini"].contextWindow).toBe(12078)
+		expect(vscodeLlmModels["gemini-2.5-pro"].contextWindow).toBe(108594)
+		expect(vscodeLlmModels["gemini-2.5-pro"].maxInputTokens).toBe(108594)
+	})
+	it("keeps both window fields populated and positive for every row", () => {
+		for (const [family, model] of Object.entries(vscodeLlmModels)) {
+			expect(model.contextWindow, `${family}: contextWindow must be a positive integer`).toBeGreaterThan(0)
+			expect(model.maxInputTokens, `${family}: maxInputTokens must be a positive integer`).toBeGreaterThan(0)
+		}
+	})
+	it("excludes fabricated/internal/alias families and the dropped legacy rows", () => {
+		expect(vscodeLlmModels).not.toHaveProperty("claude-opus-4.7-high")
+		expect(vscodeLlmModels).not.toHaveProperty("claude-3.5-sonnet")
+		expect(vscodeLlmModels).not.toHaveProperty("claude-4-sonnet")
+	})
+	it("defaults to a model id that exists in the table", () => {
+		expect(vscodeLlmDefaultModelId).toBe("claude-sonnet-4.5")
+		expect(vscodeLlmModels).toHaveProperty(vscodeLlmDefaultModelId)
+	})
+})
diff --git a/packages/types/src/providers/vscode-llm.ts b/packages/types/src/providers/vscode-llm.ts
index efe0691913..46df75fac9 100644
--- a/packages/types/src/providers/vscode-llm.ts
+++ b/packages/types/src/providers/vscode-llm.ts
@@ -2,189 +2,222 @@ import type { ModelInfo } from "../model.js"
 
 export type VscodeLlmModelId = keyof typeof vscodeLlmModels
 
-export const vscodeLlmDefaultModelId: VscodeLlmModelId = "claude-3.5-sonnet"
+export const vscodeLlmDefaultModelId: VscodeLlmModelId = "claude-sonnet-4.5"
 
-// https://docs.cline.bot/provider-config/vscode-language-model-api
+// Curated VS Code LM (GitHub Copilot) model catalog.
+//
+// The VS Code LM API only exposes `maxInputTokens` per model; it does NOT report a separate
+// total context window. For each row, `contextWindow` records the model's advertised window
+// while `maxInputTokens` is the enforced input ceiling the UI actually reads (via
+// useSelectedModel.ts) and the condense gate measures against. For most rows the two values
+// match. They intentionally DIVERGE only where the provider advertises a larger window than the
+// usable input ceiling (e.g. claude-opus-4.8): keeping both fields lets the context bar and the
+// auto-condense gate stay on a single source of truth (maxInputTokens) without losing the real
+// advertised window.
 export const vscodeLlmModels = {
-	"gpt-3.5-turbo": {
-		contextWindow: 12114,
-		supportsImages: false,
+	"claude-opus-4.8": {
+		contextWindow: 679560,
+		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "gpt-3.5-turbo",
-		version: "gpt-3.5-turbo-0613",
-		name: "GPT 3.5 Turbo",
+		family: "claude-opus-4.8",
+		version: "claude-opus-4.8",
+		name: "Claude Opus 4.8",
 		supportsToolCalling: true,
-		maxInputTokens: 12114,
+		maxInputTokens: 197897,
 	},
-	"gpt-4o-mini": {
-		contextWindow: 12115,
-		supportsImages: false,
+	"claude-opus-4.7": {
+		contextWindow: 197897,
+		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "gpt-4o-mini",
-		version: "gpt-4o-mini-2024-07-18",
-		name: "GPT-4o mini",
+		family: "claude-opus-4.7",
+		version: "claude-opus-4.7",
+		name: "Claude Opus 4.7",
 		supportsToolCalling: true,
-		maxInputTokens: 12115,
+		maxInputTokens: 197897,
 	},
-	"gpt-4": {
-		contextWindow: 28501,
-		supportsImages: false,
+	"claude-opus-4.6": {
+		contextWindow: 197897,
+		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "gpt-4",
-		version: "gpt-4-0613",
-		name: "GPT 4",
+		family: "claude-opus-4.6",
+		version: "claude-opus-4.6",
+		name: "Claude Opus 4.6",
 		supportsToolCalling: true,
-		maxInputTokens: 28501,
+		maxInputTokens: 197897,
 	},
-	"gpt-4-0125-preview": {
-		contextWindow: 63826,
-		supportsImages: false,
+	"claude-opus-4.5": {
+		contextWindow: 167790,
+		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "gpt-4-turbo",
-		version: "gpt-4-0125-preview",
-		name: "GPT 4 Turbo",
+		family: "claude-opus-4.5",
+		version: "claude-opus-4.5",
+		name: "Claude Opus 4.5",
 		supportsToolCalling: true,
-		maxInputTokens: 63826,
+		maxInputTokens: 167790,
 	},
-	"gpt-4o": {
-		contextWindow: 63827,
+	"claude-sonnet-4.6": {
+		contextWindow: 197896,
 		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "gpt-4o",
-		version: "gpt-4o-2024-11-20",
-		name: "GPT-4o",
+		family: "claude-sonnet-4.6",
+		version: "claude-sonnet-4.6",
+		name: "Claude Sonnet 4.6",
 		supportsToolCalling: true,
-		maxInputTokens: 63827,
+		maxInputTokens: 197896,
 	},
-	o1: {
-		contextWindow: 19827,
-		supportsImages: false,
+	"claude-sonnet-4.5": {
+		contextWindow: 167790,
+		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "o1-ga",
-		version: "o1-2024-12-17",
-		name: "o1 (Preview)",
+		family: "claude-sonnet-4.5",
+		version: "claude-sonnet-4.5",
+		name: "Claude Sonnet 4.5",
 		supportsToolCalling: true,
-		maxInputTokens: 19827,
+		maxInputTokens: 167790,
 	},
-	"o3-mini": {
-		contextWindow: 63827,
-		supportsImages: false,
+	"claude-haiku-4.5": {
+		contextWindow: 135790,
+		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "o3-mini",
-		version: "o3-mini-2025-01-31",
-		name: "o3-mini",
+		family: "claude-haiku-4.5",
+		version: "claude-haiku-4.5",
+		name: "Claude Haiku 4.5",
 		supportsToolCalling: true,
-		maxInputTokens: 63827,
+		maxInputTokens: 135790,
 	},
-	"claude-3.5-sonnet": {
-		contextWindow: 81638,
+	"gpt-5.5": {
+		contextWindow: 268426,
 		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "claude-3.5-sonnet",
-		version: "claude-3.5-sonnet",
-		name: "Claude 3.5 Sonnet",
+		family: "gpt-5.5",
+		version: "gpt-5.5",
+		name: "GPT-5.5",
 		supportsToolCalling: true,
-		maxInputTokens: 81638,
+		maxInputTokens: 268426,
 	},
-	"claude-4-sonnet": {
-		contextWindow: 128000,
+	"gpt-5.4": {
+		contextWindow: 268424,
 		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "claude-sonnet-4",
-		version: "claude-sonnet-4",
-		name: "Claude Sonnet 4",
+		family: "gpt-5.4",
+		version: "gpt-5.4",
+		name: "GPT-5.4",
 		supportsToolCalling: true,
-		maxInputTokens: 111836,
+		maxInputTokens: 268424,
 	},
-	"gemini-2.0-flash-001": {
-		contextWindow: 127827,
+	"gpt-5.4-mini": {
+		contextWindow: 271790,
 		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "gemini-2.0-flash",
-		version: "gemini-2.0-flash-001",
-		name: "Gemini 2.0 Flash",
-		supportsToolCalling: false,
-		maxInputTokens: 127827,
+		family: "gpt-5.4-mini",
+		version: "gpt-5.4-mini",
+		name: "GPT-5.4 mini",
+		supportsToolCalling: true,
+		maxInputTokens: 271790,
 	},
-	"gemini-2.5-pro": {
-		contextWindow: 128000,
+	"gpt-5.3-codex": {
+		contextWindow: 271790,
 		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "gemini-2.5-pro",
-		version: "gemini-2.5-pro-preview-03-25",
-		name: "Gemini 2.5 Pro (Preview)",
+		family: "gpt-5.3-codex",
+		version: "gpt-5.3-codex",
+		name: "GPT-5.3-Codex",
 		supportsToolCalling: true,
-		maxInputTokens: 108637,
+		maxInputTokens: 271790,
 	},
-	"o4-mini": {
-		contextWindow: 128000,
+	"gpt-5-mini": {
+		contextWindow: 127790,
+		supportsImages: true,
+		supportsPromptCache: false,
+		inputPrice: 0,
+		outputPrice: 0,
+		family: "gpt-5-mini",
+		version: "gpt-5-mini",
+		name: "GPT-5 mini",
+		supportsToolCalling: true,
+		maxInputTokens: 127790,
+	},
+	"gpt-4o-mini": {
+		contextWindow: 12078,
 		supportsImages: false,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "o4-mini",
-		version: "o4-mini-2025-04-16",
-		name: "o4-mini (Preview)",
+		family: "gpt-4o-mini",
+		version: "gpt-4o-mini-2024-07-18",
+		name: "GPT-4o mini",
 		supportsToolCalling: true,
-		maxInputTokens: 111452,
+		maxInputTokens: 12078,
 	},
-	"gpt-4.1": {
-		contextWindow: 128000,
+	"gemini-3.1-pro-preview": {
+		contextWindow: 197897,
 		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "gpt-4.1",
-		version: "gpt-4.1-2025-04-14",
-		name: "GPT-4.1 (Preview)",
+		family: "gemini-3.1-pro-preview",
+		version: "gemini-3.1-pro-preview",
+		name: "Gemini 3.1 Pro (Preview)",
 		supportsToolCalling: true,
-		maxInputTokens: 111452,
+		maxInputTokens: 197897,
 	},
-	"gpt-5-mini": {
-		contextWindow: 128000,
+	"gemini-3.5-flash": {
+		contextWindow: 197895,
 		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "gpt-5-mini",
-		version: "gpt-5-mini",
-		name: "GPT-5 mini (Preview)",
+		family: "gemini-3.5-flash",
+		version: "gemini-3.5-flash",
+		name: "Gemini 3.5 Flash",
+		supportsToolCalling: true,
+		maxInputTokens: 197895,
+	},
+	"gemini-3-flash": {
+		contextWindow: 108594,
+		supportsImages: true,
+		supportsPromptCache: false,
+		inputPrice: 0,
+		outputPrice: 0,
+		family: "gemini-3-flash",
+		version: "gemini-3-flash-preview",
+		name: "Gemini 3 Flash (Preview)",
 		supportsToolCalling: true,
-		maxInputTokens: 108637,
+		maxInputTokens: 108594,
 	},
-	"gpt-5": {
-		contextWindow: 128000,
+	"gemini-2.5-pro": {
+		contextWindow: 108594,
 		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "gpt-5",
-		version: "gpt-5",
-		name: "GPT-5 (Preview)",
+		family: "gemini-2.5-pro",
+		version: "gemini-2.5-pro",
+		name: "Gemini 2.5 Pro",
 		supportsToolCalling: true,
-		maxInputTokens: 108637,
+		maxInputTokens: 108594,
 	},
 } as const satisfies Record<
 	string,
diff --git a/src/api/index.ts b/src/api/index.ts
index 0c901f8e23..00201b0d21 100644
--- a/src/api/index.ts
+++ b/src/api/index.ts
@@ -107,6 +107,17 @@ export interface ApiHandler {
 
 	getModel(): { id: string; info: ModelInfo }
 
+	/**
+	 * Optional: the context window (in tokens) to use for context-management /
+	 * auto-condense decisions, when it must differ from getModel().info.contextWindow.
+	 *
+	 * Only the VS Code LM (Copilot) provider overrides this, to measure usage against the
+	 * model's static `maxInputTokens` instead of the inflated live window VS Code reports.
+	 * Other providers leave it undefined and callers fall back to getModel().info.contextWindow,
+	 * so their behavior is unchanged.
+	 */
+	getCondenseContextWindow?(): number
+
 	/**
 	 * Counts tokens for content blocks
 	 * All providers extend BaseProvider which provides a default tiktoken implementation,
diff --git a/src/api/providers/__tests__/vscode-lm.spec.ts b/src/api/providers/__tests__/vscode-lm.spec.ts
index a79a5a4bcb..0e1797ab44 100644
--- a/src/api/providers/__tests__/vscode-lm.spec.ts
+++ b/src/api/providers/__tests__/vscode-lm.spec.ts
@@ -63,6 +63,7 @@ import * as vscode from "vscode"
 import { VsCodeLmHandler } from "../vscode-lm"
 import type { ApiHandlerOptions } from "../../../shared/api"
 import type { Anthropic } from "@anthropic-ai/sdk"
+import { openAiModelInfoSaneDefaults, vscodeLlmModels } from "@roo-code/types"
 
 const mockLanguageModelChat = {
 	id: "test-model",
@@ -440,6 +441,55 @@ describe("VsCodeLmHandler", () => {
 			const model = handler.getModel()
 			expect(model.info).toBeDefined()
 		})
+
+		it("should use the full advertised maxInputTokens without an upper cap", async () => {
+			// VS Code can report a very large advertised window; getModel surfaces it as-is
+			// (Math.max(0, maxInputTokens)) rather than clamping to a smaller default.
+			const mockModel = { ...mockLanguageModelChat, maxInputTokens: 936000 }
+			;(vscode.lm.selectChatModels as Mock).mockResolvedValue([mockModel])
+			handler["client"] = null
+			await handler.initializeClient()
+
+			const model = handler.getModel()
+			expect(model.info.contextWindow).toBe(936000)
+		})
+
+		it("should pass through a small maxInputTokens unchanged", async () => {
+			const mockModel = { ...mockLanguageModelChat, maxInputTokens: 4096 }
+			;(vscode.lm.selectChatModels as Mock).mockResolvedValue([mockModel])
+			handler["client"] = null
+			await handler.initializeClient()
+
+			const model = handler.getModel()
+			expect(model.info.contextWindow).toBe(4096)
+		})
+
+		it("should fall back to sane defaults when maxInputTokens is not a number", async () => {
+			const mockModel = { ...mockLanguageModelChat, maxInputTokens: undefined as unknown as number }
+			;(vscode.lm.selectChatModels as Mock).mockResolvedValue([mockModel])
+			handler["client"] = null
+			await handler.initializeClient()
+
+			const model = handler.getModel()
+			expect(model.info.contextWindow).toBe(openAiModelInfoSaneDefaults.contextWindow)
+		})
+	})
+
+	describe("getCondenseContextWindow", () => {
+		it("uses the static-table maxInputTokens for a known VS Code LM family", () => {
+			const opusHandler = new VsCodeLmHandler({
+				vsCodeLmModelSelector: { vendor: "copilot", family: "claude-opus-4.8" },
+			})
+			expect(opusHandler.getCondenseContextWindow()).toBe(vscodeLlmModels["claude-opus-4.8"].maxInputTokens)
+			opusHandler.dispose()
+		})
+
+		it("falls back to the live model context window for families not in the static table", () => {
+			// test-family is not a curated row, so the gate uses the live runtime window.
+			handler["client"] = mockLanguageModelChat as unknown as vscode.LanguageModelChat
+			expect(handler.getCondenseContextWindow()).toBe(handler.getModel().info.contextWindow)
+			expect(handler.getCondenseContextWindow()).toBe(mockLanguageModelChat.maxInputTokens)
+		})
 	})
 
 	describe("countTokens", () => {
diff --git a/src/api/providers/vscode-lm.ts b/src/api/providers/vscode-lm.ts
index 8fb564a9d5..d730658b44 100644
--- a/src/api/providers/vscode-lm.ts
+++ b/src/api/providers/vscode-lm.ts
@@ -2,7 +2,7 @@ import { Anthropic } from "@anthropic-ai/sdk"
 import * as vscode from "vscode"
 import OpenAI from "openai"
 
-import { type ModelInfo, openAiModelInfoSaneDefaults } from "@roo-code/types"
+import { type ModelInfo, openAiModelInfoSaneDefaults, vscodeLlmModels } from "@roo-code/types"
 
 import type { ApiHandlerOptions } from "../../shared/api"
 import { SELECTOR_SEPARATOR, stringifyVsCodeLmModelSelector } from "../../shared/vsCodeSelectorUtils"
@@ -562,6 +562,28 @@ export class VsCodeLmHandler extends BaseProvider implements SingleCompletionHan
 		}
 	}
 
+	/**
+	 * Context window used for auto-condense / context-management decisions.
+	 *
+	 * VS Code's LM API reports `client.maxInputTokens` as Copilot's *advertised* window,
+	 * which is far larger than the realistic usable window; relying on it keeps auto-condense
+	 * from ever firing. For condense decisions we instead measure usage against the curated
+	 * static table's `maxInputTokens` — the same value the context bar uses via
+	 * `useSelectedModel` — so the gate and the gauge stay on one source of truth.
+	 *
+	 * Falls back to the live runtime window when the selected model isn't in the static table.
+	 */
+	getCondenseContextWindow(): number {
+		const family = this.client?.family ?? this.options.vsCodeLmModelSelector?.family
+		const staticModel = family ? vscodeLlmModels[family as keyof typeof vscodeLlmModels] : undefined
+
+		if (staticModel && typeof staticModel.maxInputTokens === "number" && staticModel.maxInputTokens > 0) {
+			return staticModel.maxInputTokens
+		}
+
+		return this.getModel().info.contextWindow
+	}
+
 	async completePrompt(prompt: string): Promise<string> {
 		try {
 			const client = await this.getClient()
diff --git a/src/core/context-management/__tests__/context-management.spec.ts b/src/core/context-management/__tests__/context-management.spec.ts
index 9950ec536b..2e81d61e9f 100644
--- a/src/core/context-management/__tests__/context-management.spec.ts
+++ b/src/core/context-management/__tests__/context-management.spec.ts
@@ -810,9 +810,10 @@ describe("Context Management", () => {
 			const summarizeSpy = vi.spyOn(condenseModule, "summarizeConversation")
 
 			const modelInfo = createModelInfo(100000, 30000)
-			// Set tokens to be below both the allowedTokens threshold and the percentage threshold
+			// Usage is measured against available input space (contextWindow - maxTokens reserve).
+			// available = 100000 - 30000 = 70000; 30000 / 70000 ≈ 43% < 50% threshold.
 			const contextWindow = modelInfo.contextWindow
-			const totalTokens = 40000 // 40% of context window
+			const totalTokens = 30000
 			const messagesWithSmallContent = [
 				...messages.slice(0, -1),
 				{ ...messages[messages.length - 1], content: "" },
@@ -825,7 +826,7 @@ describe("Context Management", () => {
 				maxTokens: modelInfo.maxTokens,
 				apiHandler: mockApiHandler,
 				autoCondenseContext: true,
-				autoCondenseContextPercent: 50, // Set threshold to 50% - our tokens are at 40%
+				autoCondenseContextPercent: 50, // Set threshold to 50% - usage is ~43% of available input
 				systemPrompt: "System prompt",
 				taskId,
 				profileThresholds: {},
@@ -1507,12 +1508,14 @@ describe("Context Management", () => {
 		})
 
 		it("should return false when context percent is below threshold", () => {
+			// Usage is measured against available input space (contextWindow - maxTokens reserve).
+			// available = 100000 - 30000 = 70000; 30000 / 70000 ≈ 43% < 50% threshold.
 			const result = willManageContext({
-				totalTokens: 40000,
-				contextWindow: 100000, // 40% of context window
+				totalTokens: 30000,
+				contextWindow: 100000,
 				maxTokens: 30000,
 				autoCondenseContext: true,
-				autoCondenseContextPercent: 50, // 50% threshold
+				autoCondenseContextPercent: 50, // 50% threshold; usage is ~43% of available input
 				profileThresholds: {},
 				currentProfileId: "default",
 				lastMessageTokens: 0,
@@ -1520,6 +1523,26 @@ describe("Context Management", () => {
 			expect(result).toBe(false)
 		})
 
+		it("should treat a negative maxTokens (vscode-lm reports -1) as the default reserve, not -1", () => {
+			// vscode-lm reports maxTokens: -1 (unlimited). A naive `maxTokens || DEFAULT` keeps -1
+			// (it is truthy), which inflates allowedTokens to 100000 * 0.9 + 1 = 90001 and would let
+			// 85000 tokens slip under the limit. The guard must treat -1 like an unknown reserve
+			// (ANTHROPIC_DEFAULT_MAX_TOKENS for allowed-tokens, zero reserve for the percentage).
+			// With autoCondenseContext disabled, only the allowedTokens path can trigger:
+			// allowedTokens = 100000 * 0.9 - 8192 = 81808; totalTokens 85000 > 81808 → true.
+			const result = willManageContext({
+				totalTokens: 85000,
+				contextWindow: 100000,
+				maxTokens: -1,
+				autoCondenseContext: false,
+				autoCondenseContextPercent: 50,
+				profileThresholds: {},
+				currentProfileId: "default",
+				lastMessageTokens: 0,
+			})
+			expect(result).toBe(true)
+		})
+
 		it("should return true when tokens exceed allowedTokens even if autoCondenseContext is false", () => {
 			// allowedTokens = contextWindow * (1 - 0.1) - reservedTokens = 100000 * 0.9 - 30000 = 60000
 			const result = willManageContext({
@@ -1581,10 +1604,12 @@ describe("Context Management", () => {
 		})
 
 		it("should include lastMessageTokens in the calculation", () => {
-			// Without lastMessageTokens: 49000 tokens = 49%
-			// With lastMessageTokens: 49000 + 2000 = 51000 tokens = 51%
+			// Usage is measured against available input space (contextWindow - maxTokens reserve).
+			// available = 100000 - 30000 = 70000.
+			// Without lastMessageTokens: 34000 / 70000 ≈ 48.6% < 50% threshold.
+			// With lastMessageTokens: (34000 + 2000) / 70000 ≈ 51.4% ≥ 50% threshold.
 			const resultWithoutLastMessage = willManageContext({
-				totalTokens: 49000,
+				totalTokens: 34000,
 				contextWindow: 100000,
 				maxTokens: 30000,
 				autoCondenseContext: true,
@@ -1596,14 +1621,14 @@ describe("Context Management", () => {
 			expect(resultWithoutLastMessage).toBe(false)
 
 			const resultWithLastMessage = willManageContext({
-				totalTokens: 49000,
+				totalTokens: 34000,
 				contextWindow: 100000,
 				maxTokens: 30000,
 				autoCondenseContext: true,
 				autoCondenseContextPercent: 50, // 50% threshold
 				profileThresholds: {},
 				currentProfileId: "default",
-				lastMessageTokens: 2000, // Pushes total to 51%
+				lastMessageTokens: 2000, // Pushes usage over 50% of available input
 			})
 			expect(resultWithLastMessage).toBe(true)
 		})
@@ -1701,4 +1726,122 @@ describe("Context Management", () => {
 			expect(result.newContextTokensAfterTruncation).toBeGreaterThan(0)
 		})
 	})
+
+	/**
+	 * Regression tests: the condense gate must measure usage against available input space
+	 * (contextWindow - reserved output), not the raw context window. This keeps the gate in
+	 * lockstep with the UI context gauge and ensures it actually fires for providers like
+	 * vscode-lm that report maxTokens: -1.
+	 */
+	describe("contextPercent uses available input space (regression)", () => {
+		const createModelInfo = (contextWindow: number, maxTokens?: number): ModelInfo => ({
+			contextWindow,
+			supportsPromptCache: true,
+			maxTokens,
+		})
+
+		const messages: ApiMessage[] = [
+			{ role: "user", content: "First message" },
+			{ role: "assistant", content: "Second message" },
+			{ role: "user", content: "Third message" },
+			{ role: "assistant", content: "Fourth message" },
+			{ role: "user", content: "Fifth message" },
+		]
+
+		it("willManageContext measures the percentage against available input, not the full window", () => {
+			// contextWindow 200000, reserve 64000 → available input 136000.
+			// totalTokens 100000 → 100000 / 136000 ≈ 73.5%, which clears the 70% threshold.
+			// Against the full window it would be only 50% and the gate would (wrongly) stay closed.
+			const result = willManageContext({
+				totalTokens: 100000,
+				contextWindow: 200000,
+				maxTokens: 64000,
+				autoCondenseContext: true,
+				autoCondenseContextPercent: 70,
+				profileThresholds: {},
+				currentProfileId: "default",
+				lastMessageTokens: 0,
+			})
+			expect(result).toBe(true)
+		})
+
+		it("willManageContext stays below threshold when usage is under available input", () => {
+			// available input 136000; totalTokens 90000 → ≈ 66.2% < 70% threshold.
+			const result = willManageContext({
+				totalTokens: 90000,
+				contextWindow: 200000,
+				maxTokens: 64000,
+				autoCondenseContext: true,
+				autoCondenseContextPercent: 70,
+				profileThresholds: {},
+				currentProfileId: "default",
+				lastMessageTokens: 0,
+			})
+			expect(result).toBe(false)
+		})
+
+		it("willManageContext treats an unlimited (-1) reserve as zero reserve for the percentage", () => {
+			// vscode-lm reports maxTokens: -1. The percentage denominator should fall back to the
+			// full window (zero reserve): 150000 / 200000 = 75% ≥ 70% threshold.
+			const result = willManageContext({
+				totalTokens: 150000,
+				contextWindow: 200000,
+				maxTokens: -1,
+				autoCondenseContext: true,
+				autoCondenseContextPercent: 70,
+				profileThresholds: {},
+				currentProfileId: "default",
+				lastMessageTokens: 0,
+			})
+			expect(result).toBe(true)
+		})
+
+		it("manageContext summarizes based on available input space, end-to-end", async () => {
+			const mockSummary = "Available-input summary"
+			const mockSummarizeResponse: condenseModule.SummarizeResponse = {
+				messages: [
+					{ role: "user", content: "First message" },
+					{ role: "user", content: mockSummary, isSummary: true },
+					{ role: "assistant", content: "Last message" },
+				],
+				summary: mockSummary,
+				cost: 0.05,
+				newContextTokens: 100,
+			}
+			const summarizeSpy = vi
+				.spyOn(condenseModule, "summarizeConversation")
+				.mockResolvedValue(mockSummarizeResponse)
+
+			const modelInfo = createModelInfo(200000, 64000)
+			// available input 136000; totalTokens 100000 → ≈ 73.5% ≥ 70% threshold, but only 50% of
+			// the raw window. The end-to-end path must trigger summarization on the available-input math.
+			const totalTokens = 100000
+			const messagesWithSmallContent = [
+				...messages.slice(0, -1),
+				{ ...messages[messages.length - 1], content: "" },
+			]
+
+			const result = await manageContext({
+				messages: messagesWithSmallContent,
+				totalTokens,
+				contextWindow: modelInfo.contextWindow,
+				maxTokens: modelInfo.maxTokens,
+				apiHandler: mockApiHandler,
+				autoCondenseContext: true,
+				autoCondenseContextPercent: 70,
+				systemPrompt: "System prompt",
+				taskId,
+				profileThresholds: {},
+				currentProfileId: "default",
+			})
+
+			expect(summarizeSpy).toHaveBeenCalled()
+			expect(result).toMatchObject({
+				summary: mockSummary,
+				prevContextTokens: totalTokens,
+			})
+
+			summarizeSpy.mockRestore()
+		})
+	})
 })
diff --git a/src/core/context-management/index.ts b/src/core/context-management/index.ts
index 243d7bd797..cc4d5ba3d7 100644
--- a/src/core/context-management/index.ts
+++ b/src/core/context-management/index.ts
@@ -170,13 +170,15 @@ export function willManageContext({
 }: WillManageContextOptions): boolean {
 	if (!autoCondenseContext) {
 		// When auto-condense is disabled, only truncation can occur
-		const reservedTokens = maxTokens || ANTHROPIC_DEFAULT_MAX_TOKENS
+		// vscode-lm reports maxTokens: -1 (unlimited); a negative reserve must not distort the window math.
+		const reservedTokens = maxTokens && maxTokens > 0 ? maxTokens : ANTHROPIC_DEFAULT_MAX_TOKENS
 		const prevContextTokens = totalTokens + lastMessageTokens
 		const allowedTokens = contextWindow * (1 - TOKEN_BUFFER_PERCENTAGE) - reservedTokens
 		return prevContextTokens > allowedTokens
 	}
 
-	const reservedTokens = maxTokens || ANTHROPIC_DEFAULT_MAX_TOKENS
+	// vscode-lm reports maxTokens: -1 (unlimited); a negative reserve must not distort the window math.
+	const reservedTokens = maxTokens && maxTokens > 0 ? maxTokens : ANTHROPIC_DEFAULT_MAX_TOKENS
 	const prevContextTokens = totalTokens + lastMessageTokens
 	const allowedTokens = contextWindow * (1 - TOKEN_BUFFER_PERCENTAGE) - reservedTokens
 
@@ -192,7 +194,14 @@ export function willManageContext({
 		// Invalid values fall back to global setting (effectiveThreshold already set)
 	}
 
-	const contextPercent = (100 * prevContextTokens) / contextWindow
+	// Measure usage against the available input space (context window minus the
+	// reserved output budget), matching the context gauge shown in the UI. Reserved
+	// output tokens can never hold conversation context, so this is the meaningful
+	// "how full is my usable input" figure. When the reserve is unknown/unlimited
+	// (e.g., vscode-lm reports -1), fall back to the full context window.
+	const reservedForOutput = maxTokens && maxTokens > 0 ? maxTokens : 0
+	const availableInputTokens = contextWindow - reservedForOutput
+	const contextPercent = availableInputTokens > 0 ? (100 * prevContextTokens) / availableInputTokens : 100
 	return contextPercent >= effectiveThreshold || prevContextTokens > allowedTokens
 }
 
@@ -267,7 +276,8 @@ export async function manageContext({
 	let errorDetails: string | undefined
 	let cost = 0
 	// Calculate the maximum tokens reserved for response
-	const reservedTokens = maxTokens || ANTHROPIC_DEFAULT_MAX_TOKENS
+	// vscode-lm reports maxTokens: -1 (unlimited); a negative reserve must not distort the window math.
+	const reservedTokens = maxTokens && maxTokens > 0 ? maxTokens : ANTHROPIC_DEFAULT_MAX_TOKENS
 
 	// Estimate tokens for the last message (which is always a user message)
 	const lastMessage = messages[messages.length - 1]
@@ -304,7 +314,14 @@ export async function manageContext({
 	// If no specific threshold is found for the profile, fall back to global setting
 
 	if (autoCondenseContext) {
-		const contextPercent = (100 * prevContextTokens) / contextWindow
+		// Measure usage against the available input space (context window minus the
+		// reserved output budget), matching the context gauge shown in the UI. Reserved
+		// output tokens can never hold conversation context, so this is the meaningful
+		// "how full is my usable input" figure. When the reserve is unknown/unlimited
+		// (e.g., vscode-lm reports -1), fall back to the full context window.
+		const reservedForOutput = maxTokens && maxTokens > 0 ? maxTokens : 0
+		const availableInputTokens = contextWindow - reservedForOutput
+		const contextPercent = availableInputTokens > 0 ? (100 * prevContextTokens) / availableInputTokens : 100
 		if (contextPercent >= effectiveThreshold || prevContextTokens > allowedTokens) {
 			// Attempt to intelligently condense the context
 			const result = await summarizeConversation({
diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts
index 50d4674fd0..183b0cd191 100644
--- a/src/core/task/Task.ts
+++ b/src/core/task/Task.ts
@@ -2688,9 +2688,13 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 								if (signal.aborted) {
 									reject(new Error("Request cancelled by user"))
 								} else {
-									signal.addEventListener("abort", () => {
-										reject(new Error("Request cancelled by user"))
-									}, { once: true })
+									signal.addEventListener(
+										"abort",
+										() => {
+											reject(new Error("Request cancelled by user"))
+										},
+										{ once: true },
+									)
 								}
 							})
 							return await Promise.race([nextPromise, abortPromise])
@@ -3734,7 +3738,10 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 			settings: this.apiConfiguration,
 		})
 
-		const contextWindow = modelInfo.contextWindow
+		// VS Code LM (Copilot) measures usage against its static-table maxInputTokens, not the
+		// inflated live window, so context management runs in line with the context bar. Every other
+		// provider returns undefined here and falls back to modelInfo.contextWindow.
+		const contextWindow = this.api.getCondenseContextWindow?.() ?? modelInfo.contextWindow
 
 		// Get the current profile ID using the helper method
 		const currentProfileId = this.getCurrentProfileId(state)
@@ -3930,7 +3937,10 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 				settings: this.apiConfiguration,
 			})
 
-			const contextWindow = modelInfo.contextWindow
+			// VS Code LM (Copilot) measures usage against its static-table maxInputTokens, not the
+			// inflated live window, so context management runs in line with the context bar. Every other
+			// provider returns undefined here and falls back to modelInfo.contextWindow.
+			const contextWindow = this.api.getCondenseContextWindow?.() ?? modelInfo.contextWindow
 
 			// Get the current profile ID using the helper method
 			const currentProfileId = this.getCurrentProfileId(state)
@@ -4191,10 +4201,14 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 		const iterator = stream[Symbol.asyncIterator]()
 
 		// Set up abort handling - when the signal is aborted, clean up the controller reference
-		abortSignal.addEventListener("abort", () => {
-			console.log(`[Task#${this.taskId}.${this.instanceId}] AbortSignal triggered for current request`)
-			this.currentRequestAbortController = undefined
-		}, { once: true })
+		abortSignal.addEventListener(
+			"abort",
+			() => {
+				console.log(`[Task#${this.taskId}.${this.instanceId}] AbortSignal triggered for current request`)
+				this.currentRequestAbortController = undefined
+			},
+			{ once: true },
+		)
 
 		try {
 			// Awaiting first chunk to see if it will throw an error.
@@ -4206,9 +4220,13 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 				if (abortSignal.aborted) {
 					reject(new Error("Request cancelled by user"))
 				} else {
-					abortSignal.addEventListener("abort", () => {
-						reject(new Error("Request cancelled by user"))
-					}, { once: true })
+					abortSignal.addEventListener(
+						"abort",
+						() => {
+							reject(new Error("Request cancelled by user"))
+						},
+						{ once: true },
+					)
 				}
 			})
 
diff --git a/webview-ui/src/components/chat/TaskHeader.tsx b/webview-ui/src/components/chat/TaskHeader.tsx
index 4ddf5ef35c..927d3d057d 100644
--- a/webview-ui/src/components/chat/TaskHeader.tsx
+++ b/webview-ui/src/components/chat/TaskHeader.tsx
@@ -76,7 +76,8 @@ const TaskHeader = ({
 				: 0,
 		[model, modelId, apiConfiguration],
 	)
-	const reservedForOutput = maxTokens || 0
+	// vscode-lm reports maxTokens: -1 (unlimited); a negative reserve must not distort the window math.
+	const reservedForOutput = maxTokens && maxTokens > 0 ? maxTokens : 0
 
 	const condenseButton = (
 		<LucideIconButton
diff --git a/webview-ui/src/components/chat/__tests__/TaskHeader.spec.tsx b/webview-ui/src/components/chat/__tests__/TaskHeader.spec.tsx
index 41aa452ab1..c845382632 100644
--- a/webview-ui/src/components/chat/__tests__/TaskHeader.spec.tsx
+++ b/webview-ui/src/components/chat/__tests__/TaskHeader.spec.tsx
@@ -267,5 +267,19 @@ describe("TaskHeader", () => {
 			// Should show 0% when available input space is 0
 			expect(screen.getByText("0%")).toBeInTheDocument()
 		})
+
+		it("should treat a negative maxTokens (vscode-lm reports -1) as zero reserve", () => {
+			// vscode-lm reports maxTokens: -1 (unlimited). A naive `maxTokens || 0` keeps -1,
+			// which would inflate available input space and skew the percentage. The guard must
+			// treat -1 as a zero reserve so available space == contextWindow.
+			// contextTokens = 250, contextWindow = 1000, reservedForOutput = 0
+			// Percentage = 250 / 1000 * 100 = 25%
+			mockModelInfo = { contextWindow: 1000, maxTokens: -1 }
+			mockMaxOutputTokens = -1
+
+			renderTaskHeader({ contextTokens: 250 })
+
+			expect(screen.getByText("25%")).toBeInTheDocument()
+		})
 	})
 })
diff --git a/webview-ui/src/components/ui/hooks/__tests__/useSelectedModel.spec.ts b/webview-ui/src/components/ui/hooks/__tests__/useSelectedModel.spec.ts
index 0dc42129c0..6f3f1edc4f 100644
--- a/webview-ui/src/components/ui/hooks/__tests__/useSelectedModel.spec.ts
+++ b/webview-ui/src/components/ui/hooks/__tests__/useSelectedModel.spec.ts
@@ -14,6 +14,8 @@ import {
 	minimaxDefaultModelId,
 	minimaxModels,
 	openRouterDefaultModelId,
+	vscodeLlmModels,
+	vscodeLlmDefaultModelId,
 } from "@roo-code/types"
 
 import { useSelectedModel } from "../useSelectedModel"
@@ -772,4 +774,57 @@ describe("useSelectedModel", () => {
 			expect(result.current.info).toEqual(minimaxModels["MiniMax-M2.7"])
 		})
 	})
+
+	describe("vscode-lm provider", () => {
+		beforeEach(() => {
+			mockUseRouterModels.mockReturnValue({
+				data: {
+					openrouter: {},
+					requesty: {},
+					litellm: {},
+				},
+				isLoading: false,
+				isError: false,
+			} as any)
+
+			mockUseOpenRouterModelProviders.mockReturnValue({
+				data: {},
+				isLoading: false,
+				isError: false,
+			} as any)
+		})
+
+		it("resolves a listed family's contextWindow to its maxInputTokens", () => {
+			const family = vscodeLlmDefaultModelId
+			const apiConfiguration: ProviderSettings = {
+				apiProvider: "vscode-lm",
+				vsCodeLmModelSelector: { vendor: "copilot", family },
+			}
+
+			const wrapper = createWrapper()
+			const { result } = renderHook(() => useSelectedModel(apiConfiguration), { wrapper })
+
+			expect(result.current.provider).toBe("vscode-lm")
+			expect(result.current.id).toBe(`copilot/${family}`)
+			// The bar and the condense gate share one source of truth: contextWindow === maxInputTokens.
+			expect(result.current.info?.contextWindow).toBe(vscodeLlmModels[family].maxInputTokens)
+			expect(result.current.info?.supportsImages).toBe(false)
+		})
+
+		it("falls back to the default model's window for an unlisted family (NOT 128000)", () => {
+			const apiConfiguration: ProviderSettings = {
+				apiProvider: "vscode-lm",
+				vsCodeLmModelSelector: { vendor: "copilot", family: "totally-unknown-family" },
+			}
+
+			const wrapper = createWrapper()
+			const { result } = renderHook(() => useSelectedModel(apiConfiguration), { wrapper })
+
+			// On a family miss we must NOT fall back to openAiModelInfoSaneDefaults' 128000 window,
+			// which would diverge from the gate. Instead, use the default model's maxInputTokens.
+			expect(result.current.info?.contextWindow).not.toBe(128000)
+			expect(result.current.info?.contextWindow).toBe(vscodeLlmModels[vscodeLlmDefaultModelId].maxInputTokens)
+			expect(result.current.info?.supportsImages).toBe(false)
+		})
+	})
 })
diff --git a/webview-ui/src/components/ui/hooks/useSelectedModel.ts b/webview-ui/src/components/ui/hooks/useSelectedModel.ts
index d3ebb6c0dd..8c8a5360da 100644
--- a/webview-ui/src/components/ui/hooks/useSelectedModel.ts
+++ b/webview-ui/src/components/ui/hooks/useSelectedModel.ts
@@ -310,8 +310,21 @@ function getSelectedModel({
 				? `${apiConfiguration.vsCodeLmModelSelector.vendor}/${apiConfiguration.vsCodeLmModelSelector.family}`
 				: vscodeLlmDefaultModelId
 			const modelFamily = apiConfiguration?.vsCodeLmModelSelector?.family ?? vscodeLlmDefaultModelId
-			const info = vscodeLlmModels[modelFamily as keyof typeof vscodeLlmModels]
-			return { id, info: { ...openAiModelInfoSaneDefaults, ...info, supportsImages: false } } // VSCode LM API currently doesn't support images.
+			// On a family miss, fall back to the default model entry instead of openAiModelInfoSaneDefaults,
+			// whose 128K contextWindow would diverge from the gate and make the bar read >100% while
+			// auto-condense never fires (the gate uses the live window).
+			const listedModel =
+				vscodeLlmModels[modelFamily as keyof typeof vscodeLlmModels] ?? vscodeLlmModels[vscodeLlmDefaultModelId]
+			// contextWindow MUST equal maxInputTokens: that is the exact value the gate consumes via
+			// getModel().info.contextWindow = Math.max(0, client.maxInputTokens) in src/api/providers/vscode-lm.ts,
+			// so the UI bar and the condense gate share a single source of truth.
+			const info: ModelInfo = {
+				...openAiModelInfoSaneDefaults,
+				...listedModel,
+				contextWindow: listedModel.maxInputTokens,
+				supportsImages: false, // VSCode LM API currently doesn't support images.
+			}
+			return { id, info }
 		}
 		case "sambanova": {
 			const id = apiConfiguration.apiModelId ?? defaultModelId

From 62a556c97e06f7f78837d314c0e419d06a066d61 Mon Sep 17 00:00:00 2001
From: Bertan Ari <bertanari@microsoft.com>
Date: Wed, 24 Jun 2026 08:23:15 -0700
Subject: [PATCH 2/5] test(vscode-lm): cover condense-window edge branches for
 codecov/patch

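Add targeted tests for the previously-uncovered ported branches: the availableInputTokens<=0 fallback to 100% in willManageContext/manageContext, and the getCondenseContextWindow() guard fallbacks (no resolvable family, and a static row whose maxInputTokens is non-positive). The vscode-lm UI family-miss window resolution is already covered by the useSelectedModel tests added in the previous commit. Raises patch coverage to satisfy the codecov/patch 80% gate.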
---
 src/api/providers/__tests__/vscode-lm.spec.ts | 29 +++++++
 .../__tests__/context-management.spec.ts      | 81 +++++++++++++++++++
 2 files changed, 110 insertions(+)

diff --git a/src/api/providers/__tests__/vscode-lm.spec.ts b/src/api/providers/__tests__/vscode-lm.spec.ts
index 0e1797ab44..42779e813c 100644
--- a/src/api/providers/__tests__/vscode-lm.spec.ts
+++ b/src/api/providers/__tests__/vscode-lm.spec.ts
@@ -490,6 +490,35 @@ describe("VsCodeLmHandler", () => {
 			expect(handler.getCondenseContextWindow()).toBe(handler.getModel().info.contextWindow)
 			expect(handler.getCondenseContextWindow()).toBe(mockLanguageModelChat.maxInputTokens)
 		})
+
+		it("falls back to the live window when no family is resolvable (no client, no selector family)", () => {
+			// With neither a client nor a selector family, `family` is undefined, so the static-table
+			// lookup is skipped entirely and the gate uses getModel().info.contextWindow (fallback info).
+			const noFamilyHandler = new VsCodeLmHandler({ vsCodeLmModelSelector: { vendor: "copilot" } })
+			noFamilyHandler["client"] = null
+			expect(noFamilyHandler.getCondenseContextWindow()).toBe(noFamilyHandler.getModel().info.contextWindow)
+			expect(noFamilyHandler.getCondenseContextWindow()).toBe(openAiModelInfoSaneDefaults.contextWindow)
+			noFamilyHandler.dispose()
+		})
+
+		it("falls back to the live window when the static row exists but maxInputTokens is non-positive", () => {
+			// Guard sub-condition: a curated family is found but its maxInputTokens is <= 0 (corrupt/zeroed).
+			// The gate must NOT trust that value; it falls through to the live runtime window instead.
+			const family = "claude-opus-4.8"
+			const original = vscodeLlmModels[family].maxInputTokens
+			try {
+				;(vscodeLlmModels[family] as { maxInputTokens: number }).maxInputTokens = 0
+				const guardHandler = new VsCodeLmHandler({
+					vsCodeLmModelSelector: { vendor: "copilot", family },
+				})
+				guardHandler["client"] = mockLanguageModelChat as unknown as vscode.LanguageModelChat
+				expect(guardHandler.getCondenseContextWindow()).toBe(guardHandler.getModel().info.contextWindow)
+				expect(guardHandler.getCondenseContextWindow()).toBe(mockLanguageModelChat.maxInputTokens)
+				guardHandler.dispose()
+			} finally {
+				;(vscodeLlmModels[family] as { maxInputTokens: number }).maxInputTokens = original
+			}
+		})
 	})
 
 	describe("countTokens", () => {
diff --git a/src/core/context-management/__tests__/context-management.spec.ts b/src/core/context-management/__tests__/context-management.spec.ts
index 2e81d61e9f..b05ebefabc 100644
--- a/src/core/context-management/__tests__/context-management.spec.ts
+++ b/src/core/context-management/__tests__/context-management.spec.ts
@@ -1796,6 +1796,87 @@ describe("Context Management", () => {
 			expect(result).toBe(true)
 		})
 
+		it("willManageContext falls back to 100% when the reserve is >= the window (availableInput <= 0)", () => {
+			// When maxTokens (reserve) >= contextWindow, availableInputTokens = window - reserve <= 0.
+			// The denominator guard must short-circuit contextPercent to 100 rather than divide by
+			// a non-positive number, so the gate fires regardless of the (tiny) totalTokens.
+			const result = willManageContext({
+				totalTokens: 1,
+				contextWindow: 50000,
+				maxTokens: 60000, // reserve > window → availableInput = -10000
+				autoCondenseContext: true,
+				autoCondenseContextPercent: 80,
+				profileThresholds: {},
+				currentProfileId: "default",
+				lastMessageTokens: 0,
+			})
+			// contextPercent === 100 >= 80 threshold → true.
+			expect(result).toBe(true)
+		})
+
+		it("willManageContext falls back to 100% when the reserve exactly equals the window (availableInput === 0)", () => {
+			// Boundary: reserve === window → availableInputTokens === 0, still the FALSE branch (> 0 is false).
+			const result = willManageContext({
+				totalTokens: 1,
+				contextWindow: 50000,
+				maxTokens: 50000,
+				autoCondenseContext: true,
+				autoCondenseContextPercent: 90,
+				profileThresholds: {},
+				currentProfileId: "default",
+				lastMessageTokens: 0,
+			})
+			expect(result).toBe(true)
+		})
+
+		it("manageContext summarizes via the 100% fallback when the reserve >= the window (availableInput <= 0)", async () => {
+			// Mirror the willManageContext edge for the manageContext path: reserve >= window forces
+			// contextPercent to 100 via the denominator guard, so summarization triggers even though
+			// totalTokens is small relative to the raw window.
+			const mockSummary = "Reserve-exceeds-window summary"
+			const mockSummarizeResponse: condenseModule.SummarizeResponse = {
+				messages: [
+					{ role: "user", content: "First message" },
+					{ role: "user", content: mockSummary, isSummary: true },
+					{ role: "assistant", content: "Last message" },
+				],
+				summary: mockSummary,
+				cost: 0.05,
+				newContextTokens: 100,
+			}
+			const summarizeSpy = vi
+				.spyOn(condenseModule, "summarizeConversation")
+				.mockResolvedValue(mockSummarizeResponse)
+
+			// contextWindow 50000, maxTokens 60000 → availableInput = -10000 → contextPercent = 100.
+			const messagesWithSmallContent = [
+				...messages.slice(0, -1),
+				{ ...messages[messages.length - 1], content: "" },
+			]
+
+			const result = await manageContext({
+				messages: messagesWithSmallContent,
+				totalTokens: 1,
+				contextWindow: 50000,
+				maxTokens: 60000,
+				apiHandler: mockApiHandler,
+				autoCondenseContext: true,
+				autoCondenseContextPercent: 80,
+				systemPrompt: "System prompt",
+				taskId,
+				profileThresholds: {},
+				currentProfileId: "default",
+			})
+
+			expect(summarizeSpy).toHaveBeenCalled()
+			expect(result).toMatchObject({
+				summary: mockSummary,
+				prevContextTokens: 1,
+			})
+
+			summarizeSpy.mockRestore()
+		})
+
 		it("manageContext summarizes based on available input space, end-to-end", async () => {
 			const mockSummary = "Available-input summary"
 			const mockSummarizeResponse: condenseModule.SummarizeResponse = {

From e45155d3b8f784a8af6d2676141e00a691474390 Mon Sep 17 00:00:00 2001
From: Bertan Ari <bertanari@microsoft.com>
Date: Wed, 24 Jun 2026 09:00:17 -0700
Subject: [PATCH 3/5] =?UTF-8?q?chore(vscode-lm):=20address=20review=20?=
 =?UTF-8?q?=E2=80=94=20drop=20changeset,=20fix=20condense-window=20guard?=
 =?UTF-8?q?=20test?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Remove .changeset/vscode-lm-condense-fix.md (changesets are maintainer-managed per AGENTS.md; CodeRabbit flagged).

- Fix getCondenseContextWindow() non-positive-guard test so the selector family (claude-opus-4.8) drives the lookup and the zeroed static row actually exercises the maxInputTokens > 0 guard before falling back.
---
 .changeset/vscode-lm-condense-fix.md          |  5 -----
 src/api/providers/__tests__/vscode-lm.spec.ts | 12 ++++++++----
 2 files changed, 8 insertions(+), 9 deletions(-)
 delete mode 100644 .changeset/vscode-lm-condense-fix.md

diff --git a/.changeset/vscode-lm-condense-fix.md b/.changeset/vscode-lm-condense-fix.md
deleted file mode 100644
index a592361786..0000000000
--- a/.changeset/vscode-lm-condense-fix.md
+++ /dev/null
@@ -1,5 +0,0 @@
----
-"zoo-code": patch
----
-
-Fix unreliable automatic context condensing on the VS Code LM (vscode-lm) provider. The condense gate now treats the provider's `maxTokens: -1` (unlimited) as the default output reserve and measures usage against available input space, and a new `getCondenseContextWindow()` seam makes the gate use the curated model `maxInputTokens` instead of the inflated live window. Also refreshes the VS Code LM model catalog and default model.
diff --git a/src/api/providers/__tests__/vscode-lm.spec.ts b/src/api/providers/__tests__/vscode-lm.spec.ts
index 42779e813c..eb026e8169 100644
--- a/src/api/providers/__tests__/vscode-lm.spec.ts
+++ b/src/api/providers/__tests__/vscode-lm.spec.ts
@@ -501,9 +501,11 @@ describe("VsCodeLmHandler", () => {
 			noFamilyHandler.dispose()
 		})
 
-		it("falls back to the live window when the static row exists but maxInputTokens is non-positive", () => {
+		it("falls back to the derived window when the static row exists but maxInputTokens is non-positive", () => {
 			// Guard sub-condition: a curated family is found but its maxInputTokens is <= 0 (corrupt/zeroed).
-			// The gate must NOT trust that value; it falls through to the live runtime window instead.
+			// With the selector family `claude-opus-4.8` and no live client, the zeroed static row is the one
+			// consulted, so the `maxInputTokens > 0` guard fails and the gate falls back to the derived window
+			// from getModel().info.contextWindow (sane defaults here, since there is no live client).
 			const family = "claude-opus-4.8"
 			const original = vscodeLlmModels[family].maxInputTokens
 			try {
@@ -511,9 +513,11 @@ describe("VsCodeLmHandler", () => {
 				const guardHandler = new VsCodeLmHandler({
 					vsCodeLmModelSelector: { vendor: "copilot", family },
 				})
-				guardHandler["client"] = mockLanguageModelChat as unknown as vscode.LanguageModelChat
+				// Leave the client unset so `family` resolves from the selector (claude-opus-4.8),
+				// forcing the zeroed static row to be read instead of a live client's family.
+				guardHandler["client"] = null
 				expect(guardHandler.getCondenseContextWindow()).toBe(guardHandler.getModel().info.contextWindow)
-				expect(guardHandler.getCondenseContextWindow()).toBe(mockLanguageModelChat.maxInputTokens)
+				expect(guardHandler.getCondenseContextWindow()).toBe(openAiModelInfoSaneDefaults.contextWindow)
 				guardHandler.dispose()
 			} finally {
 				;(vscodeLlmModels[family] as { maxInputTokens: number }).maxInputTokens = original

From d128a5c1d5dbd2a6c5fc360c32f8dd2bcac89b0d Mon Sep 17 00:00:00 2001
From: Bertan Ari <bertanari@microsoft.com>
Date: Fri, 26 Jun 2026 06:14:42 -0700
Subject: [PATCH 4/5] fix(vscode-lm): scope available-input condense
 denominator to vscode-lm; address review

Address review feedback from edelauna on #710:
- Scope the available-input-space condense percent denominator to vscode-lm only (via the getCondenseContextWindow seam); all other providers keep dividing by the full context window. The maxTokens:-1 reserve guard remains global.
- Correct the misleading useSelectedModel comment: the gate's primary window is getCondenseContextWindow() (static maxInputTokens), not getModel().info.contextWindow.
- Strengthen the listed-family test with a claude-opus-4.8 case (contextWindow != maxInputTokens) to catch a field swap.
---
 .../__tests__/context-management.spec.ts      | 131 ++++++++++++++++--
 src/core/context-management/index.ts          |  62 ++++++---
 src/core/task/Task.ts                         |  11 ++
 .../hooks/__tests__/useSelectedModel.spec.ts  |  20 +++
 .../components/ui/hooks/useSelectedModel.ts   |   7 +-
 5 files changed, 203 insertions(+), 28 deletions(-)

diff --git a/src/core/context-management/__tests__/context-management.spec.ts b/src/core/context-management/__tests__/context-management.spec.ts
index b05ebefabc..ba0a77aacf 100644
--- a/src/core/context-management/__tests__/context-management.spec.ts
+++ b/src/core/context-management/__tests__/context-management.spec.ts
@@ -1508,8 +1508,8 @@ describe("Context Management", () => {
 		})
 
 		it("should return false when context percent is below threshold", () => {
-			// Usage is measured against available input space (contextWindow - maxTokens reserve).
-			// available = 100000 - 30000 = 70000; 30000 / 70000 ≈ 43% < 50% threshold.
+			// Available-input denominator (opt-in): available = 100000 - 30000 = 70000;
+			// 30000 / 70000 ≈ 43% < 50% threshold.
 			const result = willManageContext({
 				totalTokens: 30000,
 				contextWindow: 100000,
@@ -1519,6 +1519,7 @@ describe("Context Management", () => {
 				profileThresholds: {},
 				currentProfileId: "default",
 				lastMessageTokens: 0,
+				useAvailableInputForContextPercent: true,
 			})
 			expect(result).toBe(false)
 		})
@@ -1604,10 +1605,10 @@ describe("Context Management", () => {
 		})
 
 		it("should include lastMessageTokens in the calculation", () => {
-			// Usage is measured against available input space (contextWindow - maxTokens reserve).
-			// available = 100000 - 30000 = 70000.
+			// Available-input denominator (opt-in): available = 100000 - 30000 = 70000.
 			// Without lastMessageTokens: 34000 / 70000 ≈ 48.6% < 50% threshold.
 			// With lastMessageTokens: (34000 + 2000) / 70000 ≈ 51.4% ≥ 50% threshold.
+			// (Against the full window both cases are < 50%, so this case requires the opt-in flag.)
 			const resultWithoutLastMessage = willManageContext({
 				totalTokens: 34000,
 				contextWindow: 100000,
@@ -1617,6 +1618,7 @@ describe("Context Management", () => {
 				profileThresholds: {},
 				currentProfileId: "default",
 				lastMessageTokens: 0,
+				useAvailableInputForContextPercent: true,
 			})
 			expect(resultWithoutLastMessage).toBe(false)
 
@@ -1629,6 +1631,7 @@ describe("Context Management", () => {
 				profileThresholds: {},
 				currentProfileId: "default",
 				lastMessageTokens: 2000, // Pushes usage over 50% of available input
+				useAvailableInputForContextPercent: true,
 			})
 			expect(resultWithLastMessage).toBe(true)
 		})
@@ -1728,12 +1731,13 @@ describe("Context Management", () => {
 	})
 
 	/**
-	 * Regression tests: the condense gate must measure usage against available input space
-	 * (contextWindow - reserved output), not the raw context window. This keeps the gate in
-	 * lockstep with the UI context gauge and ensures it actually fires for providers like
-	 * vscode-lm that report maxTokens: -1.
+	 * Regression tests for the opt-in available-input denominator (vscode-lm). With the flag on,
+	 * the condense gate measures usage against available input space (contextWindow - reserved
+	 * output), not the raw context window. This keeps the gate in lockstep with the UI context
+	 * gauge and ensures it actually fires for vscode-lm, which reports maxTokens: -1. The default
+	 * (full-window) behavior for every other provider is covered by the sibling describe below.
 	 */
-	describe("contextPercent uses available input space (regression)", () => {
+	describe("contextPercent uses available input space (opt-in, regression)", () => {
 		const createModelInfo = (contextWindow: number, maxTokens?: number): ModelInfo => ({
 			contextWindow,
 			supportsPromptCache: true,
@@ -1761,6 +1765,7 @@ describe("Context Management", () => {
 				profileThresholds: {},
 				currentProfileId: "default",
 				lastMessageTokens: 0,
+				useAvailableInputForContextPercent: true,
 			})
 			expect(result).toBe(true)
 		})
@@ -1776,6 +1781,7 @@ describe("Context Management", () => {
 				profileThresholds: {},
 				currentProfileId: "default",
 				lastMessageTokens: 0,
+				useAvailableInputForContextPercent: true,
 			})
 			expect(result).toBe(false)
 		})
@@ -1792,6 +1798,7 @@ describe("Context Management", () => {
 				profileThresholds: {},
 				currentProfileId: "default",
 				lastMessageTokens: 0,
+				useAvailableInputForContextPercent: true,
 			})
 			expect(result).toBe(true)
 		})
@@ -1809,6 +1816,7 @@ describe("Context Management", () => {
 				profileThresholds: {},
 				currentProfileId: "default",
 				lastMessageTokens: 0,
+				useAvailableInputForContextPercent: true,
 			})
 			// contextPercent === 100 >= 80 threshold → true.
 			expect(result).toBe(true)
@@ -1825,6 +1833,7 @@ describe("Context Management", () => {
 				profileThresholds: {},
 				currentProfileId: "default",
 				lastMessageTokens: 0,
+				useAvailableInputForContextPercent: true,
 			})
 			expect(result).toBe(true)
 		})
@@ -1866,6 +1875,7 @@ describe("Context Management", () => {
 				taskId,
 				profileThresholds: {},
 				currentProfileId: "default",
+				useAvailableInputForContextPercent: true,
 			})
 
 			expect(summarizeSpy).toHaveBeenCalled()
@@ -1914,6 +1924,7 @@ describe("Context Management", () => {
 				taskId,
 				profileThresholds: {},
 				currentProfileId: "default",
+				useAvailableInputForContextPercent: true,
 			})
 
 			expect(summarizeSpy).toHaveBeenCalled()
@@ -1925,4 +1936,106 @@ describe("Context Management", () => {
 			summarizeSpy.mockRestore()
 		})
 	})
+
+	/**
+	 * Scoping tests: the available-input denominator is opt-in. By default (flag omitted), the gate
+	 * divides by the FULL context window, exactly as every non-vscode-lm provider did before the
+	 * vscode-lm fix. The maxTokens: -1 reserve guard, however, remains global on the default path.
+	 */
+	describe("contextPercent denominator is opt-in (default = full window)", () => {
+		const messages: ApiMessage[] = [
+			{ role: "user", content: "First message" },
+			{ role: "assistant", content: "Second message" },
+			{ role: "user", content: "Third message" },
+			{ role: "assistant", content: "Fourth message" },
+			{ role: "user", content: "Fifth message" },
+		]
+
+		it("willManageContext divides by the full window when the flag is omitted (default)", () => {
+			// Same inputs as the regression block: contextWindow 200000, reserve 64000, totalTokens 100000.
+			// Default (full window): 100000 / 200000 = 50% < 70% threshold → false. Under the opt-in
+			// available-input math it would be ≈ 73.5% and fire — this proves the scoping.
+			const result = willManageContext({
+				totalTokens: 100000,
+				contextWindow: 200000,
+				maxTokens: 64000,
+				autoCondenseContext: true,
+				autoCondenseContextPercent: 70,
+				profileThresholds: {},
+				currentProfileId: "default",
+				lastMessageTokens: 0,
+			})
+			expect(result).toBe(false)
+		})
+
+		it("willManageContext fires on the same inputs when the opt-in flag is true", () => {
+			// Identical inputs, flag on: available input 136000 → 100000 / 136000 ≈ 73.5% ≥ 70% → true.
+			const result = willManageContext({
+				totalTokens: 100000,
+				contextWindow: 200000,
+				maxTokens: 64000,
+				autoCondenseContext: true,
+				autoCondenseContextPercent: 70,
+				profileThresholds: {},
+				currentProfileId: "default",
+				lastMessageTokens: 0,
+				useAvailableInputForContextPercent: true,
+			})
+			expect(result).toBe(true)
+		})
+
+		it("keeps the maxTokens:-1 reserve guard on the default (full-window) path", () => {
+			// The reserve guard is global, independent of the percent denominator. With auto-condense
+			// off, only the allowedTokens path can fire: allowedTokens = 100000 * 0.9 - 8192 = 81808;
+			// totalTokens 85000 > 81808 → true. (A naive `maxTokens || DEFAULT` keeping -1 would break this.)
+			const result = willManageContext({
+				totalTokens: 85000,
+				contextWindow: 100000,
+				maxTokens: -1,
+				autoCondenseContext: false,
+				autoCondenseContextPercent: 50,
+				profileThresholds: {},
+				currentProfileId: "default",
+				lastMessageTokens: 0,
+			})
+			expect(result).toBe(true)
+		})
+
+		it("manageContext does NOT summarize on the default path where the opt-in math would have", async () => {
+			// contextWindow 200000, reserve 64000, totalTokens 100000. Default full-window percent is
+			// 50% < 70% threshold, and allowedTokens = 200000 * 0.9 - 64000 = 116000 > 100000, so neither
+			// condense nor truncation runs. With the opt-in flag this same case summarizes (asserted above
+			// in the regression block), proving the default path reverts to pre-fix behavior.
+			const summarizeSpy = vi.spyOn(condenseModule, "summarizeConversation")
+
+			const messagesWithSmallContent = [
+				...messages.slice(0, -1),
+				{ ...messages[messages.length - 1], content: "" },
+			]
+
+			const result = await manageContext({
+				messages: messagesWithSmallContent,
+				totalTokens: 100000,
+				contextWindow: 200000,
+				maxTokens: 64000,
+				apiHandler: mockApiHandler,
+				autoCondenseContext: true,
+				autoCondenseContextPercent: 70,
+				systemPrompt: "System prompt",
+				taskId,
+				profileThresholds: {},
+				currentProfileId: "default",
+			})
+
+			expect(summarizeSpy).not.toHaveBeenCalled()
+			expect(result).toEqual({
+				messages: messagesWithSmallContent,
+				summary: "",
+				cost: 0,
+				prevContextTokens: 100000,
+			})
+
+			summarizeSpy.mockRestore()
+		})
+	})
 })
diff --git a/src/core/context-management/index.ts b/src/core/context-management/index.ts
index cc4d5ba3d7..b4d89487fd 100644
--- a/src/core/context-management/index.ts
+++ b/src/core/context-management/index.ts
@@ -147,6 +147,14 @@ export type WillManageContextOptions = {
 	profileThresholds: Record<string, number>
 	currentProfileId: string
 	lastMessageTokens: number
+	/**
+	 * Opt-in: measure the condense percentage against the available input space
+	 * (contextWindow - reserved output) instead of the full context window. Only providers
+	 * whose advertised live window is inflated relative to the usable input ceiling (vscode-lm,
+	 * which exposes the seam via getCondenseContextWindow) set this. All other providers leave it
+	 * undefined and keep dividing by the full context window (original behavior).
+	 */
+	useAvailableInputForContextPercent?: boolean
 }
 
 /**
@@ -167,6 +175,7 @@ export function willManageContext({
 	profileThresholds,
 	currentProfileId,
 	lastMessageTokens,
+	useAvailableInputForContextPercent,
 }: WillManageContextOptions): boolean {
 	if (!autoCondenseContext) {
 		// When auto-condense is disabled, only truncation can occur
@@ -194,14 +203,20 @@ export function willManageContext({
 		// Invalid values fall back to global setting (effectiveThreshold already set)
 	}
 
-	// Measure usage against the available input space (context window minus the
-	// reserved output budget), matching the context gauge shown in the UI. Reserved
-	// output tokens can never hold conversation context, so this is the meaningful
-	// "how full is my usable input" figure. When the reserve is unknown/unlimited
-	// (e.g., vscode-lm reports -1), fall back to the full context window.
-	const reservedForOutput = maxTokens && maxTokens > 0 ? maxTokens : 0
-	const availableInputTokens = contextWindow - reservedForOutput
-	const contextPercent = availableInputTokens > 0 ? (100 * prevContextTokens) / availableInputTokens : 100
+	// By default, measure usage against the full context window (original behavior shared by all
+	// providers). Opt-in (vscode-lm via getCondenseContextWindow) measures against the available
+	// input space (context window minus the reserved output budget) to match the UI context gauge,
+	// because that provider's advertised window is inflated relative to its usable input ceiling.
+	// Reserved output tokens can never hold conversation context. When the reserve is
+	// unknown/unlimited (e.g., vscode-lm reports -1), fall back to the full context window.
+	let contextPercent: number
+	if (useAvailableInputForContextPercent) {
+		const reservedForOutput = maxTokens && maxTokens > 0 ? maxTokens : 0
+		const availableInputTokens = contextWindow - reservedForOutput
+		contextPercent = availableInputTokens > 0 ? (100 * prevContextTokens) / availableInputTokens : 100
+	} else {
+		contextPercent = (100 * prevContextTokens) / contextWindow
+	}
 	return contextPercent >= effectiveThreshold || prevContextTokens > allowedTokens
 }
 
@@ -238,6 +253,14 @@ export type ContextManagementOptions = {
 	cwd?: string
 	/** Optional controller for file access validation */
 	rooIgnoreController?: RooIgnoreController
+	/**
+	 * Opt-in: measure the condense percentage against the available input space
+	 * (contextWindow - reserved output) instead of the full context window. Only providers
+	 * whose advertised live window is inflated relative to the usable input ceiling (vscode-lm,
+	 * which exposes the seam via getCondenseContextWindow) set this. All other providers leave it
+	 * undefined and keep dividing by the full context window (original behavior).
+	 */
+	useAvailableInputForContextPercent?: boolean
 }
 
 export type ContextManagementResult = SummarizeResponse & {
@@ -271,6 +294,7 @@ export async function manageContext({
 	filesReadByRoo,
 	cwd,
 	rooIgnoreController,
+	useAvailableInputForContextPercent,
 }: ContextManagementOptions): Promise<ContextManagementResult> {
 	let error: string | undefined
 	let errorDetails: string | undefined
@@ -314,14 +338,20 @@ export async function manageContext({
 	// If no specific threshold is found for the profile, fall back to global setting
 
 	if (autoCondenseContext) {
-		// Measure usage against the available input space (context window minus the
-		// reserved output budget), matching the context gauge shown in the UI. Reserved
-		// output tokens can never hold conversation context, so this is the meaningful
-		// "how full is my usable input" figure. When the reserve is unknown/unlimited
-		// (e.g., vscode-lm reports -1), fall back to the full context window.
-		const reservedForOutput = maxTokens && maxTokens > 0 ? maxTokens : 0
-		const availableInputTokens = contextWindow - reservedForOutput
-		const contextPercent = availableInputTokens > 0 ? (100 * prevContextTokens) / availableInputTokens : 100
+		// By default, measure usage against the full context window (original behavior shared by all
+		// providers). Opt-in (vscode-lm via getCondenseContextWindow) measures against the available
+		// input space (context window minus the reserved output budget) to match the UI context gauge,
+		// because that provider's advertised window is inflated relative to its usable input ceiling.
+		// Reserved output tokens can never hold conversation context. When the reserve is
+		// unknown/unlimited (e.g., vscode-lm reports -1), fall back to the full context window.
+		let contextPercent: number
+		if (useAvailableInputForContextPercent) {
+			const reservedForOutput = maxTokens && maxTokens > 0 ? maxTokens : 0
+			const availableInputTokens = contextWindow - reservedForOutput
+			contextPercent = availableInputTokens > 0 ? (100 * prevContextTokens) / availableInputTokens : 100
+		} else {
+			contextPercent = (100 * prevContextTokens) / contextWindow
+		}
 		if (contextPercent >= effectiveThreshold || prevContextTokens > allowedTokens) {
 			// Attempt to intelligently condense the context
 			const result = await summarizeConversation({
diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts
index 183b0cd191..81a2435452 100644
--- a/src/core/task/Task.ts
+++ b/src/core/task/Task.ts
@@ -3743,6 +3743,10 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 		// provider returns undefined here and falls back to modelInfo.contextWindow.
 		const contextWindow = this.api.getCondenseContextWindow?.() ?? modelInfo.contextWindow
 
+		// Only vscode-lm implements getCondenseContextWindow, so its presence scopes the
+		// available-input condense denominator to that provider; all others use the full window.
+		const useAvailableInputForContextPercent = typeof this.api.getCondenseContextWindow === "function"
+
 		// Get the current profile ID using the helper method
 		const currentProfileId = this.getCurrentProfileId(state)
 
@@ -3810,6 +3814,7 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 				currentProfileId,
 				metadata,
 				environmentDetails,
+				useAvailableInputForContextPercent,
 			})
 
 			if (truncateResult.messages !== this.apiConversationHistory) {
@@ -3942,6 +3947,10 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 			// provider returns undefined here and falls back to modelInfo.contextWindow.
 			const contextWindow = this.api.getCondenseContextWindow?.() ?? modelInfo.contextWindow
 
+			// Only vscode-lm implements getCondenseContextWindow, so its presence scopes the
+			// available-input condense denominator to that provider; all others use the full window.
+			const useAvailableInputForContextPercent = typeof this.api.getCondenseContextWindow === "function"
+
 			// Get the current profile ID using the helper method
 			const currentProfileId = this.getCurrentProfileId(state)
 			// Check if context management will likely run (threshold check)
@@ -3965,6 +3974,7 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 				profileThresholds,
 				currentProfileId,
 				lastMessageTokens,
+				useAvailableInputForContextPercent,
 			})
 
 			// Send condenseTaskContextStarted BEFORE manageContext to show in-progress indicator
@@ -4047,6 +4057,7 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 					filesReadByRoo: contextMgmtFilesReadByRoo,
 					cwd: this.cwd,
 					rooIgnoreController: this.rooIgnoreController,
+					useAvailableInputForContextPercent,
 				})
 				if (truncateResult.messages !== this.apiConversationHistory) {
 					await this.overwriteApiConversationHistory(truncateResult.messages)
diff --git a/webview-ui/src/components/ui/hooks/__tests__/useSelectedModel.spec.ts b/webview-ui/src/components/ui/hooks/__tests__/useSelectedModel.spec.ts
index 6f3f1edc4f..3ffe85e144 100644
--- a/webview-ui/src/components/ui/hooks/__tests__/useSelectedModel.spec.ts
+++ b/webview-ui/src/components/ui/hooks/__tests__/useSelectedModel.spec.ts
@@ -811,6 +811,26 @@ describe("useSelectedModel", () => {
 			expect(result.current.info?.supportsImages).toBe(false)
 		})
 
+		it("pins a divergent family's contextWindow to maxInputTokens, not its advertised window", () => {
+			// claude-opus-4.8 is the row where contextWindow (679560) and maxInputTokens (197897) DIFFER.
+			// The hook must surface maxInputTokens so the bar matches the condense gate; a field swap to
+			// the advertised contextWindow would be caught here (unlike the default model where they match).
+			const family = "claude-opus-4.8"
+			const apiConfiguration: ProviderSettings = {
+				apiProvider: "vscode-lm",
+				vsCodeLmModelSelector: { vendor: "copilot", family },
+			}
+
+			const wrapper = createWrapper()
+			const { result } = renderHook(() => useSelectedModel(apiConfiguration), { wrapper })
+
+			expect(result.current.provider).toBe("vscode-lm")
+			expect(result.current.id).toBe(`copilot/${family}`)
+			expect(result.current.info?.contextWindow).toBe(vscodeLlmModels[family].maxInputTokens) // 197897
+			expect(result.current.info?.contextWindow).not.toBe(vscodeLlmModels[family].contextWindow) // NOT 679560
+			expect(result.current.info?.supportsImages).toBe(false)
+		})
+
 		it("falls back to the default model's window for an unlisted family (NOT 128000)", () => {
 			const apiConfiguration: ProviderSettings = {
 				apiProvider: "vscode-lm",
diff --git a/webview-ui/src/components/ui/hooks/useSelectedModel.ts b/webview-ui/src/components/ui/hooks/useSelectedModel.ts
index 8c8a5360da..a5940ba7d3 100644
--- a/webview-ui/src/components/ui/hooks/useSelectedModel.ts
+++ b/webview-ui/src/components/ui/hooks/useSelectedModel.ts
@@ -315,9 +315,10 @@ function getSelectedModel({
 			// auto-condense never fires (the gate uses the live window).
 			const listedModel =
 				vscodeLlmModels[modelFamily as keyof typeof vscodeLlmModels] ?? vscodeLlmModels[vscodeLlmDefaultModelId]
-			// contextWindow MUST equal maxInputTokens: that is the exact value the gate consumes via
-			// getModel().info.contextWindow = Math.max(0, client.maxInputTokens) in src/api/providers/vscode-lm.ts,
-			// so the UI bar and the condense gate share a single source of truth.
+			// Set contextWindow = maxInputTokens so the UI bar matches what the condense gate uses for
+			// vscode-lm. The gate's primary window comes from getCondenseContextWindow() (which returns the
+			// static-table maxInputTokens); getModel().info.contextWindow is only the fallback. Sharing
+			// maxInputTokens keeps the bar and the gate on a single source of truth.
 			const info: ModelInfo = {
 				...openAiModelInfoSaneDefaults,
 				...listedModel,

From 30389d3ac8a8c693446c702126a4752d0638abee Mon Sep 17 00:00:00 2001
From: Bertan Ari <bertanari@microsoft.com>
Date: Fri, 26 Jun 2026 13:06:48 -0700
Subject: [PATCH 5/5] docs(vscode-lm): tighten auto-condense comments

Simplify comments added in PR #710 to be brief and rationale-focused; no logic, assertions, or test values changed.
---
 .../types/src/__tests__/vscode-llm.spec.ts    |  7 +-
 packages/types/src/providers/vscode-llm.ts    | 11 +--
 src/api/index.ts                              | 10 +--
 src/api/providers/__tests__/vscode-lm.spec.ts | 19 +++--
 src/api/providers/vscode-lm.ts                | 13 ++--
 .../__tests__/context-management.spec.ts      | 69 +++++--------------
 src/core/context-management/index.ts          | 30 +++-----
 src/core/task/Task.ts                         | 16 ++---
 .../chat/__tests__/TaskHeader.spec.tsx        |  7 +-
 .../hooks/__tests__/useSelectedModel.spec.ts  |  8 +--
 .../components/ui/hooks/useSelectedModel.ts   | 12 ++--
 11 files changed, 60 insertions(+), 142 deletions(-)

diff --git a/packages/types/src/__tests__/vscode-llm.spec.ts b/packages/types/src/__tests__/vscode-llm.spec.ts
index 7a2eabddf7..041bc3c8b4 100644
--- a/packages/types/src/__tests__/vscode-llm.spec.ts
+++ b/packages/types/src/__tests__/vscode-llm.spec.ts
@@ -3,11 +3,8 @@ import { vscodeLlmModels, vscodeLlmDefaultModelId } from "../providers/vscode-ll
 
 describe("vscodeLlmModels", () => {
 	it("exposes the opus-4.8 row with its measured maxInputTokens and contextWindow", () => {
-		// The VS Code LM API exposes only maxInputTokens; that is the value the UI reads from this
-		// table (useSelectedModel.ts). For claude-opus-4.8 the two fields intentionally DIVERGE:
-		// maxInputTokens (197897) is the enforced input ceiling, while contextWindow (679560) records
-		// the larger advertised window. The UI reads maxInputTokens, so the divergence is a deliberate
-		// tripwire — assert the actual on-disk literals rather than forcing equality.
+		// claude-opus-4.8 intentionally diverges: maxInputTokens (197897) is the enforced ceiling the
+		// UI reads, contextWindow (679560) the advertised window. Assert the on-disk literals as a tripwire.
 		expect(vscodeLlmModels).toHaveProperty("claude-opus-4.8")
 		expect(vscodeLlmModels["claude-opus-4.8"].contextWindow).toBe(679560)
 		expect(vscodeLlmModels["claude-opus-4.8"].maxInputTokens).toBe(197897)
diff --git a/packages/types/src/providers/vscode-llm.ts b/packages/types/src/providers/vscode-llm.ts
index 46df75fac9..5286b0ed28 100644
--- a/packages/types/src/providers/vscode-llm.ts
+++ b/packages/types/src/providers/vscode-llm.ts
@@ -5,15 +5,8 @@ export type VscodeLlmModelId = keyof typeof vscodeLlmModels
 export const vscodeLlmDefaultModelId: VscodeLlmModelId = "claude-sonnet-4.5"
 
 // Curated VS Code LM (GitHub Copilot) model catalog.
-//
-// The VS Code LM API only exposes `maxInputTokens` per model; it does NOT report a separate
-// total context window. For each row, `contextWindow` records the model's advertised window
-// while `maxInputTokens` is the enforced input ceiling the UI actually reads (via
-// useSelectedModel.ts) and the condense gate measures against. For most rows the two values
-// match. They intentionally DIVERGE only where the provider advertises a larger window than the
-// usable input ceiling (e.g. claude-opus-4.8): keeping both fields lets the context bar and the
-// auto-condense gate stay on a single source of truth (maxInputTokens) without losing the real
-// advertised window.
+// The VS Code LM API exposes only `maxInputTokens`; the UI and condense gate read this table's value.
+// `contextWindow` records the advertised window, which can diverge from that ceiling (e.g. claude-opus-4.8).
 export const vscodeLlmModels = {
 	"claude-opus-4.8": {
 		contextWindow: 679560,
diff --git a/src/api/index.ts b/src/api/index.ts
index 00201b0d21..9e4ba3bfb5 100644
--- a/src/api/index.ts
+++ b/src/api/index.ts
@@ -108,13 +108,9 @@ export interface ApiHandler {
 	getModel(): { id: string; info: ModelInfo }
 
 	/**
-	 * Optional: the context window (in tokens) to use for context-management /
-	 * auto-condense decisions, when it must differ from getModel().info.contextWindow.
-	 *
-	 * Only the VS Code LM (Copilot) provider overrides this, to measure usage against the
-	 * model's static `maxInputTokens` instead of the inflated live window VS Code reports.
-	 * Other providers leave it undefined and callers fall back to getModel().info.contextWindow,
-	 * so their behavior is unchanged.
+	 * Optional context window for context-management / auto-condense when it must differ from
+	 * getModel().info.contextWindow. Only VS Code LM implements it (static `maxInputTokens` vs its
+	 * inflated live window); others leave it undefined and callers fall back to that default.
 	 */
 	getCondenseContextWindow?(): number
 
diff --git a/src/api/providers/__tests__/vscode-lm.spec.ts b/src/api/providers/__tests__/vscode-lm.spec.ts
index eb026e8169..5c425b5e25 100644
--- a/src/api/providers/__tests__/vscode-lm.spec.ts
+++ b/src/api/providers/__tests__/vscode-lm.spec.ts
@@ -443,8 +443,7 @@ describe("VsCodeLmHandler", () => {
 		})
 
 		it("should use the full advertised maxInputTokens without an upper cap", async () => {
-			// VS Code can report a very large advertised window; getModel surfaces it as-is
-			// (Math.max(0, maxInputTokens)) rather than clamping to a smaller default.
+			// A large advertised window is surfaced as-is, not clamped to a smaller default.
 			const mockModel = { ...mockLanguageModelChat, maxInputTokens: 936000 }
 			;(vscode.lm.selectChatModels as Mock).mockResolvedValue([mockModel])
 			handler["client"] = null
@@ -485,15 +484,15 @@ describe("VsCodeLmHandler", () => {
 		})
 
 		it("falls back to the live model context window for families not in the static table", () => {
-			// test-family is not a curated row, so the gate uses the live runtime window.
+			// Not a curated row, so the gate uses the live runtime window.
 			handler["client"] = mockLanguageModelChat as unknown as vscode.LanguageModelChat
 			expect(handler.getCondenseContextWindow()).toBe(handler.getModel().info.contextWindow)
 			expect(handler.getCondenseContextWindow()).toBe(mockLanguageModelChat.maxInputTokens)
 		})
 
 		it("falls back to the live window when no family is resolvable (no client, no selector family)", () => {
-			// With neither a client nor a selector family, `family` is undefined, so the static-table
-			// lookup is skipped entirely and the gate uses getModel().info.contextWindow (fallback info).
+			// No client and no selector family means `family` is undefined, so the gate skips the
+			// static lookup and uses getModel().info.contextWindow.
 			const noFamilyHandler = new VsCodeLmHandler({ vsCodeLmModelSelector: { vendor: "copilot" } })
 			noFamilyHandler["client"] = null
 			expect(noFamilyHandler.getCondenseContextWindow()).toBe(noFamilyHandler.getModel().info.contextWindow)
@@ -502,10 +501,8 @@ describe("VsCodeLmHandler", () => {
 		})
 
 		it("falls back to the derived window when the static row exists but maxInputTokens is non-positive", () => {
-			// Guard sub-condition: a curated family is found but its maxInputTokens is <= 0 (corrupt/zeroed).
-			// With the selector family `claude-opus-4.8` and no live client, the zeroed static row is the one
-			// consulted, so the `maxInputTokens > 0` guard fails and the gate falls back to the derived window
-			// from getModel().info.contextWindow (sane defaults here, since there is no live client).
+			// A curated row exists but its maxInputTokens is <= 0, so the `> 0` guard fails and the gate
+			// falls back to getModel().info.contextWindow.
 			const family = "claude-opus-4.8"
 			const original = vscodeLlmModels[family].maxInputTokens
 			try {
@@ -513,8 +510,8 @@ describe("VsCodeLmHandler", () => {
 				const guardHandler = new VsCodeLmHandler({
 					vsCodeLmModelSelector: { vendor: "copilot", family },
 				})
-				// Leave the client unset so `family` resolves from the selector (claude-opus-4.8),
-				// forcing the zeroed static row to be read instead of a live client's family.
+				// Leave the client unset so `family` resolves from the selector, forcing the zeroed
+				// static row to be read instead of a live client's family.
 				guardHandler["client"] = null
 				expect(guardHandler.getCondenseContextWindow()).toBe(guardHandler.getModel().info.contextWindow)
 				expect(guardHandler.getCondenseContextWindow()).toBe(openAiModelInfoSaneDefaults.contextWindow)
diff --git a/src/api/providers/vscode-lm.ts b/src/api/providers/vscode-lm.ts
index d730658b44..9adcefa972 100644
--- a/src/api/providers/vscode-lm.ts
+++ b/src/api/providers/vscode-lm.ts
@@ -563,15 +563,10 @@ export class VsCodeLmHandler extends BaseProvider implements SingleCompletionHan
 	}
 
 	/**
-	 * Context window used for auto-condense / context-management decisions.
-	 *
-	 * VS Code's LM API reports `client.maxInputTokens` as Copilot's *advertised* window,
-	 * which is far larger than the realistic usable window; relying on it keeps auto-condense
-	 * from ever firing. For condense decisions we instead measure usage against the curated
-	 * static table's `maxInputTokens` — the same value the context bar uses via
-	 * `useSelectedModel` — so the gate and the gauge stay on one source of truth.
-	 *
-	 * Falls back to the live runtime window when the selected model isn't in the static table.
+	 * Context window for auto-condense. The API's advertised `client.maxInputTokens` is far larger
+	 * than the usable window, so relying on it stops auto-condense from firing; measure against the
+	 * curated static table's `maxInputTokens` instead (the same value the UI bar uses). Fall back to
+	 * getModel().info.contextWindow when the family isn't in the table or that value is not positive.
 	 */
 	getCondenseContextWindow(): number {
 		const family = this.client?.family ?? this.options.vsCodeLmModelSelector?.family
diff --git a/src/core/context-management/__tests__/context-management.spec.ts b/src/core/context-management/__tests__/context-management.spec.ts
index ba0a77aacf..89797b045f 100644
--- a/src/core/context-management/__tests__/context-management.spec.ts
+++ b/src/core/context-management/__tests__/context-management.spec.ts
@@ -810,8 +810,7 @@ describe("Context Management", () => {
 			const summarizeSpy = vi.spyOn(condenseModule, "summarizeConversation")
 
 			const modelInfo = createModelInfo(100000, 30000)
-			// Usage is measured against available input space (contextWindow - maxTokens reserve).
-			// available = 100000 - 30000 = 70000; 30000 / 70000 ≈ 43% < 50% threshold.
+			// Usage measured against available input space stays below the threshold.
 			const contextWindow = modelInfo.contextWindow
 			const totalTokens = 30000
 			const messagesWithSmallContent = [
@@ -1508,8 +1507,7 @@ describe("Context Management", () => {
 		})
 
 		it("should return false when context percent is below threshold", () => {
-			// Available-input denominator (opt-in): available = 100000 - 30000 = 70000;
-			// 30000 / 70000 ≈ 43% < 50% threshold.
+			// Opt-in available-input denominator: usage stays below threshold.
 			const result = willManageContext({
 				totalTokens: 30000,
 				contextWindow: 100000,
@@ -1525,12 +1523,7 @@ describe("Context Management", () => {
 		})
 
 		it("should treat a negative maxTokens (vscode-lm reports -1) as the default reserve, not -1", () => {
-			// vscode-lm reports maxTokens: -1 (unlimited). A naive `maxTokens || DEFAULT` keeps -1,
-			// which would make allowedTokens balloon past the window and skew the percentage. The
-			// guard must treat -1 like an unknown reserve (ANTHROPIC_DEFAULT_MAX_TOKENS for the
-			// allowed-tokens math, zero reserve for the available-input percentage).
-			// With autoCondenseContext disabled, only the allowedTokens path can trigger:
-			// allowedTokens = 100000 * 0.9 - 8192 = 81808; totalTokens 85000 > 81808 → true.
+			// A -1 reserve must be treated as unknown (default reserve), not kept as -1.
 			const result = willManageContext({
 				totalTokens: 85000,
 				contextWindow: 100000,
@@ -1605,10 +1598,7 @@ describe("Context Management", () => {
 		})
 
 		it("should include lastMessageTokens in the calculation", () => {
-			// Available-input denominator (opt-in): available = 100000 - 30000 = 70000.
-			// Without lastMessageTokens: 34000 / 70000 ≈ 48.6% < 50% threshold.
-			// With lastMessageTokens: (34000 + 2000) / 70000 ≈ 51.4% ≥ 50% threshold.
-			// (Against the full window both cases are < 50%, so this case requires the opt-in flag.)
+			// Adding lastMessageTokens pushes usage over the threshold (opt-in available-input denominator).
 			const resultWithoutLastMessage = willManageContext({
 				totalTokens: 34000,
 				contextWindow: 100000,
@@ -1731,11 +1721,8 @@ describe("Context Management", () => {
 	})
 
 	/**
-	 * Regression tests for the opt-in available-input denominator (vscode-lm). With the flag on,
-	 * the condense gate measures usage against available input space (contextWindow - reserved
-	 * output), not the raw context window. This keeps the gate in lockstep with the UI context
-	 * gauge and ensures it actually fires for vscode-lm, which reports maxTokens: -1. The default
-	 * (full-window) behavior for every other provider is covered by the sibling describe below.
+	 * Regression: with the opt-in flag on, the gate measures usage against available input space
+	 * (contextWindow - reserved output) so it stays in lockstep with the UI gauge and fires for vscode-lm.
 	 */
 	describe("contextPercent uses available input space (opt-in, regression)", () => {
 		const createModelInfo = (contextWindow: number, maxTokens?: number): ModelInfo => ({
@@ -1753,9 +1740,7 @@ describe("Context Management", () => {
 		]
 
 		it("willManageContext measures the percentage against available input, not the full window", () => {
-			// contextWindow 200000, reserve 64000 → available input 136000.
-			// totalTokens 100000 → 100000 / 136000 ≈ 73.5%, which clears the 70% threshold.
-			// Against the full window it would be only 50% and the gate would (wrongly) stay closed.
+			// Dividing by available input clears the threshold; the full window would keep the gate closed.
 			const result = willManageContext({
 				totalTokens: 100000,
 				contextWindow: 200000,
@@ -1771,7 +1756,7 @@ describe("Context Management", () => {
 		})
 
 		it("willManageContext stays below threshold when usage is under available input", () => {
-			// available input 136000; totalTokens 90000 → ≈ 66.2% < 70% threshold.
+			// Usage under available input stays below threshold.
 			const result = willManageContext({
 				totalTokens: 90000,
 				contextWindow: 200000,
@@ -1787,8 +1772,7 @@ describe("Context Management", () => {
 		})
 
 		it("willManageContext treats an unlimited (-1) reserve as zero reserve for the percentage", () => {
-			// vscode-lm reports maxTokens: -1. The percentage denominator should fall back to the
-			// full window (zero reserve): 150000 / 200000 = 75% ≥ 70% threshold.
+			// A -1 reserve falls back to the full window (zero reserve) for the percentage.
 			const result = willManageContext({
 				totalTokens: 150000,
 				contextWindow: 200000,
@@ -1804,9 +1788,7 @@ describe("Context Management", () => {
 		})
 
 		it("willManageContext falls back to 100% when the reserve is >= the window (availableInput <= 0)", () => {
-			// When maxTokens (reserve) >= contextWindow, availableInputTokens = window - reserve <= 0.
-			// The denominator guard must short-circuit contextPercent to 100 rather than divide by
-			// a non-positive number, so the gate fires regardless of the (tiny) totalTokens.
+			// Non-positive available input must short-circuit contextPercent to 100 rather than divide.
 			const result = willManageContext({
 				totalTokens: 1,
 				contextWindow: 50000,
@@ -1818,12 +1800,11 @@ describe("Context Management", () => {
 				lastMessageTokens: 0,
 				useAvailableInputForContextPercent: true,
 			})
-			// contextPercent === 100 >= 80 threshold → true.
 			expect(result).toBe(true)
 		})
 
 		it("willManageContext falls back to 100% when the reserve exactly equals the window (availableInput === 0)", () => {
-			// Boundary: reserve === window → availableInputTokens === 0, still the FALSE branch (> 0 is false).
+			// Boundary: reserve === window → available input is 0, still caught by the non-positive guard.
 			const result = willManageContext({
 				totalTokens: 1,
 				contextWindow: 50000,
@@ -1839,9 +1820,7 @@ describe("Context Management", () => {
 		})
 
 		it("manageContext summarizes via the 100% fallback when the reserve >= the window (availableInput <= 0)", async () => {
-			// Mirror the willManageContext edge for the manageContext path: reserve >= window forces
-			// contextPercent to 100 via the denominator guard, so summarization triggers even though
-			// totalTokens is small relative to the raw window.
+			// reserve >= window forces contextPercent to 100, so summarization triggers.
 			const mockSummary = "Reserve-exceeds-window summary"
 			const mockSummarizeResponse: condenseModule.SummarizeResponse = {
 				messages: [
@@ -1857,7 +1836,6 @@ describe("Context Management", () => {
 				.spyOn(condenseModule, "summarizeConversation")
 				.mockResolvedValue(mockSummarizeResponse)
 
-			// contextWindow 50000, maxTokens 60000 → availableInput = -10000 → contextPercent = 100.
 			const messagesWithSmallContent = [
 				...messages.slice(0, -1),
 				{ ...messages[messages.length - 1], content: "" },
@@ -1904,8 +1882,7 @@ describe("Context Management", () => {
 				.mockResolvedValue(mockSummarizeResponse)
 
 			const modelInfo = createModelInfo(200000, 64000)
-			// available input 136000; totalTokens 100000 → ≈ 73.5% ≥ 70% threshold, but only 50% of
-			// the raw window. The end-to-end path must trigger summarization on the available-input math.
+			// Clears the threshold against available input but not the raw window; end-to-end must summarize.
 			const totalTokens = 100000
 			const messagesWithSmallContent = [
 				...messages.slice(0, -1),
@@ -1938,9 +1915,8 @@ describe("Context Management", () => {
 	})
 
 	/**
-	 * Scoping tests: the available-input denominator is opt-in. By default (flag omitted), the gate
-	 * divides by the FULL context window, exactly as every non-vscode-lm provider did before the
-	 * vscode-lm fix. The maxTokens: -1 reserve guard, however, remains global on the default path.
+	 * Scoping: the available-input denominator is opt-in; default divides by the full window.
+	 * The maxTokens: -1 reserve guard stays global on the default path.
 	 */
 	describe("contextPercent denominator is opt-in (default = full window)", () => {
 		const messages: ApiMessage[] = [
@@ -1952,9 +1928,7 @@ describe("Context Management", () => {
 		]
 
 		it("willManageContext divides by the full window when the flag is omitted (default)", () => {
-			// Same inputs as the regression block: contextWindow 200000, reserve 64000, totalTokens 100000.
-			// Default (full window): 100000 / 200000 = 50% < 70% threshold → false. Under the opt-in
-			// available-input math it would be ≈ 73.5% and fire — this proves the scoping.
+			// Default divides by the full window, staying below threshold where the opt-in math would fire.
 			const result = willManageContext({
 				totalTokens: 100000,
 				contextWindow: 200000,
@@ -1969,7 +1943,7 @@ describe("Context Management", () => {
 		})
 
 		it("willManageContext fires on the same inputs when the opt-in flag is true", () => {
-			// Identical inputs, flag on: available input 136000 → 100000 / 136000 ≈ 73.5% ≥ 70% → true.
+			// Same inputs, flag on: dividing by available input clears the threshold.
 			const result = willManageContext({
 				totalTokens: 100000,
 				contextWindow: 200000,
@@ -1985,9 +1959,7 @@ describe("Context Management", () => {
 		})
 
 		it("keeps the maxTokens:-1 reserve guard on the default (full-window) path", () => {
-			// The reserve guard is global, independent of the percent denominator. With auto-condense
-			// off, only the allowedTokens path can fire: allowedTokens = 100000 * 0.9 - 8192 = 81808;
-			// totalTokens 85000 > 81808 → true. (A naive `maxTokens || DEFAULT` keeping -1 would break this.)
+			// The -1 reserve guard is global, independent of the percent denominator.
 			const result = willManageContext({
 				totalTokens: 85000,
 				contextWindow: 100000,
@@ -2002,10 +1974,7 @@ describe("Context Management", () => {
 		})
 
 		it("manageContext does NOT summarize on the default path where the opt-in math would have", async () => {
-			// contextWindow 200000, reserve 64000, totalTokens 100000. Default full-window percent is
-			// 50% < 70% threshold, and allowedTokens = 200000 * 0.9 - 64000 = 116000 > 100000, so neither
-			// condense nor truncation runs. With the opt-in flag this same case summarizes (asserted above
-			// in the regression block), proving the default path reverts to pre-fix behavior.
+			// Default full-window math leaves this case below threshold; the opt-in flag would summarize it.
 			const summarizeSpy = vi.spyOn(condenseModule, "summarizeConversation")
 
 			const messagesWithSmallContent = [
diff --git a/src/core/context-management/index.ts b/src/core/context-management/index.ts
index b4d89487fd..ed2ee6be5f 100644
--- a/src/core/context-management/index.ts
+++ b/src/core/context-management/index.ts
@@ -148,11 +148,8 @@ export type WillManageContextOptions = {
 	currentProfileId: string
 	lastMessageTokens: number
 	/**
-	 * Opt-in: measure the condense percentage against the available input space
-	 * (contextWindow - reserved output) instead of the full context window. Only providers
-	 * whose advertised live window is inflated relative to the usable input ceiling (vscode-lm,
-	 * which exposes the seam via getCondenseContextWindow) set this. All other providers leave it
-	 * undefined and keep dividing by the full context window (original behavior).
+	 * Opt-in (vscode-lm): measure the condense percentage against available input space
+	 * (contextWindow - reserved output) instead of the full window. Others leave it undefined.
 	 */
 	useAvailableInputForContextPercent?: boolean
 }
@@ -203,12 +200,8 @@ export function willManageContext({
 		// Invalid values fall back to global setting (effectiveThreshold already set)
 	}
 
-	// By default, measure usage against the full context window (original behavior shared by all
-	// providers). Opt-in (vscode-lm via getCondenseContextWindow) measures against the available
-	// input space (context window minus the reserved output budget) to match the UI context gauge,
-	// because that provider's advertised window is inflated relative to its usable input ceiling.
-	// Reserved output tokens can never hold conversation context. When the reserve is
-	// unknown/unlimited (e.g., vscode-lm reports -1), fall back to the full context window.
+	// Default: divide by the full context window. Opt-in (vscode-lm) divides by available input
+	// (window minus reserved output); an unknown/unlimited reserve (-1) falls back to the full window.
 	let contextPercent: number
 	if (useAvailableInputForContextPercent) {
 		const reservedForOutput = maxTokens && maxTokens > 0 ? maxTokens : 0
@@ -254,11 +247,8 @@ export type ContextManagementOptions = {
 	/** Optional controller for file access validation */
 	rooIgnoreController?: RooIgnoreController
 	/**
-	 * Opt-in: measure the condense percentage against the available input space
-	 * (contextWindow - reserved output) instead of the full context window. Only providers
-	 * whose advertised live window is inflated relative to the usable input ceiling (vscode-lm,
-	 * which exposes the seam via getCondenseContextWindow) set this. All other providers leave it
-	 * undefined and keep dividing by the full context window (original behavior).
+	 * Opt-in (vscode-lm): measure the condense percentage against available input space
+	 * (contextWindow - reserved output) instead of the full window. Others leave it undefined.
 	 */
 	useAvailableInputForContextPercent?: boolean
 }
@@ -338,12 +328,8 @@ export async function manageContext({
 	// If no specific threshold is found for the profile, fall back to global setting
 
 	if (autoCondenseContext) {
-		// By default, measure usage against the full context window (original behavior shared by all
-		// providers). Opt-in (vscode-lm via getCondenseContextWindow) measures against the available
-		// input space (context window minus the reserved output budget) to match the UI context gauge,
-		// because that provider's advertised window is inflated relative to its usable input ceiling.
-		// Reserved output tokens can never hold conversation context. When the reserve is
-		// unknown/unlimited (e.g., vscode-lm reports -1), fall back to the full context window.
+		// Default: divide by the full context window. Opt-in (vscode-lm) divides by available input
+		// (window minus reserved output); an unknown/unlimited reserve (-1) falls back to the full window.
 		let contextPercent: number
 		if (useAvailableInputForContextPercent) {
 			const reservedForOutput = maxTokens && maxTokens > 0 ? maxTokens : 0
diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts
index 81a2435452..ce9e5bcec2 100644
--- a/src/core/task/Task.ts
+++ b/src/core/task/Task.ts
@@ -3738,13 +3738,9 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 			settings: this.apiConfiguration,
 		})
 
-		// VS Code LM (Copilot) measures usage against its static-table maxInputTokens, not the
-		// inflated live window, so context management runs in line with the context bar. Every other
-		// provider returns undefined here and falls back to modelInfo.contextWindow.
+		// vscode-lm condenses against its static-table maxInputTokens (not the inflated live window);
+		// only it implements getCondenseContextWindow, so others fall back to the full contextWindow.
 		const contextWindow = this.api.getCondenseContextWindow?.() ?? modelInfo.contextWindow
-
-		// Only vscode-lm implements getCondenseContextWindow, so its presence scopes the
-		// available-input condense denominator to that provider; all others use the full window.
 		const useAvailableInputForContextPercent = typeof this.api.getCondenseContextWindow === "function"
 
 		// Get the current profile ID using the helper method
@@ -3942,13 +3938,9 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 				settings: this.apiConfiguration,
 			})
 
-			// VS Code LM (Copilot) measures usage against its static-table maxInputTokens, not the
-			// inflated live window, so context management runs in line with the context bar. Every other
-			// provider returns undefined here and falls back to modelInfo.contextWindow.
+			// vscode-lm condenses against its static-table maxInputTokens (not the inflated live window);
+			// only it implements getCondenseContextWindow, so others fall back to the full contextWindow.
 			const contextWindow = this.api.getCondenseContextWindow?.() ?? modelInfo.contextWindow
-
-			// Only vscode-lm implements getCondenseContextWindow, so its presence scopes the
-			// available-input condense denominator to that provider; all others use the full window.
 			const useAvailableInputForContextPercent = typeof this.api.getCondenseContextWindow === "function"
 
 			// Get the current profile ID using the helper method
diff --git a/webview-ui/src/components/chat/__tests__/TaskHeader.spec.tsx b/webview-ui/src/components/chat/__tests__/TaskHeader.spec.tsx
index c845382632..252cbbb722 100644
--- a/webview-ui/src/components/chat/__tests__/TaskHeader.spec.tsx
+++ b/webview-ui/src/components/chat/__tests__/TaskHeader.spec.tsx
@@ -269,11 +269,8 @@ describe("TaskHeader", () => {
 		})
 
 		it("should treat a negative maxTokens (vscode-lm reports -1) as zero reserve", () => {
-			// vscode-lm reports maxTokens: -1 (unlimited). A naive `maxTokens || 0` keeps -1,
-			// which would inflate available input space and skew the percentage. The guard must
-			// treat -1 as a zero reserve so available space == contextWindow.
-			// contextTokens = 250, contextWindow = 1000, reservedForOutput = 0
-			// Percentage = 250 / 1000 * 100 = 25%
+			// vscode-lm reports maxTokens: -1 (unlimited). The guard must treat that negative reserve
+			// as zero, so available space == contextWindow rather than being inflated by a kept -1.
 			mockModelInfo = { contextWindow: 1000, maxTokens: -1 }
 			mockMaxOutputTokens = -1
 
diff --git a/webview-ui/src/components/ui/hooks/__tests__/useSelectedModel.spec.ts b/webview-ui/src/components/ui/hooks/__tests__/useSelectedModel.spec.ts
index 3ffe85e144..f4fd51cffc 100644
--- a/webview-ui/src/components/ui/hooks/__tests__/useSelectedModel.spec.ts
+++ b/webview-ui/src/components/ui/hooks/__tests__/useSelectedModel.spec.ts
@@ -812,9 +812,8 @@ describe("useSelectedModel", () => {
 		})
 
 		it("pins a divergent family's contextWindow to maxInputTokens, not its advertised window", () => {
-			// claude-opus-4.8 is the row where contextWindow (679560) and maxInputTokens (197897) DIFFER.
-			// The hook must surface maxInputTokens so the bar matches the condense gate; a field swap to
-			// the advertised contextWindow would be caught here (unlike the default model where they match).
+			// claude-opus-4.8 is the row where contextWindow and maxInputTokens differ; a field swap to
+			// the advertised window would be caught here.
 			const family = "claude-opus-4.8"
 			const apiConfiguration: ProviderSettings = {
 				apiProvider: "vscode-lm",
@@ -840,8 +839,7 @@ describe("useSelectedModel", () => {
 			const wrapper = createWrapper()
 			const { result } = renderHook(() => useSelectedModel(apiConfiguration), { wrapper })
 
-			// On a family miss we must NOT fall back to openAiModelInfoSaneDefaults' 128000 window,
-			// which would diverge from the gate. Instead, use the default model's maxInputTokens.
+			// A family miss must not use the 128000 sane-defaults window; use the default model's maxInputTokens.
 			expect(result.current.info?.contextWindow).not.toBe(128000)
 			expect(result.current.info?.contextWindow).toBe(vscodeLlmModels[vscodeLlmDefaultModelId].maxInputTokens)
 			expect(result.current.info?.supportsImages).toBe(false)
diff --git a/webview-ui/src/components/ui/hooks/useSelectedModel.ts b/webview-ui/src/components/ui/hooks/useSelectedModel.ts
index a5940ba7d3..ddc1a19755 100644
--- a/webview-ui/src/components/ui/hooks/useSelectedModel.ts
+++ b/webview-ui/src/components/ui/hooks/useSelectedModel.ts
@@ -310,15 +310,13 @@ function getSelectedModel({
 				? `${apiConfiguration.vsCodeLmModelSelector.vendor}/${apiConfiguration.vsCodeLmModelSelector.family}`
 				: vscodeLlmDefaultModelId
 			const modelFamily = apiConfiguration?.vsCodeLmModelSelector?.family ?? vscodeLlmDefaultModelId
-			// On a family miss, fall back to the default model entry instead of openAiModelInfoSaneDefaults,
-			// whose 128K contextWindow would diverge from the gate and make the bar read >100% while
-			// auto-condense never fires (the gate uses the live window).
+			// On a family miss, fall back to the default model entry, not openAiModelInfoSaneDefaults
+			// (whose 128K contextWindow would diverge from the gate and skew the bar).
 			const listedModel =
 				vscodeLlmModels[modelFamily as keyof typeof vscodeLlmModels] ?? vscodeLlmModels[vscodeLlmDefaultModelId]
-			// Set contextWindow = maxInputTokens so the UI bar matches what the condense gate uses for
-			// vscode-lm. The gate's primary window comes from getCondenseContextWindow() (which returns the
-			// static-table maxInputTokens); getModel().info.contextWindow is only the fallback. Sharing
-			// maxInputTokens keeps the bar and the gate on a single source of truth.
+			// Set contextWindow = maxInputTokens so the UI bar shares one source of truth with the gate,
+			// whose primary window is getCondenseContextWindow() (static-table maxInputTokens); the
+			// provider's getModel().info.contextWindow is only the gate's fallback.
 			const info: ModelInfo = {
 				...openAiModelInfoSaneDefaults,
 				...listedModel,