diff --git a/packages/types/src/__tests__/vscode-llm.spec.ts b/packages/types/src/__tests__/vscode-llm.spec.ts
new file mode 100644
index 0000000000..041bc3c8b4
--- /dev/null
+++ b/packages/types/src/__tests__/vscode-llm.spec.ts
@@ -0,0 +1,33 @@
+import { describe, it, expect } from "vitest"
+import { vscodeLlmModels, vscodeLlmDefaultModelId } from "../providers/vscode-llm.js"
+
+describe("vscodeLlmModels", () => {
+	it("exposes the opus-4.8 row with its measured maxInputTokens and contextWindow", () => {
+		// claude-opus-4.8 intentionally diverges: maxInputTokens (197897) is the enforced ceiling the
+		// UI reads, contextWindow (679560) the advertised window. Assert the on-disk literals as a tripwire.
+		expect(vscodeLlmModels).toHaveProperty("claude-opus-4.8")
+		expect(vscodeLlmModels["claude-opus-4.8"].contextWindow).toBe(679560)
+		expect(vscodeLlmModels["claude-opus-4.8"].maxInputTokens).toBe(197897)
+	})
+	it("preserves the real window for models captured with a smaller maxInputTokens", () => {
+		expect(vscodeLlmModels["gpt-4o-mini"].maxInputTokens).toBe(12078)
+		expect(vscodeLlmModels["gpt-4o-mini"].contextWindow).toBe(12078)
+		expect(vscodeLlmModels["gemini-2.5-pro"].contextWindow).toBe(108594)
+		expect(vscodeLlmModels["gemini-2.5-pro"].maxInputTokens).toBe(108594)
+	})
+	it("keeps both window fields populated and positive for every row", () => {
+		for (const [family, model] of Object.entries(vscodeLlmModels)) {
+			expect(model.contextWindow, `${family}: contextWindow must be positive`).toBeGreaterThan(0)
+			expect(model.maxInputTokens, `${family}: maxInputTokens must be positive`).toBeGreaterThan(0)
+		}
+	})
+	it("excludes fabricated/internal/alias families and the dropped legacy rows", () => {
+		expect(vscodeLlmModels).not.toHaveProperty("claude-opus-4.7-high")
+		expect(vscodeLlmModels).not.toHaveProperty("claude-3.5-sonnet")
+		expect(vscodeLlmModels).not.toHaveProperty("claude-4-sonnet")
+	})
+	it("defaults to a model id that exists in the table", () => {
+		expect(vscodeLlmDefaultModelId).toBe("claude-sonnet-4.5")
+		expect(vscodeLlmModels).toHaveProperty(vscodeLlmDefaultModelId)
+	})
+})
diff --git a/packages/types/src/providers/vscode-llm.ts b/packages/types/src/providers/vscode-llm.ts
index efe0691913..5286b0ed28 100644
--- a/packages/types/src/providers/vscode-llm.ts
+++ b/packages/types/src/providers/vscode-llm.ts
@@ -2,189 +2,215 @@ import type { ModelInfo } from "../model.js"
 
 export type VscodeLlmModelId = keyof typeof vscodeLlmModels
 
-export const vscodeLlmDefaultModelId: VscodeLlmModelId = "claude-3.5-sonnet"
+export const vscodeLlmDefaultModelId: VscodeLlmModelId = "claude-sonnet-4.5"
 
-// https://docs.cline.bot/provider-config/vscode-language-model-api
+// Curated VS Code LM (GitHub Copilot) model catalog.
+// The API exposes only `maxInputTokens`; the UI and condense gate read each row's `maxInputTokens` as
+// the input ceiling. `contextWindow` is the advertised window, which may exceed it (e.g. claude-opus-4.8).
 export const vscodeLlmModels = {
-	"gpt-3.5-turbo": {
-		contextWindow: 12114,
-		supportsImages: false,
+	"claude-opus-4.8": {
+		contextWindow: 679560,
+		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "gpt-3.5-turbo",
-		version: "gpt-3.5-turbo-0613",
-		name: "GPT 3.5 Turbo",
+		family: "claude-opus-4.8",
+		version: "claude-opus-4.8",
+		name: "Claude Opus 4.8",
 		supportsToolCalling: true,
-		maxInputTokens: 12114,
+		maxInputTokens: 197897,
 	},
-	"gpt-4o-mini": {
-		contextWindow: 12115,
-		supportsImages: false,
+	"claude-opus-4.7": {
+		contextWindow: 197897,
+		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "gpt-4o-mini",
-		version: "gpt-4o-mini-2024-07-18",
-		name: "GPT-4o mini",
+		family: "claude-opus-4.7",
+		version: "claude-opus-4.7",
+		name: "Claude Opus 4.7",
 		supportsToolCalling: true,
-		maxInputTokens: 12115,
+		maxInputTokens: 197897,
 	},
-	"gpt-4": {
-		contextWindow: 28501,
-		supportsImages: false,
+	"claude-opus-4.6": {
+		contextWindow: 197897,
+		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "gpt-4",
-		version: "gpt-4-0613",
-		name: "GPT 4",
+		family: "claude-opus-4.6",
+		version: "claude-opus-4.6",
+		name: "Claude Opus 4.6",
 		supportsToolCalling: true,
-		maxInputTokens: 28501,
+		maxInputTokens: 197897,
 	},
-	"gpt-4-0125-preview": {
-		contextWindow: 63826,
-		supportsImages: false,
+	"claude-opus-4.5": {
+		contextWindow: 167790,
+		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "gpt-4-turbo",
-		version: "gpt-4-0125-preview",
-		name: "GPT 4 Turbo",
+		family: "claude-opus-4.5",
+		version: "claude-opus-4.5",
+		name: "Claude Opus 4.5",
 		supportsToolCalling: true,
-		maxInputTokens: 63826,
+		maxInputTokens: 167790,
 	},
-	"gpt-4o": {
-		contextWindow: 63827,
+	"claude-sonnet-4.6": {
+		contextWindow: 197896,
 		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "gpt-4o",
-		version: "gpt-4o-2024-11-20",
-		name: "GPT-4o",
+		family: "claude-sonnet-4.6",
+		version: "claude-sonnet-4.6",
+		name: "Claude Sonnet 4.6",
 		supportsToolCalling: true,
-		maxInputTokens: 63827,
+		maxInputTokens: 197896,
 	},
-	o1: {
-		contextWindow: 19827,
-		supportsImages: false,
+	"claude-sonnet-4.5": {
+		contextWindow: 167790,
+		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "o1-ga",
-		version: "o1-2024-12-17",
-		name: "o1 (Preview)",
+		family: "claude-sonnet-4.5",
+		version: "claude-sonnet-4.5",
+		name: "Claude Sonnet 4.5",
 		supportsToolCalling: true,
-		maxInputTokens: 19827,
+		maxInputTokens: 167790,
 	},
-	"o3-mini": {
-		contextWindow: 63827,
-		supportsImages: false,
+	"claude-haiku-4.5": {
+		contextWindow: 135790,
+		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "o3-mini",
-		version: "o3-mini-2025-01-31",
-		name: "o3-mini",
+		family: "claude-haiku-4.5",
+		version: "claude-haiku-4.5",
+		name: "Claude Haiku 4.5",
 		supportsToolCalling: true,
-		maxInputTokens: 63827,
+		maxInputTokens: 135790,
 	},
-	"claude-3.5-sonnet": {
-		contextWindow: 81638,
+	"gpt-5.5": {
+		contextWindow: 268426,
 		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "claude-3.5-sonnet",
-		version: "claude-3.5-sonnet",
-		name: "Claude 3.5 Sonnet",
+		family: "gpt-5.5",
+		version: "gpt-5.5",
+		name: "GPT-5.5",
 		supportsToolCalling: true,
-		maxInputTokens: 81638,
+		maxInputTokens: 268426,
 	},
-	"claude-4-sonnet": {
-		contextWindow: 128000,
+	"gpt-5.4": {
+		contextWindow: 268424,
 		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "claude-sonnet-4",
-		version: "claude-sonnet-4",
-		name: "Claude Sonnet 4",
+		family: "gpt-5.4",
+		version: "gpt-5.4",
+		name: "GPT-5.4",
 		supportsToolCalling: true,
-		maxInputTokens: 111836,
+		maxInputTokens: 268424,
 	},
-	"gemini-2.0-flash-001": {
-		contextWindow: 127827,
+	"gpt-5.4-mini": {
+		contextWindow: 271790,
 		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "gemini-2.0-flash",
-		version: "gemini-2.0-flash-001",
-		name: "Gemini 2.0 Flash",
-		supportsToolCalling: false,
-		maxInputTokens: 127827,
+		family: "gpt-5.4-mini",
+		version: "gpt-5.4-mini",
+		name: "GPT-5.4 mini",
+		supportsToolCalling: true,
+		maxInputTokens: 271790,
 	},
-	"gemini-2.5-pro": {
-		contextWindow: 128000,
+	"gpt-5.3-codex": {
+		contextWindow: 271790,
 		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "gemini-2.5-pro",
-		version: "gemini-2.5-pro-preview-03-25",
-		name: "Gemini 2.5 Pro (Preview)",
+		family: "gpt-5.3-codex",
+		version: "gpt-5.3-codex",
+		name: "GPT-5.3-Codex",
 		supportsToolCalling: true,
-		maxInputTokens: 108637,
+		maxInputTokens: 271790,
 	},
-	"o4-mini": {
-		contextWindow: 128000,
+	"gpt-5-mini": {
+		contextWindow: 127790,
+		supportsImages: true,
+		supportsPromptCache: false,
+		inputPrice: 0,
+		outputPrice: 0,
+		family: "gpt-5-mini",
+		version: "gpt-5-mini",
+		name: "GPT-5 mini",
+		supportsToolCalling: true,
+		maxInputTokens: 127790,
+	},
+	"gpt-4o-mini": {
+		contextWindow: 12078,
 		supportsImages: false,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "o4-mini",
-		version: "o4-mini-2025-04-16",
-		name: "o4-mini (Preview)",
+		family: "gpt-4o-mini",
+		version: "gpt-4o-mini-2024-07-18",
+		name: "GPT-4o mini",
 		supportsToolCalling: true,
-		maxInputTokens: 111452,
+		maxInputTokens: 12078,
 	},
-	"gpt-4.1": {
-		contextWindow: 128000,
+	"gemini-3.1-pro-preview": {
+		contextWindow: 197897,
 		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "gpt-4.1",
-		version: "gpt-4.1-2025-04-14",
-		name: "GPT-4.1 (Preview)",
+		family: "gemini-3.1-pro-preview",
+		version: "gemini-3.1-pro-preview",
+		name: "Gemini 3.1 Pro (Preview)",
 		supportsToolCalling: true,
-		maxInputTokens: 111452,
+		maxInputTokens: 197897,
 	},
-	"gpt-5-mini": {
-		contextWindow: 128000,
+	"gemini-3.5-flash": {
+		contextWindow: 197895,
 		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "gpt-5-mini",
-		version: "gpt-5-mini",
-		name: "GPT-5 mini (Preview)",
+		family: "gemini-3.5-flash",
+		version: "gemini-3.5-flash",
+		name: "Gemini 3.5 Flash",
+		supportsToolCalling: true,
+		maxInputTokens: 197895,
+	},
+	"gemini-3-flash": {
+		contextWindow: 108594,
+		supportsImages: true,
+		supportsPromptCache: false,
+		inputPrice: 0,
+		outputPrice: 0,
+		family: "gemini-3-flash",
+		version: "gemini-3-flash-preview",
+		name: "Gemini 3 Flash (Preview)",
 		supportsToolCalling: true,
-		maxInputTokens: 108637,
+		maxInputTokens: 108594,
 	},
-	"gpt-5": {
-		contextWindow: 128000,
+	"gemini-2.5-pro": {
+		contextWindow: 108594,
 		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "gpt-5",
-		version: "gpt-5",
-		name: "GPT-5 (Preview)",
+		family: "gemini-2.5-pro",
+		version: "gemini-2.5-pro",
+		name: "Gemini 2.5 Pro",
 		supportsToolCalling: true,
-		maxInputTokens: 108637,
+		maxInputTokens: 108594,
 	},
 } as const satisfies Record<
 	string,
diff --git a/src/api/index.ts b/src/api/index.ts
index 0c901f8e23..9e4ba3bfb5 100644
--- a/src/api/index.ts
+++ b/src/api/index.ts
@@ -107,6 +107,13 @@ export interface ApiHandler {
 
 	getModel(): { id: string; info: ModelInfo }
 
+	/**
+	 * Optional context window for context-management / auto-condense when it must differ from
+	 * getModel().info.contextWindow. Only VS Code LM overrides it (static `maxInputTokens` vs its
+	 * inflated live window); others leave it undefined and callers fall back.
+	 */
+	getCondenseContextWindow?(): number
+
 	/**
 	 * Counts tokens for content blocks
 	 * All providers extend BaseProvider which provides a default tiktoken implementation,
diff --git a/src/api/providers/__tests__/vscode-lm.spec.ts b/src/api/providers/__tests__/vscode-lm.spec.ts
index a79a5a4bcb..5c425b5e25 100644
--- a/src/api/providers/__tests__/vscode-lm.spec.ts
+++ b/src/api/providers/__tests__/vscode-lm.spec.ts
@@ -63,6 +63,7 @@ import * as vscode from "vscode"
 import { VsCodeLmHandler } from "../vscode-lm"
 import type { ApiHandlerOptions } from "../../../shared/api"
 import type { Anthropic } from "@anthropic-ai/sdk"
+import { openAiModelInfoSaneDefaults, vscodeLlmModels } from "@roo-code/types"
 
 const mockLanguageModelChat = {
 	id: "test-model",
@@ -440,6 +441,85 @@ describe("VsCodeLmHandler", () => {
 			const model = handler.getModel()
 			expect(model.info).toBeDefined()
 		})
+
+		it("should use the full advertised maxInputTokens without an upper cap", async () => {
+			// A large advertised window is surfaced as-is, not clamped to a smaller default.
+			const mockModel = { ...mockLanguageModelChat, maxInputTokens: 936000 }
+			;(vscode.lm.selectChatModels as Mock).mockResolvedValue([mockModel])
+			handler["client"] = null
+			await handler.initializeClient()
+
+			const model = handler.getModel()
+			expect(model.info.contextWindow).toBe(936000)
+		})
+
+		it("should pass through a small maxInputTokens unchanged", async () => {
+			const mockModel = { ...mockLanguageModelChat, maxInputTokens: 4096 }
+			;(vscode.lm.selectChatModels as Mock).mockResolvedValue([mockModel])
+			handler["client"] = null
+			await handler.initializeClient()
+
+			const model = handler.getModel()
+			expect(model.info.contextWindow).toBe(4096)
+		})
+
+		it("should fall back to sane defaults when maxInputTokens is not a number", async () => {
+			const mockModel = { ...mockLanguageModelChat, maxInputTokens: undefined as unknown as number }
+			;(vscode.lm.selectChatModels as Mock).mockResolvedValue([mockModel])
+			handler["client"] = null
+			await handler.initializeClient()
+
+			const model = handler.getModel()
+			expect(model.info.contextWindow).toBe(openAiModelInfoSaneDefaults.contextWindow)
+		})
+	})
+
+	describe("getCondenseContextWindow", () => {
+		it("uses the static-table maxInputTokens for a known VS Code LM family", () => {
+			const opusHandler = new VsCodeLmHandler({
+				vsCodeLmModelSelector: { vendor: "copilot", family: "claude-opus-4.8" },
+			})
+			expect(opusHandler.getCondenseContextWindow()).toBe(vscodeLlmModels["claude-opus-4.8"].maxInputTokens)
+			opusHandler.dispose()
+		})
+
+		it("falls back to the live model context window for families not in the static table", () => {
+			// Not a curated row, so the gate uses the live runtime window.
+			handler["client"] = mockLanguageModelChat as unknown as vscode.LanguageModelChat
+			expect(handler.getCondenseContextWindow()).toBe(handler.getModel().info.contextWindow)
+			expect(handler.getCondenseContextWindow()).toBe(mockLanguageModelChat.maxInputTokens)
+		})
+
+		it("falls back to the live window when no family is resolvable (no client, no selector family)", () => {
+			// No client and no selector family means `family` is undefined, so the gate skips the
+			// static lookup and uses getModel().info.contextWindow.
+			const noFamilyHandler = new VsCodeLmHandler({ vsCodeLmModelSelector: { vendor: "copilot" } })
+			noFamilyHandler["client"] = null
+			expect(noFamilyHandler.getCondenseContextWindow()).toBe(noFamilyHandler.getModel().info.contextWindow)
+			expect(noFamilyHandler.getCondenseContextWindow()).toBe(openAiModelInfoSaneDefaults.contextWindow)
+			noFamilyHandler.dispose()
+		})
+
+		it("falls back to the derived window when the static row exists but maxInputTokens is non-positive", () => {
+			// A curated row exists but its maxInputTokens is <= 0, so the `> 0` guard fails and the gate
+			// falls back to getModel().info.contextWindow.
+			const family = "claude-opus-4.8"
+			const original = vscodeLlmModels[family].maxInputTokens
+			try {
+				;(vscodeLlmModels[family] as { maxInputTokens: number }).maxInputTokens = 0
+				const guardHandler = new VsCodeLmHandler({
+					vsCodeLmModelSelector: { vendor: "copilot", family },
+				})
+				// Leave the client unset so `family` resolves from the selector, forcing the zeroed
+				// static row to be read instead of a live client's family.
+				guardHandler["client"] = null
+				expect(guardHandler.getCondenseContextWindow()).toBe(guardHandler.getModel().info.contextWindow)
+				expect(guardHandler.getCondenseContextWindow()).toBe(openAiModelInfoSaneDefaults.contextWindow)
+				guardHandler.dispose()
+			} finally {
+				;(vscodeLlmModels[family] as { maxInputTokens: number }).maxInputTokens = original
+			}
+		})
 	})
 
 	describe("countTokens", () => {
diff --git a/src/api/providers/vscode-lm.ts b/src/api/providers/vscode-lm.ts
index 8fb564a9d5..9adcefa972 100644
--- a/src/api/providers/vscode-lm.ts
+++ b/src/api/providers/vscode-lm.ts
@@ -2,7 +2,7 @@ import { Anthropic } from "@anthropic-ai/sdk"
 import * as vscode from "vscode"
 import OpenAI from "openai"
 
-import { type ModelInfo, openAiModelInfoSaneDefaults } from "@roo-code/types"
+import { type ModelInfo, openAiModelInfoSaneDefaults, vscodeLlmModels } from "@roo-code/types"
 
 import type { ApiHandlerOptions } from "../../shared/api"
 import { SELECTOR_SEPARATOR, stringifyVsCodeLmModelSelector } from "../../shared/vsCodeSelectorUtils"
@@ -562,6 +562,23 @@ export class VsCodeLmHandler extends BaseProvider implements SingleCompletionHan
 		}
 	}
 
+	/**
+	 * Resolves the window the auto-condense gate measures against. The live `client.maxInputTokens`
+	 * advertised by the API is far larger than usable, and gating on it would keep auto-condense
+	 * from firing, so a positive `maxInputTokens` from the curated static table (the same value the
+	 * bar uses) takes precedence; models missing from the table use the live window.
+	 */
+	getCondenseContextWindow(): number {
+		const familyId = this.client?.family ?? this.options.vsCodeLmModelSelector?.family
+		const curatedRow = familyId ? vscodeLlmModels[familyId as keyof typeof vscodeLlmModels] : undefined
+		const curatedCeiling = curatedRow?.maxInputTokens
+
+		if (typeof curatedCeiling === "number" && curatedCeiling > 0) {
+			return curatedCeiling
+		}
+		return this.getModel().info.contextWindow
+	}
+
 	async completePrompt(prompt: string): Promise<string> {
 		try {
 			const client = await this.getClient()
diff --git a/src/core/context-management/__tests__/context-management.spec.ts b/src/core/context-management/__tests__/context-management.spec.ts
index 9950ec536b..89797b045f 100644
--- a/src/core/context-management/__tests__/context-management.spec.ts
+++ b/src/core/context-management/__tests__/context-management.spec.ts
@@ -810,9 +810,9 @@ describe("Context Management", () => {
 			const summarizeSpy = vi.spyOn(condenseModule, "summarizeConversation")
 
 			const modelInfo = createModelInfo(100000, 30000)
-			// Set tokens to be below both the allowedTokens threshold and the percentage threshold
+			// Usage measured against available input space stays below the threshold.
 			const contextWindow = modelInfo.contextWindow
-			const totalTokens = 40000 // 40% of context window
+			const totalTokens = 30000
 			const messagesWithSmallContent = [
 				...messages.slice(0, -1),
 				{ ...messages[messages.length - 1], content: "" },
@@ -825,7 +825,7 @@ describe("Context Management", () => {
 				maxTokens: modelInfo.maxTokens,
 				apiHandler: mockApiHandler,
 				autoCondenseContext: true,
-				autoCondenseContextPercent: 50, // Set threshold to 50% - our tokens are at 40%
+				autoCondenseContextPercent: 50, // Set threshold to 50% - usage is ~43% of available input
 				systemPrompt: "System prompt",
 				taskId,
 				profileThresholds: {},
@@ -1507,19 +1507,36 @@ describe("Context Management", () => {
 		})
 
 		it("should return false when context percent is below threshold", () => {
+			// Opt-in available-input denominator: usage stays below threshold.
 			const result = willManageContext({
-				totalTokens: 40000,
-				contextWindow: 100000, // 40% of context window
+				totalTokens: 30000,
+				contextWindow: 100000,
 				maxTokens: 30000,
 				autoCondenseContext: true,
-				autoCondenseContextPercent: 50, // 50% threshold
+				autoCondenseContextPercent: 50, // 50% threshold; usage is ~43% of available input
 				profileThresholds: {},
 				currentProfileId: "default",
 				lastMessageTokens: 0,
+				useAvailableInputForContextPercent: true,
 			})
 			expect(result).toBe(false)
 		})
 
+		it("should treat a negative maxTokens (vscode-lm reports -1) as the default reserve, not -1", () => {
+			// A -1 reserve must be treated as unknown (default reserve), not kept as -1.
+			const result = willManageContext({
+				totalTokens: 85000,
+				contextWindow: 100000,
+				maxTokens: -1,
+				autoCondenseContext: false,
+				autoCondenseContextPercent: 50,
+				profileThresholds: {},
+				currentProfileId: "default",
+				lastMessageTokens: 0,
+			})
+			expect(result).toBe(true)
+		})
+
 		it("should return true when tokens exceed allowedTokens even if autoCondenseContext is false", () => {
 			// allowedTokens = contextWindow * (1 - 0.1) - reservedTokens = 100000 * 0.9 - 30000 = 60000
 			const result = willManageContext({
@@ -1581,10 +1598,9 @@ describe("Context Management", () => {
 		})
 
 		it("should include lastMessageTokens in the calculation", () => {
-			// Without lastMessageTokens: 49000 tokens = 49%
-			// With lastMessageTokens: 49000 + 2000 = 51000 tokens = 51%
+			// Adding lastMessageTokens pushes usage over the threshold (opt-in available-input denominator).
 			const resultWithoutLastMessage = willManageContext({
-				totalTokens: 49000,
+				totalTokens: 34000,
 				contextWindow: 100000,
 				maxTokens: 30000,
 				autoCondenseContext: true,
@@ -1592,18 +1608,20 @@ describe("Context Management", () => {
 				profileThresholds: {},
 				currentProfileId: "default",
 				lastMessageTokens: 0,
+				useAvailableInputForContextPercent: true,
 			})
 			expect(resultWithoutLastMessage).toBe(false)
 
 			const resultWithLastMessage = willManageContext({
-				totalTokens: 49000,
+				totalTokens: 34000,
 				contextWindow: 100000,
 				maxTokens: 30000,
 				autoCondenseContext: true,
 				autoCondenseContextPercent: 50, // 50% threshold
 				profileThresholds: {},
 				currentProfileId: "default",
-				lastMessageTokens: 2000, // Pushes total to 51%
+				lastMessageTokens: 2000, // Pushes usage over 50% of available input
+				useAvailableInputForContextPercent: true,
 			})
 			expect(resultWithLastMessage).toBe(true)
 		})
@@ -1701,4 +1719,292 @@ describe("Context Management", () => {
 			expect(result.newContextTokensAfterTruncation).toBeGreaterThan(0)
 		})
 	})
+
+	/**
+	 * Regression: with the opt-in flag on, the gate measures usage against available input space
+	 * (contextWindow - reserved output) so it stays in lockstep with the UI gauge and fires for vscode-lm.
+	 */
+	describe("contextPercent uses available input space (opt-in, regression)", () => {
+		const createModelInfo = (contextWindow: number, maxTokens?: number): ModelInfo => ({
+			contextWindow,
+			supportsPromptCache: true,
+			maxTokens,
+		})
+
+		const messages: ApiMessage[] = [
+			{ role: "user", content: "First message" },
+			{ role: "assistant", content: "Second message" },
+			{ role: "user", content: "Third message" },
+			{ role: "assistant", content: "Fourth message" },
+			{ role: "user", content: "Fifth message" },
+		]
+
+		it("willManageContext measures the percentage against available input, not the full window", () => {
+			// Dividing by available input clears the threshold; the full window would keep the gate closed.
+			const result = willManageContext({
+				totalTokens: 100000,
+				contextWindow: 200000,
+				maxTokens: 64000,
+				autoCondenseContext: true,
+				autoCondenseContextPercent: 70,
+				profileThresholds: {},
+				currentProfileId: "default",
+				lastMessageTokens: 0,
+				useAvailableInputForContextPercent: true,
+			})
+			expect(result).toBe(true)
+		})
+
+		it("willManageContext stays below threshold when usage is under available input", () => {
+			// Usage under available input stays below threshold.
+			const result = willManageContext({
+				totalTokens: 90000,
+				contextWindow: 200000,
+				maxTokens: 64000,
+				autoCondenseContext: true,
+				autoCondenseContextPercent: 70,
+				profileThresholds: {},
+				currentProfileId: "default",
+				lastMessageTokens: 0,
+				useAvailableInputForContextPercent: true,
+			})
+			expect(result).toBe(false)
+		})
+
+		it("willManageContext treats an unlimited (-1) reserve as zero reserve for the percentage", () => {
+			// -1 reserve → full window: 150000/200000 = 75% >= 70. NOTE(review): any reserve >= 0 also passes; "zero" isn't pinned.
+			const result = willManageContext({
+				totalTokens: 150000,
+				contextWindow: 200000,
+				maxTokens: -1,
+				autoCondenseContext: true,
+				autoCondenseContextPercent: 70,
+				profileThresholds: {},
+				currentProfileId: "default",
+				lastMessageTokens: 0,
+				useAvailableInputForContextPercent: true,
+			})
+			expect(result).toBe(true)
+		})
+
+		it("willManageContext falls back to 100% when the reserve is >= the window (availableInput <= 0)", () => {
+			// availableInput <= 0 must yield contextPercent = 100, not a division. NOTE(review): allowedTokens < 0 here too — may pass without the guard; confirm.
+			const result = willManageContext({
+				totalTokens: 1,
+				contextWindow: 50000,
+				maxTokens: 60000, // reserve > window → availableInput = -10000
+				autoCondenseContext: true,
+				autoCondenseContextPercent: 80,
+				profileThresholds: {},
+				currentProfileId: "default",
+				lastMessageTokens: 0,
+				useAvailableInputForContextPercent: true,
+			})
+			expect(result).toBe(true)
+		})
+
+		it("willManageContext falls back to 100% when the reserve exactly equals the window (availableInput === 0)", () => {
+			// Boundary: reserve === window → available input 0, still the non-positive guard. NOTE(review): allowedTokens <= 0 < totalTokens here too; confirm.
+			const result = willManageContext({
+				totalTokens: 1,
+				contextWindow: 50000,
+				maxTokens: 50000,
+				autoCondenseContext: true,
+				autoCondenseContextPercent: 90,
+				profileThresholds: {},
+				currentProfileId: "default",
+				lastMessageTokens: 0,
+				useAvailableInputForContextPercent: true,
+			})
+			expect(result).toBe(true)
+		})
+
+		it("manageContext summarizes via the 100% fallback when the reserve >= the window (availableInput <= 0)", async () => {
+			// reserve >= window forces contextPercent to 100, so summarization triggers. NOTE(review): allowedTokens < 0 here too — confirm this isolates the guard.
+			const mockSummary = "Reserve-exceeds-window summary"
+			const mockSummarizeResponse: condenseModule.SummarizeResponse = {
+				messages: [
+					{ role: "user", content: "First message" },
+					{ role: "user", content: mockSummary, isSummary: true },
+					{ role: "assistant", content: "Last message" },
+				],
+				summary: mockSummary,
+				cost: 0.05,
+				newContextTokens: 100,
+			}
+			const summarizeSpy = vi
+				.spyOn(condenseModule, "summarizeConversation")
+				.mockResolvedValue(mockSummarizeResponse)
+
+			const messagesWithSmallContent = [
+				...messages.slice(0, -1),
+				{ ...messages[messages.length - 1], content: "" },
+			]
+
+			const result = await manageContext({
+				messages: messagesWithSmallContent,
+				totalTokens: 1,
+				contextWindow: 50000,
+				maxTokens: 60000,
+				apiHandler: mockApiHandler,
+				autoCondenseContext: true,
+				autoCondenseContextPercent: 80,
+				systemPrompt: "System prompt",
+				taskId,
+				profileThresholds: {},
+				currentProfileId: "default",
+				useAvailableInputForContextPercent: true,
+			})
+
+			expect(summarizeSpy).toHaveBeenCalled()
+			expect(result).toMatchObject({
+				summary: mockSummary,
+				prevContextTokens: 1,
+			})
+
+			summarizeSpy.mockRestore()
+		})
+
+		it("manageContext summarizes based on available input space, end-to-end", async () => {
+			const mockSummary = "Available-input summary"
+			const mockSummarizeResponse: condenseModule.SummarizeResponse = {
+				messages: [
+					{ role: "user", content: "First message" },
+					{ role: "user", content: mockSummary, isSummary: true },
+					{ role: "assistant", content: "Last message" },
+				],
+				summary: mockSummary,
+				cost: 0.05,
+				newContextTokens: 100,
+			}
+			const summarizeSpy = vi
+				.spyOn(condenseModule, "summarizeConversation")
+				.mockResolvedValue(mockSummarizeResponse)
+
+			const modelInfo = createModelInfo(200000, 64000)
+			// Clears the threshold against available input but not the raw window; end-to-end must summarize.
+			const totalTokens = 100000
+			const messagesWithSmallContent = [
+				...messages.slice(0, -1),
+				{ ...messages[messages.length - 1], content: "" },
+			]
+
+			const result = await manageContext({
+				messages: messagesWithSmallContent,
+				totalTokens,
+				contextWindow: modelInfo.contextWindow,
+				maxTokens: modelInfo.maxTokens,
+				apiHandler: mockApiHandler,
+				autoCondenseContext: true,
+				autoCondenseContextPercent: 70,
+				systemPrompt: "System prompt",
+				taskId,
+				profileThresholds: {},
+				currentProfileId: "default",
+				useAvailableInputForContextPercent: true,
+			})
+
+			expect(summarizeSpy).toHaveBeenCalled()
+			expect(result).toMatchObject({
+				summary: mockSummary,
+				prevContextTokens: totalTokens,
+			})
+
+			summarizeSpy.mockRestore()
+		})
+	})
+
+	/**
+	 * Scoping: the available-input denominator is opt-in; default divides by the full window.
+	 * The maxTokens: -1 reserve guard stays global on the default path.
+	 */
+	describe("contextPercent denominator is opt-in (default = full window)", () => {
+		const messages: ApiMessage[] = [
+			{ role: "user", content: "First message" },
+			{ role: "assistant", content: "Second message" },
+			{ role: "user", content: "Third message" },
+			{ role: "assistant", content: "Fourth message" },
+			{ role: "user", content: "Fifth message" },
+		]
+
+		it("willManageContext divides by the full window when the flag is omitted (default)", () => {
+			// Default divides by the full window, staying below threshold where the opt-in math would fire.
+			const result = willManageContext({
+				totalTokens: 100000,
+				contextWindow: 200000,
+				maxTokens: 64000,
+				autoCondenseContext: true,
+				autoCondenseContextPercent: 70,
+				profileThresholds: {},
+				currentProfileId: "default",
+				lastMessageTokens: 0,
+			})
+			expect(result).toBe(false)
+		})
+
+		it("willManageContext fires on the same inputs when the opt-in flag is true", () => {
+			// Same inputs, flag on: dividing by available input clears the threshold.
+			const result = willManageContext({
+				totalTokens: 100000,
+				contextWindow: 200000,
+				maxTokens: 64000,
+				autoCondenseContext: true,
+				autoCondenseContextPercent: 70,
+				profileThresholds: {},
+				currentProfileId: "default",
+				lastMessageTokens: 0,
+				useAvailableInputForContextPercent: true,
+			})
+			expect(result).toBe(true)
+		})
+
+		it("keeps the maxTokens:-1 reserve guard on the default (full-window) path", () => {
+			// The -1 reserve guard is global, independent of the percent denominator.
+			const result = willManageContext({
+				totalTokens: 85000,
+				contextWindow: 100000,
+				maxTokens: -1,
+				autoCondenseContext: false,
+				autoCondenseContextPercent: 50,
+				profileThresholds: {},
+				currentProfileId: "default",
+				lastMessageTokens: 0,
+			})
+			expect(result).toBe(true)
+		})
+
+		it("manageContext does NOT summarize on the default path where the opt-in math would have", async () => {
+			// Default full-window math leaves this case below threshold; the opt-in flag would summarize it.
+			const summarizeSpy = vi.spyOn(condenseModule, "summarizeConversation")
+
+			const messagesWithSmallContent = [
+				...messages.slice(0, -1),
+				{ ...messages[messages.length - 1], content: "" },
+			]
+
+			const result = await manageContext({
+				messages: messagesWithSmallContent,
+				totalTokens: 100000,
+				contextWindow: 200000,
+				maxTokens: 64000,
+				apiHandler: mockApiHandler,
+				autoCondenseContext: true,
+				autoCondenseContextPercent: 70,
+				systemPrompt: "System prompt",
+				taskId,
+				profileThresholds: {},
+				currentProfileId: "default",
+			})
+
+			expect(summarizeSpy).not.toHaveBeenCalled()
+			expect(result).toEqual({
+				messages: messagesWithSmallContent,
+				summary: "",
+				cost: 0,
+				prevContextTokens: 100000,
+			})
+
+			summarizeSpy.mockRestore()
+		})
+	})
 })
diff --git a/src/core/context-management/index.ts b/src/core/context-management/index.ts
index 243d7bd797..ed2ee6be5f 100644
--- a/src/core/context-management/index.ts
+++ b/src/core/context-management/index.ts
@@ -147,6 +147,11 @@ export type WillManageContextOptions = {
 	profileThresholds: Record<string, number>
 	currentProfileId: string
 	lastMessageTokens: number
+	/**
+	 * Opt-in (vscode-lm): measure the condense percentage against available input space
+	 * (contextWindow - reserved output) instead of the full window. Others leave it undefined.
+	 */
+	useAvailableInputForContextPercent?: boolean
 }
 
 /**
@@ -167,16 +172,19 @@ export function willManageContext({
 	profileThresholds,
 	currentProfileId,
 	lastMessageTokens,
+	useAvailableInputForContextPercent,
 }: WillManageContextOptions): boolean {
 	if (!autoCondenseContext) {
 		// When auto-condense is disabled, only truncation can occur
-		const reservedTokens = maxTokens || ANTHROPIC_DEFAULT_MAX_TOKENS
+		// vscode-lm reports maxTokens: -1 (unlimited); a negative reserve must not distort the window math.
+		const reservedTokens = maxTokens && maxTokens > 0 ? maxTokens : ANTHROPIC_DEFAULT_MAX_TOKENS
 		const prevContextTokens = totalTokens + lastMessageTokens
 		const allowedTokens = contextWindow * (1 - TOKEN_BUFFER_PERCENTAGE) - reservedTokens
 		return prevContextTokens > allowedTokens
 	}
 
-	const reservedTokens = maxTokens || ANTHROPIC_DEFAULT_MAX_TOKENS
+	// vscode-lm reports maxTokens: -1 (unlimited); a negative reserve must not distort the window math.
+	const reservedTokens = maxTokens && maxTokens > 0 ? maxTokens : ANTHROPIC_DEFAULT_MAX_TOKENS
 	const prevContextTokens = totalTokens + lastMessageTokens
 	const allowedTokens = contextWindow * (1 - TOKEN_BUFFER_PERCENTAGE) - reservedTokens
 
@@ -192,7 +200,16 @@ export function willManageContext({
 		// Invalid values fall back to global setting (effectiveThreshold already set)
 	}
 
-	const contextPercent = (100 * prevContextTokens) / contextWindow
+	// Default: divide by the full context window. Opt-in (vscode-lm) divides by available input
+	// (window minus reserved output); an unknown/unlimited reserve (-1) falls back to the full window.
+	let contextPercent: number
+	if (useAvailableInputForContextPercent) {
+		const reservedForOutput = maxTokens && maxTokens > 0 ? maxTokens : 0
+		const availableInputTokens = contextWindow - reservedForOutput
+		contextPercent = availableInputTokens > 0 ? (100 * prevContextTokens) / availableInputTokens : 100
+	} else {
+		contextPercent = (100 * prevContextTokens) / contextWindow
+	}
 	return contextPercent >= effectiveThreshold || prevContextTokens > allowedTokens
 }
 
@@ -229,6 +246,11 @@ export type ContextManagementOptions = {
 	cwd?: string
 	/** Optional controller for file access validation */
 	rooIgnoreController?: RooIgnoreController
+	/**
+	 * Opt-in (vscode-lm): measure the condense percentage against available input space
+	 * (contextWindow - reserved output) instead of the full window. Others leave it undefined.
+	 */
+	useAvailableInputForContextPercent?: boolean
 }
 
 export type ContextManagementResult = SummarizeResponse & {
@@ -262,12 +284,14 @@ export async function manageContext({
 	filesReadByRoo,
 	cwd,
 	rooIgnoreController,
+	useAvailableInputForContextPercent,
 }: ContextManagementOptions): Promise<ContextManagementResult> {
 	let error: string | undefined
 	let errorDetails: string | undefined
 	let cost = 0
 	// Calculate the maximum tokens reserved for response
-	const reservedTokens = maxTokens || ANTHROPIC_DEFAULT_MAX_TOKENS
+	// vscode-lm reports maxTokens: -1 (unlimited); a negative reserve must not distort the window math.
+	const reservedTokens = maxTokens && maxTokens > 0 ? maxTokens : ANTHROPIC_DEFAULT_MAX_TOKENS
 
 	// Estimate tokens for the last message (which is always a user message)
 	const lastMessage = messages[messages.length - 1]
@@ -304,7 +328,16 @@ export async function manageContext({
 	// If no specific threshold is found for the profile, fall back to global setting
 
 	if (autoCondenseContext) {
-		const contextPercent = (100 * prevContextTokens) / contextWindow
+		// Default: divide by the full context window. Opt-in (vscode-lm) divides by available input
+		// (window minus reserved output); an unknown/unlimited reserve (-1) falls back to the full window.
+		let contextPercent: number
+		if (useAvailableInputForContextPercent) {
+			const reservedForOutput = maxTokens && maxTokens > 0 ? maxTokens : 0
+			const availableInputTokens = contextWindow - reservedForOutput
+			contextPercent = availableInputTokens > 0 ? (100 * prevContextTokens) / availableInputTokens : 100
+		} else {
+			contextPercent = (100 * prevContextTokens) / contextWindow
+		}
 		if (contextPercent >= effectiveThreshold || prevContextTokens > allowedTokens) {
 			// Attempt to intelligently condense the context
 			const result = await summarizeConversation({
diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts
index 50d4674fd0..ce9e5bcec2 100644
--- a/src/core/task/Task.ts
+++ b/src/core/task/Task.ts
@@ -2688,9 +2688,13 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 								if (signal.aborted) {
 									reject(new Error("Request cancelled by user"))
 								} else {
-									signal.addEventListener("abort", () => {
-										reject(new Error("Request cancelled by user"))
-									}, { once: true })
+									signal.addEventListener(
+										"abort",
+										() => {
+											reject(new Error("Request cancelled by user"))
+										},
+										{ once: true },
+									)
 								}
 							})
 							return await Promise.race([nextPromise, abortPromise])
@@ -3734,7 +3738,10 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 			settings: this.apiConfiguration,
 		})
 
-		const contextWindow = modelInfo.contextWindow
+		// vscode-lm condenses against its static-table maxInputTokens (not the inflated live window);
+		// only it implements getCondenseContextWindow, so others fall back to the full contextWindow.
+		const contextWindow = this.api.getCondenseContextWindow?.() ?? modelInfo.contextWindow
+		const useAvailableInputForContextPercent = typeof this.api.getCondenseContextWindow === "function"
 
 		// Get the current profile ID using the helper method
 		const currentProfileId = this.getCurrentProfileId(state)
@@ -3803,6 +3810,7 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 				currentProfileId,
 				metadata,
 				environmentDetails,
+				useAvailableInputForContextPercent,
 			})
 
 			if (truncateResult.messages !== this.apiConversationHistory) {
@@ -3930,7 +3938,10 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 				settings: this.apiConfiguration,
 			})
 
-			const contextWindow = modelInfo.contextWindow
+			// vscode-lm condenses against its static-table maxInputTokens (not the inflated live window);
+			// only it implements getCondenseContextWindow, so others fall back to the full contextWindow.
+			const contextWindow = this.api.getCondenseContextWindow?.() ?? modelInfo.contextWindow
+			const useAvailableInputForContextPercent = typeof this.api.getCondenseContextWindow === "function"
 
 			// Get the current profile ID using the helper method
 			const currentProfileId = this.getCurrentProfileId(state)
@@ -3955,6 +3966,7 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 				profileThresholds,
 				currentProfileId,
 				lastMessageTokens,
+				useAvailableInputForContextPercent,
 			})
 
 			// Send condenseTaskContextStarted BEFORE manageContext to show in-progress indicator
@@ -4037,6 +4049,7 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 					filesReadByRoo: contextMgmtFilesReadByRoo,
 					cwd: this.cwd,
 					rooIgnoreController: this.rooIgnoreController,
+					useAvailableInputForContextPercent,
 				})
 				if (truncateResult.messages !== this.apiConversationHistory) {
 					await this.overwriteApiConversationHistory(truncateResult.messages)
@@ -4191,10 +4204,14 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 		const iterator = stream[Symbol.asyncIterator]()
 
 		// Set up abort handling - when the signal is aborted, clean up the controller reference
-		abortSignal.addEventListener("abort", () => {
-			console.log(`[Task#${this.taskId}.${this.instanceId}] AbortSignal triggered for current request`)
-			this.currentRequestAbortController = undefined
-		}, { once: true })
+		abortSignal.addEventListener(
+			"abort",
+			() => {
+				console.log(`[Task#${this.taskId}.${this.instanceId}] AbortSignal triggered for current request`)
+				this.currentRequestAbortController = undefined
+			},
+			{ once: true },
+		)
 
 		try {
 			// Awaiting first chunk to see if it will throw an error.
@@ -4206,9 +4223,13 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 				if (abortSignal.aborted) {
 					reject(new Error("Request cancelled by user"))
 				} else {
-					abortSignal.addEventListener("abort", () => {
-						reject(new Error("Request cancelled by user"))
-					}, { once: true })
+					abortSignal.addEventListener(
+						"abort",
+						() => {
+							reject(new Error("Request cancelled by user"))
+						},
+						{ once: true },
+					)
 				}
 			})
 
diff --git a/webview-ui/src/components/chat/TaskHeader.tsx b/webview-ui/src/components/chat/TaskHeader.tsx
index 4ddf5ef35c..927d3d057d 100644
--- a/webview-ui/src/components/chat/TaskHeader.tsx
+++ b/webview-ui/src/components/chat/TaskHeader.tsx
@@ -76,7 +76,8 @@ const TaskHeader = ({
 				: 0,
 		[model, modelId, apiConfiguration],
 	)
-	const reservedForOutput = maxTokens || 0
+	// vscode-lm reports maxTokens: -1 (unlimited); a negative reserve must not distort the window math.
+	const reservedForOutput = maxTokens && maxTokens > 0 ? maxTokens : 0
 
 	const condenseButton = (
 		<LucideIconButton
diff --git a/webview-ui/src/components/chat/__tests__/TaskHeader.spec.tsx b/webview-ui/src/components/chat/__tests__/TaskHeader.spec.tsx
index 41aa452ab1..252cbbb722 100644
--- a/webview-ui/src/components/chat/__tests__/TaskHeader.spec.tsx
+++ b/webview-ui/src/components/chat/__tests__/TaskHeader.spec.tsx
@@ -267,5 +267,16 @@ describe("TaskHeader", () => {
 			// Should show 0% when available input space is 0
 			expect(screen.getByText("0%")).toBeInTheDocument()
 		})
+
+		it("should treat a negative maxTokens (vscode-lm reports -1) as zero reserve", () => {
+			// vscode-lm reports maxTokens: -1 (unlimited). A kept -1 would make available space 11 (5/11 = ~45%);
+			// the window is deliberately tiny so that off-by-one shows up in the integer percentage (guarded: 5/10).
+			mockModelInfo = { contextWindow: 10, maxTokens: -1 }
+			mockMaxOutputTokens = -1
+
+			renderTaskHeader({ contextTokens: 5 })
+
+			expect(screen.getByText("50%")).toBeInTheDocument()
+		})
 	})
 })
diff --git a/webview-ui/src/components/ui/hooks/__tests__/useSelectedModel.spec.ts b/webview-ui/src/components/ui/hooks/__tests__/useSelectedModel.spec.ts
index 0dc42129c0..f4fd51cffc 100644
--- a/webview-ui/src/components/ui/hooks/__tests__/useSelectedModel.spec.ts
+++ b/webview-ui/src/components/ui/hooks/__tests__/useSelectedModel.spec.ts
@@ -14,6 +14,8 @@ import {
 	minimaxDefaultModelId,
 	minimaxModels,
 	openRouterDefaultModelId,
+	vscodeLlmModels,
+	vscodeLlmDefaultModelId,
 } from "@roo-code/types"
 
 import { useSelectedModel } from "../useSelectedModel"
@@ -772,4 +774,75 @@ describe("useSelectedModel", () => {
 			expect(result.current.info).toEqual(minimaxModels["MiniMax-M2.7"])
 		})
 	})
+
+	describe("vscode-lm provider", () => {
+		beforeEach(() => {
+			mockUseRouterModels.mockReturnValue({
+				data: {
+					openrouter: {},
+					requesty: {},
+					litellm: {},
+				},
+				isLoading: false,
+				isError: false,
+			} as any)
+
+			mockUseOpenRouterModelProviders.mockReturnValue({
+				data: {},
+				isLoading: false,
+				isError: false,
+			} as any)
+		})
+
+		it("resolves a listed family's contextWindow to its maxInputTokens", () => {
+			const family = vscodeLlmDefaultModelId
+			const apiConfiguration: ProviderSettings = {
+				apiProvider: "vscode-lm",
+				vsCodeLmModelSelector: { vendor: "copilot", family },
+			}
+
+			const wrapper = createWrapper()
+			const { result } = renderHook(() => useSelectedModel(apiConfiguration), { wrapper })
+
+			expect(result.current.provider).toBe("vscode-lm")
+			expect(result.current.id).toBe(`copilot/${family}`)
+			// getSelectedModel overrides info.contextWindow with the row's maxInputTokens, so the UI bar and the condense gate share one number.
+			expect(result.current.info?.contextWindow).toBe(vscodeLlmModels[family].maxInputTokens)
+			expect(result.current.info?.supportsImages).toBe(false)
+		})
+
+		it("pins a divergent family's contextWindow to maxInputTokens, not its advertised window", () => {
+			// claude-opus-4.8 is the row where contextWindow and maxInputTokens differ; a field swap to
+			// the advertised window would be caught here.
+			const family = "claude-opus-4.8"
+			const apiConfiguration: ProviderSettings = {
+				apiProvider: "vscode-lm",
+				vsCodeLmModelSelector: { vendor: "copilot", family },
+			}
+
+			const wrapper = createWrapper()
+			const { result } = renderHook(() => useSelectedModel(apiConfiguration), { wrapper })
+
+			expect(result.current.provider).toBe("vscode-lm")
+			expect(result.current.id).toBe(`copilot/${family}`)
+			expect(result.current.info?.contextWindow).toBe(vscodeLlmModels[family].maxInputTokens) // 197897
+			expect(result.current.info?.contextWindow).not.toBe(vscodeLlmModels[family].contextWindow) // NOT 679560
+			expect(result.current.info?.supportsImages).toBe(false)
+		})
+
+		it("falls back to the default model's window for an unlisted family (NOT 128000)", () => {
+			const apiConfiguration: ProviderSettings = {
+				apiProvider: "vscode-lm",
+				vsCodeLmModelSelector: { vendor: "copilot", family: "totally-unknown-family" },
+			}
+
+			const wrapper = createWrapper()
+			const { result } = renderHook(() => useSelectedModel(apiConfiguration), { wrapper })
+
+			// A family miss must resolve to the default model's maxInputTokens, not the 128000 window from openAiModelInfoSaneDefaults.
+			expect(result.current.info?.contextWindow).not.toBe(128000)
+			expect(result.current.info?.contextWindow).toBe(vscodeLlmModels[vscodeLlmDefaultModelId].maxInputTokens)
+			expect(result.current.info?.supportsImages).toBe(false)
+		})
+	})
 })
diff --git a/webview-ui/src/components/ui/hooks/useSelectedModel.ts b/webview-ui/src/components/ui/hooks/useSelectedModel.ts
index d3ebb6c0dd..ddc1a19755 100644
--- a/webview-ui/src/components/ui/hooks/useSelectedModel.ts
+++ b/webview-ui/src/components/ui/hooks/useSelectedModel.ts
@@ -310,8 +310,20 @@ function getSelectedModel({
 				? `${apiConfiguration.vsCodeLmModelSelector.vendor}/${apiConfiguration.vsCodeLmModelSelector.family}`
 				: vscodeLlmDefaultModelId
 			const modelFamily = apiConfiguration?.vsCodeLmModelSelector?.family ?? vscodeLlmDefaultModelId
-			const info = vscodeLlmModels[modelFamily as keyof typeof vscodeLlmModels]
-			return { id, info: { ...openAiModelInfoSaneDefaults, ...info, supportsImages: false } } // VSCode LM API currently doesn't support images.
+			// On a family miss, fall back to the default model entry, not openAiModelInfoSaneDefaults
+			// (whose 128K contextWindow would diverge from the gate and skew the bar).
+			const listedModel =
+				vscodeLlmModels[modelFamily as keyof typeof vscodeLlmModels] ?? vscodeLlmModels[vscodeLlmDefaultModelId]
+			// Set contextWindow = maxInputTokens so the UI bar shares one source of truth with the gate,
+			// whose primary window is getCondenseContextWindow() (static-table maxInputTokens); this
+			// info.contextWindow is only the gate's fallback.
+			const info: ModelInfo = {
+				...openAiModelInfoSaneDefaults,
+				...listedModel,
+				contextWindow: listedModel.maxInputTokens,
+				supportsImages: false, // VSCode LM API currently doesn't support images.
+			}
+			return { id, info }
 		}
 		case "sambanova": {
 			const id = apiConfiguration.apiModelId ?? defaultModelId