diff --git a/.changeset/vscode-lm-condense-fix.md b/.changeset/vscode-lm-condense-fix.md
new file mode 100644
index 00000000000..cb4ad77c3ce
--- /dev/null
+++ b/.changeset/vscode-lm-condense-fix.md
@@ -0,0 +1,5 @@
+---
+"roo-cline": patch
+---
+
+Fix unreliable automatic context condensing on the VS Code LM (vscode-lm) provider. The condense gate now treats the provider's `maxTokens: -1` (unlimited) as the default output reserve when computing the allowed-token limit, and measures the condense percentage against the available input space (context window minus the output reserve, or the full window when the reserve is unlimited). A new `getCondenseContextWindow()` seam makes the gate use the curated model `maxInputTokens` instead of the inflated live window. Also refreshes the VS Code LM model catalog and default model.
diff --git a/packages/types/src/__tests__/vscode-llm.spec.ts b/packages/types/src/__tests__/vscode-llm.spec.ts
new file mode 100644
index 00000000000..b03a3e0a600
--- /dev/null
+++ b/packages/types/src/__tests__/vscode-llm.spec.ts
@@ -0,0 +1,49 @@
+import { describe, it, expect } from "vitest"
+
+import { vscodeLlmModels, vscodeLlmDefaultModelId } from "../providers/vscode-llm.js"
+
+describe("vscodeLlmModels", () => {
+	it("exposes the opus-4.8 row with its measured maxInputTokens and contextWindow", () => {
+		// The VS Code LM API exposes only maxInputTokens; that is the value the UI reads from this
+		// table (useSelectedModel.ts). For claude-opus-4.8 the two fields intentionally DIVERGE:
+		// maxInputTokens (197897) is the enforced input ceiling, while contextWindow (679560) records
+		// the larger advertised window. The UI reads maxInputTokens, so the divergence is a deliberate
+		// tripwire — assert the actual on-disk literals rather than forcing equality.
+		// See GitHub issue simurg79/Roo-Code#10.
+		expect(vscodeLlmModels).toHaveProperty("claude-opus-4.8")
+		expect(vscodeLlmModels["claude-opus-4.8"].contextWindow).toBe(679560)
+		expect(vscodeLlmModels["claude-opus-4.8"].maxInputTokens).toBe(197897)
+	})
+
+	it("preserves the real window for models captured with a smaller maxInputTokens", () => {
+		expect(vscodeLlmModels["gpt-4o-mini"].maxInputTokens).toBe(12078)
+		expect(vscodeLlmModels["gpt-4o-mini"].contextWindow).toBe(12078)
+		expect(vscodeLlmModels["gemini-2.5-pro"].contextWindow).toBe(108594)
+		expect(vscodeLlmModels["gemini-2.5-pro"].maxInputTokens).toBe(108594)
+	})
+
+	it("keeps both window fields populated and positive for every row", () => {
+		// NOTE: contextWindow and maxInputTokens are intentionally ALLOWED to differ (claude-opus-4.8
+		// diverges: 679560 vs 197897). The UI reads maxInputTokens, and that divergence is a deliberate
+		// tripwire, so we do NOT assert contextWindow === maxInputTokens here (see simurg79/Roo-Code#10).
+		// The meaningful invariant is that every row carries positive integers for both fields; a
+		// missing/zero value would point to hand-authored drift rather than a real captured row.
+		for (const [family, model] of Object.entries(vscodeLlmModels)) {
+			expect(model.contextWindow, `${family}: contextWindow must be a positive integer`).toBeGreaterThan(0)
+			expect(model.maxInputTokens, `${family}: maxInputTokens must be a positive integer`).toBeGreaterThan(0)
+		}
+	})
+
+	it("excludes fabricated/internal/alias families and the dropped legacy rows", () => {
+		// Integrity guards: these were never part of the authoritative live capture, or were
+		// removed by the full table REPLACE. Their presence would signal hand-authored drift.
+		expect(vscodeLlmModels).not.toHaveProperty("claude-opus-4.7-high")
+		expect(vscodeLlmModels).not.toHaveProperty("claude-3.5-sonnet")
+		expect(vscodeLlmModels).not.toHaveProperty("claude-4-sonnet")
+	})
+
+	it("defaults to a model id that exists in the table", () => {
+		expect(vscodeLlmDefaultModelId).toBe("claude-sonnet-4.5")
+		expect(vscodeLlmModels).toHaveProperty(vscodeLlmDefaultModelId)
+	})
+})
diff --git a/packages/types/src/providers/vscode-llm.ts b/packages/types/src/providers/vscode-llm.ts
index efe06919134..b00bf4e8daa 100644
--- a/packages/types/src/providers/vscode-llm.ts
+++ b/packages/types/src/providers/vscode-llm.ts
@@ -2,189 +2,228 @@ import type { ModelInfo } from "../model.js"
 
 export type VscodeLlmModelId = keyof typeof vscodeLlmModels
 
-export const vscodeLlmDefaultModelId: VscodeLlmModelId = "claude-3.5-sonnet"
+export const vscodeLlmDefaultModelId: VscodeLlmModelId = "claude-sonnet-4.5"
 
-// https://docs.cline.bot/provider-config/vscode-language-model-api
+// Rows below were originally enumerated from `vscode.lm.selectChatModels({ vendor: "copilot" })`.
+// The VS Code LM API exposes ONLY `maxInputTokens` (there is no separate context-window field), and
+// that is the single value the runtime/condense gate enforces: getModel() sets
+// contextWindow = Math.max(0, client.maxInputTokens) in src/api/providers/vscode-lm.ts. So for every
+// row `maxInputTokens` IS the enforced context window (the UI reads it via useSelectedModel.ts), and
+// `contextWindow` normally mirrors it. The one deliberate exception is claude-opus-4.8, whose
+// `contextWindow` (679560) records the larger advertised window while `maxInputTokens` stays 197897.
+// These ceilings were measured empirically on 2026-06-18 (VS Code 1.125.0) by binary-searching the
+// single-message "Message exceeds token limit" threshold per model — they are the largest input the
+// backend actually accepts, which for several models is well below the value Copilot advertises:
+//   - claude-opus-4.8:                                   enforced 197897 (advertised 679560)
+//   - claude-opus-4.7 / 4.6, claude-sonnet-4.6,
+//     gemini-3.1-pro-preview, gemini-3.5-flash:          enforced ~197.9K
+//   - gpt-5.5 / gpt-5.4:                                 enforced ~268.4K
+// Guardrail: these are empirically measured — re-measure (do not hand-tune) if the models change.
+// See GitHub issue simurg79/Roo-Code#10 and myplans/VSCode LM Model Table Integrity/vscode_lm_opus_data_integrity_design.md.
 export const vscodeLlmModels = {
-	"gpt-3.5-turbo": {
-		contextWindow: 12114,
-		supportsImages: false,
+	"claude-opus-4.8": {
+		contextWindow: 679560,
+		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "gpt-3.5-turbo",
-		version: "gpt-3.5-turbo-0613",
-		name: "GPT 3.5 Turbo",
+		family: "claude-opus-4.8",
+		version: "claude-opus-4.8",
+		name: "Claude Opus 4.8",
 		supportsToolCalling: true,
-		maxInputTokens: 12114,
+		maxInputTokens: 197897,
 	},
-	"gpt-4o-mini": {
-		contextWindow: 12115,
-		supportsImages: false,
+	"claude-opus-4.7": {
+		contextWindow: 197897,
+		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "gpt-4o-mini",
-		version: "gpt-4o-mini-2024-07-18",
-		name: "GPT-4o mini",
+		family: "claude-opus-4.7",
+		version: "claude-opus-4.7",
+		name: "Claude Opus 4.7",
 		supportsToolCalling: true,
-		maxInputTokens: 12115,
+		maxInputTokens: 197897,
 	},
-	"gpt-4": {
-		contextWindow: 28501,
-		supportsImages: false,
+	"claude-opus-4.6": {
+		contextWindow: 197897,
+		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "gpt-4",
-		version: "gpt-4-0613",
-		name: "GPT 4",
+		family: "claude-opus-4.6",
+		version: "claude-opus-4.6",
+		name: "Claude Opus 4.6",
 		supportsToolCalling: true,
-		maxInputTokens: 28501,
+		maxInputTokens: 197897,
 	},
-	"gpt-4-0125-preview": {
-		contextWindow: 63826,
-		supportsImages: false,
+	"claude-opus-4.5": {
+		contextWindow: 167790,
+		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "gpt-4-turbo",
-		version: "gpt-4-0125-preview",
-		name: "GPT 4 Turbo",
+		family: "claude-opus-4.5",
+		version: "claude-opus-4.5",
+		name: "Claude Opus 4.5",
 		supportsToolCalling: true,
-		maxInputTokens: 63826,
+		maxInputTokens: 167790,
 	},
-	"gpt-4o": {
-		contextWindow: 63827,
+	"claude-sonnet-4.6": {
+		contextWindow: 197896,
 		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "gpt-4o",
-		version: "gpt-4o-2024-11-20",
-		name: "GPT-4o",
+		family: "claude-sonnet-4.6",
+		version: "claude-sonnet-4.6",
+		name: "Claude Sonnet 4.6",
 		supportsToolCalling: true,
-		maxInputTokens: 63827,
+		maxInputTokens: 197896,
 	},
-	o1: {
-		contextWindow: 19827,
-		supportsImages: false,
+	"claude-sonnet-4.5": {
+		contextWindow: 167790,
+		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "o1-ga",
-		version: "o1-2024-12-17",
-		name: "o1 (Preview)",
+		family: "claude-sonnet-4.5",
+		version: "claude-sonnet-4.5",
+		name: "Claude Sonnet 4.5",
 		supportsToolCalling: true,
-		maxInputTokens: 19827,
+		maxInputTokens: 167790,
 	},
-	"o3-mini": {
-		contextWindow: 63827,
-		supportsImages: false,
+	"claude-haiku-4.5": {
+		contextWindow: 135790,
+		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "o3-mini",
-		version: "o3-mini-2025-01-31",
-		name: "o3-mini",
+		family: "claude-haiku-4.5",
+		version: "claude-haiku-4.5",
+		name: "Claude Haiku 4.5",
 		supportsToolCalling: true,
-		maxInputTokens: 63827,
+		maxInputTokens: 135790,
 	},
-	"claude-3.5-sonnet": {
-		contextWindow: 81638,
+	"gpt-5.5": {
+		contextWindow: 268426,
 		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "claude-3.5-sonnet",
-		version: "claude-3.5-sonnet",
-		name: "Claude 3.5 Sonnet",
+		family: "gpt-5.5",
+		version: "gpt-5.5",
+		name: "GPT-5.5",
 		supportsToolCalling: true,
-		maxInputTokens: 81638,
+		maxInputTokens: 268426,
 	},
-	"claude-4-sonnet": {
-		contextWindow: 128000,
+	"gpt-5.4": {
+		contextWindow: 268424,
 		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "claude-sonnet-4",
-		version: "claude-sonnet-4",
-		name: "Claude Sonnet 4",
+		family: "gpt-5.4",
+		version: "gpt-5.4",
+		name: "GPT-5.4",
 		supportsToolCalling: true,
-		maxInputTokens: 111836,
+		maxInputTokens: 268424,
 	},
-	"gemini-2.0-flash-001": {
-		contextWindow: 127827,
+	"gpt-5.4-mini": {
+		contextWindow: 271790,
 		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "gemini-2.0-flash",
-		version: "gemini-2.0-flash-001",
-		name: "Gemini 2.0 Flash",
-		supportsToolCalling: false,
-		maxInputTokens: 127827,
+		family: "gpt-5.4-mini",
+		version: "gpt-5.4-mini",
+		name: "GPT-5.4 mini",
+		supportsToolCalling: true,
+		maxInputTokens: 271790,
 	},
-	"gemini-2.5-pro": {
-		contextWindow: 128000,
+	"gpt-5.3-codex": {
+		contextWindow: 271790,
 		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "gemini-2.5-pro",
-		version: "gemini-2.5-pro-preview-03-25",
-		name: "Gemini 2.5 Pro (Preview)",
+		family: "gpt-5.3-codex",
+		version: "gpt-5.3-codex",
+		name: "GPT-5.3-Codex",
 		supportsToolCalling: true,
-		maxInputTokens: 108637,
+		maxInputTokens: 271790,
 	},
-	"o4-mini": {
-		contextWindow: 128000,
+	"gpt-5-mini": {
+		contextWindow: 127790,
+		supportsImages: true,
+		supportsPromptCache: false,
+		inputPrice: 0,
+		outputPrice: 0,
+		family: "gpt-5-mini",
+		version: "gpt-5-mini",
+		name: "GPT-5 mini",
+		supportsToolCalling: true,
+		maxInputTokens: 127790,
+	},
+	"gpt-4o-mini": {
+		contextWindow: 12078,
 		supportsImages: false,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "o4-mini",
-		version: "o4-mini-2025-04-16",
-		name: "o4-mini (Preview)",
+		family: "gpt-4o-mini",
+		version: "gpt-4o-mini-2024-07-18",
+		name: "GPT-4o mini",
 		supportsToolCalling: true,
-		maxInputTokens: 111452,
+		maxInputTokens: 12078,
 	},
-	"gpt-4.1": {
-		contextWindow: 128000,
+	"gemini-3.1-pro-preview": {
+		contextWindow: 197897,
 		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "gpt-4.1",
-		version: "gpt-4.1-2025-04-14",
-		name: "GPT-4.1 (Preview)",
+		family: "gemini-3.1-pro-preview",
+		version: "gemini-3.1-pro-preview",
+		name: "Gemini 3.1 Pro (Preview)",
 		supportsToolCalling: true,
-		maxInputTokens: 111452,
+		maxInputTokens: 197897,
 	},
-	"gpt-5-mini": {
-		contextWindow: 128000,
+	"gemini-3.5-flash": {
+		contextWindow: 197895,
 		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "gpt-5-mini",
-		version: "gpt-5-mini",
-		name: "GPT-5 mini (Preview)",
+		family: "gemini-3.5-flash",
+		version: "gemini-3.5-flash",
+		name: "Gemini 3.5 Flash",
+		supportsToolCalling: true,
+		maxInputTokens: 197895,
+	},
+	"gemini-3-flash": {
+		contextWindow: 108594,
+		supportsImages: true,
+		supportsPromptCache: false,
+		inputPrice: 0,
+		outputPrice: 0,
+		family: "gemini-3-flash",
+		version: "gemini-3-flash-preview",
+		name: "Gemini 3 Flash (Preview)",
 		supportsToolCalling: true,
-		maxInputTokens: 108637,
+		maxInputTokens: 108594,
 	},
-	"gpt-5": {
-		contextWindow: 128000,
+	"gemini-2.5-pro": {
+		contextWindow: 108594,
 		supportsImages: true,
 		supportsPromptCache: false,
 		inputPrice: 0,
 		outputPrice: 0,
-		family: "gpt-5",
-		version: "gpt-5",
-		name: "GPT-5 (Preview)",
+		family: "gemini-2.5-pro",
+		version: "gemini-2.5-pro",
+		name: "Gemini 2.5 Pro",
 		supportsToolCalling: true,
-		maxInputTokens: 108637,
+		maxInputTokens: 108594,
 	},
 } as const satisfies Record<
 	string,
diff --git a/src/api/index.ts b/src/api/index.ts
index 40ba31f39af..aaec4d43e22 100644
--- a/src/api/index.ts
+++ b/src/api/index.ts
@@ -97,6 +97,17 @@ export interface ApiHandler {
 
 	getModel(): { id: string; info: ModelInfo }
 
+	/**
+	 * Optional: the context window (in tokens) to use for context-management /
+	 * auto-condense decisions, when it must differ from getModel().info.contextWindow.
+	 *
+	 * Only the VS Code LM (Copilot) provider overrides this, to measure usage against the
+	 * model's static `maxInputTokens` instead of the inflated live window VS Code reports.
+	 * Other providers leave it undefined and callers fall back to getModel().info.contextWindow,
+	 * so their behavior is unchanged.
+	 */
+	getCondenseContextWindow?(): number
+
 	/**
 	 * Counts tokens for content blocks
 	 * All providers extend BaseProvider which provides a default tiktoken implementation,
diff --git a/src/api/providers/__tests__/vscode-lm.spec.ts b/src/api/providers/__tests__/vscode-lm.spec.ts
index 305305d2289..7434b4de45d 100644
--- a/src/api/providers/__tests__/vscode-lm.spec.ts
+++ b/src/api/providers/__tests__/vscode-lm.spec.ts
@@ -55,6 +55,7 @@ vi.mock("vscode", () => {
 })
 
 import * as vscode from "vscode"
+import { openAiModelInfoSaneDefaults, vscodeLlmModels } from "@roo-code/types"
 import { VsCodeLmHandler } from "../vscode-lm"
 import type { ApiHandlerOptions } from "../../../shared/api"
 import type { Anthropic } from "@anthropic-ai/sdk"
@@ -102,6 +103,29 @@ describe("VsCodeLmHandler", () => {
 		})
 	})
 
+	describe("getCondenseContextWindow", () => {
+		it("uses the static-table maxInputTokens for a known VS Code LM family", () => {
+			const opusHandler = new VsCodeLmHandler({
+				vsCodeLmModelSelector: { vendor: "copilot", family: "claude-opus-4.8" },
+			})
+
+			// The condense gate must measure usage against the curated static window, not the
+			// inflated live Copilot window, so it agrees with the context bar.
+			expect(opusHandler.getCondenseContextWindow()).toBe(vscodeLlmModels["claude-opus-4.8"].maxInputTokens)
+
+			opusHandler.dispose()
+		})
+
+		it("falls back to the live model context window for families not in the static table", () => {
+			// "test-family" isn't in vscodeLlmModels; with a live client present we fall back to
+			// getModel().info.contextWindow (the live maxInputTokens).
+			handler["client"] = mockLanguageModelChat as unknown as vscode.LanguageModelChat
+
+			expect(handler.getCondenseContextWindow()).toBe(handler.getModel().info.contextWindow)
+			expect(handler.getCondenseContextWindow()).toBe(mockLanguageModelChat.maxInputTokens)
+		})
+	})
+
 	describe("createClient", () => {
 		it("should create client with selector", async () => {
 			const mockModel = { ...mockLanguageModelChat }
@@ -435,6 +459,38 @@ describe("VsCodeLmHandler", () => {
 			const model = handler.getModel()
 			expect(model.info).toBeDefined()
 		})
+
+		it("should use the full advertised maxInputTokens without an upper cap", async () => {
+			// The 128K cap was removed per simurg79/Roo-Code#10; contextWindow now reflects the
+			// provider-advertised maxInputTokens directly, even when large (~936K).
+			const mockModel = { ...mockLanguageModelChat, maxInputTokens: 936000 }
+			;(vscode.lm.selectChatModels as Mock).mockResolvedValue([mockModel])
+			handler["client"] = null
+			await handler.initializeClient()
+
+			const model = handler.getModel()
+			expect(model.info.contextWindow).toBe(936000)
+		})
+
+		it("should pass through a small maxInputTokens unchanged", async () => {
+			const mockModel = { ...mockLanguageModelChat, maxInputTokens: 4096 }
+			;(vscode.lm.selectChatModels as Mock).mockResolvedValue([mockModel])
+			handler["client"] = null
+			await handler.initializeClient()
+
+			const model = handler.getModel()
+			expect(model.info.contextWindow).toBe(4096)
+		})
+
+		it("should fall back to sane defaults when maxInputTokens is not a number", async () => {
+			const mockModel = { ...mockLanguageModelChat, maxInputTokens: undefined as unknown as number }
+			;(vscode.lm.selectChatModels as Mock).mockResolvedValue([mockModel])
+			handler["client"] = null
+			await handler.initializeClient()
+
+			const model = handler.getModel()
+			expect(model.info.contextWindow).toBe(openAiModelInfoSaneDefaults.contextWindow)
+		})
 	})
 
 	describe("countTokens", () => {
diff --git a/src/api/providers/vscode-lm.ts b/src/api/providers/vscode-lm.ts
index 8fb564a9d59..d730658b446 100644
--- a/src/api/providers/vscode-lm.ts
+++ b/src/api/providers/vscode-lm.ts
@@ -2,7 +2,7 @@ import { Anthropic } from "@anthropic-ai/sdk"
 import * as vscode from "vscode"
 import OpenAI from "openai"
 
-import { type ModelInfo, openAiModelInfoSaneDefaults } from "@roo-code/types"
+import { type ModelInfo, openAiModelInfoSaneDefaults, vscodeLlmModels } from "@roo-code/types"
 
 import type { ApiHandlerOptions } from "../../shared/api"
 import { SELECTOR_SEPARATOR, stringifyVsCodeLmModelSelector } from "../../shared/vsCodeSelectorUtils"
@@ -562,6 +562,28 @@ export class VsCodeLmHandler extends BaseProvider implements SingleCompletionHan
 		}
 	}
 
+	/**
+	 * Context window used for auto-condense / context-management decisions.
+	 *
+	 * VS Code's LM API reports `client.maxInputTokens` as Copilot's *advertised* window,
+	 * which is far larger than the realistic usable window; relying on it keeps auto-condense
+	 * from ever firing. For condense decisions we instead measure usage against the curated
+	 * static table's `maxInputTokens` — the same value the context bar uses via
+	 * `useSelectedModel` — so the gate and the gauge stay on one source of truth.
+	 *
+	 * Falls back to the live runtime window when the selected model isn't in the static table.
+	 */
+	getCondenseContextWindow(): number {
+		const family = this.client?.family ?? this.options.vsCodeLmModelSelector?.family
+		const staticModel = family ? vscodeLlmModels[family as keyof typeof vscodeLlmModels] : undefined
+
+		if (staticModel && typeof staticModel.maxInputTokens === "number" && staticModel.maxInputTokens > 0) {
+			return staticModel.maxInputTokens
+		}
+
+		return this.getModel().info.contextWindow
+	}
+
 	async completePrompt(prompt: string): Promise<string> {
 		try {
 			const client = await this.getClient()
diff --git a/src/core/context-management/__tests__/context-management.spec.ts b/src/core/context-management/__tests__/context-management.spec.ts
index 240269dd2c5..e239670338e 100644
--- a/src/core/context-management/__tests__/context-management.spec.ts
+++ b/src/core/context-management/__tests__/context-management.spec.ts
@@ -805,9 +805,11 @@ describe("Context Management", () => {
 			const summarizeSpy = vi.spyOn(condenseModule, "summarizeConversation")
 
 			const modelInfo = createModelInfo(100000, 30000)
-			// Set tokens to be below both the allowedTokens threshold and the percentage threshold
+			// Set tokens to be below both the allowedTokens threshold and the percentage threshold.
+			// Usage is measured against available input space (contextWindow - maxTokens = 70000),
+			// so 30000 / 70000 ~= 43% is below the 50% threshold.
 			const contextWindow = modelInfo.contextWindow
-			const totalTokens = 40000 // 40% of context window
+			const totalTokens = 30000 // ~43% of available input space (70000)
 			const messagesWithSmallContent = [
 				...messages.slice(0, -1),
 				{ ...messages[messages.length - 1], content: "" },
@@ -820,7 +822,7 @@ describe("Context Management", () => {
 				maxTokens: modelInfo.maxTokens,
 				apiHandler: mockApiHandler,
 				autoCondenseContext: true,
-				autoCondenseContextPercent: 50, // Set threshold to 50% - our tokens are at 40%
+				autoCondenseContextPercent: 50, // Threshold 50% - usage ~43% of available input space
 				systemPrompt: "System prompt",
 				taskId,
 				profileThresholds: {},
@@ -1503,8 +1505,9 @@ describe("Context Management", () => {
 
 		it("should return false when context percent is below threshold", () => {
 			const result = willManageContext({
-				totalTokens: 40000,
-				contextWindow: 100000, // 40% of context window
+				totalTokens: 30000,
+				// 30000 / (100000 - 30000) ~= 43% of available input space, below the 50% threshold
+				contextWindow: 100000,
 				maxTokens: 30000,
 				autoCondenseContext: true,
 				autoCondenseContextPercent: 50, // 50% threshold
@@ -1575,11 +1578,29 @@ describe("Context Management", () => {
 			expect(result).toBe(false)
 		})
 
+		it("should treat a negative maxTokens (vscode-lm reports -1) as the default reserve, not -1", () => {
+			// vscode-lm reports maxTokens: -1; a negative reserve must not inflate allowedTokens.
+			// New: reservedTokens = ANTHROPIC_DEFAULT_MAX_TOKENS (8192) -> allowedTokens = 100000*0.9 - 8192 = 81808.
+			// Old (maxTokens || DEFAULT kept -1) -> allowedTokens = 90000 - (-1) = 90001, which would be false here.
+			const result = willManageContext({
+				totalTokens: 85000, // Above the corrected allowedTokens (81808), below the buggy 90001
+				contextWindow: 100000,
+				maxTokens: -1,
+				autoCondenseContext: false, // Isolate the allowedTokens/reserve path
+				autoCondenseContextPercent: 100,
+				profileThresholds: {},
+				currentProfileId: "default",
+				lastMessageTokens: 0,
+			})
+			expect(result).toBe(true)
+		})
+
 		it("should include lastMessageTokens in the calculation", () => {
-			// Without lastMessageTokens: 49000 tokens = 49%
-			// With lastMessageTokens: 49000 + 2000 = 51000 tokens = 51%
+			// Usage is measured against available input space (contextWindow - maxTokens = 70000).
+			// Without lastMessageTokens: 34000 / 70000 ~= 49%
+			// With lastMessageTokens: (34000 + 2000) / 70000 ~= 51%
 			const resultWithoutLastMessage = willManageContext({
-				totalTokens: 49000,
+				totalTokens: 34000,
 				contextWindow: 100000,
 				maxTokens: 30000,
 				autoCondenseContext: true,
@@ -1591,19 +1612,144 @@ describe("Context Management", () => {
 			expect(resultWithoutLastMessage).toBe(false)
 
 			const resultWithLastMessage = willManageContext({
-				totalTokens: 49000,
+				totalTokens: 34000,
 				contextWindow: 100000,
 				maxTokens: 30000,
 				autoCondenseContext: true,
 				autoCondenseContextPercent: 50, // 50% threshold
 				profileThresholds: {},
 				currentProfileId: "default",
-				lastMessageTokens: 2000, // Pushes total to 51%
+				lastMessageTokens: 2000, // Pushes usage just over the 50% threshold
 			})
 			expect(resultWithLastMessage).toBe(true)
 		})
 	})
 
+	/**
+	 * Regression: the condense percentage must be measured against the AVAILABLE input
+	 * space (contextWindow - reservedForOutput), not the full contextWindow. This is the
+	 * real vscode-lm / claude-opus-4.8 failure: with a large window and a meaningful output
+	 * reserve, the old full-window denominator under-reported usage and condensation never
+	 * fired even though the UI gauge showed the context as effectively full.
+	 * See myplans/VSCode LM Model Table Integrity/vscode_lm_opus_data_integrity_design.md and
+	 * GitHub issue simurg79/Roo-Code#10.
+	 */
+	describe("contextPercent uses available input space (regression)", () => {
+		const createModelInfo = (contextWindow: number, maxTokens?: number): ModelInfo => ({
+			contextWindow,
+			supportsPromptCache: true,
+			maxTokens,
+		})
+
+		const messages: ApiMessage[] = [
+			{ role: "user", content: "First message" },
+			{ role: "assistant", content: "Second message" },
+			{ role: "user", content: "Third message" },
+			{ role: "assistant", content: "Fourth message" },
+			{ role: "user", content: "" },
+		]
+
+		it("condenses when usage clears the threshold only under the available-input denominator", () => {
+			// contextWindow 200000, reserved output 64000 -> availableInput 136000.
+			// prevContextTokens 100000, threshold 70%.
+			//   OLD math: 100 * 100000 / 200000        = 50.0% -> below 70 -> would NOT condense.
+			//   NEW math: 100 * 100000 / (200000-64000) = 73.5% -> >= 70 -> DOES condense.
+			// allowedTokens = 200000 * 0.9 - 64000 = 116000; 100000 > 116000 is false, so the
+			// percent path (not the absolute allowedTokens cap) is the sole trigger here.
+			const result = willManageContext({
+				totalTokens: 100000,
+				contextWindow: 200000,
+				maxTokens: 64000,
+				autoCondenseContext: true,
+				autoCondenseContextPercent: 70,
+				profileThresholds: {},
+				currentProfileId: "default",
+				lastMessageTokens: 0,
+			})
+			expect(result).toBe(true)
+		})
+
+		it("condenses at a 55% threshold that sits between the old (50%) and new (73.5%) percentages", () => {
+			// Same usage as above but threshold 55%, which sits between the two denominators:
+			//   OLD math: 100 * 100000 / 200000 = 50.0% -> below 55 -> would NOT condense.
+			//   NEW math: 100 * 100000 / 136000 = 73.5% -> >= 55 -> DOES condense.
+			const result = willManageContext({
+				totalTokens: 100000,
+				contextWindow: 200000,
+				maxTokens: 64000,
+				autoCondenseContext: true,
+				autoCondenseContextPercent: 55,
+				profileThresholds: {},
+				currentProfileId: "default",
+				lastMessageTokens: 0,
+			})
+			expect(result).toBe(true)
+		})
+
+		it("falls back to the full window when maxTokens is unknown/unlimited (vscode-lm reports -1)", () => {
+			// vscode-lm reports maxTokens: -1, so reservedForOutput falls back to 0 and the
+			// denominator is the full contextWindow. prevContextTokens 150000 / 200000 = 75% >= 70%.
+			// allowedTokens = 200000 * 0.9 - ANTHROPIC_DEFAULT_MAX_TOKENS(8192) = 171808; 150000 >
+			// 171808 is false, so the percent path is the sole trigger AND the -1 reserve does not
+			// corrupt allowedTokens.
+			const result = willManageContext({
+				totalTokens: 150000,
+				contextWindow: 200000,
+				maxTokens: -1,
+				autoCondenseContext: true,
+				autoCondenseContextPercent: 70,
+				profileThresholds: {},
+				currentProfileId: "default",
+				lastMessageTokens: 0,
+			})
+			expect(result).toBe(true)
+		})
+
+		it("drives manageContext to summarize via the available-input percent (not the allowedTokens cap)", async () => {
+			// End-to-end proof through manageContext: same 200000/64000/100000 case. Under the OLD
+			// full-window denominator (50%) this would skip condensation; under the NEW denominator
+			// (73.5% >= 70%) summarizeConversation must be invoked. allowedTokens (116000) is not
+			// exceeded by 100000, so the summarization is driven by the percent path alone.
+			const mockSummary = "Available-input regression summary"
+			const mockSummarizeResponse: condenseModule.SummarizeResponse = {
+				messages: [
+					{ role: "user", content: "First message" },
+					{ role: "user", content: mockSummary, isSummary: true },
+					{ role: "assistant", content: "Last message" },
+				],
+				summary: mockSummary,
+				cost: 0.02,
+				newContextTokens: 100,
+			}
+			const summarizeSpy = vi
+				.spyOn(condenseModule, "summarizeConversation")
+				.mockResolvedValue(mockSummarizeResponse)
+
+			const modelInfo = createModelInfo(200000, 64000)
+			const result = await manageContext({
+				messages,
+				totalTokens: 100000,
+				contextWindow: modelInfo.contextWindow,
+				maxTokens: modelInfo.maxTokens,
+				apiHandler: mockApiHandler,
+				autoCondenseContext: true,
+				autoCondenseContextPercent: 70,
+				systemPrompt: "System prompt",
+				taskId,
+				profileThresholds: {},
+				currentProfileId: "default",
+			})
+
+			expect(summarizeSpy).toHaveBeenCalled()
+			expect(result).toMatchObject({
+				messages: mockSummarizeResponse.messages,
+				summary: mockSummary,
+			})
+
+			summarizeSpy.mockRestore()
+		})
+	})
+
 	/**
 	 * Tests for newContextTokensAfterTruncation including system prompt
 	 */
diff --git a/src/core/context-management/index.ts b/src/core/context-management/index.ts
index 4d9608d8e44..2ec477d1037 100644
--- a/src/core/context-management/index.ts
+++ b/src/core/context-management/index.ts
@@ -166,13 +166,15 @@ export function willManageContext({
 }: WillManageContextOptions): boolean {
 	if (!autoCondenseContext) {
 		// When auto-condense is disabled, only truncation can occur
-		const reservedTokens = maxTokens || ANTHROPIC_DEFAULT_MAX_TOKENS
+		// vscode-lm reports maxTokens: -1 (unlimited); a negative reserve must not distort the window math.
+		const reservedTokens = maxTokens && maxTokens > 0 ? maxTokens : ANTHROPIC_DEFAULT_MAX_TOKENS
 		const prevContextTokens = totalTokens + lastMessageTokens
 		const allowedTokens = contextWindow * (1 - TOKEN_BUFFER_PERCENTAGE) - reservedTokens
 		return prevContextTokens > allowedTokens
 	}
 
-	const reservedTokens = maxTokens || ANTHROPIC_DEFAULT_MAX_TOKENS
+	// vscode-lm reports maxTokens: -1 (unlimited); a negative reserve must not distort the window math.
+	const reservedTokens = maxTokens && maxTokens > 0 ? maxTokens : ANTHROPIC_DEFAULT_MAX_TOKENS
 	const prevContextTokens = totalTokens + lastMessageTokens
 	const allowedTokens = contextWindow * (1 - TOKEN_BUFFER_PERCENTAGE) - reservedTokens
 
@@ -188,7 +190,14 @@ export function willManageContext({
 		// Invalid values fall back to global setting (effectiveThreshold already set)
 	}
 
-	const contextPercent = (100 * prevContextTokens) / contextWindow
+	// Measure usage against the available input space (context window minus the
+	// reserved output budget), matching the context gauge shown in the UI. Reserved
+	// output tokens can never hold conversation context, so this is the meaningful
+	// "how full is my usable input" figure. When the reserve is unknown/unlimited
+	// (e.g., vscode-lm reports -1), fall back to the full context window.
+	const reservedForOutput = maxTokens && maxTokens > 0 ? maxTokens : 0
+	const availableInputTokens = contextWindow - reservedForOutput
+	const contextPercent = availableInputTokens > 0 ? (100 * prevContextTokens) / availableInputTokens : 100
 	return contextPercent >= effectiveThreshold || prevContextTokens > allowedTokens
 }
 
@@ -263,7 +272,8 @@ export async function manageContext({
 	let errorDetails: string | undefined
 	let cost = 0
 	// Calculate the maximum tokens reserved for response
-	const reservedTokens = maxTokens || ANTHROPIC_DEFAULT_MAX_TOKENS
+	// vscode-lm reports maxTokens: -1 (unlimited); a negative reserve must not distort the window math.
+	const reservedTokens = maxTokens && maxTokens > 0 ? maxTokens : ANTHROPIC_DEFAULT_MAX_TOKENS
 
 	// Estimate tokens for the last message (which is always a user message)
 	const lastMessage = messages[messages.length - 1]
@@ -300,7 +310,14 @@ export async function manageContext({
 	// If no specific threshold is found for the profile, fall back to global setting
 
 	if (autoCondenseContext) {
-		const contextPercent = (100 * prevContextTokens) / contextWindow
+		// Measure usage against the available input space (context window minus the
+		// reserved output budget), matching the context gauge shown in the UI. Reserved
+		// output tokens can never hold conversation context, so this is the meaningful
+		// "how full is my usable input" figure. When the reserve is unknown/unlimited
+		// (e.g., vscode-lm reports -1), fall back to the full context window.
+		const reservedForOutput = maxTokens && maxTokens > 0 ? maxTokens : 0
+		const availableInputTokens = contextWindow - reservedForOutput
+		const contextPercent = availableInputTokens > 0 ? (100 * prevContextTokens) / availableInputTokens : 100
 		if (contextPercent >= effectiveThreshold || prevContextTokens > allowedTokens) {
 			// Attempt to intelligently condense the context
 			const result = await summarizeConversation({
diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts
index 97f07fcc7aa..c4a8927e826 100644
--- a/src/core/task/Task.ts
+++ b/src/core/task/Task.ts
@@ -3727,7 +3727,10 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 			settings: this.apiConfiguration,
 		})
 
-		const contextWindow = modelInfo.contextWindow
+		// VS Code LM (Copilot) measures usage against its static-table maxInputTokens, not the
+		// inflated live window, so context management runs in line with the context bar. Every
+		// other provider returns undefined here and falls back to modelInfo.contextWindow.
+		const contextWindow = this.api.getCondenseContextWindow?.() ?? modelInfo.contextWindow
 
 		// Get the current profile ID using the helper method
 		const currentProfileId = this.getCurrentProfileId(state)
@@ -3917,7 +3920,10 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
 				settings: this.apiConfiguration,
 			})
 
-			const contextWindow = modelInfo.contextWindow
+			// VS Code LM (Copilot) measures usage against its static-table maxInputTokens, not the
+			// inflated live window, so auto-condense fires in line with the context bar. Every other
+			// provider returns undefined here and falls back to modelInfo.contextWindow.
+			const contextWindow = this.api.getCondenseContextWindow?.() ?? modelInfo.contextWindow
 
 			// Get the current profile ID using the helper method
 			const currentProfileId = this.getCurrentProfileId(state)
diff --git a/src/package.json b/src/package.json
index 06a5a0c7b18..b8c4d45ec45 100644
--- a/src/package.json
+++ b/src/package.json
@@ -3,7 +3,7 @@
 	"displayName": "%extension.displayName%",
 	"description": "%extension.description%",
 	"publisher": "RooVeterinaryInc",
-	"version": "3.53.0",
+	"version": "3.53.1",
 	"icon": "assets/icons/icon.png",
 	"galleryBanner": {
 		"color": "#617A91",
diff --git a/webview-ui/src/components/chat/TaskHeader.tsx b/webview-ui/src/components/chat/TaskHeader.tsx
index 8479f90906b..07cdc33821b 100644
--- a/webview-ui/src/components/chat/TaskHeader.tsx
+++ b/webview-ui/src/components/chat/TaskHeader.tsx
@@ -76,7 +76,8 @@ const TaskHeader = ({
 				: 0,
 		[model, modelId, apiConfiguration],
 	)
-	const reservedForOutput = maxTokens || 0
+	// vscode-lm reports maxTokens: -1 (unlimited); a negative reserve must not distort the window math.
+	const reservedForOutput = maxTokens && maxTokens > 0 ? maxTokens : 0
 
 	const condenseButton = (
 		<LucideIconButton
diff --git a/webview-ui/src/components/chat/__tests__/TaskHeader.spec.tsx b/webview-ui/src/components/chat/__tests__/TaskHeader.spec.tsx
index 41aa452ab1b..9fe5cdadc73 100644
--- a/webview-ui/src/components/chat/__tests__/TaskHeader.spec.tsx
+++ b/webview-ui/src/components/chat/__tests__/TaskHeader.spec.tsx
@@ -267,5 +267,17 @@ describe("TaskHeader", () => {
 			// Should show 0% when available input space is 0
 			expect(screen.getByText("0%")).toBeInTheDocument()
 		})
+
+		it("should treat a negative maxTokens (vscode-lm reports -1) as zero reserve", () => {
+			// vscode-lm reports maxTokens: -1; a negative reserve must not inflate the denominator.
+			// Small values keep the two cases apart: 5 / 20 = 25% with a zero reserve, while a leaked -1
+			// gives 5 / 21 ~ 23.8%. (250 / 1001 ~ 24.98% is indistinguishable from 25% at whole percents.)
+			mockModelInfo = { contextWindow: 20, maxTokens: -1 }
+			mockMaxOutputTokens = -1
+
+			renderTaskHeader({ contextTokens: 5 })
+
+			expect(screen.getByText("25%")).toBeInTheDocument()
+		})
 	})
 })
diff --git a/webview-ui/src/components/ui/hooks/__tests__/useSelectedModel.spec.ts b/webview-ui/src/components/ui/hooks/__tests__/useSelectedModel.spec.ts
index 0dc42129c08..431b83c2090 100644
--- a/webview-ui/src/components/ui/hooks/__tests__/useSelectedModel.spec.ts
+++ b/webview-ui/src/components/ui/hooks/__tests__/useSelectedModel.spec.ts
@@ -14,6 +14,8 @@ import {
 	minimaxDefaultModelId,
 	minimaxModels,
 	openRouterDefaultModelId,
+	vscodeLlmModels,
+	vscodeLlmDefaultModelId,
 } from "@roo-code/types"
 
 import { useSelectedModel } from "../useSelectedModel"
@@ -772,4 +774,55 @@ describe("useSelectedModel", () => {
 			expect(result.current.info).toEqual(minimaxModels["MiniMax-M2.7"])
 		})
 	})
+
+	describe("vscode-lm provider", () => {
+		beforeEach(() => {
+			mockUseRouterModels.mockReturnValue({
+				data: { openrouter: {}, requesty: {}, litellm: {} },
+				isLoading: false,
+				isError: false,
+			} as any)
+
+			mockUseOpenRouterModelProviders.mockReturnValue({
+				data: {},
+				isLoading: false,
+				isError: false,
+			} as any)
+		})
+
+		it("resolves a listed family's contextWindow to its maxInputTokens (the value the gate uses)", () => {
+			const listedFamily = vscodeLlmDefaultModelId
+			const apiConfiguration: ProviderSettings = {
+				apiProvider: "vscode-lm",
+				vsCodeLmModelSelector: { vendor: "copilot", family: listedFamily },
+			}
+
+			const wrapper = createWrapper()
+			const { result } = renderHook(() => useSelectedModel(apiConfiguration), { wrapper })
+
+			expect(result.current.provider).toBe("vscode-lm")
+			expect(result.current.id).toBe(`copilot/${listedFamily}`)
+			// contextWindow MUST equal the static row's maxInputTokens (the value asserted below), not the
+			// empirically-measured contextWindow field on that same row.
+			expect(result.current.info?.contextWindow).toBe(vscodeLlmModels[listedFamily].maxInputTokens)
+			expect(result.current.info?.supportsImages).toBe(false)
+		})
+
+		it("falls back to the default model's window for an unlisted family (NOT 128000)", () => {
+			const apiConfiguration: ProviderSettings = {
+				apiProvider: "vscode-lm",
+				vsCodeLmModelSelector: { vendor: "copilot", family: "totally-unknown-family" },
+			}
+
+			const wrapper = createWrapper()
+			const { result } = renderHook(() => useSelectedModel(apiConfiguration), { wrapper })
+
+			expect(result.current.provider).toBe("vscode-lm")
+			// An unlisted family must not silently collapse to the 128K openAiModelInfoSaneDefaults window,
+			// which would diverge from the gate and break the context bar / auto-condense.
+			expect(result.current.info?.contextWindow).not.toBe(128000)
+			expect(result.current.info?.contextWindow).toBe(vscodeLlmModels[vscodeLlmDefaultModelId].maxInputTokens)
+			expect(result.current.info?.supportsImages).toBe(false)
+		})
+	})
 })
diff --git a/webview-ui/src/components/ui/hooks/useSelectedModel.ts b/webview-ui/src/components/ui/hooks/useSelectedModel.ts
index bf78236b824..9bd6c45a2a3 100644
--- a/webview-ui/src/components/ui/hooks/useSelectedModel.ts
+++ b/webview-ui/src/components/ui/hooks/useSelectedModel.ts
@@ -299,8 +299,21 @@ function getSelectedModel({
 				? `${apiConfiguration.vsCodeLmModelSelector.vendor}/${apiConfiguration.vsCodeLmModelSelector.family}`
 				: vscodeLlmDefaultModelId
 			const modelFamily = apiConfiguration?.vsCodeLmModelSelector?.family ?? vscodeLlmDefaultModelId
-			const info = vscodeLlmModels[modelFamily as keyof typeof vscodeLlmModels]
-			return { id, info: { ...openAiModelInfoSaneDefaults, ...info, supportsImages: false } } // VSCode LM API currently doesn't support images.
+			// On a family miss, fall back to the default model entry instead of openAiModelInfoSaneDefaults,
+			// whose 128K contextWindow would diverge from the condense gate's window and could make the
+			// bar read >100% while auto-condense never fires.
+			const listedModel =
+				vscodeLlmModels[modelFamily as keyof typeof vscodeLlmModels] ?? vscodeLlmModels[vscodeLlmDefaultModelId]
+			// contextWindow MUST equal the static-table maxInputTokens: for vscode-lm the condense gate reads
+			// it via api.getCondenseContextWindow() (see Task.ts), not the live getModel().info.contextWindow
+			// from src/api/providers/vscode-lm.ts, so the UI bar and the gate share one source of truth.
+			const info: ModelInfo = {
+				...openAiModelInfoSaneDefaults,
+				...listedModel,
+				contextWindow: listedModel.maxInputTokens,
+				supportsImages: false, // VSCode LM API currently doesn't support images.
+			}
+			return { id, info }
 		}
 		case "sambanova": {
 			const id = apiConfiguration.apiModelId ?? defaultModelId