From 8ed34559f5556f44412a1747ae9a353d78d802ec Mon Sep 17 00:00:00 2001 From: Bertan Ari Date: Mon, 22 Jun 2026 11:02:48 -0700 Subject: [PATCH 1/2] fix(vscode-lm): reliable auto context condensing + 3.53.1 Treat maxTokens:-1 (unlimited) as the default output reserve instead of a hard cap, and measure contextPercent against available input space so the condense gate fires reliably on the VS Code LM provider. Add a getCondenseContextWindow() seam that drives the condense gate from the curated model maxInputTokens rather than the inflated live context window. Refresh the VS Code LM model catalog and update the default model. Bump the extension to 3.53.1 and add the vscode-lm-condense-fix changeset. --- .changeset/v3.54.0.md | 15 -- .changeset/vscode-lm-condense-fix.md | 5 + .../types/src/__tests__/vscode-llm.spec.ts | 49 ++++ packages/types/src/providers/vscode-llm.ts | 237 ++++++++++-------- src/api/index.ts | 11 + src/api/providers/__tests__/vscode-lm.spec.ts | 56 +++++ src/api/providers/vscode-lm.ts | 24 +- .../__tests__/context-management.spec.ts | 166 +++++++++++- src/core/context-management/index.ts | 27 +- src/core/task/Task.ts | 10 +- src/package.json | 2 +- webview-ui/src/components/chat/TaskHeader.tsx | 3 +- .../chat/__tests__/TaskHeader.spec.tsx | 12 + .../hooks/__tests__/useSelectedModel.spec.ts | 53 ++++ .../components/ui/hooks/useSelectedModel.ts | 17 +- 15 files changed, 551 insertions(+), 136 deletions(-) delete mode 100644 .changeset/v3.54.0.md create mode 100644 .changeset/vscode-lm-condense-fix.md create mode 100644 packages/types/src/__tests__/vscode-llm.spec.ts diff --git a/.changeset/v3.54.0.md b/.changeset/v3.54.0.md deleted file mode 100644 index f2817ede2ac..00000000000 --- a/.changeset/v3.54.0.md +++ /dev/null @@ -1,15 +0,0 @@ ---- -"roo-cline": minor ---- - -- Remove: Roo Code Cloud and eval infrastructure from the extension, CLI, workflows, and package surfaces so the release is focused on the standalone extension (PR #12328 by @mrubens) -- 
Remove: All telemetry collection and analytics plumbing across the extension, website, shared types, provider flows, and related tests (PR #12324 by @mrubens) -- Remove: MDM and organization membership enforcement, including host wiring, webview state, user-facing messages, and locale strings (PR #12323 by @mrubens) -- Remove: The MCP marketplace, marketplace services, webview marketplace UI, package contributions, and related localized copy (PR #12326 by @mrubens) -- Update: Extension-facing support, diagnostics, and announcement content for the final Roo Code release, including GitHub help paths and links to Roomote, ZooCode, and Cline (PR #12341 by @brunobergher) -- Add: A cleaned docs app with GitHub Pages deployment support (PR #12344 by @brunobergher) -- Fix: Configure the docs GitHub Pages base URL so deployed assets and canonical paths load correctly under the repository Pages path (PR #12370 by @mrubens) -- Update: Point docs links in the root README, localized READMEs, and web app copy to the current GitHub Pages docs URL (PR #12371 by @mrubens) -- Remove: Stale `roocode.github.io` docs references, including the old CNAME and outdated docs README and robots.txt URLs (PR #12372 by @mrubens) -- Update: The website to focus almost entirely on the Roo Code extension and remove cloud, team, enterprise, provider, pricing, Slack, and Linear product pages (PR #12180 by @brunobergher) -- Remove: Contributor, community, social channel, and tutorial references from README files, docs, website copy, issue templates, and workflows (PR #12347 by @brunobergher) diff --git a/.changeset/vscode-lm-condense-fix.md b/.changeset/vscode-lm-condense-fix.md new file mode 100644 index 00000000000..cb4ad77c3ce --- /dev/null +++ b/.changeset/vscode-lm-condense-fix.md @@ -0,0 +1,5 @@ +--- +"roo-cline": patch +--- + +Fix unreliable automatic context condensing on the VS Code LM (vscode-lm) provider. 
The condense gate now treats the provider's `maxTokens: -1` (unlimited) as the default output reserve and measures usage against available input space, and a new `getCondenseContextWindow()` seam makes the gate use the curated model `maxInputTokens` instead of the inflated live window. Also refreshes the VS Code LM model catalog and default model. diff --git a/packages/types/src/__tests__/vscode-llm.spec.ts b/packages/types/src/__tests__/vscode-llm.spec.ts new file mode 100644 index 00000000000..b03a3e0a600 --- /dev/null +++ b/packages/types/src/__tests__/vscode-llm.spec.ts @@ -0,0 +1,49 @@ +import { describe, it, expect } from "vitest" + +import { vscodeLlmModels, vscodeLlmDefaultModelId } from "../providers/vscode-llm.js" + +describe("vscodeLlmModels", () => { + it("exposes the opus-4.8 row with its measured maxInputTokens and contextWindow", () => { + // The VS Code LM API exposes only maxInputTokens; that is the value the UI reads from this + // table (useSelectedModel.ts). For claude-opus-4.8 the two fields intentionally DIVERGE: + // maxInputTokens (197897) is the enforced input ceiling, while contextWindow (679560) records + // the larger advertised window. The UI reads maxInputTokens, so the divergence is a deliberate + // tripwire — assert the actual on-disk literals rather than forcing equality. + // See GitHub issue simurg79/Roo-Code#10. 
+ expect(vscodeLlmModels).toHaveProperty("claude-opus-4.8") + expect(vscodeLlmModels["claude-opus-4.8"].contextWindow).toBe(679560) + expect(vscodeLlmModels["claude-opus-4.8"].maxInputTokens).toBe(197897) + }) + + it("preserves the real window for models captured with a smaller maxInputTokens", () => { + expect(vscodeLlmModels["gpt-4o-mini"].maxInputTokens).toBe(12078) + expect(vscodeLlmModels["gpt-4o-mini"].contextWindow).toBe(12078) + expect(vscodeLlmModels["gemini-2.5-pro"].contextWindow).toBe(108594) + expect(vscodeLlmModels["gemini-2.5-pro"].maxInputTokens).toBe(108594) + }) + + it("keeps both window fields populated and positive for every row", () => { + // NOTE: contextWindow and maxInputTokens are intentionally ALLOWED to differ (claude-opus-4.8 + // diverges: 679560 vs 197897). The UI reads maxInputTokens, and that divergence is a deliberate + // tripwire, so we do NOT assert contextWindow === maxInputTokens here (see simurg79/Roo-Code#10). + // The meaningful invariant is that every row carries positive integers for both fields; a + // missing/zero value would point to hand-authored drift rather than a real captured row. + for (const [family, model] of Object.entries(vscodeLlmModels)) { + expect(model.contextWindow, `${family}: contextWindow must be a positive integer`).toBeGreaterThan(0) + expect(model.maxInputTokens, `${family}: maxInputTokens must be a positive integer`).toBeGreaterThan(0) + } + }) + + it("excludes fabricated/internal/alias families and the dropped legacy rows", () => { + // Integrity guards: these were never part of the authoritative live capture, or were + // removed by the full table REPLACE. Their presence would signal hand-authored drift. 
+ expect(vscodeLlmModels).not.toHaveProperty("claude-opus-4.7-high") + expect(vscodeLlmModels).not.toHaveProperty("claude-3.5-sonnet") + expect(vscodeLlmModels).not.toHaveProperty("claude-4-sonnet") + }) + + it("defaults to a model id that exists in the table", () => { + expect(vscodeLlmDefaultModelId).toBe("claude-sonnet-4.5") + expect(vscodeLlmModels).toHaveProperty(vscodeLlmDefaultModelId) + }) +}) diff --git a/packages/types/src/providers/vscode-llm.ts b/packages/types/src/providers/vscode-llm.ts index efe06919134..b00bf4e8daa 100644 --- a/packages/types/src/providers/vscode-llm.ts +++ b/packages/types/src/providers/vscode-llm.ts @@ -2,189 +2,228 @@ import type { ModelInfo } from "../model.js" export type VscodeLlmModelId = keyof typeof vscodeLlmModels -export const vscodeLlmDefaultModelId: VscodeLlmModelId = "claude-3.5-sonnet" +export const vscodeLlmDefaultModelId: VscodeLlmModelId = "claude-sonnet-4.5" -// https://docs.cline.bot/provider-config/vscode-language-model-api +// Rows below were originally enumerated from `vscode.lm.selectChatModels({ vendor: "copilot" })`. +// The VS Code LM API exposes ONLY `maxInputTokens` (there is no separate context-window field), and +// that is the single value the runtime/condense gate enforces: getModel() sets +// contextWindow = Math.max(0, client.maxInputTokens) in src/api/providers/vscode-lm.ts. So for every +// row `maxInputTokens` IS the enforced context window, and `contextWindow` normally mirrors it for +// information only (the UI reads maxInputTokens via useSelectedModel.ts). The one deliberate exception +// is claude-opus-4.8, whose contextWindow (679560) intentionally differs from its maxInputTokens (197897). 
+// These ceilings were measured empirically on 2026-06-18 (VS Code 1.125.0) by binary-searching the +// single-message "Message exceeds token limit" threshold per model — they are the largest input the +// backend actually accepts, which for several models is well below the value Copilot advertises: +// - claude-opus-4.8: enforced 679560 +// - claude-opus-4.7 / 4.6, claude-sonnet-4.6, +// gemini-3.1-pro-preview, gemini-3.5-flash: enforced ~197.9K +// - gpt-5.5 / gpt-5.4: enforced ~268.4K +// Guardrail: these are empirically measured — re-measure (do not hand-tune) if the models change. +// See GitHub issue simurg79/Roo-Code#10 and myplans/VSCode LM Model Table Integrity/vscode_lm_opus_data_integrity_design.md. export const vscodeLlmModels = { - "gpt-3.5-turbo": { - contextWindow: 12114, - supportsImages: false, + "claude-opus-4.8": { + contextWindow: 679560, + supportsImages: true, supportsPromptCache: false, inputPrice: 0, outputPrice: 0, - family: "gpt-3.5-turbo", - version: "gpt-3.5-turbo-0613", - name: "GPT 3.5 Turbo", + family: "claude-opus-4.8", + version: "claude-opus-4.8", + name: "Claude Opus 4.8", supportsToolCalling: true, - maxInputTokens: 12114, + maxInputTokens: 197897, }, - "gpt-4o-mini": { - contextWindow: 12115, - supportsImages: false, + "claude-opus-4.7": { + contextWindow: 197897, + supportsImages: true, supportsPromptCache: false, inputPrice: 0, outputPrice: 0, - family: "gpt-4o-mini", - version: "gpt-4o-mini-2024-07-18", - name: "GPT-4o mini", + family: "claude-opus-4.7", + version: "claude-opus-4.7", + name: "Claude Opus 4.7", supportsToolCalling: true, - maxInputTokens: 12115, + maxInputTokens: 197897, }, - "gpt-4": { - contextWindow: 28501, - supportsImages: false, + "claude-opus-4.6": { + contextWindow: 197897, + supportsImages: true, supportsPromptCache: false, inputPrice: 0, outputPrice: 0, - family: "gpt-4", - version: "gpt-4-0613", - name: "GPT 4", + family: "claude-opus-4.6", + version: "claude-opus-4.6", + name: "Claude Opus 4.6", 
supportsToolCalling: true, - maxInputTokens: 28501, + maxInputTokens: 197897, }, - "gpt-4-0125-preview": { - contextWindow: 63826, - supportsImages: false, + "claude-opus-4.5": { + contextWindow: 167790, + supportsImages: true, supportsPromptCache: false, inputPrice: 0, outputPrice: 0, - family: "gpt-4-turbo", - version: "gpt-4-0125-preview", - name: "GPT 4 Turbo", + family: "claude-opus-4.5", + version: "claude-opus-4.5", + name: "Claude Opus 4.5", supportsToolCalling: true, - maxInputTokens: 63826, + maxInputTokens: 167790, }, - "gpt-4o": { - contextWindow: 63827, + "claude-sonnet-4.6": { + contextWindow: 197896, supportsImages: true, supportsPromptCache: false, inputPrice: 0, outputPrice: 0, - family: "gpt-4o", - version: "gpt-4o-2024-11-20", - name: "GPT-4o", + family: "claude-sonnet-4.6", + version: "claude-sonnet-4.6", + name: "Claude Sonnet 4.6", supportsToolCalling: true, - maxInputTokens: 63827, + maxInputTokens: 197896, }, - o1: { - contextWindow: 19827, - supportsImages: false, + "claude-sonnet-4.5": { + contextWindow: 167790, + supportsImages: true, supportsPromptCache: false, inputPrice: 0, outputPrice: 0, - family: "o1-ga", - version: "o1-2024-12-17", - name: "o1 (Preview)", + family: "claude-sonnet-4.5", + version: "claude-sonnet-4.5", + name: "Claude Sonnet 4.5", supportsToolCalling: true, - maxInputTokens: 19827, + maxInputTokens: 167790, }, - "o3-mini": { - contextWindow: 63827, - supportsImages: false, + "claude-haiku-4.5": { + contextWindow: 135790, + supportsImages: true, supportsPromptCache: false, inputPrice: 0, outputPrice: 0, - family: "o3-mini", - version: "o3-mini-2025-01-31", - name: "o3-mini", + family: "claude-haiku-4.5", + version: "claude-haiku-4.5", + name: "Claude Haiku 4.5", supportsToolCalling: true, - maxInputTokens: 63827, + maxInputTokens: 135790, }, - "claude-3.5-sonnet": { - contextWindow: 81638, + "gpt-5.5": { + contextWindow: 268426, supportsImages: true, supportsPromptCache: false, inputPrice: 0, outputPrice: 0, - family: 
"claude-3.5-sonnet", - version: "claude-3.5-sonnet", - name: "Claude 3.5 Sonnet", + family: "gpt-5.5", + version: "gpt-5.5", + name: "GPT-5.5", supportsToolCalling: true, - maxInputTokens: 81638, + maxInputTokens: 268426, }, - "claude-4-sonnet": { - contextWindow: 128000, + "gpt-5.4": { + contextWindow: 268424, supportsImages: true, supportsPromptCache: false, inputPrice: 0, outputPrice: 0, - family: "claude-sonnet-4", - version: "claude-sonnet-4", - name: "Claude Sonnet 4", + family: "gpt-5.4", + version: "gpt-5.4", + name: "GPT-5.4", supportsToolCalling: true, - maxInputTokens: 111836, + maxInputTokens: 268424, }, - "gemini-2.0-flash-001": { - contextWindow: 127827, + "gpt-5.4-mini": { + contextWindow: 271790, supportsImages: true, supportsPromptCache: false, inputPrice: 0, outputPrice: 0, - family: "gemini-2.0-flash", - version: "gemini-2.0-flash-001", - name: "Gemini 2.0 Flash", - supportsToolCalling: false, - maxInputTokens: 127827, + family: "gpt-5.4-mini", + version: "gpt-5.4-mini", + name: "GPT-5.4 mini", + supportsToolCalling: true, + maxInputTokens: 271790, }, - "gemini-2.5-pro": { - contextWindow: 128000, + "gpt-5.3-codex": { + contextWindow: 271790, supportsImages: true, supportsPromptCache: false, inputPrice: 0, outputPrice: 0, - family: "gemini-2.5-pro", - version: "gemini-2.5-pro-preview-03-25", - name: "Gemini 2.5 Pro (Preview)", + family: "gpt-5.3-codex", + version: "gpt-5.3-codex", + name: "GPT-5.3-Codex", supportsToolCalling: true, - maxInputTokens: 108637, + maxInputTokens: 271790, }, - "o4-mini": { - contextWindow: 128000, + "gpt-5-mini": { + contextWindow: 127790, + supportsImages: true, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + family: "gpt-5-mini", + version: "gpt-5-mini", + name: "GPT-5 mini", + supportsToolCalling: true, + maxInputTokens: 127790, + }, + "gpt-4o-mini": { + contextWindow: 12078, supportsImages: false, supportsPromptCache: false, inputPrice: 0, outputPrice: 0, - family: "o4-mini", - version: 
"o4-mini-2025-04-16", - name: "o4-mini (Preview)", + family: "gpt-4o-mini", + version: "gpt-4o-mini-2024-07-18", + name: "GPT-4o mini", supportsToolCalling: true, - maxInputTokens: 111452, + maxInputTokens: 12078, }, - "gpt-4.1": { - contextWindow: 128000, + "gemini-3.1-pro-preview": { + contextWindow: 197897, supportsImages: true, supportsPromptCache: false, inputPrice: 0, outputPrice: 0, - family: "gpt-4.1", - version: "gpt-4.1-2025-04-14", - name: "GPT-4.1 (Preview)", + family: "gemini-3.1-pro-preview", + version: "gemini-3.1-pro-preview", + name: "Gemini 3.1 Pro (Preview)", supportsToolCalling: true, - maxInputTokens: 111452, + maxInputTokens: 197897, }, - "gpt-5-mini": { - contextWindow: 128000, + "gemini-3.5-flash": { + contextWindow: 197895, supportsImages: true, supportsPromptCache: false, inputPrice: 0, outputPrice: 0, - family: "gpt-5-mini", - version: "gpt-5-mini", - name: "GPT-5 mini (Preview)", + family: "gemini-3.5-flash", + version: "gemini-3.5-flash", + name: "Gemini 3.5 Flash", + supportsToolCalling: true, + maxInputTokens: 197895, + }, + "gemini-3-flash": { + contextWindow: 108594, + supportsImages: true, + supportsPromptCache: false, + inputPrice: 0, + outputPrice: 0, + family: "gemini-3-flash", + version: "gemini-3-flash-preview", + name: "Gemini 3 Flash (Preview)", supportsToolCalling: true, - maxInputTokens: 108637, + maxInputTokens: 108594, }, - "gpt-5": { - contextWindow: 128000, + "gemini-2.5-pro": { + contextWindow: 108594, supportsImages: true, supportsPromptCache: false, inputPrice: 0, outputPrice: 0, - family: "gpt-5", - version: "gpt-5", - name: "GPT-5 (Preview)", + family: "gemini-2.5-pro", + version: "gemini-2.5-pro", + name: "Gemini 2.5 Pro", supportsToolCalling: true, - maxInputTokens: 108637, + maxInputTokens: 108594, }, } as const satisfies Record< string, diff --git a/src/api/index.ts b/src/api/index.ts index 40ba31f39af..aaec4d43e22 100644 --- a/src/api/index.ts +++ b/src/api/index.ts @@ -97,6 +97,17 @@ export interface 
ApiHandler { getModel(): { id: string; info: ModelInfo } + /** + * Optional: the context window (in tokens) to use for context-management / + * auto-condense decisions, when it must differ from getModel().info.contextWindow. + * + * Only the VS Code LM (Copilot) provider overrides this, to measure usage against the + * model's static `maxInputTokens` instead of the inflated live window VS Code reports. + * Other providers leave it undefined and callers fall back to getModel().info.contextWindow, + * so their behavior is unchanged. + */ + getCondenseContextWindow?(): number + /** * Counts tokens for content blocks * All providers extend BaseProvider which provides a default tiktoken implementation, diff --git a/src/api/providers/__tests__/vscode-lm.spec.ts b/src/api/providers/__tests__/vscode-lm.spec.ts index 305305d2289..7434b4de45d 100644 --- a/src/api/providers/__tests__/vscode-lm.spec.ts +++ b/src/api/providers/__tests__/vscode-lm.spec.ts @@ -55,6 +55,7 @@ vi.mock("vscode", () => { }) import * as vscode from "vscode" +import { openAiModelInfoSaneDefaults, vscodeLlmModels } from "@roo-code/types" import { VsCodeLmHandler } from "../vscode-lm" import type { ApiHandlerOptions } from "../../../shared/api" import type { Anthropic } from "@anthropic-ai/sdk" @@ -102,6 +103,29 @@ describe("VsCodeLmHandler", () => { }) }) + describe("getCondenseContextWindow", () => { + it("uses the static-table maxInputTokens for a known VS Code LM family", () => { + const opusHandler = new VsCodeLmHandler({ + vsCodeLmModelSelector: { vendor: "copilot", family: "claude-opus-4.8" }, + }) + + // The condense gate must measure usage against the curated static window, not the + // inflated live Copilot window, so it agrees with the context bar. 
+ expect(opusHandler.getCondenseContextWindow()).toBe(vscodeLlmModels["claude-opus-4.8"].maxInputTokens) + + opusHandler.dispose() + }) + + it("falls back to the live model context window for families not in the static table", () => { + // "test-family" isn't in vscodeLlmModels; with a live client present we fall back to + // getModel().info.contextWindow (the live maxInputTokens). + handler["client"] = mockLanguageModelChat as unknown as vscode.LanguageModelChat + + expect(handler.getCondenseContextWindow()).toBe(handler.getModel().info.contextWindow) + expect(handler.getCondenseContextWindow()).toBe(mockLanguageModelChat.maxInputTokens) + }) + }) + describe("createClient", () => { it("should create client with selector", async () => { const mockModel = { ...mockLanguageModelChat } @@ -435,6 +459,38 @@ describe("VsCodeLmHandler", () => { const model = handler.getModel() expect(model.info).toBeDefined() }) + + it("should use the full advertised maxInputTokens without an upper cap", async () => { + // The 128K cap was removed per simurg79/Roo-Code#10; contextWindow now reflects the + // provider-advertised maxInputTokens directly, even when large (~936K). 
+ const mockModel = { ...mockLanguageModelChat, maxInputTokens: 936000 } + ;(vscode.lm.selectChatModels as Mock).mockResolvedValue([mockModel]) + handler["client"] = null + await handler.initializeClient() + + const model = handler.getModel() + expect(model.info.contextWindow).toBe(936000) + }) + + it("should pass through a small maxInputTokens unchanged", async () => { + const mockModel = { ...mockLanguageModelChat, maxInputTokens: 4096 } + ;(vscode.lm.selectChatModels as Mock).mockResolvedValue([mockModel]) + handler["client"] = null + await handler.initializeClient() + + const model = handler.getModel() + expect(model.info.contextWindow).toBe(4096) + }) + + it("should fall back to sane defaults when maxInputTokens is not a number", async () => { + const mockModel = { ...mockLanguageModelChat, maxInputTokens: undefined as unknown as number } + ;(vscode.lm.selectChatModels as Mock).mockResolvedValue([mockModel]) + handler["client"] = null + await handler.initializeClient() + + const model = handler.getModel() + expect(model.info.contextWindow).toBe(openAiModelInfoSaneDefaults.contextWindow) + }) }) describe("countTokens", () => { diff --git a/src/api/providers/vscode-lm.ts b/src/api/providers/vscode-lm.ts index 8fb564a9d59..d730658b446 100644 --- a/src/api/providers/vscode-lm.ts +++ b/src/api/providers/vscode-lm.ts @@ -2,7 +2,7 @@ import { Anthropic } from "@anthropic-ai/sdk" import * as vscode from "vscode" import OpenAI from "openai" -import { type ModelInfo, openAiModelInfoSaneDefaults } from "@roo-code/types" +import { type ModelInfo, openAiModelInfoSaneDefaults, vscodeLlmModels } from "@roo-code/types" import type { ApiHandlerOptions } from "../../shared/api" import { SELECTOR_SEPARATOR, stringifyVsCodeLmModelSelector } from "../../shared/vsCodeSelectorUtils" @@ -562,6 +562,28 @@ export class VsCodeLmHandler extends BaseProvider implements SingleCompletionHan } } + /** + * Context window used for auto-condense / context-management decisions. 
+ * + * VS Code's LM API reports `client.maxInputTokens` as Copilot's *advertised* window, + * which is far larger than the realistic usable window; relying on it keeps auto-condense + * from ever firing. For condense decisions we instead measure usage against the curated + * static table's `maxInputTokens` — the same value the context bar uses via + * `useSelectedModel` — so the gate and the gauge stay on one source of truth. + * + * Falls back to the live runtime window when the selected model isn't in the static table. + */ + getCondenseContextWindow(): number { + const family = this.client?.family ?? this.options.vsCodeLmModelSelector?.family + const staticModel = family ? vscodeLlmModels[family as keyof typeof vscodeLlmModels] : undefined + + if (staticModel && typeof staticModel.maxInputTokens === "number" && staticModel.maxInputTokens > 0) { + return staticModel.maxInputTokens + } + + return this.getModel().info.contextWindow + } + async completePrompt(prompt: string): Promise { try { const client = await this.getClient() diff --git a/src/core/context-management/__tests__/context-management.spec.ts b/src/core/context-management/__tests__/context-management.spec.ts index 240269dd2c5..e239670338e 100644 --- a/src/core/context-management/__tests__/context-management.spec.ts +++ b/src/core/context-management/__tests__/context-management.spec.ts @@ -805,9 +805,11 @@ describe("Context Management", () => { const summarizeSpy = vi.spyOn(condenseModule, "summarizeConversation") const modelInfo = createModelInfo(100000, 30000) - // Set tokens to be below both the allowedTokens threshold and the percentage threshold + // Set tokens to be below both the allowedTokens threshold and the percentage threshold. + // Usage is measured against available input space (contextWindow - maxTokens = 70000), + // so 30000 / 70000 ~= 43% is below the 50% threshold. 
const contextWindow = modelInfo.contextWindow - const totalTokens = 40000 // 40% of context window + const totalTokens = 30000 // ~43% of available input space (70000) const messagesWithSmallContent = [ ...messages.slice(0, -1), { ...messages[messages.length - 1], content: "" }, @@ -820,7 +822,7 @@ describe("Context Management", () => { maxTokens: modelInfo.maxTokens, apiHandler: mockApiHandler, autoCondenseContext: true, - autoCondenseContextPercent: 50, // Set threshold to 50% - our tokens are at 40% + autoCondenseContextPercent: 50, // Threshold 50% - usage ~43% of available input space systemPrompt: "System prompt", taskId, profileThresholds: {}, @@ -1503,8 +1505,9 @@ describe("Context Management", () => { it("should return false when context percent is below threshold", () => { const result = willManageContext({ - totalTokens: 40000, - contextWindow: 100000, // 40% of context window + totalTokens: 30000, + // 30000 / (100000 - 30000) ~= 43% of available input space, below the 50% threshold + contextWindow: 100000, maxTokens: 30000, autoCondenseContext: true, autoCondenseContextPercent: 50, // 50% threshold @@ -1575,11 +1578,29 @@ describe("Context Management", () => { expect(result).toBe(false) }) + it("should treat a negative maxTokens (vscode-lm reports -1) as the default reserve, not -1", () => { + // vscode-lm reports maxTokens: -1; a negative reserve must not inflate allowedTokens. + // New: reservedTokens = ANTHROPIC_DEFAULT_MAX_TOKENS (8192) -> allowedTokens = 100000*0.9 - 8192 = 81808. + // Old (maxTokens || DEFAULT kept -1) -> allowedTokens = 90000 - (-1) = 90001, which would be false here. 
+ const result = willManageContext({ + totalTokens: 85000, // Above the corrected allowedTokens (81808), below the buggy 90001 + contextWindow: 100000, + maxTokens: -1, + autoCondenseContext: false, // Isolate the allowedTokens/reserve path + autoCondenseContextPercent: 100, + profileThresholds: {}, + currentProfileId: "default", + lastMessageTokens: 0, + }) + expect(result).toBe(true) + }) + it("should include lastMessageTokens in the calculation", () => { - // Without lastMessageTokens: 49000 tokens = 49% - // With lastMessageTokens: 49000 + 2000 = 51000 tokens = 51% + // Usage is measured against available input space (contextWindow - maxTokens = 70000). + // Without lastMessageTokens: 34000 / 70000 ~= 49% + // With lastMessageTokens: (34000 + 2000) / 70000 ~= 51% const resultWithoutLastMessage = willManageContext({ - totalTokens: 49000, + totalTokens: 34000, contextWindow: 100000, maxTokens: 30000, autoCondenseContext: true, @@ -1591,19 +1612,144 @@ describe("Context Management", () => { expect(resultWithoutLastMessage).toBe(false) const resultWithLastMessage = willManageContext({ - totalTokens: 49000, + totalTokens: 34000, contextWindow: 100000, maxTokens: 30000, autoCondenseContext: true, autoCondenseContextPercent: 50, // 50% threshold profileThresholds: {}, currentProfileId: "default", - lastMessageTokens: 2000, // Pushes total to 51% + lastMessageTokens: 2000, // Pushes usage just over the 50% threshold }) expect(resultWithLastMessage).toBe(true) }) }) + /** + * Regression: the condense percentage must be measured against the AVAILABLE input + * space (contextWindow - reservedForOutput), not the full contextWindow. This is the + * real vscode-lm / claude-opus-4.8 failure: with a large window and a meaningful output + * reserve, the old full-window denominator under-reported usage and condensation never + * fired even though the UI gauge showed the context as effectively full. 
+ * See myplans/VSCode LM Model Table Integrity/vscode_lm_opus_data_integrity_design.md and + * GitHub issue simurg79/Roo-Code#10. + */ + describe("contextPercent uses available input space (regression)", () => { + const createModelInfo = (contextWindow: number, maxTokens?: number): ModelInfo => ({ + contextWindow, + supportsPromptCache: true, + maxTokens, + }) + + const messages: ApiMessage[] = [ + { role: "user", content: "First message" }, + { role: "assistant", content: "Second message" }, + { role: "user", content: "Third message" }, + { role: "assistant", content: "Fourth message" }, + { role: "user", content: "" }, + ] + + it("condenses when usage clears the threshold only under the available-input denominator", () => { + // contextWindow 200000, reserved output 64000 -> availableInput 136000. + // prevContextTokens 100000, threshold 70%. + // OLD math: 100 * 100000 / 200000 = 50.0% -> below 70 -> would NOT condense. + // NEW math: 100 * 100000 / (200000-64000) = 73.5% -> >= 70 -> DOES condense. + // allowedTokens = 200000 * 0.9 - 64000 = 116000; 100000 > 116000 is false, so the + // percent path (not the absolute allowedTokens cap) is the sole trigger here. + const result = willManageContext({ + totalTokens: 100000, + contextWindow: 200000, + maxTokens: 64000, + autoCondenseContext: true, + autoCondenseContextPercent: 70, + profileThresholds: {}, + currentProfileId: "default", + lastMessageTokens: 0, + }) + expect(result).toBe(true) + }) + + it("does NOT condense for the same usage when scored against the full window (old behavior boundary)", () => { + // Same usage as above but with a 55% threshold, chosen to sit between the OLD 50% and the + // NEW 73.5% so the two denominators disagree: the old full-window math (50% < 55) would + // not condense, while the new available-input math (73.5% >= 55) does, hence toBe(true). 
+ const result = willManageContext({ + totalTokens: 100000, + contextWindow: 200000, + maxTokens: 64000, + autoCondenseContext: true, + autoCondenseContextPercent: 55, + profileThresholds: {}, + currentProfileId: "default", + lastMessageTokens: 0, + }) + expect(result).toBe(true) + }) + + it("falls back to the full window when maxTokens is unknown/unlimited (vscode-lm reports -1)", () => { + // vscode-lm reports maxTokens: -1, so reservedForOutput falls back to 0 and the + // denominator is the full contextWindow. prevContextTokens 150000 / 200000 = 75% >= 70%. + // allowedTokens = 200000 * 0.9 - ANTHROPIC_DEFAULT_MAX_TOKENS(8192) = 171808; 150000 > + // 171808 is false, so the percent path is the sole trigger AND the -1 reserve does not + // corrupt allowedTokens. + const result = willManageContext({ + totalTokens: 150000, + contextWindow: 200000, + maxTokens: -1, + autoCondenseContext: true, + autoCondenseContextPercent: 70, + profileThresholds: {}, + currentProfileId: "default", + lastMessageTokens: 0, + }) + expect(result).toBe(true) + }) + + it("drives manageContext to summarize via the available-input percent (not the allowedTokens cap)", async () => { + // End-to-end proof through manageContext: same 200000/64000/100000 case. Under the OLD + // full-window denominator (50%) this would skip condensation; under the NEW denominator + // (73.5% >= 70%) summarizeConversation must be invoked. allowedTokens (116000) is not + // exceeded by 100000, so the summarization is driven by the percent path alone. 
+ const mockSummary = "Available-input regression summary" + const mockSummarizeResponse: condenseModule.SummarizeResponse = { + messages: [ + { role: "user", content: "First message" }, + { role: "user", content: mockSummary, isSummary: true }, + { role: "assistant", content: "Last message" }, + ], + summary: mockSummary, + cost: 0.02, + newContextTokens: 100, + } + const summarizeSpy = vi + .spyOn(condenseModule, "summarizeConversation") + .mockResolvedValue(mockSummarizeResponse) + + const modelInfo = createModelInfo(200000, 64000) + const result = await manageContext({ + messages, + totalTokens: 100000, + contextWindow: modelInfo.contextWindow, + maxTokens: modelInfo.maxTokens, + apiHandler: mockApiHandler, + autoCondenseContext: true, + autoCondenseContextPercent: 70, + systemPrompt: "System prompt", + taskId, + profileThresholds: {}, + currentProfileId: "default", + }) + + expect(summarizeSpy).toHaveBeenCalled() + expect(result).toMatchObject({ + messages: mockSummarizeResponse.messages, + summary: mockSummary, + }) + + summarizeSpy.mockRestore() + }) + }) + /** * Tests for newContextTokensAfterTruncation including system prompt */ diff --git a/src/core/context-management/index.ts b/src/core/context-management/index.ts index 4d9608d8e44..2ec477d1037 100644 --- a/src/core/context-management/index.ts +++ b/src/core/context-management/index.ts @@ -166,13 +166,15 @@ export function willManageContext({ }: WillManageContextOptions): boolean { if (!autoCondenseContext) { // When auto-condense is disabled, only truncation can occur - const reservedTokens = maxTokens || ANTHROPIC_DEFAULT_MAX_TOKENS + // vscode-lm reports maxTokens: -1 (unlimited); a negative reserve must not distort the window math. + const reservedTokens = maxTokens && maxTokens > 0 ? 
maxTokens : ANTHROPIC_DEFAULT_MAX_TOKENS const prevContextTokens = totalTokens + lastMessageTokens const allowedTokens = contextWindow * (1 - TOKEN_BUFFER_PERCENTAGE) - reservedTokens return prevContextTokens > allowedTokens } - const reservedTokens = maxTokens || ANTHROPIC_DEFAULT_MAX_TOKENS + // vscode-lm reports maxTokens: -1 (unlimited); a negative reserve must not distort the window math. + const reservedTokens = maxTokens && maxTokens > 0 ? maxTokens : ANTHROPIC_DEFAULT_MAX_TOKENS const prevContextTokens = totalTokens + lastMessageTokens const allowedTokens = contextWindow * (1 - TOKEN_BUFFER_PERCENTAGE) - reservedTokens @@ -188,7 +190,14 @@ export function willManageContext({ // Invalid values fall back to global setting (effectiveThreshold already set) } - const contextPercent = (100 * prevContextTokens) / contextWindow + // Measure usage against the available input space (context window minus the + // reserved output budget), matching the context gauge shown in the UI. Reserved + // output tokens can never hold conversation context, so this is the meaningful + // "how full is my usable input" figure. When the reserve is unknown/unlimited + // (e.g., vscode-lm reports -1), fall back to the full context window. + const reservedForOutput = maxTokens && maxTokens > 0 ? maxTokens : 0 + const availableInputTokens = contextWindow - reservedForOutput + const contextPercent = availableInputTokens > 0 ? (100 * prevContextTokens) / availableInputTokens : 100 return contextPercent >= effectiveThreshold || prevContextTokens > allowedTokens } @@ -263,7 +272,8 @@ export async function manageContext({ let errorDetails: string | undefined let cost = 0 // Calculate the maximum tokens reserved for response - const reservedTokens = maxTokens || ANTHROPIC_DEFAULT_MAX_TOKENS + // vscode-lm reports maxTokens: -1 (unlimited); a negative reserve must not distort the window math. + const reservedTokens = maxTokens && maxTokens > 0 ? 
maxTokens : ANTHROPIC_DEFAULT_MAX_TOKENS // Estimate tokens for the last message (which is always a user message) const lastMessage = messages[messages.length - 1] @@ -300,7 +310,14 @@ export async function manageContext({ // If no specific threshold is found for the profile, fall back to global setting if (autoCondenseContext) { - const contextPercent = (100 * prevContextTokens) / contextWindow + // Measure usage against the available input space (context window minus the + // reserved output budget), matching the context gauge shown in the UI. Reserved + // output tokens can never hold conversation context, so this is the meaningful + // "how full is my usable input" figure. When the reserve is unknown/unlimited + // (e.g., vscode-lm reports -1), fall back to the full context window. + const reservedForOutput = maxTokens && maxTokens > 0 ? maxTokens : 0 + const availableInputTokens = contextWindow - reservedForOutput + const contextPercent = availableInputTokens > 0 ? (100 * prevContextTokens) / availableInputTokens : 100 if (contextPercent >= effectiveThreshold || prevContextTokens > allowedTokens) { // Attempt to intelligently condense the context const result = await summarizeConversation({ diff --git a/src/core/task/Task.ts b/src/core/task/Task.ts index 97f07fcc7aa..c4a8927e826 100644 --- a/src/core/task/Task.ts +++ b/src/core/task/Task.ts @@ -3727,7 +3727,10 @@ export class Task extends EventEmitter implements TaskLike { settings: this.apiConfiguration, }) - const contextWindow = modelInfo.contextWindow + // VS Code LM (Copilot) measures usage against its static-table maxInputTokens, not the + // inflated live window, so context management runs in line with the context bar. Every + // other provider returns undefined here and falls back to modelInfo.contextWindow. + const contextWindow = this.api.getCondenseContextWindow?.() ?? 
modelInfo.contextWindow // Get the current profile ID using the helper method const currentProfileId = this.getCurrentProfileId(state) @@ -3917,7 +3920,10 @@ export class Task extends EventEmitter implements TaskLike { settings: this.apiConfiguration, }) - const contextWindow = modelInfo.contextWindow + // VS Code LM (Copilot) measures usage against its static-table maxInputTokens, not the + // inflated live window, so auto-condense fires in line with the context bar. Every other + // provider returns undefined here and falls back to modelInfo.contextWindow. + const contextWindow = this.api.getCondenseContextWindow?.() ?? modelInfo.contextWindow // Get the current profile ID using the helper method const currentProfileId = this.getCurrentProfileId(state) diff --git a/src/package.json b/src/package.json index 06a5a0c7b18..b8c4d45ec45 100644 --- a/src/package.json +++ b/src/package.json @@ -3,7 +3,7 @@ "displayName": "%extension.displayName%", "description": "%extension.description%", "publisher": "RooVeterinaryInc", - "version": "3.53.0", + "version": "3.53.1", "icon": "assets/icons/icon.png", "galleryBanner": { "color": "#617A91", diff --git a/webview-ui/src/components/chat/TaskHeader.tsx b/webview-ui/src/components/chat/TaskHeader.tsx index 8479f90906b..07cdc33821b 100644 --- a/webview-ui/src/components/chat/TaskHeader.tsx +++ b/webview-ui/src/components/chat/TaskHeader.tsx @@ -76,7 +76,8 @@ const TaskHeader = ({ : 0, [model, modelId, apiConfiguration], ) - const reservedForOutput = maxTokens || 0 + // vscode-lm reports maxTokens: -1 (unlimited); a negative reserve must not distort the window math. + const reservedForOutput = maxTokens && maxTokens > 0 ? 
maxTokens : 0 const condenseButton = ( { // Should show 0% when available input space is 0 expect(screen.getByText("0%")).toBeInTheDocument() }) + + it("should treat a negative maxTokens (vscode-lm reports -1) as zero reserve", () => { + // vscode-lm reports maxTokens: -1; a negative reserve must not inflate the denominator. + // contextTokens = 250, contextWindow = 1000, reservedForOutput treated as 0 + // Percentage = 250 / 1000 * 100 = 25% (NOT 250 / 1001 from a -1 reserve). + mockModelInfo = { contextWindow: 1000, maxTokens: -1 } + mockMaxOutputTokens = -1 + + renderTaskHeader({ contextTokens: 250 }) + + expect(screen.getByText("25%")).toBeInTheDocument() + }) }) }) diff --git a/webview-ui/src/components/ui/hooks/__tests__/useSelectedModel.spec.ts b/webview-ui/src/components/ui/hooks/__tests__/useSelectedModel.spec.ts index 0dc42129c08..431b83c2090 100644 --- a/webview-ui/src/components/ui/hooks/__tests__/useSelectedModel.spec.ts +++ b/webview-ui/src/components/ui/hooks/__tests__/useSelectedModel.spec.ts @@ -14,6 +14,8 @@ import { minimaxDefaultModelId, minimaxModels, openRouterDefaultModelId, + vscodeLlmModels, + vscodeLlmDefaultModelId, } from "@roo-code/types" import { useSelectedModel } from "../useSelectedModel" @@ -772,4 +774,55 @@ describe("useSelectedModel", () => { expect(result.current.info).toEqual(minimaxModels["MiniMax-M2.7"]) }) }) + + describe("vscode-lm provider", () => { + beforeEach(() => { + mockUseRouterModels.mockReturnValue({ + data: { openrouter: {}, requesty: {}, litellm: {} }, + isLoading: false, + isError: false, + } as any) + + mockUseOpenRouterModelProviders.mockReturnValue({ + data: {}, + isLoading: false, + isError: false, + } as any) + }) + + it("resolves a listed family's contextWindow to its maxInputTokens (the value the gate uses)", () => { + const listedFamily = vscodeLlmDefaultModelId + const apiConfiguration: ProviderSettings = { + apiProvider: "vscode-lm", + vsCodeLmModelSelector: { vendor: "copilot", family: listedFamily 
}, + } + + const wrapper = createWrapper() + const { result } = renderHook(() => useSelectedModel(apiConfiguration), { wrapper }) + + expect(result.current.provider).toBe("vscode-lm") + expect(result.current.id).toBe(`copilot/${listedFamily}`) + // contextWindow MUST equal the curated maxInputTokens on the static row, which the condense gate + // consumes via getCondenseContextWindow(), not the empirically-measured contextWindow field on that row. + expect(result.current.info?.contextWindow).toBe(vscodeLlmModels[listedFamily].maxInputTokens) + expect(result.current.info?.supportsImages).toBe(false) + }) + + it("falls back to the default model's window for an unlisted family (NOT 128000)", () => { + const apiConfiguration: ProviderSettings = { + apiProvider: "vscode-lm", + vsCodeLmModelSelector: { vendor: "copilot", family: "totally-unknown-family" }, + } + + const wrapper = createWrapper() + const { result } = renderHook(() => useSelectedModel(apiConfiguration), { wrapper }) + + expect(result.current.provider).toBe("vscode-lm") + // An unlisted family must not silently collapse to the 128K openAiModelInfoSaneDefaults window, + // which would diverge from the gate and break the context bar / auto-condense. + expect(result.current.info?.contextWindow).not.toBe(128000) + expect(result.current.info?.contextWindow).toBe(vscodeLlmModels[vscodeLlmDefaultModelId].maxInputTokens) + expect(result.current.info?.supportsImages).toBe(false) + }) + }) }) diff --git a/webview-ui/src/components/ui/hooks/useSelectedModel.ts b/webview-ui/src/components/ui/hooks/useSelectedModel.ts index bf78236b824..9bd6c45a2a3 100644 --- a/webview-ui/src/components/ui/hooks/useSelectedModel.ts +++ b/webview-ui/src/components/ui/hooks/useSelectedModel.ts @@ -299,8 +299,21 @@ function getSelectedModel({ ? `${apiConfiguration.vsCodeLmModelSelector.vendor}/${apiConfiguration.vsCodeLmModelSelector.family}` : vscodeLlmDefaultModelId const modelFamily = apiConfiguration?.vsCodeLmModelSelector?.family ?? 
vscodeLlmDefaultModelId - const info = vscodeLlmModels[modelFamily as keyof typeof vscodeLlmModels] - return { id, info: { ...openAiModelInfoSaneDefaults, ...info, supportsImages: false } } // VSCode LM API currently doesn't support images. + // On a family miss, fall back to the default model entry instead of openAiModelInfoSaneDefaults, + // whose 128K contextWindow would diverge from the gate and make the bar read >100% while + // auto-condense never fires (the gate does not use that default window). + const listedModel = + vscodeLlmModels[modelFamily as keyof typeof vscodeLlmModels] ?? vscodeLlmModels[vscodeLlmDefaultModelId] + // contextWindow MUST equal the curated maxInputTokens: that is the value the condense gate consumes via + // getCondenseContextWindow() (see src/core/task/Task.ts), not the inflated live client.maxInputTokens, + // so the UI bar and the condense gate share a single source of truth. + const info: ModelInfo = { + ...openAiModelInfoSaneDefaults, + ...listedModel, + contextWindow: listedModel.maxInputTokens, + supportsImages: false, // VSCode LM API currently doesn't support images. + } + return { id, info } } case "sambanova": { const id = apiConfiguration.apiModelId ?? defaultModelId From dff7f9c6712273d453a1e25e4a3f916d354c66e2 Mon Sep 17 00:00:00 2001 From: Bertan Ari Date: Mon, 22 Jun 2026 18:02:43 -0700 Subject: [PATCH 2/2] chore(changeset): restore v3.54.0.md (revert accidental deletion) Restores the upstream v3.54.0 changeset that was removed in 8ed3455; that file originated upstream (Release v3.54.0 #12369) and should not have been deleted. 
--- .changeset/v3.54.0.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 .changeset/v3.54.0.md diff --git a/.changeset/v3.54.0.md b/.changeset/v3.54.0.md new file mode 100644 index 00000000000..f2817ede2ac --- /dev/null +++ b/.changeset/v3.54.0.md @@ -0,0 +1,15 @@ +--- +"roo-cline": minor +--- + +- Remove: Roo Code Cloud and eval infrastructure from the extension, CLI, workflows, and package surfaces so the release is focused on the standalone extension (PR #12328 by @mrubens) +- Remove: All telemetry collection and analytics plumbing across the extension, website, shared types, provider flows, and related tests (PR #12324 by @mrubens) +- Remove: MDM and organization membership enforcement, including host wiring, webview state, user-facing messages, and locale strings (PR #12323 by @mrubens) +- Remove: The MCP marketplace, marketplace services, webview marketplace UI, package contributions, and related localized copy (PR #12326 by @mrubens) +- Update: Extension-facing support, diagnostics, and announcement content for the final Roo Code release, including GitHub help paths and links to Roomote, ZooCode, and Cline (PR #12341 by @brunobergher) +- Add: A cleaned docs app with GitHub Pages deployment support (PR #12344 by @brunobergher) +- Fix: Configure the docs GitHub Pages base URL so deployed assets and canonical paths load correctly under the repository Pages path (PR #12370 by @mrubens) +- Update: Point docs links in the root README, localized READMEs, and web app copy to the current GitHub Pages docs URL (PR #12371 by @mrubens) +- Remove: Stale `roocode.github.io` docs references, including the old CNAME and outdated docs README and robots.txt URLs (PR #12372 by @mrubens) +- Update: The website to focus almost entirely on the Roo Code extension and remove cloud, team, enterprise, provider, pricing, Slack, and Linear product pages (PR #12180 by @brunobergher) +- Remove: Contributor, community, social channel, and tutorial references from 
README files, docs, website copy, issue templates, and workflows (PR #12347 by @brunobergher)