Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/vscode-lm-condense-fix.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"roo-cline": patch
---

Fix unreliable automatic context condensing on the VS Code LM (vscode-lm) provider. The condense gate now treats the provider's `maxTokens: -1` (unlimited) as the default output reserve and measures usage against available input space, and a new `getCondenseContextWindow()` seam makes the gate use the curated model `maxInputTokens` instead of the inflated live window. Also refreshes the VS Code LM model catalog and default model.
49 changes: 49 additions & 0 deletions packages/types/src/__tests__/vscode-llm.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import { describe, it, expect } from "vitest"

import { vscodeLlmModels, vscodeLlmDefaultModelId } from "../providers/vscode-llm.js"

// Integrity checks for the curated VS Code LM model table: pins the captured window sizes for
// selected rows, enforces the per-row numeric invariants, guards against rows that must not be
// present, and verifies that the default model id resolves to an entry in the table.
describe("vscodeLlmModels", () => {
	it("exposes the opus-4.8 row with its measured maxInputTokens and contextWindow", () => {
		// The VS Code LM API exposes only maxInputTokens; that is the value the UI reads from this
		// table (useSelectedModel.ts). For claude-opus-4.8 the two fields intentionally DIVERGE:
		// maxInputTokens (197897) is the enforced input ceiling, while contextWindow (679560) records
		// the larger advertised window. The UI reads maxInputTokens, so the divergence is a deliberate
		// tripwire — assert the actual on-disk literals rather than forcing equality.
		// See GitHub issue simurg79/Roo-Code#10.
		expect(vscodeLlmModels).toHaveProperty("claude-opus-4.8")
		expect(vscodeLlmModels["claude-opus-4.8"].contextWindow).toBe(679560)
		expect(vscodeLlmModels["claude-opus-4.8"].maxInputTokens).toBe(197897)
	})

	it("preserves the real window for models captured with a smaller maxInputTokens", () => {
		expect(vscodeLlmModels["gpt-4o-mini"].maxInputTokens).toBe(12078)
		expect(vscodeLlmModels["gpt-4o-mini"].contextWindow).toBe(12078)
		expect(vscodeLlmModels["gemini-2.5-pro"].contextWindow).toBe(108594)
		expect(vscodeLlmModels["gemini-2.5-pro"].maxInputTokens).toBe(108594)
	})

	it("keeps both window fields populated and positive for every row", () => {
		// NOTE: contextWindow and maxInputTokens are intentionally ALLOWED to differ (claude-opus-4.8
		// diverges: 679560 vs 197897). The UI reads maxInputTokens, and that divergence is a deliberate
		// tripwire, so we do NOT assert contextWindow === maxInputTokens here (see simurg79/Roo-Code#10).
		// The meaningful invariant is that every row carries positive integers for both fields; a
		// missing/zero value would point to hand-authored drift rather than a real captured row.
		for (const [family, model] of Object.entries(vscodeLlmModels)) {
			// Check integer-ness explicitly: toBeGreaterThan(0) alone would accept a fractional
			// value, which the failure message ("must be a positive integer") promises to reject.
			expect(
				Number.isInteger(model.contextWindow),
				`${family}: contextWindow must be a positive integer`,
			).toBe(true)
			expect(model.contextWindow, `${family}: contextWindow must be a positive integer`).toBeGreaterThan(0)

			expect(
				Number.isInteger(model.maxInputTokens),
				`${family}: maxInputTokens must be a positive integer`,
			).toBe(true)
			expect(model.maxInputTokens, `${family}: maxInputTokens must be a positive integer`).toBeGreaterThan(0)
		}
	})

	it("excludes fabricated/internal/alias families and the dropped legacy rows", () => {
		// Integrity guards: these were never part of the authoritative live capture, or were
		// removed by the full table REPLACE. Their presence would signal hand-authored drift.
		expect(vscodeLlmModels).not.toHaveProperty("claude-opus-4.7-high")
		expect(vscodeLlmModels).not.toHaveProperty("claude-3.5-sonnet")
		expect(vscodeLlmModels).not.toHaveProperty("claude-4-sonnet")
	})

	it("defaults to a model id that exists in the table", () => {
		expect(vscodeLlmDefaultModelId).toBe("claude-sonnet-4.5")
		expect(vscodeLlmModels).toHaveProperty(vscodeLlmDefaultModelId)
	})
})
237 changes: 138 additions & 99 deletions packages/types/src/providers/vscode-llm.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,189 +2,228 @@ import type { ModelInfo } from "../model.js"

export type VscodeLlmModelId = keyof typeof vscodeLlmModels

export const vscodeLlmDefaultModelId: VscodeLlmModelId = "claude-3.5-sonnet"
export const vscodeLlmDefaultModelId: VscodeLlmModelId = "claude-sonnet-4.5"

// https://docs.cline.bot/provider-config/vscode-language-model-api
// Rows below were originally enumerated from `vscode.lm.selectChatModels({ vendor: "copilot" })`.
// The VS Code LM API exposes ONLY `maxInputTokens` (there is no separate context-window field), and
// that is the single value the runtime/condense gate enforces: getModel() sets
// contextWindow = Math.max(0, client.maxInputTokens) in src/api/providers/vscode-lm.ts. So for every
// row `maxInputTokens` IS the enforced context window, and `contextWindow` normally mirrors it as an
// informational copy (the UI reads maxInputTokens via useSelectedModel.ts). The one deliberate
// exception is claude-opus-4.8, whose `contextWindow` records the larger advertised window (679560)
// while `maxInputTokens` carries the enforced ceiling (197897).
// These ceilings were measured empirically on 2026-06-18 (VS Code 1.125.0) by binary-searching the
// single-message "Message exceeds token limit" threshold per model — they are the largest input the
// backend actually accepts, which for several models is well below the value Copilot advertises:
// - claude-opus-4.8: enforced 197897 (advertised 679560)
// - claude-opus-4.7 / 4.6, claude-sonnet-4.6,
//   gemini-3.1-pro-preview, gemini-3.5-flash: enforced ~197.9K
// - gpt-5.5 / gpt-5.4: enforced ~268.4K
// Guardrail: these are empirically measured — re-measure (do not hand-tune) if the models change.
// See GitHub issue simurg79/Roo-Code#10 and myplans/VSCode LM Model Table Integrity/vscode_lm_opus_data_integrity_design.md.
export const vscodeLlmModels = {
"gpt-3.5-turbo": {
contextWindow: 12114,
supportsImages: false,
"claude-opus-4.8": {
contextWindow: 679560,
supportsImages: true,
supportsPromptCache: false,
inputPrice: 0,
outputPrice: 0,
family: "gpt-3.5-turbo",
version: "gpt-3.5-turbo-0613",
name: "GPT 3.5 Turbo",
family: "claude-opus-4.8",
version: "claude-opus-4.8",
name: "Claude Opus 4.8",
supportsToolCalling: true,
maxInputTokens: 12114,
maxInputTokens: 197897,
},
"gpt-4o-mini": {
contextWindow: 12115,
supportsImages: false,
"claude-opus-4.7": {
contextWindow: 197897,
supportsImages: true,
supportsPromptCache: false,
inputPrice: 0,
outputPrice: 0,
family: "gpt-4o-mini",
version: "gpt-4o-mini-2024-07-18",
name: "GPT-4o mini",
family: "claude-opus-4.7",
version: "claude-opus-4.7",
name: "Claude Opus 4.7",
supportsToolCalling: true,
maxInputTokens: 12115,
maxInputTokens: 197897,
},
"gpt-4": {
contextWindow: 28501,
supportsImages: false,
"claude-opus-4.6": {
contextWindow: 197897,
supportsImages: true,
supportsPromptCache: false,
inputPrice: 0,
outputPrice: 0,
family: "gpt-4",
version: "gpt-4-0613",
name: "GPT 4",
family: "claude-opus-4.6",
version: "claude-opus-4.6",
name: "Claude Opus 4.6",
supportsToolCalling: true,
maxInputTokens: 28501,
maxInputTokens: 197897,
},
"gpt-4-0125-preview": {
contextWindow: 63826,
supportsImages: false,
"claude-opus-4.5": {
contextWindow: 167790,
supportsImages: true,
supportsPromptCache: false,
inputPrice: 0,
outputPrice: 0,
family: "gpt-4-turbo",
version: "gpt-4-0125-preview",
name: "GPT 4 Turbo",
family: "claude-opus-4.5",
version: "claude-opus-4.5",
name: "Claude Opus 4.5",
supportsToolCalling: true,
maxInputTokens: 63826,
maxInputTokens: 167790,
},
"gpt-4o": {
contextWindow: 63827,
"claude-sonnet-4.6": {
contextWindow: 197896,
supportsImages: true,
supportsPromptCache: false,
inputPrice: 0,
outputPrice: 0,
family: "gpt-4o",
version: "gpt-4o-2024-11-20",
name: "GPT-4o",
family: "claude-sonnet-4.6",
version: "claude-sonnet-4.6",
name: "Claude Sonnet 4.6",
supportsToolCalling: true,
maxInputTokens: 63827,
maxInputTokens: 197896,
},
o1: {
contextWindow: 19827,
supportsImages: false,
"claude-sonnet-4.5": {
contextWindow: 167790,
supportsImages: true,
supportsPromptCache: false,
inputPrice: 0,
outputPrice: 0,
family: "o1-ga",
version: "o1-2024-12-17",
name: "o1 (Preview)",
family: "claude-sonnet-4.5",
version: "claude-sonnet-4.5",
name: "Claude Sonnet 4.5",
supportsToolCalling: true,
maxInputTokens: 19827,
maxInputTokens: 167790,
},
"o3-mini": {
contextWindow: 63827,
supportsImages: false,
"claude-haiku-4.5": {
contextWindow: 135790,
supportsImages: true,
supportsPromptCache: false,
inputPrice: 0,
outputPrice: 0,
family: "o3-mini",
version: "o3-mini-2025-01-31",
name: "o3-mini",
family: "claude-haiku-4.5",
version: "claude-haiku-4.5",
name: "Claude Haiku 4.5",
supportsToolCalling: true,
maxInputTokens: 63827,
maxInputTokens: 135790,
},
"claude-3.5-sonnet": {
contextWindow: 81638,
"gpt-5.5": {
contextWindow: 268426,
supportsImages: true,
supportsPromptCache: false,
inputPrice: 0,
outputPrice: 0,
family: "claude-3.5-sonnet",
version: "claude-3.5-sonnet",
name: "Claude 3.5 Sonnet",
family: "gpt-5.5",
version: "gpt-5.5",
name: "GPT-5.5",
supportsToolCalling: true,
maxInputTokens: 81638,
maxInputTokens: 268426,
},
"claude-4-sonnet": {
contextWindow: 128000,
"gpt-5.4": {
contextWindow: 268424,
supportsImages: true,
supportsPromptCache: false,
inputPrice: 0,
outputPrice: 0,
family: "claude-sonnet-4",
version: "claude-sonnet-4",
name: "Claude Sonnet 4",
family: "gpt-5.4",
version: "gpt-5.4",
name: "GPT-5.4",
supportsToolCalling: true,
maxInputTokens: 111836,
maxInputTokens: 268424,
},
"gemini-2.0-flash-001": {
contextWindow: 127827,
"gpt-5.4-mini": {
contextWindow: 271790,
supportsImages: true,
supportsPromptCache: false,
inputPrice: 0,
outputPrice: 0,
family: "gemini-2.0-flash",
version: "gemini-2.0-flash-001",
name: "Gemini 2.0 Flash",
supportsToolCalling: false,
maxInputTokens: 127827,
family: "gpt-5.4-mini",
version: "gpt-5.4-mini",
name: "GPT-5.4 mini",
supportsToolCalling: true,
maxInputTokens: 271790,
},
"gemini-2.5-pro": {
contextWindow: 128000,
"gpt-5.3-codex": {
contextWindow: 271790,
supportsImages: true,
supportsPromptCache: false,
inputPrice: 0,
outputPrice: 0,
family: "gemini-2.5-pro",
version: "gemini-2.5-pro-preview-03-25",
name: "Gemini 2.5 Pro (Preview)",
family: "gpt-5.3-codex",
version: "gpt-5.3-codex",
name: "GPT-5.3-Codex",
supportsToolCalling: true,
maxInputTokens: 108637,
maxInputTokens: 271790,
},
"o4-mini": {
contextWindow: 128000,
"gpt-5-mini": {
contextWindow: 127790,
supportsImages: true,
supportsPromptCache: false,
inputPrice: 0,
outputPrice: 0,
family: "gpt-5-mini",
version: "gpt-5-mini",
name: "GPT-5 mini",
supportsToolCalling: true,
maxInputTokens: 127790,
},
"gpt-4o-mini": {
contextWindow: 12078,
supportsImages: false,
supportsPromptCache: false,
inputPrice: 0,
outputPrice: 0,
family: "o4-mini",
version: "o4-mini-2025-04-16",
name: "o4-mini (Preview)",
family: "gpt-4o-mini",
version: "gpt-4o-mini-2024-07-18",
name: "GPT-4o mini",
supportsToolCalling: true,
maxInputTokens: 111452,
maxInputTokens: 12078,
},
"gpt-4.1": {
contextWindow: 128000,
"gemini-3.1-pro-preview": {
contextWindow: 197897,
supportsImages: true,
supportsPromptCache: false,
inputPrice: 0,
outputPrice: 0,
family: "gpt-4.1",
version: "gpt-4.1-2025-04-14",
name: "GPT-4.1 (Preview)",
family: "gemini-3.1-pro-preview",
version: "gemini-3.1-pro-preview",
name: "Gemini 3.1 Pro (Preview)",
supportsToolCalling: true,
maxInputTokens: 111452,
maxInputTokens: 197897,
},
"gpt-5-mini": {
contextWindow: 128000,
"gemini-3.5-flash": {
contextWindow: 197895,
supportsImages: true,
supportsPromptCache: false,
inputPrice: 0,
outputPrice: 0,
family: "gpt-5-mini",
version: "gpt-5-mini",
name: "GPT-5 mini (Preview)",
family: "gemini-3.5-flash",
version: "gemini-3.5-flash",
name: "Gemini 3.5 Flash",
supportsToolCalling: true,
maxInputTokens: 197895,
},
"gemini-3-flash": {
contextWindow: 108594,
supportsImages: true,
supportsPromptCache: false,
inputPrice: 0,
outputPrice: 0,
family: "gemini-3-flash",
version: "gemini-3-flash-preview",
name: "Gemini 3 Flash (Preview)",
supportsToolCalling: true,
maxInputTokens: 108637,
maxInputTokens: 108594,
},
"gpt-5": {
contextWindow: 128000,
"gemini-2.5-pro": {
contextWindow: 108594,
supportsImages: true,
supportsPromptCache: false,
inputPrice: 0,
outputPrice: 0,
family: "gpt-5",
version: "gpt-5",
name: "GPT-5 (Preview)",
family: "gemini-2.5-pro",
version: "gemini-2.5-pro",
name: "Gemini 2.5 Pro",
supportsToolCalling: true,
maxInputTokens: 108637,
maxInputTokens: 108594,
},
} as const satisfies Record<
string,
Expand Down
Loading
Loading