From a4c375f7216799baa18f1d45925a758ea09b3ef6 Mon Sep 17 00:00:00 2001 From: Roomote Date: Thu, 14 May 2026 22:59:58 +0000 Subject: [PATCH 1/5] fix: use adaptive reasoning for anthropic opus 4.7 --- apps/vscode-e2e/fixtures/claude-opus-4-7.json | 16 +- .../src/suite/anthropic-opus-4-7.test.ts | 208 +++++++++++++++--- packages/types/src/providers/anthropic.ts | 4 +- src/api/providers/__tests__/anthropic.spec.ts | 48 +++- src/api/providers/anthropic.ts | 76 ++++--- src/api/transform/__tests__/reasoning.spec.ts | 72 ++++++ src/api/transform/reasoning.ts | 14 ++ 7 files changed, 375 insertions(+), 63 deletions(-) diff --git a/apps/vscode-e2e/fixtures/claude-opus-4-7.json b/apps/vscode-e2e/fixtures/claude-opus-4-7.json index 8b1bb09cb5..f269418706 100644 --- a/apps/vscode-e2e/fixtures/claude-opus-4-7.json +++ b/apps/vscode-e2e/fixtures/claude-opus-4-7.json @@ -2,7 +2,21 @@ "fixtures": [ { "match": { - "userMessage": "opus47-e2e: what is 2+2? Reply with only the number." + "userMessage": "opus47-e2e:reasoning-on: what is 2+2? Reply with only the number." + }, + "response": { + "toolCalls": [ + { + "name": "attempt_completion", + "arguments": "{\"result\": \"4\"}", + "id": "toolu_014MmgmKQV9c2DmffmF8bKm3" + } + ] + } + }, + { + "match": { + "userMessage": "opus47-e2e:reasoning-off: what is 2+2? Reply with only the number." }, "response": { "toolCalls": [ diff --git a/apps/vscode-e2e/src/suite/anthropic-opus-4-7.test.ts b/apps/vscode-e2e/src/suite/anthropic-opus-4-7.test.ts index 634d8ebe6b..8230b8f377 100644 --- a/apps/vscode-e2e/src/suite/anthropic-opus-4-7.test.ts +++ b/apps/vscode-e2e/src/suite/anthropic-opus-4-7.test.ts @@ -1,10 +1,136 @@ import * as assert from "assert" +import { createServer, type IncomingMessage, type ServerResponse } from "http" import { RooCodeEventName, type ClineMessage } from "@roo-code/types" import { waitUntilCompleted } from "./utils" import { setDefaultSuiteTimeout } from "./test-utils" +type CapturedAnthropicRequest = { + model?: string + thinkingType?: string + lastUserMessage: string +} + +function isMessagesUrl(rawUrl: string): boolean { + try { + return new URL(rawUrl).pathname.endsWith("/v1/messages") + } catch { + return false + } +} + +function readRequestBody(req: IncomingMessage): Promise { + return new Promise((resolve, reject) => { + const chunks: Buffer[] = [] + req.on("data", (chunk) => chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk))) + req.on("end", () => resolve(Buffer.concat(chunks).toString("utf8"))) + req.on("error", reject) + }) +} + +function writeResponseHeaders(target: ServerResponse, source: Response) { + const headers: Record = {} + source.headers.forEach((value, key) => { + if (key.toLowerCase() !== "content-length") { + headers[key] = value + } + }) + target.writeHead(source.status, headers) +} + +async function pipeFetchResponse(target: ServerResponse, source: Response) { + writeResponseHeaders(target, source) + + if (!source.body) { + target.end() + return + } + + const reader = source.body.getReader() + while (true) { + const { done, value } = await reader.read() + if (done) { + break + } + target.write(value) + } + + target.end() +} + +async function withAnthropicProxy( + baseUrl: string, + run: (args: { proxyUrl: string; requests: CapturedAnthropicRequest[] }) => Promise, +): Promise { + const requests: CapturedAnthropicRequest[] = [] + const server = createServer(async (req, res) => { + try { + const requestUrl = req.url ?? "/" + + if (!isMessagesUrl(`http://127.0.0.1${requestUrl}`)) { + res.writeHead(404) + res.end("Not found") + return + } + + const bodyText = await readRequestBody(req) + const body = JSON.parse(bodyText) as { + model?: string + thinking?: { type?: string } + messages?: Array<{ role?: string; content?: unknown }> + } + + const lastUser = [...(body.messages ?? [])].reverse().find((message) => message.role === "user") + const lastUserMessage = + typeof lastUser?.content === "string" ? lastUser.content : JSON.stringify(lastUser?.content ?? "") + + requests.push({ + model: body.model, + thinkingType: body.thinking?.type, + lastUserMessage, + }) + + const forwardHeaders: Record = {} + for (const [key, value] of Object.entries(req.headers)) { + if ( + key.toLowerCase() !== "host" && + key.toLowerCase() !== "content-length" && + typeof value === "string" + ) { + forwardHeaders[key] = value + } + } + + const upstream = await fetch(`${baseUrl}${requestUrl}`, { + method: req.method, + headers: forwardHeaders, + body: bodyText, + }) + + await pipeFetchResponse(res, upstream) + } catch (error) { + res.writeHead(500) + res.end(error instanceof Error ? error.message : String(error)) + } + }) + + await new Promise((resolve) => server.listen(0, "127.0.0.1", () => resolve())) + const address = server.address() + if (!address || typeof address === "string") { + server.close() + throw new Error("Failed to start Anthropic proxy server") + } + + const proxyUrl = `http://127.0.0.1:${address.port}` + + try { + return await run({ proxyUrl, requests }) + } finally { + await new Promise((resolve, reject) => server.close((error) => (error ? reject(error) : resolve()))) + } +} + suite("Claude Opus 4.7 (Anthropic)", function () { setDefaultSuiteTimeout(this) @@ -20,43 +146,63 @@ suite("Claude Opus 4.7 (Anthropic)", function () { }) }) - test("Should complete a task end-to-end using claude-opus-4-7 via Anthropic provider", async function () { - const api = globalThis.api - const aimockUrl = process.env.AIMOCK_URL - const isRecord = process.env.AIMOCK_RECORD === "true" + for (const reasoningEnabled of [true, false] as const) { + test(`Should complete a task end-to-end using claude-opus-4-7 via Anthropic provider with reasoning ${ + reasoningEnabled ? "enabled" : "disabled" + }`, async function () { + const api = globalThis.api + const aimockUrl = process.env.AIMOCK_URL + const isRecord = process.env.AIMOCK_RECORD === "true" - if (!aimockUrl && !process.env.ANTHROPIC_API_KEY) { - this.skip() - } + if (!aimockUrl && !process.env.ANTHROPIC_API_KEY) { + this.skip() + } - // aimock handles /v1/messages natively and serves Anthropic-format SSE responses. - // In record mode the real x-api-key is forwarded so aimock can proxy to api.anthropic.com. - await api.setConfiguration({ - apiProvider: "anthropic" as const, - apiKey: aimockUrl && !isRecord ? "mock-key" : process.env.ANTHROPIC_API_KEY!, - apiModelId: "claude-opus-4-7", - ...(aimockUrl && { anthropicBaseUrl: aimockUrl }), - }) + const captureBaseUrl = aimockUrl || "https://api.anthropic.com" + await withAnthropicProxy(captureBaseUrl, async ({ proxyUrl, requests }) => { + const promptTag = reasoningEnabled ? "opus47-e2e:reasoning-on" : "opus47-e2e:reasoning-off" - const messages: ClineMessage[] = [] + // aimock handles /v1/messages natively and serves Anthropic-format SSE responses. + // In record mode the real x-api-key is forwarded so aimock can proxy to api.anthropic.com. + await api.setConfiguration({ + apiProvider: "anthropic" as const, + apiKey: aimockUrl && !isRecord ? "mock-key" : process.env.ANTHROPIC_API_KEY!, + apiModelId: "claude-opus-4-7", + enableReasoningEffort: reasoningEnabled, + anthropicBaseUrl: proxyUrl, + }) - api.on(RooCodeEventName.Message, ({ message }) => { - if (message.type === "say" && message.partial === false) { - messages.push(message) - } - }) + const messages: ClineMessage[] = [] - const taskId = await api.startNewTask({ - configuration: { mode: "ask", alwaysAllowModeSwitch: true, autoApprovalEnabled: true }, - text: "opus47-e2e: what is 2+2? Reply with only the number.", - }) + api.on(RooCodeEventName.Message, ({ message }) => { + if (message.type === "say" && message.partial === false) { + messages.push(message) + } + }) - await waitUntilCompleted({ api, taskId }) + const taskId = await api.startNewTask({ + configuration: { mode: "ask", alwaysAllowModeSwitch: true, autoApprovalEnabled: true }, + text: `${promptTag}: what is 2+2? Reply with only the number.`, + }) - const completionMessage = messages.find( - ({ say, text }) => (say === "completion_result" || say === "text") && text?.trim() === "4", - ) + await waitUntilCompleted({ api, taskId }) - assert.ok(completionMessage, "Task should complete with the expected Claude Opus 4.7 response") - }) + const firstRequest = requests[0] + assert.ok(firstRequest, "Anthropic provider should issue at least one /v1/messages request") + assert.strictEqual(firstRequest.model, "claude-opus-4-7") + + if (reasoningEnabled) { + assert.strictEqual(firstRequest.thinkingType, "adaptive") + } else { + assert.strictEqual(firstRequest.thinkingType, undefined) + } + + const completionMessage = messages.find( + ({ say, text }) => (say === "completion_result" || say === "text") && text?.trim() === "4", + ) + + assert.ok(completionMessage, "Task should complete with the expected Claude Opus 4.7 response") + }) + }) + } }) diff --git a/packages/types/src/providers/anthropic.ts b/packages/types/src/providers/anthropic.ts index 88b3302084..2fc706d6cf 100644 --- a/packages/types/src/providers/anthropic.ts +++ b/packages/types/src/providers/anthropic.ts @@ -100,7 +100,9 @@ export const anthropicModels = { outputPrice: 25.0, // $25 per million output tokens cacheWritesPrice: 6.25, // $6.25 per million tokens cacheReadsPrice: 0.5, // $0.50 per million tokens - supportsReasoningBudget: true, + // Direct Anthropic Opus 4.7 no longer accepts budget-token thinking payloads. + // Treat it as a binary reasoning toggle on this provider path. + supportsReasoningBinary: true, supportsTemperature: false, }, "claude-opus-4-5-20251101": { diff --git a/src/api/providers/__tests__/anthropic.spec.ts b/src/api/providers/__tests__/anthropic.spec.ts index c75dc2db75..8cf1960048 100644 --- a/src/api/providers/__tests__/anthropic.spec.ts +++ b/src/api/providers/__tests__/anthropic.spec.ts @@ -234,6 +234,50 @@ describe("AnthropicHandler", () => { expect(requestOptions?.headers?.["anthropic-beta"]).toContain("prompt-caching-2024-07-31") expect(requestOptions?.headers?.["anthropic-beta"]).not.toContain("context-1m-2025-08-07") }) + + it("should use adaptive thinking for Claude Opus 4.7 when reasoning is enabled", async () => { + const opus47Handler = new AnthropicHandler({ + apiKey: "test-api-key", + apiModelId: "claude-opus-4-7", + enableReasoningEffort: true, + }) + + const stream = opus47Handler.createMessage(systemPrompt, [ + { + role: "user", + content: [{ type: "text" as const, text: "Hello" }], + }, + ]) + + for await (const _chunk of stream) { + // Consume stream + } + + const requestBody = mockCreate.mock.calls[mockCreate.mock.calls.length - 1]?.[0] + expect(requestBody?.thinking).toEqual({ type: "adaptive" }) + }) + + it("should omit thinking for Claude Opus 4.7 when reasoning is disabled", async () => { + const opus47Handler = new AnthropicHandler({ + apiKey: "test-api-key", + apiModelId: "claude-opus-4-7", + enableReasoningEffort: false, + }) + + const stream = opus47Handler.createMessage(systemPrompt, [ + { + role: "user", + content: [{ type: "text" as const, text: "Hello" }], + }, + ]) + + for await (const _chunk of stream) { + // Consume stream + } + + const requestBody = mockCreate.mock.calls[mockCreate.mock.calls.length - 1]?.[0] + expect(requestBody?.thinking).toBeUndefined() + }) }) describe("completePrompt", () => { @@ -354,8 +398,10 @@ describe("AnthropicHandler", () => { expect(model.id).toBe("claude-opus-4-7") expect(model.info.maxTokens).toBe(128000) expect(model.info.contextWindow).toBe(1000000) - expect(model.info.supportsReasoningBudget).toBe(true) + expect(model.info.supportsReasoningBinary).toBe(true) + expect(model.info.supportsReasoningBudget).toBeUndefined() expect(model.info.supportsPromptCache).toBe(true) + expect(model.reasoningBudget).toBeUndefined() }) it("should enable 1M context for Claude 4.5 Sonnet when beta flag is set", () => { diff --git a/src/api/providers/anthropic.ts b/src/api/providers/anthropic.ts index 0498816082..07031edeaa 100644 --- a/src/api/providers/anthropic.ts +++ b/src/api/providers/anthropic.ts @@ -18,6 +18,7 @@ import type { ApiHandlerOptions } from "../../shared/api" import { ApiStream } from "../transform/stream" import { getModelParams } from "../transform/model-params" import { filterNonAnthropicBlocks } from "../transform/anthropic-filter" +import { getAnthropicProviderReasoning, type AnthropicProviderReasoningParams } from "../transform/reasoning" import { handleProviderError } from "./utils/error-handler" import { BaseProvider } from "./base-provider" @@ -58,8 +59,16 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa betas = ["fine-grained-tool-streaming-2025-05-14"], maxTokens, temperature, - reasoning: thinking, + info, + reasoningBudget, } = this.getModel() + const thinking = getAnthropicProviderReasoning({ + modelId, + model: info, + reasoningBudget, + reasoningEffort: undefined, + settings: this.options, + }) // Filter out non-Anthropic blocks (reasoning, thoughtSignature, etc.) before sending to the API const sanitizedMessages = filterNonAnthropicBlocks(messages) @@ -114,33 +123,36 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa const secondLastMsgUserIndex = userMsgIndices[userMsgIndices.length - 2] ?? -1 try { - stream = await this.client.messages.create( - { - model: modelId, - max_tokens: maxTokens ?? ANTHROPIC_DEFAULT_MAX_TOKENS, - temperature, - thinking, - // Setting cache breakpoint for system prompt so new tasks can reuse it. - system: [{ text: systemPrompt, type: "text", cache_control: cacheControl }], - messages: sanitizedMessages.map((message, index) => { - if (index === lastUserMsgIndex || index === secondLastMsgUserIndex) { - return { - ...message, - content: - typeof message.content === "string" - ? [{ type: "text", text: message.content, cache_control: cacheControl }] - : message.content.map((content, contentIndex) => - contentIndex === message.content.length - 1 - ? { ...content, cache_control: cacheControl } - : content, - ), - } + const requestParams: Anthropic.Messages.MessageCreateParamsStreaming & { + thinking?: AnthropicProviderReasoningParams + } = { + model: modelId, + max_tokens: maxTokens ?? ANTHROPIC_DEFAULT_MAX_TOKENS, + temperature, + thinking, + // Setting cache breakpoint for system prompt so new tasks can reuse it. + system: [{ text: systemPrompt, type: "text", cache_control: cacheControl }], + messages: sanitizedMessages.map((message, index) => { + if (index === lastUserMsgIndex || index === secondLastMsgUserIndex) { + return { + ...message, + content: + typeof message.content === "string" + ? [{ type: "text", text: message.content, cache_control: cacheControl }] + : message.content.map((content, contentIndex) => + contentIndex === message.content.length - 1 + ? { ...content, cache_control: cacheControl } + : content, + ), } - return message - }), - stream: true, - ...nativeToolParams, - }, + } + return message + }), + stream: true, + ...nativeToolParams, + } + stream = await this.client.messages.create( + requestParams as Anthropic.Messages.MessageCreateParamsStreaming, (() => { // prompt caching: https://x.com/alexalbert__/status/1823751995901272068 // https://github.com/anthropics/anthropic-sdk-typescript?tab=readme-ov-file#default-headers @@ -184,15 +196,21 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa } default: { try { - stream = (await this.client.messages.create({ + const requestParams: Anthropic.Messages.MessageCreateParamsStreaming & { + thinking?: AnthropicProviderReasoningParams + } = { model: modelId, max_tokens: maxTokens ?? ANTHROPIC_DEFAULT_MAX_TOKENS, temperature, + thinking, system: [{ text: systemPrompt, type: "text" }], messages: sanitizedMessages, stream: true, ...nativeToolParams, - })) as any + } + stream = (await this.client.messages.create( + requestParams as Anthropic.Messages.MessageCreateParamsStreaming, + )) as any } catch (error) { TelemetryService.instance.captureException( new ApiProviderError( diff --git a/src/api/transform/__tests__/reasoning.spec.ts b/src/api/transform/__tests__/reasoning.spec.ts index 0b402c6d55..f577b8e602 100644 --- a/src/api/transform/__tests__/reasoning.spec.ts +++ b/src/api/transform/__tests__/reasoning.spec.ts @@ -5,12 +5,14 @@ import type { ModelInfo, ProviderSettings, ReasoningEffortWithMinimal } from "@r import { getOpenRouterReasoning, getAnthropicReasoning, + getAnthropicProviderReasoning, getOpenAiReasoning, getRooReasoning, getGeminiReasoning, GetModelReasoningOptions, OpenRouterReasoningParams, AnthropicReasoningParams, + AnthropicProviderReasoningParams, OpenAiReasoningParams, RooReasoningParams, GeminiReasoningParams, @@ -460,6 +462,59 @@ describe("reasoning.ts", () => { }) }) + describe("getAnthropicProviderReasoning", () => { + it("should use adaptive thinking for Claude Opus 4.7 when reasoning is enabled", () => { + const modelWithBinaryReasoning: ModelInfo = { + ...baseModel, + supportsReasoningBinary: true, + } + + const result = getAnthropicProviderReasoning({ + ...baseOptions, + modelId: "claude-opus-4-7", + model: modelWithBinaryReasoning, + settings: { enableReasoningEffort: true }, + reasoningBudget: undefined, + }) + + expect(result).toEqual({ type: "adaptive" }) + }) + + it("should omit thinking for Claude Opus 4.7 when reasoning is disabled", () => { + const modelWithBinaryReasoning: ModelInfo = { + ...baseModel, + supportsReasoningBinary: true, + } + + const result = getAnthropicProviderReasoning({ + ...baseOptions, + modelId: "claude-opus-4-7", + model: modelWithBinaryReasoning, + settings: { enableReasoningEffort: false }, + reasoningBudget: undefined, + }) + + expect(result).toBeUndefined() + }) + + it("should preserve budget thinking for older Anthropic reasoning-budget models", () => { + const modelWithBudgetReasoning: ModelInfo = { + ...baseModel, + supportsReasoningBudget: true, + } + + const result = getAnthropicProviderReasoning({ + ...baseOptions, + modelId: "claude-sonnet-4-6", + model: modelWithBudgetReasoning, + settings: { enableReasoningEffort: true }, + reasoningBudget: 1000, + }) + + expect(result).toEqual({ type: "enabled", budget_tokens: 1000 }) + }) + }) + describe("getOpenAiReasoning", () => { it("should return reasoning effort params when model supports reasoning effort and has effort in settings", () => { const modelWithSupported: ModelInfo = { @@ -1124,6 +1179,23 @@ describe("reasoning.ts", () => { } }) + it("should return correct types for Anthropic provider reasoning params", () => { + const modelWithBinaryReasoning: ModelInfo = { + ...baseModel, + supportsReasoningBinary: true, + } + + const result: AnthropicProviderReasoningParams | undefined = getAnthropicProviderReasoning({ + ...baseOptions, + modelId: "claude-opus-4-7", + model: modelWithBinaryReasoning, + settings: { enableReasoningEffort: true }, + reasoningBudget: undefined, + }) + + expect(result).toEqual({ type: "adaptive" }) + }) + it("should return correct types for OpenAI reasoning params", () => { const modelWithEffort: ModelInfo = { ...baseModel, diff --git a/src/api/transform/reasoning.ts b/src/api/transform/reasoning.ts index 446221d256..873a4cdc00 100644 --- a/src/api/transform/reasoning.ts +++ b/src/api/transform/reasoning.ts @@ -18,6 +18,7 @@ export type RooReasoningParams = { } export type AnthropicReasoningParams = BetaThinkingConfigParam +export type AnthropicProviderReasoningParams = AnthropicReasoningParams | { type: "adaptive" } export type OpenAiReasoningParams = { reasoning_effort: OpenAI.Chat.ChatCompletionCreateParams["reasoning_effort"] } @@ -111,6 +112,19 @@ export const getAnthropicReasoning = ({ }: GetModelReasoningOptions): AnthropicReasoningParams | undefined => shouldUseReasoningBudget({ model, settings }) ? { type: "enabled", budget_tokens: reasoningBudget! } : undefined +export const getAnthropicProviderReasoning = ({ + modelId, + model, + reasoningBudget, + settings, +}: GetModelReasoningOptions & { modelId: string }): AnthropicProviderReasoningParams | undefined => { + if (modelId === "claude-opus-4-7" && settings.enableReasoningEffort) { + return { type: "adaptive" } + } + + return getAnthropicReasoning({ model, reasoningBudget, reasoningEffort: undefined, settings }) +} + export const getOpenAiReasoning = ({ model, reasoningEffort, From f223bd3a70ee774517e68ed5969360f451c3a127 Mon Sep 17 00:00:00 2001 From: Roomote Date: Thu, 14 May 2026 23:01:57 +0000 Subject: [PATCH 2/5] fix: relax anthropic request typing for adaptive thinking --- src/api/providers/anthropic.ts | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/api/providers/anthropic.ts b/src/api/providers/anthropic.ts index 07031edeaa..657405c727 100644 --- a/src/api/providers/anthropic.ts +++ b/src/api/providers/anthropic.ts @@ -18,7 +18,7 @@ import type { ApiHandlerOptions } from "../../shared/api" import { ApiStream } from "../transform/stream" import { getModelParams } from "../transform/model-params" import { filterNonAnthropicBlocks } from "../transform/anthropic-filter" -import { getAnthropicProviderReasoning, type AnthropicProviderReasoningParams } from "../transform/reasoning" +import { getAnthropicProviderReasoning } from "../transform/reasoning" import { handleProviderError } from "./utils/error-handler" import { BaseProvider } from "./base-provider" @@ -123,9 +123,7 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa const secondLastMsgUserIndex = userMsgIndices[userMsgIndices.length - 2] ?? -1 try { - const requestParams: Anthropic.Messages.MessageCreateParamsStreaming & { - thinking?: AnthropicProviderReasoningParams - } = { + const requestParams = { model: modelId, max_tokens: maxTokens ?? ANTHROPIC_DEFAULT_MAX_TOKENS, temperature, @@ -196,9 +194,7 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa } default: { try { - const requestParams: Anthropic.Messages.MessageCreateParamsStreaming & { - thinking?: AnthropicProviderReasoningParams - } = { + const requestParams = { model: modelId, max_tokens: maxTokens ?? ANTHROPIC_DEFAULT_MAX_TOKENS, temperature, From f8b8e10634185e87f5cc62033d2669ff1f65830f Mon Sep 17 00:00:00 2001 From: Roomote Date: Thu, 14 May 2026 23:16:09 +0000 Subject: [PATCH 3/5] fix: preserve opus 4.7 anthropic token handling --- .../src/suite/anthropic-opus-4-7.test.ts | 23 +++++++++++++-- packages/types/src/providers/anthropic.ts | 7 +++-- src/api/providers/__tests__/anthropic.spec.ts | 29 ++++++++++++++++++- src/shared/__tests__/api.spec.ts | 26 +++++++++++++++++ 4 files changed, 79 insertions(+), 6 deletions(-) diff --git a/apps/vscode-e2e/src/suite/anthropic-opus-4-7.test.ts b/apps/vscode-e2e/src/suite/anthropic-opus-4-7.test.ts index 8230b8f377..62a409c374 100644 --- a/apps/vscode-e2e/src/suite/anthropic-opus-4-7.test.ts +++ b/apps/vscode-e2e/src/suite/anthropic-opus-4-7.test.ts @@ -12,6 +12,8 @@ type CapturedAnthropicRequest = { lastUserMessage: string } +const ALLOWED_PROXY_HOSTS = new Set(["127.0.0.1", "localhost", "api.anthropic.com"]) + function isMessagesUrl(rawUrl: string): boolean { try { return new URL(rawUrl).pathname.endsWith("/v1/messages") @@ -59,6 +61,20 @@ async function pipeFetchResponse(target: ServerResponse, source: Response) { target.end() } +function resolveAllowedUpstreamUrl(baseUrl: string, requestUrl: string): URL { + const upstreamBase = new URL(baseUrl) + const isLocalProxy = upstreamBase.hostname === "127.0.0.1" || upstreamBase.hostname === "localhost" + + if ( + !ALLOWED_PROXY_HOSTS.has(upstreamBase.hostname) || + (isLocalProxy ? upstreamBase.protocol !== "http:" : baseUrl !== "https://api.anthropic.com") + ) { + throw new Error(`Unexpected Anthropic proxy target: ${upstreamBase.origin}`) + } + + return new URL(requestUrl, upstreamBase) +} + async function withAnthropicProxy( baseUrl: string, run: (args: { proxyUrl: string; requests: CapturedAnthropicRequest[] }) => Promise, @@ -102,16 +118,17 @@ async function withAnthropicProxy( } } - const upstream = await fetch(`${baseUrl}${requestUrl}`, { + const upstreamUrl = resolveAllowedUpstreamUrl(baseUrl, requestUrl) + const upstream = await fetch(upstreamUrl, { method: req.method, headers: forwardHeaders, body: bodyText, }) await pipeFetchResponse(res, upstream) - } catch (error) { + } catch { res.writeHead(500) - res.end(error instanceof Error ? error.message : String(error)) + res.end("Anthropic proxy request failed") } }) diff --git a/packages/types/src/providers/anthropic.ts b/packages/types/src/providers/anthropic.ts index 2fc706d6cf..f3e99c691d 100644 --- a/packages/types/src/providers/anthropic.ts +++ b/packages/types/src/providers/anthropic.ts @@ -100,8 +100,11 @@ export const anthropicModels = { outputPrice: 25.0, // $25 per million output tokens cacheWritesPrice: 6.25, // $6.25 per million tokens cacheReadsPrice: 0.5, // $0.50 per million tokens - // Direct Anthropic Opus 4.7 no longer accepts budget-token thinking payloads. - // Treat it as a binary reasoning toggle on this provider path. + // Keep the hybrid-reasoning capability so Anthropic token-cap handling and + // stored max-token overrides behave the same as before. + supportsReasoningBudget: true, + // Direct Anthropic Opus 4.7 no longer accepts budget-token thinking payloads, + // so the UI should still present a simple on/off toggle on this provider path. supportsReasoningBinary: true, supportsTemperature: false, }, diff --git a/src/api/providers/__tests__/anthropic.spec.ts b/src/api/providers/__tests__/anthropic.spec.ts index 8cf1960048..1936423388 100644 --- a/src/api/providers/__tests__/anthropic.spec.ts +++ b/src/api/providers/__tests__/anthropic.spec.ts @@ -255,6 +255,7 @@ describe("AnthropicHandler", () => { const requestBody = mockCreate.mock.calls[mockCreate.mock.calls.length - 1]?.[0] expect(requestBody?.thinking).toEqual({ type: "adaptive" }) + expect(requestBody?.max_tokens).toBe(16384) }) it("should omit thinking for Claude Opus 4.7 when reasoning is disabled", async () => { @@ -277,6 +278,31 @@ describe("AnthropicHandler", () => { const requestBody = mockCreate.mock.calls[mockCreate.mock.calls.length - 1]?.[0] expect(requestBody?.thinking).toBeUndefined() + expect(requestBody?.max_tokens).toBe(8192) + }) + + it("should preserve custom maxTokens for Claude Opus 4.7 when reasoning is enabled", async () => { + const opus47Handler = new AnthropicHandler({ + apiKey: "test-api-key", + apiModelId: "claude-opus-4-7", + enableReasoningEffort: true, + modelMaxTokens: 32768, + }) + + const stream = opus47Handler.createMessage(systemPrompt, [ + { + role: "user", + content: [{ type: "text" as const, text: "Hello" }], + }, + ]) + + for await (const _chunk of stream) { + // Consume stream + } + + const requestBody = mockCreate.mock.calls[mockCreate.mock.calls.length - 1]?.[0] + expect(requestBody?.thinking).toEqual({ type: "adaptive" }) + expect(requestBody?.max_tokens).toBe(32768) }) }) @@ -398,8 +424,9 @@ describe("AnthropicHandler", () => { expect(model.id).toBe("claude-opus-4-7") expect(model.info.maxTokens).toBe(128000) expect(model.info.contextWindow).toBe(1000000) + expect(model.maxTokens).toBe(8192) expect(model.info.supportsReasoningBinary).toBe(true) - expect(model.info.supportsReasoningBudget).toBeUndefined() + expect(model.info.supportsReasoningBudget).toBe(true) expect(model.info.supportsPromptCache).toBe(true) expect(model.reasoningBudget).toBeUndefined() }) diff --git a/src/shared/__tests__/api.spec.ts b/src/shared/__tests__/api.spec.ts index b0c2db124b..c147446610 100644 --- a/src/shared/__tests__/api.spec.ts +++ b/src/shared/__tests__/api.spec.ts @@ -80,6 +80,32 @@ describe("getModelMaxOutputTokens", () => { expect(result).toBe(ANTHROPIC_DEFAULT_MAX_TOKENS) // Should be 8192, not 64_000 }) + test("should preserve Anthropic hybrid token handling when a model also supports binary reasoning", () => { + const model: ModelInfo = { + contextWindow: 1_000_000, + supportsPromptCache: true, + supportsReasoningBudget: true, + supportsReasoningBinary: true, + maxTokens: 128_000, + } + + expect( + getModelMaxOutputTokens({ + modelId: "claude-opus-4-7", + model, + settings: { apiProvider: "anthropic", enableReasoningEffort: false }, + }), + ).toBe(ANTHROPIC_DEFAULT_MAX_TOKENS) + + expect( + getModelMaxOutputTokens({ + modelId: "claude-opus-4-7", + model, + settings: { apiProvider: "anthropic", enableReasoningEffort: true, modelMaxTokens: 32_768 }, + }), + ).toBe(32_768) + }) + test("should return model.maxTokens for non-Anthropic models that support reasoning budget but aren't using it", () => { const geminiModelId = "gemini-2.5-flash-preview-04-17" const model: ModelInfo = { From 5831941a0663c3fcfb0b32ae8bba477418baabb3 Mon Sep 17 00:00:00 2001 From: Roomote Date: Thu, 14 May 2026 23:40:18 +0000 Subject: [PATCH 4/5] fix: constrain anthropic e2e proxy path --- apps/vscode-e2e/src/suite/anthropic-opus-4-7.test.ts | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/apps/vscode-e2e/src/suite/anthropic-opus-4-7.test.ts b/apps/vscode-e2e/src/suite/anthropic-opus-4-7.test.ts index 62a409c374..653cbb844d 100644 --- a/apps/vscode-e2e/src/suite/anthropic-opus-4-7.test.ts +++ b/apps/vscode-e2e/src/suite/anthropic-opus-4-7.test.ts @@ -13,10 +13,11 @@ type CapturedAnthropicRequest = { } const ALLOWED_PROXY_HOSTS = new Set(["127.0.0.1", "localhost", "api.anthropic.com"]) +const ANTHROPIC_MESSAGES_PATH = "/v1/messages" function isMessagesUrl(rawUrl: string): boolean { try { - return new URL(rawUrl).pathname.endsWith("/v1/messages") + return new URL(rawUrl).pathname.endsWith(ANTHROPIC_MESSAGES_PATH) } catch { return false } @@ -61,7 +62,7 @@ async function pipeFetchResponse(target: ServerResponse, source: Response) { target.end() } -function resolveAllowedUpstreamUrl(baseUrl: string, requestUrl: string): URL { +function resolveAllowedUpstreamUrl(baseUrl: string): URL { const upstreamBase = new URL(baseUrl) const isLocalProxy = upstreamBase.hostname === "127.0.0.1" || upstreamBase.hostname === "localhost" @@ -72,7 +73,7 @@ function resolveAllowedUpstreamUrl(baseUrl: string, requestUrl: string): URL { throw new Error(`Unexpected Anthropic proxy target: ${upstreamBase.origin}`) } - return new URL(requestUrl, upstreamBase) + return new URL(ANTHROPIC_MESSAGES_PATH, upstreamBase) } async function withAnthropicProxy( @@ -118,7 +119,7 @@ async function withAnthropicProxy( } } - const upstreamUrl = resolveAllowedUpstreamUrl(baseUrl, requestUrl) + const upstreamUrl = resolveAllowedUpstreamUrl(baseUrl) const upstream = await fetch(upstreamUrl, { method: req.method, headers: forwardHeaders, From f7bf2529370b4a3902cce9b147339cdf932de139 Mon Sep 17 00:00:00 2001 From: Roomote Date: Thu, 14 May 2026 23:59:51 +0000 Subject: [PATCH 5/5] Address follow-up Anthropic review feedback --- apps/vscode-e2e/src/suite/anthropic-opus-4-7.test.ts | 11 +++++++++-- src/api/providers/anthropic.ts | 1 - src/api/transform/__tests__/reasoning.spec.ts | 4 ---- src/api/transform/reasoning.ts | 5 ++--- 4 files changed, 11 insertions(+), 10 deletions(-) diff --git a/apps/vscode-e2e/src/suite/anthropic-opus-4-7.test.ts b/apps/vscode-e2e/src/suite/anthropic-opus-4-7.test.ts index 653cbb844d..64c08c2ec0 100644 --- a/apps/vscode-e2e/src/suite/anthropic-opus-4-7.test.ts +++ b/apps/vscode-e2e/src/suite/anthropic-opus-4-7.test.ts @@ -81,6 +81,7 @@ async function withAnthropicProxy( run: (args: { proxyUrl: string; requests: CapturedAnthropicRequest[] }) => Promise, ): Promise { const requests: CapturedAnthropicRequest[] = [] + let proxyError: Error | undefined const server = createServer(async (req, res) => { try { const requestUrl = req.url ?? "/" @@ -127,7 +128,9 @@ async function withAnthropicProxy( }) await pipeFetchResponse(res, upstream) - } catch { + } catch (error) { + proxyError = error instanceof Error ? error : new Error(String(error)) + console.error("Anthropic proxy request failed:", proxyError) res.writeHead(500) res.end("Anthropic proxy request failed") } @@ -143,7 +146,11 @@ async function withAnthropicProxy( const proxyUrl = `http://127.0.0.1:${address.port}` try { - return await run({ proxyUrl, requests }) + const result = await run({ proxyUrl, requests }) + if (proxyError) { + throw proxyError + } + return result } finally { await new Promise((resolve, reject) => server.close((error) => (error ? reject(error) : resolve()))) } diff --git a/src/api/providers/anthropic.ts b/src/api/providers/anthropic.ts index 657405c727..32101588ae 100644 --- a/src/api/providers/anthropic.ts +++ b/src/api/providers/anthropic.ts @@ -63,7 +63,6 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa reasoningBudget, } = this.getModel() const thinking = getAnthropicProviderReasoning({ - modelId, model: info, reasoningBudget, reasoningEffort: undefined, diff --git a/src/api/transform/__tests__/reasoning.spec.ts b/src/api/transform/__tests__/reasoning.spec.ts index f577b8e602..15e531b0b7 100644 --- a/src/api/transform/__tests__/reasoning.spec.ts +++ b/src/api/transform/__tests__/reasoning.spec.ts @@ -471,7 +471,6 @@ describe("reasoning.ts", () => { const result = getAnthropicProviderReasoning({ ...baseOptions, - modelId: "claude-opus-4-7", model: modelWithBinaryReasoning, settings: { enableReasoningEffort: true }, reasoningBudget: undefined, @@ -488,7 +487,6 @@ describe("reasoning.ts", () => { const result = getAnthropicProviderReasoning({ ...baseOptions, - modelId: "claude-opus-4-7", model: modelWithBinaryReasoning, settings: { enableReasoningEffort: false }, reasoningBudget: undefined, @@ -505,7 +503,6 @@ describe("reasoning.ts", () => { const result = getAnthropicProviderReasoning({ ...baseOptions, - modelId: "claude-sonnet-4-6", model: modelWithBudgetReasoning, settings: { enableReasoningEffort: true }, reasoningBudget: 1000, @@ -1187,7 +1184,6 @@ describe("reasoning.ts", () => { const result: AnthropicProviderReasoningParams | undefined = getAnthropicProviderReasoning({ ...baseOptions, - modelId: "claude-opus-4-7", model: modelWithBinaryReasoning, settings: { enableReasoningEffort: true }, reasoningBudget: undefined, diff --git a/src/api/transform/reasoning.ts b/src/api/transform/reasoning.ts index 873a4cdc00..ec166d7985 100644 --- a/src/api/transform/reasoning.ts +++ b/src/api/transform/reasoning.ts @@ -113,12 +113,11 @@ export const getAnthropicReasoning = ({ shouldUseReasoningBudget({ model, settings }) ? { type: "enabled", budget_tokens: reasoningBudget! } : undefined export const getAnthropicProviderReasoning = ({ - modelId, model, reasoningBudget, settings, -}: GetModelReasoningOptions & { modelId: string }): AnthropicProviderReasoningParams | undefined => { - if (modelId === "claude-opus-4-7" && settings.enableReasoningEffort) { +}: GetModelReasoningOptions): AnthropicProviderReasoningParams | undefined => { + if (model.supportsReasoningBinary && settings.enableReasoningEffort) { return { type: "adaptive" } }