/** Fields of an Anthropic `/v1/messages` request body that the proxy records for later assertions. */
type CapturedAnthropicRequest = {
	model?: string
	thinkingType?: string
	lastUserMessage: string
}

const ALLOWED_PROXY_HOSTS = new Set(["127.0.0.1", "localhost", "api.anthropic.com"])
const ANTHROPIC_MESSAGES_PATH = "/v1/messages"

// Request headers that describe the client->proxy hop (or are recomputed by fetch) and are not forwarded upstream.
const SKIPPED_REQUEST_HEADERS = new Set([
	"host",
	"content-length",
	"connection",
	"keep-alive",
	"transfer-encoding",
	"upgrade",
])

// Response headers that no longer describe the relayed body: fetch exposes the decoded byte stream while
// still reporting the upstream `content-encoding`, and Node re-frames the body on the proxy->client hop.
const SKIPPED_RESPONSE_HEADERS = new Set([
	"content-length",
	"content-encoding",
	"transfer-encoding",
	"connection",
	"keep-alive",
])

/**
 * Reports whether `rawUrl` parses as a URL whose path ends with the Anthropic messages path.
 *
 * @param rawUrl Absolute URL string to inspect.
 * @returns `false` when the string cannot be parsed as a URL.
 */
function isMessagesUrl(rawUrl: string): boolean {
	try {
		return new URL(rawUrl).pathname.endsWith(ANTHROPIC_MESSAGES_PATH)
	} catch {
		return false
	}
}

/**
 * Buffers an incoming request body and decodes it as UTF-8.
 *
 * @param req Incoming HTTP request to drain.
 * @returns The complete body text; rejects if the request stream emits `error`.
 */
function readRequestBody(req: IncomingMessage): Promise<string> {
	return new Promise<string>((resolve, reject) => {
		const chunks: Buffer[] = []
		req.on("data", (chunk) => chunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk)))
		req.on("end", () => resolve(Buffer.concat(chunks).toString("utf8")))
		req.on("error", reject)
	})
}

/**
 * Copies the upstream status and headers onto the proxy response, dropping the
 * headers listed in SKIPPED_RESPONSE_HEADERS.
 *
 * @param target Response object for the client connected to the proxy.
 * @param source Upstream fetch response.
 */
function writeResponseHeaders(target: ServerResponse, source: Response): void {
	const headers: Record<string, string> = {}
	source.headers.forEach((value, key) => {
		if (!SKIPPED_RESPONSE_HEADERS.has(key.toLowerCase())) {
			headers[key] = value
		}
	})
	target.writeHead(source.status, headers)
}

/**
 * Relays an upstream fetch response (status, headers, then body chunks) to the client and ends the response.
 *
 * @param target Response object for the client connected to the proxy.
 * @param source Upstream fetch response; a missing body ends the client response immediately.
 */
async function pipeFetchResponse(target: ServerResponse, source: Response): Promise<void> {
	writeResponseHeaders(target, source)

	if (!source.body) {
		target.end()
		return
	}

	const reader = source.body.getReader()
	while (true) {
		const { done, value } = await reader.read()
		if (done) {
			break
		}
		target.write(value)
	}

	target.end()
}

/**
 * Validates the configured upstream base URL and returns the messages endpoint on it.
 *
 * Loopback hosts must use plain `http:`; the only other accepted value is exactly
 * `https://api.anthropic.com`.
 *
 * @param baseUrl Upstream base URL the proxy forwards to.
 * @returns `/v1/messages` resolved against `baseUrl`.
 * @throws Error when the host or scheme is not on the allow-list, or `baseUrl` is not a valid URL.
 */
function resolveAllowedUpstreamUrl(baseUrl: string): URL {
	const upstreamBase = new URL(baseUrl)
	const isLocalProxy = upstreamBase.hostname === "127.0.0.1" || upstreamBase.hostname === "localhost"

	if (
		!ALLOWED_PROXY_HOSTS.has(upstreamBase.hostname) ||
		(isLocalProxy ? upstreamBase.protocol !== "http:" : baseUrl !== "https://api.anthropic.com")
	) {
		throw new Error(`Unexpected Anthropic proxy target: ${upstreamBase.origin}`)
	}

	return new URL(ANTHROPIC_MESSAGES_PATH, upstreamBase)
}

/**
 * Starts a loopback HTTP proxy in front of `baseUrl`, records every `/v1/messages`
 * request body that passes through it, runs `run`, and always shuts the proxy down.
 *
 * @param baseUrl Upstream base URL; checked by `resolveAllowedUpstreamUrl` on every request.
 * @param run Callback given the proxy's base URL and the live array of captured requests.
 * @returns Whatever `run` resolves to.
 * @throws The most recent proxy-side failure if `run` itself succeeded, or any error from
 *   `run`, from binding the port, or from closing the server.
 */
async function withAnthropicProxy<T>(
	baseUrl: string,
	run: (args: { proxyUrl: string; requests: CapturedAnthropicRequest[] }) => Promise<T>,
): Promise<T> {
	const requests: CapturedAnthropicRequest[] = []
	let proxyError: Error | undefined

	const server = createServer(async (req, res) => {
		try {
			const requestUrl = req.url ?? "/"

			// req.url is path-only, so prefix a dummy origin to make it parseable.
			if (!isMessagesUrl(`http://127.0.0.1${requestUrl}`)) {
				res.writeHead(404)
				res.end("Not found")
				return
			}

			const bodyText = await readRequestBody(req)
			const body = JSON.parse(bodyText) as {
				model?: string
				thinking?: { type?: string }
				messages?: Array<{ role?: string; content?: unknown }>
			}

			const lastUser = [...(body.messages ?? [])].reverse().find((message) => message.role === "user")
			const lastUserMessage =
				typeof lastUser?.content === "string" ? lastUser.content : JSON.stringify(lastUser?.content ?? "")

			requests.push({
				model: body.model,
				thinkingType: body.thinking?.type,
				lastUserMessage,
			})

			const forwardHeaders: Record<string, string> = {}
			for (const [key, value] of Object.entries(req.headers)) {
				if (!SKIPPED_REQUEST_HEADERS.has(key.toLowerCase()) && typeof value === "string") {
					forwardHeaders[key] = value
				}
			}

			const upstreamUrl = resolveAllowedUpstreamUrl(baseUrl)
			const upstream = await fetch(upstreamUrl, {
				method: req.method,
				headers: forwardHeaders,
				body: bodyText,
			})

			await pipeFetchResponse(res, upstream)
		} catch (error) {
			proxyError = error instanceof Error ? error : new Error(String(error))
			console.error("Anthropic proxy request failed:", proxyError)
			if (res.headersSent) {
				// Part of the upstream reply is already on the wire; a second writeHead would throw,
				// so drop the connection to signal the truncated response instead.
				res.destroy()
			} else {
				res.writeHead(500)
				res.end("Anthropic proxy request failed")
			}
		}
	})

	await new Promise<void>((resolve, reject) => {
		// Surface bind failures instead of leaving this promise pending forever.
		server.once("error", reject)
		server.listen(0, "127.0.0.1", () => {
			server.off("error", reject)
			resolve()
		})
	})

	const address = server.address()
	if (!address || typeof address === "string") {
		server.close()
		throw new Error("Failed to start Anthropic proxy server")
	}

	const proxyUrl = `http://127.0.0.1:${address.port}`

	try {
		const result = await run({ proxyUrl, requests })
		if (proxyError) {
			throw proxyError
		}
		return result
	} finally {
		await new Promise<void>((resolve, reject) => {
			server.close((error) => (error ? reject(error) : resolve()))
			// Idle keep-alive sockets would otherwise delay the close callback on runtimes where
			// close() does not drop them itself; the method is absent on older Node versions.
			server.closeIdleConnections?.()
		})
	}
}
"mock-key" : process.env.ANTHROPIC_API_KEY!, + apiModelId: "claude-opus-4-7", + enableReasoningEffort: reasoningEnabled, + anthropicBaseUrl: proxyUrl, + }) - api.on(RooCodeEventName.Message, ({ message }) => { - if (message.type === "say" && message.partial === false) { - messages.push(message) - } - }) + const messages: ClineMessage[] = [] - const taskId = await api.startNewTask({ - configuration: { mode: "ask", alwaysAllowModeSwitch: true, autoApprovalEnabled: true }, - text: "opus47-e2e: what is 2+2? Reply with only the number.", - }) + api.on(RooCodeEventName.Message, ({ message }) => { + if (message.type === "say" && message.partial === false) { + messages.push(message) + } + }) - await waitUntilCompleted({ api, taskId }) + const taskId = await api.startNewTask({ + configuration: { mode: "ask", alwaysAllowModeSwitch: true, autoApprovalEnabled: true }, + text: `${promptTag}: what is 2+2? Reply with only the number.`, + }) - const completionMessage = messages.find( - ({ say, text }) => (say === "completion_result" || say === "text") && text?.trim() === "4", - ) + await waitUntilCompleted({ api, taskId }) - assert.ok(completionMessage, "Task should complete with the expected Claude Opus 4.7 response") - }) + const firstRequest = requests[0] + assert.ok(firstRequest, "Anthropic provider should issue at least one /v1/messages request") + assert.strictEqual(firstRequest.model, "claude-opus-4-7") + + if (reasoningEnabled) { + assert.strictEqual(firstRequest.thinkingType, "adaptive") + } else { + assert.strictEqual(firstRequest.thinkingType, undefined) + } + + const completionMessage = messages.find( + ({ say, text }) => (say === "completion_result" || say === "text") && text?.trim() === "4", + ) + + assert.ok(completionMessage, "Task should complete with the expected Claude Opus 4.7 response") + }) + }) + } }) diff --git a/packages/types/src/providers/anthropic.ts b/packages/types/src/providers/anthropic.ts index 88b3302084..f3e99c691d 100644 --- 
a/packages/types/src/providers/anthropic.ts +++ b/packages/types/src/providers/anthropic.ts @@ -100,7 +100,12 @@ export const anthropicModels = { outputPrice: 25.0, // $25 per million output tokens cacheWritesPrice: 6.25, // $6.25 per million tokens cacheReadsPrice: 0.5, // $0.50 per million tokens + // Keep the hybrid-reasoning capability so Anthropic token-cap handling and + // stored max-token overrides behave the same as before. supportsReasoningBudget: true, + // Direct Anthropic Opus 4.7 no longer accepts budget-token thinking payloads, + // so the UI should still present a simple on/off toggle on this provider path. + supportsReasoningBinary: true, supportsTemperature: false, }, "claude-opus-4-5-20251101": { diff --git a/src/api/providers/__tests__/anthropic.spec.ts b/src/api/providers/__tests__/anthropic.spec.ts index c75dc2db75..1936423388 100644 --- a/src/api/providers/__tests__/anthropic.spec.ts +++ b/src/api/providers/__tests__/anthropic.spec.ts @@ -234,6 +234,76 @@ describe("AnthropicHandler", () => { expect(requestOptions?.headers?.["anthropic-beta"]).toContain("prompt-caching-2024-07-31") expect(requestOptions?.headers?.["anthropic-beta"]).not.toContain("context-1m-2025-08-07") }) + + it("should use adaptive thinking for Claude Opus 4.7 when reasoning is enabled", async () => { + const opus47Handler = new AnthropicHandler({ + apiKey: "test-api-key", + apiModelId: "claude-opus-4-7", + enableReasoningEffort: true, + }) + + const stream = opus47Handler.createMessage(systemPrompt, [ + { + role: "user", + content: [{ type: "text" as const, text: "Hello" }], + }, + ]) + + for await (const _chunk of stream) { + // Consume stream + } + + const requestBody = mockCreate.mock.calls[mockCreate.mock.calls.length - 1]?.[0] + expect(requestBody?.thinking).toEqual({ type: "adaptive" }) + expect(requestBody?.max_tokens).toBe(16384) + }) + + it("should omit thinking for Claude Opus 4.7 when reasoning is disabled", async () => { + const opus47Handler = new 
AnthropicHandler({ + apiKey: "test-api-key", + apiModelId: "claude-opus-4-7", + enableReasoningEffort: false, + }) + + const stream = opus47Handler.createMessage(systemPrompt, [ + { + role: "user", + content: [{ type: "text" as const, text: "Hello" }], + }, + ]) + + for await (const _chunk of stream) { + // Consume stream + } + + const requestBody = mockCreate.mock.calls[mockCreate.mock.calls.length - 1]?.[0] + expect(requestBody?.thinking).toBeUndefined() + expect(requestBody?.max_tokens).toBe(8192) + }) + + it("should preserve custom maxTokens for Claude Opus 4.7 when reasoning is enabled", async () => { + const opus47Handler = new AnthropicHandler({ + apiKey: "test-api-key", + apiModelId: "claude-opus-4-7", + enableReasoningEffort: true, + modelMaxTokens: 32768, + }) + + const stream = opus47Handler.createMessage(systemPrompt, [ + { + role: "user", + content: [{ type: "text" as const, text: "Hello" }], + }, + ]) + + for await (const _chunk of stream) { + // Consume stream + } + + const requestBody = mockCreate.mock.calls[mockCreate.mock.calls.length - 1]?.[0] + expect(requestBody?.thinking).toEqual({ type: "adaptive" }) + expect(requestBody?.max_tokens).toBe(32768) + }) }) describe("completePrompt", () => { @@ -354,8 +424,11 @@ describe("AnthropicHandler", () => { expect(model.id).toBe("claude-opus-4-7") expect(model.info.maxTokens).toBe(128000) expect(model.info.contextWindow).toBe(1000000) + expect(model.maxTokens).toBe(8192) + expect(model.info.supportsReasoningBinary).toBe(true) expect(model.info.supportsReasoningBudget).toBe(true) expect(model.info.supportsPromptCache).toBe(true) + expect(model.reasoningBudget).toBeUndefined() }) it("should enable 1M context for Claude 4.5 Sonnet when beta flag is set", () => { diff --git a/src/api/providers/anthropic.ts b/src/api/providers/anthropic.ts index 0498816082..32101588ae 100644 --- a/src/api/providers/anthropic.ts +++ b/src/api/providers/anthropic.ts @@ -18,6 +18,7 @@ import type { ApiHandlerOptions } from 
"../../shared/api" import { ApiStream } from "../transform/stream" import { getModelParams } from "../transform/model-params" import { filterNonAnthropicBlocks } from "../transform/anthropic-filter" +import { getAnthropicProviderReasoning } from "../transform/reasoning" import { handleProviderError } from "./utils/error-handler" import { BaseProvider } from "./base-provider" @@ -58,8 +59,15 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa betas = ["fine-grained-tool-streaming-2025-05-14"], maxTokens, temperature, - reasoning: thinking, + info, + reasoningBudget, } = this.getModel() + const thinking = getAnthropicProviderReasoning({ + model: info, + reasoningBudget, + reasoningEffort: undefined, + settings: this.options, + }) // Filter out non-Anthropic blocks (reasoning, thoughtSignature, etc.) before sending to the API const sanitizedMessages = filterNonAnthropicBlocks(messages) @@ -114,33 +122,34 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa const secondLastMsgUserIndex = userMsgIndices[userMsgIndices.length - 2] ?? -1 try { - stream = await this.client.messages.create( - { - model: modelId, - max_tokens: maxTokens ?? ANTHROPIC_DEFAULT_MAX_TOKENS, - temperature, - thinking, - // Setting cache breakpoint for system prompt so new tasks can reuse it. - system: [{ text: systemPrompt, type: "text", cache_control: cacheControl }], - messages: sanitizedMessages.map((message, index) => { - if (index === lastUserMsgIndex || index === secondLastMsgUserIndex) { - return { - ...message, - content: - typeof message.content === "string" - ? [{ type: "text", text: message.content, cache_control: cacheControl }] - : message.content.map((content, contentIndex) => - contentIndex === message.content.length - 1 - ? { ...content, cache_control: cacheControl } - : content, - ), - } + const requestParams = { + model: modelId, + max_tokens: maxTokens ?? 
ANTHROPIC_DEFAULT_MAX_TOKENS, + temperature, + thinking, + // Setting cache breakpoint for system prompt so new tasks can reuse it. + system: [{ text: systemPrompt, type: "text", cache_control: cacheControl }], + messages: sanitizedMessages.map((message, index) => { + if (index === lastUserMsgIndex || index === secondLastMsgUserIndex) { + return { + ...message, + content: + typeof message.content === "string" + ? [{ type: "text", text: message.content, cache_control: cacheControl }] + : message.content.map((content, contentIndex) => + contentIndex === message.content.length - 1 + ? { ...content, cache_control: cacheControl } + : content, + ), } - return message - }), - stream: true, - ...nativeToolParams, - }, + } + return message + }), + stream: true, + ...nativeToolParams, + } + stream = await this.client.messages.create( + requestParams as Anthropic.Messages.MessageCreateParamsStreaming, (() => { // prompt caching: https://x.com/alexalbert__/status/1823751995901272068 // https://github.com/anthropics/anthropic-sdk-typescript?tab=readme-ov-file#default-headers @@ -184,15 +193,19 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa } default: { try { - stream = (await this.client.messages.create({ + const requestParams = { model: modelId, max_tokens: maxTokens ?? 
ANTHROPIC_DEFAULT_MAX_TOKENS, temperature, + thinking, system: [{ text: systemPrompt, type: "text" }], messages: sanitizedMessages, stream: true, ...nativeToolParams, - })) as any + } + stream = (await this.client.messages.create( + requestParams as Anthropic.Messages.MessageCreateParamsStreaming, + )) as any } catch (error) { TelemetryService.instance.captureException( new ApiProviderError( diff --git a/src/api/transform/__tests__/reasoning.spec.ts b/src/api/transform/__tests__/reasoning.spec.ts index 0b402c6d55..15e531b0b7 100644 --- a/src/api/transform/__tests__/reasoning.spec.ts +++ b/src/api/transform/__tests__/reasoning.spec.ts @@ -5,12 +5,14 @@ import type { ModelInfo, ProviderSettings, ReasoningEffortWithMinimal } from "@r import { getOpenRouterReasoning, getAnthropicReasoning, + getAnthropicProviderReasoning, getOpenAiReasoning, getRooReasoning, getGeminiReasoning, GetModelReasoningOptions, OpenRouterReasoningParams, AnthropicReasoningParams, + AnthropicProviderReasoningParams, OpenAiReasoningParams, RooReasoningParams, GeminiReasoningParams, @@ -460,6 +462,56 @@ describe("reasoning.ts", () => { }) }) + describe("getAnthropicProviderReasoning", () => { + it("should use adaptive thinking for Claude Opus 4.7 when reasoning is enabled", () => { + const modelWithBinaryReasoning: ModelInfo = { + ...baseModel, + supportsReasoningBinary: true, + } + + const result = getAnthropicProviderReasoning({ + ...baseOptions, + model: modelWithBinaryReasoning, + settings: { enableReasoningEffort: true }, + reasoningBudget: undefined, + }) + + expect(result).toEqual({ type: "adaptive" }) + }) + + it("should omit thinking for Claude Opus 4.7 when reasoning is disabled", () => { + const modelWithBinaryReasoning: ModelInfo = { + ...baseModel, + supportsReasoningBinary: true, + } + + const result = getAnthropicProviderReasoning({ + ...baseOptions, + model: modelWithBinaryReasoning, + settings: { enableReasoningEffort: false }, + reasoningBudget: undefined, + }) + + 
expect(result).toBeUndefined() + }) + + it("should preserve budget thinking for older Anthropic reasoning-budget models", () => { + const modelWithBudgetReasoning: ModelInfo = { + ...baseModel, + supportsReasoningBudget: true, + } + + const result = getAnthropicProviderReasoning({ + ...baseOptions, + model: modelWithBudgetReasoning, + settings: { enableReasoningEffort: true }, + reasoningBudget: 1000, + }) + + expect(result).toEqual({ type: "enabled", budget_tokens: 1000 }) + }) + }) + describe("getOpenAiReasoning", () => { it("should return reasoning effort params when model supports reasoning effort and has effort in settings", () => { const modelWithSupported: ModelInfo = { @@ -1124,6 +1176,22 @@ describe("reasoning.ts", () => { } }) + it("should return correct types for Anthropic provider reasoning params", () => { + const modelWithBinaryReasoning: ModelInfo = { + ...baseModel, + supportsReasoningBinary: true, + } + + const result: AnthropicProviderReasoningParams | undefined = getAnthropicProviderReasoning({ + ...baseOptions, + model: modelWithBinaryReasoning, + settings: { enableReasoningEffort: true }, + reasoningBudget: undefined, + }) + + expect(result).toEqual({ type: "adaptive" }) + }) + it("should return correct types for OpenAI reasoning params", () => { const modelWithEffort: ModelInfo = { ...baseModel, diff --git a/src/api/transform/reasoning.ts b/src/api/transform/reasoning.ts index 446221d256..ec166d7985 100644 --- a/src/api/transform/reasoning.ts +++ b/src/api/transform/reasoning.ts @@ -18,6 +18,7 @@ export type RooReasoningParams = { } export type AnthropicReasoningParams = BetaThinkingConfigParam +export type AnthropicProviderReasoningParams = AnthropicReasoningParams | { type: "adaptive" } export type OpenAiReasoningParams = { reasoning_effort: OpenAI.Chat.ChatCompletionCreateParams["reasoning_effort"] } @@ -111,6 +112,18 @@ export const getAnthropicReasoning = ({ }: GetModelReasoningOptions): AnthropicReasoningParams | undefined => 
/**
 * Chooses the `thinking` payload for the direct Anthropic provider.
 *
 * When the model advertises `supportsReasoningBinary` and the settings have
 * `enableReasoningEffort` switched on, the payload is `{ type: "adaptive" }`.
 * In every other case the decision is delegated to `getAnthropicReasoning`,
 * called with `reasoningEffort` forced to `undefined`.
 *
 * @param options Model info, reasoning budget and provider settings; any incoming
 *   `reasoningEffort` is ignored.
 * @returns The thinking parameters to send, or `undefined` to omit them.
 */
export const getAnthropicProviderReasoning = (
	options: GetModelReasoningOptions,
): AnthropicProviderReasoningParams | undefined => {
	const { model, reasoningBudget, settings } = options
	const useAdaptiveThinking = Boolean(model.supportsReasoningBinary) && Boolean(settings.enableReasoningEffort)

	return useAdaptiveThinking
		? { type: "adaptive" }
		: getAnthropicReasoning({ model, reasoningBudget, reasoningEffort: undefined, settings })
}