diff --git a/packages/types/src/providers/anthropic.ts b/packages/types/src/providers/anthropic.ts index 13a8a2abc7..f56bfb4513 100644 --- a/packages/types/src/providers/anthropic.ts +++ b/packages/types/src/providers/anthropic.ts @@ -28,6 +28,25 @@ export const anthropicModels = { }, ], }, + "claude-sonnet-5": { + maxTokens: 128_000, // Overridden to 8k if `enableReasoningEffort` is false. + contextWindow: 1_000_000, // 1M context window native (no beta header required) + supportsImages: true, + supportsPromptCache: true, + inputPrice: 2.0, // $2 per million input tokens (introductory pricing through Aug 31, 2026) + outputPrice: 10.0, // $10 per million output tokens (introductory pricing through Aug 31, 2026) + cacheWritesPrice: 2.5, // $2.50 per million tokens (introductory pricing through Aug 31, 2026) + cacheReadsPrice: 0.2, // $0.20 per million tokens (introductory pricing through Aug 31, 2026) + // Sonnet 5 uses the same adaptive-thinking / binary-toggle convention as + // Opus 4.7+ and Fable 5 on the direct Anthropic provider path. Manual + // extended thinking (budget_tokens) is removed and returns a 400, and + // setting sampling parameters (temperature/top_p/top_k) returns a 400. + supportsReasoningBudget: true, + supportsReasoningBinary: true, + supportsTemperature: false, + description: + "Claude Sonnet 5 is the best combination of speed and intelligence, optimized for coding, tool use, and agentic workflows.", + }, "claude-sonnet-4-5": { maxTokens: 64_000, // Overridden to 8k if `enableReasoningEffort` is false. contextWindow: 200_000, // Default 200K, extendable to 1M with beta flag 'context-1m-2025-08-07' diff --git a/packages/types/src/providers/bedrock.ts b/packages/types/src/providers/bedrock.ts index eb03f0c686..70fbe16057 100644 --- a/packages/types/src/providers/bedrock.ts +++ b/packages/types/src/providers/bedrock.ts @@ -51,6 +51,24 @@ export const bedrockModels = { }, ], }, + "anthropic.claude-sonnet-5": { + maxTokens: 8192, + contextWindow: 1_000_000, // 1M context window native (no beta header required) + supportsImages: true, + supportsPromptCache: true, + supportsReasoningBudget: true, + supportsReasoningBinary: true, + supportsTemperature: false, + inputPrice: 2.0, // $2 per million input tokens (introductory pricing through Aug 31, 2026) + outputPrice: 10.0, // $10 per million output tokens (introductory pricing through Aug 31, 2026) + cacheWritesPrice: 2.5, // $2.50 per million tokens (introductory pricing through Aug 31, 2026) + cacheReadsPrice: 0.2, // $0.20 per million tokens (introductory pricing through Aug 31, 2026) + minTokensPerCachePoint: 1024, + maxCachePoints: 4, + cachableFields: ["system", "messages", "tools"], + description: + "Claude Sonnet 5 is the best combination of speed and intelligence, optimized for coding, tool use, and agentic workflows.", + }, "amazon.nova-pro-v1:0": { maxTokens: 5000, contextWindow: 300_000, @@ -602,6 +620,7 @@ export const BEDROCK_1M_CONTEXT_MODEL_IDS = [ // - Claude Sonnet 4 // - Claude Sonnet 4.5 // - Claude Sonnet 4.6 +// - Claude Sonnet 5 // - Claude Haiku 4.5 // - Claude Opus 4.5 // - Claude Opus 4.6 @@ -611,6 +630,7 @@ export const BEDROCK_GLOBAL_INFERENCE_MODEL_IDS = [ "anthropic.claude-sonnet-4-20250514-v1:0", "anthropic.claude-sonnet-4-5-20250929-v1:0", "anthropic.claude-sonnet-4-6", + "anthropic.claude-sonnet-5", "anthropic.claude-haiku-4-5-20251001-v1:0", "anthropic.claude-opus-4-5-20251101-v1:0", "anthropic.claude-opus-4-6-v1", diff --git a/packages/types/src/providers/openrouter.ts b/packages/types/src/providers/openrouter.ts index 9898ace232..9b904bd6a9 100644 --- a/packages/types/src/providers/openrouter.ts +++ b/packages/types/src/providers/openrouter.ts @@ -39,6 +39,7 @@ export const OPEN_ROUTER_PROMPT_CACHING_MODELS = new Set([ "anthropic/claude-sonnet-4", "anthropic/claude-sonnet-4.5", "anthropic/claude-sonnet-4.6", + "anthropic/claude-sonnet-5", "anthropic/claude-opus-4", "anthropic/claude-opus-4.1", "anthropic/claude-opus-4.5", @@ -79,6 +80,7 @@ export const OPEN_ROUTER_REASONING_BUDGET_MODELS = new Set([ "anthropic/claude-sonnet-4", "anthropic/claude-sonnet-4.5", "anthropic/claude-sonnet-4.6", + "anthropic/claude-sonnet-5", "anthropic/claude-haiku-4.5", "google/gemini-2.5-pro-preview", "google/gemini-2.5-pro", diff --git a/packages/types/src/providers/vercel-ai-gateway.ts b/packages/types/src/providers/vercel-ai-gateway.ts index d23cd33cf1..38e959649e 100644 --- a/packages/types/src/providers/vercel-ai-gateway.ts +++ b/packages/types/src/providers/vercel-ai-gateway.ts @@ -16,6 +16,7 @@ export const VERCEL_AI_GATEWAY_PROMPT_CACHING_MODELS = new Set([ "anthropic/claude-fable-5", "anthropic/claude-sonnet-4", "anthropic/claude-sonnet-4.6", + "anthropic/claude-sonnet-5", "openai/gpt-4.1", "openai/gpt-4.1-mini", "openai/gpt-4.1-nano", @@ -59,6 +60,7 @@ export const VERCEL_AI_GATEWAY_VISION_AND_TOOLS_MODELS = new Set([ "anthropic/claude-fable-5", "anthropic/claude-sonnet-4", "anthropic/claude-sonnet-4.6", + "anthropic/claude-sonnet-5", "google/gemini-1.5-flash", "google/gemini-1.5-pro", "google/gemini-2.0-flash", diff --git a/packages/types/src/providers/vertex.ts b/packages/types/src/providers/vertex.ts index 5646b9f22d..dc25191543 100644 --- a/packages/types/src/providers/vertex.ts +++ b/packages/types/src/providers/vertex.ts @@ -355,6 +355,21 @@ export const vertexModels = { }, ], }, + "claude-sonnet-5": { + maxTokens: 8192, + contextWindow: 1_000_000, // 1M context window native (no beta header required) + supportsImages: true, + supportsPromptCache: true, + inputPrice: 2.0, // $2 per million input tokens (introductory pricing through Aug 31, 2026) + outputPrice: 10.0, // $10 per million output tokens (introductory pricing through Aug 31, 2026) + cacheWritesPrice: 2.5, // $2.50 per million tokens (introductory pricing through Aug 31, 2026) + cacheReadsPrice: 0.2, // $0.20 per million tokens (introductory pricing through Aug 31, 2026) + supportsReasoningBudget: true, + supportsReasoningBinary: true, + supportsTemperature: false, + description: + "Claude Sonnet 5 is the best combination of speed and intelligence, optimized for coding, tool use, and agentic workflows.", + }, "claude-haiku-4-5@20251001": { maxTokens: 8192, contextWindow: 200_000, diff --git a/src/api/providers/__tests__/anthropic-vertex.spec.ts b/src/api/providers/__tests__/anthropic-vertex.spec.ts index 2121e86954..6b56c5af98 100644 --- a/src/api/providers/__tests__/anthropic-vertex.spec.ts +++ b/src/api/providers/__tests__/anthropic-vertex.spec.ts @@ -1033,6 +1033,23 @@ describe("VertexHandler", () => { expect(model.info.supportsTemperature).toBe(false) }) + it("should return Claude Sonnet 5 model info", () => { + const handler = new AnthropicVertexHandler({ + apiModelId: "claude-sonnet-5", + vertexProjectId: "test-project", + vertexRegion: "us-central1", + }) + + const model = handler.getModel() + expect(model.id).toBe("claude-sonnet-5") + expect(model.info.maxTokens).toBe(8192) + expect(model.info.contextWindow).toBe(1_000_000) + expect(model.info.supportsReasoningBinary).toBe(true) + expect(model.info.supportsReasoningBudget).toBe(true) + expect(model.info.supportsPromptCache).toBe(true) + expect(model.info.supportsTemperature).toBe(false) + }) + it("should not enable 1M context when flag is disabled", () => { const handler = new AnthropicVertexHandler({ apiModelId: VERTEX_1M_CONTEXT_MODEL_IDS[0], @@ -1309,6 +1326,35 @@ describe("VertexHandler", () => { expect(request.thinking).not.toHaveProperty("budget_tokens") expect(request.temperature).toBeUndefined() }) + + it("should use adaptive thinking for Claude Sonnet 5", async () => { + const sonnetHandler = new AnthropicVertexHandler({ + apiModelId: "claude-sonnet-5", + vertexProjectId: "test-project", + vertexRegion: "us-central1", + enableReasoningEffort: true, + }) + + const mockCreate = vitest.fn().mockImplementation(async () => ({ + async *[Symbol.asyncIterator]() { + yield { type: "message_start", message: { usage: { input_tokens: 10, output_tokens: 5 } } } + }, + })) + ;(sonnetHandler["client"].messages as any).create = mockCreate + + await sonnetHandler.createMessage("You are a helpful assistant", [{ role: "user", content: "Hello" }]).next() + + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + thinking: { type: "adaptive" }, + }), + undefined, + ) + + const request = mockCreate.mock.calls[0][0] + expect(request.thinking).not.toHaveProperty("budget_tokens") + expect(request.temperature).toBeUndefined() + }) }) describe("native tool calling", () => { diff --git a/src/api/providers/__tests__/anthropic.spec.ts b/src/api/providers/__tests__/anthropic.spec.ts index a2c0cb88eb..2b944b7db5 100644 --- a/src/api/providers/__tests__/anthropic.spec.ts +++ b/src/api/providers/__tests__/anthropic.spec.ts @@ -428,6 +428,33 @@ describe("AnthropicHandler", () => { expect(requestBody?.max_tokens).toBe(32768) expect(requestOptions?.headers?.["anthropic-beta"]).toContain("prompt-caching-2024-07-31") }) + + it("should use adaptive thinking for Claude Sonnet 5 when reasoning is enabled", async () => { + const sonnetHandler = new AnthropicHandler({ + apiKey: "test-api-key", + apiModelId: "claude-sonnet-5", + enableReasoningEffort: true, + modelMaxTokens: 32768, + }) + + const stream = sonnetHandler.createMessage(systemPrompt, [ + { + role: "user", + content: [{ type: "text" as const, text: "Hello" }], + }, + ]) + + for await (const _chunk of stream) { + // Consume stream + } + + const requestBody = mockCreate.mock.calls[mockCreate.mock.calls.length - 1]?.[0] + const requestOptions = mockCreate.mock.calls[mockCreate.mock.calls.length - 1]?.[1] + expect(requestBody?.thinking).toEqual({ type: "adaptive" }) + expect(requestBody?.temperature).toBeUndefined() + expect(requestBody?.max_tokens).toBe(32768) + expect(requestOptions?.headers?.["anthropic-beta"]).toContain("prompt-caching-2024-07-31") + }) }) describe("completePrompt", () => { @@ -589,6 +616,23 @@ describe("AnthropicHandler", () => { expect(model.reasoningBudget).toBeUndefined() }) + it("should handle Claude Sonnet 5 model correctly", () => { + const handler = new AnthropicHandler({ + apiKey: "test-api-key", + apiModelId: "claude-sonnet-5", + }) + const model = handler.getModel() + expect(model.id).toBe("claude-sonnet-5") + expect(model.info.maxTokens).toBe(128000) + expect(model.info.contextWindow).toBe(1000000) + expect(model.maxTokens).toBe(8192) + expect(model.info.supportsReasoningBinary).toBe(true) + expect(model.info.supportsReasoningBudget).toBe(true) + expect(model.info.supportsPromptCache).toBe(true) + expect(model.info.supportsTemperature).toBe(false) + expect(model.reasoningBudget).toBeUndefined() + }) + it("should enable 1M context for Claude 4.5 Sonnet when beta flag is set", () => { const handler = new AnthropicHandler({ apiKey: "test-api-key", diff --git a/src/api/providers/__tests__/bedrock.spec.ts b/src/api/providers/__tests__/bedrock.spec.ts index 156df8e540..e2b17e4e87 100644 --- a/src/api/providers/__tests__/bedrock.spec.ts +++ b/src/api/providers/__tests__/bedrock.spec.ts @@ -724,6 +724,37 @@ describe("AwsBedrockHandler", () => { const model = handler.getModel() expect(model.id).toBe("global.anthropic.claude-fable-5") }) + + it("should return Claude Sonnet 5 model info", () => { + const handler = new AwsBedrockHandler({ + apiModelId: "anthropic.claude-sonnet-5", + awsAccessKey: "test", + awsSecretKey: "test", + awsRegion: "us-east-1", + }) + + const model = handler.getModel() + expect(model.id).toBe("anthropic.claude-sonnet-5") + expect(model.info.contextWindow).toBe(1_000_000) + expect(model.info.supportsReasoningBinary).toBe(true) + expect(model.info.supportsReasoningBudget).toBe(true) + expect(model.info.supportsPromptCache).toBe(true) + expect(model.info.supportsTemperature).toBe(false) + expect(model.maxTokens).toBe(8192) + }) + + it("should apply global inference prefix for Claude Sonnet 5 when awsUseGlobalInference is true", () => { + const handler = new AwsBedrockHandler({ + apiModelId: "anthropic.claude-sonnet-5", + awsAccessKey: "test", + awsSecretKey: "test", + awsRegion: "us-east-1", + awsUseGlobalInference: true, + }) + + const model = handler.getModel() + expect(model.id).toBe("global.anthropic.claude-sonnet-5") + }) }) describe("1M context beta feature", () => { @@ -1426,6 +1457,36 @@ describe("AwsBedrockHandler", () => { expect(commandArg.inferenceConfig?.temperature).toBeUndefined() }) + it("should send adaptive thinking with effort xhigh for Claude Sonnet 5 when reasoning is enabled", async () => { + // End-to-end regression guard for the Sonnet 5 handler branch. The + // isAdaptiveThinkingModel predicate is unit-covered, but a regression in + // the createMessage adaptive-thinking branch for this specific model + // wouldn't be caught without a request-level test (see review feedback). + const sonnet5Handler = new AwsBedrockHandler({ + apiModelId: "anthropic.claude-sonnet-5", + awsAccessKey: "test-access-key", + awsSecretKey: "test-secret-key", + awsRegion: "us-east-1", + enableReasoningEffort: true, + }) + + const generator = sonnet5Handler.createMessage("System prompt", messages) + await generator.next() + + expect(mockConverseStreamCommand).toHaveBeenCalled() + const commandArg = mockConverseStreamCommand.mock.calls[0][0] as any + + // Sonnet 5 uses the same adaptive-thinking contract as Opus 4.7/4.8 — + // budget_tokens causes a 400, so thinking.type is "adaptive" with effort. + expect(commandArg.additionalModelRequestFields?.thinking).toEqual({ + type: "adaptive", + display: "summarized", + }) + expect(commandArg.additionalModelRequestFields?.output_config).toEqual({ effort: "xhigh" }) + // Sonnet 5 rejects sampling parameters: temperature must be omitted entirely. + expect(commandArg.inferenceConfig?.temperature).toBeUndefined() + }) + it("should omit thinking and temperature for Claude Opus 4.8 when reasoning is disabled", async () => { const opus48Handler = new AwsBedrockHandler({ apiModelId: "anthropic.claude-opus-4-8", @@ -1558,6 +1619,7 @@ describe("AwsBedrockHandler", () => { expect(isAdaptiveThinkingModel("anthropic.claude-opus-4-7")).toBe(true) expect(isAdaptiveThinkingModel("anthropic.claude-opus-4-8")).toBe(true) expect(isAdaptiveThinkingModel("anthropic.claude-fable-5")).toBe(true) + expect(isAdaptiveThinkingModel("anthropic.claude-sonnet-5")).toBe(true) // Future-proof Sonnet patterns — guarded even before a registry entry exists. expect(isAdaptiveThinkingModel("anthropic.claude-sonnet-4-7")).toBe(true) expect(isAdaptiveThinkingModel("anthropic.claude-sonnet-4-8")).toBe(true) @@ -1566,6 +1628,7 @@ describe("AwsBedrockHandler", () => { it("returns true when the id carries a cross-region or global prefix", () => { expect(isAdaptiveThinkingModel("us.anthropic.claude-opus-4-8")).toBe(true) expect(isAdaptiveThinkingModel("global.anthropic.claude-fable-5")).toBe(true) + expect(isAdaptiveThinkingModel("global.anthropic.claude-sonnet-5")).toBe(true) expect(isAdaptiveThinkingModel("eu.anthropic.claude-sonnet-4-7")).toBe(true) expect(isAdaptiveThinkingModel("global.anthropic.claude-opus-4-8")).toBe(true) }) diff --git a/src/api/providers/__tests__/requesty.spec.ts b/src/api/providers/__tests__/requesty.spec.ts index 4dfa2a7c9e..5d829f2374 100644 --- a/src/api/providers/__tests__/requesty.spec.ts +++ b/src/api/providers/__tests__/requesty.spec.ts @@ -64,6 +64,20 @@ vitest.mock("../fetchers/modelCache", () => ({ cacheReadsPrice: 1, description: "Claude Fable 5", }, + "anthropic/claude-sonnet-5": { + maxTokens: 128000, + contextWindow: 1000000, + supportsImages: true, + supportsPromptCache: true, + supportsReasoningBudget: true, + supportsReasoningBinary: true, + supportsTemperature: false, + inputPrice: 3, + outputPrice: 15, + cacheWritesPrice: 3.75, + cacheReadsPrice: 0.3, + description: "Claude Sonnet 5", + }, }) }), })) @@ -254,6 +268,39 @@ describe("RequestyHandler", () => { ) }) + it("uses adaptive thinking for Claude Sonnet 5 when reasoning is enabled", async () => { + const handler = new RequestyHandler({ + requestyApiKey: "test-key", + requestyModelId: "anthropic/claude-sonnet-5", + enableReasoningEffort: true, + modelMaxTokens: 32768, + }) + + const mockStream = { + async *[Symbol.asyncIterator]() { + yield { + id: "test-id", + choices: [{ delta: {} }], + usage: { prompt_tokens: 10, completion_tokens: 20 }, + } + }, + } + + mockCreate.mockResolvedValue(mockStream) + + const generator = handler.createMessage("test system prompt", [{ role: "user" as const, content: "test" }]) + await generator.next() + + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + model: "anthropic/claude-sonnet-5", + max_tokens: 32768, + thinking: { type: "adaptive" }, + temperature: undefined, + }), + ) + }) + it("handles API errors", async () => { const handler = new RequestyHandler(mockOptions) const mockError = new Error("API Error") @@ -523,6 +570,23 @@ describe("RequestyHandler", () => { }) }) + it("omits temperature for Claude Sonnet 5 in completePrompt", async () => { + const handler = new RequestyHandler({ + requestyApiKey: "test-key", + requestyModelId: "anthropic/claude-sonnet-5", + }) + mockCreate.mockResolvedValue({ choices: [{ message: { content: "test completion" } }] }) + + await handler.completePrompt("test prompt") + + expect(mockCreate).toHaveBeenCalledWith({ + model: "anthropic/claude-sonnet-5", + max_tokens: 8192, + messages: [{ role: "system", content: "test prompt" }], + temperature: undefined, + }) + }) + it("handles API errors", async () => { const handler = new RequestyHandler(mockOptions) const mockError = new Error("API Error") diff --git a/src/api/providers/__tests__/vercel-ai-gateway.spec.ts b/src/api/providers/__tests__/vercel-ai-gateway.spec.ts index 3370f87dd2..4669904e2f 100644 --- a/src/api/providers/__tests__/vercel-ai-gateway.spec.ts +++ b/src/api/providers/__tests__/vercel-ai-gateway.spec.ts @@ -49,6 +49,18 @@ vitest.mock("../fetchers/modelCache", () => ({ cacheReadsPrice: 1, description: "Claude Fable 5", }, + "anthropic/claude-sonnet-5": { + maxTokens: 128000, + contextWindow: 1000000, + supportsImages: true, + supportsPromptCache: true, + supportsTemperature: false, + inputPrice: 3, + outputPrice: 15, + cacheWritesPrice: 3.75, + cacheReadsPrice: 0.3, + description: "Claude Sonnet 5", + }, "anthropic/claude-3.5-haiku": { maxTokens: 32000, contextWindow: 200000, @@ -305,6 +317,24 @@ describe("VercelAiGatewayHandler", () => { ) }) + it("omits temperature for Claude Sonnet 5", async () => { + const handler = new VercelAiGatewayHandler({ + ...mockOptions, + vercelAiGatewayModelId: "anthropic/claude-sonnet-5", + }) + + await handler.createMessage("You are a helpful assistant.", [{ role: "user", content: "Hello" }]).next() + + // Assert directly on the extracted call arg. `objectContaining({ + // temperature: undefined })` passes whether temperature is explicitly + // undefined or simply absent, so it wouldn't catch a regression where the + // handler stops consulting supportsTemperature. + const call = mockCreate.mock.calls[mockCreate.mock.calls.length - 1][0] + expect(call.model).toBe("anthropic/claude-sonnet-5") + expect(call.temperature).toBeUndefined() + expect(call.max_completion_tokens).toBe(128000) + }) + it("adds cache breakpoints for supported models", async () => { const { addCacheBreakpoints } = await import("../../transform/caching/vercel-ai-gateway") const handler = new VercelAiGatewayHandler({ diff --git a/src/api/providers/anthropic.ts b/src/api/providers/anthropic.ts index 7a4ef30ad0..f9e2ee7d2b 100644 --- a/src/api/providers/anthropic.ts +++ b/src/api/providers/anthropic.ts @@ -89,6 +89,7 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa } switch (modelId) { + case "claude-sonnet-5": case "claude-sonnet-4-6": case "claude-sonnet-4-5": case "claude-sonnet-4-20250514": @@ -159,6 +160,7 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa // Then check for models that support prompt caching switch (modelId) { + case "claude-sonnet-5": case "claude-sonnet-4-6": case "claude-sonnet-4-5": case "claude-sonnet-4-20250514": diff --git a/src/api/providers/bedrock.ts b/src/api/providers/bedrock.ts index d92e993d58..8c2b5ace68 100644 --- a/src/api/providers/bedrock.ts +++ b/src/api/providers/bedrock.ts @@ -301,13 +301,14 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH * Detect models that require the adaptive-thinking API contract. * * Starting with Claude Opus 4.7 (and the matching Sonnet 4.7), and continuing - * in Opus 4.8 / Sonnet 4.8 and Claude Fable 5, Anthropic removed sampling parameters - * (temperature/top_p/top_k) and replaced budget_tokens-based thinking with - * `thinking.type: "adaptive"` plus `output_config.effort`. The migration guide - * from 4.7 → 4.8 confirms there are no further breaking API changes, and Fable 5 - * keeps the same adaptive-thinking contract, so a single - * guard matches both generations. Shared by createMessage and completePrompt so - * both request paths omit temperature for these models (sending it causes a 400). + * in Opus 4.8 / Sonnet 4.8, Claude Fable 5, and Claude Sonnet 5, Anthropic + * removed sampling parameters (temperature/top_p/top_k) and replaced + * budget_tokens-based thinking with `thinking.type: "adaptive"` plus + * `output_config.effort`. The migration guide from 4.7 → 4.8 confirms there + * are no further breaking API changes, and Fable 5 / Sonnet 5 keep the same + * adaptive-thinking contract, so a single guard matches all generations. + * Shared by createMessage and completePrompt so both request paths omit + * temperature for these models (sending it causes a 400). * * Accepts a model ID (with or without a cross-region/global prefix) and strips * the prefix via parseBaseModelId before matching. @@ -319,7 +320,8 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH baseModelId.includes("opus-4-8") || baseModelId.includes("fable-5") || baseModelId.includes("sonnet-4-7") || - baseModelId.includes("sonnet-4-8") + baseModelId.includes("sonnet-4-8") || + baseModelId.includes("sonnet-5") ) } diff --git a/src/api/providers/fetchers/__tests__/openrouter.spec.ts b/src/api/providers/fetchers/__tests__/openrouter.spec.ts index 0616a0b69b..7794031fa5 100644 --- a/src/api/providers/fetchers/__tests__/openrouter.spec.ts +++ b/src/api/providers/fetchers/__tests__/openrouter.spec.ts @@ -318,6 +318,34 @@ describe("OpenRouter API", () => { expect(result.supportsReasoningBinary).toBe(true) }) + it("sets claude-sonnet-5 model to Anthropic max tokens and omits temperature", () => { + const mockModel = { + name: "Claude Sonnet 5", + description: "Test model", + context_length: 1000000, + max_completion_tokens: 128000, + pricing: { + prompt: "0.000003", + completion: "0.000015", + }, + } + + const result = parseOpenRouterModel({ + id: "anthropic/claude-sonnet-5", + model: mockModel, + inputModality: ["text", "image"], + outputModality: ["text"], + maxTokens: 128000, + supportedParameters: ["reasoning", "include_reasoning"], + }) + + expect(result.maxTokens).toBe(128000) + expect(result.contextWindow).toBe(1000000) + expect(result.supportsTemperature).toBe(false) + expect(result.supportsReasoningBudget).toBe(true) + expect(result.supportsReasoningBinary).toBe(true) + }) + it("sets horizon-alpha model to 32k max tokens", () => { const mockModel = { name: "Horizon Alpha", diff --git a/src/api/providers/fetchers/__tests__/requesty.spec.ts b/src/api/providers/fetchers/__tests__/requesty.spec.ts index 9fccb1a3fa..89c56063ef 100644 --- a/src/api/providers/fetchers/__tests__/requesty.spec.ts +++ b/src/api/providers/fetchers/__tests__/requesty.spec.ts @@ -50,6 +50,31 @@ describe("getRequestyModels", () => { expect(fable5.supportsTemperature).toBe(false) }) + it("applies Sonnet 5 overrides when parsing anthropic/claude-sonnet-5", async () => { + const rawSonnet5 = makeRawModel({ + id: "anthropic/claude-sonnet-5", + max_output_tokens: 128000, + context_window: 1000000, + supports_caching: true, + supports_vision: true, + supports_reasoning: true, + input_price: "0.000003", + output_price: "0.000015", + caching_price: "0.00000375", + cached_price: "0.0000003", + }) + + mockAxiosGet.mockResolvedValueOnce({ data: { data: [rawSonnet5] } }) + + const models = await getRequestyModels() + const sonnet5 = models["anthropic/claude-sonnet-5"] + + expect(sonnet5).toBeDefined() + expect(sonnet5.supportsReasoningBudget).toBe(true) + expect(sonnet5.supportsReasoningBinary).toBe(true) + expect(sonnet5.supportsTemperature).toBe(false) + }) + it("does not apply Fable 5 overrides to other models", async () => { const rawSonnet = makeRawModel({ id: "anthropic/claude-sonnet-4.6", diff --git a/src/api/providers/fetchers/__tests__/vercel-ai-gateway.spec.ts b/src/api/providers/fetchers/__tests__/vercel-ai-gateway.spec.ts index 1bc109f006..dfb8ad4b3b 100644 --- a/src/api/providers/fetchers/__tests__/vercel-ai-gateway.spec.ts +++ b/src/api/providers/fetchers/__tests__/vercel-ai-gateway.spec.ts @@ -221,6 +221,22 @@ describe("Vercel AI Gateway Fetchers", () => { expect(result.supportsTemperature).toBe(false) }) + it("marks Claude Sonnet 5 as not supporting temperature", () => { + const result = parseVercelAiGatewayModel({ + id: "anthropic/claude-sonnet-5", + model: { + ...baseModel, + id: "anthropic/claude-sonnet-5", + context_window: 1000000, + max_tokens: 128000, + }, + }) + + expect(result.maxTokens).toBe(128000) + expect(result.contextWindow).toBe(1000000) + expect(result.supportsTemperature).toBe(false) + }) + it("detects vision-only models", () => { // claude 3.5 haiku in VERCEL_AI_GATEWAY_VISION_ONLY_MODELS const visionModel = { diff --git a/src/api/providers/fetchers/openrouter.ts b/src/api/providers/fetchers/openrouter.ts index 7754af3192..2dcb342445 100644 --- a/src/api/providers/fetchers/openrouter.ts +++ b/src/api/providers/fetchers/openrouter.ts @@ -270,6 +270,13 @@ export const parseOpenRouterModel = ({ modelInfo.supportsTemperature = false } + // Set claude-sonnet-5 model to use the correct Anthropic configuration + if (id === "anthropic/claude-sonnet-5") { + modelInfo.maxTokens = anthropicModels["claude-sonnet-5"].maxTokens + modelInfo.supportsReasoningBinary = true + modelInfo.supportsTemperature = false + } + // Ensure correct reasoning handling for Claude Haiku 4.5 on OpenRouter // Use budget control and disable effort-based reasoning fallback if (id === "anthropic/claude-haiku-4.5") { diff --git a/src/api/providers/fetchers/requesty.ts b/src/api/providers/fetchers/requesty.ts index aaf11e4a7b..2260d02978 100644 --- a/src/api/providers/fetchers/requesty.ts +++ b/src/api/providers/fetchers/requesty.ts @@ -51,6 +51,12 @@ export async function getRequestyModels(baseUrl?: string, apiKey?: string): Prom modelInfo.supportsTemperature = false } + if (rawModel.id === "anthropic/claude-sonnet-5") { + modelInfo.supportsReasoningBudget = true + modelInfo.supportsReasoningBinary = true + modelInfo.supportsTemperature = false + } + models[rawModel.id] = modelInfo } } catch (error) { diff --git a/src/api/providers/fetchers/vercel-ai-gateway.ts b/src/api/providers/fetchers/vercel-ai-gateway.ts index 50c3035084..ba748ddd51 100644 --- a/src/api/providers/fetchers/vercel-ai-gateway.ts +++ b/src/api/providers/fetchers/vercel-ai-gateway.ts @@ -118,5 +118,9 @@ export const parseVercelAiGatewayModel = ({ id, model }: { id: string; model: Ve modelInfo.supportsTemperature = false } + if (id === "anthropic/claude-sonnet-5") { + modelInfo.supportsTemperature = false + } + return modelInfo } diff --git a/src/shared/__tests__/api.spec.ts b/src/shared/__tests__/api.spec.ts index 9954d5cbf7..f4fb8330bb 100644 --- a/src/shared/__tests__/api.spec.ts +++ b/src/shared/__tests__/api.spec.ts @@ -162,6 +162,33 @@ describe("getModelMaxOutputTokens", () => { ).toBe(32_768) }) + test("should preserve Anthropic hybrid token handling for Claude Sonnet 5", () => { + const model: ModelInfo = { + contextWindow: 1_000_000, + supportsPromptCache: true, + supportsReasoningBudget: true, + supportsReasoningBinary: true, + supportsTemperature: false, + maxTokens: 128_000, + } + + expect( + getModelMaxOutputTokens({ + modelId: "claude-sonnet-5", + model, + settings: { apiProvider: "anthropic", enableReasoningEffort: false }, + }), + ).toBe(ANTHROPIC_DEFAULT_MAX_TOKENS) + + expect( + getModelMaxOutputTokens({ + modelId: "claude-sonnet-5", + model, + settings: { apiProvider: "anthropic", enableReasoningEffort: true, modelMaxTokens: 32_768 }, + }), + ).toBe(32_768) + }) + test("should return model.maxTokens for non-Anthropic models that support reasoning budget but aren't using it", () => { const geminiModelId = "gemini-2.5-flash-preview-04-17" const model: ModelInfo = {