From cdefa8633090d3c1bb85732151c545a94c761e45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Dre=C5=BCewski?= Date: Wed, 17 Jun 2026 23:51:08 +0200 Subject: [PATCH 1/4] feat(providers): add Fable 5 support across Anthropic providers (zoo #555) Port of Zoo-Code PR #555. Registers `claude-fable-5` across the Anthropic, Bedrock, Vertex, OpenRouter, Requesty and Vercel AI Gateway providers (context 1M, direct maxTokens 128k / 8k on Bedrock+Vertex, adaptive-binary reasoning, temperature unsupported), and routes the model through adaptive thinking. Fork-specific: our Vertex handler never received the adaptive-binary reasoning routing that Zoo already had, so it emitted `{type:"enabled",budget_tokens}` for Opus 4.7/4.8 and Fable 5. Recompute `thinking` via getAnthropicProviderReasoning in AnthropicVertexHandler.getModel so Vertex matches the direct provider. Co-authored-by: T Co-authored-by: Elliott de Launay --- ...le-5-support-across-anthropic-providers.md | 148 ++++++++++++++++++ packages/types/src/providers/anthropic.ts | 17 ++ packages/types/src/providers/bedrock.ts | 20 +++ packages/types/src/providers/openrouter.ts | 2 + .../types/src/providers/vercel-ai-gateway.ts | 2 + packages/types/src/providers/vertex.ts | 15 ++ .../__tests__/anthropic-vertex.spec.ts | 46 ++++++ src/api/providers/__tests__/anthropic.spec.ts | 44 ++++++ src/api/providers/__tests__/bedrock.spec.ts | 33 ++++ src/api/providers/__tests__/requesty.spec.ts | 64 ++++++++ .../__tests__/vercel-ai-gateway.spec.ts | 29 ++++ src/api/providers/anthropic-vertex.ts | 21 ++- src/api/providers/anthropic.ts | 2 + src/api/providers/bedrock.ts | 1 + .../fetchers/__tests__/openrouter.spec.ts | 28 ++++ .../fetchers/__tests__/requesty.spec.ts | 67 ++++++++ .../__tests__/vercel-ai-gateway.spec.ts | 16 ++ src/api/providers/fetchers/openrouter.ts | 7 + src/api/providers/fetchers/requesty.ts | 6 + .../providers/fetchers/vercel-ai-gateway.ts | 4 + src/api/providers/requesty.ts | 14 +- 
src/api/providers/vercel-ai-gateway.ts | 6 +- .../transform/__tests__/model-params.spec.ts | 11 ++ src/api/transform/model-params.ts | 6 +- src/shared/__tests__/api.spec.ts | 27 ++++ 25 files changed, 624 insertions(+), 12 deletions(-) create mode 100644 ai_plans/2026-06-17_zoo-555-add-fable-5-support-across-anthropic-providers.md create mode 100644 src/api/providers/fetchers/__tests__/requesty.spec.ts diff --git a/ai_plans/2026-06-17_zoo-555-add-fable-5-support-across-anthropic-providers.md b/ai_plans/2026-06-17_zoo-555-add-fable-5-support-across-anthropic-providers.md new file mode 100644 index 0000000000..9e4767108a --- /dev/null +++ b/ai_plans/2026-06-17_zoo-555-add-fable-5-support-across-anthropic-providers.md @@ -0,0 +1,148 @@ +# Port plan — Zoo PR #555 → `feature/zoo-555-add-fable-5-support-across-anthropic-providers` + +> **For the executor (read first).** Do the steps in order. Do not improvise or +> refactor beyond what is written (YAGNI). Every code block is already adapted to +> this repo. This repo is **Tumble Code**: never introduce the strings "Roo" or +> "Zoo" in user-facing text. The model id `claude-fable-5` is an **internal model +> id**, not branding — keep it verbatim. + +--- + +## 0. Context + +- **Upstream:** Zoo PR #555 — "Add Fable 5 support across Anthropic providers" (commit `cc2654521`). +- **What it does:** Registers the `claude-fable-5` model across every Anthropic-family + provider path — direct Anthropic, Bedrock, Vertex, OpenRouter, Requesty, and the + Vercel AI Gateway — including model metadata, the adaptive-thinking guard, and the + `supportsTemperature: false` plumbing so temperature is omitted for this model. 
+- **Model facts (mirror upstream, reviewed in PR #555):** context window `1_000_000`; + direct Anthropic `maxTokens` `128_000` (overridden to 8k when reasoning effort is + off); Bedrock/Vertex `maxTokens` `8192`; prices in=$10 / out=$50 / cacheWrite=$12.5 / + cacheRead=$1 per M; `supportsImages`, `supportsPromptCache`, `supportsReasoningBudget`, + `supportsReasoningBinary` all true; `supportsTemperature: false`. Uses the same + adaptive-thinking contract as Opus 4.7/4.8. +- **Why we want it:** new top-tier Claude model; our providers have no `claude-fable-5` + entry yet (verified: `grep claude-fable-5` empty across `packages/types/src/providers/*` + and `src/api/providers/*`). Low risk, high value — matches our model-support cadence. +- **Adaptations vs. the raw upstream diff (IMPORTANT):** + 1. **Branding:** upstream edits a bedrock comment to say "Zoo Code UI"; our fork already + reads `// display: "summarized" surfaces thinking content in the UI.` + ([bedrock.ts:439](../src/api/providers/bedrock.ts#L439)). Keep "the UI" — do NOT + introduce "Zoo Code". The Fable-5 mentions in the doc comment are optional polish. + 2. **model-params.ts:** upstream adds the `supportsTemperature === false` guard to the + generic **`else`** branch. Our `format === "anthropic"` branch already has that guard + ([model-params.ts:151](../src/api/transform/model-params.ts#L151)); the `else` branch + (our lines 179-187) does NOT. Apply the guard to the `else` branch only. + 3. Dependencies `getAnthropicProviderReasoning` / `AnthropicProviderReasoningParams` + already exist in [reasoning.ts:61](../src/api/transform/reasoning.ts#L61) — the + requesty refactor applies cleanly. +- **Original authors — credit them:** + + ```text + Co-authored-by: T + Co-authored-by: Elliott de Launay + ``` + +## 1. Preconditions + +- [ ] Branch `feature/zoo-555-...` created off `main`. +- [ ] No `claude-fable-5` entry exists yet in any provider type/handler. 
+- [ ] `getAnthropicProviderReasoning` exported from `src/api/transform/reasoning.ts`. + +## 2. Source edits + +### Edit A — `packages/types/src/providers/anthropic.ts` +Insert a `"claude-fable-5"` entry into `anthropicModels` (before `claude-opus-4-5-20251101`): +maxTokens 128_000, contextWindow 1_000_000, images/cache true, in 10 / out 50 / +cacheWrite 12.5 / cacheRead 1, supportsReasoningBudget/Binary true, supportsTemperature +false, with the upstream description. + +### Edit B — `packages/types/src/providers/bedrock.ts` +- Add `"anthropic.claude-fable-5"` to `bedrockModels` (maxTokens 8192, contextWindow 1M, + cache true + `minTokensPerCachePoint`/`maxCachePoints`/`cachableFields`, same prices, + reasoning flags, supportsTemperature false, description). +- Append `"anthropic.claude-fable-5"` to `BEDROCK_GLOBAL_INFERENCE_MODEL_IDS` with the + cross-region comment line. + +### Edit C — `packages/types/src/providers/vertex.ts` +Add `"claude-fable-5"` to `vertexModels` (maxTokens 8192, contextWindow 1M, images/cache true, same prices, reasoning flags, supportsTemperature false, description). + +### Edit D — `packages/types/src/providers/openrouter.ts` +Add `"anthropic/claude-fable-5"` to both `OPEN_ROUTER_PROMPT_CACHING_MODELS` and +`OPEN_ROUTER_REASONING_BUDGET_MODELS`. + +### Edit E — `packages/types/src/providers/vercel-ai-gateway.ts` +Add `"anthropic/claude-fable-5"` to both `VERCEL_AI_GATEWAY_PROMPT_CACHING_MODELS` and +`VERCEL_AI_GATEWAY_VISION_AND_TOOLS_MODELS`. + +### Edit F — `src/api/providers/anthropic.ts` +Add `case "claude-fable-5":` to the two switch statements (after `claude-opus-4-8`). + +### Edit G — `src/api/providers/bedrock.ts` +Add `baseModelId.includes("fable-5") ||` to the `isAdaptiveThinkingModel` guard. Optional: +mention Fable 5 in the adjacent doc comments — but keep "the UI" wording (no "Zoo Code").
+ +### Edit H — `src/api/providers/requesty.ts` +Swap import to `{ AnthropicProviderReasoningParams, getAnthropicProviderReasoning }`, +change the two `thinking?: AnthropicReasoningParams` to `AnthropicProviderReasoningParams`, +and in `getModel()` compute `reasoning = getAnthropicProviderReasoning({ model: info, +reasoningBudget: params.reasoningBudget, settings: this.options })` and return +`{ id, info, ...params, reasoning }`. + +### Edit I — `src/api/providers/vercel-ai-gateway.ts` +Gate temperature on `info.supportsTemperature !== false && this.supportsTemperature(modelId)` +in both `createMessage` and `completePrompt`. + +### Edit J — `src/api/providers/fetchers/openrouter.ts` +Add the `anthropic/claude-fable-5` block setting `maxTokens` (from `anthropicModels["claude-fable-5"]`), `supportsReasoningBinary = true`, and `supportsTemperature = false`. + +### Edit K — `src/api/providers/fetchers/requesty.ts` +Add the `anthropic/claude-fable-5` override block (reasoning flags + supportsTemperature false). + +### Edit L — `src/api/providers/fetchers/vercel-ai-gateway.ts` +Add the `anthropic/claude-fable-5` → `supportsTemperature = false` block. + +### Edit M — `src/api/transform/model-params.ts` +In the **`else`** branch only, prepend `if (model.supportsTemperature === false) { +params.temperature = undefined }` and drop the stale 2-line OpenRouter TODO. + +### Edit N — `src/api/providers/anthropic-vertex.ts` (fork-specific; NOT in Zoo's diff) +Zoo's PR did not touch this file because Zoo's Vertex handler already routed +adaptive-binary models through `getAnthropicProviderReasoning` (from an earlier PR). +**Our fork never got that change** — our `getModel()` returned `params.reasoning` +from `getModelParams` (`getAnthropicReasoning`), which always emits +`{ type: "enabled", budget_tokens }`. So Fable 5 (and Opus 4.7/4.8) on Vertex sent +the wrong thinking config. Root cause proven via the new vertex adaptive test failing +with `{ type: "enabled", budget_tokens: 8192 }` instead of `{ type: "adaptive" }`.
+Fix: import `getAnthropicProviderReasoning`; in `getModel()` compute +`thinking = getAnthropicProviderReasoning({ model: info, reasoningBudget: +params.reasoningBudget, reasoningEffort: params.reasoningEffort, settings: this.options })` +and return `{ ...params, reasoning: thinking }`. Because `{ type: "adaptive" }` is not in +the SDK's `ThinkingConfigParam` union, change the `createMessage`/`completePrompt` `params` +declarations from a `: Anthropic.Messages.MessageCreateParams…` annotation to an +`as Anthropic.Messages.MessageCreateParams…` cast (mirrors `AnthropicHandler`). + +## 3. Tests (port from upstream, adapt anchors) + +Add the Fable-5 cases to: `anthropic.spec.ts`, `anthropic-vertex.spec.ts`, `bedrock.spec.ts`, +`requesty.spec.ts`, `vercel-ai-gateway.spec.ts`, `fetchers/__tests__/openrouter.spec.ts`, +`fetchers/__tests__/vercel-ai-gateway.spec.ts`, `transform/__tests__/model-params.spec.ts`, +`shared/__tests__/api.spec.ts`, and create new `fetchers/__tests__/requesty.spec.ts`. + +## 4. Out of scope +- No "Zoo Code" branding. No TTS/router/cloud. Internal id `claude-fable-5` stays. + +## 5. Verify +- `pnpm --filter @roo-code/types check-types` clean. +- `cd src && npx vitest run api/providers/__tests__/anthropic.spec.ts api/providers/__tests__/bedrock.spec.ts api/providers/__tests__/anthropic-vertex.spec.ts api/providers/__tests__/requesty.spec.ts api/providers/__tests__/vercel-ai-gateway.spec.ts api/providers/fetchers/__tests__/openrouter.spec.ts api/providers/fetchers/__tests__/requesty.spec.ts api/providers/fetchers/__tests__/vercel-ai-gateway.spec.ts api/transform/__tests__/model-params.spec.ts shared/__tests__/api.spec.ts` all green. + +## 6. Acceptance +- [ ] All new Fable-5 tests pass; touched suites green. +- [ ] No "Roo"/"Zoo" user-facing strings introduced. + +## 7. 
Record +```bash +node .claude/skills/zoo-port/scripts/zoo-prs.mjs record --pr 555 --status ported \ + --branch feature/zoo-555-add-fable-5-support-across-anthropic-providers \ + --plan ai_plans/2026-06-17_zoo-555-add-fable-5-support-across-anthropic-providers.md +``` diff --git a/packages/types/src/providers/anthropic.ts b/packages/types/src/providers/anthropic.ts index f123817e43..13a8a2abc7 100644 --- a/packages/types/src/providers/anthropic.ts +++ b/packages/types/src/providers/anthropic.ts @@ -126,6 +126,23 @@ export const anthropicModels = { supportsReasoningBinary: true, supportsTemperature: false, }, + "claude-fable-5": { + maxTokens: 128_000, // Overridden to 8k if `enableReasoningEffort` is false. + contextWindow: 1_000_000, + supportsImages: true, + supportsPromptCache: true, + inputPrice: 10.0, // $10 per million input tokens + outputPrice: 50.0, // $50 per million output tokens + cacheWritesPrice: 12.5, // $12.50 per million tokens + cacheReadsPrice: 1.0, // $1.00 per million tokens + // Fable 5 uses the same adaptive-thinking / binary-toggle convention as + // Opus 4.7+ on the direct Anthropic provider path. + supportsReasoningBudget: true, + supportsReasoningBinary: true, + supportsTemperature: false, + description: + "Claude Fable 5 is Anthropic's most capable widely released model for the most demanding reasoning and long-horizon agentic work.", + }, "claude-opus-4-5-20251101": { maxTokens: 32_000, // Overridden to 8k if `enableReasoningEffort` is false. 
contextWindow: 200_000, diff --git a/packages/types/src/providers/bedrock.ts b/packages/types/src/providers/bedrock.ts index b3e1d3d74e..eb03f0c686 100644 --- a/packages/types/src/providers/bedrock.ts +++ b/packages/types/src/providers/bedrock.ts @@ -217,6 +217,24 @@ export const bedrockModels = { }, ], }, + "anthropic.claude-fable-5": { + maxTokens: 8192, + contextWindow: 1_000_000, + supportsImages: true, + supportsPromptCache: true, + supportsReasoningBudget: true, + supportsReasoningBinary: true, + supportsTemperature: false, + inputPrice: 10.0, + outputPrice: 50.0, + cacheWritesPrice: 12.5, + cacheReadsPrice: 1.0, + minTokensPerCachePoint: 1024, + maxCachePoints: 4, + cachableFields: ["system", "messages", "tools"], + description: + "Claude Fable 5 is Anthropic's most capable widely released model for the most demanding reasoning and long-horizon agentic work.", + }, "anthropic.claude-opus-4-5-20251101-v1:0": { maxTokens: 8192, contextWindow: 200_000, @@ -588,6 +606,7 @@ export const BEDROCK_1M_CONTEXT_MODEL_IDS = [ // - Claude Opus 4.5 // - Claude Opus 4.6 // - Claude Opus 4.7 +// - Claude Fable 5 (cross-region inference only — can only be used through an inference profile) export const BEDROCK_GLOBAL_INFERENCE_MODEL_IDS = [ "anthropic.claude-sonnet-4-20250514-v1:0", "anthropic.claude-sonnet-4-5-20250929-v1:0", @@ -597,6 +616,7 @@ export const BEDROCK_GLOBAL_INFERENCE_MODEL_IDS = [ "anthropic.claude-opus-4-6-v1", "anthropic.claude-opus-4-7", "anthropic.claude-opus-4-8", + "anthropic.claude-fable-5", ] as const // Amazon Bedrock Service Tier types diff --git a/packages/types/src/providers/openrouter.ts b/packages/types/src/providers/openrouter.ts index 834f40528e..9898ace232 100644 --- a/packages/types/src/providers/openrouter.ts +++ b/packages/types/src/providers/openrouter.ts @@ -43,6 +43,7 @@ export const OPEN_ROUTER_PROMPT_CACHING_MODELS = new Set([ "anthropic/claude-opus-4.1", "anthropic/claude-opus-4.5", "anthropic/claude-opus-4.6", + 
"anthropic/claude-fable-5", "anthropic/claude-haiku-4.5", "google/gemini-2.5-flash-preview", "google/gemini-2.5-flash-preview:thinking", @@ -74,6 +75,7 @@ export const OPEN_ROUTER_REASONING_BUDGET_MODELS = new Set([ "anthropic/claude-opus-4.1", "anthropic/claude-opus-4.5", "anthropic/claude-opus-4.6", + "anthropic/claude-fable-5", "anthropic/claude-sonnet-4", "anthropic/claude-sonnet-4.5", "anthropic/claude-sonnet-4.6", diff --git a/packages/types/src/providers/vercel-ai-gateway.ts b/packages/types/src/providers/vercel-ai-gateway.ts index ac633747ba..d23cd33cf1 100644 --- a/packages/types/src/providers/vercel-ai-gateway.ts +++ b/packages/types/src/providers/vercel-ai-gateway.ts @@ -13,6 +13,7 @@ export const VERCEL_AI_GATEWAY_PROMPT_CACHING_MODELS = new Set([ "anthropic/claude-opus-4.1", "anthropic/claude-opus-4.5", "anthropic/claude-opus-4.6", + "anthropic/claude-fable-5", "anthropic/claude-sonnet-4", "anthropic/claude-sonnet-4.6", "openai/gpt-4.1", @@ -55,6 +56,7 @@ export const VERCEL_AI_GATEWAY_VISION_AND_TOOLS_MODELS = new Set([ "anthropic/claude-opus-4.1", "anthropic/claude-opus-4.5", "anthropic/claude-opus-4.6", + "anthropic/claude-fable-5", "anthropic/claude-sonnet-4", "anthropic/claude-sonnet-4.6", "google/gemini-1.5-flash", diff --git a/packages/types/src/providers/vertex.ts b/packages/types/src/providers/vertex.ts index 940d195245..5646b9f22d 100644 --- a/packages/types/src/providers/vertex.ts +++ b/packages/types/src/providers/vertex.ts @@ -434,6 +434,21 @@ export const vertexModels = { }, ], }, + "claude-fable-5": { + maxTokens: 8192, + contextWindow: 1_000_000, + supportsImages: true, + supportsPromptCache: true, + inputPrice: 10.0, + outputPrice: 50.0, + cacheWritesPrice: 12.5, + cacheReadsPrice: 1.0, + supportsReasoningBudget: true, + supportsReasoningBinary: true, + supportsTemperature: false, + description: + "Claude Fable 5 is Anthropic's most capable widely released model for the most demanding reasoning and long-horizon agentic work.", + }, 
"claude-opus-4-5@20251101": { maxTokens: 8192, contextWindow: 200_000, diff --git a/src/api/providers/__tests__/anthropic-vertex.spec.ts b/src/api/providers/__tests__/anthropic-vertex.spec.ts index 381d921c58..2b64decdbc 100644 --- a/src/api/providers/__tests__/anthropic-vertex.spec.ts +++ b/src/api/providers/__tests__/anthropic-vertex.spec.ts @@ -945,6 +945,23 @@ describe("VertexHandler", () => { expect(model.betas).toContain("context-1m-2025-08-07") }) + it("should return Claude Fable 5 model info", () => { + const handler = new AnthropicVertexHandler({ + apiModelId: "claude-fable-5", + vertexProjectId: "test-project", + vertexRegion: "us-central1", + }) + + const model = handler.getModel() + expect(model.id).toBe("claude-fable-5") + expect(model.info.maxTokens).toBe(8192) + expect(model.info.contextWindow).toBe(1_000_000) + expect(model.info.supportsReasoningBinary).toBe(true) + expect(model.info.supportsReasoningBudget).toBe(true) + expect(model.info.supportsPromptCache).toBe(true) + expect(model.info.supportsTemperature).toBe(false) + }) + it("should not enable 1M context when flag is disabled", () => { const handler = new AnthropicVertexHandler({ apiModelId: VERTEX_1M_CONTEXT_MODEL_IDS[0], @@ -1161,6 +1178,35 @@ describe("VertexHandler", () => { undefined, ) }) + + it("should use adaptive thinking for Claude Fable 5", async () => { + const fableHandler = new AnthropicVertexHandler({ + apiModelId: "claude-fable-5", + vertexProjectId: "test-project", + vertexRegion: "us-central1", + enableReasoningEffort: true, + }) + + const mockCreate = vitest.fn().mockImplementation(async () => ({ + async *[Symbol.asyncIterator]() { + yield { type: "message_start", message: { usage: { input_tokens: 10, output_tokens: 5 } } } + }, + })) + ;(fableHandler["client"].messages as any).create = mockCreate + + await fableHandler.createMessage("You are a helpful assistant", [{ role: "user", content: "Hello" }]).next() + + expect(mockCreate).toHaveBeenCalledWith( + 
expect.objectContaining({ + thinking: { type: "adaptive" }, + }), + undefined, + ) + + const request = mockCreate.mock.calls[0][0] + expect(request.thinking).not.toHaveProperty("budget_tokens") + expect(request.temperature).toBeUndefined() + }) }) describe("native tool calling", () => { diff --git a/src/api/providers/__tests__/anthropic.spec.ts b/src/api/providers/__tests__/anthropic.spec.ts index 29dee406d2..48dac6b6b9 100644 --- a/src/api/providers/__tests__/anthropic.spec.ts +++ b/src/api/providers/__tests__/anthropic.spec.ts @@ -305,6 +305,33 @@ describe("AnthropicHandler", () => { expect(requestBody?.max_tokens).toBe(32768) }) + it("should use adaptive thinking for Claude Fable 5 when reasoning is enabled", async () => { + const fableHandler = new AnthropicHandler({ + apiKey: "test-api-key", + apiModelId: "claude-fable-5", + enableReasoningEffort: true, + modelMaxTokens: 32768, + }) + + const stream = fableHandler.createMessage(systemPrompt, [ + { + role: "user", + content: [{ type: "text" as const, text: "Hello" }], + }, + ]) + + for await (const _chunk of stream) { + // Consume stream + } + + const requestBody = mockCreate.mock.calls[mockCreate.mock.calls.length - 1]?.[0] + const requestOptions = mockCreate.mock.calls[mockCreate.mock.calls.length - 1]?.[1] + expect(requestBody?.thinking).toEqual({ type: "adaptive" }) + expect(requestBody?.temperature).toBeUndefined() + expect(requestBody?.max_tokens).toBe(32768) + expect(requestOptions?.headers?.["anthropic-beta"]).toContain("prompt-caching-2024-07-31") + }) + it("should not require the 1M context beta header for Claude Opus 4.8", async () => { const opus48Handler = new AnthropicHandler({ apiKey: "test-api-key", @@ -473,6 +500,23 @@ describe("AnthropicHandler", () => { expect(model.reasoningBudget).toBeUndefined() }) + it("should handle Claude Fable 5 model correctly", () => { + const handler = new AnthropicHandler({ + apiKey: "test-api-key", + apiModelId: "claude-fable-5", + }) + const model = 
handler.getModel() + expect(model.id).toBe("claude-fable-5") + expect(model.info.maxTokens).toBe(128000) + expect(model.info.contextWindow).toBe(1000000) + expect(model.maxTokens).toBe(8192) + expect(model.info.supportsReasoningBinary).toBe(true) + expect(model.info.supportsReasoningBudget).toBe(true) + expect(model.info.supportsPromptCache).toBe(true) + expect(model.info.supportsTemperature).toBe(false) + expect(model.reasoningBudget).toBeUndefined() + }) + it("should enable 1M context for Claude 4.5 Sonnet when beta flag is set", () => { const handler = new AnthropicHandler({ apiKey: "test-api-key", diff --git a/src/api/providers/__tests__/bedrock.spec.ts b/src/api/providers/__tests__/bedrock.spec.ts index ce6839d5fc..b731908916 100644 --- a/src/api/providers/__tests__/bedrock.spec.ts +++ b/src/api/providers/__tests__/bedrock.spec.ts @@ -683,6 +683,37 @@ describe("AwsBedrockHandler", () => { expect(typeof model.info.supportsImages).toBe("boolean") expect(typeof model.info.supportsPromptCache).toBe("boolean") }) + + it("should return Claude Fable 5 model info", () => { + const handler = new AwsBedrockHandler({ + apiModelId: "anthropic.claude-fable-5", + awsAccessKey: "test", + awsSecretKey: "test", + awsRegion: "us-east-1", + }) + + const model = handler.getModel() + expect(model.id).toBe("anthropic.claude-fable-5") + expect(model.info.contextWindow).toBe(1_000_000) + expect(model.info.supportsReasoningBinary).toBe(true) + expect(model.info.supportsReasoningBudget).toBe(true) + expect(model.info.supportsPromptCache).toBe(true) + expect(model.info.supportsTemperature).toBe(false) + expect(model.maxTokens).toBe(8192) + }) + + it("should apply global inference prefix for Claude Fable 5 when awsUseGlobalInference is true", () => { + const handler = new AwsBedrockHandler({ + apiModelId: "anthropic.claude-fable-5", + awsAccessKey: "test", + awsSecretKey: "test", + awsRegion: "us-east-1", + awsUseGlobalInference: true, + }) + + const model = handler.getModel() + 
expect(model.id).toBe("global.anthropic.claude-fable-5") + }) }) describe("1M context beta feature", () => { @@ -1516,6 +1547,7 @@ describe("AwsBedrockHandler", () => { it("returns true for all adaptive-thinking model patterns (opus/sonnet 4.7 and 4.8)", () => { expect(isAdaptiveThinkingModel("anthropic.claude-opus-4-7")).toBe(true) expect(isAdaptiveThinkingModel("anthropic.claude-opus-4-8")).toBe(true) + expect(isAdaptiveThinkingModel("anthropic.claude-fable-5")).toBe(true) // Future-proof Sonnet patterns — guarded even before a registry entry exists. expect(isAdaptiveThinkingModel("anthropic.claude-sonnet-4-7")).toBe(true) expect(isAdaptiveThinkingModel("anthropic.claude-sonnet-4-8")).toBe(true) @@ -1523,6 +1555,7 @@ describe("AwsBedrockHandler", () => { it("returns true when the id carries a cross-region or global prefix", () => { expect(isAdaptiveThinkingModel("us.anthropic.claude-opus-4-8")).toBe(true) + expect(isAdaptiveThinkingModel("global.anthropic.claude-fable-5")).toBe(true) expect(isAdaptiveThinkingModel("eu.anthropic.claude-sonnet-4-7")).toBe(true) expect(isAdaptiveThinkingModel("global.anthropic.claude-opus-4-8")).toBe(true) }) diff --git a/src/api/providers/__tests__/requesty.spec.ts b/src/api/providers/__tests__/requesty.spec.ts index ea6a36b4b4..4857ecc4d7 100644 --- a/src/api/providers/__tests__/requesty.spec.ts +++ b/src/api/providers/__tests__/requesty.spec.ts @@ -38,6 +38,20 @@ vitest.mock("../fetchers/modelCache", () => ({ cacheReadsPrice: 0.3, description: "Claude 4 Sonnet", }, + "anthropic/claude-fable-5": { + maxTokens: 128000, + contextWindow: 1000000, + supportsImages: true, + supportsPromptCache: true, + supportsReasoningBudget: true, + supportsReasoningBinary: true, + supportsTemperature: false, + inputPrice: 10, + outputPrice: 50, + cacheWritesPrice: 12.5, + cacheReadsPrice: 1, + description: "Claude Fable 5", + }, }) }), })) @@ -193,6 +207,39 @@ describe("RequestyHandler", () => { ) }) + it("uses adaptive thinking for Claude Fable 5 
when reasoning is enabled", async () => { + const handler = new RequestyHandler({ + requestyApiKey: "test-key", + requestyModelId: "anthropic/claude-fable-5", + enableReasoningEffort: true, + modelMaxTokens: 32768, + }) + + const mockStream = { + async *[Symbol.asyncIterator]() { + yield { + id: "test-id", + choices: [{ delta: {} }], + usage: { prompt_tokens: 10, completion_tokens: 20 }, + } + }, + } + + mockCreate.mockResolvedValue(mockStream) + + const generator = handler.createMessage("test system prompt", [{ role: "user" as const, content: "test" }]) + await generator.next() + + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + model: "anthropic/claude-fable-5", + max_tokens: 32768, + thinking: { type: "adaptive" }, + temperature: undefined, + }), + ) + }) + it("handles API errors", async () => { const handler = new RequestyHandler(mockOptions) const mockError = new Error("API Error") @@ -367,6 +414,23 @@ describe("RequestyHandler", () => { }) }) + it("omits temperature for Claude Fable 5 in completePrompt", async () => { + const handler = new RequestyHandler({ + requestyApiKey: "test-key", + requestyModelId: "anthropic/claude-fable-5", + }) + mockCreate.mockResolvedValue({ choices: [{ message: { content: "test completion" } }] }) + + await handler.completePrompt("test prompt") + + expect(mockCreate).toHaveBeenCalledWith({ + model: "anthropic/claude-fable-5", + max_tokens: 8192, + messages: [{ role: "system", content: "test prompt" }], + temperature: undefined, + }) + }) + it("handles API errors", async () => { const handler = new RequestyHandler(mockOptions) const mockError = new Error("API Error") diff --git a/src/api/providers/__tests__/vercel-ai-gateway.spec.ts b/src/api/providers/__tests__/vercel-ai-gateway.spec.ts index 9ff804e0c4..2cb6bb8e2e 100644 --- a/src/api/providers/__tests__/vercel-ai-gateway.spec.ts +++ b/src/api/providers/__tests__/vercel-ai-gateway.spec.ts @@ -27,6 +27,18 @@ vitest.mock("../fetchers/modelCache", () => ({ 
cacheReadsPrice: 0.3, description: "Claude Sonnet 4", }, + "anthropic/claude-fable-5": { + maxTokens: 128000, + contextWindow: 1000000, + supportsImages: true, + supportsPromptCache: true, + supportsTemperature: false, + inputPrice: 10, + outputPrice: 50, + cacheWritesPrice: 12.5, + cacheReadsPrice: 1, + description: "Claude Fable 5", + }, "anthropic/claude-3.5-haiku": { maxTokens: 32000, contextWindow: 200000, @@ -224,6 +236,23 @@ describe("VercelAiGatewayHandler", () => { ) }) + it("omits temperature for Claude Fable 5", async () => { + const handler = new VercelAiGatewayHandler({ + ...mockOptions, + vercelAiGatewayModelId: "anthropic/claude-fable-5", + }) + + await handler.createMessage("You are a helpful assistant.", [{ role: "user", content: "Hello" }]).next() + + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + model: "anthropic/claude-fable-5", + temperature: undefined, + max_completion_tokens: 128000, + }), + ) + }) + it("adds cache breakpoints for supported models", async () => { const { addCacheBreakpoints } = await import("../../transform/caching/vercel-ai-gateway") const handler = new VercelAiGatewayHandler({ diff --git a/src/api/providers/anthropic-vertex.ts b/src/api/providers/anthropic-vertex.ts index b6b94fcde7..306b26191f 100644 --- a/src/api/providers/anthropic-vertex.ts +++ b/src/api/providers/anthropic-vertex.ts @@ -16,6 +16,7 @@ import { ApiHandlerOptions } from "../../shared/api" import { ApiStream } from "../transform/stream" import { addCacheBreakpoints } from "../transform/caching/vertex" import { getModelParams } from "../transform/model-params" +import { getAnthropicProviderReasoning } from "../transform/reasoning" import { filterNonAnthropicBlocks } from "../transform/anthropic-filter" import { convertOpenAIToolsToAnthropic, @@ -95,7 +96,7 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple * This ensures we stay under the 4-block limit while maintaining effective caching * for the 
most relevant context. */ - const params: Anthropic.Messages.MessageCreateParamsStreaming = { + const params = { model: id, max_tokens: maxTokens ?? ANTHROPIC_DEFAULT_MAX_TOKENS, temperature, @@ -107,7 +108,7 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple messages: supportsPromptCache ? addCacheBreakpoints(sanitizedMessages) : sanitizedMessages, stream: true, ...nativeToolParams, - } + } as Anthropic.Messages.MessageCreateParamsStreaming // and prompt caching const requestOptions = betas?.length ? { headers: { "anthropic-beta": betas.join(",") } } : undefined @@ -241,6 +242,17 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple defaultTemperature: 0, }) + // Adaptive-binary reasoning models (e.g. Opus 4.7/4.8, Fable 5) must send + // `thinking: { type: "adaptive" }` rather than the budget-based config that + // `getModelParams` produces for hybrid models. Recompute here so Vertex + // matches the direct Anthropic provider. + const thinking = getAnthropicProviderReasoning({ + model: info, + reasoningBudget: params.reasoningBudget, + reasoningEffort: params.reasoningEffort, + settings: this.options, + }) + // Build betas array for request headers const betas: string[] = [] @@ -258,6 +270,7 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple info, betas: betas.length > 0 ? 
betas : undefined, ...params, + reasoning: thinking, } } @@ -271,7 +284,7 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple reasoning: thinking, } = this.getModel() - const params: Anthropic.Messages.MessageCreateParamsNonStreaming = { + const params = { model: id, max_tokens: maxTokens, temperature, @@ -285,7 +298,7 @@ export class AnthropicVertexHandler extends BaseProvider implements SingleComple }, ], stream: false, - } + } as Anthropic.Messages.MessageCreateParamsNonStreaming const response = await this.client.messages.create(params) const content = response.content[0] diff --git a/src/api/providers/anthropic.ts b/src/api/providers/anthropic.ts index dc90ad054a..03d14c168b 100644 --- a/src/api/providers/anthropic.ts +++ b/src/api/providers/anthropic.ts @@ -95,6 +95,7 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa case "claude-opus-4-6": case "claude-opus-4-7": case "claude-opus-4-8": + case "claude-fable-5": case "claude-opus-4-5-20251101": case "claude-opus-4-1-20250805": case "claude-opus-4-20250514": @@ -164,6 +165,7 @@ export class AnthropicHandler extends BaseProvider implements SingleCompletionHa case "claude-opus-4-6": case "claude-opus-4-7": case "claude-opus-4-8": + case "claude-fable-5": case "claude-opus-4-5-20251101": case "claude-opus-4-1-20250805": case "claude-opus-4-20250514": diff --git a/src/api/providers/bedrock.ts b/src/api/providers/bedrock.ts index e3084525f5..ea7d76ce2a 100644 --- a/src/api/providers/bedrock.ts +++ b/src/api/providers/bedrock.ts @@ -316,6 +316,7 @@ export class AwsBedrockHandler extends BaseProvider implements SingleCompletionH return ( baseModelId.includes("opus-4-7") || baseModelId.includes("opus-4-8") || + baseModelId.includes("fable-5") || baseModelId.includes("sonnet-4-7") || baseModelId.includes("sonnet-4-8") ) diff --git a/src/api/providers/fetchers/__tests__/openrouter.spec.ts b/src/api/providers/fetchers/__tests__/openrouter.spec.ts index 
bca54b3078..0616a0b69b 100644 --- a/src/api/providers/fetchers/__tests__/openrouter.spec.ts +++ b/src/api/providers/fetchers/__tests__/openrouter.spec.ts @@ -290,6 +290,34 @@ describe("OpenRouter API", () => { expect(result.contextWindow).toBe(200000) }) + it("sets claude-fable-5 model to Anthropic max tokens and omits temperature", () => { + const mockModel = { + name: "Claude Fable 5", + description: "Test model", + context_length: 1000000, + max_completion_tokens: 128000, + pricing: { + prompt: "0.00001", + completion: "0.00005", + }, + } + + const result = parseOpenRouterModel({ + id: "anthropic/claude-fable-5", + model: mockModel, + inputModality: ["text", "image"], + outputModality: ["text"], + maxTokens: 128000, + supportedParameters: ["reasoning", "include_reasoning"], + }) + + expect(result.maxTokens).toBe(128000) + expect(result.contextWindow).toBe(1000000) + expect(result.supportsTemperature).toBe(false) + expect(result.supportsReasoningBudget).toBe(true) + expect(result.supportsReasoningBinary).toBe(true) + }) + it("sets horizon-alpha model to 32k max tokens", () => { const mockModel = { name: "Horizon Alpha", diff --git a/src/api/providers/fetchers/__tests__/requesty.spec.ts b/src/api/providers/fetchers/__tests__/requesty.spec.ts new file mode 100644 index 0000000000..9fccb1a3fa --- /dev/null +++ b/src/api/providers/fetchers/__tests__/requesty.spec.ts @@ -0,0 +1,67 @@ +// npx vitest run api/providers/fetchers/__tests__/requesty.spec.ts + +import axios from "axios" + +import { getRequestyModels } from "../requesty" + +vi.mock("axios") +const mockAxiosGet = vi.mocked(axios.get) + +function makeRawModel(overrides: Record) { + return { + id: "some/model", + max_output_tokens: 8192, + context_window: 200000, + supports_caching: false, + supports_vision: false, + supports_reasoning: false, + input_price: "0.000003", + output_price: "0.000015", + description: "Test model", + caching_price: null, + cached_price: null, + ...overrides, + } +} + 
+describe("getRequestyModels", () => { + it("applies Fable 5 overrides when parsing anthropic/claude-fable-5", async () => { + const rawFable5 = makeRawModel({ + id: "anthropic/claude-fable-5", + max_output_tokens: 128000, + context_window: 1000000, + supports_caching: true, + supports_vision: true, + supports_reasoning: true, + input_price: "0.00001", + output_price: "0.00005", + caching_price: "0.0000125", + cached_price: "0.000001", + }) + + mockAxiosGet.mockResolvedValueOnce({ data: { data: [rawFable5] } }) + + const models = await getRequestyModels() + const fable5 = models["anthropic/claude-fable-5"] + + expect(fable5).toBeDefined() + expect(fable5.supportsReasoningBudget).toBe(true) + expect(fable5.supportsReasoningBinary).toBe(true) + expect(fable5.supportsTemperature).toBe(false) + }) + + it("does not apply Fable 5 overrides to other models", async () => { + const rawSonnet = makeRawModel({ + id: "anthropic/claude-sonnet-4.6", + supports_reasoning: true, + }) + + mockAxiosGet.mockResolvedValueOnce({ data: { data: [rawSonnet] } }) + + const models = await getRequestyModels() + const sonnet = models["anthropic/claude-sonnet-4.6"] + + expect(sonnet.supportsReasoningBinary).toBeUndefined() + expect(sonnet.supportsTemperature).toBeUndefined() + }) +}) diff --git a/src/api/providers/fetchers/__tests__/vercel-ai-gateway.spec.ts b/src/api/providers/fetchers/__tests__/vercel-ai-gateway.spec.ts index 3a4a234de9..122b46dc64 100644 --- a/src/api/providers/fetchers/__tests__/vercel-ai-gateway.spec.ts +++ b/src/api/providers/fetchers/__tests__/vercel-ai-gateway.spec.ts @@ -205,6 +205,22 @@ describe("Vercel AI Gateway Fetchers", () => { }) }) + it("marks Claude Fable 5 as not supporting temperature", () => { + const result = parseVercelAiGatewayModel({ + id: "anthropic/claude-fable-5", + model: { + ...baseModel, + id: "anthropic/claude-fable-5", + context_window: 1000000, + max_tokens: 128000, + }, + }) + + expect(result.maxTokens).toBe(128000) + 
expect(result.contextWindow).toBe(1000000) + expect(result.supportsTemperature).toBe(false) + }) + it("detects vision-only models", () => { // claude 3.5 haiku in VERCEL_AI_GATEWAY_VISION_ONLY_MODELS const visionModel = { diff --git a/src/api/providers/fetchers/openrouter.ts b/src/api/providers/fetchers/openrouter.ts index 0cf65fb09c..7754af3192 100644 --- a/src/api/providers/fetchers/openrouter.ts +++ b/src/api/providers/fetchers/openrouter.ts @@ -263,6 +263,13 @@ export const parseOpenRouterModel = ({ modelInfo.maxTokens = anthropicModels["claude-opus-4-6"].maxTokens } + // Set claude-fable-5 model to use the correct Anthropic configuration + if (id === "anthropic/claude-fable-5") { + modelInfo.maxTokens = anthropicModels["claude-fable-5"].maxTokens + modelInfo.supportsReasoningBinary = true + modelInfo.supportsTemperature = false + } + // Ensure correct reasoning handling for Claude Haiku 4.5 on OpenRouter // Use budget control and disable effort-based reasoning fallback if (id === "anthropic/claude-haiku-4.5") { diff --git a/src/api/providers/fetchers/requesty.ts b/src/api/providers/fetchers/requesty.ts index 64c7de6689..aaf11e4a7b 100644 --- a/src/api/providers/fetchers/requesty.ts +++ b/src/api/providers/fetchers/requesty.ts @@ -45,6 +45,12 @@ export async function getRequestyModels(baseUrl?: string, apiKey?: string): Prom cacheReadsPrice: parseApiPrice(rawModel.cached_price), } + if (rawModel.id === "anthropic/claude-fable-5") { + modelInfo.supportsReasoningBudget = true + modelInfo.supportsReasoningBinary = true + modelInfo.supportsTemperature = false + } + models[rawModel.id] = modelInfo } } catch (error) { diff --git a/src/api/providers/fetchers/vercel-ai-gateway.ts b/src/api/providers/fetchers/vercel-ai-gateway.ts index a708d106f0..4aa0932b66 100644 --- a/src/api/providers/fetchers/vercel-ai-gateway.ts +++ b/src/api/providers/fetchers/vercel-ai-gateway.ts @@ -113,5 +113,9 @@ export const parseVercelAiGatewayModel = ({ id, model }: { id: string; model: Ve 
description: model.description, } + if (id === "anthropic/claude-fable-5") { + modelInfo.supportsTemperature = false + } + return modelInfo } diff --git a/src/api/providers/requesty.ts b/src/api/providers/requesty.ts index b241c347b0..a532e89b57 100644 --- a/src/api/providers/requesty.ts +++ b/src/api/providers/requesty.ts @@ -9,7 +9,7 @@ import { calculateApiCostOpenAI } from "../../shared/cost" import { convertToOpenAiMessages } from "../transform/openai-format" import { ApiStream, ApiStreamUsageChunk } from "../transform/stream" import { getModelParams } from "../transform/model-params" -import { AnthropicReasoningParams } from "../transform/reasoning" +import { AnthropicProviderReasoningParams, getAnthropicProviderReasoning } from "../transform/reasoning" import { DEFAULT_HEADERS } from "./constants" import { getModels } from "./fetchers/modelCache" @@ -36,7 +36,7 @@ type RequestyChatCompletionParamsStreaming = OpenAI.Chat.Completions.ChatComplet mode?: string } } - thinking?: AnthropicReasoningParams + thinking?: AnthropicProviderReasoningParams } type RequestyChatCompletionParams = OpenAI.Chat.ChatCompletionCreateParams & { @@ -46,7 +46,7 @@ type RequestyChatCompletionParams = OpenAI.Chat.ChatCompletionCreateParams & { mode?: string } } - thinking?: AnthropicReasoningParams + thinking?: AnthropicProviderReasoningParams } export class RequestyHandler extends BaseProvider implements SingleCompletionHandler { @@ -91,8 +91,14 @@ export class RequestyHandler extends BaseProvider implements SingleCompletionHan settings: this.options, defaultTemperature: 0, }) + const reasoning = getAnthropicProviderReasoning({ + model: info, + reasoningBudget: params.reasoningBudget, + reasoningEffort: params.reasoningEffort, + settings: this.options, + }) - return { id, info, ...params } + return { id, info, ...params, reasoning } } protected processUsageMetrics(usage: any, modelInfo?: ModelInfo): ApiStreamUsageChunk { diff --git a/src/api/providers/vercel-ai-gateway.ts 
b/src/api/providers/vercel-ai-gateway.ts index 51b0eb5f51..7a51f189a0 100644 --- a/src/api/providers/vercel-ai-gateway.ts +++ b/src/api/providers/vercel-ai-gateway.ts @@ -52,10 +52,12 @@ export class VercelAiGatewayHandler extends RouterProvider implements SingleComp addCacheBreakpoints(systemPrompt, openAiMessages) } + const supportsTemperature = info.supportsTemperature !== false && this.supportsTemperature(modelId) + const body: OpenAI.Chat.ChatCompletionCreateParams = { model: modelId, messages: openAiMessages, - temperature: this.supportsTemperature(modelId) + temperature: supportsTemperature ? (this.options.modelTemperature ?? VERCEL_AI_GATEWAY_DEFAULT_TEMPERATURE) : undefined, max_completion_tokens: info.maxTokens, @@ -114,7 +116,7 @@ export class VercelAiGatewayHandler extends RouterProvider implements SingleComp stream: false, } - if (this.supportsTemperature(modelId)) { + if (info.supportsTemperature !== false && this.supportsTemperature(modelId)) { requestOptions.temperature = this.options.modelTemperature ?? 
VERCEL_AI_GATEWAY_DEFAULT_TEMPERATURE } diff --git a/src/api/transform/__tests__/model-params.spec.ts b/src/api/transform/__tests__/model-params.spec.ts index 7622336e46..d66fb66b76 100644 --- a/src/api/transform/__tests__/model-params.spec.ts +++ b/src/api/transform/__tests__/model-params.spec.ts @@ -134,6 +134,17 @@ describe("getModelParams", () => { expect(result.temperature).toBeUndefined() }) + it("should omit temperature for openrouter models that do not support it", () => { + const result = getModelParams({ + ...openrouterParams, + settings: { modelTemperature: 0.7 }, + model: { ...baseModel, supportsTemperature: false }, + defaultTemperature: 0.5, + }) + + expect(result.temperature).toBeUndefined() + }) + it("should use model maxTokens when available", () => { const model: ModelInfo = { ...baseModel, diff --git a/src/api/transform/model-params.ts b/src/api/transform/model-params.ts index 652132a73e..cb0b5bfd50 100644 --- a/src/api/transform/model-params.ts +++ b/src/api/transform/model-params.ts @@ -177,11 +177,13 @@ export function getModelParams({ reasoning: getGeminiReasoning({ model, reasoningBudget, reasoningEffort, settings }), } } else { + if (model.supportsTemperature === false) { + params.temperature = undefined + } + // Special case for o1-pro, which doesn't support temperature. // Note that OpenRouter's `supported_parameters` field includes // `temperature`, which is probably a bug. - // TODO: Add a `supportsTemperature` field to the model info and populate - // it appropriately in the OpenRouter fetcher. 
if (modelId === "openai/o1-pro") { params.temperature = undefined } diff --git a/src/shared/__tests__/api.spec.ts b/src/shared/__tests__/api.spec.ts index f266782682..33e147711d 100644 --- a/src/shared/__tests__/api.spec.ts +++ b/src/shared/__tests__/api.spec.ts @@ -109,6 +109,33 @@ describe("getModelMaxOutputTokens", () => { ).toBe(32_768) }) + test("should preserve Anthropic hybrid token handling for Claude Fable 5", () => { + const model: ModelInfo = { + contextWindow: 1_000_000, + supportsPromptCache: true, + supportsReasoningBudget: true, + supportsReasoningBinary: true, + supportsTemperature: false, + maxTokens: 128_000, + } + + expect( + getModelMaxOutputTokens({ + modelId: "claude-fable-5", + model, + settings: { apiProvider: "anthropic", enableReasoningEffort: false }, + }), + ).toBe(ANTHROPIC_DEFAULT_MAX_TOKENS) + + expect( + getModelMaxOutputTokens({ + modelId: "claude-fable-5", + model, + settings: { apiProvider: "anthropic", enableReasoningEffort: true, modelMaxTokens: 32_768 }, + }), + ).toBe(32_768) + }) + test("should return model.maxTokens for non-Anthropic models that support reasoning budget but aren't using it", () => { const geminiModelId = "gemini-2.5-flash-preview-04-17" const model: ModelInfo = { From 69dbbb2514033351416ad1d249d42d9c81cc5864 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Dre=C5=BCewski?= Date: Wed, 17 Jun 2026 23:59:04 +0200 Subject: [PATCH 2/4] refactor(providers): use extractReasoningFromDelta helper for reasoning streams (zoo #588) Replace the duplicated inline reasoning_content extraction block in the OpenAI-compatible streaming providers (deepseek, openai, qwen-code, requesty, unbound) with the shared extractReasoningFromDelta helper, which prefers delta.reasoning_content and falls back to delta.reasoning (OpenRouter style). Adds streaming coverage for both paths. 
Fork adaptations: the helper already existed here; mimo and opencode-go do not exist in this fork and are skipped; unbound has no spec file here so its new streaming tests are skipped (the helper has its own unit tests). Co-authored-by: dw <41457565+daewoongoh@users.noreply.github.com> Co-authored-by: Oh Daewoong --- ...588-extract-reasoning-from-delta-helper.md | 156 ++++++++++++++++++ src/api/providers/__tests__/deepseek.spec.ts | 69 ++++++++ src/api/providers/__tests__/openai.spec.ts | 71 ++++++++ .../__tests__/qwen-code-native-tools.spec.ts | 72 ++++++++ src/api/providers/__tests__/requesty.spec.ts | 78 +++++++++ src/api/providers/deepseek.ts | 9 +- src/api/providers/openai.ts | 9 +- src/api/providers/qwen-code.ts | 9 +- src/api/providers/requesty.ts | 6 +- src/api/providers/unbound.ts | 6 +- 10 files changed, 466 insertions(+), 19 deletions(-) create mode 100644 ai_plans/2026-06-17_zoo-588-extract-reasoning-from-delta-helper.md diff --git a/ai_plans/2026-06-17_zoo-588-extract-reasoning-from-delta-helper.md b/ai_plans/2026-06-17_zoo-588-extract-reasoning-from-delta-helper.md new file mode 100644 index 0000000000..3fcf08407d --- /dev/null +++ b/ai_plans/2026-06-17_zoo-588-extract-reasoning-from-delta-helper.md @@ -0,0 +1,156 @@ +# Port plan — Zoo PR #588 → `feature/zoo-588-extract-reasoning-from-delta-helper` + +> **For the executor (read first).** Do the steps in order. Do not improvise or +> refactor beyond what is written (YAGNI). Every code block is already adapted to +> this repo. This repo is **Tumble Code**: never introduce the strings "Roo" or +> "Zoo" in user-facing text or test names. + +--- + +## 0. Context + +- **Upstream:** Zoo PR #588 — "use extractReasoningFromDelta helper for reasoning + streams" (commit `6daa153ac`, merged 2026-06-15). +- **What it does:** Replaces the duplicated inline `reasoning_content` extraction + block in each OpenAI-compatible streaming provider with the shared + `extractReasoningFromDelta(delta)` helper. 
The helper prefers + `delta.reasoning_content` (DeepSeek-R1 / QwQ style) and **falls back to + `delta.reasoning`** (OpenRouter style) — so each provider gains the + router-style fallback for free. Adds streaming coverage for the new paths. +- **Why we want it:** removes copy-pasted logic, gives every provider the + `reasoning` fallback, and centralizes one well-tested helper. Low risk — + behavior-preserving for the existing `reasoning_content` path. + +- **Adaptations vs. the raw upstream diff (IMPORTANT):** + + 1. **Helper already exists here.** `src/api/providers/utils/extract-reasoning.ts` + is already present in our fork (and already used by + `base-openai-compatible-provider.ts` and `lite-llm.ts`). Its semantics match + Zoo's (`reasoning_content` first, then `reasoning`). **Do NOT recreate it.** + The port is purely: import it into each surviving provider and swap the + inline block. + 2. **Two of Zoo's seven providers do not exist in our fork.** `git ls-files` + shows no `src/api/providers/mimo.ts` and no `src/api/providers/opencode-go.ts`. + **Skip both** — only port the 5 that exist: deepseek, openai, qwen-code, + requesty, unbound. + 3. **`requesty.ts` already imports from `../transform/reasoning`** (touched by + the #555 port). Add the `extract-reasoning` import as a separate line next to + the other `./utils/` imports; do not disturb the reasoning import. + 4. **No `unbound.spec.ts` in our fork.** Zoo adds 3 streaming tests to it, but + our fork has no unbound test file and no OpenAI mock harness for one. + **Scope cut (YAGNI):** apply the `unbound.ts` production refactor, but do NOT + create a brand-new test file just for these 3 tests. The helper itself is + already covered by `utils/__tests__/extract-reasoning.spec.ts`, and the + refactor is behavior-preserving. Document the cut here so it isn't mistaken + for an omission. + 5. 
**Test name branding.** Zoo's unbound spec has an `it("identifies itself as +Zoo Code …")` anchor — irrelevant here since we skip that file. Do not + introduce "Zoo Code" anywhere. + +- **Original authors — credit them:** + + ```text + Co-authored-by: dw <41457565+daewoongoh@users.noreply.github.com> + Co-authored-by: Oh Daewoong + ``` + +## 1. Preconditions + +- [ ] Branch `feature/zoo-588-extract-reasoning-from-delta-helper` created off the + `feature/zoo-555-…` branch (stacked — files do not overlap with #555 except + `requesty.ts`, which #555 already touched, so stacking avoids a conflict). +- [ ] `src/api/providers/utils/extract-reasoning.ts` exports + `extractReasoningFromDelta` (verify; it already exists). +- [ ] `mimo.ts` / `opencode-go.ts` confirmed absent (`git ls-files`). + +## 2. Source edits + +Each surviving provider gets (a) one import line and (b) the inline block swapped +for the helper call. The replacement block is identical everywhere: + +```ts +const reasoningText = extractReasoningFromDelta(delta) +if (reasoningText) { + yield { type: "reasoning", text: reasoningText } +} +``` + +### Edit A — `src/api/providers/deepseek.ts` + +- Import after `import { OpenAiHandler } from "./openai"`: + `import { extractReasoningFromDelta } from "./utils/extract-reasoning"` +- Replace the `if ("reasoning_content" in delta …)` block (keep the two + `// Handle reasoning_content …` comment lines above it). + +### Edit B — `src/api/providers/openai.ts` + +- Import after `import { handleOpenAIError } from "./utils/openai-error-handler"`: + `import { extractReasoningFromDelta } from "./utils/extract-reasoning"` +- Replace the inline block immediately before + `yield* this.processToolCalls(delta, finishReason, activeToolCallIds)`. 
+ +### Edit C — `src/api/providers/qwen-code.ts` + +- Import after `import { BaseProvider } from "./base-provider"`: + `import { extractReasoningFromDelta } from "./utils/extract-reasoning"` +- Replace the inline block before the + `// Handle tool calls in stream …` comment. + +### Edit D — `src/api/providers/requesty.ts` + +- Import after `import { applyRouterToolPreferences } from "./utils/router-tool-preferences"`: + `import { extractReasoningFromDelta } from "./utils/extract-reasoning"` +- Replace the `if (delta && "reasoning_content" in delta …)` block before the + `// Handle native tool calls` comment. + +### Edit E — `src/api/providers/unbound.ts` + +- Import after `import { applyRouterToolPreferences } from "./utils/router-tool-preferences"`: + `import { extractReasoningFromDelta } from "./utils/extract-reasoning"` +- Replace the `if (delta && "reasoning_content" in delta …)` block before the + `// Handle native tool calls` comment. + +## 3. Tests (port from upstream, adapt anchors) + +Add the 3 streaming tests (`reasoning_content`, `reasoning` fallback, preference +when both present) to the providers that **have** a test file, inside the +`createMessage` describe block: + +- `src/api/providers/__tests__/deepseek.spec.ts` — after the cache-tokens test, + before `describe("processUsageMetrics", …)`. +- `src/api/providers/__tests__/openai.spec.ts` — after the "Test response" text + test, before `it("should handle tool calls in streaming responses", …)`. +- `src/api/providers/__tests__/qwen-code-native-tools.spec.ts` — after the + `call_qwen_test` test, before + `it("should preserve thinking block handling alongside tool calls", …)`. +- `src/api/providers/__tests__/requesty.spec.ts` — after the streaming "API Error" + test, before `describe("native tool support", …)`. Each test constructs + `new RequestyHandler(mockOptions)` and uses the file-level `mockCreate`. + +**Skipped:** `unbound.spec.ts` (does not exist here — see §0 adaptation 4). + +## 4. 
Out of scope + +- No new `unbound.spec.ts`. No `mimo` / `opencode-go` (absent). No change to the + helper itself. No "Zoo"/"Roo" strings. + +## 5. Verify + +- `pnpm --filter tumble-code check-types` clean. +- `cd src && npx vitest run api/providers/__tests__/deepseek.spec.ts api/providers/__tests__/openai.spec.ts api/providers/__tests__/qwen-code-native-tools.spec.ts api/providers/__tests__/requesty.spec.ts` all green. +- `cd src && npx eslint` on the 5 providers + 4 specs clean. + +## 6. Acceptance + +- [ ] All 5 providers call `extractReasoningFromDelta`; no inline + `reasoning_content` block remains in them. +- [ ] New reasoning tests pass in the 4 spec files; touched suites green. +- [ ] No "Roo"/"Zoo" user-facing strings introduced. + +## 7. Record + +```bash +node .claude/skills/zoo-port/scripts/zoo-prs.mjs record --pr 588 --status ported \ + --branch feature/zoo-588-extract-reasoning-from-delta-helper \ + --plan ai_plans/2026-06-17_zoo-588-extract-reasoning-from-delta-helper.md +``` diff --git a/src/api/providers/__tests__/deepseek.spec.ts b/src/api/providers/__tests__/deepseek.spec.ts index 7d652c28ed..b8d5881671 100644 --- a/src/api/providers/__tests__/deepseek.spec.ts +++ b/src/api/providers/__tests__/deepseek.spec.ts @@ -385,6 +385,75 @@ describe("DeepSeekHandler", () => { expect(usageChunks[0].cacheWriteTokens).toBe(8) expect(usageChunks[0].cacheReadTokens).toBe(2) }) + + it("streams reasoning chunks from delta.reasoning_content", async () => { + mockCreate.mockImplementationOnce(async () => ({ + [Symbol.asyncIterator]: async function* () { + yield { choices: [{ delta: { reasoning_content: "thinking..." 
}, index: 0 }] } + yield { choices: [{ delta: { content: "answer" }, index: 0 }] } + yield { + choices: [{ delta: {}, index: 0 }], + usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 }, + } + }, + })) + + const chunks: any[] = [] + for await (const chunk of handler.createMessage(systemPrompt, messages)) { + chunks.push(chunk) + } + + expect(chunks).toContainEqual({ type: "reasoning", text: "thinking..." }) + }) + + it("falls back to delta.reasoning when reasoning_content is absent", async () => { + mockCreate.mockImplementationOnce(async () => ({ + [Symbol.asyncIterator]: async function* () { + yield { choices: [{ delta: { reasoning: "router-style thought" }, index: 0 }] } + yield { + choices: [{ delta: {}, index: 0 }], + usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 }, + } + }, + })) + + const chunks: any[] = [] + for await (const chunk of handler.createMessage(systemPrompt, messages)) { + chunks.push(chunk) + } + + expect(chunks).toContainEqual({ type: "reasoning", text: "router-style thought" }) + }) + + it("prefers delta.reasoning_content over delta.reasoning when both are present", async () => { + mockCreate.mockImplementationOnce(async () => ({ + [Symbol.asyncIterator]: async function* () { + yield { + choices: [ + { + delta: { + reasoning_content: "primary thought", + reasoning: "fallback thought", + }, + index: 0, + }, + ], + } + yield { + choices: [{ delta: {}, index: 0 }], + usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 }, + } + }, + })) + + const chunks: any[] = [] + for await (const chunk of handler.createMessage(systemPrompt, messages)) { + chunks.push(chunk) + } + + const reasoningChunks = chunks.filter((chunk) => chunk.type === "reasoning") + expect(reasoningChunks).toEqual([{ type: "reasoning", text: "primary thought" }]) + }) }) describe("processUsageMetrics", () => { diff --git a/src/api/providers/__tests__/openai.spec.ts b/src/api/providers/__tests__/openai.spec.ts index 18f8b8c5d6..26604b8a9b 
100644 --- a/src/api/providers/__tests__/openai.spec.ts +++ b/src/api/providers/__tests__/openai.spec.ts @@ -226,6 +226,77 @@ describe("OpenAiHandler", () => { expect(textChunks[0].text).toBe("Test response") }) + it("streams reasoning chunks from delta.reasoning_content", async () => { + mockCreate.mockImplementationOnce(async () => ({ + [Symbol.asyncIterator]: async function* () { + yield { choices: [{ delta: { reasoning_content: "thinking..." }, index: 0 }] } + yield { choices: [{ delta: { content: "answer" }, index: 0 }] } + yield { + choices: [{ delta: {}, index: 0 }], + usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 }, + } + }, + })) + + const chunks: any[] = [] + for await (const chunk of handler.createMessage(systemPrompt, messages)) { + chunks.push(chunk) + } + + expect(chunks).toContainEqual({ type: "reasoning", text: "thinking..." }) + }) + + it("falls back to delta.reasoning when reasoning_content is absent", async () => { + mockCreate.mockImplementationOnce(async () => ({ + [Symbol.asyncIterator]: async function* () { + yield { choices: [{ delta: { reasoning: "router-style thought" }, index: 0 }] } + yield { + choices: [{ delta: {}, index: 0 }], + usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 }, + } + }, + })) + + const chunks: any[] = [] + for await (const chunk of handler.createMessage(systemPrompt, messages)) { + chunks.push(chunk) + } + + expect(chunks).toContainEqual({ type: "reasoning", text: "router-style thought" }) + }) + + it("prefers delta.reasoning_content over delta.reasoning when both are present", async () => { + mockCreate.mockImplementationOnce(async () => ({ + [Symbol.asyncIterator]: async function* () { + yield { + choices: [ + { + delta: { + reasoning_content: "primary thought", + reasoning: "fallback thought", + }, + index: 0, + }, + ], + } + yield { + choices: [{ delta: {}, index: 0 }], + usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 }, + } + }, + })) + + const chunks: any[] 
= [] + + for await (const chunk of handler.createMessage(systemPrompt, messages)) { + chunks.push(chunk) + } + + const reasoningChunks = chunks.filter((chunk) => chunk.type === "reasoning") + + expect(reasoningChunks).toEqual([{ type: "reasoning", text: "primary thought" }]) + }) + it("should handle tool calls in streaming responses", async () => { mockCreate.mockImplementation(async (options) => { return { diff --git a/src/api/providers/__tests__/qwen-code-native-tools.spec.ts b/src/api/providers/__tests__/qwen-code-native-tools.spec.ts index 3b470ce461..b3c9cb9828 100644 --- a/src/api/providers/__tests__/qwen-code-native-tools.spec.ts +++ b/src/api/providers/__tests__/qwen-code-native-tools.spec.ts @@ -300,6 +300,78 @@ describe("QwenCodeHandler Native Tools", () => { expect(endChunks[0].id).toBe("call_qwen_test") }) + it("streams reasoning chunks from delta.reasoning_content", async () => { + mockCreate.mockImplementationOnce(() => ({ + [Symbol.asyncIterator]: async function* () { + yield { choices: [{ delta: { reasoning_content: "thinking..." }, index: 0 }] } + yield { choices: [{ delta: { content: "answer" }, index: 0 }] } + yield { + choices: [{ delta: {}, index: 0 }], + usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 }, + } + }, + })) + + const stream = handler.createMessage("test prompt", []) + const chunks: any[] = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + expect(chunks).toContainEqual({ type: "reasoning", text: "thinking..." 
}) + }) + + it("falls back to delta.reasoning when reasoning_content is absent", async () => { + mockCreate.mockImplementationOnce(() => ({ + [Symbol.asyncIterator]: async function* () { + yield { choices: [{ delta: { reasoning: "router-style thought" }, index: 0 }] } + yield { + choices: [{ delta: {}, index: 0 }], + usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 }, + } + }, + })) + + const stream = handler.createMessage("test prompt", []) + const chunks: any[] = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + expect(chunks).toContainEqual({ type: "reasoning", text: "router-style thought" }) + }) + + it("prefers delta.reasoning_content over delta.reasoning when both are present", async () => { + mockCreate.mockImplementationOnce(() => ({ + [Symbol.asyncIterator]: async function* () { + yield { + choices: [ + { + delta: { + reasoning_content: "primary thought", + reasoning: "fallback thought", + }, + index: 0, + }, + ], + } + yield { + choices: [{ delta: {}, index: 0 }], + usage: { prompt_tokens: 1, completion_tokens: 1, total_tokens: 2 }, + } + }, + })) + + const stream = handler.createMessage("test prompt", []) + const chunks: any[] = [] + for await (const chunk of stream) { + chunks.push(chunk) + } + + const reasoningChunks = chunks.filter((chunk) => chunk.type === "reasoning") + expect(reasoningChunks).toEqual([{ type: "reasoning", text: "primary thought" }]) + }) + it("should preserve thinking block handling alongside tool calls", async () => { mockCreate.mockImplementationOnce(() => ({ [Symbol.asyncIterator]: async function* () { diff --git a/src/api/providers/__tests__/requesty.spec.ts b/src/api/providers/__tests__/requesty.spec.ts index 4857ecc4d7..88804553bd 100644 --- a/src/api/providers/__tests__/requesty.spec.ts +++ b/src/api/providers/__tests__/requesty.spec.ts @@ -249,6 +249,84 @@ describe("RequestyHandler", () => { await expect(generator.next()).rejects.toThrow("API Error") }) + it("streams reasoning chunks from 
delta.reasoning_content", async () => { + const handler = new RequestyHandler(mockOptions) + mockCreate.mockResolvedValue({ + async *[Symbol.asyncIterator]() { + yield { id: "1", choices: [{ delta: { reasoning_content: "thinking..." } }] } + yield { id: "1", choices: [{ delta: { content: "answer" } }] } + yield { + id: "1", + choices: [{ delta: {} }], + usage: { prompt_tokens: 1, completion_tokens: 1 }, + } + }, + }) + + const chunks: any[] = [] + for await (const chunk of handler.createMessage("sys", [{ role: "user", content: "hi" }])) { + chunks.push(chunk) + } + + expect(chunks).toContainEqual({ type: "reasoning", text: "thinking..." }) + }) + + it("falls back to delta.reasoning when reasoning_content is absent", async () => { + const handler = new RequestyHandler(mockOptions) + mockCreate.mockResolvedValue({ + async *[Symbol.asyncIterator]() { + yield { id: "1", choices: [{ delta: { reasoning: "router-style thought" } }] } + yield { + id: "1", + choices: [{ delta: {} }], + usage: { prompt_tokens: 1, completion_tokens: 1 }, + } + }, + }) + + const chunks: any[] = [] + for await (const chunk of handler.createMessage("sys", [{ role: "user", content: "hi" }])) { + chunks.push(chunk) + } + + expect(chunks).toContainEqual({ type: "reasoning", text: "router-style thought" }) + }) + + it("prefers delta.reasoning_content over delta.reasoning when both are present", async () => { + const handler = new RequestyHandler(mockOptions) + + mockCreate.mockResolvedValue({ + async *[Symbol.asyncIterator]() { + yield { + id: "1", + choices: [ + { + delta: { + reasoning_content: "primary thought", + reasoning: "fallback thought", + }, + }, + ], + } + yield { + id: "1", + choices: [{ delta: {} }], + usage: { prompt_tokens: 1, completion_tokens: 1 }, + } + }, + }) + + const chunks: any[] = [] + + for await (const chunk of handler.createMessage("sys", [{ role: "user", content: "hi" }])) { + chunks.push(chunk) + } + + const reasoningChunks = chunks.filter((chunk) => chunk.type === 
"reasoning") + + expect(reasoningChunks).toEqual([{ type: "reasoning", text: "primary thought" }]) + }) + describe("native tool support", () => { const systemPrompt = "test system prompt" const messages: Anthropic.Messages.MessageParam[] = [ diff --git a/src/api/providers/deepseek.ts b/src/api/providers/deepseek.ts index d61885bb1e..7f9f4c06c3 100644 --- a/src/api/providers/deepseek.ts +++ b/src/api/providers/deepseek.ts @@ -15,6 +15,7 @@ import { getModelParams } from "../transform/model-params" import { convertToR1Format } from "../transform/r1-format" import { OpenAiHandler } from "./openai" +import { extractReasoningFromDelta } from "./utils/extract-reasoning" import type { ApiHandlerCreateMessageMetadata } from "../index" // Custom interface for DeepSeek params to support thinking mode @@ -154,11 +155,9 @@ export class DeepSeekHandler extends OpenAiHandler { // Handle reasoning_content from DeepSeek's interleaved thinking // This is the proper way DeepSeek sends thinking content in streaming - if ("reasoning_content" in delta && delta.reasoning_content) { - yield { - type: "reasoning", - text: (delta.reasoning_content as string) || "", - } + const reasoningText = extractReasoningFromDelta(delta) + if (reasoningText) { + yield { type: "reasoning", text: reasoningText } } // Handle tool calls diff --git a/src/api/providers/openai.ts b/src/api/providers/openai.ts index b201cff700..8761cde599 100644 --- a/src/api/providers/openai.ts +++ b/src/api/providers/openai.ts @@ -24,6 +24,7 @@ import { BaseProvider } from "./base-provider" import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" import { getApiRequestTimeout } from "./utils/timeout-config" import { handleOpenAIError } from "./utils/openai-error-handler" +import { extractReasoningFromDelta } from "./utils/extract-reasoning" /** * Custom interface for GLM params to support thinking mode. 
@@ -280,11 +281,9 @@ export class OpenAiHandler extends BaseProvider implements SingleCompletionHandl } } - if ("reasoning_content" in delta && delta.reasoning_content) { - yield { - type: "reasoning", - text: (delta.reasoning_content as string | undefined) || "", - } + const reasoningText = extractReasoningFromDelta(delta) + if (reasoningText) { + yield { type: "reasoning", text: reasoningText } } yield* this.processToolCalls(delta, finishReason, activeToolCallIds) diff --git a/src/api/providers/qwen-code.ts b/src/api/providers/qwen-code.ts index f2a207051e..0b7d7598af 100644 --- a/src/api/providers/qwen-code.ts +++ b/src/api/providers/qwen-code.ts @@ -14,6 +14,7 @@ import { convertToOpenAiMessages } from "../transform/openai-format" import { ApiStream } from "../transform/stream" import { BaseProvider } from "./base-provider" +import { extractReasoningFromDelta } from "./utils/extract-reasoning" import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" const QWEN_OAUTH_BASE_URL = "https://chat.qwen.ai" @@ -283,11 +284,9 @@ export class QwenCodeHandler extends BaseProvider implements SingleCompletionHan } } - if ("reasoning_content" in delta && delta.reasoning_content) { - yield { - type: "reasoning", - text: (delta.reasoning_content as string | undefined) || "", - } + const reasoningText = extractReasoningFromDelta(delta) + if (reasoningText) { + yield { type: "reasoning", text: reasoningText } } // Handle tool calls in stream - emit partial chunks for NativeToolCallParser diff --git a/src/api/providers/requesty.ts b/src/api/providers/requesty.ts index a532e89b57..a62e5444da 100644 --- a/src/api/providers/requesty.ts +++ b/src/api/providers/requesty.ts @@ -18,6 +18,7 @@ import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from ". 
import { toRequestyServiceUrl } from "../../shared/utils/requesty" import { handleOpenAIError } from "./utils/openai-error-handler" import { applyRouterToolPreferences } from "./utils/router-tool-preferences" +import { extractReasoningFromDelta } from "./utils/extract-reasoning" // Requesty usage includes an extra field for Anthropic use cases. // Safely cast the prompt token details section to the appropriate structure. @@ -175,8 +176,9 @@ export class RequestyHandler extends BaseProvider implements SingleCompletionHan yield { type: "text", text: delta.content } } - if (delta && "reasoning_content" in delta && delta.reasoning_content) { - yield { type: "reasoning", text: (delta.reasoning_content as string | undefined) || "" } + const reasoningText = extractReasoningFromDelta(delta) + if (reasoningText) { + yield { type: "reasoning", text: reasoningText } } // Handle native tool calls diff --git a/src/api/providers/unbound.ts b/src/api/providers/unbound.ts index d50bfcc85d..921985deaf 100644 --- a/src/api/providers/unbound.ts +++ b/src/api/providers/unbound.ts @@ -17,6 +17,7 @@ import { BaseProvider } from "./base-provider" import type { SingleCompletionHandler, ApiHandlerCreateMessageMetadata } from "../index" import { handleOpenAIError } from "./utils/openai-error-handler" import { applyRouterToolPreferences } from "./utils/router-tool-preferences" +import { extractReasoningFromDelta } from "./utils/extract-reasoning" // Unbound usage includes extra fields for Anthropic cache tokens. 
interface UnboundUsage extends OpenAI.CompletionUsage { @@ -162,8 +163,9 @@ export class UnboundHandler extends BaseProvider implements SingleCompletionHand yield { type: "text", text: delta.content } } - if (delta && "reasoning_content" in delta && delta.reasoning_content) { - yield { type: "reasoning", text: (delta.reasoning_content as string | undefined) || "" } + const reasoningText = extractReasoningFromDelta(delta) + if (reasoningText) { + yield { type: "reasoning", text: reasoningText } } // Handle native tool calls From a0fde26baf5181de0ac0e1022253c531935de8e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Dre=C5=BCewski?= Date: Thu, 18 Jun 2026 00:08:37 +0200 Subject: [PATCH 3/4] feat(zai): add glm-5.2 support with Max reasoning-effort tier (zoo #608) Adds the glm-5.2 flagship model (1M context) to both Z.ai model maps with a new Max reasoning-effort tier; High is the default and Max is opt-in. Persisted reasoning effort not offered by the current model now falls back to the model default instead of silently disabling reasoning, and the streaming create() is wrapped in handleOpenAIError for parity with the base class. GLM-5.2 pricing is provisional (mirrors GLM-5.1). Adds Max translations across all 18 locales. 
Co-authored-by: Mob Code 100 <66469454+MobCode100@users.noreply.github.com> --- .../2026-06-17_zoo-608-add-glm-5-2-support.md | 146 ++++++++++++++++ packages/types/src/model.ts | 15 +- packages/types/src/providers/zai.ts | 34 ++++ src/api/providers/__tests__/zai.spec.ts | 162 ++++++++++++++++++ src/api/providers/zai.ts | 40 +++-- webview-ui/src/i18n/locales/ca/settings.json | 1 + webview-ui/src/i18n/locales/de/settings.json | 1 + webview-ui/src/i18n/locales/en/settings.json | 3 +- webview-ui/src/i18n/locales/es/settings.json | 1 + webview-ui/src/i18n/locales/fr/settings.json | 1 + webview-ui/src/i18n/locales/hi/settings.json | 1 + webview-ui/src/i18n/locales/id/settings.json | 1 + webview-ui/src/i18n/locales/it/settings.json | 1 + webview-ui/src/i18n/locales/ja/settings.json | 1 + webview-ui/src/i18n/locales/ko/settings.json | 1 + webview-ui/src/i18n/locales/nl/settings.json | 1 + webview-ui/src/i18n/locales/pl/settings.json | 1 + .../src/i18n/locales/pt-BR/settings.json | 1 + webview-ui/src/i18n/locales/ru/settings.json | 1 + webview-ui/src/i18n/locales/tr/settings.json | 1 + webview-ui/src/i18n/locales/vi/settings.json | 1 + .../src/i18n/locales/zh-CN/settings.json | 1 + .../src/i18n/locales/zh-TW/settings.json | 3 +- 23 files changed, 404 insertions(+), 15 deletions(-) create mode 100644 ai_plans/2026-06-17_zoo-608-add-glm-5-2-support.md diff --git a/ai_plans/2026-06-17_zoo-608-add-glm-5-2-support.md b/ai_plans/2026-06-17_zoo-608-add-glm-5-2-support.md new file mode 100644 index 0000000000..cd9cc618bf --- /dev/null +++ b/ai_plans/2026-06-17_zoo-608-add-glm-5-2-support.md @@ -0,0 +1,146 @@ +# Port plan — Zoo PR #608 → `feature/zoo-608-add-glm-5-2-support` + +> **For the executor (read first).** Do the steps in order. Do not improvise or +> refactor beyond what is written (YAGNI). Every code block is already adapted to +> this repo. This repo is **Tumble Code**: never introduce the strings "Roo" or +> "Zoo" in user-facing text or test names. + +--- + +## 0. 
Context + +- **Upstream:** Zoo PR #608 — "feat: add glm-5.2 support" (commit `085bc7f57`, + merged 2026-06-15). +- **What it does:** Adds the `glm-5.2` model (Zhipu's flagship, 1M context) to + both Z.ai model maps with a new **"max"** reasoning-effort tier on top of the + existing ladder. `high` is the model default and `max` is opt-in. Also hardens + the Z.ai handler so a persisted reasoning-effort value the _current_ model + doesn't support falls back to that model's default instead of silently + disabling reasoning, and wraps the streaming `create()` in `handleOpenAIError` + for parity with the base class. +- **Why we want it:** new model support is high-value/low-risk; the "max" tier and + the unsupported-effort fallback are general correctness improvements that help + every GLM thinking model, not just 5.2. + +- **Adaptations vs. the raw upstream diff (IMPORTANT):** + + 1. **No changeset.** Zoo's diff includes `.changeset/add-glm-5-2-support.md`. + That is a Zoo release-prep mechanic (triage rubric → SKIP). **Do not** port + it. + 2. **Our fork has no `glm-5-turbo`.** Zoo anchored some edits/tests around + `glm-5-turbo`, which doesn't exist here. Anchor the new model entries after + **`glm-5.1`** and the new streaming tests near the existing **GLM-4.7** + thinking tests instead. + 3. **Handler shape already diverged.** Our `src/api/providers/zai.ts` already + carries the custom `ZAiChatCompletionParams` type and `createStreamWithThinking` + from earlier ports. The port only _adds_ the unsupported-effort fallback + block + `reasoning_effort` param + the `try/catch` around `create()`. Our + params comment stays `// For GLM-4.7: thinking is ON by default, so we +explicitly disable when needed` (do not adopt Zoo's wording). + 4. 
**`max` already half-present.** `packages/types/src/model.ts` already had + the `xhigh`/`max` machinery in some unions; the port ensures `"max"` is in + all three places: `reasoningEffortsExtended`, `reasoningEffortSettingValues`, + and the `modelInfoSchema.supportsReasoningEffort` enum. + +- **Original author — credit:** + + ```text + Co-authored-by: Mob Code 100 <66469454+MobCode100@users.noreply.github.com> + ``` + +## 1. Preconditions + +- [x] Branch `feature/zoo-608-add-glm-5-2-support` created off + `feature/zoo-588-extract-reasoning-from-delta-helper` (stacked — overlaps + nothing with #588 except sharing the provider dir, so stacking keeps the + chain clean). +- [x] `glm-5.1` exists in both Z.ai maps (anchor for the new entry). +- [x] `glm-5-turbo` confirmed absent (re-anchored, see §0 adaptation 2). + +## 2. Types edits + +### Edit A — `packages/types/src/model.ts` + +Ensure `"max"` is present in all three reasoning-effort surfaces: + +1. `reasoningEffortsExtended` → `["none", "minimal", "low", "medium", "high", "xhigh", "max"]`. +2. `reasoningEffortSettingValues` → includes `"xhigh"` and `"max"` (reformatted multiline). +3. `modelInfoSchema.supportsReasoningEffort` array enum union → ends `…, "high", "xhigh", "max"`. + +### Edit B — `packages/types/src/providers/zai.ts` + +Insert a `"glm-5.2"` entry **after `glm-5.1`** in BOTH `internationalZAiModels` +and `mainlandZAiModels`. Shared fields: `maxTokens: 131_072`, +`contextWindow: 1_000_000`, `supportsImages: false`, `supportsPromptCache: true`, +`supportsMaxTokens: true`, `supportsReasoningEffort: ["disable", "high", "max"]`, +`reasoningEffort: "high"`, `preserveReasoning: true`, `cacheWritesPrice: 0`, the +`// TODO: Pricing is from GLM-5.1, should update later.` comment, and the flagship +description. Pricing differs by line: + +- international: `inputPrice: 1.4`, `outputPrice: 4.4`, `cacheReadsPrice: 0.26`. +- mainland: `inputPrice: 0.68`, `outputPrice: 2.28`, `cacheReadsPrice: 0.13`. + +## 3. 
Handler edit — `src/api/providers/zai.ts` + +In `createStreamWithThinking`, replace the prior effort computation with the +unsupported-effort fallback: + +```ts +const supported = info.supportsReasoningEffort +const raw = + this.options.enableReasoningEffort === false ? undefined : (this.options.reasoningEffort ?? info.reasoningEffort) +const effort = + raw && raw !== "disable" && Array.isArray(supported) && !supported.includes(raw) ? info.reasoningEffort : raw +const reasoningEffort = effort && effort !== "disable" ? effort : undefined +const useReasoning = reasoningEffort !== undefined +``` + +Add `reasoning_effort: reasoningEffort,` to the params (after the `thinking` line), +widen the params type to allow `"max"` (the `ZAiChatCompletionParams` `Omit`/union), +and wrap the `create()` return in `try { … } catch (error) { throw +handleOpenAIError(error, this.providerName) }`. Add the +`import { handleOpenAIError } from "./utils/openai-error-handler"` import. + +## 4. i18n — `webview-ui/src/i18n/locales/<locale>/settings.json` + +Add a `"max"` key to the `reasoningEffort` block in all 18 locales, after +`"xhigh"`. Translations: ca=Màxim, de=Maximum, en=Max, es=Máximo, fr=Maximum, +hi=अधिकतम, id=Maksimum, it=Massimo, ja=最高, ko=최대, nl=Maximum, pl=Maksymalny, +pt-BR=Máximo, ru=Максимальные, tr=Maksimum, vi=Tối đa, zh-CN=最高, zh-TW=最高. + +## 5. Tests — `src/api/providers/__tests__/zai.spec.ts` + +- 2 model-info tests: GLM-5.2 international (ctx 1M, max 131_072, effort + `["disable","high","max"]`, default `high`, prices 1.4/4.4/0.26) and GLM-5.2 + China (prices 0.68/2.28/0.13), anchored after the matching GLM-5.1 tests. +- 4 streaming tests near the GLM-4.7 thinking tests: (1) default → `reasoning_effort:"high"` + and thinking enabled; (2) `reasoningEffort:"max"` → `reasoning_effort:"max"`; (3) + `reasoningEffort:"disable"` → thinking disabled + `reasoning_effort` undefined; + (4) unsupported `reasoningEffort:"medium"` → falls back to model default `"high"`. + +## 6.
Out of scope + +- No `.changeset/`. No `glm-5-turbo`. No "Zoo"/"Roo" strings. No handler comment + rewording. + +## 7. Verify + +- `pnpm --filter @roo-code/types check-types` clean. +- `pnpm --filter tumble-code check-types` clean. +- `cd src && npx vitest run api/providers/__tests__/zai.spec.ts` all green (46 tests). +- `cd src && npx eslint api/providers/zai.ts api/providers/__tests__/zai.spec.ts` clean. + +## 8. Acceptance + +- [x] `glm-5.2` present in both Z.ai maps with the "max" tier. +- [x] Handler falls back to model default when persisted effort is unsupported. +- [x] `"max"` in all 18 locales and all 3 model.ts surfaces. +- [x] No "Roo"/"Zoo" user-facing strings introduced. + +## 9. Record + +```bash +node .claude/skills/zoo-port/scripts/zoo-prs.mjs record --pr 608 --status ported \ + --branch feature/zoo-608-add-glm-5-2-support \ + --plan ai_plans/2026-06-17_zoo-608-add-glm-5-2-support.md +``` diff --git a/packages/types/src/model.ts b/packages/types/src/model.ts index a86eadfaf9..87ebfaf967 100644 --- a/packages/types/src/model.ts +++ b/packages/types/src/model.ts @@ -23,7 +23,7 @@ export type ReasoningEffortWithMinimal = z.infer { expect(model.info.supportsImages).toBe(false) }) + it("should return GLM-5.2 international model with High/Max effort tiers and 1M context", () => { + const testModelId: InternationalZAiModelId = "glm-5.2" + const handlerWithModel = new ZAiHandler({ + apiModelId: testModelId, + zaiApiKey: "test-zai-api-key", + zaiApiLine: "international_coding", + }) + const model = handlerWithModel.getModel() + expect(model.id).toBe(testModelId) + expect(model.info).toEqual(internationalZAiModels[testModelId]) + expect(model.info.contextWindow).toBe(1_000_000) + expect(model.info.maxTokens).toBe(131_072) + expect(model.info.supportsReasoningEffort).toEqual(["disable", "high", "max"]) + expect(model.info.reasoningEffort).toBe("high") + expect(model.info.preserveReasoning).toBe(true) + expect(model.info.supportsMaxTokens).toBe(true) + 
expect(model.info.inputPrice).toBe(1.4) + expect(model.info.outputPrice).toBe(4.4) + expect(model.info.cacheReadsPrice).toBe(0.26) + }) + it("should return GLM-4.5v international model with vision support", () => { const testModelId: InternationalZAiModelId = "glm-4.5v" const handlerWithModel = new ZAiHandler({ @@ -226,6 +247,27 @@ describe("ZAiHandler", () => { expect(model.info.supportsImages).toBe(false) }) + it("should return GLM-5.2 China model with High/Max effort tiers and 1M context", () => { + const testModelId: MainlandZAiModelId = "glm-5.2" + const handlerWithModel = new ZAiHandler({ + apiModelId: testModelId, + zaiApiKey: "test-zai-api-key", + zaiApiLine: "china_coding", + }) + const model = handlerWithModel.getModel() + expect(model.id).toBe(testModelId) + expect(model.info).toEqual(mainlandZAiModels[testModelId]) + expect(model.info.contextWindow).toBe(1_000_000) + expect(model.info.maxTokens).toBe(131_072) + expect(model.info.supportsReasoningEffort).toEqual(["disable", "high", "max"]) + expect(model.info.reasoningEffort).toBe("high") + expect(model.info.preserveReasoning).toBe(true) + expect(model.info.supportsMaxTokens).toBe(true) + expect(model.info.inputPrice).toBe(0.68) + expect(model.info.outputPrice).toBe(2.28) + expect(model.info.cacheReadsPrice).toBe(0.13) + }) + it("should return GLM-4.7 China model with thinking support", () => { const testModelId: MainlandZAiModelId = "glm-4.7" const handlerWithModel = new ZAiHandler({ @@ -554,6 +596,126 @@ describe("ZAiHandler", () => { ) }) + it("should send reasoning_effort:high by default for GLM-5.2 (model default)", async () => { + const handlerWithModel = new ZAiHandler({ + apiModelId: "glm-5.2", + zaiApiKey: "test-zai-api-key", + zaiApiLine: "international_coding", + // No reasoningEffort setting - should use model default (high) + }) + + mockCreate.mockImplementationOnce(() => { + return { + [Symbol.asyncIterator]: () => ({ + async next() { + return { done: true } + }, + }), + } + }) + + const 
messageGenerator = handlerWithModel.createMessage("system prompt", []) + await messageGenerator.next() + + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + model: "glm-5.2", + thinking: { type: "enabled" }, + reasoning_effort: "high", + }), + ) + }) + + it("should send reasoning_effort:max for GLM-5.2 when reasoningEffort is set to max", async () => { + const handlerWithModel = new ZAiHandler({ + apiModelId: "glm-5.2", + zaiApiKey: "test-zai-api-key", + zaiApiLine: "international_coding", + enableReasoningEffort: true, + reasoningEffort: "max", + }) + + mockCreate.mockImplementationOnce(() => { + return { + [Symbol.asyncIterator]: () => ({ + async next() { + return { done: true } + }, + }), + } + }) + + const messageGenerator = handlerWithModel.createMessage("system prompt", []) + await messageGenerator.next() + + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + model: "glm-5.2", + thinking: { type: "enabled" }, + reasoning_effort: "max", + }), + ) + }) + + it("should omit reasoning_effort for GLM-5.2 when reasoningEffort is set to disable", async () => { + const handlerWithModel = new ZAiHandler({ + apiModelId: "glm-5.2", + zaiApiKey: "test-zai-api-key", + zaiApiLine: "international_coding", + enableReasoningEffort: true, + reasoningEffort: "disable", + }) + + mockCreate.mockImplementationOnce(() => { + return { + [Symbol.asyncIterator]: () => ({ + async next() { + return { done: true } + }, + }), + } + }) + + const messageGenerator = handlerWithModel.createMessage("system prompt", []) + await messageGenerator.next() + + const callArgs = mockCreate.mock.calls[0][0] + expect(callArgs.thinking).toEqual({ type: "disabled" }) + expect(callArgs.reasoning_effort).toBeUndefined() + }) + + it("should fall back to the model default effort when a persisted value is unsupported", async () => { + const handlerWithModel = new ZAiHandler({ + apiModelId: "glm-5.2", + zaiApiKey: "test-zai-api-key", + zaiApiLine: "international_coding", 
+ enableReasoningEffort: true, + // "medium" is not in GLM-5.2's supported tiers (disable/high/max) - should fall back to "high" + reasoningEffort: "medium", + }) + + mockCreate.mockImplementationOnce(() => { + return { + [Symbol.asyncIterator]: () => ({ + async next() { + return { done: true } + }, + }), + } + }) + + const messageGenerator = handlerWithModel.createMessage("system prompt", []) + await messageGenerator.next() + + expect(mockCreate).toHaveBeenCalledWith( + expect.objectContaining({ + model: "glm-5.2", + thinking: { type: "enabled" }, + reasoning_effort: "high", + }), + ) + }) + it("should disable thinking for GLM-4.7 when reasoningEffort is set to disable", async () => { const handlerWithModel = new ZAiHandler({ apiModelId: "glm-4.7", diff --git a/src/api/providers/zai.ts b/src/api/providers/zai.ts index 35d3a93c12..cc6ec87004 100644 --- a/src/api/providers/zai.ts +++ b/src/api/providers/zai.ts @@ -11,15 +11,20 @@ import { zaiApiLineConfigs, } from "@roo-code/types" -import { type ApiHandlerOptions, getModelMaxOutputTokens, shouldUseReasoningEffort } from "../../shared/api" +import { type ApiHandlerOptions, getModelMaxOutputTokens } from "../../shared/api" import { convertToZAiFormat } from "../transform/zai-format" import type { ApiHandlerCreateMessageMetadata } from "../index" import { BaseOpenAiCompatibleProvider } from "./base-openai-compatible-provider" +import { handleOpenAIError } from "./utils/openai-error-handler" -// Custom interface for Z.ai params to support thinking mode -type ZAiChatCompletionParams = OpenAI.Chat.ChatCompletionCreateParamsStreaming & { +// Custom interface for Z.ai params to support thinking mode and reasoning effort tiers. +// Z.ai accepts the standard `reasoning_effort` ladder (none/minimal/low/medium/high/xhigh/max) +// alongside the GLM-specific `thinking` toggle. Omit the OpenAI-typed `reasoning_effort` so we +// can widen it to include provider-specific values such as "max". 
+type ZAiChatCompletionParams = Omit & { thinking?: { type: "enabled" | "disabled" } + reasoning_effort?: "none" | "minimal" | "low" | "medium" | "high" | "xhigh" | "max" } export class ZAiHandler extends BaseOpenAiCompatibleProvider { @@ -56,12 +61,8 @@ export class ZAiHandler extends BaseOpenAiCompatibleProvider { const isThinkingModel = Array.isArray(info.supportsReasoningEffort) if (isThinkingModel) { - // For GLM-4.7, thinking is ON by default in the API. - // We need to explicitly disable it when reasoning is off. - const useReasoning = shouldUseReasoningEffort({ model: info, settings: this.options }) - // Create the stream with our custom thinking parameter - return this.createStreamWithThinking(systemPrompt, messages, metadata, useReasoning) + return this.createStreamWithThinking(systemPrompt, messages, metadata) } // For non-thinking models, use the default behavior @@ -75,10 +76,22 @@ export class ZAiHandler extends BaseOpenAiCompatibleProvider { systemPrompt: string, messages: Anthropic.Messages.MessageParam[], metadata?: ApiHandlerCreateMessageMetadata, - useReasoning?: boolean, ) { const { id: model, info } = this.getModel() + // Fall back to the model default when the resolved effort isn't supported by the model. + const supported = info.supportsReasoningEffort + const raw = + this.options.enableReasoningEffort === false + ? undefined + : (this.options.reasoningEffort ?? info.reasoningEffort) + const effort = + raw && raw !== "disable" && Array.isArray(supported) && !supported.includes(raw) + ? info.reasoningEffort + : raw + const reasoningEffort = effort && effort !== "disable" ? effort : undefined + const useReasoning = reasoningEffort !== undefined + // Honor an explicit user override (the configurable max-output slider); otherwise // fall back to getModelMaxOutputTokens, which clamps to 20% of the context window by // default so GLM models don't over-reserve output budget. 
@@ -106,11 +119,18 @@ export class ZAiHandler extends BaseOpenAiCompatibleProvider { stream_options: { include_usage: true }, // For GLM-4.7: thinking is ON by default, so we explicitly disable when needed thinking: useReasoning ? { type: "enabled" } : { type: "disabled" }, + reasoning_effort: reasoningEffort, tools: this.convertToolsForOpenAI(metadata?.tools), tool_choice: metadata?.tool_choice, parallel_tool_calls: metadata?.parallelToolCalls ?? true, } - return this.client.chat.completions.create(params) + try { + return this.client.chat.completions.create( + params as OpenAI.Chat.Completions.ChatCompletionCreateParamsStreaming, + ) + } catch (error) { + throw handleOpenAIError(error, this.providerName) + } } } diff --git a/webview-ui/src/i18n/locales/ca/settings.json b/webview-ui/src/i18n/locales/ca/settings.json index 4a0f414142..018580c074 100644 --- a/webview-ui/src/i18n/locales/ca/settings.json +++ b/webview-ui/src/i18n/locales/ca/settings.json @@ -534,6 +534,7 @@ "minimal": "Mínim (el més ràpid)", "high": "Alt", "xhigh": "Molt alt", + "max": "Màxim", "medium": "Mitjà", "low": "Baix" }, diff --git a/webview-ui/src/i18n/locales/de/settings.json b/webview-ui/src/i18n/locales/de/settings.json index 5c226b5a3e..4632668897 100644 --- a/webview-ui/src/i18n/locales/de/settings.json +++ b/webview-ui/src/i18n/locales/de/settings.json @@ -534,6 +534,7 @@ "minimal": "Minimal (schnellste)", "high": "Hoch", "xhigh": "Sehr hoch", + "max": "Maximum", "medium": "Mittel", "low": "Niedrig" }, diff --git a/webview-ui/src/i18n/locales/en/settings.json b/webview-ui/src/i18n/locales/en/settings.json index 171341afed..169f65a76c 100644 --- a/webview-ui/src/i18n/locales/en/settings.json +++ b/webview-ui/src/i18n/locales/en/settings.json @@ -598,7 +598,8 @@ "low": "Low", "medium": "Medium", "high": "High", - "xhigh": "Extra High" + "xhigh": "Extra High", + "max": "Max" }, "verbosity": { "label": "Output Verbosity", diff --git a/webview-ui/src/i18n/locales/es/settings.json 
b/webview-ui/src/i18n/locales/es/settings.json index 719115b20e..ecbbf85792 100644 --- a/webview-ui/src/i18n/locales/es/settings.json +++ b/webview-ui/src/i18n/locales/es/settings.json @@ -534,6 +534,7 @@ "minimal": "Mínimo (el más rápido)", "high": "Alto", "xhigh": "Muy alto", + "max": "Máximo", "medium": "Medio", "low": "Bajo" }, diff --git a/webview-ui/src/i18n/locales/fr/settings.json b/webview-ui/src/i18n/locales/fr/settings.json index 3dd0fca461..02d2aa4850 100644 --- a/webview-ui/src/i18n/locales/fr/settings.json +++ b/webview-ui/src/i18n/locales/fr/settings.json @@ -534,6 +534,7 @@ "minimal": "Minimal (le plus rapide)", "high": "Élevé", "xhigh": "Très élevé", + "max": "Maximum", "medium": "Moyen", "low": "Faible" }, diff --git a/webview-ui/src/i18n/locales/hi/settings.json b/webview-ui/src/i18n/locales/hi/settings.json index 43a8865a31..c1e2bc17cb 100644 --- a/webview-ui/src/i18n/locales/hi/settings.json +++ b/webview-ui/src/i18n/locales/hi/settings.json @@ -534,6 +534,7 @@ "minimal": "न्यूनतम (सबसे तेज़)", "high": "उच्च", "xhigh": "बहुत उच्च", + "max": "अधिकतम", "medium": "मध्यम", "low": "निम्न" }, diff --git a/webview-ui/src/i18n/locales/id/settings.json b/webview-ui/src/i18n/locales/id/settings.json index f48bb28020..7ef8c7cd03 100644 --- a/webview-ui/src/i18n/locales/id/settings.json +++ b/webview-ui/src/i18n/locales/id/settings.json @@ -534,6 +534,7 @@ "minimal": "Minimal (Tercepat)", "high": "Tinggi", "xhigh": "Sangat tinggi", + "max": "Maksimum", "medium": "Sedang", "low": "Rendah" }, diff --git a/webview-ui/src/i18n/locales/it/settings.json b/webview-ui/src/i18n/locales/it/settings.json index 59d0b928fd..9b24b9114b 100644 --- a/webview-ui/src/i18n/locales/it/settings.json +++ b/webview-ui/src/i18n/locales/it/settings.json @@ -534,6 +534,7 @@ "minimal": "Minimo (più veloce)", "high": "Alto", "xhigh": "Molto alto", + "max": "Massimo", "medium": "Medio", "low": "Basso" }, diff --git a/webview-ui/src/i18n/locales/ja/settings.json 
b/webview-ui/src/i18n/locales/ja/settings.json index 0dc3d1b545..130a0544c8 100644 --- a/webview-ui/src/i18n/locales/ja/settings.json +++ b/webview-ui/src/i18n/locales/ja/settings.json @@ -534,6 +534,7 @@ "minimal": "最小 (最速)", "high": "高", "xhigh": "非常に高い", + "max": "最高", "medium": "中", "low": "低" }, diff --git a/webview-ui/src/i18n/locales/ko/settings.json b/webview-ui/src/i18n/locales/ko/settings.json index 1cce62b6ce..fae8b36b47 100644 --- a/webview-ui/src/i18n/locales/ko/settings.json +++ b/webview-ui/src/i18n/locales/ko/settings.json @@ -534,6 +534,7 @@ "minimal": "최소 (가장 빠름)", "high": "높음", "xhigh": "매우 높음", + "max": "최대", "medium": "중간", "low": "낮음" }, diff --git a/webview-ui/src/i18n/locales/nl/settings.json b/webview-ui/src/i18n/locales/nl/settings.json index cf6f8e0e44..c344611b50 100644 --- a/webview-ui/src/i18n/locales/nl/settings.json +++ b/webview-ui/src/i18n/locales/nl/settings.json @@ -534,6 +534,7 @@ "minimal": "Minimaal (Snelst)", "high": "Hoog", "xhigh": "Zeer hoog", + "max": "Maximum", "medium": "Middel", "low": "Laag" }, diff --git a/webview-ui/src/i18n/locales/pl/settings.json b/webview-ui/src/i18n/locales/pl/settings.json index 6da0861aac..ed718ad4bc 100644 --- a/webview-ui/src/i18n/locales/pl/settings.json +++ b/webview-ui/src/i18n/locales/pl/settings.json @@ -534,6 +534,7 @@ "minimal": "Minimalny (najszybszy)", "high": "Wysoki", "xhigh": "Bardzo wysoki", + "max": "Maksymalny", "medium": "Średni", "low": "Niski" }, diff --git a/webview-ui/src/i18n/locales/pt-BR/settings.json b/webview-ui/src/i18n/locales/pt-BR/settings.json index 1a851a44f1..e03e91367c 100644 --- a/webview-ui/src/i18n/locales/pt-BR/settings.json +++ b/webview-ui/src/i18n/locales/pt-BR/settings.json @@ -534,6 +534,7 @@ "minimal": "Mínimo (mais rápido)", "high": "Alto", "xhigh": "Muito alto", + "max": "Máximo", "medium": "Médio", "low": "Baixo" }, diff --git a/webview-ui/src/i18n/locales/ru/settings.json b/webview-ui/src/i18n/locales/ru/settings.json index 
92485a449b..f62091c0df 100644 --- a/webview-ui/src/i18n/locales/ru/settings.json +++ b/webview-ui/src/i18n/locales/ru/settings.json @@ -534,6 +534,7 @@ "minimal": "Минимальный (самый быстрый)", "high": "Высокие", "xhigh": "Очень высокие", + "max": "Максимальные", "medium": "Средние", "low": "Низкие" }, diff --git a/webview-ui/src/i18n/locales/tr/settings.json b/webview-ui/src/i18n/locales/tr/settings.json index 658b291576..1b5fee5685 100644 --- a/webview-ui/src/i18n/locales/tr/settings.json +++ b/webview-ui/src/i18n/locales/tr/settings.json @@ -534,6 +534,7 @@ "minimal": "Minimal (en hızlı)", "high": "Yüksek", "xhigh": "Çok yüksek", + "max": "Maksimum", "medium": "Orta", "low": "Düşük" }, diff --git a/webview-ui/src/i18n/locales/vi/settings.json b/webview-ui/src/i18n/locales/vi/settings.json index 9e7d41a4c0..7d8503231b 100644 --- a/webview-ui/src/i18n/locales/vi/settings.json +++ b/webview-ui/src/i18n/locales/vi/settings.json @@ -534,6 +534,7 @@ "minimal": "Tối thiểu (nhanh nhất)", "high": "Cao", "xhigh": "Rất cao", + "max": "Tối đa", "medium": "Trung bình", "low": "Thấp" }, diff --git a/webview-ui/src/i18n/locales/zh-CN/settings.json b/webview-ui/src/i18n/locales/zh-CN/settings.json index 0569de35e2..18339b400e 100644 --- a/webview-ui/src/i18n/locales/zh-CN/settings.json +++ b/webview-ui/src/i18n/locales/zh-CN/settings.json @@ -534,6 +534,7 @@ "minimal": "最小 (最快)", "high": "高", "xhigh": "超高", + "max": "最高", "medium": "中", "low": "低" }, diff --git a/webview-ui/src/i18n/locales/zh-TW/settings.json b/webview-ui/src/i18n/locales/zh-TW/settings.json index 32e5947be6..ce14f9a34d 100644 --- a/webview-ui/src/i18n/locales/zh-TW/settings.json +++ b/webview-ui/src/i18n/locales/zh-TW/settings.json @@ -545,7 +545,8 @@ "low": "低", "medium": "中", "high": "高", - "xhigh": "超高" + "xhigh": "超高", + "max": "最高" }, "verbosity": { "label": "輸出詳細程度", From 1ae6632e7924e4c1ba00eaf4f7126a7247f4da9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Dre=C5=BCewski?= Date: Thu, 18 Jun 2026 
00:12:53 +0200 Subject: [PATCH 4/4] fix(TerminalRegistry): deliver completion when end event races ahead of running=true (zoo #645) A fast command's onDidEndTerminalShellExecution can arrive before setActiveStream() flips terminal.running to true. The old guard logged an error, set busy=false, and returned without signalling completion, so a TerminalProcess awaiting shell_execution_complete hung forever. Now, when running is still false but a process exists, deliver shellExecutionComplete(exitDetails) so the waiter unblocks; fall back to clearing busy only when no process exists. Also drops the redundant trailing busy=false after the normal completion path, since shellExecutionComplete already clears it. Co-authored-by: edelauna <54631123+edelauna@users.noreply.github.com> --- ...5-terminalregistry-race-condition-guard.md | 254 ++++++++++++++++++ src/integrations/terminal/TerminalRegistry.ts | 20 +- .../__tests__/TerminalRegistry.spec.ts | 82 ++++++ 3 files changed, 350 insertions(+), 6 deletions(-) create mode 100644 ai_plans/2026-06-17_zoo-645-terminalregistry-race-condition-guard.md diff --git a/ai_plans/2026-06-17_zoo-645-terminalregistry-race-condition-guard.md b/ai_plans/2026-06-17_zoo-645-terminalregistry-race-condition-guard.md new file mode 100644 index 0000000000..82c28cdce8 --- /dev/null +++ b/ai_plans/2026-06-17_zoo-645-terminalregistry-race-condition-guard.md @@ -0,0 +1,254 @@ +# Port plan — Zoo PR #645 → `feature/zoo-645-terminalregistry-race-condition-guard` + +> **For the executor (read first).** Do the steps in order. Do not improvise or +> refactor beyond what is written (YAGNI). Every code block is already adapted to +> this repo. This repo is **Tumble Code**: never introduce the strings "Roo" or +> "Zoo" in user-facing text or test names. + +--- + +## 0. Context + +- **Upstream:** Zoo PR #645 — "fix(TerminalRegistry): updating guard condition to + address race condition for fast commands" (commit `e80039bb5`, merged + 2026-06-17). 
+- **What it does:** Fixes a race where a fast command's + `onDidEndTerminalShellExecution` end event arrives **before** + `setActiveStream()` has flipped `terminal.running` to `true`. In that window the + old guard logged an error, set `busy = false`, and returned **without signalling + completion** — so any `TerminalProcess.run()` awaiting `shell_execution_complete` + hung forever. The fix: when `running` is still false but a `process` exists, + deliver `shellExecutionComplete(exitDetails)` (which emits + `shell_execution_complete`, clears `busy`/`running`, and drains output) instead + of stranding it; only fall back to `busy = false` when there is no process. Also + removes the now-redundant trailing `terminal.busy = false` after the normal + completion path (since `shellExecutionComplete` already clears `busy`). +- **Why we want it:** real correctness fix in shared terminal core; matches our + weak-model / robustness priorities. Low risk — behavior-preserving for the + normal (running=true) path; only the previously-broken race path changes. + +- **Adaptations vs. the raw upstream diff:** + + 1. **Code is identical pre-diff.** Our `TerminalRegistry.ts` guard block + (lines 105–126) matches Zoo's pre-diff state exactly, except the unrelated + branding string at line 94 ("Tumble Code-tracked terminal"), which is + **outside** the hunks. The diff applies cleanly. + 2. **Issue-number comments kept.** The fix's comment references `#489 / #622` + and the test describe block carries `(#489, #622)`. These are inherited + upstream issue numbers; our fork already uses this convention (e.g. the + existing `releaseTerminalsForTask` test `(#245)`). Keep them — they are bare + issue numbers, **not** "Roo"/"Zoo" branding. + 3. 
**Test harness is compatible.** Verified our `TerminalRegistry` exposes the + private `isInitialized` field and static `initialize()`, and `BaseTerminal` + exposes `running`, `busy`, `process`, and `shellExecutionComplete()` (which + sets `busy=false`/`running=false`, emits `shell_execution_complete`, and + clears `process`). The two new tests run unmodified. + +- **Original author — credit:** + + ```text + Co-authored-by: edelauna <54631123+edelauna@users.noreply.github.com> + ``` + +## 1. Preconditions + +- [x] Branch `feature/zoo-645-terminalregistry-race-condition-guard` off + `feature/zoo-608-add-glm-5-2-support` (stacked). +- [x] `TerminalRegistry.ts` guard block matches Zoo pre-diff (lines 105–126). +- [x] `releaseTerminalsForTask` describe block present (the test insertion anchor). + +## 2. Failing test first (TDD) + +Insert the new describe block in +`src/integrations/terminal/__tests__/TerminalRegistry.spec.ts` immediately +**before** `describe("releaseTerminalsForTask", …)` (currently line 208). It +captures the `onDidEndTerminalShellExecution` handler via a spy, then drives the +race: + +- Test 1 — "calls shellExecutionComplete when end event fires before running is + set (race)": process present, `running===false`; firing the end event must emit + `shell_execution_complete` once with `{ exitCode: 0 }` and leave `busy===false`. + **This is RED against current code** (current guard never emits — it only sets + `busy=false`). +- Test 2 — "sets busy=false without calling shellExecutionComplete when no process + exists": process undefined; end event must set `busy=false` and **not** call + `shellExecutionComplete`. (Green even pre-fix — it pins the no-process branch.) + +Run RED: `cd src && npx vitest run integrations/terminal/__tests__/TerminalRegistry.spec.ts` +→ Test 1 fails (`shell_execution_complete` never emitted). 
+ +Full block (2-space-then-tab indentation matching the file; verbatim from upstream +— no "Roo"/"Zoo" introduced): + +```ts +describe("onDidEndTerminalShellExecution race condition (#489, #622)", () => { + let endHandler: (e: any) => Promise<void> + + beforeEach(() => { + // Reset the initialized flag so we can call initialize() in this block. + TerminalRegistry["isInitialized"] = false + + // The global vscode mock doesn't define shell execution event + // methods, so add them before spying. + ;(vscode.window as any).onDidStartTerminalShellExecution ??= () => ({ dispose: () => {} }) + ;(vscode.window as any).onDidEndTerminalShellExecution ??= () => ({ dispose: () => {} }) + + vi.spyOn(vscode.window, "onDidStartTerminalShellExecution" as any).mockImplementation((_handler: any) => ({ + dispose: vi.fn(), + })) + + vi.spyOn(vscode.window, "onDidEndTerminalShellExecution" as any).mockImplementation((handler: any) => { + endHandler = handler + return { dispose: vi.fn() } + }) + + TerminalRegistry.initialize() + }) + + afterEach(() => { + // Reset so other test blocks aren't affected. + TerminalRegistry["isInitialized"] = false + }) + + it("calls shellExecutionComplete when end event fires before running is set (race)", async () => { + const terminal = TerminalRegistry.createTerminal("/test/path", "vscode") as Terminal + const mockProcess = { + command: "echo hello", + emit: vi.fn(), + hasUnretrievedOutput: vi.fn().mockReturnValue(false), + } as any + terminal.process = mockProcess + + // Simulate the race: running is still false (setActiveStream hasn't + // been called yet), but the end event fires. + expect(terminal.running).toBe(false) + + const mockExecution = { commandLine: { value: "echo hello" } } + await endHandler({ + terminal: terminal.terminal, + execution: mockExecution, + exitCode: 0, + }) + + // shellExecutionComplete should have been called exactly once, emitting + // shell_execution_complete so TerminalProcess.run() unblocks. 
+ expect(mockProcess.emit).toHaveBeenCalledWith( + "shell_execution_complete", + expect.objectContaining({ exitCode: 0 }), + ) + expect(mockProcess.emit).toHaveBeenCalledTimes(1) + + // Terminal should be back to idle state. + expect(terminal.busy).toBe(false) + expect(terminal.running).toBe(false) + }) + + it("sets busy=false without calling shellExecutionComplete when no process exists", async () => { + const terminal = TerminalRegistry.createTerminal("/test/path", "vscode") as Terminal + terminal.busy = true + terminal.process = undefined + const completeSpy = vi.spyOn(terminal, "shellExecutionComplete") + + expect(terminal.running).toBe(false) + + const mockExecution = { commandLine: { value: "echo hello" } } + await endHandler({ + terminal: terminal.terminal, + execution: mockExecution, + exitCode: 0, + }) + + expect(terminal.busy).toBe(false) + expect(completeSpy).not.toHaveBeenCalled() + }) +}) +``` + +## 3. Production fix — `src/integrations/terminal/TerminalRegistry.ts` + +### Edit A — the `!terminal.running` guard (lines 105–113) + +Replace: + +```ts +if (!terminal.running) { + console.error("[TerminalRegistry] Shell execution end event received, but process is not running for terminal:", { + terminalId: terminal?.id, + command: process?.command, + exitCode: e.exitCode, + }) + + terminal.busy = false + return +} +``` + +with: + +```ts +if (!terminal.running) { + // The end event can arrive before setActiveStream() has set + // running=true (race between the global VS Code event and the + // synchronous call in TerminalProcess.run). If a process is + // waiting for completion, deliver the signal so it doesn't + // hang forever. See #489 / #622. 
+ if (process) { + console.info("[TerminalRegistry] End event arrived before running=true (race); delivering completion signal", { + terminalId: terminal.id, + exitCode: e.exitCode, + }) + terminal.shellExecutionComplete(exitDetails) + } else { + terminal.busy = false + } + + return +} +``` + +### Edit B — drop the redundant trailing `busy = false` (lines 124–126) + +Replace: + +```ts +// Signal completion to any waiting processes. +terminal.shellExecutionComplete(exitDetails) +terminal.busy = false // Mark terminal as not busy when shell execution ends +``` + +with: + +```ts +// Signal completion to any waiting processes. +terminal.shellExecutionComplete(exitDetails) +``` + +(`shellExecutionComplete` already sets `busy = false`, so the trailing line is +dead.) + +## 4. Out of scope + +- No change to `BaseTerminal`/`Terminal`/`TerminalProcess`. No "Zoo"/"Roo" + strings. No rename of inherited issue-number references. + +## 5. Verify + +- `cd src && npx vitest run integrations/terminal/__tests__/TerminalRegistry.spec.ts` + → all green (both new tests pass, existing suite unaffected). +- `pnpm --filter tumble-code check-types` clean. +- `cd src && npx eslint integrations/terminal/TerminalRegistry.ts integrations/terminal/__tests__/TerminalRegistry.spec.ts` clean. + +## 6. Acceptance + +- [x] Race path delivers `shellExecutionComplete` when a process exists. +- [x] No-process path still just clears `busy`. +- [x] Redundant trailing `busy = false` removed from the normal path. +- [x] No "Roo"/"Zoo" user-facing strings introduced. + +## 7. 
Record + +```bash +node .claude/skills/zoo-port/scripts/zoo-prs.mjs record --pr 645 --status ported \ + --branch feature/zoo-645-terminalregistry-race-condition-guard \ + --plan ai_plans/2026-06-17_zoo-645-terminalregistry-race-condition-guard.md +``` diff --git a/src/integrations/terminal/TerminalRegistry.ts b/src/integrations/terminal/TerminalRegistry.ts index 544384bdd9..1007c503f7 100644 --- a/src/integrations/terminal/TerminalRegistry.ts +++ b/src/integrations/terminal/TerminalRegistry.ts @@ -103,12 +103,21 @@ export class TerminalRegistry { } if (!terminal.running) { - console.error( - "[TerminalRegistry] Shell execution end event received, but process is not running for terminal:", - { terminalId: terminal?.id, command: process?.command, exitCode: e.exitCode }, - ) + // The end event can arrive before setActiveStream() has set + // running=true (race between the global VS Code event and the + // synchronous call in TerminalProcess.run). If a process is + // waiting for completion, deliver the signal so it doesn't + // hang forever. See #489 / #622. + if (process) { + console.info( + "[TerminalRegistry] End event arrived before running=true (race); delivering completion signal", + { terminalId: terminal.id, exitCode: e.exitCode }, + ) + terminal.shellExecutionComplete(exitDetails) + } else { + terminal.busy = false + } - terminal.busy = false return } @@ -123,7 +132,6 @@ export class TerminalRegistry { // Signal completion to any waiting processes. 
terminal.shellExecutionComplete(exitDetails) - terminal.busy = false // Mark terminal as not busy when shell execution ends }, ) diff --git a/src/integrations/terminal/__tests__/TerminalRegistry.spec.ts b/src/integrations/terminal/__tests__/TerminalRegistry.spec.ts index daade4b506..23c818013c 100644 --- a/src/integrations/terminal/__tests__/TerminalRegistry.spec.ts +++ b/src/integrations/terminal/__tests__/TerminalRegistry.spec.ts @@ -205,6 +205,88 @@ describe("TerminalRegistry", () => { }) }) + describe("onDidEndTerminalShellExecution race condition (#489, #622)", () => { + let endHandler: (e: any) => Promise<void> + + beforeEach(() => { + // Reset the initialized flag so we can call initialize() in this block. + TerminalRegistry["isInitialized"] = false + + // The global vscode mock doesn't define shell execution event + // methods, so add them before spying. + ;(vscode.window as any).onDidStartTerminalShellExecution ??= () => ({ dispose: () => {} }) + ;(vscode.window as any).onDidEndTerminalShellExecution ??= () => ({ dispose: () => {} }) + + vi.spyOn(vscode.window, "onDidStartTerminalShellExecution" as any).mockImplementation((_handler: any) => ({ + dispose: vi.fn(), + })) + + vi.spyOn(vscode.window, "onDidEndTerminalShellExecution" as any).mockImplementation((handler: any) => { + endHandler = handler + return { dispose: vi.fn() } + }) + + TerminalRegistry.initialize() + }) + + afterEach(() => { + // Reset so other test blocks aren't affected. + TerminalRegistry["isInitialized"] = false + }) + + it("calls shellExecutionComplete when end event fires before running is set (race)", async () => { + const terminal = TerminalRegistry.createTerminal("/test/path", "vscode") as Terminal + const mockProcess = { + command: "echo hello", + emit: vi.fn(), + hasUnretrievedOutput: vi.fn().mockReturnValue(false), + } as any + terminal.process = mockProcess + + // Simulate the race: running is still false (setActiveStream hasn't + // been called yet), but the end event fires. 
+ expect(terminal.running).toBe(false) + + const mockExecution = { commandLine: { value: "echo hello" } } + await endHandler({ + terminal: terminal.terminal, + execution: mockExecution, + exitCode: 0, + }) + + // shellExecutionComplete should have been called exactly once, emitting + // shell_execution_complete so TerminalProcess.run() unblocks. + expect(mockProcess.emit).toHaveBeenCalledWith( + "shell_execution_complete", + expect.objectContaining({ exitCode: 0 }), + ) + expect(mockProcess.emit).toHaveBeenCalledTimes(1) + + // Terminal should be back to idle state. + expect(terminal.busy).toBe(false) + expect(terminal.running).toBe(false) + }) + + it("sets busy=false without calling shellExecutionComplete when no process exists", async () => { + const terminal = TerminalRegistry.createTerminal("/test/path", "vscode") as Terminal + terminal.busy = true + terminal.process = undefined + const completeSpy = vi.spyOn(terminal, "shellExecutionComplete") + + expect(terminal.running).toBe(false) + + const mockExecution = { commandLine: { value: "echo hello" } } + await endHandler({ + terminal: terminal.terminal, + execution: mockExecution, + exitCode: 0, + }) + + expect(terminal.busy).toBe(false) + expect(completeSpy).not.toHaveBeenCalled() + }) + }) + describe("releaseTerminalsForTask", () => { it("aborts a busy terminal's running process and disassociates it from the task (#245)", () => { const terminal = TerminalRegistry.createTerminal("/test/path", "vscode")