From 4e929bcfbbe3e6d91b3afa048f3d6d8a8a1edc60 Mon Sep 17 00:00:00 2001 From: wanglu241 Date: Wed, 3 Jun 2026 23:27:07 +0800 Subject: [PATCH 1/6] fix(anthropic): forward --thinking to extended thinking API The Anthropic provider accepted `--thinking <level>` (and the matching `thinking` config field) without error, but never forwarded the value to the API. Requests went out with no `thinking` block, so users who set a reasoning level got no extended thinking on Claude Opus / Sonnet. Two changes: 1. Pass `requestOptions.reasoningEffort` from `generateTextWithModelId` into `completeAnthropicText`, mirroring how the OpenAI path uses it. 2. In `completeAnthropicText`, map the OpenAI effort enum to a pi-ai `ThinkingLevel` and forward it as `reasoning`. When the model came from `createSyntheticModel` (`reasoning: false` by default), opt the model into thinking so the pi-ai adapter does not silently drop the block. Verified end-to-end against a custom `ANTHROPIC_BASE_URL` (Claude Opus 4.7 via a proxy that exposes the synthetic-model code path): --thinking low -> budget_tokens: 2048 --thinking medium -> budget_tokens: 8192 --thinking high -> budget_tokens: 15360 --thinking xhigh -> budget_tokens: 15360 Before: `has_thinking: false` for every effort level. 
--- src/llm/generate-text.ts | 1 + src/llm/providers/anthropic.ts | 21 +++++++++++++++++++-- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/src/llm/generate-text.ts b/src/llm/generate-text.ts index 6691e70e..51118d04 100644 --- a/src/llm/generate-text.ts +++ b/src/llm/generate-text.ts @@ -250,6 +250,7 @@ export async function generateTextWithModelId({ context, temperature: effectiveTemperature, maxOutputTokens, + reasoningEffort: requestOptions?.reasoningEffort, signal: controller.signal, anthropicBaseUrlOverride, }); diff --git a/src/llm/providers/anthropic.ts b/src/llm/providers/anthropic.ts index 618597b9..39a9c6ff 100644 --- a/src/llm/providers/anthropic.ts +++ b/src/llm/providers/anthropic.ts @@ -1,11 +1,19 @@ -import type { Context } from "@earendil-works/pi-ai"; +import type { Context, ThinkingLevel } from "@earendil-works/pi-ai"; import { completeSimple } from "@earendil-works/pi-ai"; import type { Attachment } from "../attachments.js"; +import type { OpenAiReasoningEffort } from "../model-options.js"; import type { LlmTokenUsage } from "../types.js"; import { normalizeAnthropicUsage, normalizeTokenUsage } from "../usage.js"; import { resolveAnthropicModel } from "./models.js"; import { bytesToBase64, extractText, resolveBaseUrlOverride } from "./shared.js"; +function effortToThinkingLevel( + effort: OpenAiReasoningEffort | undefined, +): ThinkingLevel | undefined { + if (!effort || effort === "none") return undefined; + return effort; +} + function parseAnthropicErrorPayload( responseBody: string, ): { type: string; message: string } | null { @@ -57,6 +65,7 @@ export async function completeAnthropicText({ context, temperature, maxOutputTokens, + reasoningEffort, signal, anthropicBaseUrlOverride, }: { @@ -65,17 +74,25 @@ export async function completeAnthropicText({ context: Context; temperature?: number; maxOutputTokens?: number; + reasoningEffort?: OpenAiReasoningEffort; signal: AbortSignal; anthropicBaseUrlOverride?: string | null; }): 
Promise<{ text: string; usage: LlmTokenUsage | null }> { - const model = resolveAnthropicModel({ + const baseModel = resolveAnthropicModel({ modelId, context, anthropicBaseUrlOverride, }); + const reasoning = effortToThinkingLevel(reasoningEffort); + // Synthetic (unknown) models default to `reasoning: false`, which makes the + // pi-ai Anthropic adapter silently drop the thinking block. When the caller + // requested a reasoning level, opt the model into thinking so the parameter + // round-trips to the API. + const model = reasoning && !baseModel.reasoning ? { ...baseModel, reasoning: true } : baseModel; const result = await completeSimple(model, context, { ...(typeof temperature === "number" ? { temperature } : {}), ...(typeof maxOutputTokens === "number" ? { maxTokens: maxOutputTokens } : {}), + ...(reasoning ? { reasoning } : {}), apiKey, signal, }); From df787111c14179b284b0b565d3354c57377456a2 Mon Sep 17 00:00:00 2001 From: wanglu241 Date: Thu, 4 Jun 2026 00:07:56 +0800 Subject: [PATCH 2/6] fix(anthropic): cover streaming path; gate reasoning override on synthetic models only MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses the two P1 review findings on #233: 1. Streaming path was uncovered. `--stream auto` enables streaming in TTY, so `summarize` from a terminal would call `streamSimple` without the `reasoning` option and the thinking block would never be sent. Thread the same handling through `streamAnthropic`-style dispatch in `generate-text-stream.ts`. 2. Earlier override flipped `reasoning: true` on ANY model whose pi-ai metadata reported `reasoning: false`. That includes registered Claude 3 / 3.5 models where extended thinking is unsupported, so users with a global `thinking` setting would go from successful no-thinking requests to API rejections. Restrict the override to *synthetic* models (`tryGetModel` miss — typical for custom `ANTHROPIC_BASE_URL` proxies in front of newer Claude versions). 
Shared helper `prepareAnthropicReasoning` is extracted to keep both dispatch sites in sync. Tests cover the five invariants: - no effort -> model and reasoning untouched - `none` -> off (no reasoning forwarded) - registered supported model -> reasoning forwarded, model metadata preserved - registered unsupported model (`reasoning: false`) -> metadata NOT mutated - synthetic model -> `reasoning: true` flipped so pi-ai forwards thinking Verified end-to-end against a custom `ANTHROPIC_BASE_URL` (Claude Opus 4.7 via proxy) on both `--stream on` and `--stream off`; outbound body now carries `thinking: {type: "enabled", budget_tokens: 15360, display: "summarized"}` in both paths. --- src/llm/generate-text-stream.ts | 13 +++- src/llm/providers/anthropic.ts | 42 +++++++++--- tests/llm.anthropic-reasoning.test.ts | 92 +++++++++++++++++++++++++++ 3 files changed, 137 insertions(+), 10 deletions(-) create mode 100644 tests/llm.anthropic-reasoning.test.ts diff --git a/src/llm/generate-text-stream.ts b/src/llm/generate-text-stream.ts index 92cd8ec5..baa9402d 100644 --- a/src/llm/generate-text-stream.ts +++ b/src/llm/generate-text-stream.ts @@ -10,7 +10,10 @@ import { resolveOpenAiCompatibleClientConfigForProvider, supportsStreaming, } from "./provider-capabilities.js"; -import { normalizeAnthropicModelAccessError } from "./providers/anthropic.js"; +import { + normalizeAnthropicModelAccessError, + prepareAnthropicReasoning, +} from "./providers/anthropic.js"; import { resolveAnthropicModel, resolveGoogleModel, @@ -296,14 +299,20 @@ export async function streamTextWithContext({ if (parsed.provider === "anthropic") { const apiKey = apiKeys.anthropicApiKey; if (!apiKey) throw new Error("Missing ANTHROPIC_API_KEY for anthropic/... 
model"); - const model = resolveAnthropicModel({ + const baseModel = resolveAnthropicModel({ modelId: parsed.model, context, anthropicBaseUrlOverride, }); + const { model, reasoning } = prepareAnthropicReasoning({ + modelId: parsed.model, + baseModel, + reasoningEffort: requestOptions?.reasoningEffort, + }); const stream = streamSimple(model, context, { ...(typeof effectiveTemperature === "number" ? { temperature: effectiveTemperature } : {}), ...(typeof maxOutputTokens === "number" ? { maxTokens: maxOutputTokens } : {}), + ...(reasoning ? { reasoning } : {}), apiKey, signal: controller.signal, }); diff --git a/src/llm/providers/anthropic.ts b/src/llm/providers/anthropic.ts index 39a9c6ff..b91b9220 100644 --- a/src/llm/providers/anthropic.ts +++ b/src/llm/providers/anthropic.ts @@ -1,11 +1,12 @@ -import type { Context, ThinkingLevel } from "@earendil-works/pi-ai"; +import type { Context, Model, ThinkingLevel } from "@earendil-works/pi-ai"; +import type { Api } from "@earendil-works/pi-ai"; import { completeSimple } from "@earendil-works/pi-ai"; import type { Attachment } from "../attachments.js"; import type { OpenAiReasoningEffort } from "../model-options.js"; import type { LlmTokenUsage } from "../types.js"; import { normalizeAnthropicUsage, normalizeTokenUsage } from "../usage.js"; import { resolveAnthropicModel } from "./models.js"; -import { bytesToBase64, extractText, resolveBaseUrlOverride } from "./shared.js"; +import { bytesToBase64, extractText, resolveBaseUrlOverride, tryGetModel } from "./shared.js"; function effortToThinkingLevel( effort: OpenAiReasoningEffort | undefined, @@ -14,6 +15,36 @@ function effortToThinkingLevel( return effort; } +/** + * Decide the model and `reasoning` option to pass into the pi-ai Anthropic + * adapter. Shared by non-streaming and streaming text dispatch. + * + * pi-ai gates extended thinking on `model.reasoning`. 
For models present in + * the pi-ai registry, we trust that flag — flipping it for known unsupported + * Claude 3/3.5 models would turn previously successful no-thinking requests + * into API rejections. For synthetic models (`tryGetModel` miss — typically + * custom `ANTHROPIC_BASE_URL` proxies in front of newer Claude versions), + * `createSyntheticModel` hard-codes `reasoning: false`, so we opt them into + * thinking when the caller asked for an effort level. + */ +export function prepareAnthropicReasoning({ + modelId, + baseModel, + reasoningEffort, +}: { + modelId: string; + baseModel: Model; + reasoningEffort?: OpenAiReasoningEffort; +}): { model: Model; reasoning?: ThinkingLevel } { + const reasoning = effortToThinkingLevel(reasoningEffort); + if (!reasoning) return { model: baseModel }; + const isSynthetic = !tryGetModel("anthropic", modelId); + if (isSynthetic && !baseModel.reasoning) { + return { model: { ...baseModel, reasoning: true }, reasoning }; + } + return { model: baseModel, reasoning }; +} + function parseAnthropicErrorPayload( responseBody: string, ): { type: string; message: string } | null { @@ -83,12 +114,7 @@ export async function completeAnthropicText({ context, anthropicBaseUrlOverride, }); - const reasoning = effortToThinkingLevel(reasoningEffort); - // Synthetic (unknown) models default to `reasoning: false`, which makes the - // pi-ai Anthropic adapter silently drop the thinking block. When the caller - // requested a reasoning level, opt the model into thinking so the parameter - // round-trips to the API. - const model = reasoning && !baseModel.reasoning ? { ...baseModel, reasoning: true } : baseModel; + const { model, reasoning } = prepareAnthropicReasoning({ modelId, baseModel, reasoningEffort }); const result = await completeSimple(model, context, { ...(typeof temperature === "number" ? { temperature } : {}), ...(typeof maxOutputTokens === "number" ? 
{ maxTokens: maxOutputTokens } : {}), diff --git a/tests/llm.anthropic-reasoning.test.ts b/tests/llm.anthropic-reasoning.test.ts new file mode 100644 index 00000000..30992829 --- /dev/null +++ b/tests/llm.anthropic-reasoning.test.ts @@ -0,0 +1,92 @@ +import type { Api, Model } from "@earendil-works/pi-ai"; +import { describe, expect, it } from "vitest"; +import { prepareAnthropicReasoning } from "../src/llm/providers/anthropic.js"; + +function makeBase( + modelId: string, + reasoning: boolean, + baseUrl = "https://api.anthropic.com", +): Model { + return { + id: modelId, + name: `anthropic/${modelId}`, + api: "anthropic-messages", + provider: "anthropic", + baseUrl, + reasoning, + input: ["text", "image"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 200_000, + maxTokens: 32_000, + }; +} + +describe("prepareAnthropicReasoning", () => { + it("returns the base model untouched when no effort is requested", () => { + const baseModel = makeBase("claude-opus-4-5", true); + const result = prepareAnthropicReasoning({ modelId: "claude-opus-4-5", baseModel }); + expect(result.model).toBe(baseModel); + expect(result.reasoning).toBeUndefined(); + }); + + it("treats 'none' as off and does not forward reasoning", () => { + const baseModel = makeBase("claude-opus-4-5", true); + const result = prepareAnthropicReasoning({ + modelId: "claude-opus-4-5", + baseModel, + reasoningEffort: "none", + }); + expect(result.reasoning).toBeUndefined(); + expect(result.model).toBe(baseModel); + }); + + it("forwards reasoning for a supported registered model without mutating metadata", () => { + // Registered Anthropic model with reasoning support; resolveAnthropicModel + // returns it intact so we should not flip any flags. 
+ const baseModel = makeBase("claude-opus-4-5", true); + const result = prepareAnthropicReasoning({ + modelId: "claude-opus-4-5", + baseModel, + reasoningEffort: "xhigh", + }); + expect(result.reasoning).toBe("xhigh"); + expect(result.model).toBe(baseModel); + expect(result.model.reasoning).toBe(true); + }); + + it("does NOT flip reasoning:false on registered unsupported models (Claude 3/3.5)", () => { + // pi-ai's registry marks Claude 3.5 Sonnet as reasoning: false because + // extended thinking is unsupported. We must preserve that flag so the SDK + // can keep dropping the thinking block instead of getting a 4xx from API. + const baseModel = makeBase("claude-3-5-sonnet-20241022", false); + const result = prepareAnthropicReasoning({ + modelId: "claude-3-5-sonnet-20241022", + baseModel, + reasoningEffort: "high", + }); + // We still forward `reasoning` to pi-ai — pi-ai's adapter is responsible + // for deciding what to do with it given the model's metadata. The key + // invariant is that we did not mutate the model's reasoning flag. + expect(result.model).toBe(baseModel); + expect(result.model.reasoning).toBe(false); + }); + + it("opts synthetic models into thinking so the request body carries thinking", () => { + // A custom modelId not in the pi-ai registry (e.g. `Claude-Opus-4.7` + // routed through a jdcloud-style proxy) is built via createSyntheticModel + // with reasoning: false. Without opting in, the pi-ai Anthropic adapter + // would silently drop the thinking block. + const baseModel = makeBase("Definitely-Not-A-Real-Claude-Model-Id-42", false, "https://proxy.example/anthropic"); + const result = prepareAnthropicReasoning({ + modelId: "Definitely-Not-A-Real-Claude-Model-Id-42", + baseModel, + reasoningEffort: "xhigh", + }); + expect(result.reasoning).toBe("xhigh"); + expect(result.model).not.toBe(baseModel); + expect(result.model.reasoning).toBe(true); + // Other model fields should be preserved. 
+ expect(result.model.id).toBe(baseModel.id); + expect(result.model.baseUrl).toBe(baseModel.baseUrl); + }); +}); From ffe640d6f2b629809cf203f59c1ef3bc235c76a0 Mon Sep 17 00:00:00 2001 From: wanglu241 Date: Sat, 6 Jun 2026 15:13:50 +0800 Subject: [PATCH 3/6] fix(anthropic): drop reasoning for registered unsupported Claude models --- src/llm/providers/anthropic.ts | 27 ++++++++++++++++++--------- tests/llm.anthropic-reasoning.test.ts | 19 +++++++++++-------- 2 files changed, 29 insertions(+), 17 deletions(-) diff --git a/src/llm/providers/anthropic.ts b/src/llm/providers/anthropic.ts index b91b9220..6c647aac 100644 --- a/src/llm/providers/anthropic.ts +++ b/src/llm/providers/anthropic.ts @@ -19,13 +19,17 @@ function effortToThinkingLevel( * Decide the model and `reasoning` option to pass into the pi-ai Anthropic * adapter. Shared by non-streaming and streaming text dispatch. * - * pi-ai gates extended thinking on `model.reasoning`. For models present in - * the pi-ai registry, we trust that flag — flipping it for known unsupported - * Claude 3/3.5 models would turn previously successful no-thinking requests - * into API rejections. For synthetic models (`tryGetModel` miss — typically - * custom `ANTHROPIC_BASE_URL` proxies in front of newer Claude versions), - * `createSyntheticModel` hard-codes `reasoning: false`, so we opt them into - * thinking when the caller asked for an effort level. + * pi-ai 0.75.5 enables extended thinking whenever the caller passes a + * `reasoning` option, regardless of `model.reasoning`. So: + * + * - Registered models with `reasoning: true` (Claude 4+): forward `reasoning`. + * - Registered models with `reasoning: false` (Claude 3 / 3.5): drop + * `reasoning` entirely; forwarding it would have pi-ai send a `thinking` + * block to an API that rejects it. 
+ * - Synthetic models (`tryGetModel` miss — typically custom + * `ANTHROPIC_BASE_URL` proxies in front of newer Claude versions): + * `createSyntheticModel` hard-codes `reasoning: false`, so we flip a copy + * to `reasoning: true` and forward the effort level. */ export function prepareAnthropicReasoning({ modelId, @@ -39,8 +43,13 @@ export function prepareAnthropicReasoning({ const reasoning = effortToThinkingLevel(reasoningEffort); if (!reasoning) return { model: baseModel }; const isSynthetic = !tryGetModel("anthropic", modelId); - if (isSynthetic && !baseModel.reasoning) { - return { model: { ...baseModel, reasoning: true }, reasoning }; + if (!baseModel.reasoning) { + if (isSynthetic) { + return { model: { ...baseModel, reasoning: true }, reasoning }; + } + // Registered but flagged unsupported (e.g. Claude 3/3.5): drop reasoning + // so pi-ai does not enable thinking on a model the API rejects it for. + return { model: baseModel }; } return { model: baseModel, reasoning }; } diff --git a/tests/llm.anthropic-reasoning.test.ts b/tests/llm.anthropic-reasoning.test.ts index 30992829..7ea71f3c 100644 --- a/tests/llm.anthropic-reasoning.test.ts +++ b/tests/llm.anthropic-reasoning.test.ts @@ -54,21 +54,20 @@ describe("prepareAnthropicReasoning", () => { expect(result.model.reasoning).toBe(true); }); - it("does NOT flip reasoning:false on registered unsupported models (Claude 3/3.5)", () => { - // pi-ai's registry marks Claude 3.5 Sonnet as reasoning: false because - // extended thinking is unsupported. We must preserve that flag so the SDK - // can keep dropping the thinking block instead of getting a 4xx from API. + it("drops reasoning on registered unsupported models (Claude 3/3.5) so pi-ai does not enable thinking", () => { + // pi-ai 0.75.5 enables extended thinking whenever `options.reasoning` is + // present, regardless of `model.reasoning`. 
For Claude 3/3.5 the API + // rejects thinking blocks, so we must drop the reasoning option entirely + // when the user has a global `thinking` setting active. const baseModel = makeBase("claude-3-5-sonnet-20241022", false); const result = prepareAnthropicReasoning({ modelId: "claude-3-5-sonnet-20241022", baseModel, reasoningEffort: "high", }); - // We still forward `reasoning` to pi-ai — pi-ai's adapter is responsible - // for deciding what to do with it given the model's metadata. The key - // invariant is that we did not mutate the model's reasoning flag. expect(result.model).toBe(baseModel); expect(result.model.reasoning).toBe(false); + expect(result.reasoning).toBeUndefined(); }); it("opts synthetic models into thinking so the request body carries thinking", () => { @@ -76,7 +75,11 @@ describe("prepareAnthropicReasoning", () => { // routed through a jdcloud-style proxy) is built via createSyntheticModel // with reasoning: false. Without opting in, the pi-ai Anthropic adapter // would silently drop the thinking block. 
- const baseModel = makeBase("Definitely-Not-A-Real-Claude-Model-Id-42", false, "https://proxy.example/anthropic"); + const baseModel = makeBase( + "Definitely-Not-A-Real-Claude-Model-Id-42", + false, + "https://proxy.example/anthropic", + ); const result = prepareAnthropicReasoning({ modelId: "Definitely-Not-A-Real-Claude-Model-Id-42", baseModel, From 760a03bc220cd315f06d7f728b7ebbf29499289f Mon Sep 17 00:00:00 2001 From: wanglu241 Date: Sat, 6 Jun 2026 15:41:21 +0800 Subject: [PATCH 4/6] fix(anthropic): do not inherit openai-scoped reasoning defaults --- src/llm/model-options.ts | 26 ++++++++++++++++++++ src/run/summary-engine.ts | 13 +++++----- tests/model-options.test.ts | 48 +++++++++++++++++++++++++++++++++++++ 3 files changed, 81 insertions(+), 6 deletions(-) diff --git a/src/llm/model-options.ts b/src/llm/model-options.ts index f34db280..23788b19 100644 --- a/src/llm/model-options.ts +++ b/src/llm/model-options.ts @@ -79,6 +79,32 @@ export function mergeModelRequestOptions( return Object.keys(merged).length > 0 ? merged : undefined; } +/** + * Merge request options for a specific provider. The OpenAI-scoped global + * default (`openaiRequestOptions`) and CLI override (`openaiRequestOptionsOverride`) + * are sourced from `openai.*` config and `--thinking`/`--fast`/`--service-tier` + * flags that documentation/CLI help describe as OpenAI-only. Those entries are + * only applied for the `openai` provider; for every other provider only the + * per-attempt options (set via the model config or a provider-prefixed CLI + * model id) flow through. 
+ */ +export function mergeRequestOptionsForProvider({ + provider, + openaiGlobalDefault, + attemptOptions, + openaiOverride, +}: { + provider: string; + openaiGlobalDefault: ModelRequestOptionsInput | null | undefined; + attemptOptions: ModelRequestOptionsInput | null | undefined; + openaiOverride: ModelRequestOptionsInput | null | undefined; +}): ModelRequestOptions | undefined { + if (provider === "openai") { + return mergeModelRequestOptions(openaiGlobalDefault, attemptOptions, openaiOverride); + } + return mergeModelRequestOptions(attemptOptions); +} + export function toOpenAiServiceTierParam(serviceTier: string | undefined): string | undefined { const normalized = serviceTier?.trim(); if (!normalized) return undefined; diff --git a/src/run/summary-engine.ts b/src/run/summary-engine.ts index e35cac77..d82d5634 100644 --- a/src/run/summary-engine.ts +++ b/src/run/summary-engine.ts @@ -5,7 +5,7 @@ import { isCliDisabled, runCliModel } from "../llm/cli.js"; import { streamTextWithModelId } from "../llm/generate-text.js"; import { resolveGitHubModelsApiKey } from "../llm/github-models.js"; import { parseGatewayStyleModelId } from "../llm/model-id.js"; -import { mergeModelRequestOptions } from "../llm/model-options.js"; +import { mergeRequestOptionsForProvider } from "../llm/model-options.js"; import type { ModelRequestOptions } from "../llm/model-options.js"; import type { Prompt } from "../llm/prompt.js"; import { formatCompactCount } from "../tty/format.js"; @@ -322,11 +322,12 @@ export function createSummaryEngine(deps: SummaryEngineDeps) { ); } const parsedModelEffective = parseGatewayStyleModelId(modelResolution.modelId); - const requestOptions = mergeModelRequestOptions( - deps.openaiRequestOptions, - attempt.requestOptions, - deps.openaiRequestOptionsOverride, - ); + const requestOptions = mergeRequestOptionsForProvider({ + provider: parsedModelEffective.provider, + openaiGlobalDefault: deps.openaiRequestOptions, + attemptOptions: attempt.requestOptions, + 
openaiOverride: deps.openaiRequestOptionsOverride, + }); const streamingEnabledForCall = allowStreaming && deps.streamingEnabled && diff --git a/tests/model-options.test.ts b/tests/model-options.test.ts index 4f131091..ac8ae922 100644 --- a/tests/model-options.test.ts +++ b/tests/model-options.test.ts @@ -1,6 +1,7 @@ import { describe, expect, it } from "vitest"; import { mergeModelRequestOptions, + mergeRequestOptionsForProvider, parseOpenAiReasoningEffort, toOpenAiServiceTierParam, } from "../src/llm/model-options.js"; @@ -26,3 +27,50 @@ describe("model request options", () => { expect(() => parseOpenAiReasoningEffort("minimal")).toThrow(/expected none, low/); }); }); + +describe("mergeRequestOptionsForProvider", () => { + const openaiGlobalDefault = { reasoningEffort: "high" as const }; + const openaiOverride = { serviceTier: "fast" }; + + it("applies the OpenAI-scoped global default and override only for the openai provider", () => { + const merged = mergeRequestOptionsForProvider({ + provider: "openai", + openaiGlobalDefault, + attemptOptions: undefined, + openaiOverride, + }); + expect(merged).toEqual({ reasoningEffort: "high", serviceTier: "fast" }); + }); + + it("does not bleed the OpenAI-scoped global default into the anthropic provider", () => { + const merged = mergeRequestOptionsForProvider({ + provider: "anthropic", + openaiGlobalDefault, + attemptOptions: undefined, + openaiOverride, + }); + expect(merged).toBeUndefined(); + }); + + it("forwards the per-attempt reasoning effort to anthropic when the user opted in for that attempt", () => { + const merged = mergeRequestOptionsForProvider({ + provider: "anthropic", + openaiGlobalDefault, + attemptOptions: { reasoningEffort: "xhigh" }, + openaiOverride, + }); + expect(merged).toEqual({ reasoningEffort: "xhigh" }); + }); + + it("isolates other non-openai providers (zai, google, xai) from openai-scoped defaults", () => { + for (const provider of ["zai", "google", "xai", "nvidia", "ollama", 
"github-copilot"]) { + const merged = mergeRequestOptionsForProvider({ + provider, + openaiGlobalDefault, + attemptOptions: undefined, + openaiOverride, + }); + expect(merged, `provider ${provider}`).toBeUndefined(); + } + }); +}); From aead17db904a208a849ee68ff8381a7ed9cb09bb Mon Sep 17 00:00:00 2001 From: wanglu241 Date: Sat, 6 Jun 2026 16:29:11 +0800 Subject: [PATCH 5/6] fix(anthropic): preserve explicit CLI --thinking when scoping defaults --- src/daemon/chat.ts | 17 +++- src/llm/model-options.ts | 32 +++++-- src/run/flows/url/markdown.ts | 26 ++--- src/run/flows/url/types.ts | 3 +- src/run/run-config.ts | 14 ++- src/run/runner-plan.ts | 3 + src/run/summary-engine.ts | 4 +- tests/model-options.test.ts | 57 +++++++++++ tests/run.config.test.ts | 15 ++- tests/run.url-markdown-anthropic.test.ts | 117 +++++++++++++++++++++++ 10 files changed, 253 insertions(+), 35 deletions(-) create mode 100644 tests/run.url-markdown-anthropic.test.ts diff --git a/src/daemon/chat.ts b/src/daemon/chat.ts index 5e4c1512..505dd73a 100644 --- a/src/daemon/chat.ts +++ b/src/daemon/chat.ts @@ -4,7 +4,8 @@ import { runCliModel } from "../llm/cli.js"; import type { LlmApiKeys } from "../llm/generate-text.js"; import { streamTextWithContext } from "../llm/generate-text.js"; import { resolveGitHubModelsApiKey } from "../llm/github-models.js"; -import { mergeModelRequestOptions } from "../llm/model-options.js"; +import { parseGatewayStyleModelId } from "../llm/model-id.js"; +import { mergeModelRequestOptions, mergeRequestOptionsForProvider } from "../llm/model-options.js"; import { buildAutoModelAttempts, envHasKey } from "../model-auto.js"; import { parseBooleanEnv, parseCliUserModelId } from "../run/env.js"; import { resolveEnvState } from "../run/run-env.js"; @@ -257,7 +258,12 @@ export async function streamChatResponse({ forceOpenRouter: resolved.forceOpenRouter, openaiBaseUrlOverride: resolved.openaiBaseUrlOverride, forceChatCompletions: resolved.forceChatCompletions, - requestOptions: 
mergeModelRequestOptions(openaiRequestOptions, resolved.requestOptions), + requestOptions: mergeRequestOptionsForProvider({ + provider: parseGatewayStyleModelId(resolved.modelId!).provider, + openaiGlobalDefault: openaiRequestOptions, + attemptOptions: resolved.requestOptions, + openaiOverride: undefined, + }), }); for await (const chunk of result.textStream) { pushToSession({ event: "content", data: chunk }); @@ -332,7 +338,12 @@ export async function streamChatResponse({ : attempt.requiredEnv === "OPENAI_API_KEY" ? openaiUseChatCompletions : undefined, - requestOptions: mergeModelRequestOptions(openaiRequestOptions, attempt.requestOptions), + requestOptions: mergeRequestOptionsForProvider({ + provider: parseGatewayStyleModelId(attempt.llmModelId!).provider, + openaiGlobalDefault: openaiRequestOptions, + attemptOptions: attempt.requestOptions, + openaiOverride: undefined, + }), }); for await (const chunk of result.textStream) { pushToSession({ event: "content", data: chunk }); diff --git a/src/llm/model-options.ts b/src/llm/model-options.ts index 23788b19..489323ba 100644 --- a/src/llm/model-options.ts +++ b/src/llm/model-options.ts @@ -80,29 +80,43 @@ export function mergeModelRequestOptions( } /** - * Merge request options for a specific provider. The OpenAI-scoped global - * default (`openaiRequestOptions`) and CLI override (`openaiRequestOptionsOverride`) - * are sourced from `openai.*` config and `--thinking`/`--fast`/`--service-tier` - * flags that documentation/CLI help describe as OpenAI-only. Those entries are - * only applied for the `openai` provider; for every other provider only the - * per-attempt options (set via the model config or a provider-prefixed CLI - * model id) flow through. + * Merge request options for a specific provider. + * + * - `openaiGlobalDefault` comes from the persisted `openai.*` config block — a + * provider-scoped default that must NOT bleed into non-openai requests. 
+ * - `openaiOverride` comes from `--fast` / `--service-tier`, which are + * documented as OpenAI-only knobs. Also only for the openai provider. + * - `cliReasoningEffortOverride` comes from the explicit `--thinking` CLI flag, + * which is cross-provider (the user opted in for this run). It is forwarded + * to whichever provider is dispatched. + * - `attemptOptions` is the per-attempt options bag (from the model config or + * provider-prefixed CLI id) and applies to every provider. */ export function mergeRequestOptionsForProvider({ provider, openaiGlobalDefault, attemptOptions, openaiOverride, + cliReasoningEffortOverride, }: { provider: string; openaiGlobalDefault: ModelRequestOptionsInput | null | undefined; attemptOptions: ModelRequestOptionsInput | null | undefined; openaiOverride: ModelRequestOptionsInput | null | undefined; + cliReasoningEffortOverride?: OpenAiReasoningEffort | undefined; }): ModelRequestOptions | undefined { + const cliReasoningEntry: ModelRequestOptionsInput | undefined = cliReasoningEffortOverride + ? 
{ reasoningEffort: cliReasoningEffortOverride } + : undefined; if (provider === "openai") { - return mergeModelRequestOptions(openaiGlobalDefault, attemptOptions, openaiOverride); + return mergeModelRequestOptions( + openaiGlobalDefault, + attemptOptions, + openaiOverride, + cliReasoningEntry, + ); } - return mergeModelRequestOptions(attemptOptions); + return mergeModelRequestOptions(attemptOptions, cliReasoningEntry); } export function toOpenAiServiceTierParam(serviceTier: string | undefined): string | undefined { diff --git a/src/run/flows/url/markdown.ts b/src/run/flows/url/markdown.ts index d79f2e73..e0227f7f 100644 --- a/src/run/flows/url/markdown.ts +++ b/src/run/flows/url/markdown.ts @@ -1,7 +1,7 @@ import { resolveGitHubModelsApiKey } from "../../../llm/github-models.js"; import { createHtmlToMarkdownConverter } from "../../../llm/html-to-markdown.js"; import { parseGatewayStyleModelId } from "../../../llm/model-id.js"; -import { mergeModelRequestOptions } from "../../../llm/model-options.js"; +import { mergeRequestOptionsForProvider } from "../../../llm/model-options.js"; import { type ConvertTranscriptToMarkdown, createTranscriptToMarkdownConverter, @@ -257,11 +257,13 @@ export function createMarkdownConverters( forceChatCompletions: markdownModel.forceChatCompletions ?? 
(ctx.model.openaiUseChatCompletions && markdownProvider === "openai"), - requestOptions: mergeModelRequestOptions( - ctx.model.openaiRequestOptions, - markdownModel.requestOptions, - ctx.model.openaiRequestOptionsOverride, - ), + requestOptions: mergeRequestOptionsForProvider({ + provider: markdownProvider, + openaiGlobalDefault: ctx.model.openaiRequestOptions, + attemptOptions: markdownModel.requestOptions, + openaiOverride: ctx.model.openaiRequestOptionsOverride, + cliReasoningEffortOverride: ctx.model.cliReasoningEffortOverride, + }), fetchImpl: ctx.io.fetch, retries: ctx.flags.retries, onRetry: createRetryLogger({ @@ -363,11 +365,13 @@ export function createMarkdownConverters( forceChatCompletions: markdownModel.forceChatCompletions ?? (ctx.model.openaiUseChatCompletions && markdownProvider === "openai"), - requestOptions: mergeModelRequestOptions( - ctx.model.openaiRequestOptions, - markdownModel.requestOptions, - ctx.model.openaiRequestOptionsOverride, - ), + requestOptions: mergeRequestOptionsForProvider({ + provider: markdownProvider, + openaiGlobalDefault: ctx.model.openaiRequestOptions, + attemptOptions: markdownModel.requestOptions, + openaiOverride: ctx.model.openaiRequestOptionsOverride, + cliReasoningEffortOverride: ctx.model.cliReasoningEffortOverride, + }), fetchImpl: ctx.io.fetch, retries: ctx.flags.retries, onRetry: createRetryLogger({ diff --git a/src/run/flows/url/types.ts b/src/run/flows/url/types.ts index 9619293a..78d6cf63 100644 --- a/src/run/flows/url/types.ts +++ b/src/run/flows/url/types.ts @@ -8,7 +8,7 @@ import type { import type { LlmCall, RunMetricsReport } from "../../../costs.js"; import type { StreamMode } from "../../../flags.js"; import type { OutputLanguage } from "../../../language.js"; -import type { ModelRequestOptions } from "../../../llm/model-options.js"; +import type { ModelRequestOptions, OpenAiReasoningEffort } from "../../../llm/model-options.js"; import type { ExecFileFn } from "../../../markitdown.js"; import type { 
FixedModelSpec, RequestedModel } from "../../../model-spec.js"; import type { SummaryLength } from "../../../shared/contracts.js"; @@ -89,6 +89,7 @@ export type UrlFlowModel = { openaiUseChatCompletions: boolean | undefined; openaiRequestOptions?: ModelRequestOptions; openaiRequestOptionsOverride?: ModelRequestOptions; + cliReasoningEffortOverride?: OpenAiReasoningEffort; openaiWhisperUsdPerMinute: number; apiStatus: { xaiApiKey: string | null; diff --git a/src/run/run-config.ts b/src/run/run-config.ts index ba38e2a5..47a94a82 100644 --- a/src/run/run-config.ts +++ b/src/run/run-config.ts @@ -3,7 +3,7 @@ import { loadSummarizeConfig } from "../config.js"; import { parseVideoMode } from "../flags.js"; import { type OutputLanguage, parseOutputLanguage } from "../language.js"; import { parseOpenAiReasoningEffort, parseOpenAiServiceTier } from "../llm/model-options.js"; -import type { ModelRequestOptions } from "../llm/model-options.js"; +import type { ModelRequestOptions, OpenAiReasoningEffort } from "../llm/model-options.js"; import { parseBooleanEnv } from "./env.js"; export type ConfigState = { @@ -17,6 +17,7 @@ export type ConfigState = { openaiUseChatCompletions: boolean | undefined; openaiRequestOptions: ModelRequestOptions | undefined; openaiRequestOptionsOverride: ModelRequestOptions | undefined; + cliReasoningEffortOverride: OpenAiReasoningEffort | undefined; configModelLabel: string | null; }; @@ -110,13 +111,15 @@ export function resolveConfigState({ } options.serviceTier = serviceTier; } - const rawThinking = typeof programOpts.thinking === "string" ? programOpts.thinking : null; - if (rawThinking) { - options.reasoningEffort = parseOpenAiReasoningEffort(rawThinking, "--thinking"); - } return Object.keys(options).length > 0 ? options : undefined; })(); + const cliReasoningEffortOverride: OpenAiReasoningEffort | undefined = (() => { + const rawThinking = typeof programOpts.thinking === "string" ? 
programOpts.thinking : null; + if (!rawThinking) return undefined; + return parseOpenAiReasoningEffort(rawThinking, "--thinking"); + })(); + const configModelLabel = (() => { const model = config?.model; if (!model) return null; @@ -137,6 +140,7 @@ export function resolveConfigState({ openaiUseChatCompletions, openaiRequestOptions, openaiRequestOptionsOverride, + cliReasoningEffortOverride, configModelLabel, }; } diff --git a/src/run/runner-plan.ts b/src/run/runner-plan.ts index 4b72237a..0c096cdf 100644 --- a/src/run/runner-plan.ts +++ b/src/run/runner-plan.ts @@ -145,6 +145,7 @@ export async function createRunnerPlan(options: { openaiUseChatCompletions, openaiRequestOptions, openaiRequestOptionsOverride, + cliReasoningEffortOverride, configModelLabel, apiKey, openrouterApiKey, @@ -340,6 +341,7 @@ export async function createRunnerPlan(options: { openaiUseChatCompletions, openaiRequestOptions, openaiRequestOptionsOverride, + cliReasoningEffortOverride, cliConfigForRun: cliConfigForRun ?? 
null, cliAvailability, trackedFetch, @@ -452,6 +454,7 @@ export async function createRunnerPlan(options: { openaiUseChatCompletions, openaiRequestOptions, openaiRequestOptionsOverride, + cliReasoningEffortOverride, openaiWhisperUsdPerMinute, apiStatus: { xaiApiKey, diff --git a/src/run/summary-engine.ts b/src/run/summary-engine.ts index d82d5634..b4f407a9 100644 --- a/src/run/summary-engine.ts +++ b/src/run/summary-engine.ts @@ -6,7 +6,7 @@ import { streamTextWithModelId } from "../llm/generate-text.js"; import { resolveGitHubModelsApiKey } from "../llm/github-models.js"; import { parseGatewayStyleModelId } from "../llm/model-id.js"; import { mergeRequestOptionsForProvider } from "../llm/model-options.js"; -import type { ModelRequestOptions } from "../llm/model-options.js"; +import type { ModelRequestOptions, OpenAiReasoningEffort } from "../llm/model-options.js"; import type { Prompt } from "../llm/prompt.js"; import { formatCompactCount } from "../tty/format.js"; import { createRetryLogger, writeVerbose } from "./logging.js"; @@ -39,6 +39,7 @@ export type SummaryEngineDeps = { openaiUseChatCompletions: boolean | undefined; openaiRequestOptions?: ModelRequestOptions; openaiRequestOptionsOverride?: ModelRequestOptions; + cliReasoningEffortOverride?: OpenAiReasoningEffort; cliConfigForRun: Parameters[0]["config"]; cliAvailability: Partial>; trackedFetch: typeof fetch; @@ -327,6 +328,7 @@ export function createSummaryEngine(deps: SummaryEngineDeps) { openaiGlobalDefault: deps.openaiRequestOptions, attemptOptions: attempt.requestOptions, openaiOverride: deps.openaiRequestOptionsOverride, + cliReasoningEffortOverride: deps.cliReasoningEffortOverride, }); const streamingEnabledForCall = allowStreaming && diff --git a/tests/model-options.test.ts b/tests/model-options.test.ts index ac8ae922..05aa2b29 100644 --- a/tests/model-options.test.ts +++ b/tests/model-options.test.ts @@ -73,4 +73,61 @@ describe("mergeRequestOptionsForProvider", () => { expect(merged, `provider 
${provider}`).toBeUndefined(); } }); + + it("forwards an explicit CLI --thinking override to the anthropic provider", () => { + const merged = mergeRequestOptionsForProvider({ + provider: "anthropic", + openaiGlobalDefault, + attemptOptions: undefined, + openaiOverride, + cliReasoningEffortOverride: "xhigh", + }); + expect(merged).toEqual({ reasoningEffort: "xhigh" }); + }); + + it("does not leak a persisted openai.thinking config default into anthropic without a CLI override", () => { + const merged = mergeRequestOptionsForProvider({ + provider: "anthropic", + openaiGlobalDefault: { reasoningEffort: "high" }, + attemptOptions: undefined, + openaiOverride: undefined, + cliReasoningEffortOverride: undefined, + }); + expect(merged).toBeUndefined(); + }); + + it("respects persisted openai.thinking for openai when no CLI override is set", () => { + const merged = mergeRequestOptionsForProvider({ + provider: "openai", + openaiGlobalDefault: { reasoningEffort: "high" }, + attemptOptions: undefined, + openaiOverride: undefined, + cliReasoningEffortOverride: undefined, + }); + expect(merged).toEqual({ reasoningEffort: "high" }); + }); + + it("lets a CLI --thinking override beat persisted openai.thinking for the openai provider", () => { + const merged = mergeRequestOptionsForProvider({ + provider: "openai", + openaiGlobalDefault: { reasoningEffort: "high" }, + attemptOptions: undefined, + openaiOverride: undefined, + cliReasoningEffortOverride: "xhigh", + }); + expect(merged).toEqual({ reasoningEffort: "xhigh" }); + }); + + it("forwards CLI --thinking to other non-openai providers (zai, google, xai, ...)", () => { + for (const provider of ["zai", "google", "xai", "nvidia", "ollama", "github-copilot"]) { + const merged = mergeRequestOptionsForProvider({ + provider, + openaiGlobalDefault, + attemptOptions: undefined, + openaiOverride, + cliReasoningEffortOverride: "xhigh", + }); + expect(merged, `provider ${provider}`).toEqual({ reasoningEffort: "xhigh" }); + } + }); }); 
diff --git a/tests/run.config.test.ts b/tests/run.config.test.ts index b296e275..bafa7d96 100644 --- a/tests/run.config.test.ts +++ b/tests/run.config.test.ts @@ -33,13 +33,12 @@ function resolveTestConfigStateWithEnv( } describe("run config", () => { - it("maps --fast and --thinking to OpenAI request overrides", () => { - expect( - resolveTestConfigState({ fast: true, thinking: "mid" }).openaiRequestOptionsOverride, - ).toEqual({ + it("maps --fast to OpenAI request overrides and --thinking to the cross-provider CLI override", () => { + const state = resolveTestConfigState({ fast: true, thinking: "mid" }); + expect(state.openaiRequestOptionsOverride).toEqual({ serviceTier: "fast", - reasoningEffort: "medium", }); + expect(state.cliReasoningEffortOverride).toBe("medium"); }); it("maps --service-tier to OpenAI request overrides", () => { @@ -72,4 +71,10 @@ describe("run config", () => { it("leaves openaiUseChatCompletions unset when there is no env or config override", () => { expect(resolveTestConfigState({}).openaiUseChatCompletions).toBeUndefined(); }); + + it("lifts --thinking out of the openai-scoped override entirely", () => { + const state = resolveTestConfigState({ thinking: "xhigh" }); + expect(state.openaiRequestOptionsOverride).toBeUndefined(); + expect(state.cliReasoningEffortOverride).toBe("xhigh"); + }); }); diff --git a/tests/run.url-markdown-anthropic.test.ts b/tests/run.url-markdown-anthropic.test.ts new file mode 100644 index 00000000..3cd3ab5a --- /dev/null +++ b/tests/run.url-markdown-anthropic.test.ts @@ -0,0 +1,117 @@ +import { Writable } from "node:stream"; +import { describe, expect, it, vi } from "vitest"; +import { parseRequestedModelId } from "../src/model-spec.js"; + +const mocks = vi.hoisted(() => ({ + createHtmlToMarkdownConverter: vi.fn(() => async () => "# Converted"), +})); + +vi.mock("../src/llm/html-to-markdown.js", () => ({ + createHtmlToMarkdownConverter: mocks.createHtmlToMarkdownConverter, +})); + +import { 
createMarkdownConverters } from "../src/run/flows/url/markdown.js"; +import type { UrlFlowContext } from "../src/run/flows/url/types.js"; + +function sink() { + return new Writable({ + write(_chunk, _encoding, callback) { + callback(); + }, + }); +} + +function buildCtx(opts: { + openaiRequestOptions?: { reasoningEffort?: "high" }; + openaiRequestOptionsOverride?: { serviceTier?: "fast" }; + cliReasoningEffortOverride?: "xhigh"; +}): UrlFlowContext { + const fixedModel = parseRequestedModelId("anthropic/claude-sonnet-4-5"); + if (fixedModel.kind !== "fixed" || fixedModel.transport !== "native") { + throw new Error("expected fixed native anthropic model"); + } + return { + io: { + env: {}, + envForRun: {}, + stdout: sink(), + stderr: sink(), + fetch: globalThis.fetch.bind(globalThis), + execFileImpl: vi.fn(), + }, + flags: { + format: "markdown", + markdownMode: "llm", + transcriptTimestamps: false, + preprocessMode: "off", + retries: 0, + verbose: false, + verboseColor: false, + }, + model: { + requestedModel: fixedModel, + fixedModelSpec: fixedModel, + apiStatus: { + xaiApiKey: null, + googleApiKey: null, + apiKey: null, + anthropicApiKey: "sk-test", + openrouterApiKey: null, + openrouterConfigured: false, + googleConfigured: false, + anthropicConfigured: true, + zaiApiKey: null, + zaiBaseUrl: "", + nvidiaApiKey: null, + nvidiaBaseUrl: "", + ollamaBaseUrl: "", + providerBaseUrls: { + openai: null, + anthropic: null, + google: null, + xai: null, + }, + }, + openaiUseChatCompletions: false, + openaiRequestOptions: opts.openaiRequestOptions, + openaiRequestOptionsOverride: opts.openaiRequestOptionsOverride, + cliReasoningEffortOverride: opts.cliReasoningEffortOverride, + llmCalls: [], + }, + } as unknown as UrlFlowContext; +} + +describe("URL markdown anthropic routing", () => { + it("scopes openai-only request options away from anthropic markdown calls", () => { + mocks.createHtmlToMarkdownConverter.mockClear(); + const ctx = buildCtx({ + openaiRequestOptions: { 
reasoningEffort: "high" }, + openaiRequestOptionsOverride: { serviceTier: "fast" }, + }); + + createMarkdownConverters(ctx, { isYoutubeUrl: false }); + + expect(mocks.createHtmlToMarkdownConverter).toHaveBeenCalledWith( + expect.objectContaining({ + modelId: expect.stringContaining("anthropic"), + requestOptions: undefined, + }), + ); + }); + + it("forwards an explicit CLI --thinking override to anthropic markdown calls", () => { + mocks.createHtmlToMarkdownConverter.mockClear(); + const ctx = buildCtx({ + openaiRequestOptions: { reasoningEffort: "high" }, + cliReasoningEffortOverride: "xhigh", + }); + + createMarkdownConverters(ctx, { isYoutubeUrl: false }); + + expect(mocks.createHtmlToMarkdownConverter).toHaveBeenCalledWith( + expect.objectContaining({ + requestOptions: { reasoningEffort: "xhigh" }, + }), + ); + }); +}); From eefcb7cb6edcdc8d6f2828e8cb15e796244119a7 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Thu, 11 Jun 2026 02:41:46 +0100 Subject: [PATCH 6/6] docs: note Anthropic thinking passthrough --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1acef696..bde6bb66 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ - OpenAI-compatible gateways: honor `OPENAI_USE_CHAT_COMPLETIONS=false` and `openai.useChatCompletions=false` so custom base URLs can use the Responses API (#235, #236, thanks @mzbgf). - RSS transcripts: block feed-controlled transcript URLs that target loopback, private, link-local, reserved, or redirected local-network addresses (#239, thanks @Hinotoi-agent). - Podcast transcripts: cap remote media downloads at 512 MB by default, with a finite opt-in override for larger files (#237, thanks @Hinotoi-agent). +- Anthropic: forward explicit CLI `--thinking` to Anthropic text and streaming requests without leaking persisted OpenAI thinking defaults into non-OpenAI providers (#233, thanks @wangwllu). 
- Chrome extension: abort stale side-panel summary streams on tab changes so delayed output from a closed or replaced tab cannot render under the new page title. - Core: extract video IDs from YouTube `/live/` URLs so live and premiere links no longer abort summarization (#232, thanks @devYRPauli). - Chrome extension: keep YouTube slide cards on the shared slide-summary path so local browser thumbnails receive the same summary text shape as CLI `--slides`.