diff --git a/CHANGELOG.md b/CHANGELOG.md index 1acef696..bde6bb66 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,7 @@ - OpenAI-compatible gateways: honor `OPENAI_USE_CHAT_COMPLETIONS=false` and `openai.useChatCompletions=false` so custom base URLs can use the Responses API (#235, #236, thanks @mzbgf). - RSS transcripts: block feed-controlled transcript URLs that target loopback, private, link-local, reserved, or redirected local-network addresses (#239, thanks @Hinotoi-agent). - Podcast transcripts: cap remote media downloads at 512 MB by default, with a finite opt-in override for larger files (#237, thanks @Hinotoi-agent). +- Anthropic: forward explicit CLI `--thinking` to Anthropic text and streaming requests without leaking persisted OpenAI thinking defaults into non-OpenAI providers (#233, thanks @wangwllu). - Chrome extension: abort stale side-panel summary streams on tab changes so delayed output from a closed or replaced tab cannot render under the new page title. - Core: extract video IDs from YouTube `/live/` URLs so live and premiere links no longer abort summarization (#232, thanks @devYRPauli). - Chrome extension: keep YouTube slide cards on the shared slide-summary path so local browser thumbnails receive the same summary text shape as CLI `--slides`. diff --git a/src/daemon/chat.ts b/src/daemon/chat.ts index 5e4c1512..505dd73a 100644 --- a/src/daemon/chat.ts +++ b/src/daemon/chat.ts @@ -4,7 +4,8 @@ import { runCliModel } from "../llm/cli.js"; import type { LlmApiKeys } from "../llm/generate-text.js"; import { streamTextWithContext } from "../llm/generate-text.js"; import { resolveGitHubModelsApiKey } from "../llm/github-models.js"; -import { mergeModelRequestOptions } from "../llm/model-options.js"; +import { parseGatewayStyleModelId } from "../llm/model-id.js"; +import { mergeModelRequestOptions, mergeRequestOptionsForProvider } from "../llm/model-options.js"; import { buildAutoModelAttempts, envHasKey } from "../model-auto.js"; import { parseBooleanEnv, parseCliUserModelId } from "../run/env.js"; import { resolveEnvState } from "../run/run-env.js"; @@ -257,7 +258,12 @@ export async function streamChatResponse({ forceOpenRouter: resolved.forceOpenRouter, openaiBaseUrlOverride: resolved.openaiBaseUrlOverride, forceChatCompletions: resolved.forceChatCompletions, - requestOptions: mergeModelRequestOptions(openaiRequestOptions, resolved.requestOptions), + requestOptions: mergeRequestOptionsForProvider({ + provider: parseGatewayStyleModelId(resolved.modelId!).provider, + openaiGlobalDefault: openaiRequestOptions, + attemptOptions: resolved.requestOptions, + openaiOverride: undefined, + }), }); for await (const chunk of result.textStream) { pushToSession({ event: "content", data: chunk }); @@ -332,7 +338,12 @@ export async function streamChatResponse({ : attempt.requiredEnv === "OPENAI_API_KEY" ? openaiUseChatCompletions : undefined, - requestOptions: mergeModelRequestOptions(openaiRequestOptions, attempt.requestOptions), + requestOptions: mergeRequestOptionsForProvider({ + provider: parseGatewayStyleModelId(attempt.llmModelId!).provider, + openaiGlobalDefault: openaiRequestOptions, + attemptOptions: attempt.requestOptions, + openaiOverride: undefined, + }), }); for await (const chunk of result.textStream) { pushToSession({ event: "content", data: chunk }); diff --git a/src/llm/generate-text-stream.ts b/src/llm/generate-text-stream.ts index 92cd8ec5..baa9402d 100644 --- a/src/llm/generate-text-stream.ts +++ b/src/llm/generate-text-stream.ts @@ -10,7 +10,10 @@ import { resolveOpenAiCompatibleClientConfigForProvider, supportsStreaming, } from "./provider-capabilities.js"; -import { normalizeAnthropicModelAccessError } from "./providers/anthropic.js"; +import { + normalizeAnthropicModelAccessError, + prepareAnthropicReasoning, +} from "./providers/anthropic.js"; import { resolveAnthropicModel, resolveGoogleModel, @@ -296,14 +299,20 @@ export async function streamTextWithContext({ if (parsed.provider === "anthropic") { const apiKey = apiKeys.anthropicApiKey; if (!apiKey) throw new Error("Missing ANTHROPIC_API_KEY for anthropic/... model"); - const model = resolveAnthropicModel({ + const baseModel = resolveAnthropicModel({ modelId: parsed.model, context, anthropicBaseUrlOverride, }); + const { model, reasoning } = prepareAnthropicReasoning({ + modelId: parsed.model, + baseModel, + reasoningEffort: requestOptions?.reasoningEffort, + }); const stream = streamSimple(model, context, { ...(typeof effectiveTemperature === "number" ? { temperature: effectiveTemperature } : {}), ...(typeof maxOutputTokens === "number" ? { maxTokens: maxOutputTokens } : {}), + ...(reasoning ? { reasoning } : {}), apiKey, signal: controller.signal, }); diff --git a/src/llm/generate-text.ts b/src/llm/generate-text.ts index 6691e70e..51118d04 100644 --- a/src/llm/generate-text.ts +++ b/src/llm/generate-text.ts @@ -250,6 +250,7 @@ export async function generateTextWithModelId({ context, temperature: effectiveTemperature, maxOutputTokens, + reasoningEffort: requestOptions?.reasoningEffort, signal: controller.signal, anthropicBaseUrlOverride, }); diff --git a/src/llm/model-options.ts b/src/llm/model-options.ts index f34db280..489323ba 100644 --- a/src/llm/model-options.ts +++ b/src/llm/model-options.ts @@ -79,6 +79,46 @@ export function mergeModelRequestOptions( return Object.keys(merged).length > 0 ? merged : undefined; } +/** + * Merge request options for a specific provider. + * + * - `openaiGlobalDefault` comes from the persisted `openai.*` config block — a + * provider-scoped default that must NOT bleed into non-openai requests. + * - `openaiOverride` comes from `--fast` / `--service-tier`, which are + * documented as OpenAI-only knobs. Also only for the openai provider. + * - `cliReasoningEffortOverride` comes from the explicit `--thinking` CLI flag, + * which is cross-provider (the user opted in for this run). It is forwarded + * to whichever provider is dispatched. + * - `attemptOptions` is the per-attempt options bag (from the model config or + * provider-prefixed CLI id) and applies to every provider. + */ +export function mergeRequestOptionsForProvider({ + provider, + openaiGlobalDefault, + attemptOptions, + openaiOverride, + cliReasoningEffortOverride, +}: { + provider: string; + openaiGlobalDefault: ModelRequestOptionsInput | null | undefined; + attemptOptions: ModelRequestOptionsInput | null | undefined; + openaiOverride: ModelRequestOptionsInput | null | undefined; + cliReasoningEffortOverride?: OpenAiReasoningEffort | undefined; +}): ModelRequestOptions | undefined { + const cliReasoningEntry: ModelRequestOptionsInput | undefined = cliReasoningEffortOverride + ? { reasoningEffort: cliReasoningEffortOverride } + : undefined; + if (provider === "openai") { + return mergeModelRequestOptions( + openaiGlobalDefault, + attemptOptions, + openaiOverride, + cliReasoningEntry, + ); + } + return mergeModelRequestOptions(attemptOptions, cliReasoningEntry); +} + export function toOpenAiServiceTierParam(serviceTier: string | undefined): string | undefined { const normalized = serviceTier?.trim(); if (!normalized) return undefined; diff --git a/src/llm/providers/anthropic.ts b/src/llm/providers/anthropic.ts index 618597b9..6c647aac 100644 --- a/src/llm/providers/anthropic.ts +++ b/src/llm/providers/anthropic.ts @@ -1,10 +1,58 @@ -import type { Context } from "@earendil-works/pi-ai"; +import type { Context, Model, ThinkingLevel } from "@earendil-works/pi-ai"; +import type { Api } from "@earendil-works/pi-ai"; import { completeSimple } from "@earendil-works/pi-ai"; import type { Attachment } from "../attachments.js"; +import type { OpenAiReasoningEffort } from "../model-options.js"; import type { LlmTokenUsage } from "../types.js"; import { normalizeAnthropicUsage, normalizeTokenUsage } from "../usage.js"; import { resolveAnthropicModel } from "./models.js"; -import { bytesToBase64, extractText, resolveBaseUrlOverride } from "./shared.js"; +import { bytesToBase64, extractText, resolveBaseUrlOverride, tryGetModel } from "./shared.js"; + +function effortToThinkingLevel( + effort: OpenAiReasoningEffort | undefined, +): ThinkingLevel | undefined { + if (!effort || effort === "none") return undefined; + return effort; +} + +/** + * Decide the model and `reasoning` option to pass into the pi-ai Anthropic + * adapter. Shared by non-streaming and streaming text dispatch. + * + * pi-ai 0.75.5 enables extended thinking whenever the caller passes a + * `reasoning` option, regardless of `model.reasoning`. So: + * + * - Registered models with `reasoning: true` (Claude 4+): forward `reasoning`. + * - Registered models with `reasoning: false` (Claude 3 / 3.5): drop + * `reasoning` entirely; forwarding it would have pi-ai send a `thinking` + * block to an API that rejects it. + * - Synthetic models (`tryGetModel` miss — typically custom + * `ANTHROPIC_BASE_URL` proxies in front of newer Claude versions): + * `createSyntheticModel` hard-codes `reasoning: false`, so we flip a copy + * to `reasoning: true` and forward the effort level. + */ +export function prepareAnthropicReasoning({ + modelId, + baseModel, + reasoningEffort, +}: { + modelId: string; + baseModel: Model; + reasoningEffort?: OpenAiReasoningEffort; +}): { model: Model; reasoning?: ThinkingLevel } { + const reasoning = effortToThinkingLevel(reasoningEffort); + if (!reasoning) return { model: baseModel }; + const isSynthetic = !tryGetModel("anthropic", modelId); + if (!baseModel.reasoning) { + if (isSynthetic) { + return { model: { ...baseModel, reasoning: true }, reasoning }; + } + // Registered but flagged unsupported (e.g. Claude 3/3.5): drop reasoning + // so pi-ai does not enable thinking on a model the API rejects it for. + return { model: baseModel }; + } + return { model: baseModel, reasoning }; +} function parseAnthropicErrorPayload( responseBody: string, @@ -57,6 +105,7 @@ export async function completeAnthropicText({ context, temperature, maxOutputTokens, + reasoningEffort, signal, anthropicBaseUrlOverride, }: { @@ -65,17 +114,20 @@ export async function completeAnthropicText({ context: Context; temperature?: number; maxOutputTokens?: number; + reasoningEffort?: OpenAiReasoningEffort; signal: AbortSignal; anthropicBaseUrlOverride?: string | null; }): Promise<{ text: string; usage: LlmTokenUsage | null }> { - const model = resolveAnthropicModel({ + const baseModel = resolveAnthropicModel({ modelId, context, anthropicBaseUrlOverride, }); + const { model, reasoning } = prepareAnthropicReasoning({ modelId, baseModel, reasoningEffort }); const result = await completeSimple(model, context, { ...(typeof temperature === "number" ? { temperature } : {}), ...(typeof maxOutputTokens === "number" ? { maxTokens: maxOutputTokens } : {}), + ...(reasoning ? { reasoning } : {}), apiKey, signal, }); diff --git a/src/run/flows/url/markdown.ts b/src/run/flows/url/markdown.ts index d79f2e73..e0227f7f 100644 --- a/src/run/flows/url/markdown.ts +++ b/src/run/flows/url/markdown.ts @@ -1,7 +1,7 @@ import { resolveGitHubModelsApiKey } from "../../../llm/github-models.js"; import { createHtmlToMarkdownConverter } from "../../../llm/html-to-markdown.js"; import { parseGatewayStyleModelId } from "../../../llm/model-id.js"; -import { mergeModelRequestOptions } from "../../../llm/model-options.js"; +import { mergeRequestOptionsForProvider } from "../../../llm/model-options.js"; import { type ConvertTranscriptToMarkdown, createTranscriptToMarkdownConverter, @@ -257,11 +257,13 @@ export function createMarkdownConverters( forceChatCompletions: markdownModel.forceChatCompletions ?? (ctx.model.openaiUseChatCompletions && markdownProvider === "openai"), - requestOptions: mergeModelRequestOptions( - ctx.model.openaiRequestOptions, - markdownModel.requestOptions, - ctx.model.openaiRequestOptionsOverride, - ), + requestOptions: mergeRequestOptionsForProvider({ + provider: markdownProvider, + openaiGlobalDefault: ctx.model.openaiRequestOptions, + attemptOptions: markdownModel.requestOptions, + openaiOverride: ctx.model.openaiRequestOptionsOverride, + cliReasoningEffortOverride: ctx.model.cliReasoningEffortOverride, + }), fetchImpl: ctx.io.fetch, retries: ctx.flags.retries, onRetry: createRetryLogger({ @@ -363,11 +365,13 @@ export function createMarkdownConverters( forceChatCompletions: markdownModel.forceChatCompletions ?? (ctx.model.openaiUseChatCompletions && markdownProvider === "openai"), - requestOptions: mergeModelRequestOptions( - ctx.model.openaiRequestOptions, - markdownModel.requestOptions, - ctx.model.openaiRequestOptionsOverride, - ), + requestOptions: mergeRequestOptionsForProvider({ + provider: markdownProvider, + openaiGlobalDefault: ctx.model.openaiRequestOptions, + attemptOptions: markdownModel.requestOptions, + openaiOverride: ctx.model.openaiRequestOptionsOverride, + cliReasoningEffortOverride: ctx.model.cliReasoningEffortOverride, + }), fetchImpl: ctx.io.fetch, retries: ctx.flags.retries, onRetry: createRetryLogger({ diff --git a/src/run/flows/url/types.ts b/src/run/flows/url/types.ts index 9619293a..78d6cf63 100644 --- a/src/run/flows/url/types.ts +++ b/src/run/flows/url/types.ts @@ -8,7 +8,7 @@ import type { import type { LlmCall, RunMetricsReport } from "../../../costs.js"; import type { StreamMode } from "../../../flags.js"; import type { OutputLanguage } from "../../../language.js"; -import type { ModelRequestOptions } from "../../../llm/model-options.js"; +import type { ModelRequestOptions, OpenAiReasoningEffort } from "../../../llm/model-options.js"; import type { ExecFileFn } from "../../../markitdown.js"; import type { FixedModelSpec, RequestedModel } from "../../../model-spec.js"; import type { SummaryLength } from "../../../shared/contracts.js"; @@ -89,6 +89,7 @@ export type UrlFlowModel = { openaiUseChatCompletions: boolean | undefined; openaiRequestOptions?: ModelRequestOptions; openaiRequestOptionsOverride?: ModelRequestOptions; + cliReasoningEffortOverride?: OpenAiReasoningEffort; openaiWhisperUsdPerMinute: number; apiStatus: { xaiApiKey: string | null; diff --git a/src/run/run-config.ts b/src/run/run-config.ts index ba38e2a5..47a94a82 100644 --- a/src/run/run-config.ts +++ b/src/run/run-config.ts @@ -3,7 +3,7 @@ import { loadSummarizeConfig } from "../config.js"; import { parseVideoMode } from "../flags.js"; import { type OutputLanguage, parseOutputLanguage } from "../language.js"; import { parseOpenAiReasoningEffort, parseOpenAiServiceTier } from "../llm/model-options.js"; -import type { ModelRequestOptions } from "../llm/model-options.js"; +import type { ModelRequestOptions, OpenAiReasoningEffort } from "../llm/model-options.js"; import { parseBooleanEnv } from "./env.js"; export type ConfigState = { @@ -17,6 +17,7 @@ export type ConfigState = { openaiUseChatCompletions: boolean | undefined; openaiRequestOptions: ModelRequestOptions | undefined; openaiRequestOptionsOverride: ModelRequestOptions | undefined; + cliReasoningEffortOverride: OpenAiReasoningEffort | undefined; configModelLabel: string | null; }; @@ -110,13 +111,15 @@ export function resolveConfigState({ } options.serviceTier = serviceTier; } - const rawThinking = typeof programOpts.thinking === "string" ? programOpts.thinking : null; - if (rawThinking) { - options.reasoningEffort = parseOpenAiReasoningEffort(rawThinking, "--thinking"); - } return Object.keys(options).length > 0 ? options : undefined; })(); + const cliReasoningEffortOverride: OpenAiReasoningEffort | undefined = (() => { + const rawThinking = typeof programOpts.thinking === "string" ? programOpts.thinking : null; + if (!rawThinking) return undefined; + return parseOpenAiReasoningEffort(rawThinking, "--thinking"); + })(); + const configModelLabel = (() => { const model = config?.model; if (!model) return null; @@ -137,6 +140,7 @@ export function resolveConfigState({ openaiUseChatCompletions, openaiRequestOptions, openaiRequestOptionsOverride, + cliReasoningEffortOverride, configModelLabel, }; } diff --git a/src/run/runner-plan.ts b/src/run/runner-plan.ts index 4b72237a..0c096cdf 100644 --- a/src/run/runner-plan.ts +++ b/src/run/runner-plan.ts @@ -145,6 +145,7 @@ export async function createRunnerPlan(options: { openaiUseChatCompletions, openaiRequestOptions, openaiRequestOptionsOverride, + cliReasoningEffortOverride, configModelLabel, apiKey, openrouterApiKey, @@ -340,6 +341,7 @@ export async function createRunnerPlan(options: { openaiUseChatCompletions, openaiRequestOptions, openaiRequestOptionsOverride, + cliReasoningEffortOverride, cliConfigForRun: cliConfigForRun ?? null, cliAvailability, trackedFetch, @@ -452,6 +454,7 @@ export async function createRunnerPlan(options: { openaiUseChatCompletions, openaiRequestOptions, openaiRequestOptionsOverride, + cliReasoningEffortOverride, openaiWhisperUsdPerMinute, apiStatus: { xaiApiKey, diff --git a/src/run/summary-engine.ts b/src/run/summary-engine.ts index e35cac77..b4f407a9 100644 --- a/src/run/summary-engine.ts +++ b/src/run/summary-engine.ts @@ -5,8 +5,8 @@ import { isCliDisabled, runCliModel } from "../llm/cli.js"; import { streamTextWithModelId } from "../llm/generate-text.js"; import { resolveGitHubModelsApiKey } from "../llm/github-models.js"; import { parseGatewayStyleModelId } from "../llm/model-id.js"; -import { mergeModelRequestOptions } from "../llm/model-options.js"; -import type { ModelRequestOptions } from "../llm/model-options.js"; +import { mergeRequestOptionsForProvider } from "../llm/model-options.js"; +import type { ModelRequestOptions, OpenAiReasoningEffort } from "../llm/model-options.js"; import type { Prompt } from "../llm/prompt.js"; import { formatCompactCount } from "../tty/format.js"; import { createRetryLogger, writeVerbose } from "./logging.js"; @@ -39,6 +39,7 @@ export type SummaryEngineDeps = { openaiUseChatCompletions: boolean | undefined; openaiRequestOptions?: ModelRequestOptions; openaiRequestOptionsOverride?: ModelRequestOptions; + cliReasoningEffortOverride?: OpenAiReasoningEffort; cliConfigForRun: Parameters[0]["config"]; cliAvailability: Partial>; trackedFetch: typeof fetch; @@ -322,11 +323,13 @@ export function createSummaryEngine(deps: SummaryEngineDeps) { ); } const parsedModelEffective = parseGatewayStyleModelId(modelResolution.modelId); - const requestOptions = mergeModelRequestOptions( - deps.openaiRequestOptions, - attempt.requestOptions, - deps.openaiRequestOptionsOverride, - ); + const requestOptions = mergeRequestOptionsForProvider({ + provider: parsedModelEffective.provider, + openaiGlobalDefault: deps.openaiRequestOptions, + attemptOptions: attempt.requestOptions, + openaiOverride: deps.openaiRequestOptionsOverride, + cliReasoningEffortOverride: deps.cliReasoningEffortOverride, + }); const streamingEnabledForCall = allowStreaming && deps.streamingEnabled && diff --git a/tests/llm.anthropic-reasoning.test.ts b/tests/llm.anthropic-reasoning.test.ts new file mode 100644 index 00000000..7ea71f3c --- /dev/null +++ b/tests/llm.anthropic-reasoning.test.ts @@ -0,0 +1,95 @@ +import type { Api, Model } from "@earendil-works/pi-ai"; +import { describe, expect, it } from "vitest"; +import { prepareAnthropicReasoning } from "../src/llm/providers/anthropic.js"; + +function makeBase( + modelId: string, + reasoning: boolean, + baseUrl = "https://api.anthropic.com", +): Model { + return { + id: modelId, + name: `anthropic/${modelId}`, + api: "anthropic-messages", + provider: "anthropic", + baseUrl, + reasoning, + input: ["text", "image"], + cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 }, + contextWindow: 200_000, + maxTokens: 32_000, + }; +} + +describe("prepareAnthropicReasoning", () => { + it("returns the base model untouched when no effort is requested", () => { + const baseModel = makeBase("claude-opus-4-5", true); + const result = prepareAnthropicReasoning({ modelId: "claude-opus-4-5", baseModel }); + expect(result.model).toBe(baseModel); + expect(result.reasoning).toBeUndefined(); + }); + + it("treats 'none' as off and does not forward reasoning", () => { + const baseModel = makeBase("claude-opus-4-5", true); + const result = prepareAnthropicReasoning({ + modelId: "claude-opus-4-5", + baseModel, + reasoningEffort: "none", + }); + expect(result.reasoning).toBeUndefined(); + expect(result.model).toBe(baseModel); + }); + + it("forwards reasoning for a supported registered model without mutating metadata", () => { + // Registered Anthropic model with reasoning support; resolveAnthropicModel + // returns it intact so we should not flip any flags. + const baseModel = makeBase("claude-opus-4-5", true); + const result = prepareAnthropicReasoning({ + modelId: "claude-opus-4-5", + baseModel, + reasoningEffort: "xhigh", + }); + expect(result.reasoning).toBe("xhigh"); + expect(result.model).toBe(baseModel); + expect(result.model.reasoning).toBe(true); + }); + + it("drops reasoning on registered unsupported models (Claude 3/3.5) so pi-ai does not enable thinking", () => { + // pi-ai 0.75.5 enables extended thinking whenever `options.reasoning` is + // present, regardless of `model.reasoning`. For Claude 3/3.5 the API + // rejects thinking blocks, so we must drop the reasoning option entirely + // when the user has a global `thinking` setting active. + const baseModel = makeBase("claude-3-5-sonnet-20241022", false); + const result = prepareAnthropicReasoning({ + modelId: "claude-3-5-sonnet-20241022", + baseModel, + reasoningEffort: "high", + }); + expect(result.model).toBe(baseModel); + expect(result.model.reasoning).toBe(false); + expect(result.reasoning).toBeUndefined(); + }); + + it("opts synthetic models into thinking so the request body carries thinking", () => { + // A custom modelId not in the pi-ai registry (e.g. `Claude-Opus-4.7` + // routed through a jdcloud-style proxy) is built via createSyntheticModel + // with reasoning: false. Without opting in, the pi-ai Anthropic adapter + // would silently drop the thinking block. + const baseModel = makeBase( + "Definitely-Not-A-Real-Claude-Model-Id-42", + false, + "https://proxy.example/anthropic", + ); + const result = prepareAnthropicReasoning({ + modelId: "Definitely-Not-A-Real-Claude-Model-Id-42", + baseModel, + reasoningEffort: "xhigh", + }); + expect(result.reasoning).toBe("xhigh"); + expect(result.model).not.toBe(baseModel); + expect(result.model.reasoning).toBe(true); + // Other model fields should be preserved. + expect(result.model.id).toBe(baseModel.id); + expect(result.model.baseUrl).toBe(baseModel.baseUrl); + }); +}); diff --git a/tests/model-options.test.ts b/tests/model-options.test.ts index 4f131091..05aa2b29 100644 --- a/tests/model-options.test.ts +++ b/tests/model-options.test.ts @@ -1,6 +1,7 @@ import { describe, expect, it } from "vitest"; import { mergeModelRequestOptions, + mergeRequestOptionsForProvider, parseOpenAiReasoningEffort, toOpenAiServiceTierParam, } from "../src/llm/model-options.js"; @@ -26,3 +27,107 @@ describe("model request options", () => { expect(() => parseOpenAiReasoningEffort("minimal")).toThrow(/expected none, low/); }); }); + +describe("mergeRequestOptionsForProvider", () => { + const openaiGlobalDefault = { reasoningEffort: "high" as const }; + const openaiOverride = { serviceTier: "fast" }; + + it("applies the OpenAI-scoped global default and override only for the openai provider", () => { + const merged = mergeRequestOptionsForProvider({ + provider: "openai", + openaiGlobalDefault, + attemptOptions: undefined, + openaiOverride, + }); + expect(merged).toEqual({ reasoningEffort: "high", serviceTier: "fast" }); + }); + + it("does not bleed the OpenAI-scoped global default into the anthropic provider", () => { + const merged = mergeRequestOptionsForProvider({ + provider: "anthropic", + openaiGlobalDefault, + attemptOptions: undefined, + openaiOverride, + }); + expect(merged).toBeUndefined(); + }); + + it("forwards the per-attempt reasoning effort to anthropic when the user opted in for that attempt", () => { + const merged = mergeRequestOptionsForProvider({ + provider: "anthropic", + openaiGlobalDefault, + attemptOptions: { reasoningEffort: "xhigh" }, + openaiOverride, + }); + expect(merged).toEqual({ reasoningEffort: "xhigh" }); + }); + + it("isolates other non-openai providers (zai, google, xai) from openai-scoped defaults", () => { + for (const provider of ["zai", "google", "xai", "nvidia", "ollama", "github-copilot"]) { + const merged = mergeRequestOptionsForProvider({ + provider, + openaiGlobalDefault, + attemptOptions: undefined, + openaiOverride, + }); + expect(merged, `provider ${provider}`).toBeUndefined(); + } + }); + + it("forwards an explicit CLI --thinking override to the anthropic provider", () => { + const merged = mergeRequestOptionsForProvider({ + provider: "anthropic", + openaiGlobalDefault, + attemptOptions: undefined, + openaiOverride, + cliReasoningEffortOverride: "xhigh", + }); + expect(merged).toEqual({ reasoningEffort: "xhigh" }); + }); + + it("does not leak a persisted openai.thinking config default into anthropic without a CLI override", () => { + const merged = mergeRequestOptionsForProvider({ + provider: "anthropic", + openaiGlobalDefault: { reasoningEffort: "high" }, + attemptOptions: undefined, + openaiOverride: undefined, + cliReasoningEffortOverride: undefined, + }); + expect(merged).toBeUndefined(); + }); + + it("respects persisted openai.thinking for openai when no CLI override is set", () => { + const merged = mergeRequestOptionsForProvider({ + provider: "openai", + openaiGlobalDefault: { reasoningEffort: "high" }, + attemptOptions: undefined, + openaiOverride: undefined, + cliReasoningEffortOverride: undefined, + }); + expect(merged).toEqual({ reasoningEffort: "high" }); + }); + + it("lets a CLI --thinking override beat persisted openai.thinking for the openai provider", () => { + const merged = mergeRequestOptionsForProvider({ + provider: "openai", + openaiGlobalDefault: { reasoningEffort: "high" }, + attemptOptions: undefined, + openaiOverride: undefined, + cliReasoningEffortOverride: "xhigh", + }); + expect(merged).toEqual({ reasoningEffort: "xhigh" }); + }); + + it("forwards CLI --thinking to other non-openai providers (zai, google, xai, ...)", () => { + for (const provider of ["zai", "google", "xai", "nvidia", "ollama", "github-copilot"]) { + const merged = mergeRequestOptionsForProvider({ + provider, + openaiGlobalDefault, + attemptOptions: undefined, + openaiOverride, + cliReasoningEffortOverride: "xhigh", + }); + expect(merged, `provider ${provider}`).toEqual({ reasoningEffort: "xhigh" }); + } + }); +}); diff --git a/tests/run.config.test.ts b/tests/run.config.test.ts index b296e275..bafa7d96 100644 --- a/tests/run.config.test.ts +++ b/tests/run.config.test.ts @@ -33,13 +33,12 @@ function resolveTestConfigStateWithEnv( } describe("run config", () => { - it("maps --fast and --thinking to OpenAI request overrides", () => { - expect( - resolveTestConfigState({ fast: true, thinking: "mid" }).openaiRequestOptionsOverride, - ).toEqual({ + it("maps --fast to OpenAI request overrides and --thinking to the cross-provider CLI override", () => { + const state = resolveTestConfigState({ fast: true, thinking: "mid" }); + expect(state.openaiRequestOptionsOverride).toEqual({ serviceTier: "fast", - reasoningEffort: "medium", }); + expect(state.cliReasoningEffortOverride).toBe("medium"); }); it("maps --service-tier to OpenAI request overrides", () => { @@ -72,4 +71,10 @@ describe("run config", () => { it("leaves openaiUseChatCompletions unset when there is no env or config override", () => { expect(resolveTestConfigState({}).openaiUseChatCompletions).toBeUndefined(); }); + + it("lifts --thinking out of the openai-scoped override entirely", () => { + const state = resolveTestConfigState({ thinking: "xhigh" }); + expect(state.openaiRequestOptionsOverride).toBeUndefined(); + expect(state.cliReasoningEffortOverride).toBe("xhigh"); + }); }); diff --git a/tests/run.url-markdown-anthropic.test.ts b/tests/run.url-markdown-anthropic.test.ts new file mode 100644 index 00000000..3cd3ab5a --- /dev/null +++ b/tests/run.url-markdown-anthropic.test.ts @@ -0,0 +1,117 @@ +import { Writable } from "node:stream"; +import { describe, expect, it, vi } from "vitest"; +import { parseRequestedModelId } from "../src/model-spec.js"; + +const mocks = vi.hoisted(() => ({ + createHtmlToMarkdownConverter: vi.fn(() => async () => "# Converted"), +})); + +vi.mock("../src/llm/html-to-markdown.js", () => ({ + createHtmlToMarkdownConverter: mocks.createHtmlToMarkdownConverter, +})); + +import { createMarkdownConverters } from "../src/run/flows/url/markdown.js"; +import type { UrlFlowContext } from "../src/run/flows/url/types.js"; + +function sink() { + return new Writable({ + write(_chunk, _encoding, callback) { + callback(); + }, + }); +} + +function buildCtx(opts: { + openaiRequestOptions?: { reasoningEffort?: "high" }; + openaiRequestOptionsOverride?: { serviceTier?: "fast" }; + cliReasoningEffortOverride?: "xhigh"; +}): UrlFlowContext { + const fixedModel = parseRequestedModelId("anthropic/claude-sonnet-4-5"); + if (fixedModel.kind !== "fixed" || fixedModel.transport !== "native") { + throw new Error("expected fixed native anthropic model"); + } + return { + io: { + env: {}, + envForRun: {}, + stdout: sink(), + stderr: sink(), + fetch: globalThis.fetch.bind(globalThis), + execFileImpl: vi.fn(), + }, + flags: { + format: "markdown", + markdownMode: "llm", + transcriptTimestamps: false, + preprocessMode: "off", + retries: 0, + verbose: false, + verboseColor: false, + }, + model: { + requestedModel: fixedModel, + fixedModelSpec: fixedModel, + apiStatus: { + xaiApiKey: null, + googleApiKey: null, + apiKey: null, + anthropicApiKey: "sk-test", + openrouterApiKey: null, + openrouterConfigured: false, + googleConfigured: false, + anthropicConfigured: true, + zaiApiKey: null, + zaiBaseUrl: "", + nvidiaApiKey: null, + nvidiaBaseUrl: "", + ollamaBaseUrl: "", + providerBaseUrls: { + openai: null, + anthropic: null, + google: null, + xai: null, + }, + }, + openaiUseChatCompletions: false, + openaiRequestOptions: opts.openaiRequestOptions, + openaiRequestOptionsOverride: opts.openaiRequestOptionsOverride, + cliReasoningEffortOverride: opts.cliReasoningEffortOverride, + llmCalls: [], + }, + } as unknown as UrlFlowContext; +} + +describe("URL markdown anthropic routing", () => { + it("scopes openai-only request options away from anthropic markdown calls", () => { + mocks.createHtmlToMarkdownConverter.mockClear(); + const ctx = buildCtx({ + openaiRequestOptions: { reasoningEffort: "high" }, + openaiRequestOptionsOverride: { serviceTier: "fast" }, + }); + + createMarkdownConverters(ctx, { isYoutubeUrl: false }); + + expect(mocks.createHtmlToMarkdownConverter).toHaveBeenCalledWith( + expect.objectContaining({ + modelId: expect.stringContaining("anthropic"), + requestOptions: undefined, + }), + ); + }); + + it("forwards an explicit CLI --thinking override to anthropic markdown calls", () => { + mocks.createHtmlToMarkdownConverter.mockClear(); + const ctx = buildCtx({ + openaiRequestOptions: { reasoningEffort: "high" }, + cliReasoningEffortOverride: "xhigh", + }); + + createMarkdownConverters(ctx, { isYoutubeUrl: false }); + + expect(mocks.createHtmlToMarkdownConverter).toHaveBeenCalledWith( + expect.objectContaining({ + requestOptions: { reasoningEffort: "xhigh" }, + }), + ); + }); +});