Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
- OpenAI-compatible gateways: honor `OPENAI_USE_CHAT_COMPLETIONS=false` and `openai.useChatCompletions=false` so custom base URLs can use the Responses API (#235, #236, thanks @mzbgf).
- RSS transcripts: block feed-controlled transcript URLs that target loopback, private, link-local, reserved, or redirected local-network addresses (#239, thanks @Hinotoi-agent).
- Podcast transcripts: cap remote media downloads at 512 MB by default, with a finite opt-in override for larger files (#237, thanks @Hinotoi-agent).
- Anthropic: forward explicit CLI `--thinking` to Anthropic text and streaming requests without leaking persisted OpenAI thinking defaults into non-OpenAI providers (#233, thanks @wangwllu).
- Chrome extension: abort stale side-panel summary streams on tab changes so delayed output from a closed or replaced tab cannot render under the new page title.
- Core: extract video IDs from YouTube `/live/` URLs so live and premiere links no longer abort summarization (#232, thanks @devYRPauli).
- Chrome extension: keep YouTube slide cards on the shared slide-summary path so local browser thumbnails receive the same summary text shape as CLI `--slides`.
Expand Down
17 changes: 14 additions & 3 deletions src/daemon/chat.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ import { runCliModel } from "../llm/cli.js";
import type { LlmApiKeys } from "../llm/generate-text.js";
import { streamTextWithContext } from "../llm/generate-text.js";
import { resolveGitHubModelsApiKey } from "../llm/github-models.js";
import { mergeModelRequestOptions } from "../llm/model-options.js";
import { parseGatewayStyleModelId } from "../llm/model-id.js";
import { mergeModelRequestOptions, mergeRequestOptionsForProvider } from "../llm/model-options.js";
import { buildAutoModelAttempts, envHasKey } from "../model-auto.js";
import { parseBooleanEnv, parseCliUserModelId } from "../run/env.js";
import { resolveEnvState } from "../run/run-env.js";
Expand Down Expand Up @@ -257,7 +258,12 @@ export async function streamChatResponse({
forceOpenRouter: resolved.forceOpenRouter,
openaiBaseUrlOverride: resolved.openaiBaseUrlOverride,
forceChatCompletions: resolved.forceChatCompletions,
requestOptions: mergeModelRequestOptions(openaiRequestOptions, resolved.requestOptions),
requestOptions: mergeRequestOptionsForProvider({
provider: parseGatewayStyleModelId(resolved.modelId!).provider,
openaiGlobalDefault: openaiRequestOptions,
attemptOptions: resolved.requestOptions,
openaiOverride: undefined,
}),
});
for await (const chunk of result.textStream) {
pushToSession({ event: "content", data: chunk });
Expand Down Expand Up @@ -332,7 +338,12 @@ export async function streamChatResponse({
: attempt.requiredEnv === "OPENAI_API_KEY"
? openaiUseChatCompletions
: undefined,
requestOptions: mergeModelRequestOptions(openaiRequestOptions, attempt.requestOptions),
requestOptions: mergeRequestOptionsForProvider({
provider: parseGatewayStyleModelId(attempt.llmModelId!).provider,
openaiGlobalDefault: openaiRequestOptions,
attemptOptions: attempt.requestOptions,
openaiOverride: undefined,
}),
});
for await (const chunk of result.textStream) {
pushToSession({ event: "content", data: chunk });
Expand Down
13 changes: 11 additions & 2 deletions src/llm/generate-text-stream.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,10 @@ import {
resolveOpenAiCompatibleClientConfigForProvider,
supportsStreaming,
} from "./provider-capabilities.js";
import { normalizeAnthropicModelAccessError } from "./providers/anthropic.js";
import {
normalizeAnthropicModelAccessError,
prepareAnthropicReasoning,
} from "./providers/anthropic.js";
import {
resolveAnthropicModel,
resolveGoogleModel,
Expand Down Expand Up @@ -296,14 +299,20 @@ export async function streamTextWithContext({
if (parsed.provider === "anthropic") {
const apiKey = apiKeys.anthropicApiKey;
if (!apiKey) throw new Error("Missing ANTHROPIC_API_KEY for anthropic/... model");
const model = resolveAnthropicModel({
const baseModel = resolveAnthropicModel({
modelId: parsed.model,
context,
anthropicBaseUrlOverride,
});
const { model, reasoning } = prepareAnthropicReasoning({
modelId: parsed.model,
baseModel,
reasoningEffort: requestOptions?.reasoningEffort,
});
const stream = streamSimple(model, context, {
...(typeof effectiveTemperature === "number" ? { temperature: effectiveTemperature } : {}),
...(typeof maxOutputTokens === "number" ? { maxTokens: maxOutputTokens } : {}),
...(reasoning ? { reasoning } : {}),
apiKey,
signal: controller.signal,
});
Expand Down
1 change: 1 addition & 0 deletions src/llm/generate-text.ts
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,7 @@ export async function generateTextWithModelId({
context,
temperature: effectiveTemperature,
maxOutputTokens,
reasoningEffort: requestOptions?.reasoningEffort,
signal: controller.signal,
anthropicBaseUrlOverride,
});
Expand Down
40 changes: 40 additions & 0 deletions src/llm/model-options.ts
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,46 @@ export function mergeModelRequestOptions(
return Object.keys(merged).length > 0 ? merged : undefined;
}

/**
 * Assemble the request options for one dispatch to `provider`.
 *
 * Which inputs participate depends on the provider:
 *
 * - `attemptOptions` — the per-attempt options bag (model config or a
 *   provider-prefixed CLI id). Used for every provider.
 * - `cliReasoningEffortOverride` — the explicit `--thinking` CLI flag. The
 *   user opted in for this run, so it is cross-provider and is forwarded to
 *   whichever provider is dispatched.
 * - `openaiGlobalDefault` — the persisted `openai.*` config block. This is a
 *   provider-scoped default and must NOT bleed into non-openai requests.
 * - `openaiOverride` — `--fast` / `--service-tier`, documented as OpenAI-only
 *   knobs. Likewise only used when `provider` is `"openai"`.
 *
 * @returns whatever `mergeModelRequestOptions` produces for the selected
 *   inputs, passed in the order: OpenAI default, attempt options, OpenAI
 *   override, CLI thinking entry (the two OpenAI-only inputs are omitted for
 *   every other provider).
 */
export function mergeRequestOptionsForProvider({
  provider,
  openaiGlobalDefault,
  attemptOptions,
  openaiOverride,
  cliReasoningEffortOverride,
}: {
  provider: string;
  openaiGlobalDefault: ModelRequestOptionsInput | null | undefined;
  attemptOptions: ModelRequestOptionsInput | null | undefined;
  openaiOverride: ModelRequestOptionsInput | null | undefined;
  cliReasoningEffortOverride?: OpenAiReasoningEffort | undefined;
}): ModelRequestOptions | undefined {
  // Wrap the bare effort level in an options bag so it can be merged like
  // the other inputs; stays undefined when no `--thinking` value was given.
  let thinkingFromCli: ModelRequestOptionsInput | undefined;
  if (cliReasoningEffortOverride) {
    thinkingFromCli = { reasoningEffort: cliReasoningEffortOverride };
  }

  // Non-openai providers never see the openai-scoped default or override.
  if (provider !== "openai") {
    return mergeModelRequestOptions(attemptOptions, thinkingFromCli);
  }

  return mergeModelRequestOptions(
    openaiGlobalDefault,
    attemptOptions,
    openaiOverride,
    thinkingFromCli,
  );
}

export function toOpenAiServiceTierParam(serviceTier: string | undefined): string | undefined {
const normalized = serviceTier?.trim();
if (!normalized) return undefined;
Expand Down
58 changes: 55 additions & 3 deletions src/llm/providers/anthropic.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,58 @@
import type { Context } from "@earendil-works/pi-ai";
import type { Context, Model, ThinkingLevel } from "@earendil-works/pi-ai";
import type { Api } from "@earendil-works/pi-ai";
import { completeSimple } from "@earendil-works/pi-ai";
import type { Attachment } from "../attachments.js";
import type { OpenAiReasoningEffort } from "../model-options.js";
import type { LlmTokenUsage } from "../types.js";
import { normalizeAnthropicUsage, normalizeTokenUsage } from "../usage.js";
import { resolveAnthropicModel } from "./models.js";
import { bytesToBase64, extractText, resolveBaseUrlOverride } from "./shared.js";
import { bytesToBase64, extractText, resolveBaseUrlOverride, tryGetModel } from "./shared.js";

/**
 * Translate a reasoning-effort setting into a thinking level.
 *
 * A missing (or otherwise falsy) effort and the literal `"none"` both map to
 * `undefined`; any other effort value is returned unchanged.
 */
function effortToThinkingLevel(
  effort: OpenAiReasoningEffort | undefined,
): ThinkingLevel | undefined {
  if (effort && effort !== "none") {
    return effort;
  }
  return undefined;
}

/**
 * Pick the model object and the optional `reasoning` level handed to the
 * pi-ai Anthropic adapter. Used by both the streaming and the non-streaming
 * text paths so they make the same decision.
 *
 * Background: pi-ai 0.75.5 turns on extended thinking as soon as a
 * `reasoning` option is supplied, without consulting `model.reasoning`.
 * The outcome therefore depends on the model:
 *
 * - No effective effort (unset or `"none"`): return `baseModel` untouched,
 *   with no `reasoning`.
 * - `baseModel.reasoning` is truthy (registered Claude 4+ models): return
 *   `baseModel` together with the level.
 * - Registered model flagged `reasoning: false` (Claude 3 / 3.5): return
 *   `baseModel` with no `reasoning`, since forwarding it would make pi-ai
 *   send a `thinking` block to an API that rejects it.
 * - Synthetic model (`tryGetModel` finds nothing — typically a custom
 *   `ANTHROPIC_BASE_URL` proxy in front of a newer Claude version):
 *   `createSyntheticModel` hard-codes `reasoning: false`, so return a copy
 *   with `reasoning: true` together with the level.
 */
export function prepareAnthropicReasoning({
  modelId,
  baseModel,
  reasoningEffort,
}: {
  modelId: string;
  baseModel: Model<Api>;
  reasoningEffort?: OpenAiReasoningEffort;
}): { model: Model<Api>; reasoning?: ThinkingLevel } {
  const level = effortToThinkingLevel(reasoningEffort);
  if (!level) {
    return { model: baseModel };
  }

  const registeredModel = tryGetModel("anthropic", modelId);

  if (baseModel.reasoning) {
    return { model: baseModel, reasoning: level };
  }

  if (registeredModel) {
    // Known model that is flagged as not supporting reasoning: leave the
    // option out so pi-ai does not enable thinking for it.
    return { model: baseModel };
  }

  // Unregistered (synthetic) model: flip the flag on a copy, never on the
  // caller's object, and forward the requested level.
  const reasoningCapableModel = { ...baseModel, reasoning: true };
  return { model: reasoningCapableModel, reasoning: level };
}

function parseAnthropicErrorPayload(
responseBody: string,
Expand Down Expand Up @@ -57,6 +105,7 @@ export async function completeAnthropicText({
context,
temperature,
maxOutputTokens,
reasoningEffort,
signal,
anthropicBaseUrlOverride,
}: {
Expand All @@ -65,17 +114,20 @@ export async function completeAnthropicText({
context: Context;
temperature?: number;
maxOutputTokens?: number;
reasoningEffort?: OpenAiReasoningEffort;
signal: AbortSignal;
anthropicBaseUrlOverride?: string | null;
}): Promise<{ text: string; usage: LlmTokenUsage | null }> {
const model = resolveAnthropicModel({
const baseModel = resolveAnthropicModel({
modelId,
context,
anthropicBaseUrlOverride,
});
const { model, reasoning } = prepareAnthropicReasoning({ modelId, baseModel, reasoningEffort });
const result = await completeSimple(model, context, {
...(typeof temperature === "number" ? { temperature } : {}),
...(typeof maxOutputTokens === "number" ? { maxTokens: maxOutputTokens } : {}),
...(reasoning ? { reasoning } : {}),
apiKey,
signal,
});
Expand Down
26 changes: 15 additions & 11 deletions src/run/flows/url/markdown.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { resolveGitHubModelsApiKey } from "../../../llm/github-models.js";
import { createHtmlToMarkdownConverter } from "../../../llm/html-to-markdown.js";
import { parseGatewayStyleModelId } from "../../../llm/model-id.js";
import { mergeModelRequestOptions } from "../../../llm/model-options.js";
import { mergeRequestOptionsForProvider } from "../../../llm/model-options.js";
import {
type ConvertTranscriptToMarkdown,
createTranscriptToMarkdownConverter,
Expand Down Expand Up @@ -257,11 +257,13 @@ export function createMarkdownConverters(
forceChatCompletions:
markdownModel.forceChatCompletions ??
(ctx.model.openaiUseChatCompletions && markdownProvider === "openai"),
requestOptions: mergeModelRequestOptions(
ctx.model.openaiRequestOptions,
markdownModel.requestOptions,
ctx.model.openaiRequestOptionsOverride,
),
requestOptions: mergeRequestOptionsForProvider({
provider: markdownProvider,
openaiGlobalDefault: ctx.model.openaiRequestOptions,
attemptOptions: markdownModel.requestOptions,
openaiOverride: ctx.model.openaiRequestOptionsOverride,
cliReasoningEffortOverride: ctx.model.cliReasoningEffortOverride,
}),
fetchImpl: ctx.io.fetch,
retries: ctx.flags.retries,
onRetry: createRetryLogger({
Expand Down Expand Up @@ -363,11 +365,13 @@ export function createMarkdownConverters(
forceChatCompletions:
markdownModel.forceChatCompletions ??
(ctx.model.openaiUseChatCompletions && markdownProvider === "openai"),
requestOptions: mergeModelRequestOptions(
ctx.model.openaiRequestOptions,
markdownModel.requestOptions,
ctx.model.openaiRequestOptionsOverride,
),
requestOptions: mergeRequestOptionsForProvider({
provider: markdownProvider,
openaiGlobalDefault: ctx.model.openaiRequestOptions,
attemptOptions: markdownModel.requestOptions,
openaiOverride: ctx.model.openaiRequestOptionsOverride,
cliReasoningEffortOverride: ctx.model.cliReasoningEffortOverride,
}),
fetchImpl: ctx.io.fetch,
retries: ctx.flags.retries,
onRetry: createRetryLogger({
Expand Down
3 changes: 2 additions & 1 deletion src/run/flows/url/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import type {
import type { LlmCall, RunMetricsReport } from "../../../costs.js";
import type { StreamMode } from "../../../flags.js";
import type { OutputLanguage } from "../../../language.js";
import type { ModelRequestOptions } from "../../../llm/model-options.js";
import type { ModelRequestOptions, OpenAiReasoningEffort } from "../../../llm/model-options.js";
import type { ExecFileFn } from "../../../markitdown.js";
import type { FixedModelSpec, RequestedModel } from "../../../model-spec.js";
import type { SummaryLength } from "../../../shared/contracts.js";
Expand Down Expand Up @@ -89,6 +89,7 @@ export type UrlFlowModel = {
openaiUseChatCompletions: boolean | undefined;
openaiRequestOptions?: ModelRequestOptions;
openaiRequestOptionsOverride?: ModelRequestOptions;
cliReasoningEffortOverride?: OpenAiReasoningEffort;
openaiWhisperUsdPerMinute: number;
apiStatus: {
xaiApiKey: string | null;
Expand Down
14 changes: 9 additions & 5 deletions src/run/run-config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ import { loadSummarizeConfig } from "../config.js";
import { parseVideoMode } from "../flags.js";
import { type OutputLanguage, parseOutputLanguage } from "../language.js";
import { parseOpenAiReasoningEffort, parseOpenAiServiceTier } from "../llm/model-options.js";
import type { ModelRequestOptions } from "../llm/model-options.js";
import type { ModelRequestOptions, OpenAiReasoningEffort } from "../llm/model-options.js";
import { parseBooleanEnv } from "./env.js";

export type ConfigState = {
Expand All @@ -17,6 +17,7 @@ export type ConfigState = {
openaiUseChatCompletions: boolean | undefined;
openaiRequestOptions: ModelRequestOptions | undefined;
openaiRequestOptionsOverride: ModelRequestOptions | undefined;
cliReasoningEffortOverride: OpenAiReasoningEffort | undefined;
configModelLabel: string | null;
};

Expand Down Expand Up @@ -110,13 +111,15 @@ export function resolveConfigState({
}
options.serviceTier = serviceTier;
}
const rawThinking = typeof programOpts.thinking === "string" ? programOpts.thinking : null;
if (rawThinking) {
options.reasoningEffort = parseOpenAiReasoningEffort(rawThinking, "--thinking");
}
return Object.keys(options).length > 0 ? options : undefined;
})();

const cliReasoningEffortOverride: OpenAiReasoningEffort | undefined = (() => {
const rawThinking = typeof programOpts.thinking === "string" ? programOpts.thinking : null;
if (!rawThinking) return undefined;
return parseOpenAiReasoningEffort(rawThinking, "--thinking");
})();

const configModelLabel = (() => {
const model = config?.model;
if (!model) return null;
Expand All @@ -137,6 +140,7 @@ export function resolveConfigState({
openaiUseChatCompletions,
openaiRequestOptions,
openaiRequestOptionsOverride,
cliReasoningEffortOverride,
configModelLabel,
};
}
3 changes: 3 additions & 0 deletions src/run/runner-plan.ts
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ export async function createRunnerPlan(options: {
openaiUseChatCompletions,
openaiRequestOptions,
openaiRequestOptionsOverride,
cliReasoningEffortOverride,
configModelLabel,
apiKey,
openrouterApiKey,
Expand Down Expand Up @@ -340,6 +341,7 @@ export async function createRunnerPlan(options: {
openaiUseChatCompletions,
openaiRequestOptions,
openaiRequestOptionsOverride,
cliReasoningEffortOverride,
cliConfigForRun: cliConfigForRun ?? null,
cliAvailability,
trackedFetch,
Expand Down Expand Up @@ -452,6 +454,7 @@ export async function createRunnerPlan(options: {
openaiUseChatCompletions,
openaiRequestOptions,
openaiRequestOptionsOverride,
cliReasoningEffortOverride,
openaiWhisperUsdPerMinute,
apiStatus: {
xaiApiKey,
Expand Down
17 changes: 10 additions & 7 deletions src/run/summary-engine.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@ import { isCliDisabled, runCliModel } from "../llm/cli.js";
import { streamTextWithModelId } from "../llm/generate-text.js";
import { resolveGitHubModelsApiKey } from "../llm/github-models.js";
import { parseGatewayStyleModelId } from "../llm/model-id.js";
import { mergeModelRequestOptions } from "../llm/model-options.js";
import type { ModelRequestOptions } from "../llm/model-options.js";
import { mergeRequestOptionsForProvider } from "../llm/model-options.js";
import type { ModelRequestOptions, OpenAiReasoningEffort } from "../llm/model-options.js";
import type { Prompt } from "../llm/prompt.js";
import { formatCompactCount } from "../tty/format.js";
import { createRetryLogger, writeVerbose } from "./logging.js";
Expand Down Expand Up @@ -39,6 +39,7 @@ export type SummaryEngineDeps = {
openaiUseChatCompletions: boolean | undefined;
openaiRequestOptions?: ModelRequestOptions;
openaiRequestOptionsOverride?: ModelRequestOptions;
cliReasoningEffortOverride?: OpenAiReasoningEffort;
cliConfigForRun: Parameters<typeof runCliModel>[0]["config"];
cliAvailability: Partial<Record<CliProvider, boolean>>;
trackedFetch: typeof fetch;
Expand Down Expand Up @@ -322,11 +323,13 @@ export function createSummaryEngine(deps: SummaryEngineDeps) {
);
}
const parsedModelEffective = parseGatewayStyleModelId(modelResolution.modelId);
const requestOptions = mergeModelRequestOptions(
deps.openaiRequestOptions,
attempt.requestOptions,
deps.openaiRequestOptionsOverride,
);
const requestOptions = mergeRequestOptionsForProvider({
provider: parsedModelEffective.provider,
openaiGlobalDefault: deps.openaiRequestOptions,
attemptOptions: attempt.requestOptions,
openaiOverride: deps.openaiRequestOptionsOverride,
cliReasoningEffortOverride: deps.cliReasoningEffortOverride,
});
const streamingEnabledForCall =
allowStreaming &&
deps.streamingEnabled &&
Expand Down
Loading