Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
- CLI: pass Codex image attachments to `codex exec` so local image summaries no longer fail before starting (#242, #243, thanks @alfozan).
- OpenAI-compatible gateways: honor `OPENAI_USE_CHAT_COMPLETIONS=false` and `openai.useChatCompletions=false` so custom base URLs can use the Responses API (#235, #236, thanks @mzbgf).
- RSS transcripts: block feed-controlled transcript URLs that target loopback, private, link-local, reserved, or redirected local-network addresses (#239, thanks @Hinotoi-agent).
- Podcast transcripts: cap remote media downloads at 512 MB by default, with a finite opt-in override for larger files (#237, thanks @Hinotoi-agent).
- Chrome extension: abort stale side-panel summary streams on tab changes so delayed output from a closed or replaced tab cannot render under the new page title.
- Core: extract video IDs from YouTube `/live/` URLs so live and premiere links no longer abort summarization (#232, thanks @devYRPauli).
- Chrome extension: keep YouTube slide cards on the shared slide-summary path so local browser thumbnails receive the same summary text shape as CLI `--slides`.
Expand Down
3 changes: 2 additions & 1 deletion docs/media.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ read_when:
- YouTube still uses the YouTube transcript pipeline (captions → yt-dlp fallback).
- X/Twitter status URLs with detected video auto-switch to transcript-first (yt-dlp), even in auto mode.
- X broadcasts (`/i/broadcasts/...`) are treated as media-only and go transcript-first by default.
- Local media files are capped at 2 GB; remote media URLs are best-effort via yt-dlp (no explicit size limit).
- Local media files are capped at 2 GB. Remote podcast/media transcription downloads are capped at 512 MB by default and fail closed with `Remote media too large` even when the server omits or under-reports `Content-Length`; other remote media URLs are best-effort via yt-dlp.
- Operators who accept the disk/DoS tradeoff for larger remote podcast/media files can opt in with `SUMMARIZE_REMOTE_MEDIA_MAX_BYTES=<positive integer byte limit>`. The override must be a finite positive integer byte count; fractional, sub-byte, or otherwise invalid values are ignored and the default 512 MB cap remains in effect.
- Remote transcription providers: `ASSEMBLYAI_API_KEY`, `GEMINI_API_KEY` / `GOOGLE_GENERATIVE_AI_API_KEY` / `GOOGLE_API_KEY`, `OPENAI_API_KEY`, `FAL_KEY` (plus `GROQ_API_KEY` before local/remote fallback).
- Gemini uses the Files API automatically for larger uploads.

Expand Down
2 changes: 2 additions & 0 deletions packages/core/src/content/transcript/providers/podcast.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import {
filenameFromUrl,
formatBytes,
normalizeHeaderType,
parseContentRangeTotal,
parseContentLength,
probeRemoteMedia,
type TranscribeRequest,
Expand Down Expand Up @@ -131,6 +132,7 @@ export const __test__ = {
downloadCappedBytes,
downloadToFile,
normalizeHeaderType,
parseContentRangeTotal,
parseContentLength,
filenameFromUrl,
looksLikeBlockedHtml,
Expand Down
153 changes: 127 additions & 26 deletions packages/core/src/content/transcript/providers/podcast/media.ts
Original file line number Diff line number Diff line change
Expand Up @@ -73,17 +73,16 @@ export async function transcribeMediaUrl({
falApiKey,
});
const effectiveEnv = effectiveTranscription.env ?? process.env;
const remoteMediaMaxBytes = effectiveTranscription.remoteMediaMaxBytes ?? MAX_REMOTE_MEDIA_BYTES;
const startInfo = await resolveTranscriptionStartInfo({
transcription: effectiveTranscription,
});
const providerHint = startInfo.providerHint;
const modelId = startInfo.modelId;

const head = await probeRemoteMedia(fetchImpl, url);
if (head.contentLength !== null && head.contentLength > MAX_REMOTE_MEDIA_BYTES) {
throw new Error(
`Remote media too large (${formatBytes(head.contentLength)}). Limit is ${formatBytes(MAX_REMOTE_MEDIA_BYTES)}.`,
);
if (head.contentLength !== null && head.contentLength > remoteMediaMaxBytes) {
throw remoteMediaTooLargeError(head.contentLength, remoteMediaMaxBytes);
}

const mediaType = head.mediaType ?? "application/octet-stream";
Expand All @@ -99,8 +98,7 @@ export async function transcribeMediaUrl({
totalBytes,
});
if (!canChunk) {
const bytes = await downloadCappedBytes(fetchImpl, url, MAX_OPENAI_UPLOAD_BYTES, {
totalBytes,
const bytes = await downloadCappedMediaBytes(fetchImpl, url, remoteMediaMaxBytes, totalBytes, {
onProgress: (downloadedBytes) =>
progress?.onProgress?.({
kind: "transcript-media-download-progress",
Expand Down Expand Up @@ -157,8 +155,7 @@ export async function transcribeMediaUrl({
}

if (head.contentLength !== null && head.contentLength <= MAX_OPENAI_UPLOAD_BYTES) {
const bytes = await downloadCappedBytes(fetchImpl, url, MAX_OPENAI_UPLOAD_BYTES, {
totalBytes,
const bytes = await downloadCappedMediaBytes(fetchImpl, url, remoteMediaMaxBytes, totalBytes, {
onProgress: (downloadedBytes) =>
progress?.onProgress?.({
kind: "transcript-media-download-progress",
Expand Down Expand Up @@ -216,6 +213,7 @@ export async function transcribeMediaUrl({
const tmpFile = join(tmpdir(), `summarize-podcast-${randomUUID()}.bin`);
try {
const downloadedBytes = await downloadToFile(fetchImpl, url, tmpFile, {
maxBytes: remoteMediaMaxBytes,
totalBytes,
onProgress: (nextDownloadedBytes) =>
progress?.onProgress?.({
Expand Down Expand Up @@ -301,47 +299,92 @@ export async function downloadCappedBytes(
fetchImpl: typeof fetch,
url: string,
maxBytes: number,
options?: { totalBytes: number | null; onProgress?: ((downloadedBytes: number) => void) | null },
options?: {
rejectAboveBytes?: number;
totalBytes: number | null;
onProgress?: ((downloadedBytes: number) => void) | null;
} | null,
): Promise<Uint8Array> {
const rejectAboveBytes = options?.rejectAboveBytes ?? null;
const retainBytes = Math.min(maxBytes, rejectAboveBytes ?? maxBytes);
const res = await fetchImpl(url, {
redirect: "follow",
headers: { Range: `bytes=0-${maxBytes - 1}` },
headers: { Range: `bytes=0-${retainBytes - 1}` },
signal: AbortSignal.timeout(TRANSCRIPTION_TIMEOUT_MS),
});
if (!res.ok) {
throw new Error(`Download failed (${res.status})`);
}
const contentRange = parseContentRange(res.headers.get("content-range"));
const contentRangeTotal = contentRange?.total ?? null;
const contentLength =
res.status === 206 ? null : parseContentLength(res.headers.get("content-length"));
const getBoundedTotalBytes = contentRangeTotal ?? contentLength ?? null;
const declaredTotalBytes = options?.totalBytes ?? null;
const boundedTotalBytes = getBoundedTotalBytes ?? declaredTotalBytes;
if (
rejectAboveBytes !== null &&
boundedTotalBytes !== null &&
boundedTotalBytes > rejectAboveBytes
) {
throw remoteMediaTooLargeError(boundedTotalBytes, rejectAboveBytes);
}
const declaredBodyBytes =
res.status === 206 && contentRange !== null ? contentRange.end - contentRange.start + 1 : null;
const verifyOverflowByReading =
rejectAboveBytes !== null &&
(boundedTotalBytes === null ||
(declaredBodyBytes !== null && declaredBodyBytes <= retainBytes) ||
(contentLength !== null && contentLength <= retainBytes) ||
(getBoundedTotalBytes === null &&
declaredTotalBytes !== null &&
declaredTotalBytes <= retainBytes) ||
(rejectAboveBytes <= maxBytes && boundedTotalBytes <= rejectAboveBytes));
const body = res.body;
if (!body) {
const arrayBuffer = await res.arrayBuffer();
return new Uint8Array(arrayBuffer.slice(0, maxBytes));
if (verifyOverflowByReading && arrayBuffer.byteLength > rejectAboveBytes) {
throw remoteMediaTooLargeError(arrayBuffer.byteLength, rejectAboveBytes);
}
return new Uint8Array(arrayBuffer.slice(0, retainBytes));
}

const reader = body.getReader();
const chunks: Uint8Array[] = [];
let total = 0;
let retained = 0;
let totalRead = 0;
let lastReported = 0;
try {
while (total < maxBytes) {
while (retained < retainBytes || verifyOverflowByReading) {
const { value, done } = await reader.read();
if (done) break;
if (!value) continue;
const remaining = maxBytes - total;
const next = value.byteLength > remaining ? value.slice(0, remaining) : value;
chunks.push(next);
total += next.byteLength;
if (total - lastReported >= 64 * 1024) {
lastReported = total;
options?.onProgress?.(total);
if (!value || value.byteLength === 0) continue;
const nextTotalRead = totalRead + value.byteLength;
if (declaredBodyBytes !== null && nextTotalRead > declaredBodyBytes) {
throw new Error("Download failed (range response exceeded declared length)");
}
if (verifyOverflowByReading && nextTotalRead > rejectAboveBytes) {
throw remoteMediaTooLargeError(nextTotalRead, rejectAboveBytes);
}
if (retained < retainBytes) {
const remaining = retainBytes - retained;
const next = value.byteLength > remaining ? value.slice(0, remaining) : value;
chunks.push(next);
retained += next.byteLength;
if (retained - lastReported >= 64 * 1024) {
lastReported = retained;
options?.onProgress?.(retained);
}
}
if (total >= maxBytes) break;
totalRead = nextTotalRead;
if (retained >= retainBytes && !verifyOverflowByReading) break;
}
} finally {
await reader.cancel().catch(() => {});
}
options?.onProgress?.(total);
options?.onProgress?.(retained);

const out = new Uint8Array(total);
const out = new Uint8Array(retained);
let offset = 0;
for (const chunk of chunks) {
out.set(chunk, offset);
Expand All @@ -350,11 +393,29 @@ export async function downloadCappedBytes(
return out;
}

/**
 * Thin adapter over `downloadCappedBytes` for media transcription downloads.
 *
 * Forwards `MAX_OPENAI_UPLOAD_BYTES` as the `maxBytes` argument and passes the
 * caller's remote-media limit through as `rejectAboveBytes`.
 *
 * @param fetchImpl - Fetch implementation used to perform the request.
 * @param url - Remote media URL to download.
 * @param remoteMediaMaxBytes - Limit forwarded as `rejectAboveBytes`.
 * @param totalBytes - Previously known total size, or `null` when unknown.
 * @param options - Optional progress callback, forwarded unchanged.
 * @returns The bytes resolved by `downloadCappedBytes`.
 */
async function downloadCappedMediaBytes(
  fetchImpl: typeof fetch,
  url: string,
  remoteMediaMaxBytes: number,
  totalBytes: number | null,
  options?: { onProgress?: ((downloadedBytes: number) => void) | null },
): Promise<Uint8Array> {
  const onProgress = options?.onProgress;
  const downloaded = await downloadCappedBytes(fetchImpl, url, MAX_OPENAI_UPLOAD_BYTES, {
    rejectAboveBytes: remoteMediaMaxBytes,
    totalBytes,
    onProgress,
  });
  return downloaded;
}

export async function downloadToFile(
fetchImpl: typeof fetch,
url: string,
filePath: string,
options?: { totalBytes: number | null; onProgress?: ((downloadedBytes: number) => void) | null },
options?: {
maxBytes?: number;
totalBytes: number | null;
onProgress?: ((downloadedBytes: number) => void) | null;
},
): Promise<number> {
const res = await fetchImpl(url, {
redirect: "follow",
Expand All @@ -363,9 +424,13 @@ export async function downloadToFile(
if (!res.ok) {
throw new Error(`Download failed (${res.status})`);
}
const maxBytes = options?.maxBytes ?? Number.POSITIVE_INFINITY;
const body = res.body;
if (!body) {
const bytes = new Uint8Array(await res.arrayBuffer());
if (bytes.byteLength > maxBytes) {
throw remoteMediaTooLargeError(bytes.byteLength, maxBytes);
}
await fs.writeFile(filePath, bytes);
options?.onProgress?.(bytes.byteLength);
return bytes.byteLength;
Expand All @@ -381,8 +446,12 @@ export async function downloadToFile(
const { value, done } = await reader.read();
if (done) break;
if (!value) continue;
const nextDownloadedBytes = downloadedBytes + value.byteLength;
if (nextDownloadedBytes > maxBytes) {
throw remoteMediaTooLargeError(nextDownloadedBytes, maxBytes);
}
await handle.write(value);
downloadedBytes += value.byteLength;
downloadedBytes = nextDownloadedBytes;
if (downloadedBytes - lastReported >= 128 * 1024) {
lastReported = downloadedBytes;
options?.onProgress?.(downloadedBytes);
Expand All @@ -398,6 +467,12 @@ export async function downloadToFile(
return downloadedBytes;
}

/**
 * Builds the error reported when remote media exceeds the configured limit.
 *
 * @param bytes - Observed or declared size that exceeded the limit.
 * @param maxBytes - The limit that was exceeded.
 * @returns An `Error` whose message contains both sizes rendered via `formatBytes`.
 */
function remoteMediaTooLargeError(bytes: number, maxBytes: number): Error {
  const observedSize = formatBytes(bytes);
  const allowedSize = formatBytes(maxBytes);
  const message = `Remote media too large (${observedSize}). Limit is ${allowedSize}.`;
  return new Error(message);
}

export function normalizeHeaderType(value: string | null): string | null {
if (!value) return null;
const trimmed = value.trim();
Expand All @@ -411,6 +486,32 @@ export function parseContentLength(value: string | null): number | null {
return Number.isFinite(parsed) && parsed > 0 ? Math.floor(parsed) : null;
}

/**
 * Extracts only the total size from a `Content-Range` header value.
 *
 * @param value - Raw header value, or `null` when the header is absent.
 * @returns The total reported by `parseContentRange`, or `null` when that
 *   function rejects the value.
 */
export function parseContentRangeTotal(value: string | null): number | null {
  const range = parseContentRange(value);
  if (range == null) return null;
  return range.total ?? null;
}

/**
 * Parses a `Content-Range` header of the form `bytes <start>-<end>/<total>`.
 *
 * The unit is matched case-insensitively and surrounding whitespace is trimmed.
 * Values with a non-numeric component (for example a `*` total) do not match.
 *
 * @param value - Raw header value, or `null` when the header is absent.
 * @returns The parsed `{ start, end, total }`, or `null` when the value is
 *   missing, does not match the expected shape, contains a number that is not
 *   a safe integer, or describes an inconsistent range (`end < start` or
 *   `total <= end`).
 */
function parseContentRange(
  value: string | null,
): { start: number; end: number; total: number } | null {
  if (!value) return null;

  const parts = /^bytes\s+(\d+)-(\d+)\/(\d+)$/i.exec(value.trim());
  if (parts === null) return null;

  const [, rawStart, rawEnd, rawTotal] = parts;
  if (!rawStart || !rawEnd || !rawTotal) return null;

  const start = Number(rawStart);
  const end = Number(rawEnd);
  const total = Number(rawTotal);

  // Very long digit runs convert to numbers outside the safe-integer range.
  const allSafe = [start, end, total].every((n) => Number.isSafeInteger(n));
  if (!allSafe) return null;

  // The range must be ordered and must fit strictly inside the declared total.
  const consistent = start >= 0 && end >= start && total > end;
  return consistent ? { start, end, total } : null;
}

export function filenameFromUrl(url: string): string | null {
try {
const parsed = new URL(url);
Expand Down
18 changes: 18 additions & 0 deletions packages/core/src/content/transcript/transcription-config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ export type TranscriptionConfig = {
openaiApiKey: string | null;
falApiKey: string | null;
geminiModel: string | null;
remoteMediaMaxBytes: number | null;
};

type TranscriptionConfigInput = {
Expand All @@ -25,13 +26,25 @@ type TranscriptionConfigInput = {
openaiApiKey?: string | null;
falApiKey?: string | null;
geminiModel?: string | null;
remoteMediaMaxBytes?: number | string | null;
};

/**
 * Name of the environment variable that can supply the remote media byte
 * limit; its value is validated by `normalizeRemoteMediaMaxBytes` before use.
 */
export const REMOTE_MEDIA_MAX_BYTES_ENV = "SUMMARIZE_REMOTE_MEDIA_MAX_BYTES";

/**
 * Normalizes an optional string setting.
 *
 * @param raw - Candidate value; may be `null` or `undefined`.
 * @returns The trimmed string, or `null` when the input is not a string or is
 *   empty after trimming.
 */
function normalizeKey(raw: string | null | undefined): string | null {
  if (typeof raw !== "string") return null;
  const cleaned = raw.trim();
  return cleaned.length === 0 ? null : cleaned;
}

/**
 * Validates a remote media byte-limit override.
 *
 * Strings are trimmed and converted with `Number(...)`; numbers are used as-is.
 *
 * @param raw - Candidate limit as a number or string; may be `null`/`undefined`.
 * @returns The limit when it is a safe integer greater than zero, otherwise
 *   `null` (covers missing, non-numeric, fractional, zero, negative, and
 *   non-finite inputs).
 */
export function normalizeRemoteMediaMaxBytes(
  raw: number | string | null | undefined,
): number | null {
  if (raw === null || raw === undefined) return null;

  const candidate = typeof raw === "string" ? Number(raw.trim()) : raw;
  if (!Number.isSafeInteger(candidate)) return null;
  if (candidate <= 0) return null;
  return candidate;
}

export function resolveTranscriptionConfig(input: TranscriptionConfigInput): TranscriptionConfig {
const fromObject = input.transcription ?? null;
const env = fromObject?.env ?? input.env;
Expand All @@ -58,5 +71,10 @@ export function resolveTranscriptionConfig(input: TranscriptionConfigInput): Tra
falApiKey: fromObject?.falApiKey ?? input.falApiKey,
}),
geminiModel: normalizeKey(fromObject?.geminiModel ?? input.geminiModel),
remoteMediaMaxBytes: normalizeRemoteMediaMaxBytes(
fromObject?.remoteMediaMaxBytes ??
input.remoteMediaMaxBytes ??
env?.[REMOTE_MEDIA_MAX_BYTES_ENV],
),
};
}
Loading