diff --git a/CHANGELOG.md b/CHANGELOG.md index 8e9ccc658..842dd040e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ - CLI cache: include local media `fileMtime` when writing transcript cache entries so repeated unchanged audio/video extraction can hit cache (#240, #241, thanks @alfozan). - CLI: pass Codex image attachments to `codex exec` so local image summaries no longer fail before starting (#242, #243, thanks @alfozan). - OpenAI-compatible gateways: honor `OPENAI_USE_CHAT_COMPLETIONS=false` and `openai.useChatCompletions=false` so custom base URLs can use the Responses API (#235, #236, thanks @mzbgf). +- RSS transcripts: block feed-controlled transcript URLs that target loopback, private, link-local, reserved, or redirected local-network addresses (#239, thanks @Hinotoi-agent). - Chrome extension: abort stale side-panel summary streams on tab changes so delayed output from a closed or replaced tab cannot render under the new page title. - Core: extract video IDs from YouTube `/live/` URLs so live and premiere links no longer abort summarization (#232, thanks @devYRPauli). - Chrome extension: keep YouTube slide cards on the shared slide-summary path so local browser thumbnails receive the same summary text shape as CLI `--slides`. 
diff --git a/packages/core/package.json b/packages/core/package.json index ddff31c9c..6d6a3c05c 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -52,7 +52,8 @@ "cheerio": "^1.2.0", "es-toolkit": "^1.47.0", "jsdom": "29.1.1", - "sanitize-html": "^2.17.4" + "sanitize-html": "^2.17.4", + "undici": "8.4.1" }, "devDependencies": { "@types/jsdom": "^28.0.3", diff --git a/packages/core/src/content/dns-pinned-fetch.ts b/packages/core/src/content/dns-pinned-fetch.ts new file mode 100644 index 000000000..3c7f37e0b --- /dev/null +++ b/packages/core/src/content/dns-pinned-fetch.ts @@ -0,0 +1,99 @@ +import http from "node:http"; +import https from "node:https"; +import { Readable } from "node:stream"; +import { readDnsPinnedAddresses, type DnsPinnedAddress } from "./fetch-capabilities.js"; + +type PinnedLookupAddress = { address: string; family: number }; +type LookupCallback = ( + error: Error | null, + address: string | PinnedLookupAddress[], + family?: number, +) => void; + +function getInputUrl(input: RequestInfo | URL): string { + if (typeof input === "string") return input; + if (input instanceof URL) return input.href; + return input.url; +} + +function createPinnedLookup(addresses: DnsPinnedAddress[]) { + const pinnedAddresses: PinnedLookupAddress[] = addresses.map((entry) => ({ + address: entry.address, + family: entry.family ?? 4, + })); + return (_hostname: string, options: unknown, callback: LookupCallback): void => { + if ((options as { all?: boolean } | undefined)?.all) { + callback(null, pinnedAddresses); + return; + } + const first = pinnedAddresses[0]; + callback(null, first?.address ?? "0.0.0.0", first?.family ?? 
4); + }; +} + +function headersFrom(input: RequestInfo | URL, init?: RequestInit): Headers { + if (init?.headers) return new Headers(init.headers); + if (typeof input !== "string" && !(input instanceof URL)) return new Headers(input.headers); + return new Headers(); +} + +function methodFrom(input: RequestInfo | URL, init?: RequestInit): string { + if (init?.method) return init.method; + if (typeof input !== "string" && !(input instanceof URL)) return input.method; + return "GET"; +} + +function hasRequestBody(input: RequestInfo | URL, init?: RequestInit): boolean { + if (init && "body" in init && init.body != null) return true; + if (typeof input !== "string" && !(input instanceof URL)) return input.body != null; + return false; +} + +export async function fetchWithDnsPinnedAddresses( + input: RequestInfo | URL, + init?: RequestInit, +): Promise { + const addresses = readDnsPinnedAddresses(init); + if (!addresses) throw new Error("Pinned DNS fetch missing validated addresses"); + if (hasRequestBody(input, init)) { + throw new Error("Pinned DNS fetch does not support request bodies"); + } + + const url = new URL(getInputUrl(input)); + const client = url.protocol === "https:" ? https : http; + const headers: Record = {}; + headersFrom(input, init).forEach((value, key) => { + headers[key] = value; + }); + + return await new Promise((resolve, reject) => { + const req = client.request( + url, + { + headers, + lookup: createPinnedLookup(addresses), + method: methodFrom(input, init), + ...(init?.signal ? { signal: init.signal } : {}), + }, + (res) => { + const responseHeaders = new Headers(); + for (const [key, value] of Object.entries(res.headers)) { + if (Array.isArray(value)) { + for (const entry of value) responseHeaders.append(key, entry); + } else if (typeof value === "string") { + responseHeaders.set(key, value); + } + } + const response = new Response(Readable.toWeb(res) as ReadableStream, { + headers: responseHeaders, + status: res.statusCode ?? 
200, + statusText: res.statusMessage, + }); + Object.defineProperty(response, "url", { configurable: true, value: url.href }); + resolve(response); + }, + ); + req.on("error", reject); + req.end(); + }); +} diff --git a/packages/core/src/content/fetch-capabilities.ts b/packages/core/src/content/fetch-capabilities.ts new file mode 100644 index 000000000..6260fd4b2 --- /dev/null +++ b/packages/core/src/content/fetch-capabilities.ts @@ -0,0 +1,54 @@ +const DNS_PINNED_FETCH = Symbol.for("@steipete/summarize.dnsPinnedFetch"); +const DNS_PINNED_ADDRESSES = Symbol.for("@steipete/summarize.dnsPinnedAddresses"); + +export type DnsPinnedAddress = { address: string; family?: number }; + +export function markFetchAsDnsPinned( + fetchImpl: T, + pinnedFetchImpl: typeof fetch = fetchImpl, +): T { + Object.defineProperty(fetchImpl, DNS_PINNED_FETCH, { + configurable: false, + enumerable: false, + value: pinnedFetchImpl, + }); + return fetchImpl; +} + +export function resolveDnsPinnedFetch(fetchImpl: typeof fetch): typeof fetch | null { + const pinnedFetchImpl = (fetchImpl as { [DNS_PINNED_FETCH]?: typeof fetch })[DNS_PINNED_FETCH]; + return pinnedFetchImpl ?? 
null; +} + +export function supportsDnsPinnedFetch(fetchImpl: typeof fetch): boolean { + return resolveDnsPinnedFetch(fetchImpl) !== null; +} + +export function isNativeOrBoundGlobalFetch(fetchImpl: typeof fetch): boolean { + if (fetchImpl === globalThis.fetch) return true; + + const expectedBoundName = `bound ${globalThis.fetch.name || "fetch"}`; + return ( + fetchImpl.name === expectedBoundName && + Function.prototype.toString.call(fetchImpl).includes("[native code]") + ); +} + +export function attachDnsPinnedAddresses( + init: T, + addresses: DnsPinnedAddress[], +): T { + Object.defineProperty(init, DNS_PINNED_ADDRESSES, { + configurable: true, + enumerable: true, + value: addresses, + }); + return init; +} + +export function readDnsPinnedAddresses(init: RequestInit | undefined): DnsPinnedAddress[] | null { + const addresses = (init as { [DNS_PINNED_ADDRESSES]?: DnsPinnedAddress[] } | undefined)?.[ + DNS_PINNED_ADDRESSES + ]; + return Array.isArray(addresses) && addresses.length > 0 ? 
addresses : null; +} diff --git a/packages/core/src/content/index.ts b/packages/core/src/content/index.ts index bd89074a0..58ca0b70d 100644 --- a/packages/core/src/content/index.ts +++ b/packages/core/src/content/index.ts @@ -18,6 +18,15 @@ export { type ExtractedLinkContent, type FetchLinkContentOptions, } from "./link-preview/content/types.js"; +export { + attachDnsPinnedAddresses, + isNativeOrBoundGlobalFetch, + markFetchAsDnsPinned, + readDnsPinnedAddresses, + resolveDnsPinnedFetch, + supportsDnsPinnedFetch, + type DnsPinnedAddress, +} from "./fetch-capabilities.js"; export type { ConvertHtmlToMarkdown, FirecrawlScrapeResult, diff --git a/packages/core/src/content/link-preview/client.ts b/packages/core/src/content/link-preview/client.ts index 89f6313fe..e3e7ab9dd 100644 --- a/packages/core/src/content/link-preview/client.ts +++ b/packages/core/src/content/link-preview/client.ts @@ -41,8 +41,7 @@ export interface LinkPreviewClientOptions { /** Public factory for a link preview client with injectable dependencies. */ export function createLinkPreviewClient(options: LinkPreviewClientOptions = {}): LinkPreviewClient { - const fetchImpl: typeof fetch = - options.fetch ?? ((...args: Parameters) => globalThis.fetch(...args)); + const fetchImpl: typeof fetch = options.fetch ?? globalThis.fetch; const env = typeof options.env === "object" && options.env ? options.env : undefined; const scrape: ScrapeWithFirecrawl | null = options.scrapeWithFirecrawl ?? null; const apifyApiToken = typeof options.apifyApiToken === "string" ? 
options.apifyApiToken : null; diff --git a/packages/core/src/content/transcript/providers/podcast/rss-transcript.ts b/packages/core/src/content/transcript/providers/podcast/rss-transcript.ts index d07701df7..c169d71de 100644 --- a/packages/core/src/content/transcript/providers/podcast/rss-transcript.ts +++ b/packages/core/src/content/transcript/providers/podcast/rss-transcript.ts @@ -1,3 +1,13 @@ +import { lookup as dnsLookup } from "node:dns/promises"; +import { createRequire } from "node:module"; +import { isIP } from "node:net"; +import { fetchWithDnsPinnedAddresses } from "../../../dns-pinned-fetch.js"; +import { + attachDnsPinnedAddresses, + isNativeOrBoundGlobalFetch, + resolveDnsPinnedFetch, + supportsDnsPinnedFetch, +} from "../../../fetch-capabilities.js"; import type { TranscriptSegment } from "../../../link-preview/types.js"; import { jsonTranscriptToPlainText, @@ -14,17 +24,37 @@ import { } from "./rss-feed.js"; type TranscriptCandidate = { url: string; type: string | null }; +type LookupAddress = { address: string; family?: number }; +type LookupFn = (hostname: string) => Promise; +type LookupCallback = ( + error: Error | null, + address: string | LookupAddress[], + family?: number, +) => void; +type UndiciAgentConstructor = new (options: { + autoSelectFamily?: boolean; + autoSelectFamilyAttemptTimeout?: number; + connect: { + lookup: (hostname: string, options: unknown, callback: LookupCallback) => void; + }; +}) => unknown; +type UndiciModule = { Agent: UndiciAgentConstructor; fetch: typeof fetch }; + +const MAX_TRANSCRIPT_REDIRECTS = 10; +const require = createRequire(import.meta.url); export async function tryFetchTranscriptFromFeedXml({ fetchImpl, feedXml, episodeTitle, notes, + lookup, }: { fetchImpl: typeof fetch; feedXml: string; episodeTitle: string | null; notes: string[]; + lookup?: LookupFn; }): Promise<{ text: string; transcriptUrl: string; @@ -50,8 +80,8 @@ export async function tryFetchTranscriptFromFeedXml({ const transcriptUrl = 
decodeXmlEntities(preferred.url); try { - const res = await fetchImpl(transcriptUrl, { - redirect: "follow", + const res = await fetchTranscriptUrl(fetchImpl, transcriptUrl, { + lookup, signal: AbortSignal.timeout(TRANSCRIPTION_TIMEOUT_MS), headers: { accept: "text/vtt,text/plain,application/json;q=0.9,*/*;q=0.8" }, }); @@ -91,6 +121,227 @@ export async function tryFetchTranscriptFromFeedXml({ return null; } +function parseIpv4(address: string): number[] | null { + const parts = address.split("."); + if (parts.length !== 4) return null; + const octets = parts.map((part) => { + if (!/^\d{1,3}$/.test(part)) return null; + const value = Number(part); + return Number.isInteger(value) && value >= 0 && value <= 255 ? value : null; + }); + return octets.every((value) => value != null) ? (octets as number[]) : null; +} + +function isBlockedIpv4(address: string): boolean { + const octets = parseIpv4(address); + if (!octets) return true; + const [a, b] = octets; + return ( + a === 0 || + a === 10 || + a === 127 || + (a === 100 && b >= 64 && b <= 127) || + (a === 169 && b === 254) || + (a === 172 && b >= 16 && b <= 31) || + (a === 192 && b === 168) || + (a === 192 && b === 0 && octets[2] === 0) || + (a === 192 && b === 0 && octets[2] === 2) || + (a === 198 && (b === 18 || b === 19)) || + (a === 198 && b === 51 && octets[2] === 100) || + (a === 203 && b === 0 && octets[2] === 113) || + a >= 224 + ); +} + +function expandIpv6(address: string): number[] | null { + const normalized = address.split("%", 1)[0]?.toLowerCase() ?? ""; + if (!normalized) return null; + const mapped = normalized.match(/^(.*:)(\d{1,3}(?:\.\d{1,3}){3})$/); + const ipv4 = mapped ? parseIpv4(mapped[2] ?? "") : null; + const head = mapped ? (mapped[1] ?? "") : normalized; + const partsAroundGap = head.split("::"); + if (partsAroundGap.length > 2) return null; + const [leftRaw, rightRaw] = partsAroundGap; + const left = leftRaw ? 
leftRaw.split(":").filter(Boolean) : []; + const right = typeof rightRaw === "string" && rightRaw ? rightRaw.split(":").filter(Boolean) : []; + const ipv4Parts = ipv4 + ? [((ipv4[0] ?? 0) << 8) | (ipv4[1] ?? 0), ((ipv4[2] ?? 0) << 8) | (ipv4[3] ?? 0)] + : []; + const missing = 8 - left.length - right.length - ipv4Parts.length; + if (missing < 0 || (partsAroundGap.length === 1 && missing !== 0)) return null; + const parsePart = (part: string) => (/^[0-9a-f]{1,4}$/.test(part) ? parseInt(part, 16) : -1); + const parts = [ + ...left.map(parsePart), + ...Array.from({ length: missing }, () => 0), + ...right.map(parsePart), + ...ipv4Parts, + ]; + return parts.length === 8 && parts.every((part) => part >= 0 && part <= 0xffff) ? parts : null; +} + +function isBlockedIpv6(address: string): boolean { + const parts = expandIpv6(address); + if (!parts) return true; + const [first, second, third, fourth, , fifth, sixth, eighth] = parts; + const allZero = parts.every((part) => part === 0); + const loopback = parts.slice(0, 7).every((part) => part === 0) && eighth === 1; + const mappedIpv4 = parts.slice(0, 5).every((part) => part === 0) && fifth === 0xffff; + const compatibleIpv4 = parts.slice(0, 6).every((part) => part === 0) && !allZero && !loopback; + if (mappedIpv4 || compatibleIpv4) { + const ipv4 = `${((sixth ?? 0) >> 8) & 0xff}.${(sixth ?? 0) & 0xff}.${((eighth ?? 0) >> 8) & 0xff}.${(eighth ?? 0) & 0xff}`; + return isBlockedIpv4(ipv4); + } + const wellKnownNat64 = + first === 0x64 && second === 0xff9b && parts.slice(2, 6).every((part) => part === 0); + if (wellKnownNat64) { + const ipv4 = `${((sixth ?? 0) >> 8) & 0xff}.${(sixth ?? 0) & 0xff}.${((eighth ?? 0) >> 8) & 0xff}.${(eighth ?? 0) & 0xff}`; + return isBlockedIpv4(ipv4); + } + return ( + allZero || + loopback || + (first === 0x64 && second === 0xff9b && third === 1) || + (first === 0x100 && second === 0 && third === 0 && fourth === 0) || + ((first ?? 0) & 0xfe00) === 0xfc00 || + ((first ?? 
0) & 0xffc0) === 0xfe80 || + ((first ?? 0) & 0xff00) === 0xff00 || + (first === 0x2001 && (second ?? 0) <= 0x01ff) || + (first === 0x2001 && second === 0xdb8) || + first === 0x2002 || + (first === 0x3fff && (second ?? 0) <= 0x0fff) || + first === 0x5f00 + ); +} + +function isBlockedNetworkAddress(address: string): boolean { + const normalized = address.trim().replace(/^\[|\]$/g, ""); + const family = isIP(normalized); + if (family === 4) return isBlockedIpv4(normalized); + if (family === 6) return isBlockedIpv6(normalized); + return true; +} + +function normalizeUrlHostname(hostname: string): string { + return hostname.trim().replace(/^\[|\]$/g, ""); +} + +function isBlockedHostname(hostname: string): boolean { + const host = normalizeUrlHostname(hostname).toLowerCase().replace(/\.$/, ""); + return host === "localhost" || host.endsWith(".localhost"); +} + +async function defaultLookup(hostname: string): Promise { + return await dnsLookup(hostname, { all: true, verbatim: true }); +} + +async function resolveTranscriptFetchTarget( + rawUrl: string, + { lookup = defaultLookup }: { lookup?: LookupFn } = {}, +): Promise<{ url: URL; addresses: LookupAddress[] }> { + let url: URL; + try { + url = new URL(rawUrl); + } catch { + throw new Error("RSS transcript URL is invalid"); + } + if (url.protocol !== "http:" && url.protocol !== "https:") { + throw new Error("RSS transcript URL must use http or https"); + } + const hostname = normalizeUrlHostname(url.hostname); + if (isBlockedHostname(hostname)) { + throw new Error("RSS transcript URL resolves to a blocked local network host"); + } + if (isIP(hostname)) { + if (isBlockedNetworkAddress(hostname)) { + throw new Error("RSS transcript URL resolves to a blocked local network address"); + } + return { url, addresses: [] }; + } + const addresses = await lookup(hostname); + if (addresses.length === 0 || addresses.some((entry) => isBlockedNetworkAddress(entry.address))) { + throw new Error("RSS transcript URL resolves to a 
blocked local network address"); + } + return { url, addresses }; +} + +function isNativeFetchImpl(fetchImpl: typeof fetch): boolean { + return isNativeOrBoundGlobalFetch(fetchImpl); +} + +function isBunRuntime(): boolean { + return typeof (process.versions as { bun?: string }).bun === "string"; +} + +function loadUndici(): UndiciModule { + return require("undici") as UndiciModule; +} + +function createPinnedDispatcher(addresses: LookupAddress[]): unknown { + const { Agent } = loadUndici(); + const pinnedAddresses = addresses.map((entry) => ({ + address: entry.address, + family: entry.family ?? (isIP(entry.address) || 4), + })); + return new Agent({ + autoSelectFamily: true, + autoSelectFamilyAttemptTimeout: 250, + connect: { + lookup: (_hostname, options, callback) => { + if ((options as { all?: boolean } | undefined)?.all) { + callback(null, pinnedAddresses); + return; + } + const first = pinnedAddresses[0]; + callback(null, first?.address ?? "0.0.0.0", first?.family ?? 4); + }, + }, + }); +} + +async function fetchTranscriptUrl( + fetchImpl: typeof fetch, + rawUrl: string, + options: { + lookup?: LookupFn; + signal?: AbortSignal; + headers?: HeadersInit; + }, + redirectCount = 0, +): Promise { + const target = await resolveTranscriptFetchTarget(rawUrl, { lookup: options.lookup }); + const requiresPinnedDns = target.addresses.length > 0; + const isNativeFetch = isNativeFetchImpl(fetchImpl); + if (requiresPinnedDns && !isNativeFetch && !supportsDnsPinnedFetch(fetchImpl)) { + throw new Error("RSS transcript URL requires native fetch for DNS pinning"); + } + const pinnedInit = { headers: options.headers, signal: options.signal, redirect: "manual" }; + const fetchInit = requiresPinnedDns + ? attachDnsPinnedAddresses( + { + ...pinnedInit, + dispatcher: createPinnedDispatcher(target.addresses), + } as RequestInit & { dispatcher: unknown }, + target.addresses, + ) + : (pinnedInit as RequestInit); + const pinnedFetchImpl = requiresPinnedDns + ? isNativeFetch + ? 
isBunRuntime() + ? fetchWithDnsPinnedAddresses + : loadUndici().fetch + : (resolveDnsPinnedFetch(fetchImpl) ?? fetchImpl) + : fetchImpl; + const response = await pinnedFetchImpl(target.url.href, fetchInit); + if (![301, 302, 303, 307, 308].includes(response.status)) return response; + const location = response.headers.get("location"); + if (!location) return response; + if (redirectCount >= MAX_TRANSCRIPT_REDIRECTS) { + throw new Error("RSS transcript URL redirected too many times"); + } + const nextUrl = new URL(location, target.url.href).href; + return await fetchTranscriptUrl(fetchImpl, nextUrl, options, redirectCount + 1); +} + function extractPodcastTranscriptCandidatesFromItem(itemXml: string): TranscriptCandidate[] { const matches = itemXml.matchAll( /]*\burl\s*=\s*(['"])([^'"]+)\1[^>]*>/gi, diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 250dff599..41c371ab9 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -154,6 +154,9 @@ importers: sanitize-html: specifier: ^2.17.4 version: 2.17.4 + undici: + specifier: 8.4.1 + version: 8.4.1 devDependencies: '@types/jsdom': specifier: ^28.0.3 diff --git a/src/cli.ts b/src/cli.ts index 4cbe23d8a..f865c33e8 100644 --- a/src/cli.ts +++ b/src/cli.ts @@ -4,7 +4,7 @@ import { runCliMain } from "./cli-main.js"; void runCliMain({ argv: process.argv.slice(2), env: process.env, - fetch: globalThis.fetch.bind(globalThis), + fetch: globalThis.fetch, stdout: process.stdout, stderr: process.stderr, exit: (code) => process.exit(code), diff --git a/src/daemon/url-fetch-guard.ts b/src/daemon/url-fetch-guard.ts index 26e997f02..cbc98c855 100644 --- a/src/daemon/url-fetch-guard.ts +++ b/src/daemon/url-fetch-guard.ts @@ -1,6 +1,14 @@ import { lookup as dnsLookup } from "node:dns/promises"; import { createRequire } from "node:module"; import { isIP } from "node:net"; +import { fetchWithDnsPinnedAddresses } from "../shared/dns-pinned-fetch.js"; +import { + attachDnsPinnedAddresses, + isNativeOrBoundGlobalFetch, + markFetchAsDnsPinned, + 
resolveDnsPinnedFetch, + supportsDnsPinnedFetch, +} from "../shared/fetch-capabilities.js"; type LookupAddress = { address: string; family?: number }; type LookupFn = (hostname: string) => Promise; @@ -79,7 +87,7 @@ function expandIpv6(address: string): number[] | null { function isBlockedIpv6(address: string): boolean { const parts = expandIpv6(address); if (!parts) return true; - const [first, second, , , , fifth, sixth, eighth] = parts; + const [first, second, third, fourth, , fifth, sixth, eighth] = parts; const allZero = parts.every((part) => part === 0); const loopback = parts.slice(0, 7).every((part) => part === 0) && eighth === 1; const mappedIpv4 = parts.slice(0, 5).every((part) => part === 0) && fifth === 0xffff; @@ -88,13 +96,25 @@ function isBlockedIpv6(address: string): boolean { const ipv4 = `${((sixth ?? 0) >> 8) & 0xff}.${(sixth ?? 0) & 0xff}.${((eighth ?? 0) >> 8) & 0xff}.${(eighth ?? 0) & 0xff}`; return isBlockedIpv4(ipv4); } + const wellKnownNat64 = + first === 0x64 && second === 0xff9b && parts.slice(2, 6).every((part) => part === 0); + if (wellKnownNat64) { + const ipv4 = `${((sixth ?? 0) >> 8) & 0xff}.${(sixth ?? 0) & 0xff}.${((eighth ?? 0) >> 8) & 0xff}.${(eighth ?? 0) & 0xff}`; + return isBlockedIpv4(ipv4); + } return ( allZero || loopback || + (first === 0x64 && second === 0xff9b && third === 1) || + (first === 0x100 && second === 0 && third === 0 && fourth === 0) || ((first ?? 0) & 0xfe00) === 0xfc00 || ((first ?? 0) & 0xffc0) === 0xfe80 || ((first ?? 0) & 0xff00) === 0xff00 || - (first === 0x2001 && second === 0xdb8) + (first === 0x2001 && (second ?? 0) <= 0x01ff) || + (first === 0x2001 && second === 0xdb8) || + first === 0x2002 || + (first === 0x3fff && (second ?? 
0) <= 0x0fff) || + first === 0x5f00 ); } @@ -180,12 +200,19 @@ function getRedirectMode( } function isNativeFetchImpl(fetchImpl: typeof fetch): boolean { - return fetchImpl === globalThis.fetch || fetchImpl.name === "bound fetch"; + return isNativeOrBoundGlobalFetch(fetchImpl); +} + +function isBunRuntime(): boolean { + return typeof (process.versions as { bun?: string }).bun === "string"; } export function createDaemonUrlFetchGuard( fetchImpl: typeof fetch, - { lookup = defaultLookup }: { lookup?: LookupFn } = {}, + { + lookup = defaultLookup, + pinnedFetchImpl, + }: { lookup?: LookupFn; pinnedFetchImpl?: typeof fetch } = {}, ): typeof fetch { const loadUndici = (): UndiciModule => require("undici") as UndiciModule; const createPinnedDispatcher = (addresses: LookupAddress[]) => { @@ -218,19 +245,29 @@ export function createDaemonUrlFetchGuard( const url = getInputUrl(input); const target = await resolveDaemonUrlFetchTarget(url, { lookup }); const redirectMode = getRedirectMode(input, init); - const pinnedInit = - target.addresses.length > 0 - ? ({ + const requiresPinnedDns = target.addresses.length > 0; + const isNativeFetch = isNativeFetchImpl(fetchImpl); + if (requiresPinnedDns && !isNativeFetch && !supportsDnsPinnedFetch(fetchImpl)) { + throw new Error("URL fetch target requires native fetch for DNS pinning"); + } + const pinnedInit = requiresPinnedDns + ? attachDnsPinnedAddresses( + { ...init, dispatcher: createPinnedDispatcher(target.addresses), - } as Parameters[1] & { dispatcher: unknown }) - : init; - const pinnedFetchImpl = - target.addresses.length > 0 && isNativeFetchImpl(fetchImpl) ? loadUndici().fetch : fetchImpl; + } as Parameters[1] & { dispatcher: unknown }, + target.addresses, + ) + : init; + const fetchForPinnedDns = requiresPinnedDns + ? isNativeFetch + ? (pinnedFetchImpl ?? (isBunRuntime() ? fetchWithDnsPinnedAddresses : loadUndici().fetch)) + : (resolveDnsPinnedFetch(fetchImpl) ?? 
fetchImpl) + : fetchImpl; if (redirectMode !== "follow") { - return await pinnedFetchImpl(input, pinnedInit); + return await fetchForPinnedDns(input, pinnedInit); } - const response = await pinnedFetchImpl(input, { ...pinnedInit, redirect: "manual" }); + const response = await fetchForPinnedDns(input, { ...pinnedInit, redirect: "manual" }); if (![301, 302, 303, 307, 308].includes(response.status)) { return response; } @@ -246,5 +283,5 @@ export function createDaemonUrlFetchGuard( const nextUrl = new URL(location, response.url || url).href; return await guardedFetch(nextUrl, { ...init, body: null, method }, redirectCount + 1); }; - return guardedFetch as typeof fetch; + return markFetchAsDnsPinned(guardedFetch as typeof fetch); } diff --git a/src/run/run-metrics.ts b/src/run/run-metrics.ts index 6ca351b38..e66db06ae 100644 --- a/src/run/run-metrics.ts +++ b/src/run/run-metrics.ts @@ -1,4 +1,5 @@ import { normalizeTokenUsage, tallyCosts } from "tokentally"; +import { fetch as undiciFetch } from "undici"; import type { LlmCall, RunMetricsReport } from "../costs.js"; import { buildRunMetricsReport } from "../costs.js"; import { @@ -8,6 +9,13 @@ import { resolveLiteLlmMaxOutputTokensForModelId, resolveLiteLlmPricingForModelId, } from "../pricing/litellm.js"; +import { fetchWithDnsPinnedAddresses } from "../shared/dns-pinned-fetch.js"; +import { + isNativeOrBoundGlobalFetch, + markFetchAsDnsPinned, + resolveDnsPinnedFetch, + supportsDnsPinnedFetch, +} from "../shared/fetch-capabilities.js"; export type RunMetrics = { llmCalls: LlmCall[]; @@ -152,7 +160,7 @@ export function createRunMetrics({ return buildRunMetricsReport({ llmCalls, firecrawlRequests, apifyRequests }); }; - const trackedFetch: typeof fetch = async (input: RequestInfo | URL, init?: RequestInit) => { + const recordFetch = (input: RequestInfo | URL): void => { const url = typeof input === "string" ? input : input instanceof URL ? 
input.toString() : input.url; let hostname: string | null = null; @@ -166,8 +174,36 @@ export function createRunMetrics({ } else if (hostname === "api.apify.com") { apifyRequests += 1; } - return fetchImpl(input as RequestInfo, init); }; + const fetchAndTrack = async ( + targetFetch: typeof fetch, + input: RequestInfo | URL, + init?: RequestInit, + ): Promise => { + recordFetch(input); + return await targetFetch(input as RequestInfo, init); + }; + const trackedFetch: typeof fetch = async (input: RequestInfo | URL, init?: RequestInit) => { + return await fetchAndTrack(fetchImpl, input, init); + }; + const isBunRuntime = typeof (process.versions as { bun?: string }).bun === "string"; + const isNativeFetch = isNativeOrBoundGlobalFetch(fetchImpl); + const pinnedFetchImpl = + resolveDnsPinnedFetch(fetchImpl) ?? + (isNativeFetch + ? isBunRuntime + ? fetchWithDnsPinnedAddresses + : (undiciFetch as unknown as typeof fetch) + : null); + if ((isNativeFetch || supportsDnsPinnedFetch(fetchImpl)) && pinnedFetchImpl) { + const trackedPinnedFetch: typeof fetch = async ( + input: RequestInfo | URL, + init?: RequestInit, + ) => { + return await fetchAndTrack(pinnedFetchImpl, input, init); + }; + markFetchAsDnsPinned(trackedFetch, trackedPinnedFetch); + } return { llmCalls, diff --git a/src/shared/dns-pinned-fetch.ts b/src/shared/dns-pinned-fetch.ts new file mode 100644 index 000000000..3c7f37e0b --- /dev/null +++ b/src/shared/dns-pinned-fetch.ts @@ -0,0 +1,99 @@ +import http from "node:http"; +import https from "node:https"; +import { Readable } from "node:stream"; +import { readDnsPinnedAddresses, type DnsPinnedAddress } from "./fetch-capabilities.js"; + +type PinnedLookupAddress = { address: string; family: number }; +type LookupCallback = ( + error: Error | null, + address: string | PinnedLookupAddress[], + family?: number, +) => void; + +function getInputUrl(input: RequestInfo | URL): string { + if (typeof input === "string") return input; + if (input instanceof URL) return 
input.href; + return input.url; +} + +function createPinnedLookup(addresses: DnsPinnedAddress[]) { + const pinnedAddresses: PinnedLookupAddress[] = addresses.map((entry) => ({ + address: entry.address, + family: entry.family ?? 4, + })); + return (_hostname: string, options: unknown, callback: LookupCallback): void => { + if ((options as { all?: boolean } | undefined)?.all) { + callback(null, pinnedAddresses); + return; + } + const first = pinnedAddresses[0]; + callback(null, first?.address ?? "0.0.0.0", first?.family ?? 4); + }; +} + +function headersFrom(input: RequestInfo | URL, init?: RequestInit): Headers { + if (init?.headers) return new Headers(init.headers); + if (typeof input !== "string" && !(input instanceof URL)) return new Headers(input.headers); + return new Headers(); +} + +function methodFrom(input: RequestInfo | URL, init?: RequestInit): string { + if (init?.method) return init.method; + if (typeof input !== "string" && !(input instanceof URL)) return input.method; + return "GET"; +} + +function hasRequestBody(input: RequestInfo | URL, init?: RequestInit): boolean { + if (init && "body" in init && init.body != null) return true; + if (typeof input !== "string" && !(input instanceof URL)) return input.body != null; + return false; +} + +export async function fetchWithDnsPinnedAddresses( + input: RequestInfo | URL, + init?: RequestInit, +): Promise { + const addresses = readDnsPinnedAddresses(init); + if (!addresses) throw new Error("Pinned DNS fetch missing validated addresses"); + if (hasRequestBody(input, init)) { + throw new Error("Pinned DNS fetch does not support request bodies"); + } + + const url = new URL(getInputUrl(input)); + const client = url.protocol === "https:" ? 
https : http; + const headers: Record = {}; + headersFrom(input, init).forEach((value, key) => { + headers[key] = value; + }); + + return await new Promise((resolve, reject) => { + const req = client.request( + url, + { + headers, + lookup: createPinnedLookup(addresses), + method: methodFrom(input, init), + ...(init?.signal ? { signal: init.signal } : {}), + }, + (res) => { + const responseHeaders = new Headers(); + for (const [key, value] of Object.entries(res.headers)) { + if (Array.isArray(value)) { + for (const entry of value) responseHeaders.append(key, entry); + } else if (typeof value === "string") { + responseHeaders.set(key, value); + } + } + const response = new Response(Readable.toWeb(res) as ReadableStream, { + headers: responseHeaders, + status: res.statusCode ?? 200, + statusText: res.statusMessage, + }); + Object.defineProperty(response, "url", { configurable: true, value: url.href }); + resolve(response); + }, + ); + req.on("error", reject); + req.end(); + }); +} diff --git a/src/shared/fetch-capabilities.ts b/src/shared/fetch-capabilities.ts new file mode 100644 index 000000000..6260fd4b2 --- /dev/null +++ b/src/shared/fetch-capabilities.ts @@ -0,0 +1,54 @@ +const DNS_PINNED_FETCH = Symbol.for("@steipete/summarize.dnsPinnedFetch"); +const DNS_PINNED_ADDRESSES = Symbol.for("@steipete/summarize.dnsPinnedAddresses"); + +export type DnsPinnedAddress = { address: string; family?: number }; + +export function markFetchAsDnsPinned( + fetchImpl: T, + pinnedFetchImpl: typeof fetch = fetchImpl, +): T { + Object.defineProperty(fetchImpl, DNS_PINNED_FETCH, { + configurable: false, + enumerable: false, + value: pinnedFetchImpl, + }); + return fetchImpl; +} + +export function resolveDnsPinnedFetch(fetchImpl: typeof fetch): typeof fetch | null { + const pinnedFetchImpl = (fetchImpl as { [DNS_PINNED_FETCH]?: typeof fetch })[DNS_PINNED_FETCH]; + return pinnedFetchImpl ?? 
null; +} + +export function supportsDnsPinnedFetch(fetchImpl: typeof fetch): boolean { + return resolveDnsPinnedFetch(fetchImpl) !== null; +} + +export function isNativeOrBoundGlobalFetch(fetchImpl: typeof fetch): boolean { + if (fetchImpl === globalThis.fetch) return true; + + const expectedBoundName = `bound ${globalThis.fetch.name || "fetch"}`; + return ( + fetchImpl.name === expectedBoundName && + Function.prototype.toString.call(fetchImpl).includes("[native code]") + ); +} + +export function attachDnsPinnedAddresses( + init: T, + addresses: DnsPinnedAddress[], +): T { + Object.defineProperty(init, DNS_PINNED_ADDRESSES, { + configurable: true, + enumerable: true, + value: addresses, + }); + return init; +} + +export function readDnsPinnedAddresses(init: RequestInit | undefined): DnsPinnedAddress[] | null { + const addresses = (init as { [DNS_PINNED_ADDRESSES]?: DnsPinnedAddress[] } | undefined)?.[ + DNS_PINNED_ADDRESSES + ]; + return Array.isArray(addresses) && addresses.length > 0 ? 
addresses : null; +} diff --git a/tests/daemon.limits.test.ts b/tests/daemon.limits.test.ts index 6da0a55ff..e29880c7b 100644 --- a/tests/daemon.limits.test.ts +++ b/tests/daemon.limits.test.ts @@ -5,6 +5,8 @@ import { join } from "node:path"; import { describe, expect, it, vi } from "vitest"; import { runDaemonServer } from "../src/daemon/server.js"; +const PUBLIC_TEST_URL = "http://93.184.216.34/article"; + const findFreePort = async (): Promise => await new Promise((resolve, reject) => { const server = createServer(); @@ -22,14 +24,14 @@ const findFreePort = async (): Promise => const summarizeBody = () => JSON.stringify({ - url: "https://example.com/article", + url: PUBLIC_TEST_URL, mode: "url", extractOnly: true, }); const summaryBody = () => JSON.stringify({ - url: "https://example.com/article", + url: PUBLIC_TEST_URL, mode: "url", text: "", title: "Article", diff --git a/tests/daemon.logging.e2e.test.ts b/tests/daemon.logging.e2e.test.ts index 7083a5904..d2b7a037f 100644 --- a/tests/daemon.logging.e2e.test.ts +++ b/tests/daemon.logging.e2e.test.ts @@ -5,6 +5,8 @@ import { join } from "node:path"; import { describe, expect, it } from "vitest"; import { runDaemonServer } from "../src/daemon/server.js"; +const PUBLIC_TEST_URL = "http://93.184.216.34/article"; + const findFreePort = async (): Promise => await new Promise((resolve, reject) => { const server = createServer(); @@ -140,7 +142,7 @@ describe("daemon logging", () => { "content-type": "application/json", }, body: JSON.stringify({ - url: "https://example.com/article", + url: PUBLIC_TEST_URL, title: "Example", model: "cli/codex", length: "short", @@ -191,7 +193,7 @@ describe("daemon logging", () => { expect(doneExtended).toBeTruthy(); expect(doneMinimal).toBeTruthy(); - expect(doneExtended?.summary).toContain("https://example.com/article"); + expect(doneExtended?.summary).toContain(PUBLIC_TEST_URL); expect(doneExtended?.extracted).toBeTruthy(); expect(doneMinimal?.summary).toBeUndefined(); 
expect(doneMinimal?.extracted).toBeUndefined(); diff --git a/tests/daemon.url-fetch-guard.test.ts b/tests/daemon.url-fetch-guard.test.ts index 062abf89f..ffbdc8693 100644 --- a/tests/daemon.url-fetch-guard.test.ts +++ b/tests/daemon.url-fetch-guard.test.ts @@ -4,6 +4,24 @@ import { createDaemonUrlFetchGuard, isBlockedNetworkAddress, } from "../src/daemon/url-fetch-guard.js"; +import { markFetchAsDnsPinned } from "../src/shared/fetch-capabilities.js"; + /** Test helper: runs fn while process.versions.bun is temporarily defined as 1.3.0, then restores the previous property descriptor, or deletes the property if none existed, even when fn throws. */ +async function withBunRuntime(fn: () => Promise | T): Promise { + const descriptor = Object.getOwnPropertyDescriptor(process.versions, "bun"); + Object.defineProperty(process.versions, "bun", { + configurable: true, + value: "1.3.0", + }); + try { + return await fn(); + } finally { + if (descriptor) { + Object.defineProperty(process.versions, "bun", descriptor); + } else { + delete (process.versions as { bun?: string }).bun; + } + } +} describe("daemon URL fetch guard", () => { it("blocks local and private network targets", async () => { @@ -15,9 +33,17 @@ describe("daemon URL fetch guard", () => { expect(isBlockedNetworkAddress("::1")).toBe(true); expect(isBlockedNetworkAddress("::ffff:127.0.0.1")).toBe(true); expect(isBlockedNetworkAddress("::7f00:1")).toBe(true); + expect(isBlockedNetworkAddress("64:ff9b::a9fe:a9fe")).toBe(true); + expect(isBlockedNetworkAddress("64:ff9b:1::808:808")).toBe(true); + expect(isBlockedNetworkAddress("100::1")).toBe(true); + expect(isBlockedNetworkAddress("2001:2::1")).toBe(true); + expect(isBlockedNetworkAddress("2002:ac10:1::1")).toBe(true); + expect(isBlockedNetworkAddress("3fff::1")).toBe(true); + expect(isBlockedNetworkAddress("5f00::1")).toBe(true); expect(isBlockedNetworkAddress("fc00::1")).toBe(true); expect(isBlockedNetworkAddress("fe80::1")).toBe(true); expect(isBlockedNetworkAddress("8.8.8.8")).toBe(false); + expect(isBlockedNetworkAddress("64:ff9b::808:808")).toBe(false); expect(isBlockedNetworkAddress("[2606:4700:4700::1111]")).toBe(false); }); @@ -48,7 +74,9 @@ 
describe("daemon URL fetch guard", () => { }); it("pins fetch DNS resolution to the validated address", async () => { - const fetchImpl = vi.fn(async () => new Response("ok", { status: 200 })); + const fetchImpl = markFetchAsDnsPinned( + vi.fn(async () => new Response("ok", { status: 200 })) as unknown as typeof fetch, + ); const guarded = createDaemonUrlFetchGuard(fetchImpl as unknown as typeof fetch, { lookup: async () => [{ address: "93.184.216.34", family: 4 }], }); @@ -64,6 +92,44 @@ describe("daemon URL fetch guard", () => { ); }); + it("routes Bun global fetch through a pinned transport for hostname targets", async () => { + await withBunRuntime(async () => { + const pinnedFetchImpl = vi.fn(async () => new Response("ok", { status: 200 })); + const guarded = createDaemonUrlFetchGuard(globalThis.fetch, { + lookup: async () => [{ address: "93.184.216.34", family: 4 }], + pinnedFetchImpl: pinnedFetchImpl as unknown as typeof fetch, + }); + + await expect(guarded("https://public.example/article")).resolves.toBeInstanceOf(Response); + + expect(pinnedFetchImpl).toHaveBeenCalledWith( + "https://public.example/article", + expect.objectContaining({ + redirect: "manual", + dispatcher: expect.any(Object), + }), + ); + }); + }); + + it("routes bound global fetch through a pinned transport for hostname targets", async () => { + const pinnedFetchImpl = vi.fn(async () => new Response("ok", { status: 200 })); + const guarded = createDaemonUrlFetchGuard(globalThis.fetch.bind(globalThis), { + lookup: async () => [{ address: "93.184.216.34", family: 4 }], + pinnedFetchImpl: pinnedFetchImpl as unknown as typeof fetch, + }); + + await expect(guarded("https://public.example/article")).resolves.toBeInstanceOf(Response); + + expect(pinnedFetchImpl).toHaveBeenCalledWith( + "https://public.example/article", + expect.objectContaining({ + redirect: "manual", + dispatcher: expect.any(Object), + }), + ); + }); + it("revalidates redirect targets instead of auto-following to private hosts", 
async () => { const fetchImpl = vi.fn(async () => { return new Response(null, { diff --git a/tests/dns-pinned-fetch.test.ts b/tests/dns-pinned-fetch.test.ts new file mode 100644 index 000000000..0c2ab4d95 --- /dev/null +++ b/tests/dns-pinned-fetch.test.ts @@ -0,0 +1,78 @@ +import { createServer, type Server } from "node:http"; +import { afterEach, describe, expect, it } from "vitest"; +import { fetchWithDnsPinnedAddresses as fetchWithCorePinnedDns } from "../packages/core/src/content/dns-pinned-fetch.js"; +import { attachDnsPinnedAddresses as attachCoreDnsPinnedAddresses } from "../packages/core/src/content/index.js"; +import { fetchWithDnsPinnedAddresses as fetchWithRootPinnedDns } from "../src/shared/dns-pinned-fetch.js"; +import { attachDnsPinnedAddresses as attachRootDnsPinnedAddresses } from "../src/shared/fetch-capabilities.js"; + +const servers: Server[] = []; + +async function listen(server: Server): Promise { + return await new Promise((resolve, reject) => { + server.once("error", reject); + server.listen(0, "127.0.0.1", () => { + const address = server.address(); + if (!address || typeof address === "string") { + reject(new Error("Failed to bind test server")); + return; + } + servers.push(server); + resolve(address.port); + }); + }); +} + +afterEach(async () => { + await Promise.all( + servers.splice(0).map( + (server) => + new Promise((resolve, reject) => { + server.close((error) => (error ? reject(error) : resolve())); + }), + ), + ); +}); + +describe("DNS-pinned fetch transport", () => { + for (const [label, fetchImpl, attachAddresses] of [ + ["root", fetchWithRootPinnedDns, attachRootDnsPinnedAddresses], + ["core", fetchWithCorePinnedDns, attachCoreDnsPinnedAddresses], + ] as const) { + it(`fetches ${label} HTTP URLs using the validated address list`, async () => { + const server = createServer((req, res) => { + expect(req.headers.host).toMatch(/^pinned\.example:/); + res.writeHead(200, { "content-type": "text/plain" }); + res.end(`ok ${req.url ?? 
""}`); + }); + const port = await listen(server); + + const response = await fetchImpl( + `http://pinned.example:${port}/transcript.txt`, + attachAddresses({ headers: { accept: "text/plain" }, redirect: "manual" }, [ + { address: "127.0.0.1", family: 4 }, + ]), + ); + + expect(response.url).toBe(`http://pinned.example:${port}/transcript.txt`); + expect(response.headers.get("content-type")).toBe("text/plain"); + await expect(response.text()).resolves.toBe("ok /transcript.txt"); + }); + } + + it("rejects calls without attached validated addresses", async () => { + await expect(fetchWithRootPinnedDns("http://pinned.example/")).rejects.toThrow( + /missing validated addresses/i, + ); + }); + + it("rejects body-bearing pinned requests instead of dropping the body", async () => { + await expect( + fetchWithRootPinnedDns( + "http://pinned.example/", + attachRootDnsPinnedAddresses({ body: "payload", method: "POST" }, [ + { address: "127.0.0.1", family: 4 }, + ]), + ), + ).rejects.toThrow(/does not support request bodies/i); + }); +}); diff --git a/tests/fetch-capabilities.test.ts b/tests/fetch-capabilities.test.ts new file mode 100644 index 000000000..b6aee3c1d --- /dev/null +++ b/tests/fetch-capabilities.test.ts @@ -0,0 +1,117 @@ +import { describe, expect, it, vi } from "vitest"; +import { + attachDnsPinnedAddresses as attachCoreDnsPinnedAddresses, + isNativeOrBoundGlobalFetch as isCoreNativeOrBoundGlobalFetch, + markFetchAsDnsPinned as markCoreFetchAsDnsPinned, + readDnsPinnedAddresses as readCoreDnsPinnedAddresses, + resolveDnsPinnedFetch as resolveCoreDnsPinnedFetch, + supportsDnsPinnedFetch as coreSupportsDnsPinnedFetch, +} from "../packages/core/src/content/index.js"; +import { createRunMetrics } from "../src/run/run-metrics.js"; +import { + attachDnsPinnedAddresses as attachRootDnsPinnedAddresses, + isNativeOrBoundGlobalFetch as isRootNativeOrBoundGlobalFetch, + markFetchAsDnsPinned as markRootFetchAsDnsPinned, + readDnsPinnedAddresses as readRootDnsPinnedAddresses, 
+ resolveDnsPinnedFetch as resolveRootDnsPinnedFetch, + supportsDnsPinnedFetch as rootSupportsDnsPinnedFetch, +} from "../src/shared/fetch-capabilities.js"; + /** Test helper: runs fn while process.versions.bun is temporarily defined as 1.3.0, then restores the previous property descriptor, or deletes the property if none existed, even when fn throws. */ +async function withBunRuntime(fn: () => Promise | T): Promise { + const descriptor = Object.getOwnPropertyDescriptor(process.versions, "bun"); + Object.defineProperty(process.versions, "bun", { + configurable: true, + value: "1.3.0", + }); + try { + return await fn(); + } finally { + if (descriptor) { + Object.defineProperty(process.versions, "bun", descriptor); + } else { + delete (process.versions as { bun?: string }).bun; + } + } +} + +describe("DNS-pinned fetch capabilities", () => { + it("does not advertise unwrapped global fetch as an explicit DNS-pinned wrapper", () => { + expect(coreSupportsDnsPinnedFetch(globalThis.fetch)).toBe(false); + expect(rootSupportsDnsPinnedFetch(globalThis.fetch)).toBe(false); + }); + + it("does not advertise Bun global fetch as an explicit DNS-pinned wrapper", async () => { + await withBunRuntime(() => { + expect(coreSupportsDnsPinnedFetch(globalThis.fetch)).toBe(false); + expect(rootSupportsDnsPinnedFetch(globalThis.fetch)).toBe(false); + }); + }); + + it("recognizes global fetch and bound global fetch as native fetch transports", () => { + const boundGlobalFetch = globalThis.fetch.bind(globalThis); + const customFetch = vi.fn(async () => new Response("ok")) as unknown as typeof fetch; + + expect(isCoreNativeOrBoundGlobalFetch(globalThis.fetch)).toBe(true); + expect(isRootNativeOrBoundGlobalFetch(globalThis.fetch)).toBe(true); + expect(isCoreNativeOrBoundGlobalFetch(boundGlobalFetch)).toBe(true); + expect(isRootNativeOrBoundGlobalFetch(boundGlobalFetch)).toBe(true); + expect(isCoreNativeOrBoundGlobalFetch(customFetch)).toBe(false); + expect(isRootNativeOrBoundGlobalFetch(customFetch)).toBe(false); + }); + + it("shares explicit DNS-pinned markers across core and root helpers", () => { + const coreMarked = markCoreFetchAsDnsPinned(async () => new Response("ok")); + const 
rootMarked = markRootFetchAsDnsPinned(async () => new Response("ok")); + + expect(rootSupportsDnsPinnedFetch(coreMarked)).toBe(true); + expect(coreSupportsDnsPinnedFetch(rootMarked)).toBe(true); + expect(resolveRootDnsPinnedFetch(coreMarked)).toBe(coreMarked); + expect(resolveCoreDnsPinnedFetch(rootMarked)).toBe(rootMarked); + }); + + it("preserves pinned address metadata through RequestInit cloning", () => { + const addresses = [{ address: "93.184.216.34", family: 4 }]; + + const rootInit = attachRootDnsPinnedAddresses({ redirect: "manual" }, addresses); + const coreInit = attachCoreDnsPinnedAddresses({ redirect: "manual" }, addresses); + + expect(readRootDnsPinnedAddresses({ ...rootInit })).toEqual(addresses); + expect(readCoreDnsPinnedAddresses({ ...coreInit })).toEqual(addresses); + }); + + it("preserves explicit pinned transports through the run metrics fetch wrapper", async () => { + const baseFetch = vi.fn(async () => new Response("base")) as unknown as typeof fetch; + const pinnedFetch = vi.fn(async () => new Response("pinned")) as unknown as typeof fetch; + const metrics = createRunMetrics({ + env: {}, + fetchImpl: markRootFetchAsDnsPinned(baseFetch, pinnedFetch), + maxOutputTokensArg: null, + }); + const trackedPinnedFetch = resolveRootDnsPinnedFetch(metrics.trackedFetch); + + expect(trackedPinnedFetch).toBeTruthy(); + await expect(trackedPinnedFetch?.("https://api.firecrawl.dev/scrape")).resolves.toBeInstanceOf( + Response, + ); + + expect(baseFetch).not.toHaveBeenCalled(); + expect(pinnedFetch).toHaveBeenCalledWith("https://api.firecrawl.dev/scrape", undefined); + await expect(metrics.buildReport()).resolves.toMatchObject({ + services: { firecrawl: { requests: 1 } }, + }); + }); + + it("preserves Bun native pinned transports through the run metrics fetch wrapper", async () => { + await withBunRuntime(async () => { + const metrics = createRunMetrics({ + env: {}, + fetchImpl: globalThis.fetch, + maxOutputTokensArg: null, + }); + const trackedPinnedFetch = 
resolveRootDnsPinnedFetch(metrics.trackedFetch); + + expect(trackedPinnedFetch).toBeTruthy(); + expect(trackedPinnedFetch).not.toBe(globalThis.fetch); + }); + }); +}); diff --git a/tests/security.rss-transcript-ssrf.test.ts b/tests/security.rss-transcript-ssrf.test.ts new file mode 100644 index 000000000..e1f3023ba --- /dev/null +++ b/tests/security.rss-transcript-ssrf.test.ts @@ -0,0 +1,305 @@ +import { describe, expect, it, vi } from "vitest"; +import { markFetchAsDnsPinned } from "../packages/core/src/content/index.js"; +import { tryFetchTranscriptFromFeedXml } from "../packages/core/src/content/transcript/providers/podcast/rss-transcript.js"; + +const feedWithTranscript = (url: string) => ` + + + + Security Episode + + + +`; + +async function fetchTranscript(args: { + transcriptUrl: string; + fetchImpl: typeof fetch; + lookup?: (hostname: string) => Promise<{ address: string; family?: number }[]>; +}) { + const notes: string[] = []; + const result = await tryFetchTranscriptFromFeedXml({ + fetchImpl: args.fetchImpl, + feedXml: feedWithTranscript(args.transcriptUrl), + episodeTitle: "Security Episode", + notes, + lookup: args.lookup, + }); + return { notes, result }; +} + +async function expectBlockedBeforeFetch(transcriptUrl: string) { + const fetchImpl = vi.fn(async () => new Response("blocked", { status: 200 })); + + const { notes, result } = await fetchTranscript({ + transcriptUrl, + fetchImpl: fetchImpl as unknown as typeof fetch, + }); + + expect(result).toBeNull(); + expect(fetchImpl).not.toHaveBeenCalled(); + expect(notes.join("\n")).toMatch(/blocked local network|must use http|invalid/i); +} + +describe("RSS SSRF guard", () => { + it("blocks loopback URL literals before fetching attacker-controlled transcript URLs", async () => { + const fetchImpl = vi.fn(async () => new Response("internal secret", { status: 200 })); + + const { notes, result } = await fetchTranscript({ + transcriptUrl: "http://127.0.0.1:8080/admin/transcript.txt", + fetchImpl: fetchImpl 
as unknown as typeof fetch, + }); + + expect(result).toBeNull(); + expect(fetchImpl).not.toHaveBeenCalled(); + expect(notes.join("\n")).toMatch(/blocked local network/i); + }); + + it("resolves hostnames and blocks DNS answers that point at private addresses", async () => { + const lookup = vi.fn(async () => [{ address: "169.254.169.254", family: 4 }]); + const fetchImpl = vi.fn(async () => new Response("metadata token", { status: 200 })); + + const { notes, result } = await fetchTranscript({ + transcriptUrl: "https://transcripts.attacker.example/episode.txt", + fetchImpl: fetchImpl as unknown as typeof fetch, + lookup, + }); + + expect(result).toBeNull(); + expect(lookup).toHaveBeenCalledWith("transcripts.attacker.example"); + expect(fetchImpl).not.toHaveBeenCalled(); + expect(notes.join("\n")).toMatch(/blocked local network address/i); + }); + + it("uses manual redirects and revalidates redirected transcript targets", async () => { + const fetchImpl = vi.fn(async () => { + return new Response(null, { + status: 302, + headers: { location: "http://127.0.0.1:7777/private-transcript" }, + }); + }); + + const { notes, result } = await fetchTranscript({ + transcriptUrl: "http://8.8.8.8/redirect-transcript", + fetchImpl: fetchImpl as unknown as typeof fetch, + }); + + expect(result).toBeNull(); + expect(fetchImpl).toHaveBeenCalledWith( + "http://8.8.8.8/redirect-transcript", + expect.objectContaining({ redirect: "manual" }), + ); + expect(notes.join("\n")).toMatch(/blocked local network address/i); + }); + + it("fails closed for custom fetch implementations that cannot guarantee DNS pinning", async () => { + const lookup = vi.fn(async () => [{ address: "93.184.216.34", family: 4 }]); + const fetchImpl = vi.fn(async () => { + return new Response("public transcript", { + status: 200, + headers: { "content-type": "text/plain" }, + }); + }); + + const { notes, result } = await fetchTranscript({ + transcriptUrl: "https://transcripts.example.test/episode.txt", + fetchImpl: 
fetchImpl as unknown as typeof fetch, + lookup, + }); + + expect(result).toBeNull(); + expect(lookup).toHaveBeenCalledWith("transcripts.example.test"); + expect(fetchImpl).not.toHaveBeenCalled(); + expect(notes.join("\n")).toMatch(/requires native fetch for DNS pinning/i); + }); + + it("passes pinned dispatchers through explicit DNS-pinned fetch wrappers", async () => { + const lookup = vi.fn(async () => [{ address: "93.184.216.34", family: 4 }]); + const fetchImpl = markFetchAsDnsPinned( + vi.fn(async () => { + return new Response("public transcript", { + status: 200, + headers: { "content-type": "text/plain" }, + }); + }) as unknown as typeof fetch, + ); + + const { result } = await fetchTranscript({ + transcriptUrl: "https://transcripts.example.test/episode.txt", + fetchImpl, + lookup, + }); + + expect(result?.text).toBe("public transcript"); + expect(lookup).toHaveBeenCalledWith("transcripts.example.test"); + expect(fetchImpl).toHaveBeenCalledWith( + "https://transcripts.example.test/episode.txt", + expect.objectContaining({ + redirect: "manual", + dispatcher: expect.any(Object), + }), + ); + }); + + it("allows public IP literals through custom fetch implementations without DNS pinning", async () => { + const fetchImpl = vi.fn(async () => { + return new Response("public transcript", { + status: 200, + headers: { "content-type": "text/plain" }, + }); + }); + + const { result } = await fetchTranscript({ + transcriptUrl: "http://192.0.1.1/episode.txt", + fetchImpl: fetchImpl as unknown as typeof fetch, + }); + + expect(result?.text).toBe("public transcript"); + expect(fetchImpl).toHaveBeenCalledWith( + "http://192.0.1.1/episode.txt", + expect.objectContaining({ redirect: "manual" }), + ); + }); + + it("blocks reserved documentation IP literals before fetching transcript URLs", async () => { + const fetchImpl = vi.fn(async () => new Response("documentation address", { status: 200 })); + + const { notes, result } = await fetchTranscript({ + transcriptUrl: 
"http://192.0.2.1/episode.txt", + fetchImpl: fetchImpl as unknown as typeof fetch, + }); + + expect(result).toBeNull(); + expect(fetchImpl).not.toHaveBeenCalled(); + expect(notes.join("\n")).toMatch(/blocked local network address/i); + }); + + it("blocks private, local, multicast, and reserved IP literal ranges", async () => { + for (const transcriptUrl of [ + "http://0.0.0.0/transcript.txt", + "http://10.0.0.1/transcript.txt", + "http://100.64.0.1/transcript.txt", + "http://169.254.1.1/transcript.txt", + "http://172.16.0.1/transcript.txt", + "http://172.31.255.255/transcript.txt", + "http://192.168.1.1/transcript.txt", + "http://192.0.0.1/transcript.txt", + "http://198.18.0.1/transcript.txt", + "http://198.19.0.1/transcript.txt", + "http://198.51.100.1/transcript.txt", + "http://203.0.113.1/transcript.txt", + "http://224.0.0.1/transcript.txt", + "http://[::]/transcript.txt", + "http://[::1]/transcript.txt", + "http://[::ffff:127.0.0.1]/transcript.txt", + "http://[::7f00:1]/transcript.txt", + "http://[64:ff9b::a9fe:a9fe]/transcript.txt", + "http://[64:ff9b:1::808:808]/transcript.txt", + "http://[100::1]/transcript.txt", + "http://[2001:2::1]/transcript.txt", + "http://[fc00::1]/transcript.txt", + "http://[fe80::1]/transcript.txt", + "http://[ff00::1]/transcript.txt", + "http://[2001:db8::1]/transcript.txt", + "http://[2002:ac10:1::1]/transcript.txt", + "http://[3fff::1]/transcript.txt", + "http://[5f00::1]/transcript.txt", + ]) { + await expectBlockedBeforeFetch(transcriptUrl); + } + }); + + it("allows well-known NAT64 literals when the embedded IPv4 address is public", async () => { + const fetchImpl = vi.fn(async () => { + return new Response("public transcript", { + status: 200, + headers: { "content-type": "text/plain" }, + }); + }); + + const { result } = await fetchTranscript({ + transcriptUrl: "http://[64:ff9b::808:808]/episode.txt", + fetchImpl: fetchImpl as unknown as typeof fetch, + }); + + expect(result?.text).toBe("public transcript"); + 
expect(fetchImpl).toHaveBeenCalledWith( + "http://[64:ff9b::808:808]/episode.txt", + expect.objectContaining({ redirect: "manual" }), + ); + }); + + it("blocks invalid schemes, invalid URLs, and localhost names", async () => { + for (const transcriptUrl of [ + "not a url", + "file:///private/transcript.txt", + "http://localhost/transcript.txt", + "http://feed.localhost./transcript.txt", + ]) { + await expectBlockedBeforeFetch(transcriptUrl); + } + }); + + it("rejects empty or malformed DNS answers before fetching transcript URLs", async () => { + const fetchImpl = vi.fn(async () => new Response("unexpected", { status: 200 })); + + await expect( + fetchTranscript({ + transcriptUrl: "https://empty.example.test/episode.txt", + fetchImpl: fetchImpl as unknown as typeof fetch, + lookup: async () => [], + }), + ).resolves.toMatchObject({ result: null }); + + await expect( + fetchTranscript({ + transcriptUrl: "https://malformed.example.test/episode.txt", + fetchImpl: fetchImpl as unknown as typeof fetch, + lookup: async () => [{ address: "999.1.1.1", family: 4 }], + }), + ).resolves.toMatchObject({ result: null }); + + expect(fetchImpl).not.toHaveBeenCalled(); + }); + + it("allows public IPv4 and IPv6 literals through custom fetch implementations", async () => { + const fetchImpl = vi.fn(async () => new Response("public transcript", { status: 200 })); + + for (const transcriptUrl of [ + "http://8.8.8.8/transcript.txt", + "http://[2606:4700:4700::1111]/transcript.txt", + "http://[::ffff:8.8.8.8]/transcript.txt", + ]) { + const { result } = await fetchTranscript({ + transcriptUrl, + fetchImpl: fetchImpl as unknown as typeof fetch, + }); + expect(result?.text).toBe("public transcript"); + } + + expect(fetchImpl).toHaveBeenCalledTimes(3); + }); + + it("handles redirects without locations and caps redirect chains", async () => { + const noLocationFetch = vi.fn(async () => new Response(null, { status: 302 })); + const noLocation = await fetchTranscript({ + transcriptUrl: 
"http://8.8.8.8/no-location", + fetchImpl: noLocationFetch as unknown as typeof fetch, + }); + expect(noLocation.result).toBeNull(); + expect(noLocation.notes.join("\n")).toMatch(/transcript fetch failed \(302\)/i); + + const redirectFetch = vi.fn(async () => { + return new Response(null, { + status: 302, + headers: { location: "http://8.8.8.8/next" }, + }); + }); + const redirected = await fetchTranscript({ + transcriptUrl: "http://8.8.8.8/redirect", + fetchImpl: redirectFetch as unknown as typeof fetch, + }); + expect(redirected.result).toBeNull(); + expect(redirected.notes.join("\n")).toMatch(/redirected too many times/i); + }); +}); diff --git a/tests/transcript.podcast-provider.paths-coverage.test.ts b/tests/transcript.podcast-provider.paths-coverage.test.ts index 326e564e4..325ff18d8 100644 --- a/tests/transcript.podcast-provider.paths-coverage.test.ts +++ b/tests/transcript.podcast-provider.paths-coverage.test.ts @@ -36,7 +36,8 @@ describe("podcast transcript provider - coverage paths", () => { it("prefers RSS over Apple Podcasts streamUrl when both are present", async () => { const html = ``; - const feedXml = `<![CDATA[Episode 1]]>`; + const transcriptUrl = "http://93.184.216.34/transcript.vtt"; + const feedXml = `<![CDATA[Episode 1]]>`; const vtt = `WEBVTT 00:00:00.000 --> 00:00:01.000 @@ -52,7 +53,7 @@ Hello from VTT headers: { "content-type": "application/xml" }, }); } - if (url === "https://example.com/transcript.vtt") { + if (url === transcriptUrl) { return new Response(vtt, { status: 200, headers: { "content-type": "text/vtt" } }); } throw new Error(`Unexpected fetch: ${url}`); diff --git a/tests/transcript.podcast-provider.podcast-transcript.test.ts b/tests/transcript.podcast-provider.podcast-transcript.test.ts index cc3163fe3..00d99079b 100644 --- a/tests/transcript.podcast-provider.podcast-transcript.test.ts +++ b/tests/transcript.podcast-provider.podcast-transcript.test.ts @@ -38,7 +38,7 @@ describe("podcast transcript provider: RSS ", () => { 
it("uses JSON transcript from RSS without requiring transcription providers", async () => { const { fetchTranscript } = await importPodcastProviderWithoutTranscription(); - const transcriptUrl = "https://example.com/transcript.json"; + const transcriptUrl = "http://93.184.216.34/transcript.json"; const feedXml = ` @@ -79,7 +79,7 @@ describe("podcast transcript provider: RSS ", () => { const showId = "1794526548"; const episodeId = "1000741457032"; const feedUrl = "https://example.com/feed.xml"; - const transcriptUrl = "https://example.com/transcript.vtt"; + const transcriptUrl = "http://93.184.216.34/transcript.vtt"; const lookupResponse = JSON.stringify({ resultCount: 2, diff --git a/tests/transcript.podcast-provider.test.ts b/tests/transcript.podcast-provider.test.ts index 8e3085bd0..a60db7fe1 100644 --- a/tests/transcript.podcast-provider.test.ts +++ b/tests/transcript.podcast-provider.test.ts @@ -34,11 +34,12 @@ describe("podcast transcript provider module", () => { }); it("extracts Podcasting 2.0 transcript from RSS (JSON) without needing Whisper", async () => { - const xml = `Episode 1`; + const transcriptUrl = "http://93.184.216.34/transcript.json"; + const xml = `Episode 1`; const fetchImpl = vi.fn(async (input: RequestInfo | URL) => { const url = typeof input === "string" ? 
input : input.toString(); - if (url === "https://example.com/transcript.json") { + if (url === transcriptUrl) { return new Response(JSON.stringify([{ text: "Hello" }, { text: "world" }]), { status: 200, headers: { "content-type": "application/json" }, @@ -65,7 +66,7 @@ describe("podcast transcript provider module", () => { it("resolves Apple Podcasts iTunes lookup → RSS transcript (VTT) and avoids preview audio", async () => { const appleUrl = "https://podcasts.apple.com/us/podcast/x/id123?i=456"; const feedUrl = "https://example.com/feed.xml"; - const transcriptUrl = "https://example.com/transcript.vtt"; + const transcriptUrl = "http://93.184.216.34/transcript.vtt"; const itunesPayload = { resultCount: 2,