diff --git a/packages/core/package.json b/packages/core/package.json
index 032aafe25..aedcffc55 100644
--- a/packages/core/package.json
+++ b/packages/core/package.json
@@ -52,7 +52,8 @@
     "cheerio": "^1.2.0",
     "es-toolkit": "^1.46.1",
     "jsdom": "29.1.1",
-    "sanitize-html": "^2.17.4"
+    "sanitize-html": "^2.17.4",
+    "undici": "8.3.0"
   },
   "devDependencies": {
     "@types/jsdom": "^28.0.3",
diff --git a/packages/core/src/content/transcript/providers/podcast/rss-transcript.ts b/packages/core/src/content/transcript/providers/podcast/rss-transcript.ts
index d07701df7..4f8ad5b6b 100644
--- a/packages/core/src/content/transcript/providers/podcast/rss-transcript.ts
+++ b/packages/core/src/content/transcript/providers/podcast/rss-transcript.ts
@@ -1,3 +1,6 @@
+import { lookup as dnsLookup } from "node:dns/promises";
+import { createRequire } from "node:module";
+import { isIP } from "node:net";
 import type { TranscriptSegment } from "../../../link-preview/types.js";
 import {
   jsonTranscriptToPlainText,
@@ -14,17 +17,289 @@ import {
 } from "./rss-feed.js";
 
 type TranscriptCandidate = { url: string; type: string | null };
+/** One resolved address, in the shape `dns.lookup(host, { all: true })` yields. */
+type LookupAddress = { address: string; family?: number };
+/** Resolves a hostname to all of its addresses; injectable so callers can stub DNS. */
+type LookupFn = (hostname: string) => Promise<LookupAddress[]>;
+/** Callback handed to the connect-time `lookup` hook: one address + family, or a list. */
+type LookupCallback = (
+  error: Error | null,
+  address: string | LookupAddress[],
+  family?: number,
+) => void;
+/** Structural type covering only the undici `Agent` options this module passes. */
+type UndiciAgentConstructor = new (options: {
+  autoSelectFamily?: boolean;
+  autoSelectFamilyAttemptTimeout?: number;
+  connect: {
+    lookup: (hostname: string, options: unknown, callback: LookupCallback) => void;
+  };
+}) => unknown;
+/** The two undici exports this module uses. */
+type UndiciModule = { Agent: UndiciAgentConstructor; fetch: typeof fetch };
+
+/** Upper bound on redirect hops followed for a single transcript fetch. */
+const MAX_TRANSCRIPT_REDIRECTS = 10;
+// ES modules have no ambient `require`; build one so undici can be loaded synchronously.
+const require = createRequire(import.meta.url);
+
+/** Parses strict dotted-decimal IPv4 into four octets; returns null for anything else. */
+function parseIpv4(address: string): number[] | null {
+  const parts = address.split(".");
+  if (parts.length !== 4) return null;
+  const octets = parts.map((part) => {
+    if (!/^\d{1,3}$/.test(part)) return null;
+    const value = Number(part);
+    return Number.isInteger(value) && value >= 0 && value <= 255 ? value : null;
+  });
+  return octets.every((value) => value != null) ? (octets as number[]) : null;
+}
+
+/**
+ * Returns true when an IPv4 address must not be fetched: unparseable input, plus the
+ * "this network", private, CGNAT, loopback, link-local, IETF protocol-assignment,
+ * documentation (TEST-NET-1/2/3), benchmarking, multicast and reserved ranges.
+ */
+function isBlockedIpv4(address: string): boolean {
+  const octets = parseIpv4(address);
+  if (!octets) return true;
+  const [a, b, c] = octets;
+  return (
+    a === 0 ||
+    a === 10 ||
+    a === 127 ||
+    (a === 100 && b >= 64 && b <= 127) ||
+    (a === 169 && b === 254) ||
+    (a === 172 && b >= 16 && b <= 31) ||
+    (a === 192 && b === 168) ||
+    // Only 192.0.0.0/24 and 192.0.2.0/24 are special-purpose; the rest of 192.0.0.0/16 is
+    // ordinary public address space and must stay reachable.
+    (a === 192 && b === 0 && (c === 0 || c === 2)) ||
+    (a === 198 && (b === 18 || b === 19)) ||
+    (a === 198 && b === 51 && c === 100) ||
+    (a === 203 && b === 0 && c === 113) ||
+    a >= 224
+  );
+}
+
+/**
+ * Expands a textual IPv6 address (zone id ignored, "::" gap and dotted IPv4 tail
+ * supported) into eight 16-bit groups; returns null when it cannot be expanded.
+ */
+function expandIpv6(address: string): number[] | null {
+  const normalized = address.split("%", 1)[0]?.toLowerCase() ?? "";
+  if (!normalized) return null;
+  const mapped = normalized.match(/^(.*:)(\d{1,3}(?:\.\d{1,3}){3})$/);
+  const ipv4 = mapped ? parseIpv4(mapped[2] ?? "") : null;
+  const head = mapped ? (mapped[1] ?? "") : normalized;
+  const partsAroundGap = head.split("::");
+  if (partsAroundGap.length > 2) return null;
+  const [leftRaw, rightRaw] = partsAroundGap;
+  const left = leftRaw ? leftRaw.split(":").filter(Boolean) : [];
+  const right = typeof rightRaw === "string" && rightRaw ? rightRaw.split(":").filter(Boolean) : [];
+  const ipv4Parts = ipv4
+    ? [((ipv4[0] ?? 0) << 8) | (ipv4[1] ?? 0), ((ipv4[2] ?? 0) << 8) | (ipv4[3] ?? 0)]
+    : [];
+  const missing = 8 - left.length - right.length - ipv4Parts.length;
+  if (missing < 0 || (partsAroundGap.length === 1 && missing !== 0)) return null;
+  const parsePart = (part: string) => (/^[0-9a-f]{1,4}$/.test(part) ? parseInt(part, 16) : -1);
+  const parts = [
+    ...left.map(parsePart),
+    ...Array.from({ length: missing }, () => 0),
+    ...right.map(parsePart),
+    ...ipv4Parts,
+  ];
+  return parts.length === 8 && parts.every((part) => part >= 0 && part <= 0xffff) ? parts : null;
+}
+
+/**
+ * Returns true when an IPv6 address must not be fetched: unparseable input, the
+ * unspecified and loopback addresses, unique-local, link-local, multicast and
+ * documentation ranges, and any address that embeds a blocked IPv4 address
+ * (IPv4-mapped, IPv4-compatible, NAT64 well-known prefix, 6to4).
+ */
+function isBlockedIpv6(address: string): boolean {
+  const parts = expandIpv6(address);
+  if (!parts) return true;
+  const [g0 = 0, g1 = 0, g2 = 0, , , g5 = 0, g6 = 0, g7 = 0] = parts;
+  const toIpv4 = (high: number, low: number): string =>
+    `${(high >> 8) & 0xff}.${high & 0xff}.${(low >> 8) & 0xff}.${low & 0xff}`;
+  const allZero = parts.every((part) => part === 0);
+  const loopback = parts.slice(0, 7).every((part) => part === 0) && g7 === 1;
+  const mappedIpv4 = parts.slice(0, 5).every((part) => part === 0) && g5 === 0xffff;
+  const compatibleIpv4 = parts.slice(0, 6).every((part) => part === 0) && !allZero && !loopback;
+  // 64:ff9b::/96 (RFC 6052): a NAT64 gateway forwards to the IPv4 address in the last 32 bits.
+  const nat64 = g0 === 0x64 && g1 === 0xff9b && parts.slice(2, 6).every((part) => part === 0);
+  if (mappedIpv4 || compatibleIpv4 || nat64) return isBlockedIpv4(toIpv4(g6, g7));
+  // 2002::/16 (6to4): the next 32 bits are the IPv4 address of the tunnel endpoint.
+  if (g0 === 0x2002) return isBlockedIpv4(toIpv4(g1, g2));
+  return (
+    allZero ||
+    loopback ||
+    (g0 & 0xfe00) === 0xfc00 ||
+    (g0 & 0xffc0) === 0xfe80 ||
+    (g0 & 0xff00) === 0xff00 ||
+    (g0 === 0x2001 && g1 === 0xdb8)
+  );
+}
+
+/** Normalizes a hostname: trim, strip IPv6 brackets, lowercase, drop one trailing dot. */
+function normalizeHostname(hostname: string): string {
+  return hostname
+    .trim()
+    .replace(/^\[|\]$/g, "")
+    .toLowerCase()
+    .replace(/\.$/, "");
+}
+
+/**
+ * Reports whether an IP literal (brackets allowed) must not be contacted. Input that is
+ * not a valid IPv4 or IPv6 literal is treated as blocked.
+ */
+export function isBlockedNetworkAddress(address: string): boolean {
+  const normalized = address.trim().replace(/^\[|\]$/g, "");
+  const family = isIP(normalized);
+  if (family === 4) return isBlockedIpv4(normalized);
+  if (family === 6) return isBlockedIpv6(normalized);
+  return true;
+}
+
+/** Default resolver: every address for the host, in the order the resolver returned them. */
+async function defaultLookup(hostname: string): Promise<LookupAddress[]> {
+  return await dnsLookup(hostname, { all: true, verbatim: true });
+}
+
+/**
+ * Validates a transcript URL before any request is made.
+ *
+ * @returns The parsed URL and the DNS addresses that were vetted (empty for IP literals).
+ * @throws Error if the URL is malformed, is not http(s), names localhost, or is or resolves
+ *   to a blocked address; a host with no addresses or any blocked address is rejected.
+ */
+async function resolveSafeTranscriptUrl(
+  rawUrl: string,
+  { lookup = defaultLookup }: { lookup?: LookupFn } = {},
+): Promise<{ url: URL; addresses: LookupAddress[] }> {
+  let url: URL;
+  try {
+    url = new URL(rawUrl);
+  } catch {
+    throw new Error("RSS transcript URL is invalid");
+  }
+  if (url.protocol !== "http:" && url.protocol !== "https:") {
+    throw new Error("RSS transcript URL must use http or https");
+  }
+  const hostname = normalizeHostname(url.hostname);
+  if (hostname === "localhost" || hostname.endsWith(".localhost")) {
+    throw new Error("RSS transcript URL targets a blocked local network host");
+  }
+  if (isIP(hostname)) {
+    if (isBlockedNetworkAddress(hostname)) {
+      throw new Error("RSS transcript URL resolves to a blocked local network address");
+    }
+    return { url, addresses: [] };
+  }
+  const addresses = await lookup(hostname);
+  if (addresses.length === 0 || addresses.some((entry) => isBlockedNetworkAddress(entry.address))) {
+    throw new Error("RSS transcript URL resolves to a blocked local network address");
+  }
+  return { url, addresses };
+}
+
+/** True for the HTTP statuses treated as redirects here: 301, 302, 303, 307 and 308. */
+function isRedirectStatus(status: number): boolean {
+  return status === 301 || status === 302 || status === 303 || status === 307 || status === 308;
+}
+
+/** True for the global fetch, or a function named "bound fetch" (what `fetch.bind` yields). */
+function isNativeFetchImpl(fetchImpl: typeof fetch): boolean {
+  return fetchImpl === globalThis.fetch || fetchImpl.name === "bound fetch";
+}
+
+/** Loads undici through `require` when first needed rather than via a top-level import. */
+function loadUndici(): UndiciModule {
+  return require("undici") as UndiciModule;
+}
+
+/**
+ * Builds an undici Agent whose connect-time lookup answers only with the given, already
+ * vetted addresses, so the connection cannot be steered to a different address by a
+ * second DNS resolution.
+ */
+function createPinnedDispatcher(addresses: LookupAddress[]): unknown {
+  const { Agent } = loadUndici();
+  const pinnedAddresses = addresses.map((address) => ({
+    address: address.address,
+    family: address.family ?? (isIP(address.address) || 4),
+  }));
+  return new Agent({
+    autoSelectFamily: true,
+    autoSelectFamilyAttemptTimeout: 250,
+    connect: {
+      lookup: (_hostname, options, callback) => {
+        if ((options as { all?: boolean } | undefined)?.all) {
+          callback(null, pinnedAddresses);
+          return;
+        }
+        const first = pinnedAddresses[0];
+        callback(null, first?.address ?? "0.0.0.0", first?.family ?? 4);
+      },
+    },
+  });
+}
+
+/**
+ * Fetches a transcript URL, validating the target and every redirect hop first.
+ *
+ * Redirects are followed manually (at most MAX_TRANSCRIPT_REDIRECTS) so each `Location` is
+ * re-validated. When DNS was consulted the request carries a dispatcher pinned to the
+ * vetted addresses, and a native `fetchImpl` is replaced by undici's fetch for that
+ * request (presumably so the pinned dispatcher is honoured).
+ *
+ * @throws Error if any hop fails validation or the redirect limit is exceeded.
+ */
+async function fetchSafeTranscriptUrl(
+  fetchImpl: typeof fetch,
+  transcriptUrl: string,
+  { lookup = defaultLookup }: { lookup?: LookupFn } = {},
+  redirectCount = 0,
+): Promise<Response> {
+  const target = await resolveSafeTranscriptUrl(transcriptUrl, { lookup });
+  const pinnedInit = {
+    redirect: "manual" as const,
+    signal: AbortSignal.timeout(TRANSCRIPTION_TIMEOUT_MS),
+    headers: { accept: "text/vtt,text/plain,application/json;q=0.9,*/*;q=0.8" },
+    ...(target.addresses.length > 0
+      ? { dispatcher: createPinnedDispatcher(target.addresses) }
+      : {}),
+  } as RequestInit & { dispatcher?: unknown };
+  const pinnedFetchImpl =
+    target.addresses.length > 0 && isNativeFetchImpl(fetchImpl) ? loadUndici().fetch : fetchImpl;
+  const res = await pinnedFetchImpl(target.url.href, pinnedInit);
+  if (!isRedirectStatus(res.status)) return res;
+  const location = res.headers.get("location");
+  if (!location) return res;
+  // The redirect body is never read; cancel it so the connection is released promptly.
+  await res.body?.cancel().catch(() => undefined);
+  if (redirectCount >= MAX_TRANSCRIPT_REDIRECTS) {
+    throw new Error("RSS transcript URL redirected too many times");
+  }
+  const nextUrl = new URL(location, res.url || target.url.href).href;
+  return await fetchSafeTranscriptUrl(fetchImpl, nextUrl, { lookup }, redirectCount + 1);
+}
 
 export async function tryFetchTranscriptFromFeedXml({
   fetchImpl,
   feedXml,
   episodeTitle,
   notes,
+  lookup,
 }: {
   fetchImpl: typeof fetch;
   feedXml: string;
   episodeTitle: string | null;
   notes: string[];
+  lookup?: LookupFn;
 }): Promise<{
   text: string;
   transcriptUrl: string;
@@ -50,11 +325,7 @@ export async function tryFetchTranscriptFromFeedXml({
 
   const transcriptUrl = decodeXmlEntities(preferred.url);
   try {
-    const res = await fetchImpl(transcriptUrl, {
-      redirect: "follow",
-      signal: AbortSignal.timeout(TRANSCRIPTION_TIMEOUT_MS),
-      headers: { accept: "text/vtt,text/plain,application/json;q=0.9,*/*;q=0.8" },
-    });
+    const res = await fetchSafeTranscriptUrl(fetchImpl, transcriptUrl, { lookup });
     if (!res.ok) throw new Error(`transcript fetch failed (${res.status})`);
const contentType = diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 7a21aa3bb..c8401eb08 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -157,6 +157,9 @@ importers: sanitize-html: specifier: ^2.17.4 version: 2.17.4 + undici: + specifier: 8.3.0 + version: 8.3.0 devDependencies: '@types/jsdom': specifier: ^28.0.3 diff --git a/tests/security.rss-transcript-ssrf.test.ts b/tests/security.rss-transcript-ssrf.test.ts new file mode 100644 index 000000000..906cfbfae --- /dev/null +++ b/tests/security.rss-transcript-ssrf.test.ts @@ -0,0 +1,187 @@ +import { describe, expect, it, vi } from "vitest"; +import { tryFetchTranscriptFromFeedXml } from "../packages/core/src/content/transcript/providers/podcast/rss-transcript.js"; + +describe("RSS podcast transcript URL handling", () => { + it("rejects loopback transcript URLs from feed XML before fetching them", async () => { + const internalTranscriptUrl = "http://127.0.0.1:65535/admin/metadata?token=[REDACTED]"; + const feedXml = ` + + + + Episode 1 + episode-1 + + + + + `; + + const fetchImpl = vi.fn(async () => { + throw new Error("internal transcript URL should not be fetched"); + }); + + const notes: string[] = []; + const result = await tryFetchTranscriptFromFeedXml({ + feedXml, + episodeTitle: "Episode 1", + fetchImpl: fetchImpl as unknown as typeof fetch, + notes, + }); + + expect(fetchImpl).not.toHaveBeenCalled(); + expect(result).toBeNull(); + expect(notes.join(" ")).toMatch(/blocked/i); + }); + + it("rejects redirects from public transcript URLs to loopback targets", async () => { + const publicTranscriptUrl = "https://transcripts.example/episode.vtt"; + const internalRedirectUrl = "http://127.0.0.1:65535/admin/metadata?token=[REDACTED]"; + const feedXml = ` + + + + Episode 1 + episode-1 + + + + `; + + const fetchImpl = vi.fn(async (input: RequestInfo | URL) => { + const url = input.toString(); + if (url === publicTranscriptUrl) { + return new Response(null, { + status: 302, + headers: { location: 
internalRedirectUrl }, + }); + } + throw new Error(`unexpected fetch: ${url}`); + }); + + const notes: string[] = []; + const lookup = vi.fn(async () => [{ address: "93.184.216.34", family: 4 }]); + const result = await tryFetchTranscriptFromFeedXml({ + feedXml, + episodeTitle: "Episode 1", + fetchImpl: fetchImpl as unknown as typeof fetch, + notes, + lookup, + }); + + expect(fetchImpl).toHaveBeenCalledTimes(1); + expect(fetchImpl).toHaveBeenCalledWith( + publicTranscriptUrl, + expect.objectContaining({ redirect: "manual" }), + ); + expect(result).toBeNull(); + expect(notes.join(" ")).toMatch(/blocked/i); + }); + + it("rejects transcript hostnames that resolve to private addresses before fetching", async () => { + const transcriptUrl = "https://attacker-controlled.example/episode.vtt"; + const feedXml = ` + + + + Episode 1 + + + + `; + const lookup = vi.fn(async () => [{ address: "10.0.0.7", family: 4 }]); + const fetchImpl = vi.fn(async () => { + throw new Error("hostname resolving to a private address should not be fetched"); + }); + + const notes: string[] = []; + const result = await tryFetchTranscriptFromFeedXml({ + feedXml, + episodeTitle: "Episode 1", + fetchImpl: fetchImpl as unknown as typeof fetch, + notes, + lookup, + }); + + expect(lookup).toHaveBeenCalledWith("attacker-controlled.example"); + expect(fetchImpl).not.toHaveBeenCalled(); + expect(result).toBeNull(); + expect(notes.join(" ")).toMatch(/blocked local network address/i); + }); + + it("revalidates redirect hostnames with DNS before following to private addresses", async () => { + const publicTranscriptUrl = "https://transcripts.example/episode.vtt"; + const reboundRedirectUrl = "https://rebind.example/internal.vtt"; + const feedXml = ` + + + + Episode 1 + + + + `; + const lookup = vi.fn(async (hostname: string) => { + if (hostname === "transcripts.example") return [{ address: "93.184.216.34", family: 4 }]; + if (hostname === "rebind.example") return [{ address: "127.0.0.1", family: 4 }]; + 
return []; + }); + const fetchImpl = vi.fn(async (input: RequestInfo | URL) => { + if (input.toString() !== publicTranscriptUrl) throw new Error(`unexpected fetch: ${input}`); + return new Response(null, { status: 302, headers: { location: reboundRedirectUrl } }); + }); + + const notes: string[] = []; + const result = await tryFetchTranscriptFromFeedXml({ + feedXml, + episodeTitle: "Episode 1", + fetchImpl: fetchImpl as unknown as typeof fetch, + notes, + lookup, + }); + + expect(lookup).toHaveBeenCalledWith("transcripts.example"); + expect(lookup).toHaveBeenCalledWith("rebind.example"); + expect(fetchImpl).toHaveBeenCalledTimes(1); + expect(result).toBeNull(); + expect(notes.join(" ")).toMatch(/blocked local network address/i); + }); + + it("pins transcript fetches to the DNS addresses that were validated", async () => { + const transcriptUrl = "https://transcripts.example/episode.vtt"; + const feedXml = ` + + + + Episode 1 + + + + `; + const lookup = vi.fn(async () => [{ address: "93.184.216.34", family: 4 }]); + const fetchImpl = vi.fn( + async () => + new Response("WEBVTT\n\n00:00.000 --> 00:01.000\nPinned", { + status: 200, + headers: { "content-type": "text/vtt" }, + }), + ); + + const notes: string[] = []; + const result = await tryFetchTranscriptFromFeedXml({ + feedXml, + episodeTitle: "Episode 1", + fetchImpl: fetchImpl as unknown as typeof fetch, + notes, + lookup, + }); + + expect(result?.text).toBe("Pinned"); + expect(fetchImpl).toHaveBeenCalledWith( + transcriptUrl, + expect.objectContaining({ + redirect: "manual", + dispatcher: expect.any(Object), + }), + ); + }); +});