From 456313f0ef90a78c48a025b730a94474fc0019fd Mon Sep 17 00:00:00 2001 From: hinotoi-agent Date: Fri, 5 Jun 2026 09:07:24 +0800 Subject: [PATCH 1/2] fix: guard rss transcript fetches --- .../providers/podcast/rss-transcript.ts | 141 +++++++++++++++++- tests/security.rss-transcript-ssrf.test.ts | 77 ++++++++++ 2 files changed, 213 insertions(+), 5 deletions(-) create mode 100644 tests/security.rss-transcript-ssrf.test.ts diff --git a/packages/core/src/content/transcript/providers/podcast/rss-transcript.ts b/packages/core/src/content/transcript/providers/podcast/rss-transcript.ts index d07701df7..254023823 100644 --- a/packages/core/src/content/transcript/providers/podcast/rss-transcript.ts +++ b/packages/core/src/content/transcript/providers/podcast/rss-transcript.ts @@ -15,6 +15,141 @@ import { type TranscriptCandidate = { url: string; type: string | null }; +const MAX_TRANSCRIPT_REDIRECTS = 10; + +function parseIpv4(address: string): number[] | null { + const parts = address.split("."); + if (parts.length !== 4) return null; + const octets = parts.map((part) => { + if (!/^\d{1,3}$/.test(part)) return null; + const value = Number(part); + return Number.isInteger(value) && value >= 0 && value <= 255 ? value : null; + }); + return octets.every((value) => value != null) ? (octets as number[]) : null; +} + +function isBlockedIpv4Literal(hostname: string): boolean { + const octets = parseIpv4(hostname); + if (!octets) return false; + const [a, b] = octets; + return ( + a === 0 || + a === 10 || + a === 127 || + (a === 100 && b >= 64 && b <= 127) || + (a === 169 && b === 254) || + (a === 172 && b >= 16 && b <= 31) || + (a === 192 && b === 168) || + (a === 192 && b === 0) || + (a === 198 && (b === 18 || b === 19)) || + a >= 224 + ); +} + +function expandIpv6(address: string): number[] | null { + const normalized = address.split("%", 1)[0]?.toLowerCase() ?? ""; + if (!normalized) return null; + const mapped = normalized.match(/^(.*:)(\d{1,3}(?:\.\d{1,3}){3})$/); + const ipv4 = mapped ? parseIpv4(mapped[2] ?? "") : null; + const head = mapped ? (mapped[1] ?? "") : normalized; + const partsAroundGap = head.split("::"); + if (partsAroundGap.length > 2) return null; + const [leftRaw, rightRaw] = partsAroundGap; + const left = leftRaw ? leftRaw.split(":").filter(Boolean) : []; + const right = typeof rightRaw === "string" && rightRaw ? rightRaw.split(":").filter(Boolean) : []; + const ipv4Parts = ipv4 + ? [((ipv4[0] ?? 0) << 8) | (ipv4[1] ?? 0), ((ipv4[2] ?? 0) << 8) | (ipv4[3] ?? 0)] + : []; + const missing = 8 - left.length - right.length - ipv4Parts.length; + if (missing < 0 || (partsAroundGap.length === 1 && missing !== 0)) return null; + const parsePart = (part: string) => (/^[0-9a-f]{1,4}$/.test(part) ? parseInt(part, 16) : -1); + const parts = [ + ...left.map(parsePart), + ...Array.from({ length: missing }, () => 0), + ...right.map(parsePart), + ...ipv4Parts, + ]; + return parts.length === 8 && parts.every((part) => part >= 0 && part <= 0xffff) ? parts : null; +} + +function isBlockedIpv6Literal(hostname: string): boolean { + const parts = expandIpv6(hostname); + if (!parts) return false; + const [first, second, , , , fifth, sixth, eighth] = parts; + const allZero = parts.every((part) => part === 0); + const loopback = parts.slice(0, 7).every((part) => part === 0) && eighth === 1; + const mappedIpv4 = parts.slice(0, 5).every((part) => part === 0) && fifth === 0xffff; + const compatibleIpv4 = parts.slice(0, 6).every((part) => part === 0) && !allZero && !loopback; + if (mappedIpv4 || compatibleIpv4) { + const ipv4 = `${((sixth ?? 0) >> 8) & 0xff}.${(sixth ?? 0) & 0xff}.${((eighth ?? 0) >> 8) & 0xff}.${(eighth ?? 0) & 0xff}`; + return isBlockedIpv4Literal(ipv4); + } + return ( + allZero || + loopback || + ((first ?? 0) & 0xfe00) === 0xfc00 || + ((first ?? 0) & 0xffc0) === 0xfe80 || + ((first ?? 0) & 0xff00) === 0xff00 || + (first === 0x2001 && second === 0xdb8) + ); +} + +function normalizeHostname(hostname: string): string { + return hostname + .trim() + .replace(/^\[|\]$/g, "") + .toLowerCase() + .replace(/\.$/, ""); +} + +function assertSafeTranscriptUrl(rawUrl: string): URL { + let url: URL; + try { + url = new URL(rawUrl); + } catch { + throw new Error("RSS transcript URL is invalid"); + } + if (url.protocol !== "http:" && url.protocol !== "https:") { + throw new Error("RSS transcript URL must use http or https"); + } + const hostname = normalizeHostname(url.hostname); + if ( + hostname === "localhost" || + hostname.endsWith(".localhost") || + isBlockedIpv4Literal(hostname) || + isBlockedIpv6Literal(hostname) + ) { + throw new Error("RSS transcript URL targets a blocked local network host"); + } + return url; +} + +function isRedirectStatus(status: number): boolean { + return status === 301 || status === 302 || status === 303 || status === 307 || status === 308; +} + +async function fetchSafeTranscriptUrl( + fetchImpl: typeof fetch, + transcriptUrl: string, + redirectCount = 0, +): Promise { + const url = assertSafeTranscriptUrl(transcriptUrl); + const res = await fetchImpl(url.href, { + redirect: "manual", + signal: AbortSignal.timeout(TRANSCRIPTION_TIMEOUT_MS), + headers: { accept: "text/vtt,text/plain,application/json;q=0.9,*/*;q=0.8" }, + }); + if (!isRedirectStatus(res.status)) return res; + const location = res.headers.get("location"); + if (!location) return res; + if (redirectCount >= MAX_TRANSCRIPT_REDIRECTS) { + throw new Error("RSS transcript URL redirected too many times"); + } + const nextUrl = new URL(location, res.url || url.href).href; + assertSafeTranscriptUrl(nextUrl); + return await fetchSafeTranscriptUrl(fetchImpl, nextUrl, redirectCount + 1); +} + export async function tryFetchTranscriptFromFeedXml({ fetchImpl, feedXml, @@ -50,11 +185,7 @@ export async function tryFetchTranscriptFromFeedXml({ const transcriptUrl = decodeXmlEntities(preferred.url); try { - const res = await fetchImpl(transcriptUrl, { - redirect: "follow", - signal: AbortSignal.timeout(TRANSCRIPTION_TIMEOUT_MS), - headers: { accept: "text/vtt,text/plain,application/json;q=0.9,*/*;q=0.8" }, - }); + const res = await fetchSafeTranscriptUrl(fetchImpl, transcriptUrl); if (!res.ok) throw new Error(`transcript fetch failed (${res.status})`); const contentType = diff --git a/tests/security.rss-transcript-ssrf.test.ts b/tests/security.rss-transcript-ssrf.test.ts new file mode 100644 index 000000000..cd4afff36 --- /dev/null +++ b/tests/security.rss-transcript-ssrf.test.ts @@ -0,0 +1,77 @@ +import { describe, expect, it, vi } from "vitest"; +import { tryFetchTranscriptFromFeedXml } from "../packages/core/src/content/transcript/providers/podcast/rss-transcript.js"; + +describe("RSS podcast transcript URL handling", () => { + it("rejects loopback transcript URLs from feed XML before fetching them", async () => { + const internalTranscriptUrl = "http://127.0.0.1:65535/admin/metadata?token=[REDACTED]"; + const feedXml = ` + + + + Episode 1 + episode-1 + + + + + `; + + const fetchImpl = vi.fn(async () => { + throw new Error("internal transcript URL should not be fetched"); + }); + + const notes: string[] = []; + const result = await tryFetchTranscriptFromFeedXml({ + feedXml, + episodeTitle: "Episode 1", + fetchImpl: fetchImpl as unknown as typeof fetch, + notes, + }); + + expect(fetchImpl).not.toHaveBeenCalled(); + expect(result).toBeNull(); + expect(notes.join(" ")).toMatch(/blocked/i); + }); + + it("rejects redirects from public transcript URLs to loopback targets", async () => { + const publicTranscriptUrl = "https://transcripts.example/episode.vtt"; + const internalRedirectUrl = "http://127.0.0.1:65535/admin/metadata?token=[REDACTED]"; + const feedXml = ` + + + + Episode 1 + episode-1 + + + + `; + + const fetchImpl = vi.fn(async (input: RequestInfo | URL) => { + const url = input.toString(); + if (url === publicTranscriptUrl) { + return new Response(null, { + status: 302, + headers: { location: internalRedirectUrl }, + }); + } + throw new Error(`unexpected fetch: ${url}`); + }); + + const notes: string[] = []; + const result = await tryFetchTranscriptFromFeedXml({ + feedXml, + episodeTitle: "Episode 1", + fetchImpl: fetchImpl as unknown as typeof fetch, + notes, + }); + + expect(fetchImpl).toHaveBeenCalledTimes(1); + expect(fetchImpl).toHaveBeenCalledWith( + publicTranscriptUrl, + expect.objectContaining({ redirect: "manual" }), + ); + expect(result).toBeNull(); + expect(notes.join(" ")).toMatch(/blocked/i); + }); +}); From b86cd337db28e71e4629ab4b6382ea44a0c5ddee Mon Sep 17 00:00:00 2001 From: hinotoi-agent Date: Fri, 5 Jun 2026 09:54:14 +0800 Subject: [PATCH 2/2] fix: pin rss transcript dns resolution --- packages/core/package.json | 3 +- .../providers/podcast/rss-transcript.ts | 123 ++++++++++++++---- pnpm-lock.yaml | 3 + tests/security.rss-transcript-ssrf.test.ts | 110 ++++++++++++++++ 4 files changed, 215 insertions(+), 24 deletions(-) diff --git a/packages/core/package.json b/packages/core/package.json index 032aafe25..aedcffc55 100644 --- a/packages/core/package.json +++ b/packages/core/package.json @@ -52,7 +52,8 @@ "cheerio": "^1.2.0", "es-toolkit": "^1.46.1", "jsdom": "29.1.1", - "sanitize-html": "^2.17.4" + "sanitize-html": "^2.17.4", + "undici": "8.3.0" }, "devDependencies": { "@types/jsdom": "^28.0.3", diff --git a/packages/core/src/content/transcript/providers/podcast/rss-transcript.ts b/packages/core/src/content/transcript/providers/podcast/rss-transcript.ts index 254023823..4f8ad5b6b 100644 --- a/packages/core/src/content/transcript/providers/podcast/rss-transcript.ts +++ b/packages/core/src/content/transcript/providers/podcast/rss-transcript.ts @@ -1,3 +1,6 @@ +import { lookup as dnsLookup } from "node:dns/promises"; +import { createRequire } from "node:module"; +import { isIP } from "node:net"; import type { TranscriptSegment } from "../../../link-preview/types.js"; import { jsonTranscriptToPlainText, @@ -14,8 +17,24 @@ import { } from "./rss-feed.js"; type TranscriptCandidate = { url: string; type: string | null }; +type LookupAddress = { address: string; family?: number }; +type LookupFn = (hostname: string) => Promise; +type LookupCallback = ( + error: Error | null, + address: string | LookupAddress[], + family?: number, +) => void; +type UndiciAgentConstructor = new (options: { + autoSelectFamily?: boolean; + autoSelectFamilyAttemptTimeout?: number; + connect: { + lookup: (hostname: string, options: unknown, callback: LookupCallback) => void; + }; +}) => unknown; +type UndiciModule = { Agent: UndiciAgentConstructor; fetch: typeof fetch }; const MAX_TRANSCRIPT_REDIRECTS = 10; +const require = createRequire(import.meta.url); function parseIpv4(address: string): number[] | null { const parts = address.split("."); @@ -28,9 +47,9 @@ function parseIpv4(address: string): number[] | null { return octets.every((value) => value != null) ? (octets as number[]) : null; } -function isBlockedIpv4Literal(hostname: string): boolean { - const octets = parseIpv4(hostname); - if (!octets) return false; +function isBlockedIpv4(address: string): boolean { + const octets = parseIpv4(address); + if (!octets) return true; const [a, b] = octets; return ( a === 0 || @@ -72,9 +91,9 @@ function expandIpv6(address: string): number[] | null { return parts.length === 8 && parts.every((part) => part >= 0 && part <= 0xffff) ? parts : null; } -function isBlockedIpv6Literal(hostname: string): boolean { - const parts = expandIpv6(hostname); - if (!parts) return false; +function isBlockedIpv6(address: string): boolean { + const parts = expandIpv6(address); + if (!parts) return true; const [first, second, , , , fifth, sixth, eighth] = parts; const allZero = parts.every((part) => part === 0); const loopback = parts.slice(0, 7).every((part) => part === 0) && eighth === 1; @@ -82,7 +101,7 @@ function isBlockedIpv6Literal(hostname: string): boolean { const compatibleIpv4 = parts.slice(0, 6).every((part) => part === 0) && !allZero && !loopback; if (mappedIpv4 || compatibleIpv4) { const ipv4 = `${((sixth ?? 0) >> 8) & 0xff}.${(sixth ?? 0) & 0xff}.${((eighth ?? 0) >> 8) & 0xff}.${(eighth ?? 0) & 0xff}`; - return isBlockedIpv4Literal(ipv4); + return isBlockedIpv4(ipv4); } return ( allZero || @@ -102,7 +121,22 @@ function normalizeHostname(hostname: string): string { .replace(/\.$/, ""); } -function assertSafeTranscriptUrl(rawUrl: string): URL { +export function isBlockedNetworkAddress(address: string): boolean { + const normalized = address.trim().replace(/^\[|\]$/g, ""); + const family = isIP(normalized); + if (family === 4) return isBlockedIpv4(normalized); + if (family === 6) return isBlockedIpv6(normalized); + return true; +} + +async function defaultLookup(hostname: string): Promise { + return await dnsLookup(hostname, { all: true, verbatim: true }); +} + +async function resolveSafeTranscriptUrl( + rawUrl: string, + { lookup = defaultLookup }: { lookup?: LookupFn } = {}, +): Promise<{ url: URL; addresses: LookupAddress[] }> { let url: URL; try { url = new URL(rawUrl); @@ -113,41 +147,82 @@ function assertSafeTranscriptUrl(rawUrl: string): URL { throw new Error("RSS transcript URL must use http or https"); } const hostname = normalizeHostname(url.hostname); - if ( - hostname === "localhost" || - hostname.endsWith(".localhost") || - isBlockedIpv4Literal(hostname) || - isBlockedIpv6Literal(hostname) - ) { + if (hostname === "localhost" || hostname.endsWith(".localhost")) { throw new Error("RSS transcript URL targets a blocked local network host"); } - return url; + if (isIP(hostname)) { + if (isBlockedNetworkAddress(hostname)) { + throw new Error("RSS transcript URL resolves to a blocked local network address"); + } + return { url, addresses: [] }; + } + const addresses = await lookup(hostname); + if (addresses.length === 0 || addresses.some((entry) => isBlockedNetworkAddress(entry.address))) { + throw new Error("RSS transcript URL resolves to a blocked local network address"); + } + return { url, addresses }; } function isRedirectStatus(status: number): boolean { return status === 301 || status === 302 || status === 303 || status === 307 || status === 308; } +function isNativeFetchImpl(fetchImpl: typeof fetch): boolean { + return fetchImpl === globalThis.fetch || fetchImpl.name === "bound fetch"; +} + +function loadUndici(): UndiciModule { + return require("undici") as UndiciModule; +} + +function createPinnedDispatcher(addresses: LookupAddress[]): unknown { + const { Agent } = loadUndici(); + const pinnedAddresses = addresses.map((address) => ({ + address: address.address, + family: address.family ?? (isIP(address.address) || 4), + })); + return new Agent({ + autoSelectFamily: true, + autoSelectFamilyAttemptTimeout: 250, + connect: { + lookup: (_hostname, options, callback) => { + if ((options as { all?: boolean } | undefined)?.all) { + callback(null, pinnedAddresses); + return; + } + const first = pinnedAddresses[0]; + callback(null, first?.address ?? "0.0.0.0", first?.family ?? 4); + }, + }, + }); +} + async function fetchSafeTranscriptUrl( fetchImpl: typeof fetch, transcriptUrl: string, + { lookup = defaultLookup }: { lookup?: LookupFn } = {}, redirectCount = 0, ): Promise { - const url = assertSafeTranscriptUrl(transcriptUrl); - const res = await fetchImpl(url.href, { - redirect: "manual", + const target = await resolveSafeTranscriptUrl(transcriptUrl, { lookup }); + const pinnedInit = { + redirect: "manual" as const, signal: AbortSignal.timeout(TRANSCRIPTION_TIMEOUT_MS), headers: { accept: "text/vtt,text/plain,application/json;q=0.9,*/*;q=0.8" }, - }); + ...(target.addresses.length > 0 + ? { dispatcher: createPinnedDispatcher(target.addresses) } + : {}), + } as RequestInit & { dispatcher?: unknown }; + const pinnedFetchImpl = + target.addresses.length > 0 && isNativeFetchImpl(fetchImpl) ? loadUndici().fetch : fetchImpl; + const res = await pinnedFetchImpl(target.url.href, pinnedInit); if (!isRedirectStatus(res.status)) return res; const location = res.headers.get("location"); if (!location) return res; if (redirectCount >= MAX_TRANSCRIPT_REDIRECTS) { throw new Error("RSS transcript URL redirected too many times"); } - const nextUrl = new URL(location, res.url || url.href).href; - assertSafeTranscriptUrl(nextUrl); - return await fetchSafeTranscriptUrl(fetchImpl, nextUrl, redirectCount + 1); + const nextUrl = new URL(location, res.url || target.url.href).href; + return await fetchSafeTranscriptUrl(fetchImpl, nextUrl, { lookup }, redirectCount + 1); } export async function tryFetchTranscriptFromFeedXml({ @@ -155,11 +230,13 @@ export async function tryFetchTranscriptFromFeedXml({ feedXml, episodeTitle, notes, + lookup, }: { fetchImpl: typeof fetch; feedXml: string; episodeTitle: string | null; notes: string[]; + lookup?: LookupFn; }): Promise<{ text: string; transcriptUrl: string; @@ -185,7 +262,7 @@ export async function tryFetchTranscriptFromFeedXml({ const transcriptUrl = decodeXmlEntities(preferred.url); try { - const res = await fetchSafeTranscriptUrl(fetchImpl, transcriptUrl); + const res = await fetchSafeTranscriptUrl(fetchImpl, transcriptUrl, { lookup }); if (!res.ok) throw new Error(`transcript fetch failed (${res.status})`); const contentType = diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 7a21aa3bb..c8401eb08 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -157,6 +157,9 @@ importers: sanitize-html: specifier: ^2.17.4 version: 2.17.4 + undici: + specifier: 8.3.0 + version: 8.3.0 devDependencies: '@types/jsdom': specifier: ^28.0.3 diff --git a/tests/security.rss-transcript-ssrf.test.ts b/tests/security.rss-transcript-ssrf.test.ts index cd4afff36..906cfbfae 100644 --- a/tests/security.rss-transcript-ssrf.test.ts +++ b/tests/security.rss-transcript-ssrf.test.ts @@ -59,11 +59,13 @@ describe("RSS podcast transcript URL handling", () => { }); const notes: string[] = []; + const lookup = vi.fn(async () => [{ address: "93.184.216.34", family: 4 }]); const result = await tryFetchTranscriptFromFeedXml({ feedXml, episodeTitle: "Episode 1", fetchImpl: fetchImpl as unknown as typeof fetch, notes, + lookup, }); expect(fetchImpl).toHaveBeenCalledTimes(1); @@ -74,4 +76,112 @@ describe("RSS podcast transcript URL handling", () => { expect(result).toBeNull(); expect(notes.join(" ")).toMatch(/blocked/i); }); + + it("rejects transcript hostnames that resolve to private addresses before fetching", async () => { + const transcriptUrl = "https://attacker-controlled.example/episode.vtt"; + const feedXml = ` + + + + Episode 1 + + + + `; + const lookup = vi.fn(async () => [{ address: "10.0.0.7", family: 4 }]); + const fetchImpl = vi.fn(async () => { + throw new Error("hostname resolving to a private address should not be fetched"); + }); + + const notes: string[] = []; + const result = await tryFetchTranscriptFromFeedXml({ + feedXml, + episodeTitle: "Episode 1", + fetchImpl: fetchImpl as unknown as typeof fetch, + notes, + lookup, + }); + + expect(lookup).toHaveBeenCalledWith("attacker-controlled.example"); + expect(fetchImpl).not.toHaveBeenCalled(); + expect(result).toBeNull(); + expect(notes.join(" ")).toMatch(/blocked local network address/i); + }); + + it("revalidates redirect hostnames with DNS before following to private addresses", async () => { + const publicTranscriptUrl = "https://transcripts.example/episode.vtt"; + const reboundRedirectUrl = "https://rebind.example/internal.vtt"; + const feedXml = ` + + + + Episode 1 + + + + `; + const lookup = vi.fn(async (hostname: string) => { + if (hostname === "transcripts.example") return [{ address: "93.184.216.34", family: 4 }]; + if (hostname === "rebind.example") return [{ address: "127.0.0.1", family: 4 }]; + return []; + }); + const fetchImpl = vi.fn(async (input: RequestInfo | URL) => { + if (input.toString() !== publicTranscriptUrl) throw new Error(`unexpected fetch: ${input}`); + return new Response(null, { status: 302, headers: { location: reboundRedirectUrl } }); + }); + + const notes: string[] = []; + const result = await tryFetchTranscriptFromFeedXml({ + feedXml, + episodeTitle: "Episode 1", + fetchImpl: fetchImpl as unknown as typeof fetch, + notes, + lookup, + }); + + expect(lookup).toHaveBeenCalledWith("transcripts.example"); + expect(lookup).toHaveBeenCalledWith("rebind.example"); + expect(fetchImpl).toHaveBeenCalledTimes(1); + expect(result).toBeNull(); + expect(notes.join(" ")).toMatch(/blocked local network address/i); + }); + + it("pins transcript fetches to the DNS addresses that were validated", async () => { + const transcriptUrl = "https://transcripts.example/episode.vtt"; + const feedXml = ` + + + + Episode 1 + + + + `; + const lookup = vi.fn(async () => [{ address: "93.184.216.34", family: 4 }]); + const fetchImpl = vi.fn( + async () => + new Response("WEBVTT\n\n00:00.000 --> 00:01.000\nPinned", { + status: 200, + headers: { "content-type": "text/vtt" }, + }), + ); + + const notes: string[] = []; + const result = await tryFetchTranscriptFromFeedXml({ + feedXml, + episodeTitle: "Episode 1", + fetchImpl: fetchImpl as unknown as typeof fetch, + notes, + lookup, + }); + + expect(result?.text).toBe("Pinned"); + expect(fetchImpl).toHaveBeenCalledWith( + transcriptUrl, + expect.objectContaining({ + redirect: "manual", + dispatcher: expect.any(Object), + }), + ); + }); });