diff --git a/src/gateway/__tests__/body-size.test.ts b/src/gateway/__tests__/body-size.test.ts
new file mode 100644
index 0000000..ff73b74
--- /dev/null
+++ b/src/gateway/__tests__/body-size.test.ts
@@ -0,0 +1,195 @@
+import { describe, it, expect, beforeAll, beforeEach, afterAll, vi } from "vitest";
+import http from "node:http";
+import { TdaiGateway } from "../server.js";
+
+interface RequestResult {
+  status: number;
+  body: string;
+  headers: http.IncomingHttpHeaders;
+}
+
+/**
+ * POST a body to the gateway. Three body modes:
+ *   - "trusted" (default): send an honest Content-Length (body.length).
+ *   - "lying-cl": set Content-Length to a small value but stream a larger
+ *     body — emulates a hostile client.
+ *   - "no-cl": use Transfer-Encoding: chunked, so the server cannot fail
+ *     fast on Content-Length and must rely on running-total.
+ *
+ * Socket errors resolve (not reject) with status 0 and the error text as body.
+ */
+async function postBody(
+  port: number,
+  path: string,
+  body: Buffer,
+  mode: "trusted" | "lying-cl" | "no-cl" = "trusted",
+  fakeCl?: number,
+): Promise<RequestResult> {
+  return new Promise((resolve) => {
+    const headers: Record<string, string> = { "Content-Type": "application/json" };
+    if (mode === "trusted") {
+      headers["Content-Length"] = String(body.length);
+    } else if (mode === "lying-cl") {
+      headers["Content-Length"] = String(fakeCl ?? 10);
+    } else if (mode === "no-cl") {
+      headers["Transfer-Encoding"] = "chunked";
+    }
+    const req = http.request(
+      { host: "127.0.0.1", port, path, method: "POST", headers },
+      (res) => {
+        const chunks: Buffer[] = [];
+        res.on("data", (c: Buffer) => chunks.push(c));
+        res.on("end", () =>
+          resolve({
+            status: res.statusCode ?? 0,
+            headers: res.headers,
+            body: Buffer.concat(chunks).toString("utf-8"),
+          }),
+        );
+      },
+    );
+    req.on("error", (err) => {
+      // ECONNRESET is expected when the server destroys the socket on
+      // PayloadTooLarge before the body finishes uploading. Treat it as
+      // a successful "rejected" outcome — the test then asserts via
+      // status === 413 on a follow-up request OR via the error here.
+      resolve({ status: 0, headers: {}, body: String(err) });
+    });
+    if (mode === "no-cl") {
+      // Stream chunks to give the server a chance to abort mid-upload.
+      const chunkSize = 4096;
+      let offset = 0;
+      const flush = () => {
+        if (offset >= body.length) {
+          req.end();
+          return;
+        }
+        const ok = req.write(body.subarray(offset, offset + chunkSize));
+        offset += chunkSize;
+        if (ok) setImmediate(flush);
+        else req.once("drain", flush);
+      };
+      flush();
+    } else {
+      req.write(body);
+      req.end();
+    }
+  });
+}
+
+describe("Gateway request body size limit", () => {
+  let gateway: TdaiGateway;
+  const PORT = 18433;
+
+  beforeAll(async () => {
+    // 1 KiB cap — small enough to exercise the limit without producing
+    // megabyte-sized test fixtures, large enough to fit a small valid
+    // JSON body for the happy path.
+    vi.stubEnv("TDAI_GATEWAY_MAX_BODY_BYTES", "1024");
+    gateway = new TdaiGateway({
+      server: { port: PORT, host: "127.0.0.1" },
+    } as never);
+    await gateway.start();
+  });
+
+  // vitest config has `unstubEnvs: true`, which resets stubs before each
+  // test. `parseJsonBody`'s default `maxBytes` arg re-reads the env on
+  // every call, so the stub must be re-applied here.
+  beforeEach(() => {
+    vi.stubEnv("TDAI_GATEWAY_MAX_BODY_BYTES", "1024");
+  });
+
+  afterAll(async () => {
+    await gateway.stop();
+  });
+
+  it("accepts a small JSON body under the limit", async () => {
+    // /recall with missing fields → 400, but that proves the body parsed.
+    const res = await postBody(
+      PORT,
+      "/recall",
+      Buffer.from(JSON.stringify({ query: "hi", session_key: "k" })),
+    );
+    // Body parsed successfully → status is whatever the handler returns,
+    // NOT 413.
+    expect(res.status).not.toBe(413);
+    expect(res.status).not.toBe(500);
+  });
+
+  it("rejects with 413 when Content-Length declares a body over the limit", async () => {
+    // 2 KiB body, honest Content-Length — fail-fast path.
+    const big = Buffer.alloc(2 * 1024, "x");
+    const res = await postBody(PORT, "/recall", big, "trusted");
+    expect(res.status).toBe(413);
+    expect(res.body).toMatch(/exceeds 1024 bytes/);
+  });
+
+  it("rejects with 413 when a chunked body without Content-Length exceeds the limit", async () => {
+    // Streamed mode (no Content-Length) — server tracks running total.
+    // Body is 4 KiB but server cap is 1 KiB.
+    const big = Buffer.alloc(4 * 1024, "x");
+    const res = await postBody(PORT, "/recall", big, "no-cl");
+    // Either the server replied 413 cleanly, or it tore the socket down
+    // mid-upload (ECONNRESET) — both are acceptable signals that the
+    // running-total guard fired. What is NOT acceptable: any other status,
+    // which would imply the full body was buffered.
+    if (res.status === 413) {
+      expect(res.body).toMatch(/exceeds 1024 bytes/);
+    } else {
+      expect(res.status).toBe(0); // ECONNRESET / socket hangup
+    }
+  });
+
+  it("returns 413 from the dispatcher, NOT 500", async () => {
+    // Regression guard: a stray `catch` somewhere upstream wrapping
+    // PayloadTooLargeError into a generic 500 would silently break the
+    // contract for clients that retry on 5xx but not on 4xx.
+    const big = Buffer.alloc(5 * 1024, "x");
+    const res = await postBody(PORT, "/capture", big, "trusted");
+    expect(res.status).toBe(413);
+  });
+
+  it("includes a descriptive error body with the declared limit", async () => {
+    const big = Buffer.alloc(2 * 1024, "x");
+    const res = await postBody(PORT, "/seed", big, "trusted");
+    expect(res.status).toBe(413);
+    // JSON envelope { "error": "..." } from sendError().
+    const parsed = JSON.parse(res.body) as { error: string };
+    expect(parsed.error).toMatch(/1024 bytes/);
+  });
+});
+
+describe("Gateway body-size limit env override", () => {
+  it("respects a TDAI_GATEWAY_MAX_BODY_BYTES override", async () => {
+    // Tiny cap: 50 bytes — even a minimal valid /recall JSON exceeds it.
+    vi.stubEnv("TDAI_GATEWAY_MAX_BODY_BYTES", "50");
+    const PORT = 18434;
+    const gw = new TdaiGateway({ server: { port: PORT, host: "127.0.0.1" } } as never);
+    await gw.start();
+    try {
+      const body = Buffer.from(JSON.stringify({ query: "x".repeat(80), session_key: "k" }));
+      const res = await postBody(PORT, "/recall", body, "trusted");
+      expect(res.status).toBe(413);
+      expect(res.body).toMatch(/exceeds 50 bytes/);
+    } finally {
+      await gw.stop();
+    }
+  });
+
+  it("falls back to the default cap when TDAI_GATEWAY_MAX_BODY_BYTES is malformed", async () => {
+    // Garbage env should NOT cause the daemon to start with an undefined
+    // / NaN cap — that would either reject every request or cap nothing.
+    vi.stubEnv("TDAI_GATEWAY_MAX_BODY_BYTES", "not-a-number");
+    const PORT = 18435;
+    const gw = new TdaiGateway({ server: { port: PORT, host: "127.0.0.1" } } as never);
+    await gw.start();
+    try {
+      // Default is 8 MiB — a small valid body must succeed.
+      const body = Buffer.from(JSON.stringify({ query: "hi", session_key: "k" }));
+      const res = await postBody(PORT, "/recall", body, "trusted");
+      expect(res.status).not.toBe(413);
+    } finally {
+      await gw.stop();
+    }
+  });
+});
diff --git a/src/gateway/server.ts b/src/gateway/server.ts
index bd7d0a0..5ffdb0d 100644
--- a/src/gateway/server.ts
+++ b/src/gateway/server.ts
@@ -63,11 +63,77 @@ function createConsoleLogger(): Logger {
 // Request body parser
 // ============================
 
-async function parseJsonBody<T>(req: http.IncomingMessage): Promise<T> {
+/** Default cap on request body size (bytes). Large enough for /seed payloads
+ *  with hundreds of historical sessions; small enough that a single malicious
+ *  /capture cannot OOM the daemon. Override at runtime via the
+ *  TDAI_GATEWAY_MAX_BODY_BYTES env var. */
+const DEFAULT_MAX_BODY_BYTES = 8 * 1024 * 1024; // 8 MiB
+
+/** Resolve the effective body-size cap from TDAI_GATEWAY_MAX_BODY_BYTES.
+ *  Only a plain positive decimal integer is accepted; anything else (unset,
+ *  empty, "8MB", "1e6", "-1", "0") falls back to DEFAULT_MAX_BODY_BYTES, so a
+ *  value with a unit suffix cannot silently become a tiny cap. */
+function resolveMaxBodyBytes(): number {
+  const raw = (process.env.TDAI_GATEWAY_MAX_BODY_BYTES ?? "").trim();
+  if (!/^\d+$/.test(raw)) return DEFAULT_MAX_BODY_BYTES;
+  const n = Number(raw);
+  return Number.isSafeInteger(n) && n > 0 ? n : DEFAULT_MAX_BODY_BYTES;
+}
+
+/** Thrown when an incoming request body exceeds the size cap. The dispatcher
+ *  catches this and replies with HTTP 413, NOT 500. */
+class PayloadTooLargeError extends Error {
+  constructor(public readonly limitBytes: number) {
+    super(`Request body exceeds ${limitBytes} bytes`);
+    this.name = "PayloadTooLargeError";
+  }
+}
+
+/** Read the request body and parse it as JSON, enforcing a size cap.
+ *  Rejects with PayloadTooLargeError once the declared or received byte
+ *  count exceeds maxBytes (default: resolveMaxBodyBytes(), re-evaluated on
+ *  every call). */
+async function parseJsonBody<T>(
+  req: http.IncomingMessage,
+  maxBytes: number = resolveMaxBodyBytes(),
+): Promise<T> {
   return new Promise((resolve, reject) => {
+    // Fast path: when Content-Length is present and already over the cap,
+    // fail before buffering anything. Node's HTTP parser never delivers more
+    // body bytes than Content-Length declares, so the running-total check
+    // below is what bounds chunked requests that carry no Content-Length.
+    const cl = req.headers["content-length"];
+    if (cl !== undefined) {
+      const declared = Number.parseInt(cl, 10);
+      if (Number.isFinite(declared) && declared > maxBytes) {
+        // Pause the request stream instead of destroying it: the
+        // dispatcher needs the response side of this socket to be writable
+        // to send the 413.
+        req.pause();
+        reject(new PayloadTooLargeError(maxBytes));
+        return;
+      }
+    }
+
     const chunks: Buffer[] = [];
-    req.on("data", (chunk: Buffer) => chunks.push(chunk));
+    let received = 0;
+    let rejected = false;
+    req.on("data", (chunk: Buffer) => {
+      if (rejected) return;
+      received += chunk.length;
+      if (received > maxBytes) {
+        rejected = true;
+        // Same reasoning as the fast path: pause rather than destroy so the
+        // dispatcher can still write the 413 on this socket. No further
+        // chunks are buffered once `rejected` is set.
+        req.pause();
+        reject(new PayloadTooLargeError(maxBytes));
+        return;
+      }
+      chunks.push(chunk);
+    });
     req.on("end", () => {
+      if (rejected) return;
       try {
         const body = Buffer.concat(chunks).toString("utf-8");
         resolve(JSON.parse(body) as T);
@@ -204,6 +270,11 @@ export class TdaiGateway {
         sendError(res, 404, `Not found: ${method} ${pathname}`);
       }
     } catch (err) {
+      if (err instanceof PayloadTooLargeError) {
+        this.logger.warn(`Request body too large [${method} ${pathname}]: ${err.message}`);
+        sendError(res, 413, err.message);
+        return;
+      }
       const msg = err instanceof Error ? err.message : String(err);
       this.logger.error(`Request error [${method} ${pathname}]: ${msg}`);
       sendError(res, 500, msg);